Skip to content

Commit

Permalink
🔀 Merge pull request #34 from cnr-ibba/issue-33
Browse files Browse the repository at this point in the history
new module: ESTSFS
  • Loading branch information
bunop authored Feb 26, 2024
2 parents 10aac07 + 78f9168 commit cc54f79
Show file tree
Hide file tree
Showing 8 changed files with 179 additions and 7 deletions.
14 changes: 7 additions & 7 deletions .github/workflows/pytest-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ jobs:
# Expose matched filters as job 'modules' output variable
modules: ${{ steps.filter.outputs.changes }}
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: dorny/paths-filter@v2
- uses: dorny/paths-filter@v3
id: filter
with:
filters: "tests/config/pytest_modules.yml"

test:
runs-on: ubuntu-20.04
runs-on: ubuntu-latest

name: ${{ matrix.tags }} ${{ matrix.profile }}
needs: changes
Expand All @@ -39,14 +39,14 @@ jobs:
env:
NXF_ANSI_LOG: false
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: "3.x"

- uses: actions/cache@v3
- uses: actions/cache@v4
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
Expand All @@ -56,7 +56,7 @@ jobs:
- name: Install Python dependencies
run: python -m pip install --upgrade pip pytest-workflow

- uses: actions/cache@v3
- uses: actions/cache@v4
with:
path: /usr/local/bin/nextflow
key: ${{ runner.os }}
Expand Down
48 changes: 48 additions & 0 deletions modules/cnr-ibba/estsfs/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Runs est-sfs on a config file, a data file and a seed file, emitting the
// estimated uSFS, the per-site p-values and the seed file used for the run.
process ESTSFS {
    tag "$meta.id"
    label 'process_single'
    label 'process_long'

    // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
    container "docker.io/bunop/est-sfs:2.0.5"

    input:
    // meta     : Groovy map with sample information (meta.id is used as default prefix)
    // e_config : est-sfs configuration file
    // data     : est-sfs input data file
    // seed     : file holding the random number seed
    tuple val(meta), path(e_config), path(data), path(seed)

    output:
    // `prefix` is resolved from the script/stub block, so it must be assigned
    // there WITHOUT `def` to stay visible to these output declarations.
    tuple val(meta), path("${prefix}_sfs.txt")    , emit: sfs_out
    tuple val(meta), path("${prefix}_pvalues.txt"), emit: pvalues_out
    tuple val(meta), path("${prefix}.seed")       , emit: seed
    path "versions.yml"                           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // est-sfs is invoked with positional arguments only; `args` is read for
    // convention but is currently not passed to the command line.
    def args = task.ext.args ?: ''
    // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
    // NOTE(review): container tag is 2.0.5 while this string is 2.05 - confirm which spelling is intended.
    def VERSION = '2.05'
    prefix = task.ext.prefix ?: "${meta.id}"
    """
    # Work on a copy of the seed file named after the prefix; the copy is the
    # file handed to est-sfs and emitted on the `seed` channel.
    cp ${seed} ${prefix}.seed
    est-sfs ${e_config} ${data} ${prefix}.seed ${prefix}_sfs.txt ${prefix}_pvalues.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        est-sfs: $VERSION
    END_VERSIONS
    """

    stub:
    // VERSION must be defined here as well: variables declared in `script:`
    // are not visible in `stub:`, and the heredoc below interpolates it.
    def VERSION = '2.05'
    // Assigned without `def` so the output block can resolve `${prefix}`.
    prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}_sfs.txt
    touch ${prefix}_pvalues.txt
    touch ${prefix}.seed

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        est-sfs: $VERSION
    END_VERSIONS
    """
}
53 changes: 53 additions & 0 deletions modules/cnr-ibba/estsfs/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
---
# Module metadata for ESTSFS (modules/cnr-ibba/estsfs/main.nf).
# Input and output entries mirror the channels declared by the process.
name: "estsfs"
description: estimation of the unfolded site frequency spectrum
keywords:
  - site frequency spectrum
  - ancestral alleles
  - derived alleles
tools:
  - "estsfs":
      description: "est-sfs (Keightley and Jackson, 2018) is a stand-alone implementation of a method to infer the unfolded site frequency spectrum (the uSFS) and ancestral state probabilities by maximum likelihood (ML)."
      homepage: "https://sourceforge.net/projects/est-usfs/"
      documentation: "https://sourceforge.net/projects/est-usfs/"
      tool_dev_url: "https://sourceforge.net/projects/est-usfs/files/est-sfs-release-2.04.tar.gz"
      doi: "10.1534/genetics.118.301120"
      # A real YAML list of licence names, not a string that looks like one.
      licence: ["Free for Academic Use"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'test']`
  - e_config:
      type: file
      description: config file for est-sfs
      pattern: "*.txt"
  - data:
      type: file
      description: input data file for est-sfs
      pattern: "*.txt"
  - seed:
      type: file
      description: text file containing random number seed
      pattern: "*.txt"
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'test']`
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - sfs_out:
      type: file
      description: output file consists of the comma-separated estimated uSFS vector
      pattern: "*_sfs.txt"
  - pvalues_out:
      type: file
      description: this file contains the estimated ancestral state probabilities for each site
      pattern: "*_pvalues.txt"
  - seed:
      type: file
      description: seed file passed to est-sfs, copied from the input seed and named after the prefix
      pattern: "*.seed"
authors:
  - "@BioInf2305"
4 changes: 4 additions & 0 deletions tests/config/pytest_modules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ bcftools/concat:
- modules/nf-core/bcftools/concat/**
- tests/modules/nf-core/bcftools/concat/**

estsfs:
- modules/cnr-ibba/estsfs/**
- tests/modules/cnr-ibba/estsfs/**

freebayes/chunk:
- modules/cnr-ibba/freebayes/chunk/**
- tests/modules/cnr-ibba/freebayes/chunk/**
Expand Down
36 changes: 36 additions & 0 deletions tests/config/test_data.config
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ params {

mmseqs_tar_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/db/mmseqs.tar.gz"

resfinder_tar_gz = "${params.test_data_base}/data/genomics/sarscov2/genome/db/resfinder.tar.gz"

all_sites_fas = "${params.test_data_base}/data/genomics/sarscov2/genome/alignment/all_sites.fas"
informative_sites_fas = "${params.test_data_base}/data/genomics/sarscov2/genome/alignment/informative_sites.fas"

Expand Down Expand Up @@ -129,6 +131,8 @@ params {
test_fastq_gz = "${params.test_data_base}/data/genomics/sarscov2/nanopore/fastq/test.fastq.gz"

test_sequencing_summary = "${params.test_data_base}/data/genomics/sarscov2/nanopore/sequencing_summary/test.sequencing_summary.txt"

test2_sequencing_summary = "${params.test_data_base}/data/genomics/sarscov2/nanopore/sequencing_summary/test2.sequencing_summary.txt"
}
'metagenome' {
classified_reads_assignment = "${params.test_data_base}/data/genomics/sarscov2/metagenome/test_1.kraken2.reads.txt"
Expand All @@ -150,10 +154,19 @@ params {
rnaseq_matrix = "${params.test_data_base}/data/genomics/mus_musculus/rnaseq_expression/SRP254919.salmon.merged.gene_counts.top1000cov.tsv"
rnaseq_lengths = "${params.test_data_base}/data/genomics/mus_musculus/rnaseq_expression/SRP254919.spoofed_lengths.tsv"
deseq_results = "${params.test_data_base}/data/genomics/mus_musculus/rnaseq_expression/SRP254919.salmon.merged.deseq2.results.tsv"
genome_19_fasta = "${params.test_data_base}/data/genomics/mus_musculus/genome/chr19.fa.gz"
genome_19_gtf = "${params.test_data_base}/data/genomics/mus_musculus/genome/chr19.filtered.gtf.gz"
}
'illumina' {
test_1_fastq_gz = "${params.test_data_base}/data/genomics/mus_musculus/mageck/ERR376998.small.fastq.gz"
test_2_fastq_gz = "${params.test_data_base}/data/genomics/mus_musculus/mageck/ERR376999.small.fastq.gz"
genome_config = "${params.test_data_base}/data/genomics/mus_musculus/illumina/10xgenomics/multiome/cellranger_arc_mkref_test_mm39_chr19_config.json"
multiome_lib_csv = "${params.test_data_base}/data/genomics/mus_musculus/illumina/10xgenomics/multiome/lib.csv"
test_scARC_gex_R1_fastq_gz = "${params.test_data_base}/data/genomics/mus_musculus/illumina/10xgenomics/multiome/SRR18907480_chr19_sub_S1_L001_R1_001.fastq.gz"
test_scARC_gex_R2_fastq_gz = "${params.test_data_base}/data/genomics/mus_musculus/illumina/10xgenomics/multiome/SRR18907480_chr19_sub_S1_L001_R2_001.fastq.gz"
test_scARC_atac_R1_fastq_gz = "${params.test_data_base}/data/genomics/mus_musculus/illumina/10xgenomics/multiome/SRR18907481_chr19_sub_S1_L001_R1_001.fastq.gz"
test_scARC_atac_R2_fastq_gz = "${params.test_data_base}/data/genomics/mus_musculus/illumina/10xgenomics/multiome/SRR18907481_chr19_sub_S1_L001_R2_001.fastq.gz"
test_scARC_atac_I2_fastq_gz = "${params.test_data_base}/data/genomics/mus_musculus/illumina/10xgenomics/multiome/SRR18907481_chr19_sub_S1_L001_I2_001.fastq.gz"
}
'csv' {
count_table = "${params.test_data_base}/data/genomics/mus_musculus/mageck/count_table.csv"
Expand Down Expand Up @@ -278,6 +291,7 @@ params {
syntheticvcf_short_score = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/syntheticvcf_short.score"
gnomad_r2_1_1_sv_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1-sv.vcf.gz"
gnomad2_r2_1_1_sv_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/gnomAD2.r2.1.1-sv.vcf.gz"
genmod_compound_vcf_mt = "${params.test_data_base}/data/genomics/homo_sapiens/genome/vcf/genmod_compound.vcf"

hapmap_3_3_hg38_21_vcf_gz = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/hapmap_3.3.hg38.vcf.gz"
hapmap_3_3_hg38_21_vcf_gz_tbi = "${params.test_data_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/hapmap_3.3.hg38.vcf.gz.tbi"
Expand Down Expand Up @@ -612,6 +626,19 @@ params {
genome_aln_nwk = "${params.test_data_base}/data/genomics/prokaryotes/haemophilus_influenzae/genome/genome.aln.nwk"
}
}
'saccharomyces_cerevisiae' {
'genome' {
samplesheet = "${params.test_data_base}/data/genomics/eukaryotes/saccharomyces_cerevisiae/samplesheet.csv"
genome_gfp_gtf = "${params.test_data_base}/data/genomics/eukaryotes/saccharomyces_cerevisiae/genome_gfp.gtf"
kallisto_results = "${params.test_data_base}/data/genomics/eukaryotes/saccharomyces_cerevisiae/kallisto_results.tar.gz"
salmon_results = "${params.test_data_base}/data/genomics/eukaryotes/saccharomyces_cerevisiae/salmon_results.tar.gz"
}
}
'actinidia_chinensis' {
'genome' {
genome_21_fasta_gz = "${params.test_data_base}/data/genomics/eukaryotes/actinidia_chinensis/genome/chr1/genome.fasta.gz"
}
}
'generic' {
'csv' {
test_csv = "${params.test_data_base}/data/generic/csv/test.csv"
Expand All @@ -620,6 +647,8 @@ params {
rmarkdown = "${params.test_data_base}/data/generic/notebooks/rmarkdown/rmarkdown_notebook.Rmd"
ipython_md = "${params.test_data_base}/data/generic/notebooks/jupyter/ipython_notebook.md"
ipython_ipynb = "${params.test_data_base}/data/generic/notebooks/jupyter/ipython_notebook.ipynb"
quarto_r = "${params.test_data_base}/data/generic/notebooks/quarto/quarto_r.qmd"
quarto_python = "${params.test_data_base}/data/generic/notebooks/quarto/quarto_python.qmd"
}
'tar' {
tar_gz = "${params.test_data_base}/data/generic/tar/hello.tar.gz"
Expand Down Expand Up @@ -670,6 +699,7 @@ params {
ups_file1 = "${params.test_data_base}/data/proteomics/msspectra/OVEMB150205_12.raw"
ups_file2 = "${params.test_data_base}/data/proteomics/msspectra/OVEMB150205_14.raw"
profile_spectra = "${params.test_data_base}/data/proteomics/msspectra/peakpicker_tutorial_1.mzML"
hla2_file = "${params.test_data_base}/data/proteomics/msspectra/PXD012083_e005640_II.raw"
}
'database' {
yeast_ups = "${params.test_data_base}/data/proteomics/database/yeast_UPS.fasta"
Expand Down Expand Up @@ -705,6 +735,12 @@ params {
contigs = "${params.test_data_base}/data/genomics/eukaryotes/deilephila_porcellus/mito/ilDeiPorc1.contigs.fa"
}
}
'arabidopsis_thaliana' {
'plastid' {
hifi_reads = "${params.test_data_base}/data/genomics/eukaryotes/arabidopsis_thaliana/plastid/ddAraThal4.HiFi.reads.fasta"
}
}

'imaging' {
'h5' {
plant_wga = "${params.test_data_base}/data/imaging/h5/plant_wga.h5"
Expand Down
15 changes: 15 additions & 0 deletions tests/modules/cnr-ibba/estsfs/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { ESTSFS } from '../../../../modules/cnr-ibba/estsfs/main.nf'

// Smoke test: runs ESTSFS once on the est-sfs fixtures registered in the
// test data configuration under params.test_data['generic']['estsfs'].
workflow test_estsfs {

    // Resolve each fixture separately; checkIfExists makes a missing file fail early.
    config_file = file(params.test_data['generic']['estsfs']['config_file'], checkIfExists: true)
    data_file   = file(params.test_data['generic']['estsfs']['data_file'], checkIfExists: true)
    seed_file   = file(params.test_data['generic']['estsfs']['seed_file'], checkIfExists: true)

    // Tuple layout expected by ESTSFS: meta, e_config, data, seed
    input = [
        [ id:'test' ], // meta map
        config_file,
        data_file,
        seed_file
    ]

    ESTSFS ( input )
}
5 changes: 5 additions & 0 deletions tests/modules/cnr-ibba/estsfs/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Publish every process's outputs under <outdir>/<name>, where <name> is the
// last ':'-separated component of the process name, cut at the first '_'
// and lower-cased.
process.publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
11 changes: 11 additions & 0 deletions tests/modules/cnr-ibba/estsfs/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# pytest-workflow case for the ESTSFS module test workflow.
- name: "estsfs test_estsfs"
  command: >-
    nextflow run ./tests/modules/cnr-ibba/estsfs
    -entry test_estsfs
    -c ./tests/config/nextflow.config
  tags:
    - "estsfs"
  files:
    - path: "output/estsfs/test_pvalues.txt"
      md5sum: "63f370e9f2d08591b52c0ea05c8b45a0"
    - path: "output/estsfs/test_sfs.txt"
      md5sum: "5da246e273d22b177f0683b44fe6d80a"
    # No md5sum on the entries below: they are listed without a content checksum.
    - path: "output/estsfs/test.seed"
    - path: "output/estsfs/versions.yml"

0 comments on commit cc54f79

Please sign in to comment.