diff --git a/.github/actions/nf-test-action/action.yml b/.github/actions/nf-test-action/action.yml index 8cd79f21680..a6f2532f926 100644 --- a/.github/actions/nf-test-action/action.yml +++ b/.github/actions/nf-test-action/action.yml @@ -10,9 +10,9 @@ inputs: total_shards: description: "Total number of test shards(NOT the total number of matrix jobs)" required: true - tags: - description: "Tags to test (`[,...]`)" - required: false + paths: + description: "Test paths" + required: true runs: using: "composite" @@ -72,7 +72,6 @@ runs: env: SENTIEON_LICSRVR_IP: ${{ env.SENTIEON_LICSRVR_IP }} SENTIEON_AUTH_MECH: "GitHub Actions - token" - TAGS: ${{ inputs.tags && format('--tag {0}', inputs.tags) || '' }} run: | NFT_WORKDIR=~ \ nf-test test \ @@ -80,11 +79,9 @@ runs: --tap=test.tap \ --verbose \ --ci \ - --changed-since HEAD^ \ --shard ${{ inputs.shard }}/${{ inputs.total_shards }} \ --filter process,workflow \ - --follow-dependencies \ - ${{ env.TAGS }} + ${{ inputs.paths }} # TODO If no test.tap, then make one to spoof? 
- uses: pcolby/tap-summary@0959cbe1d4422e62afc65778cdaea6716c41d936 # v1 diff --git a/.github/workflows/gpu-tests.yml b/.github/workflows/gpu-tests.yml index 34206298769..78667065a3b 100644 --- a/.github/workflows/gpu-tests.yml +++ b/.github/workflows/gpu-tests.yml @@ -6,9 +6,6 @@ on: - "renovate/**" # branches Renovate creates pull_request: branches: [master] - paths: - - ".github/workflows/gpu-tests.yml" - - "modules/nf-core/parabricks/**" merge_group: types: [checks_requested] branches: [master] @@ -16,11 +13,8 @@ on: inputs: runners: description: "Runners to test on" - type: choice - options: - - "ubuntu-latest" - - "self-hosted" - default: "self-hosted" + type: string + default: "gpu" # Cancel if a newer run is started concurrency: @@ -43,7 +37,7 @@ jobs: runs-on: ubuntu-latest outputs: # Expose detected tags as 'modules' and 'workflows' output variables - paths: ${{ steps.outputs.outputs.components }} + paths: ${{ steps.list.outputs.components }} modules: ${{ steps.outputs.outputs.modules }} subworkflows: ${{ steps.outputs.outputs.subworkflows}} # Prod for version bumping @@ -60,11 +54,11 @@ jobs: - name: List nf-test files id: list - uses: adamrtalbot/detect-nf-test-changes@6bf6fd9fe0fb63a0362fb0e09de5acb6d055a754 # v0.0.5 + uses: adamrtalbot/detect-nf-test-changes@de3c3c8e113031b4f15a3c1104b5f135e8346997 # v0.0.6 with: head: ${{ github.sha }} base: ${{ github.event.pull_request.base.sha || github.event.merge_group.base_sha }} - n_parents: 2 + n_parents: 0 tags: "gpu" - name: Separate modules and subworkflows @@ -74,21 +68,20 @@ jobs: echo subworkflows=$(echo '${{ steps.list.outputs.components }}' | jq '. 
| map(select(contains("subworkflows"))) | map(gsub("subworkflows/nf-core/"; ""))') >> $GITHUB_OUTPUT - name: debug run: | - echo ${{ steps.outputs.outputs.components }} + echo ${{ steps.list.outputs.components }} echo ${{ steps.outputs.outputs.modules }} echo ${{ steps.outputs.outputs.subworkflows }} nf-test-gpu: runs-on: "gpu" + name: "GPU | ${{ matrix.profile }} | ${{ matrix.shard }}" needs: nf-test-changes - if: ${{ fromJSON(needs.nf-test-changes.outputs.paths) != '[]' || needs.nf-test-changes.outputs.paths != '' }} - name: "GPU | ${{ matrix.tags}} | ${{ matrix.profile }} | ${{ matrix.shard }}" + if: ${{ needs.nf-test-changes.outputs.modules != '[]' || needs.nf-test-changes.outputs.subworkflows != '[]' }} strategy: fail-fast: false matrix: shard: [1, 2] profile: [docker_self_hosted, singularity] # conda? - tags: ${{ fromJSON(needs.nf-test-changes.outputs.modules) && fromJSON(needs.nf-test-changes.outputs.subworkflows) }} env: NXF_ANSI_LOG: false TOTAL_SHARDS: 2 @@ -108,4 +101,22 @@ jobs: profile: ${{ matrix.profile }},gpu shard: ${{ matrix.shard }} total_shards: ${{ env.TOTAL_SHARDS }} - tags: ${{matrix.tags}},gpu + paths: "${{ join(fromJson(needs.nf-test-changes.outputs.paths), ' ') }}" + + confirm-pass: + runs-on: ubuntu-latest + needs: [nf-test-gpu] + if: always() + steps: + - name: All tests ok + if: ${{ success() || !contains(needs.*.result, 'failure') }} + run: exit 0 + - name: One or more tests failed + if: ${{ contains(needs.*.result, 'failure') }} + run: exit 1 + + - name: debug-print + if: always() + run: | + echo "toJSON(needs) = ${{ toJSON(needs) }}" + echo "toJSON(needs.*.result) = ${{ toJSON(needs.*.result) }}" diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml index 5445a444531..f8c71275bf9 100644 --- a/.github/workflows/nf-test.yml +++ b/.github/workflows/nf-test.yml @@ -35,45 +35,90 @@ env: NXF_VER: "24.10.1" jobs: + nf-test-changes: + name: nf-test-changes + runs-on: ubuntu-latest + outputs: + # Expose detected tags as 
'modules' and 'workflows' output variables + paths: ${{ steps.list.outputs.components }} + modules: ${{ steps.outputs.outputs.modules }} + subworkflows: ${{ steps.outputs.outputs.subworkflows}} + # Prod for version bumping + steps: + - name: Clean Workspace # Purge the workspace in case it's running on a self-hosted runner + run: | + ls -la ./ + rm -rf ./* || true + rm -rf ./.??* || true + ls -la ./ + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + with: + fetch-depth: 0 + + - name: List nf-test files + id: list + uses: adamrtalbot/detect-nf-test-changes@de3c3c8e113031b4f15a3c1104b5f135e8346997 # v0.0.6 + with: + head: ${{ github.sha }} + base: ${{ github.event.pull_request.base.sha || github.event.merge_group.base_sha }} + n_parents: 0 + exclude_tags: "gpu" + + - name: Separate modules and subworkflows + id: outputs + run: | + echo modules=$(echo '${{ steps.list.outputs.components }}' | jq -c '. | map(select(contains("modules"))) | map(gsub("modules/nf-core/"; ""))') >> $GITHUB_OUTPUT + echo subworkflows=$(echo '${{ steps.list.outputs.components }}' | jq '. | map(select(contains("subworkflows"))) | map(gsub("subworkflows/nf-core/"; ""))') >> $GITHUB_OUTPUT + - name: debug + run: | + echo ${{ steps.list.outputs.components }} + echo ${{ steps.outputs.outputs.modules }} + echo ${{ steps.outputs.outputs.subworkflows }} nf-test: runs-on: ${{ github.event.inputs.runners || 'self-hosted' }} - # NOTE I think this is the cleanest way to get them organized - # process | conda | 1 - # process | conda | 2 - # process | conda | 3 - # process | docker_self_hosted | 1 - # ... 
- # workflow | singularity | 3 name: "${{ matrix.profile }} | ${{ matrix.shard }}" - # TODO - # needs: get-number-of-shards - # if: ${{ fromJSON(needs.get-number-of-shards.outputs.shards) != fromJSON('["1", "0"]') }} + needs: nf-test-changes + if: ${{ needs.nf-test-changes.outputs.modules != '[]' || needs.nf-test-changes.outputs.subworkflows != '[]' }} strategy: fail-fast: false matrix: - # NOTE We could split these, but there's probably going to be more process tests than workflow tests, so we're just going to combine them all and bump up the shards for now - # NOTE The name of the test would be name: "${{ matrix.filter }} | ${{ matrix.profile }} | ${{ matrix.shard }}" - # filter: [process, workflow] - profile: [conda, docker_self_hosted, singularity] shard: [1, 2, 3, 4, 5] + profile: [conda, docker_self_hosted, singularity] env: - # FIXME Bumping them up to make the transition smooth, then we can throttle them back + NXF_ANSI_LOG: false TOTAL_SHARDS: 5 - SENTIEON_LICENSE_MESSAGE: ${{ secrets.SENTIEON_LICENSE_MESSAGE }} - SENTIEON_ENCRYPTION_KEY: ${{ secrets.SENTIEON_ENCRYPTION_KEY }} + steps: - - name: Clean Workspace # Purge the workspace in case it's running on a self-hosted runner - run: | - ls -la ./ - rm -rf ./* || true - rm -rf ./.??* || true - ls -la ./ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 with: fetch-depth: 0 - name: Run nf-test Action uses: ./.github/actions/nf-test-action + env: + SENTIEON_ENCRYPTION_KEY: ${{ secrets.SENTIEON_ENCRYPTION_KEY }} + SENTIEON_LICENSE_MESSAGE: ${{ secrets.SENTIEON_LICENSE_MESSAGE }} + SENTIEON_LICSRVR_IP: ${{ secrets.SENTIEON_LICSRVR_IP }} + SENTIEON_AUTH_MECH: "GitHub Actions - token" with: profile: ${{ matrix.profile }} shard: ${{ matrix.shard }} total_shards: ${{ env.TOTAL_SHARDS }} + paths: "${{ join(fromJson(needs.nf-test-changes.outputs.paths), ' ') }}" + + confirm-pass: + runs-on: ubuntu-latest + needs: [nf-test] + if: always() + steps: + - name: All tests ok + if: ${{ success() || 
!contains(needs.*.result, 'failure') }} + run: exit 0 + - name: One or more tests failed + if: ${{ contains(needs.*.result, 'failure') }} + run: exit 1 + + - name: debug-print + if: always() + run: | + echo "toJSON(needs) = ${{ toJSON(needs) }}" + echo "toJSON(needs.*.result) = ${{ toJSON(needs.*.result) }}" diff --git a/modules/nf-core/bismark/align/main.nf b/modules/nf-core/bismark/align/main.nf index df8b7e0633b..4aaa1be2ae3 100644 --- a/modules/nf-core/bismark/align/main.nf +++ b/modules/nf-core/bismark/align/main.nf @@ -45,7 +45,7 @@ process BISMARK_ALIGN { // Check that we have enough memory try { - def tmem = (task.memory as nextflow.util.MemoryUnit).toBytes() + def tmem = (task.memory as MemoryUnit).toBytes() def mcore = (tmem / mem_per_multicore) as int ccore = Math.min(ccore, mcore) } catch (all) { diff --git a/modules/nf-core/gffread/tests/main.nf.test b/modules/nf-core/gffread/tests/main.nf.test index 4cd13dcd33b..d039f367c15 100644 --- a/modules/nf-core/gffread/tests/main.nf.test +++ b/modules/nf-core/gffread/tests/main.nf.test @@ -23,6 +23,7 @@ nextflow_process { file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) ] input[1] = [] + """ } } @@ -220,4 +221,4 @@ nextflow_process { } -} +} \ No newline at end of file diff --git a/modules/nf-core/hisat2/build/main.nf b/modules/nf-core/hisat2/build/main.nf index 37a3e456581..7a5f28ba5fa 100644 --- a/modules/nf-core/hisat2/build/main.nf +++ b/modules/nf-core/hisat2/build/main.nf @@ -33,7 +33,7 @@ process HISAT2_BUILD { def ss = '' def exon = '' def extract_exons = '' - def hisat2_build_memory = params.hisat2_build_memory ? (params.hisat2_build_memory as nextflow.util.MemoryUnit).toGiga() : 0 + def hisat2_build_memory = params.hisat2_build_memory ? 
(params.hisat2_build_memory as MemoryUnit).toGiga() : 0 if (avail_mem >= hisat2_build_memory) { log.info "[HISAT2 index build] At least ${hisat2_build_memory} GB available, so using splice sites and exons to build HISAT2 index" extract_exons = gtf ? "hisat2_extract_exons.py $gtf > ${gtf.baseName}.exons.txt" : "" diff --git a/modules/nf-core/muse/call/environment.yml b/modules/nf-core/muse/call/environment.yml new file mode 100644 index 00000000000..5bc34c10360 --- /dev/null +++ b/modules/nf-core/muse/call/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::muse=2.1.2" diff --git a/modules/nf-core/muse/call/main.nf b/modules/nf-core/muse/call/main.nf new file mode 100644 index 00000000000..b4559761ab0 --- /dev/null +++ b/modules/nf-core/muse/call/main.nf @@ -0,0 +1,50 @@ +process MUSE_CALL { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/9f/9f0ebb574ef5eed2a6e034f1b2feea6c252d1ab0c8bc5135a669059aa1f4d2ca/data': + 'community.wave.seqera.io/library/muse:6637291dcbb0bdb8' }" + + input: + tuple val(meta), path(tumor_bam), path(tumor_bai), path(normal_bam), path(normal_bai) + tuple val(meta2), path(reference) + + output: + tuple val(meta), path("*.MuSE.txt"), emit: txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + MuSE \\ + call \\ + $args \\ + -f $reference \\ + -O ${prefix} \\ + -n $task.cpus \\ + $tumor_bam \\ + $normal_bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + MuSE: \$( MuSE --version | sed -e "s/MuSE, version //g" ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.MuSE.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + MuSE: \$( MuSE --version | sed -e "s/MuSE, version //g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/muse/call/meta.yml b/modules/nf-core/muse/call/meta.yml new file mode 100644 index 00000000000..6733677dbdc --- /dev/null +++ b/modules/nf-core/muse/call/meta.yml @@ -0,0 +1,72 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "muse_call" +description: pre-filtering and calculating position-specific summary statistics using + the Markov substitution model +keywords: + - variant calling + - somatic + - wgs + - wxs + - vcf +tools: + - "MuSE": + description: "Somatic point mutation caller based on Markov substitution model + for molecular evolution" + homepage: "https://bioinformatics.mdanderson.org/public-software/muse/" + documentation: "https://github.com/wwylab/MuSE" + tool_dev_url: "https://github.com/wwylab/MuSE" + doi: "10.1101/gr.278456.123" + licence: 
["https://github.com/danielfan/MuSE/blob/master/LICENSE"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - tumor_bam: + type: file + description: Sorted tumor BAM file + pattern: "*.bam" + - tumor_bai: + type: file + description: Index file for the tumor BAM file + pattern: "*.bai" + - normal_bam: + type: file + description: Sorted matched normal BAM file + pattern: "*.bam" + - normal_bai: + type: file + description: Index file for the normal BAM file + pattern: "*.bai" + - - meta2: + type: map + description: | + Groovy Map containing reference information. + e.g. `[ id:'test' ]` + - reference: + type: file + description: reference genome file + pattern: "*.fasta" +output: + - txt: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.MuSE.txt": + type: file + description: position-specific summary statistics + pattern: "*.MuSE.txt" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@famosab" +maintainers: + - "@famosab" diff --git a/modules/nf-core/muse/call/tests/main.nf.test b/modules/nf-core/muse/call/tests/main.nf.test new file mode 100644 index 00000000000..b5e441ec32d --- /dev/null +++ b/modules/nf-core/muse/call/tests/main.nf.test @@ -0,0 +1,72 @@ +nextflow_process { + + name "Test Process MUSE_CALL" + script "../main.nf" + process "MUSE_CALL" + + tag "modules" + tag "modules_nfcore" + tag "muse" + tag "muse/call" + + test("human - bam") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true) + ] + input[1] = [ + [ id:'reference' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("human - bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true) + ] + input[1] = [ + [ id:'reference' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/muse/call/tests/main.nf.test.snap b/modules/nf-core/muse/call/tests/main.nf.test.snap new file mode 100644 index 00000000000..ead8906a9ce --- /dev/null +++ b/modules/nf-core/muse/call/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "human - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + 
"test.MuSE.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,de7c8f535f5b17473ed6aab68f1d70c1" + ], + "txt": [ + [ + { + "id": "test" + }, + "test.MuSE.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,de7c8f535f5b17473ed6aab68f1d70c1" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-29T14:30:48.292828" + }, + "human - bam": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.MuSE.txt:md5,3a38ee9131a217cc56199bd4a6b18e1d" + ] + ], + "1": [ + "versions.yml:md5,de7c8f535f5b17473ed6aab68f1d70c1" + ], + "txt": [ + [ + { + "id": "test" + }, + "test.MuSE.txt:md5,3a38ee9131a217cc56199bd4a6b18e1d" + ] + ], + "versions": [ + "versions.yml:md5,de7c8f535f5b17473ed6aab68f1d70c1" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-29T14:30:32.522553" + } +} \ No newline at end of file diff --git a/modules/nf-core/muse/call/tests/tags.yml b/modules/nf-core/muse/call/tests/tags.yml new file mode 100644 index 00000000000..4e04a91a4af --- /dev/null +++ b/modules/nf-core/muse/call/tests/tags.yml @@ -0,0 +1,2 @@ +muse/call: + - "modules/nf-core/muse/call/**" diff --git a/modules/nf-core/nacho/normalize/environment.yml b/modules/nf-core/nacho/normalize/environment.yml new file mode 100644 index 00000000000..9cf652c88fe --- /dev/null +++ b/modules/nf-core/nacho/normalize/environment.yml @@ -0,0 +1,12 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - conda-forge::r-dplyr=1.1.4 + - conda-forge::r-fs=1.6.4 + - conda-forge::r-ggplot2=3.4.4 + - conda-forge::r-nacho=2.0.6 + - conda-forge::r-optparse=1.7.5 + - conda-forge::r-readr=2.1.5 + - conda-forge::r-tidyr=1.3.0 diff --git a/modules/nf-core/nacho/normalize/main.nf b/modules/nf-core/nacho/normalize/main.nf new file mode 100644 index 00000000000..69cc49ec143 --- /dev/null +++ b/modules/nf-core/nacho/normalize/main.nf @@ -0,0 +1,59 @@ +process 
NACHO_NORMALIZE { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container 'community.wave.seqera.io/library/r-dplyr_r-fs_r-ggplot2_r-nacho_pruned:033bc017f5f36b6d' + + input: + tuple val(meta) , path(rcc_files, stageAs: "input/*") + tuple val(meta2), path(sample_sheet) + + output: + tuple val(meta), path("normalized_counts.tsv") , emit: normalized_counts + tuple val(meta), path("normalized_counts_wo_HKnorm.tsv"), emit: normalized_counts_wo_HK + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + nacho_norm.R \\ + --input_rcc_path input \\ + $args \\ + --input_samplesheet ${sample_sheet} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') + r-nacho: \$(Rscript -e "library(NACHO); cat(as.character(packageVersion('NACHO')))") + r-dplyr: \$(Rscript -e "library(dplyr); cat(as.character(packageVersion('dplyr')))") + r-ggplot2: \$(Rscript -e "library(ggplot2); cat(as.character(packageVersion('ggplot2')))") + r-tidyr: \$(Rscript -e "library(tidyr); cat(as.character(packageVersion('tidyr')))") + r-readr: \$(Rscript -e "library(readr); cat(as.character(packageVersion('readr')))") + r-fs: \$(Rscript -e "library(fs); cat(as.character(packageVersion('fs')))") + r-optparse: \$(Rscript -e "library(optparse); cat(as.character(packageVersion('optparse')))") + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + """ + touch normalized_counts.tsv + touch normalized_counts_wo_HKnorm.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') + r-nacho: \$(Rscript -e "library(NACHO); cat(as.character(packageVersion('NACHO')))") + r-dplyr: \$(Rscript -e "library(dplyr); cat(as.character(packageVersion('dplyr')))") + r-ggplot2: \$(Rscript -e "library(ggplot2); 
cat(as.character(packageVersion('ggplot2')))") + r-tidyr: \$(Rscript -e "library(tidyr); cat(as.character(packageVersion('tidyr')))") + r-readr: \$(Rscript -e "library(readr); cat(as.character(packageVersion('readr')))") + r-fs: \$(Rscript -e "library(fs); cat(as.character(packageVersion('fs')))") + r-optparse: \$(Rscript -e "library(optparse); cat(as.character(packageVersion('optparse')))") + END_VERSIONS + """ +} diff --git a/modules/nf-core/nacho/normalize/meta.yml b/modules/nf-core/nacho/normalize/meta.yml new file mode 100644 index 00000000000..96e9a541a72 --- /dev/null +++ b/modules/nf-core/nacho/normalize/meta.yml @@ -0,0 +1,84 @@ +--- +name: nacho_normalize +description: | + NACHO (NAnostring quality Control dasHbOard) is developed for NanoString nCounter data. + NanoString nCounter data is a messenger-RNA/micro-RNA (mRNA/miRNA) expression assay and works with fluorescent barcodes. + Each barcode is assigned a mRNA/miRNA, which can be counted after bonding with its target. + As a result each count of a specific barcode represents the presence of its target mRNA/miRNA. +keywords: + - nacho + - nanostring + - mRNA + - miRNA + - qc +tools: + - NACHO: + description: | + R package that uses two main functions to summarize and visualize NanoString RCC files, + namely: `load_rcc()` and `visualise()`. It also includes a function `normalise()`, which (re)calculates + sample specific size factors and normalises the data. + For more information `vignette("NACHO")` and `vignette("NACHO-analysis")` + homepage: https://github.com/mcanouil/NACHO + documentation: https://cran.r-project.org/web/packages/NACHO/vignettes/NACHO.html + doi: "10.1093/bioinformatics/btz647" + licence: ["GPL-3.0"] + identifier: "" + args_id: "$args" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - rcc_files: + type: file + description: | + List of RCC files for all samples, which are direct outputs from NanoString runs + pattern: "*.RCC" + - - meta2: + type: map + description: | + Groovy Map containing file information + e.g. [ id:'test_samplesheet' ] + - sample_sheet: + type: "file" + pattern: "*.csv" + description: | + Comma-separated file with 3 columns: RCC_FILE, RCC_FILE_NAME, and SAMPLE_ID + +output: + - normalized_counts: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "normalized_counts.tsv": + type: file + description: | + Tab-separated file with normalized gene counts for the samples + pattern: "normalized_counts.tsv" + + - normalized_counts_wo_HK: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "normalized_counts_wo_HKnorm.tsv": + type: file + description: | + Tab-separated file with normalized gene counts for the samples, computed without housekeeping-gene normalization. 
+ pattern: "normalized_counts_wo_HKnorm.tsv" + - versions: + - "versions.yml": + type: file + description: | + File containing software versions + pattern: "versions.yml" + +authors: + - "@alanmmobbs93" +maintainers: + - "@alanmmobbs93" diff --git a/modules/nf-core/nacho/normalize/resources/usr/bin/nacho_norm.R b/modules/nf-core/nacho/normalize/resources/usr/bin/nacho_norm.R new file mode 100755 index 00000000000..53899126a3b --- /dev/null +++ b/modules/nf-core/nacho/normalize/resources/usr/bin/nacho_norm.R @@ -0,0 +1,92 @@ +#!/usr/bin/env Rscript +library(optparse) +library(dplyr) +library(ggplot2) +library(fs) +library(NACHO) +library(readr) +library(tidyr) + +# Parse Arguments +norm_methods <- c("GLM", "GEO") +option_list <- list( + make_option( + c("--input_rcc_path"), + type = "character", + default = "./" , + help = "Path to the folder that contains the RCC input file(s)", + metavar = "character"), + make_option( + c("--input_samplesheet"), + type = "character", + default = NULL , + help = "Path to the sample sheet file", + metavar = "character"), + make_option( + c("--norm_method"), + type = "character", + default = "GLM", + help = paste0("Normalization method. 
One of ", paste(norm_methods, collapse = " ")), + metavar = "character") +) + +# Parse the command-line arguments +opt <- parse_args(OptionParser(option_list = option_list)) + +# Validate mandatory arguments +if (is.null(opt$input_rcc_path)) { + stop("Error: The --input_rcc_path parameter is mandatory and must be specified.") +} + +if (is.null(opt$input_samplesheet)) { + stop("Error: The --input_samplesheet parameter is mandatory and must be specified.") +} + +# Validate that --norm_method is one of the allowed values +if (!(opt$norm_method %in% norm_methods)) { + stop(paste("Error: The --norm_method parameter must be one of:", paste(norm_methods, collapse = " "))) +} + +input_rcc_path <- opt$input_rcc_path +input_samplesheet <- opt$input_samplesheet +norm_method <- opt$norm_method + +# Create filelist for NachoQC + +list_of_rccs <- dir_ls(path = input_rcc_path, glob = "*.RCC") +print(list_of_rccs) + +# Core Code +## Read data +nacho_data <- load_rcc(data_directory = input_rcc_path, + ssheet_csv = input_samplesheet, + id_colname = "RCC_FILE_NAME", + normalisation_method = norm_method) + +output_base <- "./" + +get_counts <- function( + nacho, + codeclass = "Endogenous", + rownames = "RCC_FILE_NAME", + colnames = c("Name", "Accession") +) { + nacho[["nacho"]] %>% + dplyr::select(c("RCC_FILE_NAME", "Name", "Count_Norm", "CodeClass")) %>% + tidyr::pivot_wider(names_from = "RCC_FILE_NAME", values_from = "Count_Norm") +} + +## Write out normalized counts +norm_counts <- as.data.frame(get_counts(nacho_data)) +write_tsv(norm_counts, file = "normalized_counts.tsv") + +## Create non-hk normalized counts too +nacho_data_no_hk <- load_rcc(data_directory = input_rcc_path, + ssheet_csv = input_samplesheet, + id_colname = "RCC_FILE_NAME", + normalisation_method = norm_method, + housekeeping_norm = FALSE) + +## Export non-hk tables +norm_counts_without_hks <- as.data.frame(get_counts(nacho_data_no_hk)) +write_tsv(norm_counts_without_hks, file = 
"normalized_counts_wo_HKnorm.tsv") diff --git a/modules/nf-core/nacho/normalize/tests/main.nf.test b/modules/nf-core/nacho/normalize/tests/main.nf.test new file mode 100644 index 00000000000..ec21ec5c87f --- /dev/null +++ b/modules/nf-core/nacho/normalize/tests/main.nf.test @@ -0,0 +1,86 @@ +nextflow_process { + + name "Test Process NACHO_NORMALIZE" + script "../main.nf" + process "NACHO_NORMALIZE" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "nacho" + tag "nacho/normalize" + + test("Salmon - RCC files") { + + when { + params { + module_args = '--norm_method "GEO"' + } + process { + """ + // RCC Files: Collect from sample sheet + input[0] = + Channel.fromPath('https://raw.githubusercontent.com/nf-core/test-datasets/nanostring/samplesheets/samplesheet_test.csv', checkIfExists: true) + .splitCsv( header: true ) + .map { row -> return file(row.RCC_FILE, checkIfExists: true) } // Select first column: path to file + .collect() + .map{ files -> + tuple( [id: 'test'], files ) // Add meta component + } + + + // Sample sheet + input[1] = Channel.of( [ + [ id: 'test_samplesheet'], + [ file('https://raw.githubusercontent.com/nf-core/test-datasets/nanostring/samplesheets/samplesheet_test.csv', checkIfExists: true) ] + ] ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("Salmon - RCC files - stub") { + + options "-stub" + when { + params { + module_args = '--norm_method "GEO"' + } + process { + """ + // RCC Files: Collect from sample sheet + input[0] = + Channel.fromPath('https://raw.githubusercontent.com/nf-core/test-datasets/nanostring/samplesheets/samplesheet_test.csv', checkIfExists: true) + .splitCsv( header: true ) + .map { row -> return file(row.RCC_FILE, checkIfExists: true) } // Select first column: path to file // Select first column: path to file + .collect() + .map{ files -> + tuple( [id: 'test'], files ) // Add meta component + } + + // Sample sheet + 
input[1] = + Channel.of( [ + [id: 'test_samplesheet'], + [ file('https://raw.githubusercontent.com/nf-core/test-datasets/nanostring/samplesheets/samplesheet_test.csv', checkIfExists: true) ] + ] ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/nacho/normalize/tests/main.nf.test.snap b/modules/nf-core/nacho/normalize/tests/main.nf.test.snap new file mode 100644 index 00000000000..f6e20e3947d --- /dev/null +++ b/modules/nf-core/nacho/normalize/tests/main.nf.test.snap @@ -0,0 +1,100 @@ +{ + "Salmon - RCC files": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "normalized_counts.tsv:md5,a0124c7a24bd04296f441d9ade82a05f" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "normalized_counts_wo_HKnorm.tsv:md5,5a2ce112c24e1b0d0f4cf3392111ef9e" + ] + ], + "2": [ + "versions.yml:md5,dbc82908e1d1fcd2429022a4f327b9ba" + ], + "normalized_counts": [ + [ + { + "id": "test" + }, + "normalized_counts.tsv:md5,a0124c7a24bd04296f441d9ade82a05f" + ] + ], + "normalized_counts_wo_HK": [ + [ + { + "id": "test" + }, + "normalized_counts_wo_HKnorm.tsv:md5,5a2ce112c24e1b0d0f4cf3392111ef9e" + ] + ], + "versions": [ + "versions.yml:md5,dbc82908e1d1fcd2429022a4f327b9ba" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-28T18:31:49.03241566" + }, + "Salmon - RCC files - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "normalized_counts.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "normalized_counts_wo_HKnorm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,dbc82908e1d1fcd2429022a4f327b9ba" + ], + "normalized_counts": [ + [ + { + "id": "test" + }, + "normalized_counts.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "normalized_counts_wo_HK": [ + [ + { + "id": "test" + }, + 
"normalized_counts_wo_HKnorm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,dbc82908e1d1fcd2429022a4f327b9ba" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-28T18:32:02.81614763" + } +} \ No newline at end of file diff --git a/modules/nf-core/nacho/normalize/tests/nextflow.config b/modules/nf-core/nacho/normalize/tests/nextflow.config new file mode 100644 index 00000000000..b08db067be7 --- /dev/null +++ b/modules/nf-core/nacho/normalize/tests/nextflow.config @@ -0,0 +1,7 @@ +nextflow.enable.moduleBinaries = true + +process { + withName: 'NACHO_NORMALIZE' { + ext.args = params.module_args + } +} diff --git a/modules/nf-core/nacho/qc/environment.yml b/modules/nf-core/nacho/qc/environment.yml new file mode 100644 index 00000000000..9cf652c88fe --- /dev/null +++ b/modules/nf-core/nacho/qc/environment.yml @@ -0,0 +1,12 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - conda-forge::r-dplyr=1.1.4 + - conda-forge::r-fs=1.6.4 + - conda-forge::r-ggplot2=3.4.4 + - conda-forge::r-nacho=2.0.6 + - conda-forge::r-optparse=1.7.5 + - conda-forge::r-readr=2.1.5 + - conda-forge::r-tidyr=1.3.0 diff --git a/modules/nf-core/nacho/qc/main.nf b/modules/nf-core/nacho/qc/main.nf new file mode 100644 index 00000000000..54bf2ae368e --- /dev/null +++ b/modules/nf-core/nacho/qc/main.nf @@ -0,0 +1,77 @@ +process NACHO_QC { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container 'community.wave.seqera.io/library/r-dplyr_r-fs_r-ggplot2_r-nacho_pruned:033bc017f5f36b6d' + + input: + tuple val(meta) , path(rcc_files, stageAs: "input/*") + tuple val(meta2), path(sample_sheet) + + output: + tuple val(meta), path("*.html") , emit: nacho_qc_reports + tuple val(meta), path("*_mqc.png"), emit: nacho_qc_png + tuple val(meta), path("*_mqc.txt"), emit: nacho_qc_txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + 
script: + def args = task.ext.args ?: '' + + """ + nacho_qc.R \\ + --input_rcc_path input \\ + --input_samplesheet ${sample_sheet} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') + r-nacho: \$(Rscript -e "library(NACHO); cat(as.character(packageVersion('NACHO')))") + r-dplyr: \$(Rscript -e "library(dplyr); cat(as.character(packageVersion('dplyr')))") + r-ggplot2: \$(Rscript -e "library(ggplot2); cat(as.character(packageVersion('ggplot2')))") + r-tidyr: \$(Rscript -e "library(tidyr); cat(as.character(packageVersion('tidyr')))") + r-readr: \$(Rscript -e "library(readr); cat(as.character(packageVersion('readr')))") + r-fs: \$(Rscript -e "library(fs); cat(as.character(packageVersion('fs')))") + r-optparse: \$(Rscript -e "library(optparse); cat(as.character(packageVersion('optparse')))") + END_VERSIONS + """ + + stub: + """ + touch qc.html + touch qc_with_outliers.html + touch AVG_vs_BD_mqc.png + touch AVG_vs_MED_mqc.png + touch BD_mqc.png + touch FOV_mqc.png + touch HKF_mqc.png + touch HK_mqc.png + touch LOD_mqc.png + touch Neg_mqc.png + touch PCA1_vs_PCA2_mqc.png + touch PCAi_mqc.png + touch PCA_mqc.png + touch plot_normf_mqc.png + touch Posctrl_linearity_mqc.png + touch POSF_vs_NEGF_mqc.png + touch Pos_mqc.png + touch Pos_vs_neg_mqc.png + touch normalized_qc_mqc.txt + touch hk_detected_mqc.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') + r-nacho: \$(Rscript -e "library(NACHO); cat(as.character(packageVersion('NACHO')))") + r-dplyr: \$(Rscript -e "library(dplyr); cat(as.character(packageVersion('dplyr')))") + r-ggplot2: \$(Rscript -e "library(ggplot2); cat(as.character(packageVersion('ggplot2')))") + r-tidyr: \$(Rscript -e "library(tidyr); cat(as.character(packageVersion('tidyr')))") + r-readr: \$(Rscript -e "library(readr); cat(as.character(packageVersion('readr')))") + r-fs: \$(Rscript 
-e "library(fs); cat(as.character(packageVersion('fs')))") + r-optparse: \$(Rscript -e "library(optparse); cat(as.character(packageVersion('optparse')))") + END_VERSIONS + """ +} diff --git a/modules/nf-core/nacho/qc/meta.yml b/modules/nf-core/nacho/qc/meta.yml new file mode 100644 index 00000000000..6d5aaadc6c9 --- /dev/null +++ b/modules/nf-core/nacho/qc/meta.yml @@ -0,0 +1,89 @@ +name: nacho_qc +description: | + NACHO (NAnostring quality Control dasHbOard) is developed for NanoString nCounter data. + NanoString nCounter data is a messenger-RNA/micro-RNA (mRNA/miRNA) expression assay and works with fluorescent barcodes. + Each barcode is assigned a mRNA/miRNA, which can be counted after bonding with its target. + As a result each count of a specific barcode represents the presence of its target mRNA/miRNA. +keywords: + - nacho + - nanostring + - mRNA + - miRNA + - qc +tools: + - NACHO: + description: | + R package that uses two main functions to summarize and visualize NanoString RCC files, + namely: `load_rcc()` and `visualise()`. It also includes a function `normalise()`, which (re)calculates + sample specific size factors and normalises the data. + For more information `vignette("NACHO")` and `vignette("NACHO-analysis")` + homepage: https://github.com/mcanouil/NACHO + documentation: https://cran.r-project.org/web/packages/NACHO/vignettes/NACHO.html + doi: "10.1093/bioinformatics/btz647" + licence: ["GPL-3.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - rcc_files: + type: file + description: | + List of RCC files for all samples, which are direct outputs from NanoString runs + pattern: "*.RCC" + - - meta2: + type: map + description: | + Groovy Map containing file information + e.g. 
[ id:'test_samplesheet' ] + - sample_sheet: + type: "file" + pattern: "*.csv" + description: | + Comma-separated file with 3 columns: RCC_FILE, RCC_FILE_NAME, and SAMPLE_ID +output: + - nacho_qc_reports: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.html": + type: file + description: | + HTML report + pattern: "*.html" + - nacho_qc_png: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*_mqc.png": + type: file + description: | + Output PNG files + pattern: "*_mqc.png" + - nacho_qc_txt: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*_mqc.txt": + type: file + description: | + Plain text reports + pattern: "*_mqc.txt" + - versions: + - "versions.yml": + type: file + description: | + File containing software versions + pattern: "versions.yml" +authors: + - "@alanmmobbs93" +maintainers: + - "@alanmmobbs93" diff --git a/modules/nf-core/nacho/qc/resources/usr/bin/nacho_qc.R b/modules/nf-core/nacho/qc/resources/usr/bin/nacho_qc.R new file mode 100755 index 00000000000..21d20b317b0 --- /dev/null +++ b/modules/nf-core/nacho/qc/resources/usr/bin/nacho_qc.R @@ -0,0 +1,263 @@ +#!/usr/bin/env Rscript +library(optparse) +library(dplyr) +library(ggplot2) +library(fs) +library(NACHO) +library(readr) +library(tidyr) + +# Commandline Argument parsing +option_list <- list( + make_option( + c("--input_rcc_path"), + type = "character", + default = "./" , + help = "Path to the folder that contains the RCC input file(s)", + metavar = "character"), + make_option( + c("--input_samplesheet"), + type = "character", + default = NULL , + help = "Path to the sample sheet file", + metavar = "character") +) + +opt <- parse_args(OptionParser(option_list = option_list)) + +# Validate mandatory arguments +if (is.null(opt$input_rcc_path)) { + stop("Error: The --input_rcc_path parameter is mandatory and must be 
specified.") +} + +if (is.null(opt$input_samplesheet)) { + stop("Error: The --input_samplesheet parameter is mandatory and must be specified.") +} + +input_rcc_path <- opt$input_rcc_path +input_samplesheet <- opt$input_samplesheet + +# Create filelist for NachoQC +list_of_rccs <- dir_ls(path = input_rcc_path, glob = "*.RCC") + +# Core Code +nacho_data <- load_rcc(data_directory = input_rcc_path, + ssheet_csv = input_samplesheet, + id_colname = "RCC_FILE_NAME") + +output_base <- "./" + +# Write out HK genes detected and add to MultiQC report as custom content +line="#id: nf-core-nanostring-hk-genes +#section_name: 'Housekeeping Genes' +#description: 'The following Housekeeping Genes have been detected in the input RCC Files:' +#plot_type: 'html' +#section_href: 'https://github.com/nf-core/nanostring' +#data: + " + +write(line,file=paste0(output_base, "hk_detected_mqc.txt"),append=TRUE) +write(nacho_data$housekeeping_genes ,paste0(output_base,"hk_detected_mqc.txt"),append=TRUE) + +# Add in all plots as MQC output for MultiQC +plot_bd <- autoplot( + object = nacho_data, + x = "BD", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) +ggsave(filename="BD_mqc.png", plot_bd) + +## Field of View (FoV) Imaging + +plot_fov <- autoplot( + object = nacho_data, + x = "FoV", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) +ggsave(filename="FOV_mqc.png", plot_fov) + + +## Positive Control Linearity + +plot_posctrl_lin <- autoplot( + object = nacho_data, + x = "PCL", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) + +ggsave(filename="Posctrl_linearity_mqc.png", plot_posctrl_lin) + +## Limit of Detection + +plot_lod <- autoplot( + object = nacho_data, + x = "LoD", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) + +ggsave(filename="LOD_mqc.png", plot_lod) + +## Positive Controls + +plot_pos <- autoplot( + object = nacho_data, + x = "Positive", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) 
+ggsave(filename="Pos_mqc.png", plot_pos) + + +## Negative Controls + +plot_neg <- autoplot( + object = nacho_data, + x = "Negative", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) +ggsave(filename="Neg_mqc.png", plot_neg) + +## Housekeeping Genes + +plot_hk <- autoplot( + object = nacho_data, + x = "Housekeeping", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) +ggsave(filename="HK_mqc.png", plot_hk) + +## Positive Controls vs Negative Controls + +plot_pos_vs_neg <- autoplot( + object = nacho_data, + x = "PN", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) +ggsave(filename="Pos_vs_neg_mqc.png", plot_pos_vs_neg) + +## Average Counts vs. Binding Density + +plot_avg_vs_bd <- autoplot( + object = nacho_data, + x = "ACBD", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) +ggsave(filename="AVG_vs_BD_mqc.png", plot_avg_vs_bd) + +## Average Counts vs. Median Counts + +plot_avg_vs_med <- autoplot( + object = nacho_data, + x = "ACMC", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) +ggsave(filename="AVG_vs_MED_mqc.png", plot_avg_vs_med) + +## Principal Component 1 vs. 2 + +plot_pc12 <- autoplot( + object = nacho_data, + x = "PCA12", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) +ggsave(filename="PCA1_vs_PCA2_mqc.png", plot_pc12) + +## Principal Component i + +plot_pcai <- autoplot( + object = nacho_data, + x = "PCAi", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) +ggsave(filename="PCAi_mqc.png", plot_pcai) + +## Principal Component planes +plot_pcap <- autoplot( + object = nacho_data, + x = "PCA", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) +ggsave(filename="PCA_mqc.png", plot_pcap) + +## Positive Factor vs. 
Negative Factor +plot_posf_vs_negf <- autoplot( + object = nacho_data, + x = "PFNF", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) +ggsave(filename="POSF_vs_NEGF_mqc.png", plot_posf_vs_negf) + +## Housekeeping Factor + +plot_hkf <- autoplot( + object = nacho_data, + x = "HF", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) +ggsave(filename="HKF_mqc.png", plot_hkf) + +## Normalization Factors + +plot_normf <- autoplot( + object = nacho_data, + x = "NORM", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) +ggsave(filename="plot_normf_mqc.png", plot_normf) + +# Create QC table for MultiQC Report +outliers_thresholds <- nacho_data[["outliers_thresholds"]] + +qc_table <- nacho_data[["nacho"]] %>% + select(c(RCC_FILE_NAME,BD,FoV,PCL,LoD,MC,MedC,Positive_factor,Negative_factor,House_factor)) %>% + unique() %>% + mutate("BD QC" = if_else(BD < outliers_thresholds[["BD"]][1] | BD > outliers_thresholds[["BD"]][2], "FAIL", "PASS"), .after = BD) %>% + mutate("FoV QC" = if_else(FoV < outliers_thresholds[["FoV"]], "FAIL", "PASS"), .after = FoV) %>% + mutate("PCL QC" = if_else(PCL < outliers_thresholds[["PCL"]], "FAIL", "PASS"), .after = PCL) %>% + mutate("LoD QC" = if_else(LoD < outliers_thresholds[["LoD"]], "FAIL", "PASS"), .after = LoD) %>% + mutate("PNF QC" = if_else(Positive_factor < outliers_thresholds[["Positive_factor"]][1] | Positive_factor > outliers_thresholds[["Positive_factor"]][2], "FAIL", "PASS"), .after = Positive_factor) %>% + mutate("HKNF QC" = if_else(House_factor < outliers_thresholds[["House_factor"]][1] | House_factor > outliers_thresholds[["House_factor"]][2], "FAIL", "PASS"), .after = House_factor) %>% + relocate(Negative_factor, .after = last_col()) %>% + rename("Negative Factor" = Negative_factor) %>% + rename("House Factor" = House_factor) %>% + rename("Positive Factor" = Positive_factor) %>% + rename("RCC_FILE" = RCC_FILE_NAME) + +write_tsv(qc_table ,file=paste0(output_base,"normalized_qc_mqc.txt")) + +# 
Render Standard Report for investigation in main MultiQC Report +render(nacho_data, output_dir = output_base, output_file = "NanoQC.html", show_outliers = FALSE) + +# Render the same Report for standard investigation, but not for MultiQC Report +render(nacho_data, output_dir = output_base, output_file = "NanoQC_with_outliers.html", show_outliers = TRUE) diff --git a/modules/nf-core/nacho/qc/tests/main.nf.test b/modules/nf-core/nacho/qc/tests/main.nf.test new file mode 100644 index 00000000000..fe4176bdaf1 --- /dev/null +++ b/modules/nf-core/nacho/qc/tests/main.nf.test @@ -0,0 +1,97 @@ +nextflow_process { + + name "Test Process NACHO_QC" + script "../main.nf" + process "NACHO_QC" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "nacho" + tag "nacho/qc" + + test("Salmon - RCC files") { + + when { + process { + """ + // RCC Files: Collect from sample sheet + input[0] = + Channel.fromPath('https://raw.githubusercontent.com/nf-core/test-datasets/nanostring/samplesheets/samplesheet_test.csv', checkIfExists: true) + .splitCsv( header: true ) + .map { row -> return file(row.RCC_FILE, checkIfExists: true) } // Select first column: path to file + .collect() + .map{ files -> + return tuple( [id: 'test1'], files ) // Add meta component + } + + // Sample sheet + input[1] = Channel.of( [ + [ id: 'test_samplesheet'], + [ file('https://raw.githubusercontent.com/nf-core/test-datasets/nanostring/samplesheets/samplesheet_test.csv', checkIfExists: true) ] + ] ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { with(process.out) { + assert nacho_qc_reports.get(0).get(1).size() == 2 + assert nacho_qc_png.get(0).get(1).size() == 16 + assert nacho_qc_txt.get(0).get(1).size() == 2 + assert snapshot( + nacho_qc_reports.get(0).get(1).collect { file(it).name }, //undeterministic .html mqc files + nacho_qc_png.get(0).get(1).collect { file(it).name }, //undeterministic .png mqc files + nacho_qc_txt.get(0).get(1), //stable .txt mqc files + 
versions + ).match() } + } + ) + } + } + + test("Salmon - RCC files - stub") { + + options "-stub" + when { + process { + """ + // RCC Files: Collect from sample sheet + input[0] = + Channel.fromPath('https://raw.githubusercontent.com/nf-core/test-datasets/nanostring/samplesheets/samplesheet_test.csv', checkIfExists: true) + .splitCsv( header: true ) + .map{ row -> return file(row.RCC_FILE, checkIfExists: true) } // Select first column: path to file + .collect() + .map{ files -> + tuple( [id: 'test_stub'], files ) // Add meta component + } + + // Sample sheet + input[1] = Channel.of( [ + [ id: 'test_samplesheet'], + [ file('https://raw.githubusercontent.com/nf-core/test-datasets/nanostring/samplesheets/samplesheet_test.csv', checkIfExists: true) ] + ] ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { with(process.out) { + assert nacho_qc_reports.get(0).get(1).size() == 2 + assert nacho_qc_png.get(0).get(1).size() == 16 + assert nacho_qc_txt.get(0).get(1).size() == 2 + assert snapshot( + nacho_qc_reports.get(0).get(1).collect { file(it).name }, //undeterministic .html mqc files + nacho_qc_png.get(0).get(1).collect { file(it).name }, //undeterministic .png mqc files + nacho_qc_txt.get(0).get(1), //stable .txt mqc files + versions + ).match() } + } + ) + } + } +} diff --git a/modules/nf-core/nacho/qc/tests/main.nf.test.snap b/modules/nf-core/nacho/qc/tests/main.nf.test.snap new file mode 100644 index 00000000000..296b6a7562b --- /dev/null +++ b/modules/nf-core/nacho/qc/tests/main.nf.test.snap @@ -0,0 +1,78 @@ +{ + "Salmon - RCC files": { + "content": [ + [ + "NanoQC.html", + "NanoQC_with_outliers.html" + ], + [ + "AVG_vs_BD_mqc.png", + "AVG_vs_MED_mqc.png", + "BD_mqc.png", + "FOV_mqc.png", + "HKF_mqc.png", + "HK_mqc.png", + "LOD_mqc.png", + "Neg_mqc.png", + "PCA1_vs_PCA2_mqc.png", + "PCA_mqc.png", + "PCAi_mqc.png", + "POSF_vs_NEGF_mqc.png", + "Pos_mqc.png", + "Pos_vs_neg_mqc.png", + "Posctrl_linearity_mqc.png", + "plot_normf_mqc.png" + ], + [ + 
"hk_detected_mqc.txt:md5,61209383acc2abf6fc3ea309b5a5e094", + "normalized_qc_mqc.txt:md5,9a0b015a28094a17331b12b08898da8e" + ], + [ + "versions.yml:md5,771de828b0a5e1f2e715fd3f62d9a9c9" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-28T14:10:45.10759212" + }, + "Salmon - RCC files - stub": { + "content": [ + [ + "qc.html", + "qc_with_outliers.html" + ], + [ + "AVG_vs_BD_mqc.png", + "AVG_vs_MED_mqc.png", + "BD_mqc.png", + "FOV_mqc.png", + "HKF_mqc.png", + "HK_mqc.png", + "LOD_mqc.png", + "Neg_mqc.png", + "PCA1_vs_PCA2_mqc.png", + "PCA_mqc.png", + "PCAi_mqc.png", + "POSF_vs_NEGF_mqc.png", + "Pos_mqc.png", + "Pos_vs_neg_mqc.png", + "Posctrl_linearity_mqc.png", + "plot_normf_mqc.png" + ], + [ + "hk_detected_mqc.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "normalized_qc_mqc.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + "versions.yml:md5,771de828b0a5e1f2e715fd3f62d9a9c9" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-28T14:07:24.754188513" + } +} \ No newline at end of file diff --git a/modules/nf-core/nacho/qc/tests/nextflow.config b/modules/nf-core/nacho/qc/tests/nextflow.config new file mode 100644 index 00000000000..651f0b86a59 --- /dev/null +++ b/modules/nf-core/nacho/qc/tests/nextflow.config @@ -0,0 +1 @@ +nextflow.enable.moduleBinaries = true diff --git a/modules/nf-core/sylph/sketch/tests/main.nf.test b/modules/nf-core/sylph/sketch/tests/main.nf.test index 004f5137f69..f5e4d5887b7 100644 --- a/modules/nf-core/sylph/sketch/tests/main.nf.test +++ b/modules/nf-core/sylph/sketch/tests/main.nf.test @@ -1,5 +1,6 @@ nextflow_process { + name "Test Process SYLPH_SKETCH" script "../main.nf" process "SYLPH_SKETCH" tag "modules" diff --git a/modules/nf-core/umicollapse/tests/main.nf.test b/modules/nf-core/umicollapse/tests/main.nf.test index cc28359a667..db578775ee2 100644 --- a/modules/nf-core/umicollapse/tests/main.nf.test +++ 
b/modules/nf-core/umicollapse/tests/main.nf.test @@ -7,63 +7,18 @@ nextflow_process { tag "modules" tag "modules_nfcore" tag "umicollapse" - tag "umitools/extract" - tag "samtools/index" - tag "bwa/index" - tag "bwa/mem" test("umicollapse single end test") { - setup{ - run("UMITOOLS_EXTRACT"){ - script "../../umitools/extract/main.nf" - config "./nextflow_SE.config" - process{ - """ - input[0] = [ - [ id:'test', single_end:true ], // meta map - [ - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) - ] - ] - """ - } - } - - run("BWA_INDEX"){ - script "../../bwa/index/main.nf" - process{ - """ - input[0] = [[ id:'sarscov2'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] - """ - } - } - run("BWA_MEM"){ - script "../../bwa/mem/main.nf" - process{ - """ - input[0] = UMITOOLS_EXTRACT.out.reads - input[1] = BWA_INDEX.out.index - input[2] = [[ id:'sarscov2'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] - input[3] = true - """ - } - } - run("SAMTOOLS_INDEX"){ - script "../../samtools/index/main.nf" - process{ - """ - input[0] = BWA_MEM.out.bam - """ - } - } - } when { - config "./nextflow_SE.config" + config "./nextflow.config" process { """ - input[0] = BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.umi.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.umi.sorted.bam.bai', checkIfExists: true) + ]) input[1] = 'bam' """ } @@ -73,7 +28,7 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - process.out.bam, + 
bam(process.out.bam[0][1]).getSamLinesMD5(), process.out.versions).match() } ) } @@ -81,60 +36,16 @@ nextflow_process { } test("umicollapse paired tests") { - setup{ - run("UMITOOLS_EXTRACT"){ - script "../../umitools/extract/main.nf" - config "./nextflow_PE.config" - process{ - """ - input[0] = [ - [ id:'test', single_end:false ], // meta map - [ - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) - ] - ] - """ - } - } - - run("BWA_INDEX"){ - script "../../bwa/index/main.nf" - process{ - """ - input[0] = [ - [ id:'sarscov2'], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ] - """ - } - } - run("BWA_MEM"){ - script "../../bwa/mem/main.nf" - process{ - """ - input[0] = UMITOOLS_EXTRACT.out.reads - input[1] = BWA_INDEX.out.index - input[2] = [[ id:'sarscov2'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] - input[3] = true - """ - } - } - run("SAMTOOLS_INDEX"){ - script "../../samtools/index/main.nf" - process{ - """ - input[0] = BWA_MEM.out.bam - """ - } - } - } when { - config "./nextflow_PE.config" + config "./nextflow.config" process { """ - input[0] = BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) + input[0] = Channel.of([ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai', checkIfExists: true) + ]) input[1] = 'bam' """ } @@ -144,22 +55,22 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - process.out.bam, + bam(process.out.bam[0][1]).getSamLinesMD5(), process.out.versions).match() } ) } } - test("umicollapse fastq 
tests") { + test("umicollapse fastq test (single-end)") { when { - config "./nextflow_SE.config" + config "./nextflow.config" process { """ input[0] = [ [ id:'test', single_end:true ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test.umi_extract_single.fastq.gz', checkIfExists: true), [] ] input[1] = 'fastq' @@ -177,61 +88,76 @@ nextflow_process { } } - test("umicollapse stub tests") { - options "-stub-run" - setup{ - run("UMITOOLS_EXTRACT"){ - script "../../umitools/extract/main.nf" - config "./nextflow_PE.config" - process{ + test("umicollapse fastq test (paired-end)") { + + when { + config "./nextflow.config" + process { """ input[0] = [ - [ id:'test', single_end:false ], // meta map - [ - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) - ] + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test.umi_extract_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test.umi_extract_2.fastq.gz', checkIfExists: true) ] + input[1] = 'fastq' """ } } - run("BWA_INDEX"){ - script "../../bwa/index/main.nf" - process{ - """ - input[0] = [ - [ id:'sarscov2'], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ] - """ - } - } - run("BWA_MEM"){ - script "../../bwa/mem/main.nf" - process{ - """ - input[0] = UMITOOLS_EXTRACT.out.reads - input[1] = BWA_INDEX.out.index - input[2] = [[ id:'sarscov2'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] - input[3] = true - """ - } + then { + assertAll( + { assert process.success 
}, + { assert snapshot( + process.out.fastq, + process.out.versions).match() } + ) } - run("SAMTOOLS_INDEX"){ - script "../../samtools/index/main.nf" - process{ - """ - input[0] = BWA_MEM.out.bam - """ - } + } + + // Stub tests + + test("umicollapse single end test - stub") { + + options "-stub" + + when { + config "./nextflow.config" + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.umi.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.umi.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = 'bam' + """ } } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.versions).match() } + ) + } + + } + + test("umicollapse paired tests - stub") { + + options "-stub" + when { - config "./nextflow_PE.config" + config "./nextflow.config" process { """ - input[0] = BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) + input[0] = Channel.of([ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai', checkIfExists: true) + ]) input[1] = 'bam' """ } @@ -240,7 +166,9 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.bam, + process.out.versions).match() } ) } diff --git a/modules/nf-core/umicollapse/tests/main.nf.test.snap b/modules/nf-core/umicollapse/tests/main.nf.test.snap index bf6d5f30cb4..e903c0ce054 100644 --- a/modules/nf-core/umicollapse/tests/main.nf.test.snap +++ b/modules/nf-core/umicollapse/tests/main.nf.test.snap @@ -1,13 +1,38 @@ { "umicollapse single end test": { + "content": [ + 
"9158ea6e7a0e54819e25cbac5fbc5cc0", + [ + "versions.yml:md5,03fdbcb1ba9bd40325ca42859d39deb1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-25T17:31:45.024306" + }, + "umicollapse paired tests": { + "content": [ + "b7be15ac7aae194b04bdbb56f3534495", + [ + "versions.yml:md5,03fdbcb1ba9bd40325ca42859d39deb1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-25T17:31:52.072799" + }, + "umicollapse fastq test (paired-end)": { "content": [ [ [ { - "id": "test", - "single_end": true + "id": "test" }, - "test.dedup.bam:md5,89e844724f73fae9e7100506d0be5775" + "test.dedup.fastq.gz:md5,721a84a2accac988d636e837c60e47bc" ] ], [ @@ -15,12 +40,12 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.8.4", + "nextflow": "24.10.1" }, - "timestamp": "2024-10-22T10:43:04.890267074" + "timestamp": "2024-11-24T13:57:36.968147" }, - "umicollapse fastq tests": { + "umicollapse fastq test (single-end)": { "content": [ [ [ @@ -28,7 +53,7 @@ "id": "test", "single_end": true }, - "test.dedup.fastq.gz:md5,c9bac08c7fd8df3e0203e3eeafc73155" + "test.dedup.fastq.gz:md5,2e602ed23eb87f434e4f0a9e491c0310" ] ], [ @@ -36,89 +61,50 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.8.4", + "nextflow": "24.10.1" }, - "timestamp": "2024-10-22T10:43:45.691571914" + "timestamp": "2024-11-24T13:57:28.328682" }, - "umicollapse stub tests": { + "umicollapse single end test - stub": { "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.dedup.dedup.bam:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - - ], - "2": [ - [ - { - "id": "test", - "single_end": false - }, - "test.dedup_UMICollapse.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "3": [ - "versions.yml:md5,c1e0275d81b1c97a9344d216f9154996" - ], - "bam": [ - [ - { - "id": "test", - "single_end": false - }, - 
"test.dedup.dedup.bam:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "fastq": [ - - ], - "log": [ - [ - { - "id": "test", - "single_end": false - }, - "test.dedup_UMICollapse.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,c1e0275d81b1c97a9344d216f9154996" + [ + [ + { + "id": "test", + "single_end": true + }, + "test.dedup.dedup.bam:md5,d41d8cd98f00b204e9800998ecf8427e" ] - } + ], + [ + "versions.yml:md5,c1e0275d81b1c97a9344d216f9154996" + ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.10.1" }, - "timestamp": "2024-01-30T10:46:12.482697713" + "timestamp": "2024-11-24T14:09:40.277318" }, - "umicollapse paired tests": { + "umicollapse paired tests - stub": { "content": [ [ [ { - "id": "test", - "single_end": false + "id": "test" }, - "test.dedup.bam:md5,3e2ae4701e3d2ca074ea878a314a3e4f" + "test.dedup.dedup.bam:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], [ - "versions.yml:md5,03fdbcb1ba9bd40325ca42859d39deb1" + "versions.yml:md5,c1e0275d81b1c97a9344d216f9154996" ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.8.4", + "nextflow": "24.10.1" }, - "timestamp": "2024-10-22T10:43:33.250587075" + "timestamp": "2024-11-24T14:09:44.224965" } } \ No newline at end of file diff --git a/modules/nf-core/umicollapse/tests/nextflow.config b/modules/nf-core/umicollapse/tests/nextflow.config index 844edbdc671..105d8e13261 100644 --- a/modules/nf-core/umicollapse/tests/nextflow.config +++ b/modules/nf-core/umicollapse/tests/nextflow.config @@ -1,8 +1,5 @@ process { - withName: UMITOOLS_EXTRACT { - ext.args = '--bc-pattern="NNNN"' - } withName: UMICOLLAPSE { ext.prefix = { "${meta.id}.dedup" } } -} \ No newline at end of file +} diff --git a/modules/nf-core/umicollapse/tests/nextflow_PE.config b/modules/nf-core/umicollapse/tests/nextflow_PE.config deleted file mode 100644 index ae4c96320e9..00000000000 --- a/modules/nf-core/umicollapse/tests/nextflow_PE.config +++ /dev/null @@ -1,10 +0,0 
@@ -process { - - withName: UMITOOLS_EXTRACT { - ext.args = '--bc-pattern="NNNN" --bc-pattern2="NNNN"' - } - - withName: UMICOLLAPSE { - ext.prefix = { "${meta.id}.dedup" } - } -} diff --git a/modules/nf-core/umicollapse/tests/nextflow_SE.config b/modules/nf-core/umicollapse/tests/nextflow_SE.config deleted file mode 100644 index d4b9443652a..00000000000 --- a/modules/nf-core/umicollapse/tests/nextflow_SE.config +++ /dev/null @@ -1,10 +0,0 @@ -process { - - withName: UMITOOLS_EXTRACT { - ext.args = '--bc-pattern="NNNN"' - } - - withName: UMICOLLAPSE { - ext.prefix = { "${meta.id}.dedup" } - } -} diff --git a/modules/nf-core/umitools/dedup/tests/main.nf.test b/modules/nf-core/umitools/dedup/tests/main.nf.test index ab4455366e7..f00a8cbed7b 100644 --- a/modules/nf-core/umitools/dedup/tests/main.nf.test +++ b/modules/nf-core/umitools/dedup/tests/main.nf.test @@ -19,8 +19,8 @@ nextflow_process { input[0] = [ [ id:'test', single_end:true ], // meta map - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.single_end.sorted.bam", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.single_end.sorted.bam.bai", checkIfExists: true) + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.single_end.umi.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.single_end.umi.sorted.bam.bai", checkIfExists: true) ] input[1] = get_output_stats """ @@ -48,8 +48,8 @@ nextflow_process { input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai", checkIfExists: true) + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam", checkIfExists: true), + 
file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai", checkIfExists: true) ] input[1] = get_output_stats """ @@ -61,7 +61,7 @@ nextflow_process { { assert process.success }, { assert path("${process.out.log[0][1]}").exists() }, { assert snapshot( - process.out.bam, + bam(process.out.bam[0][1]).getSamLinesMD5(), process.out.versions).match() } ) } @@ -77,8 +77,8 @@ nextflow_process { input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai", checkIfExists: true) + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai", checkIfExists: true) ] input[1] = get_output_stats """ @@ -90,7 +90,7 @@ nextflow_process { { assert process.success }, { assert path("${process.out.log[0][1]}").exists() }, { assert snapshot( - process.out.bam, + bam(process.out.bam[0][1]).getSamLinesMD5(), process.out.tsv_edit_distance, process.out.tsv_per_umi, process.out.tsv_umi_per_position, @@ -112,8 +112,8 @@ nextflow_process { input[0] = [ [ id:'test', single_end:true ], // meta map - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.single_end.sorted.bam", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.single_end.sorted.bam.bai", checkIfExists: true) + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.single_end.umi.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.single_end.umi.sorted.bam.bai", checkIfExists: true) ] input[1] = get_output_stats """ @@ 
-141,8 +141,8 @@ nextflow_process { input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai", checkIfExists: true) + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai", checkIfExists: true) ] input[1] = get_output_stats """ @@ -170,8 +170,8 @@ nextflow_process { input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai", checkIfExists: true) + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai", checkIfExists: true) ] input[1] = get_output_stats """ diff --git a/modules/nf-core/umitools/dedup/tests/main.nf.test.snap b/modules/nf-core/umitools/dedup/tests/main.nf.test.snap index f7f4e94f196..04b81692a6d 100644 --- a/modules/nf-core/umitools/dedup/tests/main.nf.test.snap +++ b/modules/nf-core/umitools/dedup/tests/main.nf.test.snap @@ -37,13 +37,14 @@ }, "pe - with stats": { "content": [ + "b7be15ac7aae194b04bdbb56f3534495", [ [ { "id": "test", "single_end": false }, - "test.dedup.bam:md5,350e942a0d45e8356fa24bc8c47dc1ed" + "test.dedup_edit_distance.tsv:md5,c247a49b58768e6e2e86a6c08483e612" ] ], [ @@ -52,7 +53,7 @@ "id": "test", "single_end": false }, - "test.dedup_edit_distance.tsv:md5,65186b0964e2f8d970cc04d736d8b119" + 
"test.dedup_per_umi.tsv:md5,ced75f7bdbf38bf78f3137d5325a8773" ] ], [ @@ -61,16 +62,7 @@ "id": "test", "single_end": false }, - "test.dedup_per_umi.tsv:md5,8e6783a4a79437b095f095f2aefe7c01" - ] - ], - [ - [ - { - "id": "test", - "single_end": false - }, - "test.dedup_per_umi_per_position.tsv:md5,9386db4a104b8e4e32f3ca4a84efa4ac" + "test.dedup_per_umi_per_position.tsv:md5,2e1a12e6f720510880068deddeefe063" ] ], [ @@ -79,9 +71,9 @@ ], "meta": { "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nextflow": "24.10.1" }, - "timestamp": "2024-07-03T11:27:24.231325" + "timestamp": "2024-11-25T17:25:28.939957" }, "se - no stats - stub": { "content": [ @@ -103,36 +95,28 @@ }, "se - no stats": { "content": [ - "a114abd9fccce6fe2869852b5cd18964", + "9158ea6e7a0e54819e25cbac5fbc5cc0", [ "versions.yml:md5,e2f5146464c09bf7ae98c85ea5410e50" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nextflow": "24.10.1" }, - "timestamp": "2024-07-03T13:45:48.553561" + "timestamp": "2024-11-23T09:06:54.373171" }, "pe - no stats": { "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, - "test.dedup.bam:md5,350e942a0d45e8356fa24bc8c47dc1ed" - ] - ], + "b7be15ac7aae194b04bdbb56f3534495", [ "versions.yml:md5,e2f5146464c09bf7ae98c85ea5410e50" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nextflow": "24.10.1" }, - "timestamp": "2024-07-03T11:27:06.957467" + "timestamp": "2024-11-25T17:24:51.423637" } } \ No newline at end of file diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/main.nf.test b/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/main.nf.test index dd7f2371869..f4f14c71b9c 100644 --- a/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/main.nf.test +++ b/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/main.nf.test @@ -22,55 +22,6 @@ nextflow_workflow { test("sarscov2_bam_bai") { - setup{ - run("UMITOOLS_EXTRACT"){ - script "../../../../modules/nf-core/umitools/extract/main.nf" - 
config "./paired-end-umis.config" - process{ - """ - input[0] = [ - [ id:'test', single_end:false ], // meta map - [ - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) - ] - ] - """ - } - } - - run("BWA_INDEX"){ - script "../../../../modules/nf-core/bwa/index/main.nf" - process{ - """ - input[0] = [ - [ id:'sarscov2'], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ] - """ - } - } - run("BWA_MEM"){ - script "../../../../modules/nf-core/bwa/mem/main.nf" - process{ - """ - input[0] = UMITOOLS_EXTRACT.out.reads - input[1] = BWA_INDEX.out.index - input[2] = [[ id:'sarscov2'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] - input[3] = true - """ - } - } - run("SAMTOOLS_INDEX"){ - script "../../../../modules/nf-core/samtools/index/main.nf" - process{ - """ - input[0] = BWA_MEM.out.bam - """ - } - } - } - when { config "./paired-end-umis.config" params { @@ -78,9 +29,11 @@ nextflow_workflow { } workflow { """ - - input[0] = BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) - + input[0] = Channel.of([ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai', checkIfExists: true) + ]) """ } } @@ -88,7 +41,8 @@ nextflow_workflow { then { assertAll( { assert workflow.success}, - { assert snapshot(workflow.out.bam, workflow.out.versions).match() }, + { assert snapshot(bam(workflow.out.bam[0][1]).getSamLinesMD5()).match("test_bam_dedup_stats_samtools_umicollapse_bam")}, // separate, because of linting error otherwise + { assert 
snapshot(workflow.out.versions).match("test_bam_dedup_stats_samtools_umicollapse_versions") }, { assert workflow.out.bam.get(0).get(1) ==~ ".*.bam"}, { assert workflow.out.bai.get(0).get(1) ==~ ".*.bai"}, { assert workflow.out.dedup_stats.get(0).get(1) ==~ ".*_UMICollapse.log"}, diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/main.nf.test.snap b/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/main.nf.test.snap index f2b1fb20d9a..9694c6d5f68 100644 --- a/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/main.nf.test.snap +++ b/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/main.nf.test.snap @@ -4,69 +4,57 @@ [ [ { - "id": "test", - "single_end": false + "id": "test" }, "test.stats:md5,498621f92e86d55e4f7ae93170e6e733" ] ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.8.4", + "nextflow": "24.10.1" }, - "timestamp": "2024-09-16T08:04:02.179870196" + "timestamp": "2024-11-24T13:57:02.323104" }, - "test_bam_dedup_stats_samtools_umicollapse_flagstats": { + "test_bam_dedup_stats_samtools_umicollapse_versions": { "content": [ [ - [ - { - "id": "test", - "single_end": false - }, - "test.flagstat:md5,18d602435a02a4d721b78d1812622159" - ] + "versions.yml:md5,20605eb79c410c0ed179ba660d82f75b", + "versions.yml:md5,23617661d2c899996bee2b05db027e25", + "versions.yml:md5,657bce03545b4c57f9c5fc4314bf85f7", + "versions.yml:md5,e02a62a393a833778e16542eeed0d148", + "versions.yml:md5,ef00762e264b99ac45713dc0dedf4060" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.10.1" }, - "timestamp": "2024-04-09T17:05:48.69612524" + "timestamp": "2024-11-25T18:39:15.637444" }, - "sarscov2_bam_bai": { + "test_bam_dedup_stats_samtools_umicollapse_flagstats": { "content": [ [ [ { - "id": "test", - "single_end": false + "id": "test" }, - "test.dedup.bam:md5,3e2ae4701e3d2ca074ea878a314a3e4f" + "test.flagstat:md5,18d602435a02a4d721b78d1812622159" ] - ], - [ - 
"versions.yml:md5,20605eb79c410c0ed179ba660d82f75b", - "versions.yml:md5,23617661d2c899996bee2b05db027e25", - "versions.yml:md5,657bce03545b4c57f9c5fc4314bf85f7", - "versions.yml:md5,e02a62a393a833778e16542eeed0d148", - "versions.yml:md5,ef00762e264b99ac45713dc0dedf4060" ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.8.4", + "nextflow": "24.10.1" }, - "timestamp": "2024-10-22T10:44:38.266860983" + "timestamp": "2024-11-24T13:57:02.366866" }, "test_bam_dedup_stats_samtools_umicollapse_idxstats": { "content": [ [ [ { - "id": "test", - "single_end": false + "id": "test" }, "test.idxstats:md5,85d20a901eef23ca50c323638a2eb602" ] @@ -74,8 +62,18 @@ ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-24T13:57:02.410712" + }, + "test_bam_dedup_stats_samtools_umicollapse_bam": { + "content": [ + "b7be15ac7aae194b04bdbb56f3534495" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.10.1" }, - "timestamp": "2024-04-09T17:05:48.740441747" + "timestamp": "2024-11-25T18:39:15.613319" } } \ No newline at end of file diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/paired-end-umis.config b/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/paired-end-umis.config index 602c026f058..8d58a861f4b 100644 --- a/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/paired-end-umis.config +++ b/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/paired-end-umis.config @@ -1,10 +1,6 @@ process { - withName: UMITOOLS_EXTRACT { - ext.args = '--bc-pattern="NNNN" --bc-pattern2="NNNN"' - } - withName: UMICOLLAPSE { ext.prefix = { "${meta.id}.dedup" } } -} \ No newline at end of file +} diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/main.nf.test b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/main.nf.test index 9d38022b4bf..93e62485764 100644 --- 
a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/main.nf.test +++ b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/main.nf.test @@ -26,8 +26,8 @@ nextflow_workflow { input[0] = Channel.of([ [ id:'test'], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai', checkIfExists: true) ]) input[1] = val_get_dedup_stats """ @@ -41,6 +41,7 @@ nextflow_workflow { { assert workflow.out.bam.get(0).get(1) ==~ ".*.bam"}, { assert workflow.out.bai.get(0).get(1) ==~ ".*.bai"}, { assert snapshot( + bam(workflow.out.bam[0][1]).getSamLinesMD5(), workflow.out.stats, workflow.out.flagstat, workflow.out.idxstats, @@ -61,8 +62,8 @@ nextflow_workflow { input[0] = Channel.of([ [ id:'test'], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai', checkIfExists: true) ]) input[1] = val_get_dedup_stats """ diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/main.nf.test.snap b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/main.nf.test.snap index d39f9129b9a..3b36135720b 100644 --- 
a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/main.nf.test.snap +++ b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/main.nf.test.snap @@ -1,12 +1,13 @@ { "sarscov2_bam_bai": { "content": [ + "b7be15ac7aae194b04bdbb56f3534495", [ [ { "id": "test" }, - "test.stats:md5,84891a894010aeb882c4092db9248d2c" + "test.stats:md5,41ba57a9b90b54587e7d154e5405ea5e" ] ], [ @@ -14,7 +15,7 @@ { "id": "test" }, - "test.flagstat:md5,0bb716e40fae381b97484b58e0b16efe" + "test.flagstat:md5,18d602435a02a4d721b78d1812622159" ] ], [ @@ -22,7 +23,7 @@ { "id": "test" }, - "test.idxstats:md5,1adb27b52d4d64b826f48b59d61dcd4d" + "test.idxstats:md5,85d20a901eef23ca50c323638a2eb602" ] ], [ @@ -34,10 +35,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.8.4", + "nextflow": "24.10.1" }, - "timestamp": "2024-09-16T08:04:23.444693448" + "timestamp": "2024-11-25T17:23:13.841219" }, "sarscov2_bam_bai - stub": { "content": [ diff --git a/subworkflows/nf-core/fastq_align_dedup_bwameth/main.nf b/subworkflows/nf-core/fastq_align_dedup_bwameth/main.nf new file mode 100644 index 00000000000..c0cc67b83c7 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_dedup_bwameth/main.nf @@ -0,0 +1,163 @@ +include { BWAMETH_ALIGN } from '../../../modules/nf-core/bwameth/align/main' +include { PARABRICKS_FQ2BAMMETH } from '../../../modules/nf-core/parabricks/fq2bammeth/main' +include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_ALIGNMENTS } from '../../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/samtools/flagstat/main' +include { SAMTOOLS_STATS } from '../../../modules/nf-core/samtools/stats/main' +include { PICARD_MARKDUPLICATES } from '../../../modules/nf-core/picard/markduplicates/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_DEDUPLICATED } from '../../../modules/nf-core/samtools/index/main' +include { METHYLDACKEL_EXTRACT } 
from '../../../modules/nf-core/methyldackel/extract/main' +include { METHYLDACKEL_MBIAS } from '../../../modules/nf-core/methyldackel/mbias/main' + +workflow FASTQ_ALIGN_DEDUP_BWAMETH { + + take: + ch_reads // channel: [ val(meta), [ reads ] ] + ch_fasta // channel: [ val(meta), [ fasta ] ] + ch_fasta_index // channel: [ val(meta), [ fasta index ] ] + ch_bwameth_index // channel: [ val(meta), [ bwameth index ] ] + skip_deduplication // boolean: whether to deduplicate alignments + + main: + + ch_alignment = Channel.empty() + ch_alignment_index = Channel.empty() + ch_samtools_flagstat = Channel.empty() + ch_samtools_stats = Channel.empty() + ch_methydackel_extract_bedgraph = Channel.empty() + ch_methydackel_extract_methylkit = Channel.empty() + ch_methydackel_mbias = Channel.empty() + ch_picard_metrics = Channel.empty() + ch_multiqc_files = Channel.empty() + ch_versions = Channel.empty() + + /* + * Align with bwameth + */ + if (params.use_gpu) { + /* + * Align with parabricks GPU enabled fq2bammeth implementation of bwameth + */ + PARABRICKS_FQ2BAMMETH ( + ch_reads, + ch_fasta, + ch_bwameth_index, + [] // known sites + ) + ch_alignment = PARABRICKS_FQ2BAMMETH.out.bam + ch_versions = ch_versions.mix(PARABRICKS_FQ2BAMMETH.out.versions) + } else { + /* + * Align with CPU version of bwameth + */ + BWAMETH_ALIGN ( + ch_reads, + ch_fasta, + ch_bwameth_index + ) + ch_alignment = BWAMETH_ALIGN.out.bam + ch_versions = BWAMETH_ALIGN.out.versions + } + + /* + * Sort raw output BAM + */ + SAMTOOLS_SORT ( + ch_alignment, + [[:],[]] // [ [meta], [fasta]] + ) + ch_alignment = SAMTOOLS_SORT.out.bam + ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions) + + /* + * Run samtools index on alignment + */ + SAMTOOLS_INDEX_ALIGNMENTS ( + ch_alignment + ) + ch_alignment_index = SAMTOOLS_INDEX_ALIGNMENTS.out.bai + ch_versions = ch_versions.mix(SAMTOOLS_INDEX_ALIGNMENTS.out.versions) + + /* + * Run samtools flagstat + */ + SAMTOOLS_FLAGSTAT ( + ch_alignment.join(ch_alignment_index) + ) + 
ch_samtools_flagstat = SAMTOOLS_FLAGSTAT.out.flagstat + ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) + + /* + * Run samtools stats + */ + SAMTOOLS_STATS ( + ch_alignment.join(ch_alignment_index), + [[:],[]] // [ [meta], [fasta]] + ) + ch_samtools_stats = SAMTOOLS_STATS.out.stats + ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions) + + if (!skip_deduplication) { + /* + * Run Picard MarkDuplicates + */ + PICARD_MARKDUPLICATES ( + ch_alignment, + ch_fasta, + ch_fasta_index + ) + /* + * Run samtools index on deduplicated alignment + */ + SAMTOOLS_INDEX_DEDUPLICATED ( + PICARD_MARKDUPLICATES.out.bam + ) + ch_alignment = PICARD_MARKDUPLICATES.out.bam + ch_alignment_index = SAMTOOLS_INDEX_DEDUPLICATED.out.bai + ch_picard_metrics = PICARD_MARKDUPLICATES.out.metrics + ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX_DEDUPLICATED.out.versions) + } + + /* + * Extract per-base methylation and plot methylation bias + */ + + METHYLDACKEL_EXTRACT ( + ch_alignment.join(ch_alignment_index), + ch_fasta.map{ meta, fasta_file -> fasta_file }, + ch_fasta_index.map{ meta, fasta_index -> fasta_index } + ) + ch_methydackel_extract_bedgraph = METHYLDACKEL_EXTRACT.out.bedgraph + ch_methydackel_extract_methylkit = METHYLDACKEL_EXTRACT.out.methylkit + ch_versions = ch_versions.mix(METHYLDACKEL_EXTRACT.out.versions) + + METHYLDACKEL_MBIAS ( + ch_alignment.join(ch_alignment_index), + ch_fasta.map{ meta, fasta_file -> fasta_file }, + ch_fasta_index.map{ meta, fasta_index -> fasta_index } + ) + ch_methydackel_mbias = METHYLDACKEL_MBIAS.out.txt + ch_versions = ch_versions.mix(METHYLDACKEL_MBIAS.out.versions) + + /* + * Collect MultiQC inputs + */ + ch_multiqc_files = ch_picard_metrics.collect{ meta, metrics -> metrics } + .mix(ch_samtools_flagstat.collect{ meta, flagstat -> flagstat }) + .mix(ch_samtools_stats.collect{ meta, stats -> stats }) + .mix(ch_methydackel_extract_bedgraph.collect{ meta, bedgraph -> 
bedgraph }) + .mix(ch_methydackel_mbias.collect{ meta, txt -> txt }) + + emit: + bam = ch_alignment // channel: [ val(meta), [ bam ] ] + bai = ch_alignment_index // channel: [ val(meta), [ bai ] ] + samtools_flagstat = ch_samtools_flagstat // channel: [ val(meta), [ flagstat ] ] + samtools_stats = ch_samtools_stats // channel: [ val(meta), [ stats ] ] + methydackel_extract_bedgraph = ch_methydackel_extract_bedgraph // channel: [ val(meta), [ bedgraph ] ] + methydackel_extract_methylkit = ch_methydackel_extract_methylkit // channel: [ val(meta), [ methylkit ] ] + methydackel_mbias = ch_methydackel_mbias // channel: [ val(meta), [ mbias ] ] + picard_metrics = ch_picard_metrics // channel: [ val(meta), [ metrics ] ] + multiqc = ch_multiqc_files // channel: [ *{html,txt} ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/fastq_align_dedup_bwameth/meta.yml b/subworkflows/nf-core/fastq_align_dedup_bwameth/meta.yml new file mode 100644 index 00000000000..a66ea024e84 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_dedup_bwameth/meta.yml @@ -0,0 +1,116 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "fastq_align_dedup_bwameth" +description: Performs alignment of BS-Seq reads using bwameth or parabricks/fq2bammeth, sort and deduplicate +keywords: + - bwameth + - alignment + - 3-letter genome + - map + - methylation + - 5mC + - methylseq + - bisulphite + - bisulfite + - fastq + - bam +components: + - bwameth/align + - parabricks/fq2bammeth + - samtools/sort + - samtools/index + - samtools/flagstat + - samtools/stats + - picard/markduplicates + - samtools/index + - methyldackel/extract + - methyldackel/mbias +input: + - ch_reads: + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. 
+ Structure: [ val(meta), [ path(reads) ] ] + pattern: "*.{fastq,fastq.gz}" + - ch_fasta: + type: file + description: | + Structure: [ val(meta), path(fasta) ] + pattern: "*.{fa,fa.gz}" + - ch_fasta_index: + type: file + description: | + Structure: [ val(meta), path(fasta index) ] + - ch_bwameth_index: + description: | + Bismark genome index files + Structure: [ val(meta), path(index) ] + pattern: "BismarkIndex" + - skip_deduplication: + type: boolean + description: | + Skip deduplication of aligned reads +output: + - bam: + type: file + description: | + Channel containing BAM files + Structure: [ val(meta), path(bam) ] + pattern: "*.bam" + - bai: + type: file + description: | + Channel containing indexed BAM (BAI) files + Structure: [ val(meta), path(bai) ] + pattern: "*.bai" + - samtools_flagstat: + type: file + description: | + File containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] + pattern: "*.flagstat" + - samtools_stats: + type: file + description: | + File containing samtools stats output + Structure: [ val(meta), path(stats) ] + pattern: "*.{stats}" + - methydackel_extract_bedgraph: + type: file + description: | + bedGraph file, containing per-base methylation metrics + Structure: [ val(meta), path(bedgraph) ] + pattern: "*.bedGraph" + - methydackel_extract_methylkit: + type: file + description: | + methylKit file, containing per-base methylation metrics + Structure: [ val(meta), path(methylKit) ] + pattern: "*.methylKit" + - methydackel_mbias: + type: file + description: | + Text file containing methylation bias + Structure: [ val(meta), path(mbias) ] + pattern: "*.{txt}" + - picard_metrics: + type: file + description: | + Duplicate metrics file generated by picard + Structure: [ val(meta), path(metrics) ] + pattern: "*.{metrics.txt}" + - multiqc: + type: file + description: | + Channel containing MultiQC report aggregating results across samples. 
+ Structure: [ val(meta), path(multiqc_report.html) ] + pattern: "*.html" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@sateeshperi" +maintainers: + - "@sateeshperi" diff --git a/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/gpu.nf.test b/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/gpu.nf.test new file mode 100644 index 00000000000..b156de6bc47 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/gpu.nf.test @@ -0,0 +1,132 @@ +nextflow_workflow { + + name "Test Subworkflow FASTQ_ALIGN_DEDUP_BWAMETH" + script "../main.nf" + workflow "FASTQ_ALIGN_DEDUP_BWAMETH" + config "./nextflow.config" + + tag "gpu" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/fastq_align_dedup_bwameth" + tag "bwameth/align" + tag "parabricks/fq2bammeth" + tag "samtools/sort" + tag "samtools/index" + tag "samtools/flagstat" + tag "samtools/stats" + tag "picard/markduplicates" + tag "samtools/index" + tag "methyldackel/extract" + tag "methyldackel/mbias" + tag "untar" + + setup { + run("UNTAR") { + script "../../../../modules/nf-core/untar/main.nf" + process { + """ + input[0] = [ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/Bwameth_Index.tar.gz', checkIfExists: true) + ] + """ + } + } + } + + test("Params: parabricks/fq2bammeth single-end | use_gpu") { + + when { + params { + skip_deduplication = false + use_gpu = true + } + + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + file('https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub1.fastq.gz', checkIfExists: true) + ]) + input[1] = Channel.of([ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa', checkIfExists: true) + ]) + input[2] = Channel.of([ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa.fai', 
checkIfExists: true) + ]) + input[3] = UNTAR.out.untar + input[4] = params.skip_deduplication + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + workflow.out.bam.collect { meta, bamfile -> bam(bamfile).getReadsMD5() }, + workflow.out.bai.collect { meta, bai -> file(bai).name }, + workflow.out.samtools_flagstat, + workflow.out.samtools_stats, + workflow.out.methydackel_extract_bedgraph, + workflow.out.methydackel_extract_methylkit, + workflow.out.methydackel_mbias, + workflow.out.picard_metrics.collect { meta, metrics -> file(metrics).name }, + workflow.out.multiqc.flatten().collect { path -> file(path).name }, + workflow.out.versions + ).match() } + ) + } + } + + test("Params: parabricks/fq2bammeth single-end | use_gpu | skip_deduplication") { + + when { + params { + skip_deduplication = true + use_gpu = true + } + + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + file('https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub1.fastq.gz', checkIfExists: true) + ]) + input[1] = Channel.of([ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa', checkIfExists: true) + ]) + input[2] = Channel.of([ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa.fai', checkIfExists: true) + ]) + input[3] = UNTAR.out.untar + input[4] = params.skip_deduplication + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + workflow.out.bam.collect { meta, bamfile -> bam(bamfile).getReadsMD5() }, + workflow.out.bai.collect { meta, bai -> file(bai).name }, + workflow.out.samtools_flagstat, + workflow.out.samtools_stats, + workflow.out.methydackel_extract_bedgraph, + workflow.out.methydackel_extract_methylkit, + workflow.out.methydackel_mbias, + workflow.out.picard_metrics.collect { meta, metrics -> file(metrics).name }, + workflow.out.multiqc.flatten().collect { path -> file(path).name }, + 
workflow.out.versions + ).match() } + ) + } + } + +} diff --git a/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/gpu.nf.test.snap b/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/gpu.nf.test.snap new file mode 100644 index 00000000000..a1602b3074a --- /dev/null +++ b/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/gpu.nf.test.snap @@ -0,0 +1,149 @@ +{ + "Params: parabricks/fq2bammeth single-end | use_gpu": { + "content": [ + [ + "a7f7ca7b5eb503ab58790d64a0273ed6" + ], + [ + "test.markdup.sorted.bam.bai" + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,897d500a710a56a7098172167fa71108" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,9aac964b859fda8239aa0eae16382d56" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.markdup.sorted_CpG.bedGraph:md5,f2fe02f180456f5f4922a2a8aa559fca" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.mbias.txt:md5,fce04d733e066d0b933cedc602e2af81" + ] + ], + [ + "test.markdup.sorted.MarkDuplicates.metrics.txt" + ], + [ + "test.flagstat", + "test.markdup.sorted.MarkDuplicates.metrics.txt", + "test.markdup.sorted_CpG.bedGraph", + "test.mbias.txt", + "test.stats" + ], + [ + "versions.yml:md5,36bd052d24ec766084f6aa2fb8a6ae4c", + "versions.yml:md5,45239309d0c40b5f0a56eba4347f09be", + "versions.yml:md5,4a6bb9a47d944ab197c823ae0ae61092", + "versions.yml:md5,8b72c7013fa6f632d28933b60ad1f2ea", + "versions.yml:md5,a80a57d29a4d72830f033bc0326b1abf", + "versions.yml:md5,b6492c12bfae23b6e279f4abfd4780e5", + "versions.yml:md5,baba90d5bd57679b913be2abd531ae15", + "versions.yml:md5,ddbe480ff81df55c6d95f911e7b6dc8a", + "versions.yml:md5,e9602257141b65a907ad9036e8a32a83" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-17T06:56:47.211389" + }, + "Params: parabricks/fq2bammeth single-end | use_gpu | skip_deduplication": { + "content": [ + [ + "a7f7ca7b5eb503ab58790d64a0273ed6" + 
], + [ + "test.sorted.bam.bai" + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,897d500a710a56a7098172167fa71108" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,9aac964b859fda8239aa0eae16382d56" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted_CpG.bedGraph:md5,b0cb426020f8beb45b4e8f09b9a17bfa" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.mbias.txt:md5,fce04d733e066d0b933cedc602e2af81" + ] + ], + [ + + ], + [ + "test.flagstat", + "test.mbias.txt", + "test.sorted_CpG.bedGraph", + "test.stats" + ], + [ + "versions.yml:md5,36bd052d24ec766084f6aa2fb8a6ae4c", + "versions.yml:md5,4a6bb9a47d944ab197c823ae0ae61092", + "versions.yml:md5,8b72c7013fa6f632d28933b60ad1f2ea", + "versions.yml:md5,a80a57d29a4d72830f033bc0326b1abf", + "versions.yml:md5,b6492c12bfae23b6e279f4abfd4780e5", + "versions.yml:md5,baba90d5bd57679b913be2abd531ae15", + "versions.yml:md5,e9602257141b65a907ad9036e8a32a83" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-17T06:57:34.41912" + } +} diff --git a/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/main.nf.test b/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/main.nf.test new file mode 100644 index 00000000000..6b20d545684 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/main.nf.test @@ -0,0 +1,179 @@ +nextflow_workflow { + + name "Test Subworkflow FASTQ_ALIGN_DEDUP_BWAMETH" + script "../main.nf" + workflow "FASTQ_ALIGN_DEDUP_BWAMETH" + config "./nextflow.config" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/fastq_align_dedup_bwameth" + tag "bwameth/align" + tag "parabricks/fq2bammeth" + tag "samtools/sort" + tag "samtools/index" + tag "samtools/flagstat" + tag "samtools/stats" + tag "picard/markduplicates" + tag "samtools/index" + tag "methyldackel/extract" + tag "methyldackel/mbias" + tag "untar" + + setup { + 
run("UNTAR") { + script "../../../../modules/nf-core/untar/main.nf" + process { + """ + input[0] = [ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/Bwameth_Index.tar.gz', checkIfExists: true) + ] + """ + } + } + } + + test("Params: bwameth single-end | default") { + + when { + params { + skip_deduplication = false + use_gpu = false + } + + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + file('https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub1.fastq.gz', checkIfExists: true) + ]) + input[1] = Channel.of([ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa', checkIfExists: true) + ]) + input[2] = Channel.of([ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa.fai', checkIfExists: true) + ]) + input[3] = UNTAR.out.untar + input[4] = params.skip_deduplication + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + workflow.out.bam.collect { meta, bamfile -> bam(bamfile).getReadsMD5() }, + workflow.out.bai.collect { meta, bai -> file(bai).name }, + workflow.out.samtools_flagstat, + workflow.out.samtools_stats, + workflow.out.methydackel_extract_bedgraph, + workflow.out.methydackel_extract_methylkit, + workflow.out.methydackel_mbias, + workflow.out.picard_metrics.collect { meta, metrics -> file(metrics).name }, + workflow.out.multiqc.flatten().collect { path -> file(path).name }, + workflow.out.versions + ).match() } + ) + } + } + + test("Params: bwameth paired-end | default") { + + when { + params { + skip_deduplication = false + use_gpu = false + } + + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + file('https://github.com/nf-core/test-datasets/raw/methylseq/testdata/Ecoli_10K_methylated_R1.fastq.gz', checkIfExists: true), + file('https://github.com/nf-core/test-datasets/raw/methylseq/testdata/Ecoli_10K_methylated_R2.fastq.gz', 
checkIfExists: true) + ]) + input[1] = Channel.of([ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa', checkIfExists: true) + ]) + input[2] = Channel.of([ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa.fai', checkIfExists: true) + ]) + input[3] = UNTAR.out.untar + input[4] = params.skip_deduplication + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + workflow.out.bam.collect { meta, bamfile -> bam(bamfile).getReadsMD5() }, + workflow.out.bai.collect { meta, bai -> file(bai).name }, + workflow.out.samtools_flagstat, + workflow.out.samtools_stats, + workflow.out.methydackel_extract_bedgraph, + workflow.out.methydackel_extract_methylkit, + workflow.out.methydackel_mbias, + workflow.out.picard_metrics.collect { meta, metrics -> file(metrics).name }, + workflow.out.multiqc.flatten().collect { path -> file(path).name }, + workflow.out.versions + ).match() } + ) + } + } + + test("Params: bwameth paired-end | skip_deduplication") { + + when { + params { + skip_deduplication = true + use_gpu = false + } + + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + file('https://github.com/nf-core/test-datasets/raw/methylseq/testdata/Ecoli_10K_methylated_R1.fastq.gz', checkIfExists: true), + file('https://github.com/nf-core/test-datasets/raw/methylseq/testdata/Ecoli_10K_methylated_R2.fastq.gz', checkIfExists: true) + ]) + input[1] = Channel.of([ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa', checkIfExists: true) + ]) + input[2] = Channel.of([ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa.fai', checkIfExists: true) + ]) + input[3] = UNTAR.out.untar + input[4] = params.skip_deduplication + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + workflow.out.bam.collect { meta, bamfile -> bam(bamfile).getReadsMD5() 
}, + workflow.out.bai.collect { meta, bai -> file(bai).name }, + workflow.out.samtools_flagstat, + workflow.out.samtools_stats, + workflow.out.methydackel_extract_bedgraph, + workflow.out.methydackel_extract_methylkit, + workflow.out.methydackel_mbias, + workflow.out.picard_metrics.collect { meta, metrics -> file(metrics).name }, + workflow.out.multiqc.flatten().collect { path -> file(path).name }, + workflow.out.versions + ).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/main.nf.test.snap new file mode 100644 index 00000000000..90c9601506b --- /dev/null +++ b/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/main.nf.test.snap @@ -0,0 +1,224 @@ +{ + "Params: bwameth single-end | default": { + "content": [ + [ + "37ec1c6338cc3fee7ab1cb2d48dba38" + ], + [ + "test.markdup.sorted.bam.bai" + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,897d500a710a56a7098172167fa71108" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,9aac964b859fda8239aa0eae16382d56" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.markdup.sorted_CpG.bedGraph:md5,f2fe02f180456f5f4922a2a8aa559fca" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.mbias.txt:md5,fce04d733e066d0b933cedc602e2af81" + ] + ], + [ + "test.markdup.sorted.MarkDuplicates.metrics.txt" + ], + [ + "test.flagstat", + "test.markdup.sorted.MarkDuplicates.metrics.txt", + "test.markdup.sorted_CpG.bedGraph", + "test.mbias.txt", + "test.stats" + ], + [ + "versions.yml:md5,36bd052d24ec766084f6aa2fb8a6ae4c", + "versions.yml:md5,45239309d0c40b5f0a56eba4347f09be", + "versions.yml:md5,4a6bb9a47d944ab197c823ae0ae61092", + "versions.yml:md5,8b72c7013fa6f632d28933b60ad1f2ea", + "versions.yml:md5,8edf3166176c863b88ba488f8b715aa3", + "versions.yml:md5,a80a57d29a4d72830f033bc0326b1abf", + 
"versions.yml:md5,b6492c12bfae23b6e279f4abfd4780e5", + "versions.yml:md5,baba90d5bd57679b913be2abd531ae15", + "versions.yml:md5,ddbe480ff81df55c6d95f911e7b6dc8a" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-17T05:42:12.81856451" + }, + "Params: bwameth paired-end | skip_deduplication": { + "content": [ + [ + "cf25656fffc044f2bb7d9f1b3686ecb4" + ], + [ + "test.sorted.bam.bai" + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,4ff87d121ca174953734723938c99081" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,c753c72eb4e1c32f74afb1fbd932fe1f" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted_CpG.bedGraph:md5,285e492823182f5705bf0817e2d088b8" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.mbias.txt:md5,c1fda203c1b19aca2498efe0fd4cc9e3" + ] + ], + [ + + ], + [ + "test.flagstat", + "test.mbias.txt", + "test.sorted_CpG.bedGraph", + "test.stats" + ], + [ + "versions.yml:md5,36bd052d24ec766084f6aa2fb8a6ae4c", + "versions.yml:md5,4a6bb9a47d944ab197c823ae0ae61092", + "versions.yml:md5,8b72c7013fa6f632d28933b60ad1f2ea", + "versions.yml:md5,8edf3166176c863b88ba488f8b715aa3", + "versions.yml:md5,a80a57d29a4d72830f033bc0326b1abf", + "versions.yml:md5,b6492c12bfae23b6e279f4abfd4780e5", + "versions.yml:md5,baba90d5bd57679b913be2abd531ae15" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-17T05:10:43.907134648" + }, + "Params: bwameth paired-end | default": { + "content": [ + [ + "cf25656fffc044f2bb7d9f1b3686ecb4" + ], + [ + "test.markdup.sorted.bam.bai" + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,4ff87d121ca174953734723938c99081" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,c753c72eb4e1c32f74afb1fbd932fe1f" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.markdup.sorted_CpG.bedGraph:md5,c6c73e5abba70ac799500f592fec5c29" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.mbias.txt:md5,c1fda203c1b19aca2498efe0fd4cc9e3" + ] + ], + [ + "test.markdup.sorted.MarkDuplicates.metrics.txt" + ], + [ + "test.flagstat", + "test.markdup.sorted.MarkDuplicates.metrics.txt", + "test.markdup.sorted_CpG.bedGraph", + "test.mbias.txt", + "test.stats" + ], + [ + "versions.yml:md5,36bd052d24ec766084f6aa2fb8a6ae4c", + "versions.yml:md5,45239309d0c40b5f0a56eba4347f09be", + "versions.yml:md5,4a6bb9a47d944ab197c823ae0ae61092", + "versions.yml:md5,8b72c7013fa6f632d28933b60ad1f2ea", + "versions.yml:md5,8edf3166176c863b88ba488f8b715aa3", + "versions.yml:md5,a80a57d29a4d72830f033bc0326b1abf", + "versions.yml:md5,b6492c12bfae23b6e279f4abfd4780e5", + "versions.yml:md5,baba90d5bd57679b913be2abd531ae15", + "versions.yml:md5,ddbe480ff81df55c6d95f911e7b6dc8a" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-17T05:42:39.183331191" + } +} diff --git a/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/nextflow.config b/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/nextflow.config new file mode 100644 index 00000000000..55385ec0621 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/nextflow.config @@ -0,0 +1,14 @@ +process { + withName: 'PARABRICKS_FQ2BAMMETH' { + ext.args = '--low-memory' + } + + withName: 'SAMTOOLS_SORT' { + ext.prefix = { "${meta.id}.sorted" } + } + + withName: 'PICARD_MARKDUPLICATES' { + ext.args = "--ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --PROGRAM_RECORD_ID 'null' --TMP_DIR tmp" + ext.prefix = { "${meta.id}.markdup.sorted" } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index 4cd33626fb1..228dbff8979 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ 
b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -56,21 +56,6 @@ def checkProfileProvided(nextflow_cli_args) { } } -// -// Citation string for pipeline -// -def workflowCitation() { - def temp_doi_ref = "" - def manifest_doi = workflow.manifest.doi.tokenize(",") - // Handling multiple DOIs - // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers - // Removing ` ` since the manifest.doi is a string and not a proper list - manifest_doi.each { doi_ref -> - temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" - } - return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + "* The pipeline\n" + temp_doi_ref + "\n" + "* The nf-core framework\n" + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + "* Software dependencies\n" + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" -} - // // Generate workflow version string // @@ -150,33 +135,6 @@ def paramsSummaryMultiqc(summary_params) { return yaml_file_text } -// -// nf-core logo -// -def nfCoreLogo(monochrome_logs=true) { - def colors = logColours(monochrome_logs) as Map - String.format( - """\n - ${dashedLine(monochrome_logs)} - ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} - ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} - ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} - ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} - ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} - ${dashedLine(monochrome_logs)} - """.stripIndent() - ) -} - -// -// Return dashed line -// -def dashedLine(monochrome_logs=true) { - def colors = logColours(monochrome_logs) as Map - return "-${colors.dim}----------------------------------------------------${colors.reset}-" -} - // // ANSII colours used for terminal logging // @@ -341,7 
+299,7 @@ def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdi def email_html = html_template.toString() // Render the sendmail template - def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as MemoryUnit def smail_fields = [email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes()] def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields) diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test index 1dc317f8f7b..e43d208b1b0 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test @@ -41,58 +41,6 @@ nextflow_function { } } - test("Test Function workflowCitation") { - - function "workflowCitation" - - then { - assertAll( - { assert function.success }, - { assert snapshot(function.result).match() } - ) - } - } - - test("Test Function nfCoreLogo") { - - function "nfCoreLogo" - - when { - function { - """ - input[0] = false - """ - } - } - - then { - assertAll( - { assert function.success }, - { assert snapshot(function.result).match() } - ) - } - } - - test("Test Function dashedLine") { - - function "dashedLine" - - when { - function { - """ - input[0] = false - """ - } - } - - then { - assertAll( - { assert function.success }, - { assert snapshot(function.result).match() } - ) - } - } - test("Test Function without logColours") { function "logColours" diff --git 
a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap index 1037232c9e4..02c67014139 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -17,26 +17,6 @@ }, "timestamp": "2024-02-28T12:02:59.729647" }, - "Test Function nfCoreLogo": { - "content": [ - "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T12:03:10.562934" - }, - "Test Function workflowCitation": { - "content": [ - "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T12:03:07.019761" - }, "Test Function without logColours": { "content": [ { @@ -95,16 +75,6 @@ }, "timestamp": "2024-02-28T12:03:17.969323" }, - "Test Function dashedLine": { - "content": [ - "-\u001b[2m----------------------------------------------------\u001b[0m-" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T12:03:14.366181" - }, "Test Function with logColours": { "content": [ {