From caab1314ca62679b629da4c79afa9a4cab2bb8ee Mon Sep 17 00:00:00 2001 From: Louis LE NEZET <58640615+LouisLeNezet@users.noreply.github.com> Date: Thu, 28 Nov 2024 15:35:19 +0100 Subject: [PATCH 01/12] Add compressed output support for `GAWK` (#7105) * Update gawk for compressed output * Update nf-test and linting --------- Co-authored-by: LouisLeNezet --- .github/workflows/lint.yml | 1 - modules/nf-core/gawk/main.nf | 6 ++ modules/nf-core/gawk/tests/main.nf.test | 77 +++++++++++++++++-- modules/nf-core/gawk/tests/main.nf.test.snap | 35 ++++++++- modules/nf-core/gawk/tests/nextflow.config | 4 +- .../tests/nextflow_with_program_file.config | 5 -- 6 files changed, 111 insertions(+), 17 deletions(-) delete mode 100644 modules/nf-core/gawk/tests/nextflow_with_program_file.config diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 72fa5678035..69963507b93 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -80,7 +80,6 @@ jobs: script: | return [...new Set(${{ steps.filter.outputs.modules_files }} .map(path => path - .replace('tests/', '') .replace('modules/nf-core/', '') .split('/') .slice(0, 2) diff --git a/modules/nf-core/gawk/main.nf b/modules/nf-core/gawk/main.nf index 7514246eeb8..b9df2b8c5d0 100644 --- a/modules/nf-core/gawk/main.nf +++ b/modules/nf-core/gawk/main.nf @@ -28,6 +28,11 @@ process GAWK { lst_gz = input.collect{ it.getExtension().endsWith("gz") } unzip = lst_gz.contains(false) ? "" : "find ${input} -exec zcat {} \\; | \\" input_cmd = unzip ? "" : "${input}" + output_cmd = suffix.endsWith("gz") ? "| gzip" : "" + + input.collect{ + assert it.name != "${prefix}.${suffix}" : "Input and output names are the same, set prefix in module configuration to disambiguate!" + } """ ${unzip} @@ -35,6 +40,7 @@ process GAWK { ${args} \\ ${program} \\ ${input_cmd} \\ + ${output_cmd} \\ > ${prefix}.${suffix} cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/gawk/tests/main.nf.test b/modules/nf-core/gawk/tests/main.nf.test index 5952e9a2935..b3cde8bf0ac 100644 --- a/modules/nf-core/gawk/tests/main.nf.test +++ b/modules/nf-core/gawk/tests/main.nf.test @@ -8,10 +8,14 @@ nextflow_process { tag "modules_nfcore" tag "gawk" - test("Convert fasta to bed") { - config "./nextflow.config" + config "./nextflow.config" + test("Convert fasta to bed") { when { + params { + gawk_suffix = "bed" + gawk_args2 = '\'BEGIN {FS="\t"}; {print \$1 FS "0" FS \$2}\'' + } process { """ input[0] = [ @@ -32,9 +36,11 @@ nextflow_process { } test("Convert fasta to bed with program file") { - config "./nextflow_with_program_file.config" - when { + params { + gawk_suffix = "bed" + gawk_args2 = "" + } process { """ input[0] = [ @@ -55,9 +61,11 @@ nextflow_process { } test("Extract first column from multiple files") { - config "./nextflow_with_program_file.config" - tag "test" when { + params { + gawk_suffix = "bed" + gawk_args2 = "" + } process { """ input[0] = [ @@ -79,9 +87,11 @@ nextflow_process { } test("Unzip files before processing") { - config "./nextflow_with_program_file.config" - when { + params { + gawk_suffix = "bed" + gawk_args2 = "" + } process { """ input[0] = [ @@ -101,4 +111,55 @@ nextflow_process { ) } } + + test("Compress after processing") { + when { + params { + gawk_suffix = "txt.gz" + gawk_args2 = '\'BEGIN {FS="\t"}; {print \$1 FS "0" FS \$2}\'' + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[1] = [] + """ 
+ } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("Input and output files are similar") { + when { + params { + gawk_suffix = "txt" + gawk_args2 = "" + } + process { + """ + input[0] = [ + [ id:'hello' ], // meta map + [file(params.modules_testdata_base_path + 'generic/txt/hello.txt', checkIfExists: true), + file(params.modules_testdata_base_path + 'generic/txt/species_names.txt', checkIfExists: true)] + ] + input[1] = Channel.of('BEGIN {FS=" "}; {print \$1}').collectFile(name:"program.txt") + """ + } + } + + then { + assertAll( + { assert process.failed }, + { assert process.errorReport.contains("Input and output names are the same, set prefix in module configuration to disambiguate!") } + ) + } + } } \ No newline at end of file diff --git a/modules/nf-core/gawk/tests/main.nf.test.snap b/modules/nf-core/gawk/tests/main.nf.test.snap index d396f738b6a..1b3c2f714b7 100644 --- a/modules/nf-core/gawk/tests/main.nf.test.snap +++ b/modules/nf-core/gawk/tests/main.nf.test.snap @@ -1,4 +1,37 @@ { + "Compress after processing": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.txt.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "1": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ], + "output": [ + [ + { + "id": "test" + }, + "test.txt.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "versions": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-27T17:11:20.054143406" + }, "Convert fasta to bed": { "content": [ { @@ -131,4 +164,4 @@ }, "timestamp": "2024-10-19T22:08:19.533527657" } -} +} \ No newline at end of file diff --git a/modules/nf-core/gawk/tests/nextflow.config b/modules/nf-core/gawk/tests/nextflow.config index 6e5d43a35c5..895709a7639 100644 --- a/modules/nf-core/gawk/tests/nextflow.config +++ b/modules/nf-core/gawk/tests/nextflow.config @@ -1,6 +1,6 @@ process { withName: GAWK { - ext.suffix = "bed" - ext.args2 = '\'BEGIN {FS="\t"}; {print \$1 FS "0" FS \$2}\'' + ext.suffix = params.gawk_suffix + ext.args2 = params.gawk_args2 } } diff --git a/modules/nf-core/gawk/tests/nextflow_with_program_file.config b/modules/nf-core/gawk/tests/nextflow_with_program_file.config deleted file mode 100644 index 693ad41963d..00000000000 --- a/modules/nf-core/gawk/tests/nextflow_with_program_file.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: GAWK { - ext.suffix = "bed" - } -} From b4d8dd4341c85c4898175759ab156bd7e05927a2 Mon Sep 17 00:00:00 2001 From: Alex Bartlett <74612800+abartlett004@users.noreply.github.com> Date: Thu, 28 Nov 2024 10:00:56 -0500 Subject: [PATCH 02/12] spaceranger_bump_version (#7088) * spaceranger_bump_version * build: Use ADD to download tar.gz * style: Clean up Dockerfile * chore: Update labels and use an ARG * chore: Switch base image to match Seqera Containers * build: Make a manual checksum * chore: Update hello@edmundmiller.dev * docs: Update documentation * build: Bump image location and tag * build: Can't actually get run of the tar.gz unzip step Because of the Spaceranger URL. 
* test(spaceranger): Add stub for mkref * test: Bump snapshot * test: Update versions yaml snapshot * docs: Use a placeholder to avoid confusion Co-authored-by: sebotic --------- Co-authored-by: Edmund Miller Co-authored-by: Edmund Miller Co-authored-by: sebotic --- modules/nf-core/spaceranger/Dockerfile | 42 +++--- modules/nf-core/spaceranger/README.md | 22 ++-- modules/nf-core/spaceranger/count/main.nf | 2 +- .../spaceranger/count/tests/main.nf.test | 6 +- .../spaceranger/count/tests/main.nf.test.snap | 120 +++++++++--------- modules/nf-core/spaceranger/mkgtf/main.nf | 2 +- .../spaceranger/mkgtf/tests/main.nf.test | 5 +- .../spaceranger/mkgtf/tests/main.nf.test.snap | 24 ++-- modules/nf-core/spaceranger/mkref/main.nf | 35 ++++- .../spaceranger/mkref/tests/main.nf.test | 2 +- .../spaceranger/mkref/tests/main.nf.test.snap | 18 +-- 11 files changed, 164 insertions(+), 114 deletions(-) diff --git a/modules/nf-core/spaceranger/Dockerfile b/modules/nf-core/spaceranger/Dockerfile index bb414dcf480..c08c4c14dd9 100644 --- a/modules/nf-core/spaceranger/Dockerfile +++ b/modules/nf-core/spaceranger/Dockerfile @@ -1,21 +1,33 @@ -FROM continuumio/miniconda3:4.9.2 -LABEL authors="Gisela Gabernet ,Edmund Miller " \ - description="Docker image containing Space Ranger" +# syntax=docker/dockerfile:1 + +FROM mambaorg/micromamba:1.5.10-noble +ARG SPACERANGER_VER="3.1.2" +# NOTE you need to go fill out the form to get a new download link +ARG SPACERANGER_URL="https://cf.10xgenomics.com/releases/spatial-exp/spaceranger-3.1.2.tar.gz?Expires=1732608367&Key-Pair-Id=APKAI7S6A5RYOXBWRPDA&Signature=S4jcGCs5H0wLFfREhSc~AfuPIOXE~PW96kX9o2EyxZnmn1goUecgfEWTj67fL1DnZJBIa06kFdUdNpsMn2ustiSWOfXEgjlYQnxIcNnQIiEBGoQTHWphvx3LEQ6wtZnkWS80P6IcE0HJkIsgy04t6Sohih5cxY4jgytYsrAfZDYr5G3KKFwTfCKmhzMaXqW635yPbyQ8xEcQHK0QwviAx8-EFq-PE8UzC4QgUKi2MW-ivcfZkSDSfF8C3s7SgwDXIGIWv52mzeszenxMjN4KrWQotZ7ZpktzI0Vfpz0dNC17dQeDQUHj4LuNYbdh3RqsPKtqu3wjCe2Q7KiyoWnmaw__" \" +ARG SPACERANGER_SHA256="2566b24f29829b39f3add112a674990b1c54ae2fbe7ccb50a4c7dce9ccf152e6" +LABEL org.opencontainers.image.authors="Gisela Gabernet ,Edmund Miller " +LABEL org.opencontainers.image.description="Docker image containing spaceranger" +LABEL org.opencontainers.image.version="$SPACERANGER_VER" +LABEL org.opencontainers.image.documentation="https://github.com/nf-core/modules/blob/master/modules/nf-core/spaceranger/README.md" +LABEL org.opencontainers.image.source="https://github.com/nf-core/modules" +LABEL org.opencontainers.image.vendor="nf-core" +LABEL org.opencontainers.image.license="https://www.10xgenomics.com/support/software/space-ranger/downloads/eula" # Disclaimer: this container is not provided nor supported by 10x Genomics. 
# Install procps and curl and clean apt cache -RUN apt-get update --allow-releaseinfo-change \ - && apt-get install -y procps curl \ - && apt-get clean -y && rm -rf /var/lib/apt/lists/* +RUN micromamba install -y -n base conda-forge::procps-ng \ + && micromamba clean -a -y +USER root +ENV PATH="$MAMBA_ROOT_PREFIX/bin:$PATH" -# Copy pre-downloaded spaceranger file -ENV SPACERANGER_VER 3.0.0 -COPY spaceranger-$SPACERANGER_VER.tar.gz /opt/spaceranger-$SPACERANGER_VER.tar.gz +# NOTE Docker doesn't support md5sum and that's what spaceranger gives us --checksum=md5sum:949088b1e2cc5bd03079b60a65b41d39 +# https://docs.docker.com/reference/dockerfile/#add---checksum +ADD --checksum=sha256:$SPACERANGER_SHA256 \ + $SPACERANGER_URL \ + /opt/spaceranger-$SPACERANGER_VER.tar.gz # Install spaceranger -RUN \ - cd /opt && \ - tar -xzvf spaceranger-$SPACERANGER_VER.tar.gz && \ - export PATH=/opt/spaceranger-$SPACERANGER_VER:$PATH && \ - ln -s /opt/spaceranger-$SPACERANGER_VER/spaceranger /usr/bin/spaceranger && \ - rm -rf /opt/spaceranger-$SPACERANGER_VER.tar +RUN tar -xzf /opt/spaceranger-$SPACERANGER_VER.tar.gz -C /opt/ \ + && rm /opt/spaceranger-$SPACERANGER_VER.tar.gz \ + && ln -s /opt/spaceranger-$SPACERANGER_VER/spaceranger /usr/bin/spaceranger +ENV PATH="/opt/spaceranger-$SPACERANGER_VER:$PATH" diff --git a/modules/nf-core/spaceranger/README.md b/modules/nf-core/spaceranger/README.md index 4efa8791642..3fb4113f279 100644 --- a/modules/nf-core/spaceranger/README.md +++ b/modules/nf-core/spaceranger/README.md @@ -2,17 +2,17 @@ Space Ranger is a commercial tool by 10X Genomics. The container provided for the spaceranger nf-core module is not provided nor supported by 10x Genomics. Updating the Space Ranger version in the container and pushing the update to Dockerhub needs to be done manually. -1. Navigate to the [Space Ranger download page](https://support.10xgenomics.com/spatial-gene-expression/software/downloads/latest) and download the tar ball of the desired Space Ranger version with `curl` or `wget`. Place this file in the same folder where the Dockerfile lies. +1. Navigate to the [Space Ranger download page](https://support.10xgenomics.com/spatial-gene-expression/software/downloads/latest) and get a link for the Space Ranger downloads -2. Edit the Dockerfile: update the Cell Ranger version in this line: +2. Edit the Dockerfile: update the Space Ranger version in this line: - ```bash - ENV SPACERANGER_VER - ``` +```diff +- ARG SPACERANGER_VER="3.1.2" ++ ARG SPACERANGER_VER="" +- ARG SPACERANGER_URL="https://cf.10xgenomics.com/releases/spatial-exp/spaceranger-3.1.2.tar.gz?Expires=1732608367&Key-Pair-Id=APKAI7S6A5RYOXBWRPDA&Signature=S4jcGCs5H0wLFfREhSc~AfuPIOXE~PW96kX9o2EyxZnmn1goUecgfEWTj67fL1DnZJBIa06kFdUdNpsMn2ustiSWOfXEgjlYQnxIcNnQIiEBGoQTHWphvx3LEQ6wtZnkWS80P6IcE0HJkIsgy04t6Sohih5cxY4jgytYsrAfZDYr5G3KKFwTfCKmhzMaXqW635yPbyQ8xEcQHK0QwviAx8-EFq-PE8UzC4QgUKi2MW-ivcfZkSDSfF8C3s7SgwDXIGIWv52mzeszenxMjN4KrWQotZ7ZpktzI0Vfpz0dNC17dQeDQUHj4LuNYbdh3RqsPKtqu3wjCe2Q7KiyoWnmaw__" \" ++ ARG SPACERANGER_URL=" - docker push quay.io/nf-core/spaceranger: - ``` +3. Push the changes and the Dockerfile should be built and uploaded to `quay.io/nf-core/modules/spaceranger`! 
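
A minimal local sanity check of the updated Dockerfile, assuming the build arguments already declared above (`SPACERANGER_VER`, `SPACERANGER_URL`, `SPACERANGER_SHA256`) and a fresh, unexpired 10x download link for the same Space Ranger release; the image tag below is only a placeholder:

```bash
cd modules/nf-core/spaceranger

# Override the expiring download link; version and checksum stay as declared in the Dockerfile
docker build \
    --build-arg SPACERANGER_URL="<personalised 10x download link>" \
    -t spaceranger:local-check .

# Confirm the binary is on PATH and reports the expected version
docker run --rm spaceranger:local-check spaceranger -V
```

If the build or the version check fails, fix the Dockerfile locally before pushing, since the registry image is built from exactly these ARG values.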
diff --git a/modules/nf-core/spaceranger/count/main.nf b/modules/nf-core/spaceranger/count/main.nf index 4f766cb2dec..f33d82fcc7e 100644 --- a/modules/nf-core/spaceranger/count/main.nf +++ b/modules/nf-core/spaceranger/count/main.nf @@ -2,7 +2,7 @@ process SPACERANGER_COUNT { tag "$meta.id" label 'process_high' - container "nf-core/spaceranger:3.0.0" + container "nf-core/modules/spaceranger:d71611e316a8614b" input: tuple val(meta), path(reads), path(image), path(cytaimage), path(darkimage), path(colorizedimage), path(alignment), path(slidefile) diff --git a/modules/nf-core/spaceranger/count/tests/main.nf.test b/modules/nf-core/spaceranger/count/tests/main.nf.test index 2be65252513..49a9d5f69c9 100644 --- a/modules/nf-core/spaceranger/count/tests/main.nf.test +++ b/modules/nf-core/spaceranger/count/tests/main.nf.test @@ -68,7 +68,7 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - process.out.versions, + path(process.out.versions.get(0)).yaml, process.out.outs.get(0).get(1).findAll { file(it).name !in [ 'web_summary.html', 'scalefactors_json.json', @@ -144,7 +144,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.versions).match() } + { assert snapshot(path(process.out.versions.get(0)).yaml).match() }, ) } } @@ -204,7 +204,7 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - process.out.versions, + path(process.out.versions.get(0)).yaml, process.out.outs.get(0).get(1).findAll { file(it).name !in [ 'web_summary.html', 'scalefactors_json.json', diff --git a/modules/nf-core/spaceranger/count/tests/main.nf.test.snap b/modules/nf-core/spaceranger/count/tests/main.nf.test.snap index c13496e2c4d..7a49d454526 100644 --- a/modules/nf-core/spaceranger/count/tests/main.nf.test.snap +++ b/modules/nf-core/spaceranger/count/tests/main.nf.test.snap @@ -1,90 +1,96 @@ { "spaceranger v1 (stub) - homo_sapiens - fasta - gtf - fastq - tif - csv": { "content": [ - [ - "versions.yml:md5,1539e8a9a3d63ce3653920721d1af509" - ] + { + "SPACERANGER_COUNT": { + "spaceranger": "3.1.2" + } + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-04-02T09:29:02.205153668" + "timestamp": "2024-11-26T01:48:31.348979531" }, "spaceranger v2 - homo_sapiens - fasta - gtf - fastq - tif - csv": { "content": [ + { + "SPACERANGER_COUNT": { + "spaceranger": "3.1.2" + } + }, [ - "versions.yml:md5,1539e8a9a3d63ce3653920721d1af509" - ], - [ - "clusters.csv:md5,2cc2d0c94ec0af69f03db235f9ea6932", - "clusters.csv:md5,46c12f3845e28f27f2cd580cb004c0ea", - "clusters.csv:md5,4e5f082240b9c9903168842d1f9dbe34", - "clusters.csv:md5,e626eb7049baf591ea49f5d8c305621c", - "clusters.csv:md5,65cfb24fc937e4df903a742c1adf8b08", - "clusters.csv:md5,819a71787618945dacfa2d5301b953b1", - "clusters.csv:md5,5ae17ed02cdb9f61d7ceb0cd6922c9d4", - "clusters.csv:md5,641550bec22e02fff3611087f7fd6e07", - "clusters.csv:md5,9fbe5c79035175bc1899e9a7fc80f7ac", - "clusters.csv:md5,ed0c2dcca15c14a9983407ff9af0daaf", - "differential_expression.csv:md5,d37a8ef21699372ec4a4bdf0c43d71b7", - "differential_expression.csv:md5,ac3181524385c88d38a0fc17d3bdd526", - "differential_expression.csv:md5,557d6dfec7421c392aa6443725608cd1", - "differential_expression.csv:md5,1437fad68d701c97a4a46318aee45575", - "differential_expression.csv:md5,7a2f3d0e90782055580f4903617a7d27", - "differential_expression.csv:md5,41756e9570d07aee6aed710e6a965846", - 
"differential_expression.csv:md5,62ea7651c3f195d3c960c6c688dca477", - "differential_expression.csv:md5,b630542266c4abb71f4205922340498d", - "differential_expression.csv:md5,0deb97f0be7e72ad73e456092db31e6d", - "differential_expression.csv:md5,3bba8490f753507e7e2e29be759f218b", - "components.csv:md5,568bb9bcb6ee913356fcb4be3fea1911", - "dispersion.csv:md5,e2037b1db404f6e5d8b3144629f2500d", - "features_selected.csv:md5,3ba6d1315ae594963b306d94ba1180e7", - "projection.csv:md5,aef5d71381678d5245e471f3d5a8ab67", - "variance.csv:md5,475a95e51ce66e639ae21d801c455e2b", - "projection.csv:md5,928c0f68a9c773fba590941d3d5af7ca", - "projection.csv:md5,216dcc5589a083fcc27d981aa90fa2ab", - "filtered_feature_bc_matrix.h5:md5,f1a8f225c113974b47efffe08e70f367", - "metrics_summary.csv:md5,faa17487b479eab361050d3266da2efb", + "clusters.csv:md5,221a4554e62ea94b0df8dbf637d2c13c", + "clusters.csv:md5,53ee76645943b5562392aac51d2d9f3f", + "clusters.csv:md5,b791359469683ad19cdb8d1af3de5705", + "clusters.csv:md5,9a4f9148e0e834c1127bf8393ece6330", + "clusters.csv:md5,c11bcc64f870469ab2f136d9272a7a6d", + "clusters.csv:md5,488846bbb469365e199928c7a440320a", + "clusters.csv:md5,5941f7e847d35a4f06d3631e21d2eb9d", + "clusters.csv:md5,d244d405c32766339d2b7a3fa8bf8cee", + "clusters.csv:md5,981386408cd953548994c31253e787de", + "clusters.csv:md5,24c4f13449e5362fcbcd41b9ff413992", + "differential_expression.csv:md5,589c1bd4529f092bb1d332e7da561dad", + "differential_expression.csv:md5,d9d978b398b33ac9687b44531909e0cd", + "differential_expression.csv:md5,4edbc893280f9d03c3de00a503e86f8c", + "differential_expression.csv:md5,316181d501c495384016227309856b09", + "differential_expression.csv:md5,dae49941396609fb08df13b82fe89151", + "differential_expression.csv:md5,4a13ae44c8454dbcb0298eb63df8b8e8", + "differential_expression.csv:md5,eeb02c4afe1f49d5502fb024b25b2c38", + "differential_expression.csv:md5,9a456828fe5d762e6e07383da5c2791d", + "differential_expression.csv:md5,bcbd1504976824e9f4d20a8dd36e2a1f", + "differential_expression.csv:md5,3ad93fc4d52950cfede885dc58cd2823", + "components.csv:md5,811a32dce6c795e958dc4bc635ee53be", + "dispersion.csv:md5,64c2e57ef0ca9a80cce8b952c81b62f5", + "features_selected.csv:md5,bd0c0a20b0b0009df796e8a755d028c1", + "projection.csv:md5,e530c925a185965514fa82f4da83fa81", + "variance.csv:md5,4159711ab5d64e97737fad9d75d945b3", + "projection.csv:md5,ce729f7e237df4570ac3e4a79251df24", + "projection.csv:md5,fa7bdefa8424b233fe6461129ab76d57", + "filtered_feature_bc_matrix.h5:md5,f625d7e2c063c8c079ccc35a853b356d", + "metrics_summary.csv:md5,5ece84f5f8e08839749b1c8f2bff6701", "probe_set.csv:md5,5bfb8f12319be1b2b6c14142537c3804", - "raw_feature_bc_matrix.h5:md5,6e40ae93a116c6fc0adbe707b0eb415f", - "raw_probe_bc_matrix.h5:md5,3d5e711d0891ca2caaf301a2c1fbda91", + "raw_feature_bc_matrix.h5:md5,90575715eb252f0b652c9d36a1f5628e", + "raw_probe_bc_matrix.h5:md5,8ab08437814506f98e3f10107cfc38ac", "aligned_fiducials.jpg:md5,51dcc3a32d3d5ca4704f664c8ede81ef", "cytassist_image.tiff:md5,0fb04a55e5658f4d158d986a334b034d", - "detected_tissue_image.jpg:md5,1d3ccc1e12c4fee091b006e48b9cc16a", - "spatial_enrichment.csv:md5,1117792553e82feb2b4b3934907a0136", + "detected_tissue_image.jpg:md5,11c9fa90913b5c6e93cecdb8f53d58db", + "spatial_enrichment.csv:md5,4379bc4fef891b45ff9264ee8c408bd0", "tissue_hires_image.png:md5,834706fff299024fab48e6366afc9cb9", "tissue_lowres_image.png:md5,8c1fcb378f7f886301f49ffc4f84360a", - "tissue_positions.csv:md5,425601ef21661ec0126000f905ef044f" + 
"tissue_positions.csv:md5,930aeb2b790032337d91dd27cc70f135" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-04-02T10:13:00.787792273" + "timestamp": "2024-11-26T01:58:16.555517048" }, "spaceranger v1 - homo_sapiens - fasta - gtf - fastq - tif - csv": { "content": [ + { + "SPACERANGER_COUNT": { + "spaceranger": "3.1.2" + } + }, [ - "versions.yml:md5,1539e8a9a3d63ce3653920721d1af509" - ], - [ - "filtered_feature_bc_matrix.h5:md5,7e09d1cd2e1f497a698c5efde9e4af84", - "metrics_summary.csv:md5,07a6fcc2e20f854f8d3fcde2457a2f9a", - "molecule_info.h5:md5,1f2e0fd31d15509e7916e84f22632c9c", - "raw_feature_bc_matrix.h5:md5,5a4184a3bfaf722eec8d1a763a45906e", + "filtered_feature_bc_matrix.h5:md5,a756f6dda550f52f9fb3e347207a2c6c", + "metrics_summary.csv:md5,38774fc5f54873d711b4898a2dd50e72", + "molecule_info.h5:md5,9e6393dbbccdfe58edf9e92181261f88", + "raw_feature_bc_matrix.h5:md5,860702876f936f89fdcec2b5f599a7d2", "aligned_fiducials.jpg:md5,f6217ddd707bb189e665f56b130c3da8", "detected_tissue_image.jpg:md5,c1c7e8741701a576c1ec103c1aaf98ea", "tissue_hires_image.png:md5,d91f8f176ae35ab824ede87117ac0889", "tissue_lowres_image.png:md5,475a04208d193191c84d7a3b5d4eb287", - "tissue_positions.csv:md5,748bf590c445db409d7dbdf5a08e72e8" + "tissue_positions.csv:md5,7f9cb407b3dd69726a12967b979a5624" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-04-02T09:37:13.128424153" + "timestamp": "2024-11-26T01:48:13.569651476" } } \ No newline at end of file diff --git a/modules/nf-core/spaceranger/mkgtf/main.nf b/modules/nf-core/spaceranger/mkgtf/main.nf index 734263567e8..7339b6a42a5 100644 --- a/modules/nf-core/spaceranger/mkgtf/main.nf +++ b/modules/nf-core/spaceranger/mkgtf/main.nf @@ -2,7 +2,7 @@ process SPACERANGER_MKGTF { tag "$gtf" label 'process_low' - container "nf-core/spaceranger:3.0.0" + container "nf-core/modules/spaceranger:d71611e316a8614b" input: path gtf diff --git a/modules/nf-core/spaceranger/mkgtf/tests/main.nf.test b/modules/nf-core/spaceranger/mkgtf/tests/main.nf.test index cbabc09b107..626f5b5ef26 100644 --- a/modules/nf-core/spaceranger/mkgtf/tests/main.nf.test +++ b/modules/nf-core/spaceranger/mkgtf/tests/main.nf.test @@ -24,7 +24,10 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.gtf, + path(process.out.versions.get(0)).yaml, + ).match() } ) } diff --git a/modules/nf-core/spaceranger/mkgtf/tests/main.nf.test.snap b/modules/nf-core/spaceranger/mkgtf/tests/main.nf.test.snap index 4a3d191eafe..6b65a702734 100644 --- a/modules/nf-core/spaceranger/mkgtf/tests/main.nf.test.snap +++ b/modules/nf-core/spaceranger/mkgtf/tests/main.nf.test.snap @@ -1,25 +1,19 @@ { "homo_sapiens - gtf": { "content": [ + [ + "genome.filtered.gtf:md5,50fc877b1c53b36b3b413aff88bda48c" + ], { - "0": [ - "genome.filtered.gtf:md5,50fc877b1c53b36b3b413aff88bda48c" - ], - "1": [ - "versions.yml:md5,2e30a28641165e16d77eec844cbaffe5" - ], - "gtf": [ - "genome.filtered.gtf:md5,50fc877b1c53b36b3b413aff88bda48c" - ], - "versions": [ - "versions.yml:md5,2e30a28641165e16d77eec844cbaffe5" - ] + "SPACERANGER_MKGTF": { + "spaceranger": "3.1.2" + } } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-04-02T09:29:35.376030595" + "timestamp": "2024-11-26T01:58:29.469616437" } } \ No newline at end of file diff 
--git a/modules/nf-core/spaceranger/mkref/main.nf b/modules/nf-core/spaceranger/mkref/main.nf index 0c1e5eb1c5e..0646fa514f2 100644 --- a/modules/nf-core/spaceranger/mkref/main.nf +++ b/modules/nf-core/spaceranger/mkref/main.nf @@ -2,7 +2,7 @@ process SPACERANGER_MKREF { tag "$fasta" label 'process_high' - container "nf-core/spaceranger:3.0.0" + container "nf-core/modules/spaceranger:d71611e316a8614b" input: path fasta @@ -41,4 +41,37 @@ process SPACERANGER_MKREF { spaceranger: \$(spaceranger -V | sed -e "s/spaceranger spaceranger-//g") END_VERSIONS """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "SPACERANGER_COUNT module does not support Conda. Please use Docker / Singularity / Podman instead." + } + """ + mkdir -p $reference_name + + touch ${reference_name}/genome.fa + touch ${reference_name}/genome.fa.fai + touch ${reference_name}/genes.gtf.gz + touch ${reference_name}/reference.json + touch ${reference_name}/Genome + touch ${reference_name}/SA + touch ${reference_name}/SAindex + touch ${reference_name}/chrLength.txt + touch ${reference_name}/chrName.txt + touch ${reference_name}/chrNameLength.txt + touch ${reference_name}/chrStart.txt + touch ${reference_name}/exonGeTrInfo.tab + touch ${reference_name}/exonInfo.tab + touch ${reference_name}/geneInfo.tab + touch ${reference_name}/sjdbInfo.txt + touch ${reference_name}/sjdbList.fromGTF.out.tab + touch ${reference_name}/sjdbList.out.tab + touch ${reference_name}/transcriptInfo.tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + spaceranger: \$(spaceranger -V | sed -e "s/spaceranger spaceranger-//g") + END_VERSIONS + """ } diff --git a/modules/nf-core/spaceranger/mkref/tests/main.nf.test b/modules/nf-core/spaceranger/mkref/tests/main.nf.test index eb710c72759..a16da76afa0 100644 --- a/modules/nf-core/spaceranger/mkref/tests/main.nf.test +++ b/modules/nf-core/spaceranger/mkref/tests/main.nf.test @@ -25,7 +25,7 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - process.out.versions, + path(process.out.versions.get(0)).yaml, "${process.out.reference[0]}/fasta", "${process.out.reference[0]}/genes", "${process.out.reference[0]}/reference.json", diff --git a/modules/nf-core/spaceranger/mkref/tests/main.nf.test.snap b/modules/nf-core/spaceranger/mkref/tests/main.nf.test.snap index 525fe0b9dcf..0801cc4694e 100644 --- a/modules/nf-core/spaceranger/mkref/tests/main.nf.test.snap +++ b/modules/nf-core/spaceranger/mkref/tests/main.nf.test.snap @@ -1,9 +1,11 @@ { "homo_sapiens - fasta - gtf": { "content": [ - [ - "versions.yml:md5,99379b88d42d3be049e84a05517d19d9" - ], + { + "SPACERANGER_MKREF": { + "spaceranger": "3.1.2" + } + }, [ "genome.fa:md5,f315020d899597c1b57e5fe9f60f4c3e", "genome.fa.fai:md5,3520cd30e1b100e55f578db9c855f685" @@ -11,7 +13,7 @@ [ "genes.gtf.gz:md5,50fc877b1c53b36b3b413aff88bda48c" ], - "reference.json:md5,84d9557f28590a90901613131ac66c3f", + "reference.json:md5,fa6ca32036814b883644e323706bd247", [ "Genome:md5,22102926fadf5890e905ca71b2da3f35", "SA:md5,bcf3e1a855783105150b46c905465333", @@ -30,9 +32,9 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.3" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-08-02T17:39:00.238723" + "timestamp": "2024-11-26T01:58:49.663386635" } -} +} \ No newline at end of file From 4764dec8eebca9c851746089e2a4c5db97d4b847 Mon Sep 17 00:00:00 2001 From: Suzanne Jin Date: Thu, 28 Nov 2024 18:12:09 +0100 
Subject: [PATCH 03/12] update module propr/propd (#7083) * update propd module * it works, need to add test * create empty file when no DE genes were found; nf-test for permutation setting works * modified propd module to save pairwise results optionally, and save empty output when no DE genes were found * added additional tests in propd module * round digits for reproducibility matters * updated the test snapshots for propd * update meta * add description to meta * fix meta * lint propr/propd * updated container for propr/propd * update test snapshots for propr/propd * [propr/propd] update container and snapshots * [propr/propd] fix bug when defining the test configs * [propr/propd] fix bug: only round numeric columns * [propr/propd] update test snapshots * [propr/propd] update containers * [propr/propd] update test snapshots * [propr/propd] update singularity url --- modules/nf-core/propr/propd/environment.yml | 4 +- modules/nf-core/propr/propd/main.nf | 24 +- modules/nf-core/propr/propd/meta.yml | 162 ++-- modules/nf-core/propr/propd/templates/propd.R | 711 +++++++++++++----- .../propr/propd/tests/adjacency.config | 3 - .../nf-core/propr/propd/tests/boxcox.config | 5 + .../propr/propd/tests/boxcox_theta_e.config | 4 - .../nf-core/propr/propd/tests/default.config | 5 + .../propr/propd/tests/default_boxcox.config | 4 - .../propd/tests/default_permutation.config | 4 - .../nf-core/propr/propd/tests/main.nf.test | 178 +++-- .../propr/propd/tests/main.nf.test.snap | 159 ++-- .../propr/propd/tests/permutation.config | 5 + .../nf-core/propr/propd/tests/save_all.config | 5 + .../nf-core/propr/propd/tests/theta_e.config | 4 - 15 files changed, 867 insertions(+), 410 deletions(-) delete mode 100644 modules/nf-core/propr/propd/tests/adjacency.config create mode 100644 modules/nf-core/propr/propd/tests/boxcox.config delete mode 100755 modules/nf-core/propr/propd/tests/boxcox_theta_e.config create mode 100644 modules/nf-core/propr/propd/tests/default.config delete mode 100755 modules/nf-core/propr/propd/tests/default_boxcox.config delete mode 100755 modules/nf-core/propr/propd/tests/default_permutation.config create mode 100644 modules/nf-core/propr/propd/tests/permutation.config create mode 100644 modules/nf-core/propr/propd/tests/save_all.config delete mode 100755 modules/nf-core/propr/propd/tests/theta_e.config diff --git a/modules/nf-core/propr/propd/environment.yml b/modules/nf-core/propr/propd/environment.yml index f7da56ebc39..9744dab906b 100644 --- a/modules/nf-core/propr/propd/environment.yml +++ b/modules/nf-core/propr/propd/environment.yml @@ -1,5 +1,7 @@ channels: - conda-forge - bioconda + dependencies: - - conda-forge::r-propr=5.0.3 + - bioconda::bioconductor-limma=3.58.1 + - conda-forge::r-propr=5.1.5 diff --git a/modules/nf-core/propr/propd/main.nf b/modules/nf-core/propr/propd/main.nf index ba7727d0987..54b81c8eb70 100644 --- a/modules/nf-core/propr/propd/main.nf +++ b/modules/nf-core/propr/propd/main.nf @@ -4,21 +4,23 @@ process PROPR_PROPD { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/r-propr:5.0.3': - 'biocontainers/r-propr:5.0.3' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b6/b65f7192866fbd9a947df15b104808abb720e7a224bbe3ca8f7f8f680f52c97a/data' : + 'community.wave.seqera.io/library/bioconductor-limma_r-propr:f52f1d4fea746393' }" input: - tuple val(meta), path(count) - tuple val(meta2), path(samplesheet) + tuple val(meta), val(contrast_variable), val(reference), val(target) + tuple val(meta2), path(samplesheet), path(counts) output: - tuple val(meta), path("*.propd.rds"), emit: propd - tuple val(meta), path("*.propd.tsv"), emit: results - tuple val(meta), path("*.fdr.tsv") , emit: fdr , optional:true - tuple val(meta), path("*.adj.csv"), emit: adj , optional:true - path "*.warnings.log", emit: warnings - path "*.R_sessionInfo.log" , emit: session_info - path "versions.yml" , emit: versions + tuple val(meta), path("*.propd.genewise.tsv") , emit: results_genewise + tuple val(meta), path("*.propd.genewise.png") , emit: genewise_plot + tuple val(meta), path("*.propd.rds") , emit: rdata , optional:true + tuple val(meta), path("*.propd.pairwise.tsv") , emit: results_pairwise , optional:true + tuple val(meta), path("*.propd.pairwise_filtered.tsv"), emit: results_pairwise_filtered, optional:true + tuple val(meta), path("*.propd.adjacency.csv") , emit: adjacency , optional:true + tuple val(meta), path("*.propd.fdr.tsv") , emit: fdr , optional:true + path "*.R_sessionInfo.log" , emit: session_info + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/propr/propd/meta.yml b/modules/nf-core/propr/propd/meta.yml index 261e80ed216..57f1521f15f 100644 --- a/modules/nf-core/propr/propd/meta.yml +++ b/modules/nf-core/propr/propd/meta.yml @@ -6,6 +6,7 @@ keywords: - logratio - expression - propr + - propd tools: - "propr": description: "Logratio methods for omics data" @@ -19,73 +20,141 @@ input: - - meta: type: map description: | - Groovy Map containing additional information. - This can be used at the workflow level to pass optional parameters to the module. - [id: 'test', ...] - - count: - type: file + Groovy Map containing contrast information. This can be used at the + workflow level to pass optional parameters to the module, e.g. + [ id:'contrast1', blocking:'patient' ] passed in as ext.args like: + '--blocking_variable $meta.blocking'. + - contrast_variable: + type: string + description: | + The column in the sample sheet that should be used to define groups for + comparison + - reference: + type: string + description: | + The value within the contrast_variable column of the sample sheet that + should be used to derive the reference samples + - target: + type: string description: | - Count matrix, where rows = variables or genes, columns = samples or cells. - This matrix should not contain zeros. One should plug this module after another one that handles the zeros. 
- pattern: "*.{csv,tsv}" + The value within the contrast_variable column of the sample sheet that + should be used to derive the target samples - - meta2: type: map description: | - Groovy map containing study-wide metadata related to the sample sheet and matrix + Groovy map containing study-wide metadata related to the sample sheet + and matrix - samplesheet: type: file description: | CSV or TSV format sample sheet with sample metadata + - counts: + type: file + description: | + Raw TSV or CSV format expression matrix as output from the nf-core + RNA-seq workflow output: - - propd: + - results_genewise: - meta: - type: map + type: file + description: | + TSV-format table of genes associated with differential expression + information as compiled from the propd results + pattern: "*.propd.genewise.tsv" + - "*.propd.genewise.tsv": + type: file description: | - Groovy Map containing additional information. - This can be used at the workflow level to pass optional parameters to the module. - [id: 'test', ...] + TSV-format table of genes associated with differential expression + information as compiled from the propd results + pattern: "*.propd.genewise.tsv" + - genewise_plot: + - meta: + type: file + description: | + PNG-format plot of accumulated between group variance vs median log + fold change. Genes with high between group variance and high log fold + change are likely to be differentially expressed. + pattern: "*.propd.genewise.png" + - "*.propd.genewise.png": + type: file + description: | + PNG-format plot of accumulated between group variance vs median log + fold change. Genes with high between group variance and high log fold + change are likely to be differentially expressed. + pattern: "*.propd.genewise.png" + - rdata: + - meta: + type: file + description: | + (Optional) R data containing propd object + pattern: "*.propd.rds" - "*.propd.rds": type: file - description: R propd object + description: | + (Optional) R data containing propd object pattern: "*.propd.rds" - - results: + - results_pairwise: - meta: - type: map + type: file description: | - Groovy Map containing additional information. - This can be used at the workflow level to pass optional parameters to the module. - [id: 'test', ...] - - "*.propd.tsv": + (Optional) TSV-format table of the native propd pairwise results. This + table contains the differential proportionality values associated to + each pair of genes. + pattern: "*.propd.pairwise.tsv" + - "*.propd.pairwise.tsv": type: file - description: Results table - pattern: "*.propd.tsv" - - fdr: + description: | + (Optional) TSV-format table of the native propd pairwise results. This + table contains the differential proportionality values associated to + each pair of genes. + pattern: "*.propd.pairwise.tsv" + - results_pairwise_filtered: + - meta: + type: file + description: | + (Optional) TSV-format table of the filtered propd pairwise results. This + table contains the pairs of genes with significant differential + proportionality values. + pattern: "*.propd.pairwise_filtered.tsv" + - "*.propd.pairwise_filtered.tsv": + type: file + description: | + (Optional) TSV-format table of the filtered propd pairwise results. This + table contains the pairs of genes with significant differential + proportionality values. 
+ pattern: "*.propd.pairwise_filtered.tsv" + - adjacency: - meta: - type: map + type: file + description: | + (Optional) CSV-format table of the adjacency matrix defining a graph, with + edges (1) associated to pairs of genes that are significantly differentially + proportional. + pattern: "*.propd.adjacency.csv" + - "*.propd.adjacency.csv": + type: file description: | - Groovy Map containing additional information. - This can be used at the workflow level to pass optional parameters to the module. - [id: 'test', ...] - - "*.fdr.tsv": - type: file - description: (optional) propd fdr table - pattern: "*.fdr.tsv" - - adj: + (Optional) CSV-format table of the adjacency matrix defining a graph, with + edges (1) associated to pairs of genes that are significantly differentially + proportional. + pattern: "*.propd.adjacency.csv" + - fdr: - meta: - type: map + type: file + description: | + (Optional) TSV-format table of FDR values. When permutation tests is performed, + this table is generated with the FDR values calculated by the permutation tests. + This is a more conservative test than the default BH method, but more + computationally expensive. + pattern: "*.propd.fdr.tsv" + - "*.propd.fdr.tsv": + type: file description: | - Groovy Map containing additional information. - This can be used at the workflow level to pass optional parameters to the module. - [id: 'test', ...] - - "*.adj.csv": - type: file - description: (optional) propd adj table - pattern: "*.adj.csv" - - warnings: - - "*.warnings.log": - type: file - description: propd warnings - pattern: "*.warnings.txt" + (Optional) TSV-format table of FDR values. When permutation tests is performed, + this table is generated with the FDR values calculated by the permutation tests. + This is a more conservative test than the default BH method, but more + computationally expensive. + pattern: "*.propd.fdr.tsv" - session_info: - "*.R_sessionInfo.log": type: file @@ -98,5 +167,6 @@ output: pattern: "versions.yml" authors: - "@suzannejin" + - "@caraiz2001" maintainers: - "@suzannejin" diff --git a/modules/nf-core/propr/propd/templates/propd.R b/modules/nf-core/propr/propd/templates/propd.R index 63dbed49d31..f9102096b61 100644 --- a/modules/nf-core/propr/propd/templates/propd.R +++ b/modules/nf-core/propr/propd/templates/propd.R @@ -1,6 +1,5 @@ #!/usr/bin/env Rscript - ################################################ ################################################ ## Functions ## @@ -12,7 +11,6 @@ #' @param x Long-form argument list like --opt1 val1 --opt2 val2 #' #' @return named list of options and values similar to optparse - parse_args <- function(x){ args_list <- unlist(strsplit(x, ' ?--')[[1]])[-1] args_vals <- lapply(args_list, function(x) scan(text=x, what='character', quiet = TRUE)) @@ -51,84 +49,114 @@ read_delim_flexible <- function(file, header = TRUE, row.names = 1, check.names row.names = row.names, check.names = check.names ) - - if (!is.null(row.names)){ - if ( (row.names == 'gene_id') & ('gene_name' %in% colnames(mat)) ){ - mat <- mat[, -which(colnames(mat) == 'gene_name')] - } else if ( (row.names == 'gene_name') & ('gene_id' %in% colnames(mat)) ){ - mat <- mat[, -which(colnames(mat) == 'gene_id')] - } - } - - return(mat) -} - -#' Extract the values for a single metric and convert it into a genes x genes matrix. 
-#' -#' @param object propd object -one_metric_df <- function(object){ - results <- getResults(object) - #keep only the metric of interest - one_metric <- cbind(results\$Partner, results\$Pair, results\$theta) - colnames(one_metric) <- c("Partner", "Pair", "theta") - one_metric <- as.data.frame(one_metric) - - # Extract the unique gene names - gene_names <- sort(unique(c(one_metric\$Partner, one_metric\$Pair))) - # Initialize a square matrix with NA - square_matrix <- matrix(NA, nrow = length(gene_names), ncol = length(gene_names)) - rownames(square_matrix) <- gene_names - colnames(square_matrix) <- gene_names - - # Use the `match` function to get the row and column indices - row_indices <- match(one_metric\$Partner, gene_names) - col_indices <- match(one_metric\$Pair, gene_names) - # Use these indices to populate the matrix - square_matrix[cbind(row_indices, col_indices)] <- one_metric[["theta"]] - # Populate the reverse pairs to ensure symmetry - square_matrix[cbind(col_indices, row_indices)] <- one_metric[["theta"]] - return(square_matrix) } -#' Extract the differential proportionality cutoff for a specified FDR value. -#' Gene pairs with a value higher than the extracted cutoff will be considered significantly differentially proportional. +#' Get genewise table with logfold changes and connectivity information #' -#' @param object propd object. Output from propd function. updateCutoffs function should be applied to the object previous to valCutoff. -#' @param fdrVal FDR value to extract the cutoff for. Per default 0.05. +#' This function calculates the logfold changes of genes with respect to the reference set, +#' which is dynamically defined as the set of genes that are significantly proportional to +#' each target gene. Note that the output table will only contain genes that are significantly +#' proportional to at least one other gene. #' -#' @return cutoff value. Differential proportionality values lower than this cutoff are considered significant. -valCutoff <- function(object, fdrVal = 0.05){ - fdr_df <- object@fdr - if (prod(dim(fdr_df) == 0)){ - warning("Please run updateCutoff on propd first") - }else{ - fdr_vals <- fdr_df\$FDR - if (any(!is.na(fdr_vals))){ # Si hay algun valor de FDR correcto - threshold <- any(fdr_vals <= fdrVal) - if (threshold){ - fdr_threshold <- fdr_vals[which.min(fdr_vals <= fdrVal) - 1] - }else{ - warning("FDR is higher than the specified threshold for all proportionality values. Using the lowest fdr instead") - fdr_threshold <- fdr_vals[1] - } - }else{ - stop("No true counts in the given interval. FDR values are not defined") - geterrmessage() - } +#' @param results Data frame with significant pairs +#' @return Data frame with the following columns: +#' - lfc = overall logfold change of the gene with respect to the reference set +#' - lfc_error = median average deviation of the logfold changes -> this reflects the error +#' - connectivity = size of the reference set -> this also reflects the connectivity of the gene +# - weighted_connectivity = this reflects the weighted connectivity of the gene, so the lower +#' the theta the closer to 1 full connectivity. One can also interpret this as the accumulated +#' between group variance of the gene (as the theta values reflects the between group variance +#' vs within group variance). 
+get_genewise_information <- function(results) { + + message("Alert: Genewise information is computed based on pairwise ratios.") + + # get unique genes + + genes <- unique(c(results\$Pair, results\$Partner)) + n_genes <- length(genes) + + # create empty matrix + + mat <- data.frame( + 'features_id_col' = character(n_genes), + lfc = numeric(n_genes), + lfc_error = numeric(n_genes), + connectivity = numeric(n_genes), + weighted_connectivity = numeric(n_genes) + ) + colnames(mat) <- c(opt\$features_id_col, 'lfc', 'lfc_error', 'connectivity', 'weighted_connectivity') + mat[, 1] <- genes + + i <- 0 + for (gene in genes){ + i <- i + 1 + + # get rows with this gene involved + # NOTE that gene can be a partner or a pair and we have to consider both cases. + # NOTE that reference set is the set of genes that are partners or pairs of the gene. + # In other words, the set of genes that are significantly proportional to the gene, + # hence connected to the gene in the network. + + idx1 <- which(results[,1] == gene) + idx2 <- which(results[,2] == gene) + reference_idx <- union(idx1, idx2) + + # calculate logfold changes of the gene with respect to the reference set + # Differently to the approach usually implemented in methods like DESeq2, + # here we have a dynamic reference defined by all the genes significantly proportional to the target gene. + + logfoldchange1 <- results[idx1, 'lrm1'] - results[idx1, 'lrm2'] + logfoldchange2 <- results[idx2, 'lrm2'] - results[idx2, 'lrm1'] + logfoldchanges <- union(logfoldchange1, logfoldchange2) + + # fill in matrix values + + mat[i, 'lfc'] <- median(logfoldchanges) + mat[i, 'lfc_error'] <- mad(logfoldchanges) + mat[i, 'connectivity'] <- length(reference_idx) + mat[i, 'weighted_connectivity'] <- sum(1 - results[reference_idx, 'theta']) } - cutoff <- fdr_df\$cutoff[fdr_df\$FDR == fdr_threshold] - return(cutoff) + + return(mat) } -#' Convert a proportionality matrix to an adjacency matrix based on a threshold. +#' Plot genewise information #' -#' @param matrix proportionality matrix. Can be extracted from propr object with getMatrix(). -#' @param cutoff Significant proportionality value extracted from valCutoff function. +#' This function plots the genewise information, which is a scatter plot of the logfold changes +#' of the genes with respect to the reference set (x-axis) and the accumulated between group variance +#' of the genes (y-axis). The accumulated between group variance is calculated as the sum of 1 - theta +#' values of the genes that are significantly proportional to the target gene. This can be interpreted +#' as the weighted connectivity of the gene in the network. #' -#' @return Adjacency matrix. Gene pairs with a proportionality value lower than the threshold will have 1, otherwise 0. 
-convert_to_adjacency <- function(matrix, cutoff) { - adjacency <- ifelse(matrix < cutoff, 1, 0) - return(adjacency) +#' @param results Data frame with genewise information +#' @param output Output png file name +plot_genewise_information <- function(results, output) { + + # create figure + png(output, width=1200, height=600) # Adjust width to accommodate two plots side by side + par(mfrow = c(1, 2)) + + # plot scatter plot with normal y-axis + plot( + results\$lfc, + results\$weighted_connectivity, + xlab = 'Logfold change', + ylab = 'Accumulated between group variance', + main = 'Normal Y-axis' + ) + + # plot scatter plot with log10 y-axis + plot( + results\$lfc, + results\$weighted_connectivity, + xlab = 'Logfold change', + ylab = 'Accumulated between group variance', + log = 'y', + main = 'Log10 Y-axis' + ) + + dev.off() } ################################################ @@ -137,71 +165,98 @@ convert_to_adjacency <- function(matrix, cutoff) { ################################################ ################################################ +# Set defaults and classes + opt <- list( - prefix = ifelse('$task.ext.prefix' == 'null', '$meta.id', '$task.ext.prefix'), - count = '$count', - samplesheet = '$samplesheet', - features_id_col = 'gene_id', # column name of feature ids - obs_id_col = 'sample', # column name of observation ids - group_col = 'treatment', # column name of grouping variable - metric = 'theta_d', # differential proportionality metric: theta_d, theta_e or theta_f - alpha = NA, # alpha for boxcox transformation - permutation = 0, # permutation cycles for computing FDR - cutoff_min = NA, # minimun threshold to test - cutoff_max = NA, # maximun threshold to test - cutoff_interval = NA, # interval between thresholds - fixseed = FALSE, - adjacency = FALSE, - fdrVal = 0.05, - ncores = as.integer('$task.cpus') + prefix = ifelse('$task.ext.prefix' == 'null', '$meta.id', '$task.ext.prefix'), + + # input count matrix + counts = '$counts', + features_id_col = 'gene_id', # column name of feature ids + + # comparison groups + samplesheet = '$samplesheet', + obs_id_col = 'sample', # column name of observation ids + contrast_variable = "$contrast_variable", # column name of contrast variable + reference_group = "$reference", # reference group for contrast variable + target_group = "$target", # target group for contrast variable + + # parameters for computing differential proportionality + alpha = NA, # alpha for boxcox transformation + moderated = TRUE, # use moderated theta + + # parameters for getting the significant differentially proportional pairs + fdr = 0.05, # FDR threshold + permutation = 0, # if permutation > 0, use permutation test to compute FDR + number_of_cutoffs = 100, # number of cutoffs for permutation test + + # saving options + # note that pairwise outputs are very large, so it is recommended to save them only when needed + save_pairwise_full = FALSE, # save full pairwise results + save_pairwise = FALSE, # save filtered pairwise results + save_adjacency = FALSE, # save adjacency matrix + save_rdata = FALSE, # save rdata + + # other parameters + seed = NA, # seed for reproducibility + round_digits = NA, # number of digits to round results + ncores = as.integer('$task.cpus') ) + opt_types <- list( - prefix = 'character', - count = 'character', - samplesheet = 'character', - features_id_col = 'character', - obs_id_col = 'character', - group_col = 'character', - metric = 'character', - alpha = 'numeric', - permutation = 'numeric', - cutoff_min = 'numeric', - cutoff_max = 
'numeric', - cutoff_interval = 'numeric', - fixseed = 'logical', - adjacency = 'logical', - fdrVal = 'numeric', - ncores = 'numeric' + prefix = 'character', + counts = 'character', + samplesheet = 'character', + features_id_col = 'character', + obs_id_col = 'character', + contrast_variable = 'character', + reference_group = 'character', + target_group = 'character', + alpha = 'numeric', + moderated = 'logical', + fdr = 'numeric', + permutation = 'numeric', + number_of_cutoffs = 'numeric', + save_pairwise_full = 'logical', + save_pairwise = 'logical', + save_adjacency = 'logical', + save_rdata = 'logical', + seed = 'numeric', + round_digits = 'numeric', + ncores = 'numeric' ) # Apply parameter overrides -args_opt <- parse_args('$task.ext.args') + +args_ext <- ifelse('$task.ext.args' == 'null', '', '$task.ext.args') +args_opt <- parse_args(args_ext) for ( ao in names(args_opt)){ if (! ao %in% names(opt)){ stop(paste("Invalid option:", ao)) } else { # Preserve classes from defaults where possible - if (! is.null(opt[[ao]])){ - args_opt[[ao]] <- as(args_opt[[ao]], opt_types[[ao]]) - } - # set NA - if (args_opt[[ao]] %in% c('NA', NA, 'null')){ - args_opt[[ao]] <- NA - } + args_opt[[ao]] <- as(args_opt[[ao]], opt_types[[ao]]) + + # handle NA, and avoid errors when NA is provided by user as character + if (args_opt[[ao]] %in% c('NA', NA)) args_opt[[ao]] <- NA + + # replace values opt[[ao]] <- args_opt[[ao]] } } # Check if required parameters have been provided -required_opts <- c('count','samplesheet') + +required_opts <- c('counts','samplesheet','contrast_variable','reference_group','target_group') missing <- required_opts[unlist(lapply(opt[required_opts], is.null)) | ! required_opts %in% names(opt)] if (length(missing) > 0){ stop(paste("Missing required options:", paste(missing, collapse=', '))) } # Check file inputs are valid -for (file_input in c('count','samplesheet')){ + +for (file_input in c('counts','samplesheet')){ if (is.null(opt[[file_input]])) { stop(paste("Please provide", file_input), call. = FALSE) } @@ -210,8 +265,13 @@ for (file_input in c('count','samplesheet')){ } } -# check parameters -if (! 
opt\$metric %in% c('theta_d', 'theta_e', 'theta_f')) stop('Please provide a valid differential proportionality metric') +# check parameters are valid + +if (opt\$permutation < 0) { + stop('permutation should be a positive integer') +} + +print(opt) ################################################ ################################################ @@ -223,71 +283,268 @@ library(propr) ################################################ ################################################ -## Perform differential proportionality ## +## Load data ## ################################################ ################################################ -# read matrix -mat <- read_delim_flexible( - opt\$count, +# set seed when required + +if (!is.na(opt\$seed)) { + warning('Setting seed ', opt\$seed, ' for reproducibility') + set.seed(opt\$seed) +} + +# read input matrix + +counts <- read_delim_flexible( + opt\$counts, header = TRUE, row.names = opt\$features_id_col, check.names = FALSE ) -mat <- t(mat) +counts <- t(counts) # transpose matrix to have features (genes) as columns -# check zeros -# log transformation should be applied on non-zero data -# otherwise Inf values are generated -if (any(mat == 0)) print("Zeros will be replaced by minimun value before logratio analysis") +# read input samplesheet -# parse group -# this creates a vector referring to the group id for each observation samplesheet <- read_delim_flexible( opt\$samplesheet, header = TRUE, - row.names = NULL, + row.names = opt\$obs_id_col, check.names = FALSE ) -tmp <- samplesheet[[opt\$group_col]] -names(tmp) <- samplesheet[[opt\$obs_id_col]] -group <- as.vector(tmp[rownames(mat)]) -if (length(group) != nrow(mat)) stop('Error when parsing group') -# perform differential proportionality +# Check that all samples specified in the input samplesheet are present in the counts +# table. Assuming they are, subset and sort the count table to match the samplesheet + +missing_samples <- + samplesheet[!rownames(samplesheet) %in% rownames(counts), opt\$sample_id_col] + +if (length(missing_samples) > 0) { + stop(paste( + length(missing_samples), + 'specified samples missing from count table:', + paste(missing_samples, collapse = ',') + )) +} else{ + counts <- counts[rownames(samplesheet),] # this will remove non-sample columns, such as metadata columns + counts <- apply(counts, 2, as.numeric) # if there is a column with non-numeric values, the rest of the matrix will be coerced to character. This will convert it back to numeric +} + +# parse group and filter matrix and group values, keeping only the contrasted groups +# TODO propd can also handle more than two groups but that don't work properly with +# the current contrast format. Should we provide an alternative way to do that? 
+ +idx <- which(samplesheet[,opt\$contrast_variable] %in% c(opt\$reference_group, opt\$target_group)) +counts <- counts[idx,] +samplesheet <- samplesheet[idx,] +group <- as.vector(samplesheet[,opt\$contrast_variable]) +group <- as.character(group) +if (length(group) != nrow(counts)) stop('Error when parsing group') +if (length(unique(group)) != 2) stop('Only two groups are allowed for contrast') + +################################################ +################################################ +## Perform differential proportionality ## +################################################ +################################################ + +# calculate the differential proportionality theta values + pd <- propd( - mat, + counts, group = group, alpha = opt\$alpha, weighted = FALSE, - p = opt\$permutation, - fixseed = opt\$fixseed + p = opt\$permutation ) -if (opt\$metric == 'theta_d'){ - pd <- setDisjointed(pd) -} else if (opt\$metric == 'theta_e'){ - pd <- setEmergent(pd) -} else if (opt\$metric == 'theta_f'){ - pd <- setActive(pd, what = "theta_f") -} +# calculate theta moderated, when required +# and calculate F-stat -# update FDR by permutation, if required -if (opt\$permutation > 0) { - cutoff <- seq( - opt\$cutoff_min, - opt\$cutoff_max, - opt\$cutoff_interval +pd <- updateF( + pd, + moderated = opt\$moderated +) +if (opt\$moderated) pd <- setActive(pd, what='theta_mod') + +# get significant results based on the FDR-adjusted F-stat p-values, if permutation == 0 +# otherwise get them based on the FDR obtained from permutation tests (more computationally expensive but likely more conservative FDRs) + +if (opt\$permutation == 0) { + + warning('FDR-adjusted p-values are used to get significant pairs.') + + # get theta value for which FDR is below desired threshold + # theta_cutoff is FALSE when no theta value has FDR below desired threshold + # otherwise it is the theta value for which FDR is below desired threshold + # Only when there is a meaningful theta, we can compute the next steps + # that involve extracting the significant pairs. + + theta_cutoff <- getCutoffFstat( + pd, + pval = opt\$fdr, + fdr_adjusted = TRUE ) - pd <- updateCutoffs(pd, cutoff=cutoff, ncores=opt\$ncores) - if (opt\$metric == 'theta_d') pd <- updateF(pd) + if (theta_cutoff) { + + warning('Significant theta value found: ', theta_cutoff) + + # get adjacency matrix + # this matrix will have 1s for significant pairs and 0s for the rest + # diagonals are set to 0 + + if (opt\$save_adjacency) { + adj <- getAdjacencyFstat( + pd, + pval = opt\$fdr, + fdr_adjusted = TRUE + ) + } + + # get significant pairs + + results_pairwise <- getSignificantResultsFstat( + pd, + pval = opt\$fdr, + fdr_adjusted = TRUE + ) + + # parse genewise information from pairwise results + + results_genewise <- get_genewise_information(results_pairwise) + } + +} else { + + warning('Permutation tests are used to compute FDR values.') + + # calculate FDR values using permutation tests + # This test is computationally expensive but it is likely to + # provide more conservative FDR values. + # This part will call the updateCutoffs function iteratively + # as far as it does not find a meaningful theta value + # and does not reach the maximum number of iterations. 
+ + fdr_table <- data.frame( + 'cutoff' = numeric(0), + 'randcounts' = numeric(0), + 'truecounts' = numeric(0), + 'FDR' = numeric(0) + ) + theta_cutoff <- FALSE + max_cutoff <- 1 + ntry <- 0 + while (!theta_cutoff & max_cutoff > 0 & ntry < 10) { + ntry <- ntry + 1 + + # get a list of theta values to serve as cutoffs for calculating the FDR values + # Given a theta value as cutoff, the FDR is defined as the proportion of + # false positives obtained from the null distribution vs the total number + # of positives obtained from the real data. + + cutoffs <- as.numeric(quantile( + pd@results[pd@results\$theta < max_cutoff, 'theta'], + seq(0, 1, length.out = opt\$number_of_cutoffs) + )) + + # update FDR values + + pd <- updateCutoffs( + pd, + custom_cutoffs = cutoffs, + ncores = opt\$ncores + ) + fdr_table <- rbind( + pd@fdr[pd@fdr\$cutoff < max_cutoff,], + fdr_table + ) + + # get theta value for which FDR is below desired threshold + # theta_cutoff is FALSE when no theta value has FDR below desired threshold + # otherwise it is the theta value for which FDR is below desired threshold + # Only when there is a meaningful theta can we compute the next steps + # that involve extracting the significant pairs. + + theta_cutoff <- getCutoffFDR( + pd, + fdr=opt\$fdr, + window_size=1 + ) + + # update maximum theta value to test the FDR values for the next iteration + + part <- pd@fdr[which(pd@fdr\$truecounts > 0),] + max_cutoff <- ifelse(nrow(part) > 1, min(part\$cutoff), 0) + } + + if (theta_cutoff) { + + warning('Significant theta value found: ', theta_cutoff) + + # get adjacency matrix + # this matrix will have 1s for significant pairs and 0s for the rest + # diagonals are set to 0 + + if (opt\$save_adjacency) { + adj <- getAdjacencyFDR( + pd, + fdr=opt\$fdr, + window_size=1 + ) + } + + # get significant pairs + + results_pairwise <- getSignificantResultsFDR( + pd, + fdr = opt\$fdr, + window_size = 1 + ) + + # parse genewise information from pairwise results + + results_genewise <- get_genewise_information(results_pairwise) + } } -# Extract adjacency matrix if required -if (opt\$adjacency == TRUE) { - matrix <- one_metric_df(pd) - cutoff <- valCutoff(pd, opt\$fdrVal) - adj <- convert_to_adjacency(matrix, cutoff) +# deal with the situation when no significant thetas are found +# For the moment, we just create empty tables with the same data structure + +if (!theta_cutoff) { + warning('No theta value has FDR below desired threshold.') + + # create empty adjacency matrix + + if (opt\$save_adjacency) { + adj <- matrix(0, nrow=ncol(counts), ncol=ncol(counts)) + colnames(adj) <- rownames(adj) <- colnames(counts) + } + + # create empty pairwise results table + + if (opt\$save_pairwise) { + results <- data.frame( + 'Pair' = character(0), + 'Partner' = character(0), + 'theta' = numeric(0), + 'Fstat' = numeric(0), + 'Pval' = numeric(0), + 'FDR' = numeric(0) + ) + results_pairwise <- results + } + + # create empty genewise results table + + results_genewise <- data.frame( + 'features_id_col' = character(0), + lfc = numeric(0), + lfc_error = numeric(0), + connectivity = numeric(0), + weighted_connectivity = numeric(0) + ) + colnames(results_genewise) <- c(opt\$features_id_col, 'lfc', 'lfc_error', 'connectivity', 'weighted_connectivity') + } ################################################ ################################################ @@ -296,51 +553,147 @@ if (opt\$adjacency == TRUE) { ################################################ ################################################ -saveRDS( - pd, - file = paste0(opt\$prefix, '.propd.rds') -) +# save plot of
genewise information +# save empty plot if no DE genes were found + +if (nrow(results_genewise) > 0) { + plot_genewise_information( + results_genewise, + paste0(opt\$prefix, '.propd.genewise.png') + ) +} else { + warning('No genewise information to plot.') + png(paste0(opt\$prefix, '.propd.genewise.png')) + plot.new() + dev.off() +} + +# save main results - genewise + +results_genewise <- results_genewise[order( + results_genewise\$weighted_connectivity, + abs(results_genewise\$lfc), + decreasing = TRUE +),] + +if (!is.na(opt\$round_digits)) { + cols <- sapply(results_genewise, is.numeric) + results_genewise[,cols] <- round( + results_genewise[,cols], + digits = opt\$round_digits + ) +} write.table( - getResults(pd), - file = paste0(opt\$prefix, '.propd.tsv'), + results_genewise, + file = paste0(opt\$prefix, '.propd.genewise.tsv'), col.names = TRUE, row.names = FALSE, sep = '\\t', quote = FALSE ) -if (opt\$permutation > 0) { +# save rdata, if required + +if (opt\$save_rdata) { + saveRDS( + pd, + file = paste0(opt\$prefix, '.propd.rds') + ) +} + +# save pairwise results, if required + +if (opt\$save_pairwise) { + + # unfiltered pairwise results table + + if (opt\$save_pairwise_full) { + results <- getResults(pd) + rm(pd) + results <- results[order( + results\$theta, + results\$FDR + ), c('Pair', 'Partner', 'theta', 'Fstat', 'Pval', 'FDR')] + + if (!is.na(opt\$round_digits)) { + cols <- sapply(results, is.numeric) + results[,cols] <- round( + results[,cols], + digits = opt\$round_digits + ) + } + + write.table( + results, + file = paste0(opt\$prefix, '.propd.pairwise.tsv'), + col.names = TRUE, + row.names = FALSE, + sep = '\\t', + quote = FALSE + ) + } + + # filtered pairwise results table + + results_pairwise <- results_pairwise[order( + results_pairwise\$theta, + results_pairwise\$FDR + ), c('Pair', 'Partner', 'theta', 'Fstat', 'Pval', 'FDR')] + + if (!is.na(opt\$round_digits)) { + cols <- sapply(results_pairwise, is.numeric) + results_pairwise[,cols] <- round( + results_pairwise[,cols], + digits = opt\$round_digits + ) + } + write.table( - pd@fdr, - file = paste0(opt\$prefix, '.fdr.tsv'), + results_pairwise, + file = paste0(opt\$prefix, '.propd.pairwise_filtered.tsv'), col.names = TRUE, + row.names = FALSE, sep = '\\t', quote = FALSE ) } -if (opt\$adjacency == TRUE) { +# save adjacency matrix, if required + +if (opt\$save_adjacency) { + write.table( + adj, + file = paste0(opt\$prefix, '.propd.adjacency.csv'), + col.names = TRUE, + row.names = TRUE, + sep = ',', + quote = FALSE + ) +} + +# save FDR values, if permutation tests were run + +if (opt\$permutation > 0) { + fdr_table <- fdr_table[order(fdr_table\$cutoff),] + + if (!is.na(opt\$round_digits)) { + fdr_table\$FDR <- round( + fdr_table\$FDR, + digits = opt\$round_digits + ) + } + write.table( - adj, - file = paste0(opt\$prefix, '.adj.csv'), + fdr_table, + file = paste0(opt\$prefix, '.propd.fdr.tsv'), col.names = TRUE, - row.names = TRUE, - sep = ',', + row.names = FALSE, + sep = '\\t', quote = FALSE ) } -################################################ -################################################ -## WARNINGS ## -################################################ -################################################ - -sink(paste0(opt\$prefix, ".warnings.log")) -print(warnings()) -sink() - ################################################ ################################################ ## R SESSION INFO ## diff --git a/modules/nf-core/propr/propd/tests/adjacency.config b/modules/nf-core/propr/propd/tests/adjacency.config deleted file 
mode 100644 index 072a4d755c5..00000000000 --- a/modules/nf-core/propr/propd/tests/adjacency.config +++ /dev/null @@ -1,3 +0,0 @@ -process { - ext.args = {"--permutation 10 --cutoff_min 0.05 --cutoff_max 0.95 --cutoff_interval 0.1 --fixseed true --adjacency true"} -} \ No newline at end of file diff --git a/modules/nf-core/propr/propd/tests/boxcox.config b/modules/nf-core/propr/propd/tests/boxcox.config new file mode 100644 index 00000000000..f288b876199 --- /dev/null +++ b/modules/nf-core/propr/propd/tests/boxcox.config @@ -0,0 +1,5 @@ +process { + withName: 'PROPR_PROPD' { + ext.args = {"--alpha 0.1 --round_digits 5"} + } +} diff --git a/modules/nf-core/propr/propd/tests/boxcox_theta_e.config b/modules/nf-core/propr/propd/tests/boxcox_theta_e.config deleted file mode 100755 index 40c0548d9ad..00000000000 --- a/modules/nf-core/propr/propd/tests/boxcox_theta_e.config +++ /dev/null @@ -1,4 +0,0 @@ -process { - ext.args = {"--metric theta_e --alpha 0.2 --permutation 10 --cutoff_min 0.05 --cutoff_max 0.95 --cutoff_interval 0.05 --fixseed true"} - ext.prefix = {"test+theta_e+0.2"} -} \ No newline at end of file diff --git a/modules/nf-core/propr/propd/tests/default.config b/modules/nf-core/propr/propd/tests/default.config new file mode 100644 index 00000000000..ee1ed788ec4 --- /dev/null +++ b/modules/nf-core/propr/propd/tests/default.config @@ -0,0 +1,5 @@ +process { + withName: 'PROPR_PROPD' { + ext.args = {"--round_digits 5"} + } +} diff --git a/modules/nf-core/propr/propd/tests/default_boxcox.config b/modules/nf-core/propr/propd/tests/default_boxcox.config deleted file mode 100755 index 831002d9eba..00000000000 --- a/modules/nf-core/propr/propd/tests/default_boxcox.config +++ /dev/null @@ -1,4 +0,0 @@ -process { - ext.args = {"--alpha 0.2 --permutation 10 --cutoff_min 0.05 --cutoff_max 0.95 --cutoff_interval 0.05 --fixseed true"} - ext.prefix = {"test+theta_d+0.2"} -} \ No newline at end of file diff --git a/modules/nf-core/propr/propd/tests/default_permutation.config b/modules/nf-core/propr/propd/tests/default_permutation.config deleted file mode 100755 index e89c239fcf0..00000000000 --- a/modules/nf-core/propr/propd/tests/default_permutation.config +++ /dev/null @@ -1,4 +0,0 @@ -process { - ext.args = {"--permutation 10 --cutoff_min 0.05 --cutoff_max 0.95 --cutoff_interval 0.05 --fixseed true"} - ext.prefix = {"test+theta_d+NA"} -} \ No newline at end of file diff --git a/modules/nf-core/propr/propd/tests/main.nf.test b/modules/nf-core/propr/propd/tests/main.nf.test index 9fcaf93af1b..ed0de177f4b 100755 --- a/modules/nf-core/propr/propd/tests/main.nf.test +++ b/modules/nf-core/propr/propd/tests/main.nf.test @@ -9,146 +9,168 @@ nextflow_process { tag "propr" tag "propr/propd" - test("Test propr/propd using default permutation") { + test("Test propr/propd using default params") { tag "default" - config "./default_permutation.config" + config "./default.config" when { process { """ - input[0] = [ - [ id:'test' ], - file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/rnaseq_expression/SRP254919.salmon.merged.gene_counts.top1000cov.tsv") + expression_test_data_dir = params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/' + + ch_contrasts = Channel.fromPath(file(expression_test_data_dir + 'SRP254919.contrasts.csv', checkIfExists: true)) + .splitCsv ( header:true, sep:',' ) + .map{ + tuple(it, it.variable, it.reference, it.target) + } + .first() + ch_matrix = [ + [id: 'test'], + file(expression_test_data_dir + 
'SRP254919.samplesheet.csv', checkIfExists: true), + file(expression_test_data_dir + 'SRP254919.salmon.merged.gene_counts.top1000cov.tsv', checkIfExists: true) ] - input[1] = [ - [ id: 'test'], - file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/rnaseq_expression/SRP254919.samplesheet.csv") - ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out.results).match("Test propr/propd using default permutation - results") }, - { assert snapshot(process.out.versions).match("versions") } - ) - } - } - - test("Test propr/propd using default boxcox permutation") { - tag "default_boxcox" - config "./default_boxcox.config" - - when { - process { - """ - input[0] = [ - [ id:'test' ], - file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/rnaseq_expression/SRP254919.salmon.merged.gene_counts.top1000cov.tsv") - ] - input[1] = [ - [ id: 'test'], - file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/rnaseq_expression/SRP254919.samplesheet.csv") - ] + input[0] = ch_contrasts + input[1] = ch_matrix """ } } - + then { assertAll( { assert process.success }, - { assert snapshot(process.out.results).match(" Test propr/propd using default boxcox permutation - results") }, - { assert snapshot(process.out.fdr).match(" Test propr/propd using default boxcox permutation - fdr") } + { assert snapshot( + process.out.results_genewise, + process.out.versions, + file(process.out.genewise_plot[0][1]).name + ).match()} ) } } - test("Test propr/propd using theta_e permutation") { + test("Test propr/propd when saving all outputs") { - tag "theta_e" - config "./theta_e.config" + tag "save_all" + config "./save_all.config" when { process { """ - input[0] = [ - [ id:'test' ], - file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/rnaseq_expression/SRP254919.salmon.merged.gene_counts.top1000cov.tsv") - ] - input[1] = [ - [ id: 'test'], - file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/rnaseq_expression/SRP254919.samplesheet.csv") + expression_test_data_dir = params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/' + + ch_contrasts = Channel.fromPath(file(expression_test_data_dir + 'SRP254919.contrasts.csv', checkIfExists: true)) + .splitCsv ( header:true, sep:',' ) + .map{ + tuple(it, it.variable, it.reference, it.target) + } + .first() + ch_matrix = [ + [id: 'test'], + file(expression_test_data_dir + 'SRP254919.samplesheet.csv', checkIfExists: true), + file(expression_test_data_dir + 'SRP254919.salmon.merged.gene_counts.top1000cov.tsv', checkIfExists: true) ] + + input[0] = ch_contrasts + input[1] = ch_matrix """ } } - + then { assertAll( { assert process.success }, - { assert snapshot(process.out.results).match("Test propr/propd using theta_e permutation - results") } + { assert snapshot( + process.out.results_genewise, + process.out.results_pairwise, + process.out.results_pairwise_filtered, + process.out.results_adjacency, + process.out.versions, + file(process.out.genewise_plot[0][1]).name, + file(process.out.rdata[0][1]).name + ).match()} ) } } - test("Test propr/propd using theta_e and boxcox permutation") { + test("Test propr/propd when using Box-cox transformation") { - tag "boxcox_theta_e" - config "./boxcox_theta_e.config" + tag "boxcox" + config "./boxcox.config" when { process { """ - input[0] = [ - [ id:'test' ], - 
file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/rnaseq_expression/SRP254919.salmon.merged.gene_counts.top1000cov.tsv") - ] - input[1] = [ - [ id: 'test'], - file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/rnaseq_expression/SRP254919.samplesheet.csv") + expression_test_data_dir = params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/' + + ch_contrasts = Channel.fromPath(file(expression_test_data_dir + 'SRP254919.contrasts.csv', checkIfExists: true)) + .splitCsv ( header:true, sep:',' ) + .map{ + tuple(it, it.variable, it.reference, it.target) + } + .first() + ch_matrix = [ + [id: 'test'], + file(expression_test_data_dir + 'SRP254919.samplesheet.csv', checkIfExists: true), + file(expression_test_data_dir + 'SRP254919.salmon.merged.gene_counts.top1000cov.tsv', checkIfExists: true) ] + + input[0] = ch_contrasts + input[1] = ch_matrix """ } } - + then { assertAll( { assert process.success }, - { assert snapshot(process.out.results).match("Test propr/propd using theta_e and boxcox permutation - results") } + { assert snapshot( + process.out.results_genewise, + process.out.versions, + file(process.out.genewise_plot[0][1]).name + ).match()} ) } } - test("Test propr/propd with adjacency matrix") { + test("Test propr/propd when using permutation tests") { - tag "adjacency" - config "./adjacency.config" + tag "permutation" + config "./permutation.config" when { process { """ - input[0] = [ - [ id:'test' ], - file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/rnaseq_expression/SRP254919.salmon.merged.gene_counts.top1000cov.tsv") - ] - input[1] = [ - [ id: 'test'], - file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/rnaseq_expression/SRP254919.samplesheet.csv") + expression_test_data_dir = params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/' + + ch_contrasts = Channel.fromPath(file(expression_test_data_dir + 'SRP254919.contrasts.csv', checkIfExists: true)) + .splitCsv ( header:true, sep:',' ) + .map{ + tuple(it, it.variable, it.reference, it.target) + } + .first() + ch_matrix = [ + [id: 'test'], + file(expression_test_data_dir + 'SRP254919.samplesheet.csv', checkIfExists: true), + file(expression_test_data_dir + 'SRP254919.salmon.merged.gene_counts.top1000cov.tsv', checkIfExists: true) ] + + input[0] = ch_contrasts + input[1] = ch_matrix """ } } - + then { assertAll( { assert process.success }, - { assert snapshot(process.out.adj).match("Test propr/propd with adjacency matrix - adj") }, - { assert snapshot(process.out.results).match(" - results") } + { assert snapshot( + process.out.results_genewise, + process.out.fdr, + process.out.versions, + file(process.out.genewise_plot[0][1]).name + ).match()} ) } } -} \ No newline at end of file +} \ No newline at end of file diff --git a/modules/nf-core/propr/propd/tests/main.nf.test.snap b/modules/nf-core/propr/propd/tests/main.nf.test.snap index e0291044aef..318a5f0b7dd 100644 --- a/modules/nf-core/propr/propd/tests/main.nf.test.snap +++ b/modules/nf-core/propr/propd/tests/main.nf.test.snap @@ -1,133 +1,140 @@ { - " Test propr/propd using default boxcox permutation - fdr": { + "Test propr/propd using default params": { "content": [ [ [ { - "id": "test" + "id": "treatment_mCherry_hND6_", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "" }, - 
"test+theta_d+0.2.fdr.tsv:md5,17e1c382e5f8275e2858a86e98c1aa6c" + "treatment_mCherry_hND6_.propd.genewise.tsv:md5,bdc19a4b7430f248cd332287b630c872" ] - ] + ], + [ + "versions.yml:md5,1d96e70c16cc53a1d7b2e1a54fd8b7e8" + ], + "treatment_mCherry_hND6_.propd.genewise.png" ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-05-23T13:10:06.778954" + "timestamp": "2024-11-27T10:11:40.654796611" }, - "Test propr/propd using theta_e and boxcox permutation - results": { + "Test propr/propd when using Box-cox transformation": { "content": [ [ [ { - "id": "test" + "id": "treatment_mCherry_hND6_", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "" }, - "test+theta_e+0.2.propd.tsv:md5,d56fcc7c8ae0b0853ea9ca6ac6484a08" + "treatment_mCherry_hND6_.propd.genewise.tsv:md5,6e8f9c57051a286b85cee317a87c8162" ] - ] + ], + [ + "versions.yml:md5,1d96e70c16cc53a1d7b2e1a54fd8b7e8" + ], + "treatment_mCherry_hND6_.propd.genewise.png" ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-05-23T13:13:35.158486" + "timestamp": "2024-11-27T10:12:22.252395938" }, - "Test propr/propd using theta_e permutation - results": { + "Test propr/propd when using permutation tests": { "content": [ [ [ { - "id": "test" + "id": "treatment_mCherry_hND6_", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "" }, - "test+theta_e+NA.propd.tsv:md5,c190d80c11ba99a0303a8dd5ab8ed76f" + "treatment_mCherry_hND6_.propd.genewise.tsv:md5,0ae150edfbd22a35f9ed1c7213217579" ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-05-23T13:12:03.500722" - }, - "versions": { - "content": [ - [ - "versions.yml:md5,b41d17751970fc8bcf4f8e0326d239e2" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-05-23T13:07:07.588326" - }, - " - results": { - "content": [ + ], [ [ { - "id": "test" + "id": "treatment_mCherry_hND6_", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "" }, - "test.propd.tsv:md5,34fda117492faf9a60f5807f56c4be68" + "treatment_mCherry_hND6_.propd.fdr.tsv:md5,9c7011afa34a2ce7e33da9a6a6820c7e" ] - ] + ], + [ + "versions.yml:md5,1d96e70c16cc53a1d7b2e1a54fd8b7e8" + ], + "treatment_mCherry_hND6_.propd.genewise.png" ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-05-23T13:16:44.428551" + "timestamp": "2024-11-27T10:13:03.55054778" }, - " Test propr/propd using default boxcox permutation - results": { + "Test propr/propd when saving all outputs": { "content": [ [ [ { - "id": "test" + "id": "treatment_mCherry_hND6_", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "" }, - "test+theta_d+0.2.propd.tsv:md5,f1886c538e6aeed1bbac4c8c1ef0c930" + "treatment_mCherry_hND6_.propd.genewise.tsv:md5,bdc19a4b7430f248cd332287b630c872" ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-05-23T13:10:02.25738" - }, - "Test propr/propd using default permutation - results": { - "content": [ + ], [ [ { - "id": "test" + "id": "treatment_mCherry_hND6_", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "" }, - "test+theta_d+NA.propd.tsv:md5,34fda117492faf9a60f5807f56c4be68" + 
"treatment_mCherry_hND6_.propd.pairwise.tsv:md5,ff79479ea826c54aa178b0376784cbf1" ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-05-23T13:07:04.720183" - }, - "Test propr/propd with adjacency matrix - adj": { - "content": [ + ], [ [ { - "id": "test" + "id": "treatment_mCherry_hND6_", + "variable": "treatment", + "reference": "mCherry", + "target": "hND6", + "blocking": "" }, - "test.adj.csv:md5,9da907136fba72b0e098c7fbacbeb837" + "treatment_mCherry_hND6_.propd.pairwise_filtered.tsv:md5,9473f045ca1acfe1f9d6138528f743d1" ] - ] + ], + null, + [ + "versions.yml:md5,1d96e70c16cc53a1d7b2e1a54fd8b7e8" + ], + "treatment_mCherry_hND6_.propd.genewise.png", + "treatment_mCherry_hND6_.propd.rds" ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-05-23T13:16:38.527389" + "timestamp": "2024-11-27T10:12:01.936451896" } } \ No newline at end of file diff --git a/modules/nf-core/propr/propd/tests/permutation.config b/modules/nf-core/propr/propd/tests/permutation.config new file mode 100644 index 00000000000..c7718a74da1 --- /dev/null +++ b/modules/nf-core/propr/propd/tests/permutation.config @@ -0,0 +1,5 @@ +process { + withName: 'PROPR_PROPD' { + ext.args = {"--permutation 10 --number_of_cutoffs 10 --seed 123 --round_digits 5"} + } +} diff --git a/modules/nf-core/propr/propd/tests/save_all.config b/modules/nf-core/propr/propd/tests/save_all.config new file mode 100644 index 00000000000..ba7edfd5087 --- /dev/null +++ b/modules/nf-core/propr/propd/tests/save_all.config @@ -0,0 +1,5 @@ +process { + withName: 'PROPR_PROPD' { + ext.args = {"--save_pairwise_full true --save_pairwise true --save_adjacency true --save_rdata true --round_digits 5"} + } +} diff --git a/modules/nf-core/propr/propd/tests/theta_e.config b/modules/nf-core/propr/propd/tests/theta_e.config deleted file mode 100755 index 37c0dd5a817..00000000000 --- a/modules/nf-core/propr/propd/tests/theta_e.config +++ /dev/null @@ -1,4 +0,0 @@ -process { - ext.args = {"--metric theta_e --permutation 10 --cutoff_min 0.05 --cutoff_max 0.95 --cutoff_interval 0.05 --fixseed true"} - ext.prefix = {"test+theta_e+NA"} -} \ No newline at end of file From 8aa4eca73fb6245b1b69bada50e9c31687b4d660 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alan=20M=C3=B6bbs?= <64787947+alanmmobbs93@users.noreply.github.com> Date: Fri, 29 Nov 2024 08:41:19 -0300 Subject: [PATCH 04/12] New Module: NACHO_NORMALISE (#7100) * Create module * parse arguments on module R script * update meta info * update nf-test * update snaps after upgrading versions file * update snaps after upgrading versions file * add stub test, meta components and update names * remove comments from ymls * change output * add meta to output channels * fix environment.yml linting * update snaps * replace Stub with stub to pass linting * Update main.nf.test * change normalise by normalize * move moduleBinaries to test nextflow config * update snapshot after renaming --------- Co-authored-by: Anabella Trigila <18577080+atrigila@users.noreply.github.com> --- .../nf-core/nacho/normalize/environment.yml | 12 +++ modules/nf-core/nacho/normalize/main.nf | 59 +++++++++++ modules/nf-core/nacho/normalize/meta.yml | 84 +++++++++++++++ .../normalize/resources/usr/bin/nacho_norm.R | 92 ++++++++++++++++ .../nacho/normalize/tests/main.nf.test | 86 +++++++++++++++ .../nacho/normalize/tests/main.nf.test.snap | 100 ++++++++++++++++++ .../nacho/normalize/tests/nextflow.config | 7 ++ 7 files changed, 440 
insertions(+) create mode 100644 modules/nf-core/nacho/normalize/environment.yml create mode 100644 modules/nf-core/nacho/normalize/main.nf create mode 100644 modules/nf-core/nacho/normalize/meta.yml create mode 100755 modules/nf-core/nacho/normalize/resources/usr/bin/nacho_norm.R create mode 100644 modules/nf-core/nacho/normalize/tests/main.nf.test create mode 100644 modules/nf-core/nacho/normalize/tests/main.nf.test.snap create mode 100644 modules/nf-core/nacho/normalize/tests/nextflow.config diff --git a/modules/nf-core/nacho/normalize/environment.yml b/modules/nf-core/nacho/normalize/environment.yml new file mode 100644 index 00000000000..9cf652c88fe --- /dev/null +++ b/modules/nf-core/nacho/normalize/environment.yml @@ -0,0 +1,12 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - conda-forge::r-dplyr=1.1.4 + - conda-forge::r-fs=1.6.4 + - conda-forge::r-ggplot2=3.4.4 + - conda-forge::r-nacho=2.0.6 + - conda-forge::r-optparse=1.7.5 + - conda-forge::r-readr=2.1.5 + - conda-forge::r-tidyr=1.3.0 diff --git a/modules/nf-core/nacho/normalize/main.nf b/modules/nf-core/nacho/normalize/main.nf new file mode 100644 index 00000000000..69cc49ec143 --- /dev/null +++ b/modules/nf-core/nacho/normalize/main.nf @@ -0,0 +1,59 @@ +process NACHO_NORMALIZE { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container 'community.wave.seqera.io/library/r-dplyr_r-fs_r-ggplot2_r-nacho_pruned:033bc017f5f36b6d' + + input: + tuple val(meta) , path(rcc_files, stageAs: "input/*") + tuple val(meta2), path(sample_sheet) + + output: + tuple val(meta), path("normalized_counts.tsv") , emit: normalized_counts + tuple val(meta), path("normalized_counts_wo_HKnorm.tsv"), emit: normalized_counts_wo_HK + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + nacho_norm.R \\ + --input_rcc_path input \\ + $args \\ + --input_samplesheet ${sample_sheet} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') + r-nacho: \$(Rscript -e "library(NACHO); cat(as.character(packageVersion('NACHO')))") + r-dplyr: \$(Rscript -e "library(dplyr); cat(as.character(packageVersion('dplyr')))") + r-ggplot2: \$(Rscript -e "library(ggplot2); cat(as.character(packageVersion('ggplot2')))") + r-tidyr: \$(Rscript -e "library(tidyr); cat(as.character(packageVersion('tidyr')))") + r-readr: \$(Rscript -e "library(readr); cat(as.character(packageVersion('readr')))") + r-fs: \$(Rscript -e "library(fs); cat(as.character(packageVersion('fs')))") + r-optparse: \$(Rscript -e "library(optparse); cat(as.character(packageVersion('optparse')))") + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + """ + touch normalized_counts.tsv + touch normalized_counts_wo_HKnorm.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') + r-nacho: \$(Rscript -e "library(NACHO); cat(as.character(packageVersion('NACHO')))") + r-dplyr: \$(Rscript -e "library(dplyr); cat(as.character(packageVersion('dplyr')))") + r-ggplot2: \$(Rscript -e "library(ggplot2); cat(as.character(packageVersion('ggplot2')))") + r-tidyr: \$(Rscript -e "library(tidyr); cat(as.character(packageVersion('tidyr')))") + r-readr: \$(Rscript -e "library(readr); cat(as.character(packageVersion('readr')))") + r-fs: \$(Rscript -e "library(fs); cat(as.character(packageVersion('fs')))") + r-optparse: \$(Rscript 
-e "library(optparse); cat(as.character(packageVersion('optparse')))") + END_VERSIONS + """ +} diff --git a/modules/nf-core/nacho/normalize/meta.yml b/modules/nf-core/nacho/normalize/meta.yml new file mode 100644 index 00000000000..8fddb762e3a --- /dev/null +++ b/modules/nf-core/nacho/normalize/meta.yml @@ -0,0 +1,84 @@ +--- +name: nacho_normalize +description: | + NACHO (NAnostring quality Control dasHbOard) is developed for NanoString nCounter data. + NanoString nCounter data is a messenger-RNA/micro-RNA (mRNA/miRNA) expression assay and works with fluorescent barcodes. + Each barcode is assigned a mRNA/miRNA, which can be counted after bonding with its target. + As a result each count of a specific barcode represents the presence of its target mRNA/miRNA. +keywords: + - nacho + - nanostring + - mRNA + - miRNA + - qc +tools: + - NACHO: + description: | + R package that uses two main functions to summarize and visualize NanoString RCC files, + namely: `load_rcc()` and `visualise()`. It also includes a function `normalise()`, which (re)calculates + sample specific size factors and normalises the data. + For more information `vignette("NACHO")` and `vignette("NACHO-analysis")` + homepage: https://github.com/mcanouil/NACHO + documentation: https://cran.r-project.org/web/packages/NACHO/vignettes/NACHO.html + doi: "10.1093/bioinformatics/btz647" + licence: [ "GPL-3.0" ] + identifier: "" + args_id: "$args" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - rcc_files: + type: file + description: | + List of RCC files for all samples, which are direct outputs from NanoString runs + pattern: "*.RCC" + - - meta2: + type: map + description: | + Groovy Map containing file information + e.g. [ id:'test_samplesheet' ] + - sample_sheet: + type: "file" + pattern: "*.csv" + description: | + Comma-separated file with 3 columns: RCC_FILE, RCC_FILE_NAME, and SAMPLE_ID + +output: + - normalized_counts: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "normalized_counts.tsv": + type: file + description: | + Tab-separated file with gene normalized counts for the samples + pattern: "normalized_counts.tsv" + + - normalized_counts_wo_HK: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "normalized_counts_wo_HKnorm.tsv": + type: file + description: | + Tab-separated file with gene normalized counts for the samples, without housekeeping genes. 
+ pattern: "normalized_counts_wo_HKnorm.tsv" + - versions: + - "versions.yml": + type: file + description: | + File containing software versions + pattern: "versions.yml" + +authors: + - "@alanmmobbs93" +maintainers: + - "@alanmmobbs93" diff --git a/modules/nf-core/nacho/normalize/resources/usr/bin/nacho_norm.R b/modules/nf-core/nacho/normalize/resources/usr/bin/nacho_norm.R new file mode 100755 index 00000000000..53899126a3b --- /dev/null +++ b/modules/nf-core/nacho/normalize/resources/usr/bin/nacho_norm.R @@ -0,0 +1,92 @@ +#!/usr/bin/env Rscript +library(optparse) +library(dplyr) +library(ggplot2) +library(fs) +library(NACHO) +library(readr) +library(tidyr) + +# Parse Arguments +norm_methods <- c("GLM", "GEO") +option_list <- list( + make_option( + c("--input_rcc_path"), + type = "character", + default = "./" , + help = "Path to the folder that contains the RCC input file(s)", + metavar = "character"), + make_option( + c("--input_samplesheet"), + type = "character", + default = NULL , + help = "Path to the sample sheet file", + metavar = "character"), + make_option( + c("--norm_method"), + type = "character", + default = "GLM", + help = paste0("Normalization method. One of ", paste(norm_methods, collapse = " "), paste = " "), + metavar = "character") +) + +# Parse the command-line arguments +opt <- parse_args(OptionParser(option_list = option_list)) + +# Validate mandatory arguments +if (is.null(opt$input_rcc_path)) { + stop("Error: The --input_rcc_path parameter is mandatory and must be specified.") +} + +if (is.null(opt$input_samplesheet)) { + stop("Error: The --input_samplesheet parameter is mandatory and must be specified.") +} + +# Validate that --norm_method is one of the allowed values +if (!(opt$norm_method %in% norm_methods)) { + stop(paste("Error: The --norm_method parameter must be one of:", paste(norm_methods, collapse = " "))) +} + +input_rcc_path <- opt$input_rcc_path +input_samplesheet <- opt$input_samplesheet +norm_method <- opt$norm_method + +# Create filelist for NachoQC + +list_of_rccs <- dir_ls(path = input_rcc_path, glob = "*.RCC") +print(list_of_rccs) + +# Core Code +## Read data +nacho_data <- load_rcc(data_directory = input_rcc_path, + ssheet_csv = input_samplesheet, + id_colname = "RCC_FILE_NAME", + normalisation_method = norm_method) + +output_base <- "./" + +get_counts <- function( + nacho, + codeclass = "Endogenous", + rownames = "RCC_FILE_NAME", + colnames = c("Name", "Accession") +) { + nacho[["nacho"]] %>% + dplyr::select(c("RCC_FILE_NAME", "Name", "Count_Norm", "CodeClass")) %>% + tidyr::pivot_wider(names_from = "RCC_FILE_NAME", values_from = "Count_Norm") +} + +## Write out normalized counts +norm_counts <- as.data.frame(get_counts(nacho_data)) +write_tsv(norm_counts, file = "normalized_counts.tsv") + +## Create non-hk normalized counts too +nacho_data_no_hk <- load_rcc(data_directory = input_rcc_path, + ssheet_csv = input_samplesheet, + id_colname = "RCC_FILE_NAME", + normalisation_method = norm_method, + housekeeping_norm = FALSE) + +## Export non-hk tables +norm_counts_without_hks <- as.data.frame(get_counts(nacho_data_no_hk)) +write_tsv(norm_counts_without_hks, file = "normalized_counts_wo_HKnorm.tsv") diff --git a/modules/nf-core/nacho/normalize/tests/main.nf.test b/modules/nf-core/nacho/normalize/tests/main.nf.test new file mode 100644 index 00000000000..ec21ec5c87f --- /dev/null +++ b/modules/nf-core/nacho/normalize/tests/main.nf.test @@ -0,0 +1,86 @@ +nextflow_process { + + name "Test Process NACHO_NORMALIZE" + script "../main.nf" + process 
"NACHO_NORMALIZE" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "nacho" + tag "nacho/normalize" + + test("Salmon - RCC files") { + + when { + params { + module_args = '--norm_method "GEO"' + } + process { + """ + // RCC Files: Collect from sample sheet + input[0] = + Channel.fromPath('https://raw.githubusercontent.com/nf-core/test-datasets/nanostring/samplesheets/samplesheet_test.csv', checkIfExists: true) + .splitCsv( header: true ) + .map { row -> return file(row.RCC_FILE, checkIfExists: true) } // Select first column: path to file + .collect() + .map{ files -> + tuple( [id: 'test'], files ) // Add meta component + } + + + // Sample sheet + input[1] = Channel.of( [ + [ id: 'test_samplesheet'], + [ file('https://raw.githubusercontent.com/nf-core/test-datasets/nanostring/samplesheets/samplesheet_test.csv', checkIfExists: true) ] + ] ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("Salmon - RCC files - stub") { + + options "-stub" + when { + params { + module_args = '--norm_method "GEO"' + } + process { + """ + // RCC Files: Collect from sample sheet + input[0] = + Channel.fromPath('https://raw.githubusercontent.com/nf-core/test-datasets/nanostring/samplesheets/samplesheet_test.csv', checkIfExists: true) + .splitCsv( header: true ) + .map { row -> return file(row.RCC_FILE, checkIfExists: true) } // Select first column: path to file // Select first column: path to file + .collect() + .map{ files -> + tuple( [id: 'test'], files ) // Add meta component + } + + // Sample sheet + input[1] = + Channel.of( [ + [id: 'test_samplesheet'], + [ file('https://raw.githubusercontent.com/nf-core/test-datasets/nanostring/samplesheets/samplesheet_test.csv', checkIfExists: true) ] + ] ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/nacho/normalize/tests/main.nf.test.snap b/modules/nf-core/nacho/normalize/tests/main.nf.test.snap new file mode 100644 index 00000000000..f6e20e3947d --- /dev/null +++ b/modules/nf-core/nacho/normalize/tests/main.nf.test.snap @@ -0,0 +1,100 @@ +{ + "Salmon - RCC files": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "normalized_counts.tsv:md5,a0124c7a24bd04296f441d9ade82a05f" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "normalized_counts_wo_HKnorm.tsv:md5,5a2ce112c24e1b0d0f4cf3392111ef9e" + ] + ], + "2": [ + "versions.yml:md5,dbc82908e1d1fcd2429022a4f327b9ba" + ], + "normalized_counts": [ + [ + { + "id": "test" + }, + "normalized_counts.tsv:md5,a0124c7a24bd04296f441d9ade82a05f" + ] + ], + "normalized_counts_wo_HK": [ + [ + { + "id": "test" + }, + "normalized_counts_wo_HKnorm.tsv:md5,5a2ce112c24e1b0d0f4cf3392111ef9e" + ] + ], + "versions": [ + "versions.yml:md5,dbc82908e1d1fcd2429022a4f327b9ba" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-28T18:31:49.03241566" + }, + "Salmon - RCC files - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "normalized_counts.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "normalized_counts_wo_HKnorm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,dbc82908e1d1fcd2429022a4f327b9ba" + ], + "normalized_counts": [ + [ + { + "id": "test" + }, + "normalized_counts.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "normalized_counts_wo_HK": [ + [ + { + "id": "test" + }, + 
"normalized_counts_wo_HKnorm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,dbc82908e1d1fcd2429022a4f327b9ba" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-28T18:32:02.81614763" + } +} \ No newline at end of file diff --git a/modules/nf-core/nacho/normalize/tests/nextflow.config b/modules/nf-core/nacho/normalize/tests/nextflow.config new file mode 100644 index 00000000000..b08db067be7 --- /dev/null +++ b/modules/nf-core/nacho/normalize/tests/nextflow.config @@ -0,0 +1,7 @@ +nextflow.enable.moduleBinaries = true + +process { + withName: 'NACHO_NORMALIZE' { + ext.args = params.module_args + } +} From 06093bb4fd6d910b6167bc51abc40329b3b2c4f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alan=20M=C3=B6bbs?= <64787947+alanmmobbs93@users.noreply.github.com> Date: Fri, 29 Nov 2024 08:41:28 -0300 Subject: [PATCH 05/12] New Module: NACHO_QC (#7108) * initialize module * update test * Remove TO-DO from main.nf.test * Remove comment from main.nf.test * update test after meta component in output channels * split channel into png and txt outputs * update meta file * move moduleBinaries to test nextflow config --- modules/nf-core/nacho/qc/environment.yml | 12 + modules/nf-core/nacho/qc/main.nf | 77 +++++ modules/nf-core/nacho/qc/meta.yml | 89 ++++++ .../nacho/qc/resources/usr/bin/nacho_qc.R | 263 ++++++++++++++++++ modules/nf-core/nacho/qc/tests/main.nf.test | 97 +++++++ .../nf-core/nacho/qc/tests/main.nf.test.snap | 78 ++++++ .../nf-core/nacho/qc/tests/nextflow.config | 1 + 7 files changed, 617 insertions(+) create mode 100644 modules/nf-core/nacho/qc/environment.yml create mode 100644 modules/nf-core/nacho/qc/main.nf create mode 100644 modules/nf-core/nacho/qc/meta.yml create mode 100755 modules/nf-core/nacho/qc/resources/usr/bin/nacho_qc.R create mode 100644 modules/nf-core/nacho/qc/tests/main.nf.test create mode 100644 modules/nf-core/nacho/qc/tests/main.nf.test.snap create mode 100644 modules/nf-core/nacho/qc/tests/nextflow.config diff --git a/modules/nf-core/nacho/qc/environment.yml b/modules/nf-core/nacho/qc/environment.yml new file mode 100644 index 00000000000..9cf652c88fe --- /dev/null +++ b/modules/nf-core/nacho/qc/environment.yml @@ -0,0 +1,12 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - conda-forge::r-dplyr=1.1.4 + - conda-forge::r-fs=1.6.4 + - conda-forge::r-ggplot2=3.4.4 + - conda-forge::r-nacho=2.0.6 + - conda-forge::r-optparse=1.7.5 + - conda-forge::r-readr=2.1.5 + - conda-forge::r-tidyr=1.3.0 diff --git a/modules/nf-core/nacho/qc/main.nf b/modules/nf-core/nacho/qc/main.nf new file mode 100644 index 00000000000..54bf2ae368e --- /dev/null +++ b/modules/nf-core/nacho/qc/main.nf @@ -0,0 +1,77 @@ +process NACHO_QC { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container 'community.wave.seqera.io/library/r-dplyr_r-fs_r-ggplot2_r-nacho_pruned:033bc017f5f36b6d' + + input: + tuple val(meta) , path(rcc_files, stageAs: "input/*") + tuple val(meta2), path(sample_sheet) + + output: + tuple val(meta), path("*.html") , emit: nacho_qc_reports + tuple val(meta), path("*_mqc.png"), emit: nacho_qc_png + tuple val(meta), path("*_mqc.txt"), emit: nacho_qc_txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + nacho_qc.R \\ + --input_rcc_path input \\ + --input_samplesheet ${sample_sheet} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + r-base: \$(echo 
\$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') + r-nacho: \$(Rscript -e "library(NACHO); cat(as.character(packageVersion('NACHO')))") + r-dplyr: \$(Rscript -e "library(dplyr); cat(as.character(packageVersion('dplyr')))") + r-ggplot2: \$(Rscript -e "library(ggplot2); cat(as.character(packageVersion('ggplot2')))") + r-tidyr: \$(Rscript -e "library(tidyr); cat(as.character(packageVersion('tidyr')))") + r-readr: \$(Rscript -e "library(readr); cat(as.character(packageVersion('readr')))") + r-fs: \$(Rscript -e "library(fs); cat(as.character(packageVersion('fs')))") + r-optparse: \$(Rscript -e "library(optparse); cat(as.character(packageVersion('optparse')))") + END_VERSIONS + """ + + stub: + """ + touch qc.html + touch qc_with_outliers.html + touch AVG_vs_BD_mqc.png + touch AVG_vs_MED_mqc.png + touch BD_mqc.png + touch FOV_mqc.png + touch HKF_mqc.png + touch HK_mqc.png + touch LOD_mqc.png + touch Neg_mqc.png + touch PCA1_vs_PCA2_mqc.png + touch PCAi_mqc.png + touch PCA_mqc.png + touch plot_normf_mqc.png + touch Posctrl_linearity_mqc.png + touch POSF_vs_NEGF_mqc.png + touch Pos_mqc.png + touch Pos_vs_neg_mqc.png + touch normalized_qc_mqc.txt + touch hk_detected_mqc.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') + r-nacho: \$(Rscript -e "library(NACHO); cat(as.character(packageVersion('NACHO')))") + r-dplyr: \$(Rscript -e "library(dplyr); cat(as.character(packageVersion('dplyr')))") + r-ggplot2: \$(Rscript -e "library(ggplot2); cat(as.character(packageVersion('ggplot2')))") + r-tidyr: \$(Rscript -e "library(tidyr); cat(as.character(packageVersion('tidyr')))") + r-readr: \$(Rscript -e "library(readr); cat(as.character(packageVersion('readr')))") + r-fs: \$(Rscript -e "library(fs); cat(as.character(packageVersion('fs')))") + r-optparse: \$(Rscript -e "library(optparse); cat(as.character(packageVersion('optparse')))") + END_VERSIONS + """ +} diff --git a/modules/nf-core/nacho/qc/meta.yml b/modules/nf-core/nacho/qc/meta.yml new file mode 100644 index 00000000000..f3c14934bd4 --- /dev/null +++ b/modules/nf-core/nacho/qc/meta.yml @@ -0,0 +1,89 @@ +name: nacho_qc +description: | + NACHO (NAnostring quality Control dasHbOard) is developed for NanoString nCounter data. + NanoString nCounter data is a messenger-RNA/micro-RNA (mRNA/miRNA) expression assay and works with fluorescent barcodes. + Each barcode is assigned a mRNA/miRNA, which can be counted after bonding with its target. + As a result each count of a specific barcode represents the presence of its target mRNA/miRNA. +keywords: + - nacho + - nanostring + - mRNA + - miRNA + - qc +tools: + - NACHO: + description: | + R package that uses two main functions to summarize and visualize NanoString RCC files, + namely: `load_rcc()` and `visualise()`. It also includes a function `normalise()`, which (re)calculates + sample specific size factors and normalises the data. + For more information `vignette("NACHO")` and `vignette("NACHO-analysis")` + homepage: https://github.com/mcanouil/NACHO + documentation: https://cran.r-project.org/web/packages/NACHO/vignettes/NACHO.html + doi: "10.1093/bioinformatics/btz647" + licence: [ "GPL-3.0" ] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - rcc_files: + type: file + description: | + List of RCC files for all samples, which are direct outputs from NanoString runs + pattern: "*.RCC" + - - meta2: + type: map + description: | + Groovy Map containing file information + e.g. [ id:'test_samplesheet' ] + - sample_sheet: + type: "file" + pattern: "*.csv" + description: | + Comma-separated file with 3 columns: RCC_FILE, RCC_FILE_NAME, and SAMPLE_ID +output: + - nacho_qc_reports: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.html": + type: file + description: | + HTML report + pattern: "*.html" + - nacho_qc_png: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*_mqc.png": + type: file + description: | + Output PNG files + pattern: "*_mqc.png" + - nacho_qc_txt: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*_mqc.txt": + type: file + description: | + Plain text reports + pattern: "*_mqc.txt" + - versions: + - "versions.yml": + type: file + description: | + File containing software versions + pattern: "versions.yml" +authors: + - "@alanmmobbs93" +maintainers: + - "@alanmmobbs93" diff --git a/modules/nf-core/nacho/qc/resources/usr/bin/nacho_qc.R b/modules/nf-core/nacho/qc/resources/usr/bin/nacho_qc.R new file mode 100755 index 00000000000..21d20b317b0 --- /dev/null +++ b/modules/nf-core/nacho/qc/resources/usr/bin/nacho_qc.R @@ -0,0 +1,263 @@ +#!/usr/bin/env Rscript +library(optparse) +library(dplyr) +library(ggplot2) +library(fs) +library(NACHO) +library(readr) +library(tidyr) + +# Commandline Argument parsing +option_list <- list( + make_option( + c("--input_rcc_path"), + type = "character", + default = "./" , + help = "Path to the folder that contains the RCC input file(s)", + metavar = "character"), + make_option( + c("--input_samplesheet"), + type = "character", + default = NULL , + help = "Path to the sample sheet file", + metavar = "character") +) + +opt <- parse_args(OptionParser(option_list = option_list)) + +# Validate mandatory arguments +if (is.null(opt$input_rcc_path)) { + stop("Error: The --input_rcc_path parameter is mandatory and must be specified.") +} + +if (is.null(opt$input_samplesheet)) { + stop("Error: The --input_samplesheet parameter is mandatory and must be specified.") +} + +input_rcc_path <- opt$input_rcc_path +input_samplesheet <- opt$input_samplesheet + +# Create filelist for NachoQC +list_of_rccs <- dir_ls(path = input_rcc_path, glob = "*.RCC") + +# Core Code +nacho_data <- load_rcc(data_directory = input_rcc_path, + ssheet_csv = input_samplesheet, + id_colname = "RCC_FILE_NAME") + +output_base <- "./" + +# Write out HK genes detected and add to MultiQC report as custom content +line="#id: nf-core-nanostring-hk-genes +#section_name: 'Housekeeping Genes' +#description: 'The following Housekeeping Genes have been detected in the input RCC Files:' +#plot_type: 'html' +#section_href: 'https://github.com/nf-core/nanostring' +#data: + " + +write(line,file=paste0(output_base, "hk_detected_mqc.txt"),append=TRUE) +write(nacho_data$housekeeping_genes ,paste0(output_base,"hk_detected_mqc.txt"),append=TRUE) + +# Add in all plots as MQC output for MultiQC +plot_bd <- autoplot( + object = nacho_data, + x = "BD", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) +ggsave(filename="BD_mqc.png", plot_bd) + +## Field of View (FoV) Imaging + +plot_fov <- autoplot( + object = nacho_data, + x = "FoV", + colour = 
"CartridgeID", + size = 0.5, + show_legend = TRUE +) +ggsave(filename="FOV_mqc.png", plot_fov) + + +## Positive Control Linearity + +plot_posctrl_lin <- autoplot( + object = nacho_data, + x = "PCL", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) + +ggsave(filename="Posctrl_linearity_mqc.png", plot_posctrl_lin) + +## Limit of Detection + +plot_lod <- autoplot( + object = nacho_data, + x = "LoD", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) + +ggsave(filename="LOD_mqc.png", plot_lod) + +## Positive Controls + +plot_pos <- autoplot( + object = nacho_data, + x = "Positive", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) +ggsave(filename="Pos_mqc.png", plot_pos) + + +## Negative Controls + +plot_neg <- autoplot( + object = nacho_data, + x = "Negative", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) +ggsave(filename="Neg_mqc.png", plot_neg) + +## Housekeeping Genes + +plot_hk <- autoplot( + object = nacho_data, + x = "Housekeeping", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) +ggsave(filename="HK_mqc.png", plot_hk) + +## Positive Controls vs Negative Controls + +plot_pos_vs_neg <- autoplot( + object = nacho_data, + x = "PN", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) +ggsave(filename="Pos_vs_neg_mqc.png", plot_pos_vs_neg) + +## Average Counts vs. Binding Density + +plot_avg_vs_bd <- autoplot( + object = nacho_data, + x = "ACBD", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) +ggsave(filename="AVG_vs_BD_mqc.png", plot_avg_vs_bd) + +## Average Counts vs. Median Counts + +plot_avg_vs_med <- autoplot( + object = nacho_data, + x = "ACMC", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) +ggsave(filename="AVG_vs_MED_mqc.png", plot_avg_vs_med) + +## Principal Component 1 vs. 2 + +plot_pc12 <- autoplot( + object = nacho_data, + x = "PCA12", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) +ggsave(filename="PCA1_vs_PCA2_mqc.png", plot_pc12) + +## Principal Component i + +plot_pcai <- autoplot( + object = nacho_data, + x = "PCAi", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) +ggsave(filename="PCAi_mqc.png", plot_pcai) + +## Principal Component planes +plot_pcap <- autoplot( + object = nacho_data, + x = "PCA", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) +ggsave(filename="PCA_mqc.png", plot_pcap) + +## Positive Factor vs. 
Negative Factor +plot_posf_vs_negf <- autoplot( + object = nacho_data, + x = "PFNF", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) +ggsave(filename="POSF_vs_NEGF_mqc.png", plot_posf_vs_negf) + +## Housekeeping Factor + +plot_hkf <- autoplot( + object = nacho_data, + x = "HF", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) +ggsave(filename="HKF_mqc.png", plot_hkf) + +## Normalization Factors + +plot_normf <- autoplot( + object = nacho_data, + x = "NORM", + colour = "CartridgeID", + size = 0.5, + show_legend = TRUE +) +ggsave(filename="plot_normf_mqc.png", plot_normf) + +# Create QC table for MultiQC Report +outliers_thresholds <- nacho_data[["outliers_thresholds"]] + +qc_table <- nacho_data[["nacho"]] %>% + select(c(RCC_FILE_NAME,BD,FoV,PCL,LoD,MC,MedC,Positive_factor,Negative_factor,House_factor)) %>% + unique() %>% + mutate("BD QC" = if_else(BD < outliers_thresholds[["BD"]][1] | BD > outliers_thresholds[["BD"]][2], "FAIL", "PASS"), .after = BD) %>% + mutate("FoV QC" = if_else(FoV < outliers_thresholds[["FoV"]], "FAIL", "PASS"), .after = FoV) %>% + mutate("PCL QC" = if_else(PCL < outliers_thresholds[["PCL"]], "FAIL", "PASS"), .after = PCL) %>% + mutate("LoD QC" = if_else(LoD < outliers_thresholds[["LoD"]], "FAIL", "PASS"), .after = LoD) %>% + mutate("PNF QC" = if_else(Positive_factor < outliers_thresholds[["Positive_factor"]][1] | Positive_factor > outliers_thresholds[["Positive_factor"]][2], "FAIL", "PASS"), .after = Positive_factor) %>% + mutate("HKNF QC" = if_else(House_factor < outliers_thresholds[["House_factor"]][1] | House_factor > outliers_thresholds[["House_factor"]][2], "FAIL", "PASS"), .after = House_factor) %>% + relocate(Negative_factor, .after = last_col()) %>% + rename("Negative Factor" = Negative_factor) %>% + rename("House Factor" = House_factor) %>% + rename("Positive Factor" = Positive_factor) %>% + rename("RCC_FILE" = RCC_FILE_NAME) + +write_tsv(qc_table ,file=paste0(output_base,"normalized_qc_mqc.txt")) + +# Render Standard Report for investigation in main MultiQC Report +render(nacho_data, output_dir = output_base, output_file = "NanoQC.html", show_outliers = FALSE) + +# Render the same Report for standard investigation, but not for MultiQC Report +render(nacho_data, output_dir = output_base, output_file = "NanoQC_with_outliers.html", show_outliers = TRUE) diff --git a/modules/nf-core/nacho/qc/tests/main.nf.test b/modules/nf-core/nacho/qc/tests/main.nf.test new file mode 100644 index 00000000000..fe4176bdaf1 --- /dev/null +++ b/modules/nf-core/nacho/qc/tests/main.nf.test @@ -0,0 +1,97 @@ +nextflow_process { + + name "Test Process NACHO_QC" + script "../main.nf" + process "NACHO_QC" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "nacho" + tag "nacho/qc" + + test("Salmon - RCC files") { + + when { + process { + """ + // RCC Files: Collect from sample sheet + input[0] = + Channel.fromPath('https://raw.githubusercontent.com/nf-core/test-datasets/nanostring/samplesheets/samplesheet_test.csv', checkIfExists: true) + .splitCsv( header: true ) + .map { row -> return file(row.RCC_FILE, checkIfExists: true) } // Select first column: path to file + .collect() + .map{ files -> + return tuple( [id: 'test1'], files ) // Add meta component + } + + // Sample sheet + input[1] = Channel.of( [ + [ id: 'test_samplesheet'], + [ file('https://raw.githubusercontent.com/nf-core/test-datasets/nanostring/samplesheets/samplesheet_test.csv', checkIfExists: true) ] + ] ) + """ + } + } + + then { + assertAll( + { assert 
process.success }, + { with(process.out) { + assert nacho_qc_reports.get(0).get(1).size() == 2 + assert nacho_qc_png.get(0).get(1).size() == 16 + assert nacho_qc_txt.get(0).get(1).size() == 2 + assert snapshot( + nacho_qc_reports.get(0).get(1).collect { file(it).name }, //undeterministic .html mqc files + nacho_qc_png.get(0).get(1).collect { file(it).name }, //undeterministic .png mqc files + nacho_qc_txt.get(0).get(1), //stable .txt mqc files + versions + ).match() } + } + ) + } + } + + test("Salmon - RCC files - stub") { + + options "-stub" + when { + process { + """ + // RCC Files: Collect from sample sheet + input[0] = + Channel.fromPath('https://raw.githubusercontent.com/nf-core/test-datasets/nanostring/samplesheets/samplesheet_test.csv', checkIfExists: true) + .splitCsv( header: true ) + .map{ row -> return file(row.RCC_FILE, checkIfExists: true) } // Select first column: path to file + .collect() + .map{ files -> + tuple( [id: 'test_stub'], files ) // Add meta component + } + + // Sample sheet + input[1] = Channel.of( [ + [ id: 'test_samplesheet'], + [ file('https://raw.githubusercontent.com/nf-core/test-datasets/nanostring/samplesheets/samplesheet_test.csv', checkIfExists: true) ] + ] ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { with(process.out) { + assert nacho_qc_reports.get(0).get(1).size() == 2 + assert nacho_qc_png.get(0).get(1).size() == 16 + assert nacho_qc_txt.get(0).get(1).size() == 2 + assert snapshot( + nacho_qc_reports.get(0).get(1).collect { file(it).name }, //undeterministic .html mqc files + nacho_qc_png.get(0).get(1).collect { file(it).name }, //undeterministic .png mqc files + nacho_qc_txt.get(0).get(1), //stable .txt mqc files + versions + ).match() } + } + ) + } + } +} diff --git a/modules/nf-core/nacho/qc/tests/main.nf.test.snap b/modules/nf-core/nacho/qc/tests/main.nf.test.snap new file mode 100644 index 00000000000..296b6a7562b --- /dev/null +++ b/modules/nf-core/nacho/qc/tests/main.nf.test.snap @@ -0,0 +1,78 @@ +{ + "Salmon - RCC files": { + "content": [ + [ + "NanoQC.html", + "NanoQC_with_outliers.html" + ], + [ + "AVG_vs_BD_mqc.png", + "AVG_vs_MED_mqc.png", + "BD_mqc.png", + "FOV_mqc.png", + "HKF_mqc.png", + "HK_mqc.png", + "LOD_mqc.png", + "Neg_mqc.png", + "PCA1_vs_PCA2_mqc.png", + "PCA_mqc.png", + "PCAi_mqc.png", + "POSF_vs_NEGF_mqc.png", + "Pos_mqc.png", + "Pos_vs_neg_mqc.png", + "Posctrl_linearity_mqc.png", + "plot_normf_mqc.png" + ], + [ + "hk_detected_mqc.txt:md5,61209383acc2abf6fc3ea309b5a5e094", + "normalized_qc_mqc.txt:md5,9a0b015a28094a17331b12b08898da8e" + ], + [ + "versions.yml:md5,771de828b0a5e1f2e715fd3f62d9a9c9" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-28T14:10:45.10759212" + }, + "Salmon - RCC files - stub": { + "content": [ + [ + "qc.html", + "qc_with_outliers.html" + ], + [ + "AVG_vs_BD_mqc.png", + "AVG_vs_MED_mqc.png", + "BD_mqc.png", + "FOV_mqc.png", + "HKF_mqc.png", + "HK_mqc.png", + "LOD_mqc.png", + "Neg_mqc.png", + "PCA1_vs_PCA2_mqc.png", + "PCA_mqc.png", + "PCAi_mqc.png", + "POSF_vs_NEGF_mqc.png", + "Pos_mqc.png", + "Pos_vs_neg_mqc.png", + "Posctrl_linearity_mqc.png", + "plot_normf_mqc.png" + ], + [ + "hk_detected_mqc.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "normalized_qc_mqc.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + "versions.yml:md5,771de828b0a5e1f2e715fd3f62d9a9c9" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-28T14:07:24.754188513" + } +} \ No newline at end of file diff --git 
a/modules/nf-core/nacho/qc/tests/nextflow.config b/modules/nf-core/nacho/qc/tests/nextflow.config new file mode 100644 index 00000000000..651f0b86a59 --- /dev/null +++ b/modules/nf-core/nacho/qc/tests/nextflow.config @@ -0,0 +1 @@ +nextflow.enable.moduleBinaries = true From 0b27602842d3d79fd0e8db79f4afa764967fc3d1 Mon Sep 17 00:00:00 2001 From: Matthias Zepper <6963520+MatthiasZepper@users.noreply.github.com> Date: Fri, 29 Nov 2024 13:05:22 +0100 Subject: [PATCH 06/12] UMICollapse module: Drop external dependencies from UMICollapse module tests (#7075) * Update the umi-tools dedup tests to use the new test data with UMIs. * Update umicollapse tests to use the new UMI test data. * Switch to nf-bam plugin for output validation. --------- Co-authored-by: Matthias Zepper Co-authored-by: Jonathan Manning --- .../nf-core/umicollapse/tests/main.nf.test | 226 ++++++------------ .../umicollapse/tests/main.nf.test.snap | 128 +++++----- .../nf-core/umicollapse/tests/nextflow.config | 5 +- .../umicollapse/tests/nextflow_PE.config | 10 - .../umicollapse/tests/nextflow_SE.config | 10 - .../nf-core/umitools/dedup/tests/main.nf.test | 28 +-- .../umitools/dedup/tests/main.nf.test.snap | 40 +--- .../tests/main.nf.test | 60 +---- .../tests/main.nf.test.snap | 64 +++-- .../tests/paired-end-umis.config | 6 +- .../tests/main.nf.test | 9 +- .../tests/main.nf.test.snap | 13 +- 12 files changed, 212 insertions(+), 387 deletions(-) delete mode 100644 modules/nf-core/umicollapse/tests/nextflow_PE.config delete mode 100644 modules/nf-core/umicollapse/tests/nextflow_SE.config diff --git a/modules/nf-core/umicollapse/tests/main.nf.test b/modules/nf-core/umicollapse/tests/main.nf.test index cc28359a667..db578775ee2 100644 --- a/modules/nf-core/umicollapse/tests/main.nf.test +++ b/modules/nf-core/umicollapse/tests/main.nf.test @@ -7,63 +7,18 @@ nextflow_process { tag "modules" tag "modules_nfcore" tag "umicollapse" - tag "umitools/extract" - tag "samtools/index" - tag "bwa/index" - tag "bwa/mem" test("umicollapse single end test") { - setup{ - run("UMITOOLS_EXTRACT"){ - script "../../umitools/extract/main.nf" - config "./nextflow_SE.config" - process{ - """ - input[0] = [ - [ id:'test', single_end:true ], // meta map - [ - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) - ] - ] - """ - } - } - - run("BWA_INDEX"){ - script "../../bwa/index/main.nf" - process{ - """ - input[0] = [[ id:'sarscov2'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] - """ - } - } - run("BWA_MEM"){ - script "../../bwa/mem/main.nf" - process{ - """ - input[0] = UMITOOLS_EXTRACT.out.reads - input[1] = BWA_INDEX.out.index - input[2] = [[ id:'sarscov2'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] - input[3] = true - """ - } - } - run("SAMTOOLS_INDEX"){ - script "../../samtools/index/main.nf" - process{ - """ - input[0] = BWA_MEM.out.bam - """ - } - } - } when { - config "./nextflow_SE.config" + config "./nextflow.config" process { """ - input[0] = BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.umi.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/bam/test.single_end.umi.sorted.bam.bai', checkIfExists: true) + ]) input[1] = 'bam' """ } @@ -73,7 +28,7 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - process.out.bam, + bam(process.out.bam[0][1]).getSamLinesMD5(), process.out.versions).match() } ) } @@ -81,60 +36,16 @@ nextflow_process { } test("umicollapse paired tests") { - setup{ - run("UMITOOLS_EXTRACT"){ - script "../../umitools/extract/main.nf" - config "./nextflow_PE.config" - process{ - """ - input[0] = [ - [ id:'test', single_end:false ], // meta map - [ - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) - ] - ] - """ - } - } - - run("BWA_INDEX"){ - script "../../bwa/index/main.nf" - process{ - """ - input[0] = [ - [ id:'sarscov2'], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ] - """ - } - } - run("BWA_MEM"){ - script "../../bwa/mem/main.nf" - process{ - """ - input[0] = UMITOOLS_EXTRACT.out.reads - input[1] = BWA_INDEX.out.index - input[2] = [[ id:'sarscov2'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] - input[3] = true - """ - } - } - run("SAMTOOLS_INDEX"){ - script "../../samtools/index/main.nf" - process{ - """ - input[0] = BWA_MEM.out.bam - """ - } - } - } when { - config "./nextflow_PE.config" + config "./nextflow.config" process { """ - input[0] = BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) + input[0] = Channel.of([ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai', checkIfExists: true) + ]) input[1] = 'bam' """ } @@ -144,22 +55,22 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - process.out.bam, + bam(process.out.bam[0][1]).getSamLinesMD5(), process.out.versions).match() } ) } } - test("umicollapse fastq tests") { + test("umicollapse fastq test (single-end)") { when { - config "./nextflow_SE.config" + config "./nextflow.config" process { """ input[0] = [ [ id:'test', single_end:true ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test.umi_extract_single.fastq.gz', checkIfExists: true), [] ] input[1] = 'fastq' @@ -177,61 +88,76 @@ nextflow_process { } } - test("umicollapse stub tests") { - options "-stub-run" - setup{ - run("UMITOOLS_EXTRACT"){ - script "../../umitools/extract/main.nf" - config "./nextflow_PE.config" - process{ + test("umicollapse fastq test (paired-end)") { + + when { + config "./nextflow.config" + process { """ input[0] = [ - [ id:'test', single_end:false ], // meta map - [ - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) - ] + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test.umi_extract_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test.umi_extract_2.fastq.gz', checkIfExists: true) ] + input[1] = 'fastq' """ } } - run("BWA_INDEX"){ - script "../../bwa/index/main.nf" - process{ - """ - input[0] = [ - [ id:'sarscov2'], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ] - """ - } - } - run("BWA_MEM"){ - script "../../bwa/mem/main.nf" - process{ - """ - input[0] = UMITOOLS_EXTRACT.out.reads - input[1] = BWA_INDEX.out.index - input[2] = [[ id:'sarscov2'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] - input[3] = true - """ - } + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.fastq, + process.out.versions).match() } + ) } - run("SAMTOOLS_INDEX"){ - script "../../samtools/index/main.nf" - process{ - """ - input[0] = BWA_MEM.out.bam - """ - } + } + + // Stub tests + + test("umicollapse single end test - stub") { + + options "-stub" + + when { + config "./nextflow.config" + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.umi.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.umi.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = 'bam' + """ } } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.versions).match() } + ) + } + + } + + test("umicollapse paired tests - stub") { + + options "-stub" + when { - config "./nextflow_PE.config" + config "./nextflow.config" process { """ - input[0] = BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) + input[0] = Channel.of([ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai', checkIfExists: true) + ]) input[1] = 'bam' """ } @@ -240,7 +166,9 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.bam, + process.out.versions).match() } ) } diff --git a/modules/nf-core/umicollapse/tests/main.nf.test.snap b/modules/nf-core/umicollapse/tests/main.nf.test.snap index bf6d5f30cb4..e903c0ce054 100644 --- a/modules/nf-core/umicollapse/tests/main.nf.test.snap +++ b/modules/nf-core/umicollapse/tests/main.nf.test.snap @@ -1,13 +1,38 @@ { "umicollapse single end test": { + "content": [ + "9158ea6e7a0e54819e25cbac5fbc5cc0", + [ + "versions.yml:md5,03fdbcb1ba9bd40325ca42859d39deb1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-25T17:31:45.024306" + }, + "umicollapse paired tests": { + "content": [ + "b7be15ac7aae194b04bdbb56f3534495", + [ + "versions.yml:md5,03fdbcb1ba9bd40325ca42859d39deb1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-25T17:31:52.072799" + }, + "umicollapse fastq test (paired-end)": { "content": [ [ [ { - "id": "test", - "single_end": true + "id": "test" }, - "test.dedup.bam:md5,89e844724f73fae9e7100506d0be5775" + "test.dedup.fastq.gz:md5,721a84a2accac988d636e837c60e47bc" ] ], [ @@ -15,12 +40,12 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.8.4", + "nextflow": "24.10.1" }, - "timestamp": 
"2024-10-22T10:43:04.890267074" + "timestamp": "2024-11-24T13:57:36.968147" }, - "umicollapse fastq tests": { + "umicollapse fastq test (single-end)": { "content": [ [ [ @@ -28,7 +53,7 @@ "id": "test", "single_end": true }, - "test.dedup.fastq.gz:md5,c9bac08c7fd8df3e0203e3eeafc73155" + "test.dedup.fastq.gz:md5,2e602ed23eb87f434e4f0a9e491c0310" ] ], [ @@ -36,89 +61,50 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.8.4", + "nextflow": "24.10.1" }, - "timestamp": "2024-10-22T10:43:45.691571914" + "timestamp": "2024-11-24T13:57:28.328682" }, - "umicollapse stub tests": { + "umicollapse single end test - stub": { "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.dedup.dedup.bam:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - - ], - "2": [ - [ - { - "id": "test", - "single_end": false - }, - "test.dedup_UMICollapse.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "3": [ - "versions.yml:md5,c1e0275d81b1c97a9344d216f9154996" - ], - "bam": [ - [ - { - "id": "test", - "single_end": false - }, - "test.dedup.dedup.bam:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "fastq": [ - - ], - "log": [ - [ - { - "id": "test", - "single_end": false - }, - "test.dedup_UMICollapse.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,c1e0275d81b1c97a9344d216f9154996" + [ + [ + { + "id": "test", + "single_end": true + }, + "test.dedup.dedup.bam:md5,d41d8cd98f00b204e9800998ecf8427e" ] - } + ], + [ + "versions.yml:md5,c1e0275d81b1c97a9344d216f9154996" + ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.10.1" }, - "timestamp": "2024-01-30T10:46:12.482697713" + "timestamp": "2024-11-24T14:09:40.277318" }, - "umicollapse paired tests": { + "umicollapse paired tests - stub": { "content": [ [ [ { - "id": "test", - "single_end": false + "id": "test" }, - "test.dedup.bam:md5,3e2ae4701e3d2ca074ea878a314a3e4f" + "test.dedup.dedup.bam:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], [ - "versions.yml:md5,03fdbcb1ba9bd40325ca42859d39deb1" + "versions.yml:md5,c1e0275d81b1c97a9344d216f9154996" ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.8.4", + "nextflow": "24.10.1" }, - "timestamp": "2024-10-22T10:43:33.250587075" + "timestamp": "2024-11-24T14:09:44.224965" } } \ No newline at end of file diff --git a/modules/nf-core/umicollapse/tests/nextflow.config b/modules/nf-core/umicollapse/tests/nextflow.config index 844edbdc671..105d8e13261 100644 --- a/modules/nf-core/umicollapse/tests/nextflow.config +++ b/modules/nf-core/umicollapse/tests/nextflow.config @@ -1,8 +1,5 @@ process { - withName: UMITOOLS_EXTRACT { - ext.args = '--bc-pattern="NNNN"' - } withName: UMICOLLAPSE { ext.prefix = { "${meta.id}.dedup" } } -} \ No newline at end of file +} diff --git a/modules/nf-core/umicollapse/tests/nextflow_PE.config b/modules/nf-core/umicollapse/tests/nextflow_PE.config deleted file mode 100644 index ae4c96320e9..00000000000 --- a/modules/nf-core/umicollapse/tests/nextflow_PE.config +++ /dev/null @@ -1,10 +0,0 @@ -process { - - withName: UMITOOLS_EXTRACT { - ext.args = '--bc-pattern="NNNN" --bc-pattern2="NNNN"' - } - - withName: UMICOLLAPSE { - ext.prefix = { "${meta.id}.dedup" } - } -} diff --git a/modules/nf-core/umicollapse/tests/nextflow_SE.config b/modules/nf-core/umicollapse/tests/nextflow_SE.config deleted file mode 100644 index d4b9443652a..00000000000 --- a/modules/nf-core/umicollapse/tests/nextflow_SE.config +++ /dev/null @@ -1,10 +0,0 @@ -process { - - withName: 
UMITOOLS_EXTRACT { - ext.args = '--bc-pattern="NNNN"' - } - - withName: UMICOLLAPSE { - ext.prefix = { "${meta.id}.dedup" } - } -} diff --git a/modules/nf-core/umitools/dedup/tests/main.nf.test b/modules/nf-core/umitools/dedup/tests/main.nf.test index ab4455366e7..f00a8cbed7b 100644 --- a/modules/nf-core/umitools/dedup/tests/main.nf.test +++ b/modules/nf-core/umitools/dedup/tests/main.nf.test @@ -19,8 +19,8 @@ nextflow_process { input[0] = [ [ id:'test', single_end:true ], // meta map - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.single_end.sorted.bam", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.single_end.sorted.bam.bai", checkIfExists: true) + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.single_end.umi.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.single_end.umi.sorted.bam.bai", checkIfExists: true) ] input[1] = get_output_stats """ @@ -48,8 +48,8 @@ nextflow_process { input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai", checkIfExists: true) + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai", checkIfExists: true) ] input[1] = get_output_stats """ @@ -61,7 +61,7 @@ nextflow_process { { assert process.success }, { assert path("${process.out.log[0][1]}").exists() }, { assert snapshot( - process.out.bam, + bam(process.out.bam[0][1]).getSamLinesMD5(), process.out.versions).match() } ) } @@ -77,8 +77,8 @@ nextflow_process { input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai", checkIfExists: true) + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai", checkIfExists: true) ] input[1] = get_output_stats """ @@ -90,7 +90,7 @@ nextflow_process { { assert process.success }, { assert path("${process.out.log[0][1]}").exists() }, { assert snapshot( - process.out.bam, + bam(process.out.bam[0][1]).getSamLinesMD5(), process.out.tsv_edit_distance, process.out.tsv_per_umi, process.out.tsv_umi_per_position, @@ -112,8 +112,8 @@ nextflow_process { input[0] = [ [ id:'test', single_end:true ], // meta map - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.single_end.sorted.bam", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.single_end.sorted.bam.bai", checkIfExists: true) + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.single_end.umi.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.single_end.umi.sorted.bam.bai", checkIfExists: true) ] input[1] = get_output_stats """ @@ -141,8 +141,8 @@ nextflow_process 
{ input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai", checkIfExists: true) + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai", checkIfExists: true) ] input[1] = get_output_stats """ @@ -170,8 +170,8 @@ nextflow_process { input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai", checkIfExists: true) + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai", checkIfExists: true) ] input[1] = get_output_stats """ diff --git a/modules/nf-core/umitools/dedup/tests/main.nf.test.snap b/modules/nf-core/umitools/dedup/tests/main.nf.test.snap index f7f4e94f196..04b81692a6d 100644 --- a/modules/nf-core/umitools/dedup/tests/main.nf.test.snap +++ b/modules/nf-core/umitools/dedup/tests/main.nf.test.snap @@ -37,13 +37,14 @@ }, "pe - with stats": { "content": [ + "b7be15ac7aae194b04bdbb56f3534495", [ [ { "id": "test", "single_end": false }, - "test.dedup.bam:md5,350e942a0d45e8356fa24bc8c47dc1ed" + "test.dedup_edit_distance.tsv:md5,c247a49b58768e6e2e86a6c08483e612" ] ], [ @@ -52,7 +53,7 @@ "id": "test", "single_end": false }, - "test.dedup_edit_distance.tsv:md5,65186b0964e2f8d970cc04d736d8b119" + "test.dedup_per_umi.tsv:md5,ced75f7bdbf38bf78f3137d5325a8773" ] ], [ @@ -61,16 +62,7 @@ "id": "test", "single_end": false }, - "test.dedup_per_umi.tsv:md5,8e6783a4a79437b095f095f2aefe7c01" - ] - ], - [ - [ - { - "id": "test", - "single_end": false - }, - "test.dedup_per_umi_per_position.tsv:md5,9386db4a104b8e4e32f3ca4a84efa4ac" + "test.dedup_per_umi_per_position.tsv:md5,2e1a12e6f720510880068deddeefe063" ] ], [ @@ -79,9 +71,9 @@ ], "meta": { "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nextflow": "24.10.1" }, - "timestamp": "2024-07-03T11:27:24.231325" + "timestamp": "2024-11-25T17:25:28.939957" }, "se - no stats - stub": { "content": [ @@ -103,36 +95,28 @@ }, "se - no stats": { "content": [ - "a114abd9fccce6fe2869852b5cd18964", + "9158ea6e7a0e54819e25cbac5fbc5cc0", [ "versions.yml:md5,e2f5146464c09bf7ae98c85ea5410e50" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nextflow": "24.10.1" }, - "timestamp": "2024-07-03T13:45:48.553561" + "timestamp": "2024-11-23T09:06:54.373171" }, "pe - no stats": { "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, - "test.dedup.bam:md5,350e942a0d45e8356fa24bc8c47dc1ed" - ] - ], + "b7be15ac7aae194b04bdbb56f3534495", [ "versions.yml:md5,e2f5146464c09bf7ae98c85ea5410e50" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nextflow": "24.10.1" }, - "timestamp": "2024-07-03T11:27:06.957467" + "timestamp": "2024-11-25T17:24:51.423637" } } \ No newline at end of file diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/main.nf.test 
b/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/main.nf.test index dd7f2371869..f4f14c71b9c 100644 --- a/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/main.nf.test +++ b/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/main.nf.test @@ -22,55 +22,6 @@ nextflow_workflow { test("sarscov2_bam_bai") { - setup{ - run("UMITOOLS_EXTRACT"){ - script "../../../../modules/nf-core/umitools/extract/main.nf" - config "./paired-end-umis.config" - process{ - """ - input[0] = [ - [ id:'test', single_end:false ], // meta map - [ - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) - ] - ] - """ - } - } - - run("BWA_INDEX"){ - script "../../../../modules/nf-core/bwa/index/main.nf" - process{ - """ - input[0] = [ - [ id:'sarscov2'], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ] - """ - } - } - run("BWA_MEM"){ - script "../../../../modules/nf-core/bwa/mem/main.nf" - process{ - """ - input[0] = UMITOOLS_EXTRACT.out.reads - input[1] = BWA_INDEX.out.index - input[2] = [[ id:'sarscov2'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] - input[3] = true - """ - } - } - run("SAMTOOLS_INDEX"){ - script "../../../../modules/nf-core/samtools/index/main.nf" - process{ - """ - input[0] = BWA_MEM.out.bam - """ - } - } - } - when { config "./paired-end-umis.config" params { @@ -78,9 +29,11 @@ nextflow_workflow { } workflow { """ - - input[0] = BWA_MEM.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) - + input[0] = Channel.of([ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai', checkIfExists: true) + ]) """ } } @@ -88,7 +41,8 @@ nextflow_workflow { then { assertAll( { assert workflow.success}, - { assert snapshot(workflow.out.bam, workflow.out.versions).match() }, + { assert snapshot(bam(workflow.out.bam[0][1]).getSamLinesMD5()).match("test_bam_dedup_stats_samtools_umicollapse_bam")}, // separate, because of linting error otherwise + { assert snapshot(workflow.out.versions).match("test_bam_dedup_stats_samtools_umicollapse_versions") }, { assert workflow.out.bam.get(0).get(1) ==~ ".*.bam"}, { assert workflow.out.bai.get(0).get(1) ==~ ".*.bai"}, { assert workflow.out.dedup_stats.get(0).get(1) ==~ ".*_UMICollapse.log"}, diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/main.nf.test.snap b/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/main.nf.test.snap index f2b1fb20d9a..9694c6d5f68 100644 --- a/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/main.nf.test.snap +++ b/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/main.nf.test.snap @@ -4,69 +4,57 @@ [ [ { - "id": "test", - "single_end": false + "id": "test" }, "test.stats:md5,498621f92e86d55e4f7ae93170e6e733" ] ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.8.4", + "nextflow": "24.10.1" }, - "timestamp": "2024-09-16T08:04:02.179870196" + "timestamp": "2024-11-24T13:57:02.323104" }, - "test_bam_dedup_stats_samtools_umicollapse_flagstats": { + "test_bam_dedup_stats_samtools_umicollapse_versions": { "content": [ [ - [ - { 
- "id": "test", - "single_end": false - }, - "test.flagstat:md5,18d602435a02a4d721b78d1812622159" - ] + "versions.yml:md5,20605eb79c410c0ed179ba660d82f75b", + "versions.yml:md5,23617661d2c899996bee2b05db027e25", + "versions.yml:md5,657bce03545b4c57f9c5fc4314bf85f7", + "versions.yml:md5,e02a62a393a833778e16542eeed0d148", + "versions.yml:md5,ef00762e264b99ac45713dc0dedf4060" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.10.1" }, - "timestamp": "2024-04-09T17:05:48.69612524" + "timestamp": "2024-11-25T18:39:15.637444" }, - "sarscov2_bam_bai": { + "test_bam_dedup_stats_samtools_umicollapse_flagstats": { "content": [ [ [ { - "id": "test", - "single_end": false + "id": "test" }, - "test.dedup.bam:md5,3e2ae4701e3d2ca074ea878a314a3e4f" + "test.flagstat:md5,18d602435a02a4d721b78d1812622159" ] - ], - [ - "versions.yml:md5,20605eb79c410c0ed179ba660d82f75b", - "versions.yml:md5,23617661d2c899996bee2b05db027e25", - "versions.yml:md5,657bce03545b4c57f9c5fc4314bf85f7", - "versions.yml:md5,e02a62a393a833778e16542eeed0d148", - "versions.yml:md5,ef00762e264b99ac45713dc0dedf4060" ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.8.4", + "nextflow": "24.10.1" }, - "timestamp": "2024-10-22T10:44:38.266860983" + "timestamp": "2024-11-24T13:57:02.366866" }, "test_bam_dedup_stats_samtools_umicollapse_idxstats": { "content": [ [ [ { - "id": "test", - "single_end": false + "id": "test" }, "test.idxstats:md5,85d20a901eef23ca50c323638a2eb602" ] @@ -74,8 +62,18 @@ ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-24T13:57:02.410712" + }, + "test_bam_dedup_stats_samtools_umicollapse_bam": { + "content": [ + "b7be15ac7aae194b04bdbb56f3534495" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.10.1" }, - "timestamp": "2024-04-09T17:05:48.740441747" + "timestamp": "2024-11-25T18:39:15.613319" } } \ No newline at end of file diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/paired-end-umis.config b/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/paired-end-umis.config index 602c026f058..8d58a861f4b 100644 --- a/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/paired-end-umis.config +++ b/subworkflows/nf-core/bam_dedup_stats_samtools_umicollapse/tests/paired-end-umis.config @@ -1,10 +1,6 @@ process { - withName: UMITOOLS_EXTRACT { - ext.args = '--bc-pattern="NNNN" --bc-pattern2="NNNN"' - } - withName: UMICOLLAPSE { ext.prefix = { "${meta.id}.dedup" } } -} \ No newline at end of file +} diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/main.nf.test b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/main.nf.test index 9d38022b4bf..93e62485764 100644 --- a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/main.nf.test +++ b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/main.nf.test @@ -26,8 +26,8 @@ nextflow_workflow { input[0] = Channel.of([ [ id:'test'], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai', checkIfExists: true) ]) input[1] 
= val_get_dedup_stats """ @@ -41,6 +41,7 @@ nextflow_workflow { { assert workflow.out.bam.get(0).get(1) ==~ ".*.bam"}, { assert workflow.out.bai.get(0).get(1) ==~ ".*.bai"}, { assert snapshot( + bam(workflow.out.bam[0][1]).getSamLinesMD5(), workflow.out.stats, workflow.out.flagstat, workflow.out.idxstats, @@ -61,8 +62,8 @@ nextflow_workflow { input[0] = Channel.of([ [ id:'test'], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.umi.sorted.bam.bai', checkIfExists: true) ]) input[1] = val_get_dedup_stats """ diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/main.nf.test.snap b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/main.nf.test.snap index d39f9129b9a..3b36135720b 100644 --- a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/main.nf.test.snap +++ b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/tests/main.nf.test.snap @@ -1,12 +1,13 @@ { "sarscov2_bam_bai": { "content": [ + "b7be15ac7aae194b04bdbb56f3534495", [ [ { "id": "test" }, - "test.stats:md5,84891a894010aeb882c4092db9248d2c" + "test.stats:md5,41ba57a9b90b54587e7d154e5405ea5e" ] ], [ @@ -14,7 +15,7 @@ { "id": "test" }, - "test.flagstat:md5,0bb716e40fae381b97484b58e0b16efe" + "test.flagstat:md5,18d602435a02a4d721b78d1812622159" ] ], [ @@ -22,7 +23,7 @@ { "id": "test" }, - "test.idxstats:md5,1adb27b52d4d64b826f48b59d61dcd4d" + "test.idxstats:md5,85d20a901eef23ca50c323638a2eb602" ] ], [ @@ -34,10 +35,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.8.4", + "nextflow": "24.10.1" }, - "timestamp": "2024-09-16T08:04:23.444693448" + "timestamp": "2024-11-25T17:23:13.841219" }, "sarscov2_bam_bai - stub": { "content": [ From 219299d7fc22a55cd3a07c2122044e7ca24b815f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Famke=20B=C3=A4uerle?= <45968370+famosab@users.noreply.github.com> Date: Fri, 29 Nov 2024 14:59:31 +0100 Subject: [PATCH 07/12] Add module `muse/call` (#5630) * start work on muse/call * correct command and add snap * prettier * exclude conda * change to bioconda * update snap * add conda test * update meta * change process label * add indices and update meta * update containers and version extraction --- modules/nf-core/muse/call/environment.yml | 5 ++ modules/nf-core/muse/call/main.nf | 50 +++++++++++++ modules/nf-core/muse/call/meta.yml | 72 +++++++++++++++++++ modules/nf-core/muse/call/tests/main.nf.test | 72 +++++++++++++++++++ .../nf-core/muse/call/tests/main.nf.test.snap | 68 ++++++++++++++++++ modules/nf-core/muse/call/tests/tags.yml | 2 + 6 files changed, 269 insertions(+) create mode 100644 modules/nf-core/muse/call/environment.yml create mode 100644 modules/nf-core/muse/call/main.nf create mode 100644 modules/nf-core/muse/call/meta.yml create mode 100644 modules/nf-core/muse/call/tests/main.nf.test create mode 100644 modules/nf-core/muse/call/tests/main.nf.test.snap create mode 100644 modules/nf-core/muse/call/tests/tags.yml diff --git a/modules/nf-core/muse/call/environment.yml b/modules/nf-core/muse/call/environment.yml new file mode 100644 index 00000000000..5bc34c10360 --- /dev/null +++ 
b/modules/nf-core/muse/call/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::muse=2.1.2" diff --git a/modules/nf-core/muse/call/main.nf b/modules/nf-core/muse/call/main.nf new file mode 100644 index 00000000000..b4559761ab0 --- /dev/null +++ b/modules/nf-core/muse/call/main.nf @@ -0,0 +1,50 @@ +process MUSE_CALL { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/9f/9f0ebb574ef5eed2a6e034f1b2feea6c252d1ab0c8bc5135a669059aa1f4d2ca/data': + 'community.wave.seqera.io/library/muse:6637291dcbb0bdb8' }" + + input: + tuple val(meta), path(tumor_bam), path(tumor_bai), path(normal_bam), path(normal_bai) + tuple val(meta2), path(reference) + + output: + tuple val(meta), path("*.MuSE.txt"), emit: txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + MuSE \\ + call \\ + $args \\ + -f $reference \\ + -O ${prefix} \\ + -n $task.cpus \\ + $tumor_bam \\ + $normal_bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + MuSE: \$( MuSE --version | sed -e "s/MuSE, version //g" ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.MuSE.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + MuSE: \$( MuSE --version | sed -e "s/MuSE, version //g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/muse/call/meta.yml b/modules/nf-core/muse/call/meta.yml new file mode 100644 index 00000000000..6733677dbdc --- /dev/null +++ b/modules/nf-core/muse/call/meta.yml @@ -0,0 +1,72 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "muse_call" +description: pre-filtering and calculating position-specific summary statistics using + the Markov substitution model +keywords: + - variant calling + - somatic + - wgs + - wxs + - vcf +tools: + - "MuSE": + description: "Somatic point mutation caller based on Markov substitution model + for molecular evolution" + homepage: "https://bioinformatics.mdanderson.org/public-software/muse/" + documentation: "https://github.com/wwylab/MuSE" + tool_dev_url: "https://github.com/wwylab/MuSE" + doi: "10.1101/gr.278456.123" + licence: ["https://github.com/danielfan/MuSE/blob/master/LICENSE"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - tumor_bam: + type: file + description: Sorted tumor BAM file + pattern: "*.bam" + - tumor_bai: + type: file + description: Index file for the tumor BAM file + pattern: "*.bai" + - normal_bam: + type: file + description: Sorted matched normal BAM file + pattern: "*.bam" + - normal_bai: + type: file + description: Index file for the normal BAM file + pattern: "*.bai" + - - meta2: + type: map + description: | + Groovy Map containing reference information. + e.g. `[ id:'test' ]` + - reference: + type: file + description: reference genome file + pattern: ".fasta" +output: + - txt: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - "*.MuSE.txt": + type: file + description: position-specific summary statistics + pattern: "*.MuSE.txt" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@famosab" +maintainers: + - "@famosab" diff --git a/modules/nf-core/muse/call/tests/main.nf.test b/modules/nf-core/muse/call/tests/main.nf.test new file mode 100644 index 00000000000..b5e441ec32d --- /dev/null +++ b/modules/nf-core/muse/call/tests/main.nf.test @@ -0,0 +1,72 @@ +nextflow_process { + + name "Test Process MUSE_CALL" + script "../main.nf" + process "MUSE_CALL" + + tag "modules" + tag "modules_nfcore" + tag "muse" + tag "muse/call" + + test("human - bam") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true) + ] + input[1] = [ + [ id:'reference' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("human - bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true) + ] + input[1] = [ + [ id:'reference' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/muse/call/tests/main.nf.test.snap b/modules/nf-core/muse/call/tests/main.nf.test.snap new file mode 100644 index 00000000000..ead8906a9ce --- /dev/null +++ b/modules/nf-core/muse/call/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "human - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.MuSE.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,de7c8f535f5b17473ed6aab68f1d70c1" + ], + "txt": [ + [ + { + "id": "test" + }, + "test.MuSE.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,de7c8f535f5b17473ed6aab68f1d70c1" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-29T14:30:48.292828" + }, + "human - bam": { + "content": [ + { + "0": [ + [ + { + "id": "test" + 
}, + "test.MuSE.txt:md5,3a38ee9131a217cc56199bd4a6b18e1d" + ] + ], + "1": [ + "versions.yml:md5,de7c8f535f5b17473ed6aab68f1d70c1" + ], + "txt": [ + [ + { + "id": "test" + }, + "test.MuSE.txt:md5,3a38ee9131a217cc56199bd4a6b18e1d" + ] + ], + "versions": [ + "versions.yml:md5,de7c8f535f5b17473ed6aab68f1d70c1" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-29T14:30:32.522553" + } +} \ No newline at end of file diff --git a/modules/nf-core/muse/call/tests/tags.yml b/modules/nf-core/muse/call/tests/tags.yml new file mode 100644 index 00000000000..4e04a91a4af --- /dev/null +++ b/modules/nf-core/muse/call/tests/tags.yml @@ -0,0 +1,2 @@ +muse/call: + - "modules/nf-core/muse/call/**" From 081e10ea2b4fd40c61ed49a424dd69a44450952d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlia=20Mir=20Pedrol?= Date: Fri, 29 Nov 2024 16:54:15 +0100 Subject: [PATCH 08/12] Remove unused code from utils_nfcore_pipeline swf (#7094) remove unused code from utils_nfcore_pipeline swf --- .../nf-core/utils_nfcore_pipeline/main.nf | 42 --------------- .../tests/main.function.nf.test | 52 ------------------- .../tests/main.function.nf.test.snap | 30 ----------- 3 files changed, 124 deletions(-) diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index 4cd33626fb1..1c912124767 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -56,21 +56,6 @@ def checkProfileProvided(nextflow_cli_args) { } } -// -// Citation string for pipeline -// -def workflowCitation() { - def temp_doi_ref = "" - def manifest_doi = workflow.manifest.doi.tokenize(",") - // Handling multiple DOIs - // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers - // Removing ` ` since the manifest.doi is a string and not a proper list - manifest_doi.each { doi_ref -> - temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" - } - return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + "* The pipeline\n" + temp_doi_ref + "\n" + "* The nf-core framework\n" + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + "* Software dependencies\n" + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" -} - // // Generate workflow version string // @@ -150,33 +135,6 @@ def paramsSummaryMultiqc(summary_params) { return yaml_file_text } -// -// nf-core logo -// -def nfCoreLogo(monochrome_logs=true) { - def colors = logColours(monochrome_logs) as Map - String.format( - """\n - ${dashedLine(monochrome_logs)} - ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} - ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} - ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} - ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} - ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} - ${dashedLine(monochrome_logs)} - """.stripIndent() - ) -} - -// -// Return dashed line -// -def dashedLine(monochrome_logs=true) { - def colors = logColours(monochrome_logs) as Map - return "-${colors.dim}----------------------------------------------------${colors.reset}-" -} - // // ANSII colours used for terminal logging // diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test 
b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test index 1dc317f8f7b..e43d208b1b0 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test @@ -41,58 +41,6 @@ nextflow_function { } } - test("Test Function workflowCitation") { - - function "workflowCitation" - - then { - assertAll( - { assert function.success }, - { assert snapshot(function.result).match() } - ) - } - } - - test("Test Function nfCoreLogo") { - - function "nfCoreLogo" - - when { - function { - """ - input[0] = false - """ - } - } - - then { - assertAll( - { assert function.success }, - { assert snapshot(function.result).match() } - ) - } - } - - test("Test Function dashedLine") { - - function "dashedLine" - - when { - function { - """ - input[0] = false - """ - } - } - - then { - assertAll( - { assert function.success }, - { assert snapshot(function.result).match() } - ) - } - } - test("Test Function without logColours") { function "logColours" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap index 1037232c9e4..02c67014139 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -17,26 +17,6 @@ }, "timestamp": "2024-02-28T12:02:59.729647" }, - "Test Function nfCoreLogo": { - "content": [ - "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T12:03:10.562934" - }, - "Test Function workflowCitation": { - "content": [ - "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T12:03:07.019761" - }, "Test Function without logColours": { "content": [ { @@ -95,16 +75,6 @@ }, "timestamp": "2024-02-28T12:03:17.969323" }, - "Test Function dashedLine": { - "content": [ - "-\u001b[2m----------------------------------------------------\u001b[0m-" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T12:03:14.366181" - }, "Test Function with logColours": { "content": [ { From e69c465bede27c02ade4f0acec51f10732ebbe6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alan=20M=C3=B6bbs?= <64787947+alanmmobbs93@users.noreply.github.com> Date: Fri, 29 Nov 2024 13:23:33 -0300 Subject: [PATCH 09/12] fix linting on meta.yml files (#7121) --- modules/nf-core/nacho/normalize/meta.yml | 52 ++++++++--------- modules/nf-core/nacho/qc/meta.yml | 72 ++++++++++++------------ 2 files changed, 62 insertions(+), 62 deletions(-) diff --git a/modules/nf-core/nacho/normalize/meta.yml b/modules/nf-core/nacho/normalize/meta.yml 
b/modules/nf-core/nacho/normalize/meta.yml
index 8fddb762e3a..96e9a541a72 100644 --- a/modules/nf-core/nacho/normalize/meta.yml +++ b/modules/nf-core/nacho/normalize/meta.yml @@ -21,7 +21,7 @@ tools: homepage: https://github.com/mcanouil/NACHO documentation: https://cran.r-project.org/web/packages/NACHO/vignettes/NACHO.html doi: "10.1093/bioinformatics/btz647" - licence: [ "GPL-3.0" ] + licence: ["GPL-3.0"] identifier: "" args_id: "$args" @@ -49,34 +49,34 @@ input: output: - normalized_counts: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - "normalized_counts.tsv": - type: file - description: | - Tab-separated file with gene normalized counts for the samples - pattern: "normalized_counts.tsv" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "normalized_counts.tsv": + type: file + description: | + Tab-separated file with gene normalized counts for the samples + pattern: "normalized_counts.tsv" - normalized_counts_wo_HK: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - "normalized_counts_wo_HKnorm.tsv": - type: file - description: | - Tab-separated file with gene normalized counts for the samples, without housekeeping genes. - pattern: "normalized_counts_wo_HKnorm.tsv" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "normalized_counts_wo_HKnorm.tsv": + type: file + description: | + Tab-separated file with gene normalized counts for the samples, without housekeeping genes. + pattern: "normalized_counts_wo_HKnorm.tsv" - versions: - - "versions.yml": - type: file - description: | - File containing software versions - pattern: "versions.yml" + - "versions.yml": + type: file + description: | + File containing software versions + pattern: "versions.yml" authors: - "@alanmmobbs93" diff --git a/modules/nf-core/nacho/qc/meta.yml b/modules/nf-core/nacho/qc/meta.yml index f3c14934bd4..6d5aaadc6c9 100644 --- a/modules/nf-core/nacho/qc/meta.yml +++ b/modules/nf-core/nacho/qc/meta.yml @@ -20,7 +20,7 @@ tools: homepage: https://github.com/mcanouil/NACHO documentation: https://cran.r-project.org/web/packages/NACHO/vignettes/NACHO.html doi: "10.1093/bioinformatics/btz647" - licence: [ "GPL-3.0" ] + licence: ["GPL-3.0"] identifier: "" input: - - meta: @@ -45,44 +45,44 @@ input: Comma-separated file with 3 columns: RCC_FILE, RCC_FILE_NAME, and SAMPLE_ID output: - nacho_qc_reports: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - "*.html": - type: file - description: | - HTML report - pattern: "*.html" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.html": + type: file + description: | + HTML report + pattern: "*.html" - nacho_qc_png: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - "*_mqc.png": - type: file - description: | - Output PNG files - pattern: "*_mqc.png" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*_mqc.png": + type: file + description: | + Output PNG files + pattern: "*_mqc.png" - nacho_qc_txt: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - "*_mqc.txt": - type: file - description: | - Plain text reports - pattern: "*_mqc.txt" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - "*_mqc.txt": + type: file + description: | + Plain text reports + pattern: "*_mqc.txt" - versions: - - "versions.yml": - type: file - description: | - File containing software versions - pattern: "versions.yml" + - "versions.yml": + type: file + description: | + File containing software versions + pattern: "versions.yml" authors: - "@alanmmobbs93" maintainers: From c1a6b176601ee6608c4fcd7e3ed057891f34f8a2 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Fri, 29 Nov 2024 17:22:12 +0000 Subject: [PATCH 10/12] Add name to SYLPH_SKETCH module tests (#7122) --- modules/nf-core/sylph/sketch/tests/main.nf.test | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/nf-core/sylph/sketch/tests/main.nf.test b/modules/nf-core/sylph/sketch/tests/main.nf.test index 004f5137f69..f5e4d5887b7 100644 --- a/modules/nf-core/sylph/sketch/tests/main.nf.test +++ b/modules/nf-core/sylph/sketch/tests/main.nf.test @@ -1,5 +1,6 @@ nextflow_process { + name "Test Process SYLPH_SKETCH" script "../main.nf" process "SYLPH_SKETCH" tag "modules" From bd5f75ccaf2345269810e66e85de8a70e4de8764 Mon Sep 17 00:00:00 2001 From: Sateesh_Peri <33637490+sateeshperi@users.noreply.github.com> Date: Sat, 30 Nov 2024 11:59:38 +0530 Subject: [PATCH 11/12] Fastq align dedup bwameth (#7007) * init bwameth subworkflow * update output channels * bwameth single, paired-end default, skip_dedup tests * add GPU tests * separate GPU tests * add subworkflow GPU test path to test.yml include * add subworkflow GPU test path to test.yml include * add subworkflow GPU test path to test.yml include * use more descriptive collect variables than it * rename file to be explicit * separate profile exclusions * add gpu tag * rm old test.yml * add gpu test path to gpu-tests.yml:ci * Added log_level: DEBUG * Setup CI for debug * add more debug steps * usman's fix for exclude tags * Updated fail condition * ci sync to master * fix filtering by usman * Removed --changed-since when tags are supplied * Enabled more checks and added a non-gpu module * Two fixes * Now using paths * Fixed paths * Fixed typo * Now pass all paths instead of matrix * Added confirm pass, cleaned dispatch and removed DEBUG * Renamed all passes to confirm-pass * Now using adamrtalbot/detect-nf-test-changes * small ch format fix --------- Co-authored-by: Simon Pearce <24893913+SPPearce@users.noreply.github.com> Co-authored-by: Usman Rashid --- .github/actions/nf-test-action/action.yml | 11 +- .github/workflows/gpu-tests.yml | 43 ++-- .github/workflows/nf-test.yml | 91 +++++-- modules/nf-core/gffread/tests/main.nf.test | 3 +- .../nf-core/fastq_align_dedup_bwameth/main.nf | 163 +++++++++++++ .../fastq_align_dedup_bwameth/meta.yml | 116 +++++++++ .../tests/gpu.nf.test | 132 +++++++++++ .../tests/gpu.nf.test.snap | 149 ++++++++++++ .../tests/main.nf.test | 179 ++++++++++++++ .../tests/main.nf.test.snap | 224 ++++++++++++++++++ .../tests/nextflow.config | 14 ++ 11 files changed, 1078 insertions(+), 47 deletions(-) create mode 100644 subworkflows/nf-core/fastq_align_dedup_bwameth/main.nf create mode 100644 subworkflows/nf-core/fastq_align_dedup_bwameth/meta.yml create mode 100644 subworkflows/nf-core/fastq_align_dedup_bwameth/tests/gpu.nf.test create mode 100644 subworkflows/nf-core/fastq_align_dedup_bwameth/tests/gpu.nf.test.snap create mode 100644 subworkflows/nf-core/fastq_align_dedup_bwameth/tests/main.nf.test create mode 100644 subworkflows/nf-core/fastq_align_dedup_bwameth/tests/main.nf.test.snap create mode 100644 
subworkflows/nf-core/fastq_align_dedup_bwameth/tests/nextflow.config diff --git a/.github/actions/nf-test-action/action.yml b/.github/actions/nf-test-action/action.yml index 8cd79f21680..a6f2532f926 100644 --- a/.github/actions/nf-test-action/action.yml +++ b/.github/actions/nf-test-action/action.yml @@ -10,9 +10,9 @@ inputs: total_shards: description: "Total number of test shards(NOT the total number of matrix jobs)" required: true - tags: - description: "Tags to test (`[,...]`)" - required: false + paths: + description: "Test paths" + required: true runs: using: "composite" @@ -72,7 +72,6 @@ runs: env: SENTIEON_LICSRVR_IP: ${{ env.SENTIEON_LICSRVR_IP }} SENTIEON_AUTH_MECH: "GitHub Actions - token" - TAGS: ${{ inputs.tags && format('--tag {0}', inputs.tags) || '' }} run: | NFT_WORKDIR=~ \ nf-test test \ @@ -80,11 +79,9 @@ runs: --tap=test.tap \ --verbose \ --ci \ - --changed-since HEAD^ \ --shard ${{ inputs.shard }}/${{ inputs.total_shards }} \ --filter process,workflow \ - --follow-dependencies \ - ${{ env.TAGS }} + ${{ inputs.paths }} # TODO If no test.tap, then make one to spoof? - uses: pcolby/tap-summary@0959cbe1d4422e62afc65778cdaea6716c41d936 # v1 diff --git a/.github/workflows/gpu-tests.yml b/.github/workflows/gpu-tests.yml index 34206298769..78667065a3b 100644 --- a/.github/workflows/gpu-tests.yml +++ b/.github/workflows/gpu-tests.yml @@ -6,9 +6,6 @@ on: - "renovate/**" # branches Renovate creates pull_request: branches: [master] - paths: - - ".github/workflows/gpu-tests.yml" - - "modules/nf-core/parabricks/**" merge_group: types: [checks_requested] branches: [master] @@ -16,11 +13,8 @@ on: inputs: runners: description: "Runners to test on" - type: choice - options: - - "ubuntu-latest" - - "self-hosted" - default: "self-hosted" + type: string + default: "gpu" # Cancel if a newer run is started concurrency: @@ -43,7 +37,7 @@ jobs: runs-on: ubuntu-latest outputs: # Expose detected tags as 'modules' and 'workflows' output variables - paths: ${{ steps.outputs.outputs.components }} + paths: ${{ steps.list.outputs.components }} modules: ${{ steps.outputs.outputs.modules }} subworkflows: ${{ steps.outputs.outputs.subworkflows}} # Prod for version bumping @@ -60,11 +54,11 @@ jobs: - name: List nf-test files id: list - uses: adamrtalbot/detect-nf-test-changes@6bf6fd9fe0fb63a0362fb0e09de5acb6d055a754 # v0.0.5 + uses: adamrtalbot/detect-nf-test-changes@de3c3c8e113031b4f15a3c1104b5f135e8346997 # v0.0.6 with: head: ${{ github.sha }} base: ${{ github.event.pull_request.base.sha || github.event.merge_group.base_sha }} - n_parents: 2 + n_parents: 0 tags: "gpu" - name: Separate modules and subworkflows @@ -74,21 +68,20 @@ jobs: echo subworkflows=$(echo '${{ steps.list.outputs.components }}' | jq '. 
| map(select(contains("subworkflows"))) | map(gsub("subworkflows/nf-core/"; ""))') >> $GITHUB_OUTPUT - name: debug run: | - echo ${{ steps.outputs.outputs.components }} + echo ${{ steps.list.outputs.components }} echo ${{ steps.outputs.outputs.modules }} echo ${{ steps.outputs.outputs.subworkflows }} nf-test-gpu: runs-on: "gpu" + name: "GPU | ${{ matrix.profile }} | ${{ matrix.shard }}" needs: nf-test-changes - if: ${{ fromJSON(needs.nf-test-changes.outputs.paths) != '[]' || needs.nf-test-changes.outputs.paths != '' }} - name: "GPU | ${{ matrix.tags}} | ${{ matrix.profile }} | ${{ matrix.shard }}" + if: ${{ needs.nf-test-changes.outputs.modules != '[]' || needs.nf-test-changes.outputs.subworkflows != '[]' }} strategy: fail-fast: false matrix: shard: [1, 2] profile: [docker_self_hosted, singularity] # conda? - tags: ${{ fromJSON(needs.nf-test-changes.outputs.modules) && fromJSON(needs.nf-test-changes.outputs.subworkflows) }} env: NXF_ANSI_LOG: false TOTAL_SHARDS: 2 @@ -108,4 +101,22 @@ jobs: profile: ${{ matrix.profile }},gpu shard: ${{ matrix.shard }} total_shards: ${{ env.TOTAL_SHARDS }} - tags: ${{matrix.tags}},gpu + paths: "${{ join(fromJson(needs.nf-test-changes.outputs.paths), ' ') }}" + + confirm-pass: + runs-on: ubuntu-latest + needs: [nf-test-gpu] + if: always() + steps: + - name: All tests ok + if: ${{ success() || !contains(needs.*.result, 'failure') }} + run: exit 0 + - name: One or more tests failed + if: ${{ contains(needs.*.result, 'failure') }} + run: exit 1 + + - name: debug-print + if: always() + run: | + echo "toJSON(needs) = ${{ toJSON(needs) }}" + echo "toJSON(needs.*.result) = ${{ toJSON(needs.*.result) }}" diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml index 5445a444531..f8c71275bf9 100644 --- a/.github/workflows/nf-test.yml +++ b/.github/workflows/nf-test.yml @@ -35,45 +35,90 @@ env: NXF_VER: "24.10.1" jobs: + nf-test-changes: + name: nf-test-changes + runs-on: ubuntu-latest + outputs: + # Expose detected tags as 'modules' and 'workflows' output variables + paths: ${{ steps.list.outputs.components }} + modules: ${{ steps.outputs.outputs.modules }} + subworkflows: ${{ steps.outputs.outputs.subworkflows}} + # Prod for version bumping + steps: + - name: Clean Workspace # Purge the workspace in case it's running on a self-hosted runner + run: | + ls -la ./ + rm -rf ./* || true + rm -rf ./.??* || true + ls -la ./ + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + with: + fetch-depth: 0 + + - name: List nf-test files + id: list + uses: adamrtalbot/detect-nf-test-changes@de3c3c8e113031b4f15a3c1104b5f135e8346997 # v0.0.6 + with: + head: ${{ github.sha }} + base: ${{ github.event.pull_request.base.sha || github.event.merge_group.base_sha }} + n_parents: 0 + exclude_tags: "gpu" + + - name: Separate modules and subworkflows + id: outputs + run: | + echo modules=$(echo '${{ steps.list.outputs.components }}' | jq -c '. | map(select(contains("modules"))) | map(gsub("modules/nf-core/"; ""))') >> $GITHUB_OUTPUT + echo subworkflows=$(echo '${{ steps.list.outputs.components }}' | jq '. 
| map(select(contains("subworkflows"))) | map(gsub("subworkflows/nf-core/"; ""))') >> $GITHUB_OUTPUT + - name: debug + run: | + echo ${{ steps.list.outputs.components }} + echo ${{ steps.outputs.outputs.modules }} + echo ${{ steps.outputs.outputs.subworkflows }} nf-test: runs-on: ${{ github.event.inputs.runners || 'self-hosted' }} - # NOTE I think this is the cleanest way to get them organized - # process | conda | 1 - # process | conda | 2 - # process | conda | 3 - # process | docker_self_hosted | 1 - # ... - # workflow | singularity | 3 name: "${{ matrix.profile }} | ${{ matrix.shard }}" - # TODO - # needs: get-number-of-shards - # if: ${{ fromJSON(needs.get-number-of-shards.outputs.shards) != fromJSON('["1", "0"]') }} + needs: nf-test-changes + if: ${{ needs.nf-test-changes.outputs.modules != '[]' || needs.nf-test-changes.outputs.subworkflows != '[]' }} strategy: fail-fast: false matrix: - # NOTE We could split these, but there's probably going to be more process tests than workflow tests, so we're just going to combine them all and bump up the shards for now - # NOTE The name of the test would be name: "${{ matrix.filter }} | ${{ matrix.profile }} | ${{ matrix.shard }}" - # filter: [process, workflow] - profile: [conda, docker_self_hosted, singularity] shard: [1, 2, 3, 4, 5] + profile: [conda, docker_self_hosted, singularity] env: - # FIXME Bumping them up to make the transition smooth, then we can throttle them back + NXF_ANSI_LOG: false TOTAL_SHARDS: 5 - SENTIEON_LICENSE_MESSAGE: ${{ secrets.SENTIEON_LICENSE_MESSAGE }} - SENTIEON_ENCRYPTION_KEY: ${{ secrets.SENTIEON_ENCRYPTION_KEY }} + steps: - - name: Clean Workspace # Purge the workspace in case it's running on a self-hosted runner - run: | - ls -la ./ - rm -rf ./* || true - rm -rf ./.??* || true - ls -la ./ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 with: fetch-depth: 0 - name: Run nf-test Action uses: ./.github/actions/nf-test-action + env: + SENTIEON_ENCRYPTION_KEY: ${{ secrets.SENTIEON_ENCRYPTION_KEY }} + SENTIEON_LICENSE_MESSAGE: ${{ secrets.SENTIEON_LICENSE_MESSAGE }} + SENTIEON_LICSRVR_IP: ${{ secrets.SENTIEON_LICSRVR_IP }} + SENTIEON_AUTH_MECH: "GitHub Actions - token" with: profile: ${{ matrix.profile }} shard: ${{ matrix.shard }} total_shards: ${{ env.TOTAL_SHARDS }} + paths: "${{ join(fromJson(needs.nf-test-changes.outputs.paths), ' ') }}" + + confirm-pass: + runs-on: ubuntu-latest + needs: [nf-test] + if: always() + steps: + - name: All tests ok + if: ${{ success() || !contains(needs.*.result, 'failure') }} + run: exit 0 + - name: One or more tests failed + if: ${{ contains(needs.*.result, 'failure') }} + run: exit 1 + + - name: debug-print + if: always() + run: | + echo "toJSON(needs) = ${{ toJSON(needs) }}" + echo "toJSON(needs.*.result) = ${{ toJSON(needs.*.result) }}" diff --git a/modules/nf-core/gffread/tests/main.nf.test b/modules/nf-core/gffread/tests/main.nf.test index 4cd13dcd33b..d039f367c15 100644 --- a/modules/nf-core/gffread/tests/main.nf.test +++ b/modules/nf-core/gffread/tests/main.nf.test @@ -23,6 +23,7 @@ nextflow_process { file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) ] input[1] = [] + """ } } @@ -220,4 +221,4 @@ nextflow_process { } -} +} \ No newline at end of file diff --git a/subworkflows/nf-core/fastq_align_dedup_bwameth/main.nf b/subworkflows/nf-core/fastq_align_dedup_bwameth/main.nf new file mode 100644 index 00000000000..c0cc67b83c7 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_dedup_bwameth/main.nf @@ 
-0,0 +1,163 @@ +include { BWAMETH_ALIGN } from '../../../modules/nf-core/bwameth/align/main' +include { PARABRICKS_FQ2BAMMETH } from '../../../modules/nf-core/parabricks/fq2bammeth/main' +include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_ALIGNMENTS } from '../../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/samtools/flagstat/main' +include { SAMTOOLS_STATS } from '../../../modules/nf-core/samtools/stats/main' +include { PICARD_MARKDUPLICATES } from '../../../modules/nf-core/picard/markduplicates/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_DEDUPLICATED } from '../../../modules/nf-core/samtools/index/main' +include { METHYLDACKEL_EXTRACT } from '../../../modules/nf-core/methyldackel/extract/main' +include { METHYLDACKEL_MBIAS } from '../../../modules/nf-core/methyldackel/mbias/main' + +workflow FASTQ_ALIGN_DEDUP_BWAMETH { + + take: + ch_reads // channel: [ val(meta), [ reads ] ] + ch_fasta // channel: [ val(meta), [ fasta ] ] + ch_fasta_index // channel: [ val(meta), [ fasta index ] ] + ch_bwameth_index // channel: [ val(meta), [ bwameth index ] ] + skip_deduplication // boolean: whether to deduplicate alignments + + main: + + ch_alignment = Channel.empty() + ch_alignment_index = Channel.empty() + ch_samtools_flagstat = Channel.empty() + ch_samtools_stats = Channel.empty() + ch_methydackel_extract_bedgraph = Channel.empty() + ch_methydackel_extract_methylkit = Channel.empty() + ch_methydackel_mbias = Channel.empty() + ch_picard_metrics = Channel.empty() + ch_multiqc_files = Channel.empty() + ch_versions = Channel.empty() + + /* + * Align with bwameth + */ + if (params.use_gpu) { + /* + * Align with parabricks GPU enabled fq2bammeth implementation of bwameth + */ + PARABRICKS_FQ2BAMMETH ( + ch_reads, + ch_fasta, + ch_bwameth_index, + [] // known sites + ) + ch_alignment = PARABRICKS_FQ2BAMMETH.out.bam + ch_versions = ch_versions.mix(PARABRICKS_FQ2BAMMETH.out.versions) + } else { + /* + * Align with CPU version of bwameth + */ + BWAMETH_ALIGN ( + ch_reads, + ch_fasta, + ch_bwameth_index + ) + ch_alignment = BWAMETH_ALIGN.out.bam + ch_versions = BWAMETH_ALIGN.out.versions + } + + /* + * Sort raw output BAM + */ + SAMTOOLS_SORT ( + ch_alignment, + [[:],[]] // [ [meta], [fasta]] + ) + ch_alignment = SAMTOOLS_SORT.out.bam + ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions) + + /* + * Run samtools index on alignment + */ + SAMTOOLS_INDEX_ALIGNMENTS ( + ch_alignment + ) + ch_alignment_index = SAMTOOLS_INDEX_ALIGNMENTS.out.bai + ch_versions = ch_versions.mix(SAMTOOLS_INDEX_ALIGNMENTS.out.versions) + + /* + * Run samtools flagstat + */ + SAMTOOLS_FLAGSTAT ( + ch_alignment.join(ch_alignment_index) + ) + ch_samtools_flagstat = SAMTOOLS_FLAGSTAT.out.flagstat + ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) + + /* + * Run samtools stats + */ + SAMTOOLS_STATS ( + ch_alignment.join(ch_alignment_index), + [[:],[]] // [ [meta], [fasta]] + ) + ch_samtools_stats = SAMTOOLS_STATS.out.stats + ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions) + + if (!skip_deduplication) { + /* + * Run Picard MarkDuplicates + */ + PICARD_MARKDUPLICATES ( + ch_alignment, + ch_fasta, + ch_fasta_index + ) + /* + * Run samtools index on deduplicated alignment + */ + SAMTOOLS_INDEX_DEDUPLICATED ( + PICARD_MARKDUPLICATES.out.bam + ) + ch_alignment = PICARD_MARKDUPLICATES.out.bam + ch_alignment_index = SAMTOOLS_INDEX_DEDUPLICATED.out.bai + ch_picard_metrics = 
PICARD_MARKDUPLICATES.out.metrics + ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX_DEDUPLICATED.out.versions) + } + + /* + * Extract per-base methylation and plot methylation bias + */ + + METHYLDACKEL_EXTRACT ( + ch_alignment.join(ch_alignment_index), + ch_fasta.map{ meta, fasta_file -> fasta_file }, + ch_fasta_index.map{ meta, fasta_index -> fasta_index } + ) + ch_methydackel_extract_bedgraph = METHYLDACKEL_EXTRACT.out.bedgraph + ch_methydackel_extract_methylkit = METHYLDACKEL_EXTRACT.out.methylkit + ch_versions = ch_versions.mix(METHYLDACKEL_EXTRACT.out.versions) + + METHYLDACKEL_MBIAS ( + ch_alignment.join(ch_alignment_index), + ch_fasta.map{ meta, fasta_file -> fasta_file }, + ch_fasta_index.map{ meta, fasta_index -> fasta_index } + ) + ch_methydackel_mbias = METHYLDACKEL_MBIAS.out.txt + ch_versions = ch_versions.mix(METHYLDACKEL_MBIAS.out.versions) + + /* + * Collect MultiQC inputs + */ + ch_multiqc_files = ch_picard_metrics.collect{ meta, metrics -> metrics } + .mix(ch_samtools_flagstat.collect{ meta, flagstat -> flagstat }) + .mix(ch_samtools_stats.collect{ meta, stats -> stats }) + .mix(ch_methydackel_extract_bedgraph.collect{ meta, bedgraph -> bedgraph }) + .mix(ch_methydackel_mbias.collect{ meta, txt -> txt }) + + emit: + bam = ch_alignment // channel: [ val(meta), [ bam ] ] + bai = ch_alignment_index // channel: [ val(meta), [ bai ] ] + samtools_flagstat = ch_samtools_flagstat // channel: [ val(meta), [ flagstat ] ] + samtools_stats = ch_samtools_stats // channel: [ val(meta), [ stats ] ] + methydackel_extract_bedgraph = ch_methydackel_extract_bedgraph // channel: [ val(meta), [ bedgraph ] ] + methydackel_extract_methylkit = ch_methydackel_extract_methylkit // channel: [ val(meta), [ methylkit ] ] + methydackel_mbias = ch_methydackel_mbias // channel: [ val(meta), [ mbias ] ] + picard_metrics = ch_picard_metrics // channel: [ val(meta), [ metrics ] ] + multiqc = ch_multiqc_files // channel: [ *{html,txt} ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/fastq_align_dedup_bwameth/meta.yml b/subworkflows/nf-core/fastq_align_dedup_bwameth/meta.yml new file mode 100644 index 00000000000..a66ea024e84 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_dedup_bwameth/meta.yml @@ -0,0 +1,116 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "fastq_align_dedup_bwameth" +description: Performs alignment of BS-Seq reads using bwameth or parabricks/fq2bammeth, sort and deduplicate +keywords: + - bwameth + - alignment + - 3-letter genome + - map + - methylation + - 5mC + - methylseq + - bisulphite + - bisulfite + - fastq + - bam +components: + - bwameth/align + - parabricks/fq2bammeth + - samtools/sort + - samtools/index + - samtools/flagstat + - samtools/stats + - picard/markduplicates + - samtools/index + - methyldackel/extract + - methyldackel/mbias +input: + - ch_reads: + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. 
+ Structure: [ val(meta), [ path(reads) ] ] + pattern: "*.{fastq,fastq.gz}" + - ch_fasta: + type: file + description: | + Structure: [ val(meta), path(fasta) ] + pattern: "*.{fa,fa.gz}" + - ch_fasta_index: + type: file + description: | + Structure: [ val(meta), path(fasta index) ] + - ch_bwameth_index: + description: | + bwameth genome index files + Structure: [ val(meta), path(index) ] + pattern: "Bwameth_Index" + - skip_deduplication: + type: boolean + description: | + Skip deduplication of aligned reads +output: + - bam: + type: file + description: | + Channel containing BAM files + Structure: [ val(meta), path(bam) ] + pattern: "*.bam" + - bai: + type: file + description: | + Channel containing indexed BAM (BAI) files + Structure: [ val(meta), path(bai) ] + pattern: "*.bai" + - samtools_flagstat: + type: file + description: | + File containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] + pattern: "*.flagstat" + - samtools_stats: + type: file + description: | + File containing samtools stats output + Structure: [ val(meta), path(stats) ] + pattern: "*.{stats}" + - methydackel_extract_bedgraph: + type: file + description: | + bedGraph file, containing per-base methylation metrics + Structure: [ val(meta), path(bedgraph) ] + pattern: "*.bedGraph" + - methydackel_extract_methylkit: + type: file + description: | + methylKit file, containing per-base methylation metrics + Structure: [ val(meta), path(methylKit) ] + pattern: "*.methylKit" + - methydackel_mbias: + type: file + description: | + Text file containing methylation bias + Structure: [ val(meta), path(mbias) ] + pattern: "*.{txt}" + - picard_metrics: + type: file + description: | + Duplicate metrics file generated by picard + Structure: [ val(meta), path(metrics) ] + pattern: "*.{metrics.txt}" + - multiqc: + type: file + description: | + Channel containing MultiQC report aggregating results across samples.
+ Structure: [ val(meta), path(multiqc_report.html) ] + pattern: "*.html" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@sateeshperi" +maintainers: + - "@sateeshperi" diff --git a/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/gpu.nf.test b/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/gpu.nf.test new file mode 100644 index 00000000000..b156de6bc47 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/gpu.nf.test @@ -0,0 +1,132 @@ +nextflow_workflow { + + name "Test Subworkflow FASTQ_ALIGN_DEDUP_BWAMETH" + script "../main.nf" + workflow "FASTQ_ALIGN_DEDUP_BWAMETH" + config "./nextflow.config" + + tag "gpu" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/fastq_align_dedup_bwameth" + tag "bwameth/align" + tag "parabricks/fq2bammeth" + tag "samtools/sort" + tag "samtools/index" + tag "samtools/flagstat" + tag "samtools/stats" + tag "picard/markduplicates" + tag "samtools/index" + tag "methyldackel/extract" + tag "methyldackel/mbias" + tag "untar" + + setup { + run("UNTAR") { + script "../../../../modules/nf-core/untar/main.nf" + process { + """ + input[0] = [ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/Bwameth_Index.tar.gz', checkIfExists: true) + ] + """ + } + } + } + + test("Params: parabricks/fq2bammeth single-end | use_gpu") { + + when { + params { + skip_deduplication = false + use_gpu = true + } + + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + file('https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub1.fastq.gz', checkIfExists: true) + ]) + input[1] = Channel.of([ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa', checkIfExists: true) + ]) + input[2] = Channel.of([ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa.fai', checkIfExists: true) + ]) + input[3] = UNTAR.out.untar + input[4] = params.skip_deduplication + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + workflow.out.bam.collect { meta, bamfile -> bam(bamfile).getReadsMD5() }, + workflow.out.bai.collect { meta, bai -> file(bai).name }, + workflow.out.samtools_flagstat, + workflow.out.samtools_stats, + workflow.out.methydackel_extract_bedgraph, + workflow.out.methydackel_extract_methylkit, + workflow.out.methydackel_mbias, + workflow.out.picard_metrics.collect { meta, metrics -> file(metrics).name }, + workflow.out.multiqc.flatten().collect { path -> file(path).name }, + workflow.out.versions + ).match() } + ) + } + } + + test("Params: parabricks/fq2bammeth single-end | use_gpu | skip_deduplication") { + + when { + params { + skip_deduplication = true + use_gpu = true + } + + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + file('https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub1.fastq.gz', checkIfExists: true) + ]) + input[1] = Channel.of([ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa', checkIfExists: true) + ]) + input[2] = Channel.of([ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa.fai', checkIfExists: true) + ]) + input[3] = UNTAR.out.untar + input[4] = params.skip_deduplication + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + workflow.out.bam.collect { meta, bamfile 
-> bam(bamfile).getReadsMD5() }, + workflow.out.bai.collect { meta, bai -> file(bai).name }, + workflow.out.samtools_flagstat, + workflow.out.samtools_stats, + workflow.out.methydackel_extract_bedgraph, + workflow.out.methydackel_extract_methylkit, + workflow.out.methydackel_mbias, + workflow.out.picard_metrics.collect { meta, metrics -> file(metrics).name }, + workflow.out.multiqc.flatten().collect { path -> file(path).name }, + workflow.out.versions + ).match() } + ) + } + } + +} diff --git a/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/gpu.nf.test.snap b/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/gpu.nf.test.snap new file mode 100644 index 00000000000..a1602b3074a --- /dev/null +++ b/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/gpu.nf.test.snap @@ -0,0 +1,149 @@ +{ + "Params: parabricks/fq2bammeth single-end | use_gpu": { + "content": [ + [ + "a7f7ca7b5eb503ab58790d64a0273ed6" + ], + [ + "test.markdup.sorted.bam.bai" + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,897d500a710a56a7098172167fa71108" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,9aac964b859fda8239aa0eae16382d56" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.markdup.sorted_CpG.bedGraph:md5,f2fe02f180456f5f4922a2a8aa559fca" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.mbias.txt:md5,fce04d733e066d0b933cedc602e2af81" + ] + ], + [ + "test.markdup.sorted.MarkDuplicates.metrics.txt" + ], + [ + "test.flagstat", + "test.markdup.sorted.MarkDuplicates.metrics.txt", + "test.markdup.sorted_CpG.bedGraph", + "test.mbias.txt", + "test.stats" + ], + [ + "versions.yml:md5,36bd052d24ec766084f6aa2fb8a6ae4c", + "versions.yml:md5,45239309d0c40b5f0a56eba4347f09be", + "versions.yml:md5,4a6bb9a47d944ab197c823ae0ae61092", + "versions.yml:md5,8b72c7013fa6f632d28933b60ad1f2ea", + "versions.yml:md5,a80a57d29a4d72830f033bc0326b1abf", + "versions.yml:md5,b6492c12bfae23b6e279f4abfd4780e5", + "versions.yml:md5,baba90d5bd57679b913be2abd531ae15", + "versions.yml:md5,ddbe480ff81df55c6d95f911e7b6dc8a", + "versions.yml:md5,e9602257141b65a907ad9036e8a32a83" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-17T06:56:47.211389" + }, + "Params: parabricks/fq2bammeth single-end | use_gpu | skip_deduplication": { + "content": [ + [ + "a7f7ca7b5eb503ab58790d64a0273ed6" + ], + [ + "test.sorted.bam.bai" + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,897d500a710a56a7098172167fa71108" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,9aac964b859fda8239aa0eae16382d56" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted_CpG.bedGraph:md5,b0cb426020f8beb45b4e8f09b9a17bfa" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.mbias.txt:md5,fce04d733e066d0b933cedc602e2af81" + ] + ], + [ + + ], + [ + "test.flagstat", + "test.mbias.txt", + "test.sorted_CpG.bedGraph", + "test.stats" + ], + [ + "versions.yml:md5,36bd052d24ec766084f6aa2fb8a6ae4c", + "versions.yml:md5,4a6bb9a47d944ab197c823ae0ae61092", + "versions.yml:md5,8b72c7013fa6f632d28933b60ad1f2ea", + "versions.yml:md5,a80a57d29a4d72830f033bc0326b1abf", + "versions.yml:md5,b6492c12bfae23b6e279f4abfd4780e5", + "versions.yml:md5,baba90d5bd57679b913be2abd531ae15", + "versions.yml:md5,e9602257141b65a907ad9036e8a32a83" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": 
"2024-11-17T06:57:34.41912" + } +} diff --git a/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/main.nf.test b/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/main.nf.test new file mode 100644 index 00000000000..6b20d545684 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/main.nf.test @@ -0,0 +1,179 @@ +nextflow_workflow { + + name "Test Subworkflow FASTQ_ALIGN_DEDUP_BWAMETH" + script "../main.nf" + workflow "FASTQ_ALIGN_DEDUP_BWAMETH" + config "./nextflow.config" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/fastq_align_dedup_bwameth" + tag "bwameth/align" + tag "parabricks/fq2bammeth" + tag "samtools/sort" + tag "samtools/index" + tag "samtools/flagstat" + tag "samtools/stats" + tag "picard/markduplicates" + tag "samtools/index" + tag "methyldackel/extract" + tag "methyldackel/mbias" + tag "untar" + + setup { + run("UNTAR") { + script "../../../../modules/nf-core/untar/main.nf" + process { + """ + input[0] = [ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/Bwameth_Index.tar.gz', checkIfExists: true) + ] + """ + } + } + } + + test("Params: bwameth single-end | default") { + + when { + params { + skip_deduplication = false + use_gpu = false + } + + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + file('https://github.com/nf-core/test-datasets/raw/methylseq/testdata/SRR389222_sub1.fastq.gz', checkIfExists: true) + ]) + input[1] = Channel.of([ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa', checkIfExists: true) + ]) + input[2] = Channel.of([ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa.fai', checkIfExists: true) + ]) + input[3] = UNTAR.out.untar + input[4] = params.skip_deduplication + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + workflow.out.bam.collect { meta, bamfile -> bam(bamfile).getReadsMD5() }, + workflow.out.bai.collect { meta, bai -> file(bai).name }, + workflow.out.samtools_flagstat, + workflow.out.samtools_stats, + workflow.out.methydackel_extract_bedgraph, + workflow.out.methydackel_extract_methylkit, + workflow.out.methydackel_mbias, + workflow.out.picard_metrics.collect { meta, metrics -> file(metrics).name }, + workflow.out.multiqc.flatten().collect { path -> file(path).name }, + workflow.out.versions + ).match() } + ) + } + } + + test("Params: bwameth paired-end | default") { + + when { + params { + skip_deduplication = false + use_gpu = false + } + + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + file('https://github.com/nf-core/test-datasets/raw/methylseq/testdata/Ecoli_10K_methylated_R1.fastq.gz', checkIfExists: true), + file('https://github.com/nf-core/test-datasets/raw/methylseq/testdata/Ecoli_10K_methylated_R2.fastq.gz', checkIfExists: true) + ]) + input[1] = Channel.of([ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa', checkIfExists: true) + ]) + input[2] = Channel.of([ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa.fai', checkIfExists: true) + ]) + input[3] = UNTAR.out.untar + input[4] = params.skip_deduplication + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + workflow.out.bam.collect { meta, bamfile -> bam(bamfile).getReadsMD5() }, + workflow.out.bai.collect { meta, bai -> file(bai).name }, + workflow.out.samtools_flagstat, + workflow.out.samtools_stats, + 
workflow.out.methydackel_extract_bedgraph, + workflow.out.methydackel_extract_methylkit, + workflow.out.methydackel_mbias, + workflow.out.picard_metrics.collect { meta, metrics -> file(metrics).name }, + workflow.out.multiqc.flatten().collect { path -> file(path).name }, + workflow.out.versions + ).match() } + ) + } + } + + test("Params: bwameth paired-end | skip_deduplication") { + + when { + params { + skip_deduplication = true + use_gpu = false + } + + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + file('https://github.com/nf-core/test-datasets/raw/methylseq/testdata/Ecoli_10K_methylated_R1.fastq.gz', checkIfExists: true), + file('https://github.com/nf-core/test-datasets/raw/methylseq/testdata/Ecoli_10K_methylated_R2.fastq.gz', checkIfExists: true) + ]) + input[1] = Channel.of([ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa', checkIfExists: true) + ]) + input[2] = Channel.of([ + [:], + file('https://github.com/nf-core/test-datasets/raw/methylseq/reference/genome.fa.fai', checkIfExists: true) + ]) + input[3] = UNTAR.out.untar + input[4] = params.skip_deduplication + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + workflow.out.bam.collect { meta, bamfile -> bam(bamfile).getReadsMD5() }, + workflow.out.bai.collect { meta, bai -> file(bai).name }, + workflow.out.samtools_flagstat, + workflow.out.samtools_stats, + workflow.out.methydackel_extract_bedgraph, + workflow.out.methydackel_extract_methylkit, + workflow.out.methydackel_mbias, + workflow.out.picard_metrics.collect { meta, metrics -> file(metrics).name }, + workflow.out.multiqc.flatten().collect { path -> file(path).name }, + workflow.out.versions + ).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/main.nf.test.snap new file mode 100644 index 00000000000..90c9601506b --- /dev/null +++ b/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/main.nf.test.snap @@ -0,0 +1,224 @@ +{ + "Params: bwameth single-end | default": { + "content": [ + [ + "37ec1c6338cc3fee7ab1cb2d48dba38" + ], + [ + "test.markdup.sorted.bam.bai" + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,897d500a710a56a7098172167fa71108" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,9aac964b859fda8239aa0eae16382d56" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.markdup.sorted_CpG.bedGraph:md5,f2fe02f180456f5f4922a2a8aa559fca" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.mbias.txt:md5,fce04d733e066d0b933cedc602e2af81" + ] + ], + [ + "test.markdup.sorted.MarkDuplicates.metrics.txt" + ], + [ + "test.flagstat", + "test.markdup.sorted.MarkDuplicates.metrics.txt", + "test.markdup.sorted_CpG.bedGraph", + "test.mbias.txt", + "test.stats" + ], + [ + "versions.yml:md5,36bd052d24ec766084f6aa2fb8a6ae4c", + "versions.yml:md5,45239309d0c40b5f0a56eba4347f09be", + "versions.yml:md5,4a6bb9a47d944ab197c823ae0ae61092", + "versions.yml:md5,8b72c7013fa6f632d28933b60ad1f2ea", + "versions.yml:md5,8edf3166176c863b88ba488f8b715aa3", + "versions.yml:md5,a80a57d29a4d72830f033bc0326b1abf", + "versions.yml:md5,b6492c12bfae23b6e279f4abfd4780e5", + "versions.yml:md5,baba90d5bd57679b913be2abd531ae15", + "versions.yml:md5,ddbe480ff81df55c6d95f911e7b6dc8a" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": 
"2024-11-17T05:42:12.81856451" + }, + "Params: bwameth paired-end | skip_deduplication": { + "content": [ + [ + "cf25656fffc044f2bb7d9f1b3686ecb4" + ], + [ + "test.sorted.bam.bai" + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,4ff87d121ca174953734723938c99081" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,c753c72eb4e1c32f74afb1fbd932fe1f" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted_CpG.bedGraph:md5,285e492823182f5705bf0817e2d088b8" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.mbias.txt:md5,c1fda203c1b19aca2498efe0fd4cc9e3" + ] + ], + [ + + ], + [ + "test.flagstat", + "test.mbias.txt", + "test.sorted_CpG.bedGraph", + "test.stats" + ], + [ + "versions.yml:md5,36bd052d24ec766084f6aa2fb8a6ae4c", + "versions.yml:md5,4a6bb9a47d944ab197c823ae0ae61092", + "versions.yml:md5,8b72c7013fa6f632d28933b60ad1f2ea", + "versions.yml:md5,8edf3166176c863b88ba488f8b715aa3", + "versions.yml:md5,a80a57d29a4d72830f033bc0326b1abf", + "versions.yml:md5,b6492c12bfae23b6e279f4abfd4780e5", + "versions.yml:md5,baba90d5bd57679b913be2abd531ae15" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-17T05:10:43.907134648" + }, + "Params: bwameth paired-end | default": { + "content": [ + [ + "cf25656fffc044f2bb7d9f1b3686ecb4" + ], + [ + "test.markdup.sorted.bam.bai" + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,4ff87d121ca174953734723938c99081" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,c753c72eb4e1c32f74afb1fbd932fe1f" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.markdup.sorted_CpG.bedGraph:md5,c6c73e5abba70ac799500f592fec5c29" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.mbias.txt:md5,c1fda203c1b19aca2498efe0fd4cc9e3" + ] + ], + [ + "test.markdup.sorted.MarkDuplicates.metrics.txt" + ], + [ + "test.flagstat", + "test.markdup.sorted.MarkDuplicates.metrics.txt", + "test.markdup.sorted_CpG.bedGraph", + "test.mbias.txt", + "test.stats" + ], + [ + "versions.yml:md5,36bd052d24ec766084f6aa2fb8a6ae4c", + "versions.yml:md5,45239309d0c40b5f0a56eba4347f09be", + "versions.yml:md5,4a6bb9a47d944ab197c823ae0ae61092", + "versions.yml:md5,8b72c7013fa6f632d28933b60ad1f2ea", + "versions.yml:md5,8edf3166176c863b88ba488f8b715aa3", + "versions.yml:md5,a80a57d29a4d72830f033bc0326b1abf", + "versions.yml:md5,b6492c12bfae23b6e279f4abfd4780e5", + "versions.yml:md5,baba90d5bd57679b913be2abd531ae15", + "versions.yml:md5,ddbe480ff81df55c6d95f911e7b6dc8a" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-17T05:42:39.183331191" + } +} diff --git a/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/nextflow.config b/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/nextflow.config new file mode 100644 index 00000000000..55385ec0621 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_dedup_bwameth/tests/nextflow.config @@ -0,0 +1,14 @@ +process { + withName: 'PARABRICKS_FQ2BAMMETH' { + ext.args = '--low-memory' + } + + withName: 'SAMTOOLS_SORT' { + ext.prefix = { "${meta.id}.sorted" } + } + + withName: 'PICARD_MARKDUPLICATES' { + ext.args = "--ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --PROGRAM_RECORD_ID 'null' --TMP_DIR tmp" + ext.prefix = { "${meta.id}.markdup.sorted" } + } +} From 9a1e8bb6a5d205cf7807dcefca872a3314b2f3e6 Mon Sep 17 00:00:00 2001 From: 
Sateesh_Peri <33637490+sateeshperi@users.noreply.github.com> Date: Sat, 30 Nov 2024 12:51:49 +0530 Subject: [PATCH 12/12] use just MemoryUnit as per language server reco (#7127) --- modules/nf-core/bismark/align/main.nf | 2 +- modules/nf-core/hisat2/build/main.nf | 2 +- subworkflows/nf-core/utils_nfcore_pipeline/main.nf | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/nf-core/bismark/align/main.nf b/modules/nf-core/bismark/align/main.nf index df8b7e0633b..4aaa1be2ae3 100644 --- a/modules/nf-core/bismark/align/main.nf +++ b/modules/nf-core/bismark/align/main.nf @@ -45,7 +45,7 @@ process BISMARK_ALIGN { // Check that we have enough memory try { - def tmem = (task.memory as nextflow.util.MemoryUnit).toBytes() + def tmem = (task.memory as MemoryUnit).toBytes() def mcore = (tmem / mem_per_multicore) as int ccore = Math.min(ccore, mcore) } catch (all) { diff --git a/modules/nf-core/hisat2/build/main.nf b/modules/nf-core/hisat2/build/main.nf index 37a3e456581..7a5f28ba5fa 100644 --- a/modules/nf-core/hisat2/build/main.nf +++ b/modules/nf-core/hisat2/build/main.nf @@ -33,7 +33,7 @@ process HISAT2_BUILD { def ss = '' def exon = '' def extract_exons = '' - def hisat2_build_memory = params.hisat2_build_memory ? (params.hisat2_build_memory as nextflow.util.MemoryUnit).toGiga() : 0 + def hisat2_build_memory = params.hisat2_build_memory ? (params.hisat2_build_memory as MemoryUnit).toGiga() : 0 if (avail_mem >= hisat2_build_memory) { log.info "[HISAT2 index build] At least ${hisat2_build_memory} GB available, so using splice sites and exons to build HISAT2 index" extract_exons = gtf ? "hisat2_extract_exons.py $gtf > ${gtf.baseName}.exons.txt" : "" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index 1c912124767..228dbff8979 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -299,7 +299,7 @@ def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdi def email_html = html_template.toString() // Render the sendmail template - def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as MemoryUnit def smail_fields = [email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes()] def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields)
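The three hunks in PATCH 12/12 only drop the `nextflow.util.` package prefix; the cast itself is unchanged, and the unqualified `MemoryUnit` name resolves in pipeline scripts (which is why the language server recommends the short form). A minimal sketch of the pattern outside these modules, assuming a hypothetical process with an 8.GB request (the process name and values are illustrative, not taken from the patch):

    process EXAMPLE_MEM {
        memory 8.GB

        script:
        // Same cast style as the hunks above; the short class name resolves in Nextflow scripts.
        def tmem = (task.memory as MemoryUnit).toBytes()
        def tgig = (task.memory as MemoryUnit).toGiga()
        """
        echo "requested ${tgig} GB (${tmem} bytes)"
        """
    }

The same unqualified cast applies to string-valued params, as in the hisat2/build hunk's `(params.hisat2_build_memory as MemoryUnit).toGiga()`.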