diff --git a/modules/nf-core/nacho/normalize/environment.yml b/modules/nf-core/nacho/normalize/environment.yml
new file mode 100644
index 00000000000..9cf652c88fe
--- /dev/null
+++ b/modules/nf-core/nacho/normalize/environment.yml
@@ -0,0 +1,12 @@
+channels:
+  - conda-forge
+  - bioconda
+
+dependencies:
+  - conda-forge::r-dplyr=1.1.4
+  - conda-forge::r-fs=1.6.4
+  - conda-forge::r-ggplot2=3.4.4
+  - conda-forge::r-nacho=2.0.6
+  - conda-forge::r-optparse=1.7.5
+  - conda-forge::r-readr=2.1.5
+  - conda-forge::r-tidyr=1.3.0
diff --git a/modules/nf-core/nacho/normalize/main.nf b/modules/nf-core/nacho/normalize/main.nf
new file mode 100644
index 00000000000..69cc49ec143
--- /dev/null
+++ b/modules/nf-core/nacho/normalize/main.nf
@@ -0,0 +1,60 @@
+process NACHO_NORMALIZE {
+    tag "${meta.id}"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container 'community.wave.seqera.io/library/r-dplyr_r-fs_r-ggplot2_r-nacho_pruned:033bc017f5f36b6d'
+
+    input:
+    // RCC files are staged under input/ so the script can be given one directory
+    tuple val(meta) , path(rcc_files, stageAs: "input/*")
+    tuple val(meta2), path(sample_sheet)
+
+    output:
+    tuple val(meta), path("normalized_counts.tsv")          , emit: normalized_counts
+    tuple val(meta), path("normalized_counts_wo_HKnorm.tsv"), emit: normalized_counts_wo_HK
+    path "versions.yml"                                     , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    """
+    nacho_norm.R \\
+        --input_rcc_path input \\
+        $args \\
+        --input_samplesheet ${sample_sheet}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//')
+        r-nacho: \$(Rscript -e "library(NACHO); cat(as.character(packageVersion('NACHO')))")
+        r-dplyr: \$(Rscript -e "library(dplyr); cat(as.character(packageVersion('dplyr')))")
+        r-ggplot2: \$(Rscript -e "library(ggplot2); cat(as.character(packageVersion('ggplot2')))")
+        r-tidyr: \$(Rscript -e "library(tidyr); cat(as.character(packageVersion('tidyr')))")
+        r-readr: \$(Rscript -e "library(readr); cat(as.character(packageVersion('readr')))")
+        r-fs: \$(Rscript -e "library(fs); cat(as.character(packageVersion('fs')))")
+        r-optparse: \$(Rscript -e "library(optparse); cat(as.character(packageVersion('optparse')))")
+    END_VERSIONS
+    """
+
+    stub:
+    // Only placeholder outputs are created here, so task.ext.args is not read
+    """
+    touch normalized_counts.tsv
+    touch normalized_counts_wo_HKnorm.tsv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//')
+        r-nacho: \$(Rscript -e "library(NACHO); cat(as.character(packageVersion('NACHO')))")
+        r-dplyr: \$(Rscript -e "library(dplyr); cat(as.character(packageVersion('dplyr')))")
+        r-ggplot2: \$(Rscript -e "library(ggplot2); cat(as.character(packageVersion('ggplot2')))")
+        r-tidyr: \$(Rscript -e "library(tidyr); cat(as.character(packageVersion('tidyr')))")
+        r-readr: \$(Rscript -e "library(readr); cat(as.character(packageVersion('readr')))")
+        r-fs: \$(Rscript -e "library(fs); cat(as.character(packageVersion('fs')))")
+        r-optparse: \$(Rscript -e "library(optparse); cat(as.character(packageVersion('optparse')))")
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/nacho/normalize/meta.yml b/modules/nf-core/nacho/normalize/meta.yml
new file mode 100644
index 00000000000..8fddb762e3a
--- /dev/null
+++ b/modules/nf-core/nacho/normalize/meta.yml
@@ -0,0 +1,84 @@
+---
+name: nacho_normalize
+description: |
+  NACHO (NAnostring quality Control dasHbOard) is developed for NanoString nCounter data.
+  NanoString nCounter data is a messenger-RNA/micro-RNA (mRNA/miRNA) expression assay and works with fluorescent barcodes.
+  Each barcode is assigned a mRNA/miRNA, which can be counted after bonding with its target.
+  As a result each count of a specific barcode represents the presence of its target mRNA/miRNA.
+keywords:
+  - nacho
+  - nanostring
+  - mRNA
+  - miRNA
+  - qc
+tools:
+  - NACHO:
+      description: |
+        R package that uses two main functions to summarize and visualize NanoString RCC files,
+        namely: `load_rcc()` and `visualise()`. It also includes a function `normalise()`, which (re)calculates
+        sample specific size factors and normalises the data.
+        For more information `vignette("NACHO")` and `vignette("NACHO-analysis")`
+      homepage: https://github.com/mcanouil/NACHO
+      documentation: https://cran.r-project.org/web/packages/NACHO/vignettes/NACHO.html
+      doi: "10.1093/bioinformatics/btz647"
+      licence: [ "GPL-3.0" ]
+      identifier: ""
+      args_id: "$args"
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test' ]
+    - rcc_files:
+        type: file
+        description: |
+          List of RCC files for all samples, which are direct outputs from NanoString runs
+        pattern: "*.RCC"
+  - - meta2:
+        type: map
+        description: |
+          Groovy Map containing file information
+          e.g. [ id:'test_samplesheet' ]
+    - sample_sheet:
+        type: file
+        pattern: "*.csv"
+        description: |
+          Comma-separated file with 3 columns: RCC_FILE, RCC_FILE_NAME, and SAMPLE_ID
+
+output:
+  - normalized_counts:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test' ]
+      - "normalized_counts.tsv":
+          type: file
+          description: |
+            Tab-separated file with gene normalized counts for the samples
+          pattern: "normalized_counts.tsv"
+
+  - normalized_counts_wo_HK:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test' ]
+      - "normalized_counts_wo_HKnorm.tsv":
+          type: file
+          description: |
+            Tab-separated file with gene normalized counts for the samples, computed without housekeeping-gene normalization.
+          pattern: "normalized_counts_wo_HKnorm.tsv"
+  - versions:
+      - "versions.yml":
+          type: file
+          description: |
+            File containing software versions
+          pattern: "versions.yml"
+
+authors:
+  - "@alanmmobbs93"
+maintainers:
+  - "@alanmmobbs93"
diff --git a/modules/nf-core/nacho/normalize/resources/usr/bin/nacho_norm.R b/modules/nf-core/nacho/normalize/resources/usr/bin/nacho_norm.R
new file mode 100755
index 00000000000..53899126a3b
--- /dev/null
+++ b/modules/nf-core/nacho/normalize/resources/usr/bin/nacho_norm.R
@@ -0,0 +1,87 @@
+#!/usr/bin/env Rscript
+library(optparse)
+library(dplyr)
+library(ggplot2)
+library(fs)
+library(NACHO)
+library(readr)
+library(tidyr)
+
+# Parse Arguments
+norm_methods <- c("GLM", "GEO")
+option_list <- list(
+    make_option(
+        c("--input_rcc_path"),
+        type = "character",
+        default = "./" ,
+        help = "Path to the folder that contains the RCC input file(s)",
+        metavar = "character"),
+    make_option(
+        c("--input_samplesheet"),
+        type = "character",
+        default = NULL ,
+        help = "Path to the sample sheet file",
+        metavar = "character"),
+    make_option(
+        c("--norm_method"),
+        type = "character",
+        default = "GLM",
+        help = paste0("Normalization method. One of ", paste(norm_methods, collapse = " ")),
+        metavar = "character")
+)
+
+# Parse the command-line arguments
+opt <- parse_args(OptionParser(option_list = option_list))
+
+# Validate mandatory arguments
+if (is.null(opt$input_rcc_path)) {
+    stop("Error: The --input_rcc_path parameter is mandatory and must be specified.")
+}
+
+if (is.null(opt$input_samplesheet)) {
+    stop("Error: The --input_samplesheet parameter is mandatory and must be specified.")
+}
+
+# Validate that --norm_method is one of the allowed values
+if (!(opt$norm_method %in% norm_methods)) {
+    stop(paste("Error: The --norm_method parameter must be one of:", paste(norm_methods, collapse = " ")))
+}
+
+input_rcc_path    <- opt$input_rcc_path
+input_samplesheet <- opt$input_samplesheet
+norm_method       <- opt$norm_method
+
+# List the RCC files in the input folder. The listing is only printed for the
+# log; load_rcc() below is given the directory itself.
+list_of_rccs <- dir_ls(path = input_rcc_path, glob = "*.RCC")
+print(list_of_rccs)
+
+# Core Code
+## Read data
+nacho_data <- load_rcc(data_directory = input_rcc_path,
+    ssheet_csv = input_samplesheet,
+    id_colname = "RCC_FILE_NAME",
+    normalisation_method = norm_method)
+
+## Reshape the long NACHO table into a wide table of normalized counts:
+## one row per Name/CodeClass, one column per RCC_FILE_NAME.
+get_counts <- function(nacho) {
+    nacho[["nacho"]] %>%
+        dplyr::select(c("RCC_FILE_NAME", "Name", "Count_Norm", "CodeClass")) %>%
+        tidyr::pivot_wider(names_from = "RCC_FILE_NAME", values_from = "Count_Norm")
+}
+
+## Write out normalized counts
+norm_counts <- as.data.frame(get_counts(nacho_data))
+write_tsv(norm_counts, file = "normalized_counts.tsv")
+
+## Create non-hk normalized counts too
+nacho_data_no_hk <- load_rcc(data_directory = input_rcc_path,
+    ssheet_csv = input_samplesheet,
+    id_colname = "RCC_FILE_NAME",
+    normalisation_method = norm_method,
+    housekeeping_norm = FALSE)
+
+## Export non-hk tables
+norm_counts_without_hks <- as.data.frame(get_counts(nacho_data_no_hk))
+write_tsv(norm_counts_without_hks, file = "normalized_counts_wo_HKnorm.tsv")
diff --git a/modules/nf-core/nacho/normalize/tests/main.nf.test b/modules/nf-core/nacho/normalize/tests/main.nf.test
new file mode 100644
index 00000000000..ec21ec5c87f
--- /dev/null
+++ b/modules/nf-core/nacho/normalize/tests/main.nf.test
@@ -0,0 +1,86 @@
+nextflow_process {
+
+    name "Test Process NACHO_NORMALIZE"
+    script "../main.nf"
+    process "NACHO_NORMALIZE"
+    config "./nextflow.config"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "nacho"
+    tag "nacho/normalize"
+
+    test("Salmon - RCC files") {
+
+        when {
+            params {
+                module_args = '--norm_method "GEO"'
+            }
+            process {
+                """
+                // RCC Files: Collect from sample sheet
+                input[0] =
+                    Channel.fromPath('https://raw.githubusercontent.com/nf-core/test-datasets/nanostring/samplesheets/samplesheet_test.csv', checkIfExists: true)
+                        .splitCsv( header: true )
+                        .map { row -> return file(row.RCC_FILE, checkIfExists: true) } // Select first column: path to file
+                        .collect()
+                        .map{ files ->
+                            tuple( [id: 'test'], files ) // Add meta component
+                        }
+
+
+                // Sample sheet
+                input[1] = Channel.of( [
+                    [ id: 'test_samplesheet'],
+                    [ file('https://raw.githubusercontent.com/nf-core/test-datasets/nanostring/samplesheets/samplesheet_test.csv', checkIfExists: true) ]
+                ] )
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+    test("Salmon - RCC files - stub") {
+
+        options "-stub"
+        when {
+            params {
+                module_args = '--norm_method "GEO"'
+            }
+            process {
+                """
+                // RCC Files: Collect from sample sheet
+                input[0] =
+                    Channel.fromPath('https://raw.githubusercontent.com/nf-core/test-datasets/nanostring/samplesheets/samplesheet_test.csv', checkIfExists: true)
+                        .splitCsv( header: true )
+                        .map { row -> return file(row.RCC_FILE, checkIfExists: true) } // Select first column: path to file
+                        .collect()
+                        .map{ files ->
+                            tuple( [id: 'test'], files ) // Add meta component
+                        }
+
+                // Sample sheet
+                input[1] =
+                    Channel.of( [
+                        [id: 'test_samplesheet'],
+                        [ file('https://raw.githubusercontent.com/nf-core/test-datasets/nanostring/samplesheets/samplesheet_test.csv', checkIfExists: true) ]
+                    ] )
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+}
diff --git a/modules/nf-core/nacho/normalize/tests/main.nf.test.snap b/modules/nf-core/nacho/normalize/tests/main.nf.test.snap
new file mode 100644
index 00000000000..f6e20e3947d
--- /dev/null
+++ b/modules/nf-core/nacho/normalize/tests/main.nf.test.snap
@@ -0,0 +1,100 @@
+{
+    "Salmon - RCC files": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "normalized_counts.tsv:md5,a0124c7a24bd04296f441d9ade82a05f"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "normalized_counts_wo_HKnorm.tsv:md5,5a2ce112c24e1b0d0f4cf3392111ef9e"
+                    ]
+                ],
+                "2": [
+                    "versions.yml:md5,dbc82908e1d1fcd2429022a4f327b9ba"
+                ],
+                "normalized_counts": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "normalized_counts.tsv:md5,a0124c7a24bd04296f441d9ade82a05f"
+                    ]
+                ],
+                "normalized_counts_wo_HK": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "normalized_counts_wo_HKnorm.tsv:md5,5a2ce112c24e1b0d0f4cf3392111ef9e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,dbc82908e1d1fcd2429022a4f327b9ba"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.1"
+        },
+        "timestamp": "2024-11-28T18:31:49.03241566"
+    },
+    "Salmon - RCC files - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "normalized_counts.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "normalized_counts_wo_HKnorm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "2": [
+                    "versions.yml:md5,dbc82908e1d1fcd2429022a4f327b9ba"
+                ],
+                "normalized_counts": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "normalized_counts.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "normalized_counts_wo_HK": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "normalized_counts_wo_HKnorm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,dbc82908e1d1fcd2429022a4f327b9ba"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.1"
+        },
+        "timestamp": "2024-11-28T18:32:02.81614763"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/nacho/normalize/tests/nextflow.config b/modules/nf-core/nacho/normalize/tests/nextflow.config
new file mode 100644
index 00000000000..b08db067be7
--- /dev/null
+++ b/modules/nf-core/nacho/normalize/tests/nextflow.config
@@ -0,0 +1,7 @@
+nextflow.enable.moduleBinaries = true
+
+process {
+    withName: 'NACHO_NORMALIZE' {
+        ext.args = params.module_args
+    }
+}