-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
🔀 Merge pull request #24 from cnr-ibba/issue-23
✨ add new module: seqkit/rmdup
- Loading branch information
Showing
7 changed files
with
167 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# Conda environment definition: the channels to search and the single pinned
# package to install.
# NOTE(review): presumably this is the environment.yml that the SEQKIT_RMDUP
# process loads via "${moduleDir}/environment.yml" - confirm the file path.
channels: | ||
- conda-forge | ||
- bioconda | ||
- defaults | ||
dependencies: | ||
# seqkit is pinned to 2.6.0, the same version used in the container image tags
# (seqkit:2.6.0--h9ee0642_0) of the process definition.
- bioconda::seqkit=2.6.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
/*
 * SEQKIT_RMDUP: run `seqkit rmdup` on one FASTA/FASTQ file.
 *
 * Input:
 *   tuple val(meta), path(sequence)
 *     meta     - Groovy map with sample information; meta.id is used as the
 *                task tag and as the default output prefix.
 *     sequence - FASTA or FASTQ file (optionally gzipped) to process.
 *
 * Output:
 *   unique   - tuple(meta, "<prefix>.<fa|fq>.gz"), the de-duplicated sequences.
 *   detail   - tuple(meta, "<prefix>.duplicated.detail.txt"), the file written
 *              through seqkit's -D option; declared optional.
 *   versions - versions.yml recording the seqkit version.
 *
 * Tunables (task.ext): args (extra CLI options), prefix (output basename),
 * suffix (force "fa"/"fq" instead of guessing from the input name), when.
 */
process SEQKIT_RMDUP {
    tag "$meta.id"
    label 'process_high'

    conda "${moduleDir}/environment.yml"
    // Singularity image from the Galaxy depot when running under Singularity (unless
    // task.ext.singularity_pull_docker_container is set); the quay.io biocontainer otherwise.
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/seqkit:2.6.0--h9ee0642_0':
        'quay.io/biocontainers/seqkit:2.6.0--h9ee0642_0' }"

    input:
    tuple val(meta), path(sequence)

    output:
    tuple val(meta), path("*.{fa,fq}.gz")            , emit: unique
    tuple val(meta), path("*.duplicated.detail.txt") , emit: detail, optional: true
    path "versions.yml"                              , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args   ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Output format: an explicit task.ext.suffix wins; otherwise names ending in
    // f[astn]*a (fa, fasta, fna, faa, ...) with an optional trailing "gz" extension
    // are treated as FASTA ("fa") and everything else as FASTQ ("fq").
    def suffix = task.ext.suffix ?: "${sequence}" ==~ /(.*f[astn]*a(.gz)?$)/ ? "fa" : "fq"
    // Writing the result onto the staged input would clobber it while seqkit is still reading.
    if ("${sequence}" == "${prefix}.${suffix}.gz") {
        error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
    }

    // The seqkit command must NOT end with a line continuation: otherwise the shell
    // would append the following `cat` heredoc to the seqkit argument list.
    """
    seqkit \\
        rmdup \\
        $args \\
        --threads $task.cpus \\
        ${sequence} \\
        -o ${prefix}.${suffix}.gz \\
        -D ${prefix}.duplicated.detail.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        seqkit: \$( seqkit version | sed 's/seqkit v//' )
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Same suffix detection as in the script block.
    def suffix = task.ext.suffix ?: "${sequence}" ==~ /(.*f[astn]*a(.gz)?$)/ ? "fa" : "fq"
    // `echo "" | gzip` makes the placeholder a valid gzip stream; a bare `touch`
    // would produce a zero-byte file that downstream gzip readers reject.
    """
    echo "" | gzip > ${prefix}.${suffix}.gz
    touch ${prefix}.duplicated.detail.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        seqkit: \$( seqkit version | sed 's/seqkit v//' )
    END_VERSIONS
    """
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json
# Module metadata for SEQKIT_RMDUP: describes the tool, the input tuple and the
# emitted output channels.
name: "seqkit_rmdup"
description: Remove duplicated sequences by ID/name/sequence
keywords:
  - filter
  - set
  - seqkit
  - remove
  - duplicates
tools:
  - "seqkit":
      description: Cross-platform and ultrafast toolkit for FASTA/Q file manipulation, written by Wei Shen.
      homepage: https://bioinf.shenwei.me/seqkit/usage/
      documentation: https://bioinf.shenwei.me/seqkit/usage/
      tool_dev_url: https://github.com/shenwei356/seqkit/
      doi: "10.1371/journal.pone.0163962"
      licence: ["MIT"]
input:
  - meta:
      type: map
      description: >
        Groovy Map containing sample information e.g. [ id:'test', single_end:false ]
  - sequence:
      type: file
      description: >
        Fasta or fastq file containing sequences to be filtered
      # "{,.gz}" makes the gzip extension optional; "[.gz]" would be a glob
      # character class matching a single '.', 'g' or 'z'.
      pattern: "*.{fa,fna,faa,fasta,fq,fastq}{,.gz}"
output:
  - meta:
      type: map
      description: >
        Groovy Map containing sample information e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - unique:
      type: file
      description: >
        Fasta or fastq file containing unique sequences
      # The process always writes gzip-compressed output ("*.{fa,fq}.gz").
      pattern: "*.{fa,fq}.gz"
  - detail:
      type: file
      description: >
        number and list of duplicated seqs
      pattern: "*.duplicated.detail.txt"
authors:
  - "@bunop"
maintainers:
  - "@bunop"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
#!/usr/bin/env nextflow | ||
|
||
nextflow.enable.dsl = 2 | ||
|
||
include { SEQKIT_RMDUP } from '../../../../../modules/cnr-ibba/seqkit/rmdup/main.nf' | ||
|
||
// Entry workflow: runs SEQKIT_RMDUP on the sarscov2 genome FASTA taken from params.test_data.
workflow test_seqkit_rmdup {
    sample_meta  = [ id:'test' ]
    genome_fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)

    // Same shape as the process input: [ meta map, [ sequence file ] ]
    SEQKIT_RMDUP ( [ sample_meta, [ genome_fasta ] ] )
}
|
||
// Entry workflow: runs SEQKIT_RMDUP on the sarscov2 illumina test_1 gzipped FASTQ from params.test_data.
workflow test_seqkit_rmdup_fq {
    sample_meta = [ id:'test' ]
    reads_fastq = file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)

    // Same shape as the process input: [ meta map, [ sequence file ] ]
    SEQKIT_RMDUP ( [ sample_meta, [ reads_fastq ] ] )
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
// Process-scope configuration applied to the module tests.
process { | ||
|
||
// Publish each task's outputs under <params.outdir>/<tool>, where <tool> is derived
// from the process name: keep the last ':'-separated component, take the part before
// the first '_' and lower-case it (e.g. SEQKIT_RMDUP -> seqkit).
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Test case for the FASTA entry workflow (test_seqkit_rmdup).
- name: seqkit rmdup test_seqkit_rmdup | ||
command: nextflow run ./tests/modules/cnr-ibba/seqkit/rmdup -entry test_seqkit_rmdup -c ./tests/config/nextflow.config | ||
tags: | ||
- seqkit/rmdup | ||
- seqkit | ||
files: | ||
# Expected de-duplicated FASTA output, listed with an md5sum.
- path: output/seqkit/test.fa.gz | ||
md5sum: 62127df7a59139d8b16c2ea90c777ad2 | ||
# versions.yml is listed without an md5sum.
- path: output/seqkit/versions.yml | ||
|
||
# Test case for the FASTQ entry workflow (test_seqkit_rmdup_fq).
- name: seqkit rmdup test_seqkit_rmdup_fq | ||
command: nextflow run ./tests/modules/cnr-ibba/seqkit/rmdup -entry test_seqkit_rmdup_fq -c ./tests/config/nextflow.config | ||
tags: | ||
- seqkit/rmdup | ||
- seqkit | ||
files: | ||
# Expected de-duplicated FASTQ output, listed with an md5sum.
- path: output/seqkit/test.fq.gz | ||
md5sum: 994bc0f41ef4b198e0f2d99e1a8e9614 | ||
# versions.yml is listed without an md5sum.
- path: output/seqkit/versions.yml |