Skip to content

Commit

Permalink
🔀 Merge pull request #24 from cnr-ibba/issue-23
Browse files Browse the repository at this point in the history
✨  add new module: seqkit/rmdup
  • Loading branch information
bunop authored Nov 13, 2023
2 parents 866124a + f4c8470 commit f96703a
Show file tree
Hide file tree
Showing 7 changed files with 167 additions and 0 deletions.
6 changes: 6 additions & 0 deletions modules/cnr-ibba/seqkit/rmdup/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
# Conda environment for the seqkit/rmdup module.
channels:
  - conda-forge
  - bioconda
  - defaults
dependencies:
  # seqkit is pinned to an exact version and taken explicitly from bioconda.
  - bioconda::seqkit=2.6.0
56 changes: 56 additions & 0 deletions modules/cnr-ibba/seqkit/rmdup/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Remove duplicated sequences from a FASTA/FASTQ file with `seqkit rmdup`.
//
// Input:
//   meta     - Groovy map with sample information (meta.id is used as tag and default prefix)
//   sequence - FASTA or FASTQ file, optionally gzip-compressed
//
// Output:
//   unique   - gzip-compressed file with the de-duplicated sequences (<prefix>.<fa|fq>.gz)
//   detail   - file passed to `-D` (<prefix>.duplicated.detail.txt); declared optional
//   versions - versions.yml reporting the seqkit version
//
// Tunables (task.ext): args, prefix, suffix, when.
process SEQKIT_RMDUP {
    tag "$meta.id"
    label 'process_high'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/seqkit:2.6.0--h9ee0642_0':
        'quay.io/biocontainers/seqkit:2.6.0--h9ee0642_0' }"

    input:
    tuple val(meta), path(sequence)

    output:
    tuple val(meta), path("*.{fa,fq}.gz")            , emit: unique
    tuple val(meta), path("*.duplicated.detail.txt") , emit: detail, optional: true
    path "versions.yml"                              , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // fasta or fastq. Exact pattern match .fasta or .fa suffix with optional .gz (gzip) suffix.
    // The dot before "gz" is escaped so it only matches a literal '.'.
    def suffix = task.ext.suffix ?: ("${sequence}" ==~ /(.*f[astn]*a(\.gz)?$)/ ? "fa" : "fq")

    // NOTE: the last seqkit argument must NOT end with a line continuation,
    // otherwise the shell joins the following `cat` line onto the seqkit command
    // and versions.yml is not written as intended.
    """
    seqkit \\
        rmdup \\
        $args \\
        --threads $task.cpus \\
        ${sequence} \\
        -o ${prefix}.${suffix}.gz \\
        -D ${prefix}.duplicated.detail.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        seqkit: \$( seqkit version | sed 's/seqkit v//' )
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Same fasta/fastq suffix detection as in the script section.
    def suffix = task.ext.suffix ?: ("${sequence}" ==~ /(.*f[astn]*a(\.gz)?$)/ ? "fa" : "fq")

    // Create empty placeholders for every declared output.
    """
    touch ${prefix}.${suffix}.gz
    touch ${prefix}.duplicated.detail.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        seqkit: \$( seqkit version | sed 's/seqkit v//' )
    END_VERSIONS
    """
}
56 changes: 56 additions & 0 deletions modules/cnr-ibba/seqkit/rmdup/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json
# Module metadata for SEQKIT_RMDUP: describes the tool, its inputs and its outputs.
name: "seqkit_rmdup"
description: Remove duplicated sequences by ID/name/sequence
keywords:
  - filter
  - set
  - seqkit
  - remove
  - duplicates
tools:
  - "seqkit":
      description: Cross-platform and ultrafast toolkit for FASTA/Q file manipulation, written by Wei Shen.
      homepage: https://bioinf.shenwei.me/seqkit/usage/
      documentation: https://bioinf.shenwei.me/seqkit/usage/
      tool_dev_url: https://github.com/shenwei356/seqkit/
      doi: "10.1371/journal.pone.0163962"
      licence: ["MIT"]
input:
  - meta:
      type: map
      description: >
        Groovy Map containing sample information e.g. [ id:'test', single_end:false ]
  - sequence:
      type: file
      description: >
        Fasta or fastq file containing sequences to be filtered
      # "{,.gz}" expresses an optional ".gz" suffix; "[.gz]" would be a
      # single-character class in glob syntax.
      pattern: "*.{fa,fna,faa,fasta,fq,fastq}{,.gz}"
output:
  - meta:
      type: map
      description: >
        Groovy Map containing sample information e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - unique:
      type: file
      description: >
        Gzip-compressed fasta or fastq file containing unique sequences
      # Kept in sync with the process output glob, which always ends in ".gz".
      pattern: "*.{fa,fq}.gz"
  - detail:
      type: file
      description: >
        Number and list of duplicated seqs (optional output)
      pattern: "*.duplicated.detail.txt"
authors:
  - "@bunop"
maintainers:
  - "@bunop"
4 changes: 4 additions & 0 deletions tests/config/pytest_modules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2411,6 +2411,10 @@ seqkit/replace:
- modules/cnr-ibba/seqkit/replace/**
- tests/modules/cnr-ibba/seqkit/replace/**

# seqkit/rmdup: module sources and their tests
# (presumably the path globs used to select this module's tests; confirm against the CI setup)
seqkit/rmdup:
- modules/cnr-ibba/seqkit/rmdup/**
- tests/modules/cnr-ibba/seqkit/rmdup/**

seqkit/split2:
- modules/cnr-ibba/seqkit/split2/**
- tests/modules/cnr-ibba/seqkit/split2/**
Expand Down
21 changes: 21 additions & 0 deletions tests/modules/cnr-ibba/seqkit/rmdup/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { SEQKIT_RMDUP } from '../../../../../modules/cnr-ibba/seqkit/rmdup/main.nf'

// Runs SEQKIT_RMDUP on the sarscov2 genome FASTA from the test dataset.
workflow test_seqkit_rmdup {
    def genome_fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)

    // [ meta map, [ sequence file ] ]
    def rmdup_input = [ [ id:'test' ], [ genome_fasta ] ]

    SEQKIT_RMDUP ( rmdup_input )
}

// Runs SEQKIT_RMDUP on a gzip-compressed sarscov2 Illumina FASTQ from the test dataset.
workflow test_seqkit_rmdup_fq {
    def reads_fastq = file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)

    // [ meta map, [ sequence file ] ]
    def rmdup_input = [ [ id:'test' ], [ reads_fastq ] ]

    SEQKIT_RMDUP ( rmdup_input )
}
5 changes: 5 additions & 0 deletions tests/modules/cnr-ibba/seqkit/rmdup/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
process {

    // Publish each process' outputs under <outdir>/<tool>, where <tool> is derived
    // from the process name: take the last ':'-separated component, keep the part
    // before the first '_' and lowercase it (e.g. SEQKIT_RMDUP -> seqkit).
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_name   = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_name}"
    }

}
19 changes: 19 additions & 0 deletions tests/modules/cnr-ibba/seqkit/rmdup/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# FASTA input: expects the de-duplicated FASTA and versions.yml under output/seqkit.
- name: seqkit rmdup test_seqkit_rmdup
  command: >-
    nextflow run ./tests/modules/cnr-ibba/seqkit/rmdup
    -entry test_seqkit_rmdup
    -c ./tests/config/nextflow.config
  tags:
    - seqkit/rmdup
    - seqkit
  files:
    - path: output/seqkit/test.fa.gz
      md5sum: "62127df7a59139d8b16c2ea90c777ad2"
    - path: output/seqkit/versions.yml

# FASTQ input: expects the de-duplicated FASTQ and versions.yml under output/seqkit.
- name: seqkit rmdup test_seqkit_rmdup_fq
  command: >-
    nextflow run ./tests/modules/cnr-ibba/seqkit/rmdup
    -entry test_seqkit_rmdup_fq
    -c ./tests/config/nextflow.config
  tags:
    - seqkit/rmdup
    - seqkit
  files:
    - path: output/seqkit/test.fq.gz
      md5sum: "994bc0f41ef4b198e0f2d99e1a8e9614"
    - path: output/seqkit/versions.yml

0 comments on commit f96703a

Please sign in to comment.