-
Notifications
You must be signed in to change notification settings - Fork 723
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added a draft for fasta_ltrretriever_lai
- Loading branch information
Showing
5 changed files
with
275 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
include { CUSTOM_SHORTENFASTAIDS } from '../../../modules/nf-core/custom/shortenfastaids/main' | ||
include { LTRHARVEST } from '../../../modules/nf-core/ltrharvest/main' | ||
include { LTRFINDER } from '../../../modules/nf-core/ltrfinder/main' | ||
include { LTRRETRIEVER_LTRRETRIEVER } from '../../../modules/nf-core/ltrretriever/ltrretriever/main' | ||
include { CAT_CAT } from '../../../modules/nf-core/cat/cat/main' | ||
include { LTRRETRIEVER_LAI } from '../../../modules/nf-core/ltrretriever/lai/main' | ||
include { CUSTOM_RESTOREGFFIDS } from '../../../modules/nf-core/custom/restoregffids/main' | ||
|
||
//
// Runs LTRHARVEST and LTRFINDER on the input fasta, concatenates their outputs
// with CAT_CAT, feeds the result to LTRRETRIEVER_LTRRETRIEVER and, unless
// `skip_lai` is true, runs LTRRETRIEVER_LAI on the LTRRETRIEVER outputs.
// The fasta is first passed through CUSTOM_SHORTENFASTAIDS and the annotation
// GFF is passed through CUSTOM_RESTOREGFFIDS before being emitted.
//
workflow FASTA_LTRRETRIEVER_LAI {

    take:
    ch_fasta            // channel: [ val(meta), fasta ]
    ch_monoploid_seqs   // channel: [ val(meta), txt ]; Optional: Set to [] if not needed
                        // val(meta) from ch_fasta and ch_monoploid_seqs are only required
                        // to have the same `id`
    skip_lai            // val(true|false)

    main:
    ch_versions = Channel.empty()

    // MODULE: CUSTOM_SHORTENFASTAIDS
    CUSTOM_SHORTENFASTAIDS ( ch_fasta )

    // remainder:true keeps every input fasta; when the module emitted no
    // short-ID fasta for a meta, fall back to the original fasta
    ch_short_ids_fasta = ch_fasta
        | join(CUSTOM_SHORTENFASTAIDS.out.short_ids_fasta, by:0, remainder:true)
        | map { meta, fasta, short_ids_fasta ->
            [ meta, short_ids_fasta ?: fasta ]
        }

    ch_short_ids_tsv = CUSTOM_SHORTENFASTAIDS.out.short_ids_tsv
    ch_versions = ch_versions.mix(CUSTOM_SHORTENFASTAIDS.out.versions.first())

    // MODULE: LTRHARVEST
    LTRHARVEST ( ch_short_ids_fasta )

    ch_ltrharvest_scn = LTRHARVEST.out.scn
    ch_versions = ch_versions.mix(LTRHARVEST.out.versions.first())

    // MODULE: LTRFINDER
    LTRFINDER ( ch_short_ids_fasta )

    ch_ltrfinder_scn = LTRFINDER.out.scn
    ch_versions = ch_versions.mix(LTRFINDER.out.versions.first())

    // MODULE: CAT_CAT
    // Pair both candidate files per meta so they are concatenated into one file
    ch_cat_cat_inputs = ch_ltrharvest_scn
        | join(ch_ltrfinder_scn)
        | map { meta, harvested, found -> [ meta, [ harvested, found ] ] }

    CAT_CAT ( ch_cat_cat_inputs )

    ch_ltr_candidates = CAT_CAT.out.file_out
    ch_versions = ch_versions.mix(CAT_CAT.out.versions.first())

    // MODULE: LTRRETRIEVER_LTRRETRIEVER
    // Join first, then split with map, so that the per-input channels stay in
    // the same order
    ch_ltrretriever_inputs = ch_short_ids_fasta.join(ch_ltr_candidates)

    LTRRETRIEVER_LTRRETRIEVER (
        ch_ltrretriever_inputs.map { meta, fasta, ltr -> [ meta, fasta ] },
        ch_ltrretriever_inputs.map { meta, fasta, ltr -> ltr },
        [],
        [],
        []
    )

    ch_pass_list = LTRRETRIEVER_LTRRETRIEVER.out.pass_list
    ch_ltrlib = LTRRETRIEVER_LTRRETRIEVER.out.ltrlib
    ch_annotation_out = LTRRETRIEVER_LTRRETRIEVER.out.annotation_out
    ch_annotation_gff = LTRRETRIEVER_LTRRETRIEVER.out.annotation_gff
    ch_versions = ch_versions.mix(LTRRETRIEVER_LTRRETRIEVER.out.versions.first())

    // MODULE: LTRRETRIEVER_LAI
    // Re-key the optional monoploid lists by `meta.id`, because only the `id`
    // is required to match between ch_fasta and ch_monoploid_seqs.
    // The parentheses are required: `|` binds tighter than `?:`, so without
    // them the map would only be applied to the fallback empty channel and a
    // real ch_monoploid_seqs channel would be joined un-mapped.
    ch_monoploid_by_id = ( ch_monoploid_seqs ?: Channel.empty() )
        | map { meta, mono -> [ meta.id, mono ] }

    // NOTE(review): with remainder:true a monoploid entry whose id has no
    // matching fasta is presumably also emitted by the join; confirm whether
    // such items need to be filtered out before the final map.
    ch_lai_inputs = skip_lai
        ? Channel.empty()
        : ch_short_ids_fasta
        | join(ch_pass_list)
        | join(ch_annotation_out)
        | map { meta, fasta, pass, out ->
            [ meta.id, meta, fasta, pass, out ]
        }
        | join(
            ch_monoploid_by_id,
            by:0,
            remainder: true
        )
        | map { id, meta, fasta, pass, out, mono ->
            [ meta, fasta, pass, out, mono ?: [] ]
        }

    LTRRETRIEVER_LAI(
        ch_lai_inputs.map { meta, fasta, pass, out, mono -> [ meta, fasta ] },
        ch_lai_inputs.map { meta, fasta, pass, out, mono -> pass },
        ch_lai_inputs.map { meta, fasta, pass, out, mono -> out },
        ch_lai_inputs.map { meta, fasta, pass, out, mono -> mono }
    )

    ch_lai_log = LTRRETRIEVER_LAI.out.log
    ch_lai_out = LTRRETRIEVER_LAI.out.lai_out
    ch_versions = ch_versions.mix(LTRRETRIEVER_LAI.out.versions.first())

    // MODULE: CUSTOM_RESTOREGFFIDS
    ch_restorable_gff_tsv = ch_annotation_gff.join(ch_short_ids_tsv)

    CUSTOM_RESTOREGFFIDS (
        ch_restorable_gff_tsv.map { meta, gff, tsv -> [ meta, gff ] },
        ch_restorable_gff_tsv.map { meta, gff, tsv -> tsv }
    )

    // Emit the restored GFF when one exists for a meta, otherwise the GFF as
    // produced by LTRRETRIEVER_LTRRETRIEVER
    ch_restored_gff = ch_annotation_gff
        | join(CUSTOM_RESTOREGFFIDS.out.restored_ids_gff3, by:0, remainder:true)
        | map { meta, gff, restored_gff -> [ meta, restored_gff ?: gff ] }

    ch_versions = ch_versions.mix(CUSTOM_RESTOREGFFIDS.out.versions.first())

    emit:
    ltrlib          = ch_ltrlib         // channel: [ val(meta), fasta ]
    annotation_gff  = ch_restored_gff   // channel: [ val(meta), gff ]
    lai_log         = ch_lai_log        // channel: [ val(meta), log ]
    lai_out         = ch_lai_out        // channel: [ val(meta), out ]
    versions        = ch_versions       // channel: [ versions.yml ]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json | ||
name: "fasta_ltrretriever_lai" | ||
description: | | ||
Performs long terminal repeat (LTR) retrotransposon annotation with gt/ltrharvest, ltrfinder | ||
and ltrretriever, along with estimation of the LTR Assembly Index (LAI). | ||
keywords: | ||
- genomics | ||
- genome | ||
- annotation | ||
- repeat | ||
- transposons | ||
- retrotransposons | ||
- stats | ||
- qc | ||
components: | ||
- custom/shortenfastaids | ||
- ltrharvest | ||
- ltrfinder | ||
- ltrretriever/ltrretriever | ||
- cat/cat | ||
- ltrretriever/lai | ||
- custom/restoregffids | ||
input: | ||
- ch_fasta: | ||
type: file | ||
description: | | ||
Channel for the assembly fasta file | ||
Structure: [ val(meta), path(fasta) ] | ||
pattern: "*.{fsa,fa,fasta}" | ||
- ch_monoploid_seqs: | ||
type: file | ||
description: | | ||
Channel for providing a list of monoploid sequences | ||
for correct estimation of LAI for polyploid genomes. | ||
This parameter is useful when all the haplotypes are | ||
stored in a single fasta file. | ||
Structure: [ val(meta), path(txt) ] | ||
pattern: "*.txt" | ||
- skip_lai: | ||
type: boolean | ||
description: | | ||
Skip LAI estimation | ||
Structure: [ val(boolean) ] | ||
output: | ||
- ltrlib: | ||
type: file | ||
description: | | ||
LTR library constructed by ltrretriever | ||
Structure: [ val(meta), path(fasta) ] | ||
pattern: "*.LTRlib.fa" | ||
- annotation_gff: | ||
type: file | ||
description: | | ||
Annotation based on the LTR library | ||
Structure: [ val(meta), path(gff3) ] | ||
pattern: "*.gff3" | ||
- lai_log: | ||
type: file | ||
description: | | ||
Log from LAI | ||
Structure: [ val(meta), path(log) ] | ||
pattern: "*.LAI.log" | ||
- lai_out: | ||
type: file | ||
description: | | ||
LAI output | ||
Structure: [ val(meta), path(out) ] | ||
pattern: "*.LAI.out" | ||
- versions: | ||
type: file | ||
description: | | ||
File containing software versions | ||
Structure: [ path(versions.yml) ] | ||
pattern: "versions.yml" | ||
authors: | ||
- "@GallVp" | ||
maintainers: | ||
- "@GallVp" |
59 changes: 59 additions & 0 deletions
59
subworkflows/nf-core/fasta_ltrretriever_lai/tests/main.nf.test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
// nf-test specification for the FASTA_LTRRETRIEVER_LAI subworkflow
nextflow_workflow {

    name "Test Workflow FASTA_LTRRETRIEVER_LAI"
    script "../main.nf"
    workflow "FASTA_LTRRETRIEVER_LAI"
    config "./nextflow.config"

    tag "subworkflows"
    tag "subworkflows_nfcore"
    tag "subworkflows/fasta_ltrretriever_lai"
    tag "fasta_ltrretriever_lai"
    tag "gunzip"
    tag "custom/shortenfastaids"
    tag "ltrharvest"
    tag "ltrfinder"
    tag "ltrretriever/ltrretriever"
    tag "cat/cat"
    tag "ltrretriever/lai"
    tag "custom/restoregffids"

    test("actinidia_chinensis-genome_21_fasta_gz-success") {

        // The test fasta is gzipped, so it is run through GUNZIP first and the
        // GUNZIP output is used as the workflow's fasta input
        setup {
            run("GUNZIP") {
                script "../../../../modules/nf-core/gunzip"

                process {
                    """
                    input[0] = [
                        [ id:'test' ],
                        file(params.test_data['actinidia_chinensis']['genome']['genome_21_fasta_gz'], checkIfExists: true)
                    ]
                    """
                }
            }
        }

        // No monoploid sequence list is supplied and LAI is not skipped
        when {
            workflow {
                """
                input[0] = GUNZIP.out.gunzip
                input[1] = []
                input[2] = false
                """
            }
        }

        then {
            assertAll(
                { assert workflow.success },
                { assert file(workflow.out.annotation_gff[0][1]).text.contains('Copia_LTR_retrotransposon') },
                { assert file(workflow.out.lai_log[0][1]).text.contains('Calculate LAI:') },
                { assert file(workflow.out.lai_log[0][1]).text.contains('Done!') },
                {
                    // Second line, seventh tab-separated column of the LAI output,
                    // compared against 31.29 with a tolerance of 1.0
                    def laiRow   = path(workflow.out.lai_out[0][1]).text.split("\n")[1]
                    def laiValue = Float.parseFloat(laiRow.split("\t")[6])
                    assert Math.abs(laiValue - 31.29) <= 1.0
                },
                { assert file(workflow.out.ltrlib[0][1]).text.contains('#LTR/Copia') }
            )
        }
    }
}
14 changes: 14 additions & 0 deletions
14
subworkflows/nf-core/fasta_ltrretriever_lai/tests/nextflow.config
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
// Per-process settings used by the FASTA_LTRRETRIEVER_LAI subworkflow test
process {

    // Output prefix for LTRHARVEST, derived from the sample id
    withName: 'LTRHARVEST' {
        ext.prefix = { "${meta.id}_ltrharvest" }
    }

    // Extra command-line arguments handed to LTRFINDER
    withName: 'LTRFINDER' {
        ext.args = '-harvest_out -size 1000000 -time 300'
    }

    // Output prefix for the concatenated LTRHARVEST + LTRFINDER file
    withName: 'CAT_CAT' {
        ext.prefix = { "${meta.id}_ltrharvest_ltrfinder.tabout" }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
subworkflows/fasta_ltrretriever_lai: | ||
- subworkflows/nf-core/fasta_ltrretriever_lai/** |