Skip to content

Commit

Permalink
Added a draft for fasta_ltrretriever_lai
Browse files Browse the repository at this point in the history
  • Loading branch information
GallVp committed Feb 25, 2024
1 parent 26c8a95 commit 48013af
Show file tree
Hide file tree
Showing 5 changed files with 275 additions and 0 deletions.
122 changes: 122 additions & 0 deletions subworkflows/nf-core/fasta_ltrretriever_lai/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
include { CUSTOM_SHORTENFASTAIDS } from '../../../modules/nf-core/custom/shortenfastaids/main'
include { LTRHARVEST } from '../../../modules/nf-core/ltrharvest/main'
include { LTRFINDER } from '../../../modules/nf-core/ltrfinder/main'
include { LTRRETRIEVER_LTRRETRIEVER } from '../../../modules/nf-core/ltrretriever/ltrretriever/main'
include { CAT_CAT } from '../../../modules/nf-core/cat/cat/main'
include { LTRRETRIEVER_LAI } from '../../../modules/nf-core/ltrretriever/lai/main'
include { CUSTOM_RESTOREGFFIDS } from '../../../modules/nf-core/custom/restoregffids/main'

/*
 * FASTA_LTRRETRIEVER_LAI
 *
 * Annotates LTR retrotransposons in an assembly and, unless `skip_lai` is set,
 * computes LAI on the result. The steps are:
 *   1. CUSTOM_SHORTENFASTAIDS  - shorten the fasta sequence IDs
 *   2. LTRHARVEST + LTRFINDER  - produce LTR candidates from the same fasta
 *   3. CAT_CAT                 - concatenate both candidate files per sample
 *   4. LTRRETRIEVER_LTRRETRIEVER
 *   5. LTRRETRIEVER_LAI        - optional
 *   6. CUSTOM_RESTOREGFFIDS    - restore the original IDs in the annotation GFF
 */
workflow FASTA_LTRRETRIEVER_LAI {

    take:
    ch_fasta                // channel: [ val(meta), fasta ]
    ch_monoploid_seqs       // channel: [ val(meta), txt ]; Optional: Set to [] if not needed
                            // val(meta) from ch_fasta and ch_monoploid_seqs are only required
                            // to have the same `id`
    skip_lai                // val(true|false)

    main:
    ch_versions             = Channel.empty()

    // MODULE: CUSTOM_SHORTENFASTAIDS
    CUSTOM_SHORTENFASTAIDS ( ch_fasta )

    // `remainder:true` keeps samples for which no shortened fasta was emitted;
    // those samples continue with their original fasta
    ch_short_ids_fasta      = ch_fasta
                            | join(CUSTOM_SHORTENFASTAIDS.out.short_ids_fasta, by:0, remainder:true)
                            | map { meta, fasta, short_ids_fasta ->
                                [ meta, short_ids_fasta ?: fasta ]
                            }

    ch_short_ids_tsv        = CUSTOM_SHORTENFASTAIDS.out.short_ids_tsv
    ch_versions             = ch_versions.mix(CUSTOM_SHORTENFASTAIDS.out.versions.first())

    // MODULE: LTRHARVEST
    LTRHARVEST ( ch_short_ids_fasta )

    ch_ltrharvest_scn       = LTRHARVEST.out.scn
    ch_versions             = ch_versions.mix(LTRHARVEST.out.versions.first())

    // MODULE: LTRFINDER
    LTRFINDER ( ch_short_ids_fasta )

    ch_ltrfinder_scn        = LTRFINDER.out.scn
    ch_versions             = ch_versions.mix(LTRFINDER.out.versions.first())

    // MODULE: CAT_CAT
    ch_cat_cat_inputs       = ch_ltrharvest_scn
                            | join(ch_ltrfinder_scn)
                            | map { meta, harvested, found -> [ meta, [ harvested, found ] ] }

    CAT_CAT ( ch_cat_cat_inputs )

    ch_ltr_candidates       = CAT_CAT.out.file_out
    ch_versions             = ch_versions.mix(CAT_CAT.out.versions.first())

    // MODULE: LTRRETRIEVER_LTRRETRIEVER
    // Join first so that the fasta and its candidates are emitted in matching order
    ch_ltrretriever_inputs  = ch_short_ids_fasta.join(ch_ltr_candidates)

    LTRRETRIEVER_LTRRETRIEVER (
        ch_ltrretriever_inputs.map { meta, fasta, ltr -> [ meta, fasta ] },
        ch_ltrretriever_inputs.map { meta, fasta, ltr -> ltr },
        [],
        [],
        []
    )

    ch_pass_list            = LTRRETRIEVER_LTRRETRIEVER.out.pass_list
    ch_ltrlib               = LTRRETRIEVER_LTRRETRIEVER.out.ltrlib
    ch_annotation_out       = LTRRETRIEVER_LTRRETRIEVER.out.annotation_out
    ch_annotation_gff       = LTRRETRIEVER_LTRRETRIEVER.out.annotation_gff
    ch_versions             = ch_versions.mix(LTRRETRIEVER_LTRRETRIEVER.out.versions.first())

    // MODULE: LAI
    // Re-key the optional monoploid list by `meta.id` so that it can be matched to
    // the fasta on `id` alone. The parentheses are required: `|` binds tighter than
    // `?:`, so without them the `map` would only be applied to the empty fallback
    // channel and a user-supplied channel would stay keyed by the full meta map.
    ch_monoploid_by_id      = ( ch_monoploid_seqs ?: Channel.empty() )
                            | map { meta, mono -> [ meta.id, mono ] }

    ch_lai_inputs           = skip_lai
                            ? Channel.empty()
                            : ch_short_ids_fasta
                            | join(ch_pass_list)
                            | join(ch_annotation_out)
                            | map { meta, fasta, pass, out ->
                                [ meta.id, meta, fasta, pass, out ]
                            }
                            | join(
                                ch_monoploid_by_id,
                                by:0,
                                remainder: true
                            )
                            | map { id, meta, fasta, pass, out, mono ->
                                // Samples without a monoploid list get `[]`
                                [ meta, fasta, pass, out, mono ?: [] ]
                            }

    LTRRETRIEVER_LAI(
        ch_lai_inputs.map { meta, fasta, pass, out, mono -> [ meta, fasta ] },
        ch_lai_inputs.map { meta, fasta, pass, out, mono -> pass },
        ch_lai_inputs.map { meta, fasta, pass, out, mono -> out },
        ch_lai_inputs.map { meta, fasta, pass, out, mono -> mono }
    )

    ch_lai_log              = LTRRETRIEVER_LAI.out.log
    ch_lai_out              = LTRRETRIEVER_LAI.out.lai_out
    ch_versions             = ch_versions.mix(LTRRETRIEVER_LAI.out.versions.first())

    // MODULE: CUSTOM_RESTOREGFFIDS
    ch_restorable_gff_tsv   = ch_annotation_gff.join(ch_short_ids_tsv)

    CUSTOM_RESTOREGFFIDS (
        ch_restorable_gff_tsv.map { meta, gff, tsv -> [ meta, gff ] },
        ch_restorable_gff_tsv.map { meta, gff, tsv -> tsv }
    )

    // Samples without a restored GFF keep the GFF produced by LTRRETRIEVER_LTRRETRIEVER
    ch_restored_gff         = ch_annotation_gff
                            | join(CUSTOM_RESTOREGFFIDS.out.restored_ids_gff3, by:0, remainder:true)
                            | map { meta, gff, restored_gff -> [ meta, restored_gff ?: gff ] }

    ch_versions             = ch_versions.mix(CUSTOM_RESTOREGFFIDS.out.versions.first())

    emit:
    ltrlib                  = ch_ltrlib         // channel: [ val(meta), fasta ]
    annotation_gff          = ch_restored_gff   // channel: [ val(meta), gff ]
    lai_log                 = ch_lai_log        // channel: [ val(meta), log ]
    lai_out                 = ch_lai_out        // channel: [ val(meta), out ]
    versions                = ch_versions       // channel: [ versions.yml ]
}
78 changes: 78 additions & 0 deletions subworkflows/nf-core/fasta_ltrretriever_lai/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
name: "fasta_ltrretriever_lai"
description: |
  Performs long terminal repeat (LTR) retrotransposon annotation with gt/ltrharvest, ltrfinder
  and ltrretriever along with LTR assembly index (LAI).
keywords:
  - genomics
  - genome
  - annotation
  - repeat
  - transposons
  - retrotransposons
  - stats
  - qc
# Modules included by main.nf
components:
  - custom/shortenfastaids
  - ltrharvest
  - ltrfinder
  - ltrretriever/ltrretriever
  - cat/cat
  - ltrretriever/lai
  - custom/restoregffids
input:
  - ch_fasta:
      type: file
      description: |
        Channel for the assembly fasta file
        Structure: [ val(meta), path(fasta) ]
      # Brace alternation in a glob is comma-separated
      pattern: "*.{fsa,fa,fasta}"
  - ch_monoploid_seqs:
      type: file
      description: |
        Channel for providing a list of monoploid sequences
        for correct estimation of LAI for polyploid genomes.
        This parameter is useful when all the haplotypes are
        stored in a single fasta file.
        Structure: [ val(meta), path(txt) ]
      pattern: "*.txt"
  - skip_lai:
      type: boolean
      description: |
        Skip LAI estimation
        Structure: [ val(boolean) ]
output:
  - ltrlib:
      type: file
      description: |
        LTR library constructed by ltrretriever
        Structure: [ val(meta), path(fasta) ]
      pattern: "*.LTRlib.fa"
  - annotation_gff:
      type: file
      description: |
        Annotation based on the LTR library
        Structure: [ val(meta), path(gff3) ]
      pattern: "*.gff3"
  - lai_log:
      type: file
      description: |
        Log from LAI
        Structure: [ val(meta), path(log) ]
      pattern: "*.LAI.log"
  - lai_out:
      type: file
      description: |
        LAI output
        Structure: [ val(meta), path(out) ]
      pattern: "*.LAI.out"
  - versions:
      type: file
      description: |
        File containing software versions
        Structure: [ path(versions.yml) ]
      pattern: "versions.yml"
authors:
  - "@GallVp"
maintainers:
  - "@GallVp"
59 changes: 59 additions & 0 deletions subworkflows/nf-core/fasta_ltrretriever_lai/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// nf-test spec for the FASTA_LTRRETRIEVER_LAI subworkflow.
nextflow_workflow {

    name "Test Workflow FASTA_LTRRETRIEVER_LAI"
    script "../main.nf"
    workflow "FASTA_LTRRETRIEVER_LAI"
    config "./nextflow.config"

    // Subworkflow-level tags
    tag "subworkflows"
    tag "subworkflows_nfcore"
    tag "subworkflows/fasta_ltrretriever_lai"
    tag "fasta_ltrretriever_lai"

    // One tag per module exercised by this test (GUNZIP is only used in setup)
    tag "gunzip"
    tag "custom/shortenfastaids"
    tag "ltrharvest"
    tag "ltrfinder"
    tag "ltrretriever/ltrretriever"
    tag "cat/cat"
    tag "ltrretriever/lai"
    tag "custom/restoregffids"

    test("actinidia_chinensis-genome_21_fasta_gz-success") {

        // The test genome is gzipped; decompress it before handing it to the workflow
        setup {
            run("GUNZIP") {
                script "../../../../modules/nf-core/gunzip"

                process {
                    """
                    input[0] = [
                        [ id:'test' ],
                        file(params.test_data['actinidia_chinensis']['genome']['genome_21_fasta_gz'], checkIfExists: true)
                    ]
                    """
                }
            }
        }

        // input[1] = [] -> no monoploid sequence list; input[2] = false -> LAI is not skipped
        when {
            workflow {
                """
                input[0] = GUNZIP.out.gunzip
                input[1] = []
                input[2] = false
                """
            }
        }

        then {
            assertAll(
                { assert workflow.success },
                { assert file(workflow.out.annotation_gff[0][1]).text.contains('Copia_LTR_retrotransposon') },
                { assert file(workflow.out.lai_log[0][1]).text.contains('Calculate LAI:') },
                { assert file(workflow.out.lai_log[0][1]).text.contains('Done!') },
                {
                    // Second line, seventh tab-separated column of the LAI output,
                    // compared against 31.29 with a tolerance of 1.0
                    def lai_lines   = path(workflow.out.lai_out[0][1]).text.split("\n")
                    def lai_value   = Float.parseFloat(lai_lines[1].split("\t")[6])

                    assert Math.abs(lai_value - 31.29) <= 1.0
                },
                { assert file(workflow.out.ltrlib[0][1]).text.contains('#LTR/Copia') }
            )
        }
    }
}
14 changes: 14 additions & 0 deletions subworkflows/nf-core/fasta_ltrretriever_lai/tests/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Per-process settings used only by the FASTA_LTRRETRIEVER_LAI test.
process {

    // Suffix the LTRHARVEST output prefix with the tool name
    withName: 'LTRHARVEST' {
        ext.prefix = { "${meta.id}_ltrharvest" }
    }

    // Extra command-line arguments for LTRFINDER
    // NOTE(review): presumably `-harvest_out` makes the output compatible with the
    // LTRHARVEST result so that both can be concatenated - confirm against the tool docs
    withName: 'LTRFINDER' {
        ext.args = '-harvest_out -size 1000000 -time 300'
    }

    // Output prefix for the concatenated file produced by CAT_CAT
    withName: 'CAT_CAT' {
        ext.prefix = { "${meta.id}_ltrharvest_ltrfinder.tabout" }
    }
}
2 changes: 2 additions & 0 deletions subworkflows/nf-core/fasta_ltrretriever_lai/tests/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Maps the test tag to the paths that belong to it
# NOTE(review): presumably used by CI to select tests for changed files - confirm
subworkflows/fasta_ltrretriever_lai:
  - subworkflows/nf-core/fasta_ltrretriever_lai/**

0 comments on commit 48013af

Please sign in to comment.