-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.nf
274 lines (227 loc) · 8.43 KB
/
main.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
// check parameters
if (!params.prefix) { exit 1, "Error: 'prefix' parameter not specified" }
if (params.steps) {
def range = (1..params.steps)
steps_ch = Channel.of( range )
} else {
exit 1, "Error: 'steps' parameter not specified"
}
if (params.individuals) { individuals_ch = Channel.fromList( params.individuals ) } else { exit 1, "Error: 'individuals' parameter not specified" }
process PLINK_SUBSET {
tag "$meta.id"
label 'process_low'
conda "bioconda::plink=1.90b6.21"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/plink:1.90b6.21--h779adbc_1' :
'quay.io/biocontainers/plink:1.90b6.21--h779adbc_1' }"
input:
tuple val(meta), val(seed)
tuple path(bed), path(bim), path(fam)
val(species_opts)
output:
tuple val(meta), path("*.map"), emit: map
tuple val(meta), path("*.ped"), emit: ped
path "versions.yml" , emit: versions
script:
def args = task.ext.args ?: ''
def step = "${meta.step}"
def individuals = "${meta.individuals}"
def prefix = "${bed.getBaseName()}"
def outfile = "${bed.getBaseName()}_${individuals}_individuals_${step}_step"
"""
plink \\
$args \\
$species_opts \\
--seed $seed \\
--bfile $prefix \\
--thin-indiv-count ${meta.individuals} \\
--threads $task.cpus \\
--recode \\
--out ${outfile}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
plink: \$(echo \$(plink --version) | sed 's/^PLINK v//;s/64.*//')
END_VERSIONS
"""
stub:
def step = "${meta.step}"
def individuals = "${meta.individuals}"
def prefix = "${bed.getBaseName()}"
def outfile = "${bed.getBaseName()}_${individuals}_individuals_${step}_step"
"""
touch ${outfile}.map
touch ${outfile}.ped
touch versions.yml
"""
}
process PED2GENEPOP {
tag "$meta.id"
label 'process_low'
conda "bioconda::pgdspider=2.1.1.5"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/pgdspider:2.1.1.5--hdfd78af_1' :
'quay.io/biocontainers/pgdspider:2.1.1.5--hdfd78af_1' }"
input:
tuple val(meta), path(ped)
tuple val(meta), path(map)
output:
tuple val(meta), path("*.txt"), emit: genepop
tuple val(meta), path("*.spid"), emit: spid
path "versions.yml" , emit: versions
script:
def args = task.ext.args ?: ''
def prefix = "${ped.getBaseName()}"
def xmx = (task.memory.mega*0.95).intValue()
def xms = task.memory.mega / 2
"""
cat <<EOF > PED_GENEPOP.spid
# PED Parser questions
PARSER_FORMAT=PED
# Is the Phenotype absent in the input file?
PED_PARSER_PHENOTYPE_QUESTION=false
# Is the Family ID column absent in the input file?
PED_PARSER_FAMILY_ID_QUESTION=false
# Do you want to include a MAP file with loci information?
PED_PARSER_INCLUDE_MAP_QUESTION=true
# Is the Sex column absent in the input file?
PED_PARSER_SEX_QUESTION=false
# Are the Paternal ID and the Maternal ID columns absent in the input file?
PED_PARSER_PATERNAL_MATERNAL_ID_QUESTION=false
# Open MAP file
PED_PARSER_MAP_FILE_QUESTION=${map}
# Group individuals into populations according to:
PED_PARSER_POPULATION_QUESTION=FAMILY
# Is the Individual ID column absent in the input file?
PED_PARSER_IND_ID_QUESTION=false
# GENEPOP Writer questions
WRITER_FORMAT=GENEPOP
# Specify the locus/locus combination you want to write to the GENEPOP file:
GENEPOP_WRITER_LOCUS_COMBINATION_QUESTION=
# Specify which data type should be included in the GENEPOP file (GENEPOP can only analyze one data type per file):
GENEPOP_WRITER_DATA_TYPE_QUESTION=SNP
EOF
PGDSpider2-cli \\
$args \\
-Xmx${xmx}M \\
-Xms${xms}M \\
-inputfile ${ped} \\
-inputformat PED \\
-outputfile ${prefix}_genepop.txt \\
-outputformat GENEPOP \\
-spid PED_GENEPOP.spid
cat <<-END_VERSIONS > versions.yml
"${task.process}":
plink: \$(echo \$(PGDSpider2-cli -h) | grep -i Copyright | sed 's/ PGDSpider //;s/Copyright.*//')
END_VERSIONS
"""
stub:
def prefix = "${ped.getBaseName()}"
"""
touch ${prefix}_genepop.txt
touch PED_GENEPOP.spid
touch versions.yml
"""
}
process LDNE {
tag "$meta.id"
label 'process_single'
label 'error_retry'
container "bunop/neestimator2x:0.2"
input:
tuple val(meta), path(genepop)
output:
tuple val(meta), path("*_info.txt"), emit: info
tuple val(meta), path("*_option.txt"), emit: option
tuple val(meta), path("*_Ne.txt"), emit: output
tuple val(meta), path("*_NexLD.txt"), emit: tabular
tuple val(meta), path("*NoDat.txt"), emit: missing, optional: true
script:
def prefix = "${genepop.getBaseName()}"
def info_file = "${prefix}_info.txt"
def option_file = "${prefix}_option.txt"
def output_file = "${prefix}_Ne.txt"
def missing_file = "${prefix}NoDat.txt"
"""
cat <<EOF > ${info_file}
1 * A number = sum of method(s) to run: LD(=1), Het(=2), Coan(=4), Temporal(=8).
\$PWD/ * Input Directory
${genepop} * Input file name
2 * 1 = FSTAT format, 2 = GENEPOP format
\$PWD/ * Output Directory
${output_file}* * Output file name (put asterisk adjacent to the name to append)
1 * Number of critical values, added 1 if a run by rejecting only singleton alleles is included
0.02 -1 * Critical values, a special value '1' is for rejecting only singleton alleles
1 * 0: Random mating, 1: Monogamy (LD method)
EOF
cat <<EOF > ${option_file}
1 0 1 1 * First number = sum of method(s) to have extra output: LD(=1), Het(=2), Coan(=4), Temporal(=8)
0 * Maximum individuals/pop. If 0: no limit
0 * First entry n1 = 0: No Freq output. If n1 = -1: Freq. output up to population 50. Two entries n1, n2 with n1 <= n2: Freq output for populations from n1 to n2. Max. populations to have Freq output is set at 50
0 * For Burrow output file (up to 50 populations can have output). See remark below
1 * Parameter CI: 1 for Yes, 0 for No
1 * Jackknife CI: 1 for Yes, 0 for No
0 * Up to population, or range of populations to run (if 2 entries). If first entry = 0: no restriction
0 * All loci are accepted
1 * Enter 1: A file is created to document missing data if there are any in input file. Enter 0: no file created
0 * Line for chromosomes/loci option and file
EOF
Ne2-1L i:${info_file} o:${option_file}
"""
stub:
def prefix = "${genepop.getBaseName()}"
"""
touch ${prefix}_info.txt
touch ${prefix}_option.txt
touch ${prefix}_Ne.txt
touch ${prefix}_NexLD.txt
"""
}
process SUMMARIZE {
tag "$meta individuals"
label 'process_low'
input:
tuple val(meta), path(ne_output)
tuple path(bed), path(bim), path(fam)
output:
tuple val(meta), path("*_individuals.csv"), emit: csv
script:
def prefix = "${bed.getBaseName()}"
def summary = "${prefix}_Ne_${meta}_individuals.csv"
"""
parse_ne_results.py > ${summary}
"""
stub:
def summary = "Ne_${meta}_individuals.csv"
"""
touch ${summary}
"""
}
workflow LDNE_PIPELINE {
iterations_ch = individuals_ch.combine(steps_ch)//.view()
.map{ iteration -> [[
id:"${iteration[0]}_individual_${iteration[1]}_step",
individuals: "${iteration[0]}",
step: "${iteration[1]}"], iteration[0] * iteration[1] ]}//.view()
plink_input_ch = Channel.fromPath( "${params.prefix}.{bim,bed,fam}")
.collect()//.view()
// subsetting plink files
PLINK_SUBSET(iterations_ch, plink_input_ch, params.species_opts)
// create GENEPOP file
PED2GENEPOP(PLINK_SUBSET.out.ped, PLINK_SUBSET.out.map)
// launch LDNE
LDNE(PED2GENEPOP.out.genepop)
neestimator_ch = LDNE.out.tabular
.map { meta, path -> [
meta["individuals"],
path
]}
.groupTuple( by: 0, size: params.steps )
//.view()
// collect and parse results
SUMMARIZE(neestimator_ch, plink_input_ch)
}
workflow {
LDNE_PIPELINE()
}