-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.nf
156 lines (130 loc) · 4.96 KB
/
main.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
nextflow.enable.dsl = 2
include { SEQTK_SUBSET as SUBSET } from './modules/seqtk/main'
//include { SEQKIT_GET_LENGTH } from './modules/seqkit/main'
include { ALIGN_GENOMES } from './modules/align/main'
include { FIXCHR } from './modules/fixchr/main'
include { SYRI } from './modules/syri/main'
include { PLOTSR } from './modules/plotsr/main'
// Pairwise modules
include { ALIGN_PAIRWISE } from './modules/align/main'
include { SYRI_PAIRWISE } from './modules/syri/main'
include { PLOTSR_PAIRWISE } from './modules/plotsr/main'
/*
Samplesheet (CSV with a header row), columns:
name,fasta
*/
// Start-up banner: prints the results directory and every parameter this script reads,
// so that a run can be reconstructed from its log alone.
// plotsr colors is listed because params.plotsr_colors is passed to PLOTSR_PAIRWISE below.
// The banner text is left unindented on purpose: it is emitted verbatim.
log.info """\
=======================================================================================================================
=======================================================================================================================
nf-plotsv
---------
Plot structural variation across genomes using the 'Schneeberger tools'
-----------------------------------------------------------------------------------------------------------------------
Niklas Schandry niklas@bio.lmu.de github.com/nschan/nf-plotsv
-----------------------------------------------------------------------------------------------------------------------
Results directory : ${params.out}
Parameters:
samplesheet : ${params.samplesheet}
reference : ${params.reference}
ref_genome : ${params.ref_genome}
reorient : ${params.reorient}
pairwise : ${params.pairwise}
subset_pattern : ${params.subset_pattern}
plotsr config : ${params.plotsr_conf}
plotsr args : ${params.plotsr_args}
plotsr tracks : ${params.plotsr_tracks}
plotsr colors : ${params.plotsr_colors}
=======================================================================================================================
=======================================================================================================================
"""
.stripIndent(false)
/*
PREPARE GENOMES
---------------
Subset to whatever matches the pattern
branch into reference and non-reference genomes
align genomes
fix orientation
PREPARE_GENOMES uses FIXCHR to find inverted chromosomes, and then passes those to SEQTK_ORIENT to reverse-complement the inverted chromosomes.
*/
// PREPARE_GENOMES
//   take: input - channel of samplesheet rows, passed to SUBSET unchanged
//   emit: fixed - channel of maps [name: ..., path: ...], one per genome
//
// With params.reorient set, every non-reference genome is aligned against the
// reference and passed through FIXCHR; the reference itself is forwarded as it
// comes out of SUBSET. Without it, SUBSET output is forwarded directly.
workflow PREPARE_GENOMES {
    take:
    input

    main:
    // SUBSET is needed in both modes, so it is invoked once up front.
    SUBSET(input)

    if (params.reorient) {
        // Split on the first tuple element (the genome name).
        SUBSET.out
            .branch { row ->
                REF: row[0] == params.reference
                ASSEMBLY: row[0] != params.reference
            }
            .set { ch_branched }

        ALIGN_GENOMES(ch_branched.ASSEMBLY, tuple(params.reference, params.ref_genome))
            | FIXCHR
            | map { it -> [name: it[0], path: it[1]] }
            | set { ch_reoriented }

        // The reference must be converted to the same [name, path] map shape as
        // the reoriented assemblies: consumers of this workflow read `.name` and
        // `.path` from every emitted item.
        ch_branched.REF
            .map { it -> [name: it[0], path: it[1]] }
            .concat(ch_reoriented)
            .set { fixed }
    } else {
        SUBSET.out
            .map { it -> [name: it[0], path: it[1]] }
            .set { fixed }
    }

    emit:
    fixed
}
/*
This workflow is simple, and largely an exercise in manipulating channels.
In pairwise mode, the input channel is collated so that two consecutive rows become a tuple, which is then unnested.
After passing everything through alignment and syri, the syri outputs are joined to the input channel (to preserve the order)
and then collected into a tuple which is deconstructed in PLOTSR_PAIRWISE into a list of arguments.
The nextflow part is okay, but naturally the PLOTSR_PAIRWISE process is a pile of bash.
*/
// PLOTSV: reads the samplesheet, prepares the genomes, then either
//   - pairwise mode: aligns each genome to the next one (sliding window of two),
//     runs SYRI on every pair and draws a single plot with PLOTSR_PAIRWISE, or
//   - reference mode: aligns every prepared genome to params.ref_genome,
//     runs SYRI and PLOTSR.
workflow PLOTSV {
    // One item per samplesheet row; header:true turns each row into a map keyed by column name.
    ch_input = Channel.fromPath(params.samplesheet)
        | splitCsv(header:true)

    // Genome names only, in samplesheet order.
    ch_input
        .map { it -> [name: it.name] }
        .set { ch_order }

    PREPARE_GENOMES(ch_input)

    //SEQKIT_GET_LENGTH(PREPARE_GENOMES.out)
    //SEQKIT_GET_LENGTH
    // .out
    // .collect()
    // .set { lengths }

    if (params.pairwise) {
        // Match prepared genomes to the samplesheet names by key and keep the
        // prepared item. This is built once and shared, so the genome pairs and
        // the genome file list below are derived from the same sequence of items.
        ch_order
            .cross(PREPARE_GENOMES.out)
            .map { it -> it[1] }
            .set { ch_ordered }

        // Sliding window: size 2, step 1, incomplete trailing window dropped.
        ch_ordered
            .collate(2, 1, false)
            .map { it -> [name_A: it[0].name, genome_A: it[0].path, name_B: it[1].name, genome_B: it[1].path] }
            .set { ch_chunked }

        ch_chunked
            | ALIGN_PAIRWISE
            | SYRI_PAIRWISE

        // Third element of each syri_out item, gathered into a single list.
        // NOTE(review): collected in emission order; confirm that this matches
        // the order PLOTSR_PAIRWISE expects relative to ch_names / ch_prepared_files.
        SYRI_PAIRWISE
            .out
            .syri_out
            .map { it -> it[2] }
            .collect()
            .dump(tag: 'SYRI_out')
            .set { plotsr_in }

        ch_ordered
            .map { it -> it.path }
            .collect()
            .set { ch_prepared_files }

        ch_order
            .map { it -> it.name }
            .flatten()
            .collect()
            .set { ch_names }

        PLOTSR_PAIRWISE(plotsr_in, ch_names, ch_prepared_files, params.plotsr_conf, params.plotsr_args, params.plotsr_tracks, params.plotsr_colors)
    } else {
        ALIGN_GENOMES(PREPARE_GENOMES.out, tuple(params.reference, params.ref_genome))
        SYRI(ALIGN_GENOMES.out)
        PLOTSR(SYRI.out.syri_out, params.reference, params.plotsr_conf, params.plotsr_args)
    }
}
// Entry workflow: runs PLOTSV.
workflow {
    PLOTSV()
}