// The ATAC-seq pipeline is a sequential pipeline that generates peaks from the provided FASTQ data.
// Here are the steps involved in the ATAC-seq analysis:
// 1. Pre-alignment processing - trimming adapters using Trim Galore.
// 2. Genome alignment using Bowtie2.
// 3. Indexing and sorting using bamtools.
// 4. Post-alignment processing - filtering uninformative reads: Picard MarkDuplicates and CollectInsertSizeMetrics.
// 5. Post-alignment processing - peak calling using MACS2.
println " ATAC-seq pipeline"
println " "
println "Here are the steps involved in the ATAC-seq Analysis"
println "1. Pre Alignment processing - Trimming the adapters using trim galore."
println "2. Genome alignment using bowtie2."
println "3. Indexing and sorting using bamtools."
println "4. Post Alignment processing - Filtering uninformatice reads: Picard to MarkDuplicates and CollectInsertSizeMetrics."
println "5. Post Alignment processing - Peak calling using macs2."
process generate_fastqc_multiqc_reports {
    debug true
    errorStrategy 'terminate'

    input:
    val config_directory
    val fastq_files
    val fastqc_cores

    output:
    val "${config_directory}/fastqc_and_multiqc_reports", emit: fastqc_output

    script:
    """
    echo '${config_directory}/generate_fastqc_reports.sh $fastq_files $fastqc_cores $config_directory'
    bash ${config_directory}/generate_fastqc_reports.sh $fastq_files $fastqc_cores $config_directory
    """
}
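
// Runs trim_galore_script.sh from config_directory, which is assumed to adapter-trim the FASTQ files
// listed in samples_file with Trim Galore using trim_galore_cores threads.
// The FastQC output path is taken as an input only to force this step to run after the QC step.
// Emits the path to the trim_galore_output directory.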
process trim_galore_adapter_trimming {
    debug true
    errorStrategy 'terminate'

    input:
    val config_directory
    val fastq_files
    val samples_file
    val trim_galore_cores
    val fastqc_output

    output:
    val "${config_directory}/trim_galore_output", emit: trimmed_files

    script:
    """
    echo '${config_directory}/trim_galore_script.sh $fastq_files $samples_file $trim_galore_cores $config_directory'
    bash ${config_directory}/trim_galore_script.sh $fastq_files $samples_file $trim_galore_cores $config_directory
    """
}
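
// Runs bowtie2_fq_2bam.sh from config_directory, which is assumed to align the trimmed reads for each
// sample in samples_file against the Bowtie2 index in genome_index_directory and produce BAM files.
// Emits the path to the bowtie_output directory.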
process mapping_bowtie2 {
    debug true
    errorStrategy 'terminate'

    input:
    val config_directory
    val samples_file
    val genome_index_directory
    val trimmed_files

    output:
    val "${config_directory}/bowtie_output", emit: mapped_files

    script:
    """
    echo '${config_directory}/bowtie2_fq_2bam.sh $trimmed_files $samples_file $config_directory $genome_index_directory'
    bash ${config_directory}/bowtie2_fq_2bam.sh $trimmed_files $samples_file $config_directory $genome_index_directory
    """
}
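
// Runs mark_duplicates_bam.sh from config_directory, which is assumed to call Picard MarkDuplicates
// (using the Picard installation at picard_filepath) on the mapped BAM files.
// Emits the path to the mark_duplicate_output directory.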
process mark_duplicates_picard {
    debug true
    errorStrategy 'terminate'

    input:
    val config_directory
    val samples_file
    val picard_filepath
    val mapped_files

    output:
    val "${config_directory}/mark_duplicate_output", emit: marked_dup_files

    script:
    """
    echo '${config_directory}/mark_duplicates_bam.sh $mapped_files $samples_file $picard_filepath $config_directory'
    bash ${config_directory}/mark_duplicates_bam.sh $mapped_files $samples_file $picard_filepath $config_directory
    """
}
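
// Runs collect_insert_size_metrics.sh from config_directory, which is assumed to call Picard
// CollectInsertSizeMetrics on the duplicate-marked BAM files.
// Emits the path to the collect_insert_metrics_output directory.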
process collect_insert_sizes_picard {
    debug true
    errorStrategy 'terminate'

    input:
    val config_directory
    val samples_file
    val picard_filepath
    val marked_dup_files

    output:
    val "${config_directory}/collect_insert_metrics_output", emit: insert_size_metrics

    script:
    """
    echo '${config_directory}/collect_insert_size_metrics.sh $marked_dup_files $samples_file $picard_filepath $config_directory'
    bash ${config_directory}/collect_insert_size_metrics.sh $marked_dup_files $samples_file $picard_filepath $config_directory
    """
}
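
// Runs macs2_peak_calling.sh from config_directory, which is assumed to call peaks with MACS2 on the
// duplicate-marked BAM files for each sample in samples_file.
// Emits the path to the macs2_peak_calling_output directory.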
process macs2_peak_calling {
    debug true
    errorStrategy 'terminate'

    input:
    val config_directory
    val samples_file
    val marked_dup_files

    output:
    val "${config_directory}/macs2_peak_calling_output", emit: peak_files

    script:
    """
    echo '${config_directory}/macs2_peak_calling.sh $marked_dup_files $samples_file $config_directory'
    bash ${config_directory}/macs2_peak_calling.sh $marked_dup_files $samples_file $config_directory
    """
}
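
// Runs bedGraph_to_bigWig.sh from config_directory, which is assumed to convert the MACS2 bedGraph
// output into .bigWig tracks using the genome file at hg38gn_filepath.
// Emits the path to the macs2_peak_calling_bedgraph_output directory.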
process generate_bigwig_files {
    debug true
    errorStrategy 'terminate'

    input:
    val config_directory
    val hg38gn_filepath
    val peak_files

    output:
    val "${config_directory}/macs2_peak_calling_bedgraph_output", emit: bigwig_files

    script:
    """
    echo '${config_directory}/bedGraph_to_bigWig.sh $peak_files $hg38gn_filepath $config_directory'
    bash ${config_directory}/bedGraph_to_bigWig.sh $peak_files $hg38gn_filepath $config_directory
    """
}
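
// The workflow reads all paths and core counts from params (set on the command line or in nextflow.config)
// and chains the processes by passing each process's emitted output path as an input to the next one,
// so the steps run strictly in sequence.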
workflow {
    config_directory = params.config_directory
    fastq_files = params.fastq_files
    samples_file = params.samples_file

    // Logging config variables
    println " "
    println "Config directory: ${config_directory}"
    println "Fastq files directory: ${fastq_files}"
    println " "

    // Running FastQC and MultiQC reports
    fastqc_cores = params.fastqc_cores
    println " "
    println "FastQC and MultiQC output directory: ${config_directory}/fastqc_and_multiqc_reports"
    println "No. of cores to be used for generating FastQC reports: ${fastqc_cores}"
    println " "
    fastqc_output = generate_fastqc_multiqc_reports(config_directory, fastq_files, fastqc_cores)

    // Running Trim Galore
    trim_galore_cores = params.tm_galore_cores
    println " "
    println "Trim Galore output directory: ${config_directory}/trim_galore_output"
    println "No. of cores to be used to trim the adapters: ${trim_galore_cores}"
    println " "
    trimmed_files = trim_galore_adapter_trimming(config_directory, fastq_files, samples_file, trim_galore_cores, fastqc_output.fastqc_output)

    // Running Bowtie2 mapping
    genome_index_directory = params.genome_index_directory
    println " "
    println "Bowtie2 mapping output directory: ${config_directory}/bowtie_output"
    println " "
    mapped_files = mapping_bowtie2(config_directory, samples_file, genome_index_directory, trimmed_files.trimmed_files)

    // Running MarkDuplicates (Picard)
    picard_filepath = params.picard_filepath
    println " "
    println "MarkDuplicates (Picard) output directory: ${config_directory}/mark_duplicate_output"
    println " "
    marked_dup_files = mark_duplicates_picard(config_directory, samples_file, picard_filepath, mapped_files.mapped_files)

    // Running CollectInsertSizeMetrics (Picard)
    println " "
    println "Collect insert size metrics output directory: ${config_directory}/collect_insert_metrics_output"
    println " "
    insert_size_metrics = collect_insert_sizes_picard(config_directory, samples_file, picard_filepath, marked_dup_files.marked_dup_files)

    // Running MACS2 peak calling
    println " "
    println "MACS2 peak-calling output directory: ${config_directory}/macs2_peak_calling_output"
    println " "
    peak_files = macs2_peak_calling(config_directory, samples_file, marked_dup_files.marked_dup_files)

    // Converting .bed to .bedGraph to .bigWig files
    hg38gn_filepath = params.hg38gn_filepath
    println " "
    println ".bigWig generation output directory: ${config_directory}/macs2_peak_calling_bedgraph_output"
    println " "
    bigwig_files = generate_bigwig_files(config_directory, hg38gn_filepath, peak_files.peak_files)
}