forked from bharatij/STRetchPipeline-wdl
-
Notifications
You must be signed in to change notification settings - Fork 0
/
STRetchRealignSTRCounts.wdl
255 lines (214 loc) · 7.13 KB
/
STRetchRealignSTRCounts.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
# Workflow: STRetchPipeline, step 2 -- read realignment and STR read counting.
#
# Calls, in data-flow order:
#   RunMosDepth  runs mosdepth on the input CRAM and writes <prefix>.median_cov
#   Runbwa       pipes bazam -> bwa mem -> samtools to build <prefix>.STRdecoy.bam
#                and its index
#   RunSTRcov    runs bedtools coverage over STR_BED on the realigned BAM
#   RunLocuscov  runs identify_locus.py over TRF_BED on the realigned BAM
#
# RunSTRcov and RunLocuscov both consume the outputs of Runbwa; RunMosDepth
# depends only on the workflow inputs.
workflow STRetchPipeline {
    String pipeline_version = "1.0"

    meta {
        author: "Bharati Jadhav"
        email: "bharati.jadhav@mssm.edu"
        description: "STRetch read realignment and generating STR read counts step"
    }

    # Reference (and its index) passed to the tools that read the input CRAM.
    File cram_fasta
    File cram_fasta_fai

    # Input alignment and its index.
    File cramFile
    File craiFile

    # BED file handed to bazam (-L) and to identify_locus.py (--bed).
    File TRF_BED

    # Prefix for every generated file: the CRAM file name without ".cram".
    String output_prefix = basename(cramFile, ".cram")

    # STRetch STRdecoy reference genome fasta and index files
    File ref_fasta
    File ref_fasta_fai
    File ref_fasta_amb
    File ref_fasta_ann
    File ref_fasta_bwt
    File ref_fasta_pac
    File ref_fasta_sa
    File ref_fasta_genome

    # STRdecoys.sorted.bed
    File STR_BED

    # Forwarded to the `preemptible` runtime attribute of every task.
    Int preemptible_tries = 3

    call RunMosDepth {
        input:
            cramFile          = cramFile,
            craiFile          = craiFile,
            cram_fasta        = cram_fasta,
            cram_fasta_fai    = cram_fasta_fai,
            output_prefix     = output_prefix,
            preemptible_tries = preemptible_tries
    }

    call Runbwa {
        input:
            cramFile          = cramFile,
            craiFile          = craiFile,
            cram_fasta        = cram_fasta,
            cram_fasta_fai    = cram_fasta_fai,
            ref_fasta         = ref_fasta,
            ref_fasta_fai     = ref_fasta_fai,
            ref_fasta_amb     = ref_fasta_amb,
            ref_fasta_ann     = ref_fasta_ann,
            ref_fasta_bwt     = ref_fasta_bwt,
            ref_fasta_pac     = ref_fasta_pac,
            ref_fasta_sa      = ref_fasta_sa,
            TRF_BED           = TRF_BED,
            output_prefix     = output_prefix,
            preemptible_tries = preemptible_tries
    }

    call RunSTRcov {
        input:
            bamOut            = Runbwa.bamOut,
            baiOut            = Runbwa.baiOut,
            ref_fasta_genome  = ref_fasta_genome,
            STR_BED           = STR_BED,
            output_prefix     = output_prefix,
            preemptible_tries = preemptible_tries
    }

    call RunLocuscov {
        input:
            bamOut            = Runbwa.bamOut,
            baiOut            = Runbwa.baiOut,
            TRF_BED           = TRF_BED,
            output_prefix     = output_prefix,
            preemptible_tries = preemptible_tries
    }

    output {
        File medCov   = RunMosDepth.medCov
        File strCov   = RunSTRcov.strCov
        File locusCov = RunLocuscov.locusCov
    }
}
#Task Definitions
# Task RunMosDepth
#
# Runs mosdepth on the input CRAM, then feeds the resulting
# <output_prefix>.mosdepth.global.dist.txt to mosdepth_median.py, which writes
# <output_prefix>.median_cov.
#
# Inputs:
#   cramFile / craiFile          input alignment and its index
#   cram_fasta / cram_fasta_fai  reference passed to mosdepth via -f
#   output_prefix                prefix for all generated files
#   preemptible_tries            value for the `preemptible` runtime attribute
# Output:
#   medCov                       <output_prefix>.median_cov
task RunMosDepth {
    File cramFile
    File craiFile
    File cram_fasta
    File cram_fasta_fai
    String output_prefix
    Int preemptible_tries

    # Resource knobs; memory is in GB, disk sizes are in GB.
    String machine_mem_size = 5
    Int addtional_disk_size = 1

    # Disk request = inputs + reference + a small allowance for the dist.txt
    # output, rounded up, plus the fixed padding above.
    Float distTxt_size = 0.01
    Float ref_size = size(cram_fasta, "GB")
    Float refidx_size = size(cram_fasta_fai, "GB")
    Int disk_size = ceil(size(cramFile, "GB") + size(craiFile, "GB") + distTxt_size + ref_size + refidx_size) + addtional_disk_size

    command {
        /STRetch-master/tools/bin/mosdepth -n -t 8 -f ${cram_fasta} ${output_prefix} ${cramFile}
        /STRetch-master/tools/bin/python /STRetch-master/scripts/mosdepth_median.py \
            --out ${output_prefix}.median_cov \
            ${output_prefix}.mosdepth.global.dist.txt
    }

    # NOTE(review): the command passes -t 8 to mosdepth while only one cpu is
    # requested here -- confirm this is intended.
    # NOTE(review): exit code 1 is accepted as success for this task; confirm
    # which tool is expected to return 1 on a good run.
    runtime {
        docker: "bharatij/stretch_pipeline:stretch"
        memory: machine_mem_size + " GB"
        disks: "local-disk " + disk_size + " HDD"
        cpu: "1"
        zones: "us-central1-c us-central1-b"
        preemptible: preemptible_tries
        continueOnReturnCode: [0,1]
    }

    output {
        File medCov = "${output_prefix}.median_cov"
    }
}
# Task Runbwa
#
# Streams reads out of the input CRAM with bazam (restricted by -L TRF_BED),
# aligns them with `bwa mem` against ref_fasta, converts and coordinate-sorts
# the result with samtools into <output_prefix>.STRdecoy.bam, and indexes it.
#
# Inputs:
#   cramFile / craiFile          input alignment and its index
#   cram_fasta / cram_fasta_fai  reference handed to bazam through the
#                                samjdk.reference_fasta Java property
#   TRF_BED                      regions passed to bazam -L
#   output_prefix                read-group ID/SM and prefix of the output BAM
#   ref_fasta                    reference passed to bwa mem
#   ref_fasta_fai/amb/ann/bwt/pac/sa
#                                not named in the command; declared so they are
#                                localized together with ref_fasta (presumably
#                                bwa finds them next to it -- confirm)
#   preemptible_tries            value for the `preemptible` runtime attribute
# Outputs:
#   bamOut                       <output_prefix>.STRdecoy.bam
#   baiOut                       <output_prefix>.STRdecoy.bam.bai
#
# Failure handling: the script runs with `set -eo pipefail`. pipefail alone is
# not enough: without -e a failed bazam/bwa/samtools pipeline would not stop the
# script, `samtools index` would still run on whatever BAM had been written and
# its exit status would become the task's status. With -e the script aborts at
# the failed pipeline, so no index is produced and the task cannot succeed with
# a partial BAM.
task Runbwa {
    File cramFile
    File craiFile
    File cram_fasta
    File cram_fasta_fai
    File TRF_BED
    String output_prefix
    File ref_fasta
    File ref_fasta_fai
    File ref_fasta_amb
    File ref_fasta_ann
    File ref_fasta_bwt
    File ref_fasta_pac
    File ref_fasta_sa
    Int addtional_disk_size = 10
    String machine_mem_size = 32
    Int preemptible_tries

    # Disk request (GB): three times the CRAM size plus three times the
    # reference size, rounded up, plus fixed padding.
    Float out_size = size(cramFile, "GB") * 3
    Float ref_size = size(ref_fasta, "GB") * 3
    Int disk_size = ceil(out_size + ref_size) + addtional_disk_size

    command {
        set -eo pipefail
        java -Xmx16g -Dsamjdk.reference_fasta=${cram_fasta} \
            -jar /STRetch-master/tools/bin/bazam.jar \
            -pad 5 \
            -n 6 \
            -L ${TRF_BED} \
            -bam ${cramFile} | \
        /STRetch-master/tools/bin/bwa mem -p -M -t 7 -R "@RG\tID:${output_prefix}\tPL:illumina\tPU:NA\tLB:NA\tSM:${output_prefix}" ${ref_fasta} - | \
        /STRetch-master/tools/bin/samtools view -bSuh - | \
        /STRetch-master/tools/bin/samtools sort -o ${output_prefix}.STRdecoy.bam -T ${output_prefix}.STRdecoy
        /STRetch-master/tools/bin/samtools index ${output_prefix}.STRdecoy.bam
    }

    # NOTE(review): bazam is given -n 6 and bwa -t 7 while one cpu is requested
    # here -- confirm this is intended.
    # NOTE(review): exit code 1 is still accepted as success; with the abort
    # behaviour above a failed run leaves no .bai, so the declared outputs will
    # be missing in that case. Confirm whether 1 needs to be tolerated at all.
    runtime {
        docker: "bharatij/stretch_pipeline:stretch"
        memory: machine_mem_size + " GB"
        disks: "local-disk " + disk_size + " HDD"
        cpu: "1"
        zones: "us-central1-c us-central1-b"
        preemptible: preemptible_tries
        continueOnReturnCode: [0,1]
    }

    output {
        File bamOut = "${output_prefix}.STRdecoy.bam"
        File baiOut = "${output_prefix}.STRdecoy.bam.bai"
    }
}
# Task RunSTRcov
#
# Runs `bedtools coverage -counts -sorted` with STR_BED as -a and the realigned
# BAM as -b, using ref_fasta_genome as the -g genome file, and redirects the
# result to <output_prefix>.STRdecoy.STR_counts.
#
# Inputs:
#   bamOut / baiOut     realigned BAM and its index (baiOut is not named in the
#                       command; it is declared so it is localized with the BAM)
#   output_prefix       prefix of the output file
#   ref_fasta_genome    genome file passed to bedtools -g
#   STR_BED             intervals passed to bedtools -a
#   preemptible_tries   value for the `preemptible` runtime attribute
# Output:
#   strCov              <output_prefix>.STRdecoy.STR_counts
#
# Return-code policy: only exit code 0 is accepted. The output is produced by a
# shell redirect, so the declared output file exists even when bedtools fails;
# accepting exit code 1 as success would therefore let a failed run be
# published as an empty or truncated counts file.
task RunSTRcov {
    File bamOut
    File baiOut
    String output_prefix
    File ref_fasta_genome
    File STR_BED
    Int addtional_disk_size = 1
    String machine_mem_size = 4
    Int preemptible_tries

    # Disk request (GB): BAM + index + BED + a small allowance for the counts
    # file, rounded up, plus fixed padding.
    Float strCov_size = 0.01
    Int disk_size = ceil(size(bamOut, "GB") + size(baiOut, "GB") + size(STR_BED, "GB") + strCov_size) + addtional_disk_size

    command {
        /STRetch-master/tools/bin/bedtools coverage -counts -sorted -g ${ref_fasta_genome} -a ${STR_BED} -b ${bamOut} > ${output_prefix}.STRdecoy.STR_counts
    }

    runtime {
        docker: "bharatij/stretch_pipeline:stretch"
        memory: machine_mem_size + " GB"
        disks: "local-disk " + disk_size + " HDD"
        cpu: "1"
        zones: "us-central1-c us-central1-b"
        preemptible: preemptible_tries
        continueOnReturnCode: 0
    }

    output {
        File strCov = "${output_prefix}.STRdecoy.STR_counts"
    }
}
# Task RunLocuscov
#
# Runs identify_locus.py on the realigned BAM with TRF_BED as --bed and writes
# <output_prefix>.STRdecoy.locus_counts, then deletes the task's localized
# copies of the BAM and its index.
#
# Inputs:
#   bamOut / baiOut     realigned BAM and its index (baiOut is not named in the
#                       script invocation; it is declared so it is localized
#                       with the BAM)
#   output_prefix       prefix of the output file
#   TRF_BED             BED file passed as --bed; defaults to "TRF.bed" when the
#                       caller does not supply one
#   preemptible_tries   value for the `preemptible` runtime attribute
# Output:
#   locusCov            <output_prefix>.STRdecoy.locus_counts
#
# Exit status: the script's status is captured before the cleanup and returned
# with an explicit `exit`. Without that, the status of the final `rm` would be
# the task's status and a failed identify_locus.py run would be reported as
# success. Only exit code 0 is accepted, so a Python failure (exit code 1) is
# not tolerated either.
#
# PATH: /STRetch-master/tools/bin is prepended for the script invocation only
# (presumably so the script can find the bundled tools -- confirm); the shell's
# own PATH is left untouched, so nothing needs restoring afterwards.
task RunLocuscov {
    File bamOut
    File baiOut
    String output_prefix
    File TRF_BED = "TRF.bed"
    Int addtional_disk_size = 1
    String machine_mem_size = 4
    Int preemptible_tries

    # Disk request (GB): BAM + index + a small allowance for the counts file,
    # rounded up, plus fixed padding.
    Float strLocus_size = 0.01
    Int disk_size = ceil(size(bamOut, "GB") + size(baiOut, "GB") + strLocus_size) + addtional_disk_size

    command {
        PATH=/STRetch-master/tools/bin:$PATH \
        /STRetch-master/tools/bin/python /STRetch-master/scripts/identify_locus.py \
            --bam ${bamOut} \
            --bed ${TRF_BED} \
            --output ${output_prefix}.STRdecoy.locus_counts
        status=$?
        rm -f ${bamOut} ${baiOut}
        exit $status
    }

    runtime {
        docker: "bharatij/stretch_pipeline:stretch"
        memory: machine_mem_size + " GB"
        disks: "local-disk " + disk_size + " HDD"
        cpu: "1"
        zones: "us-central1-c us-central1-b"
        preemptible: preemptible_tries
        continueOnReturnCode: 0
    }

    output {
        File locusCov = "${output_prefix}.STRdecoy.locus_counts"
    }
}