-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmultipleAssembly.nf
104 lines (77 loc) · 2.45 KB
/
multipleAssembly.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
// Enable the DSL2 syntax used by the process and workflow definitions in this file.
// `nextflow.enable.dsl` is the stable replacement for the deprecated
// `nextflow.preview.dsl` switch (available from Nextflow 20.07.1 onwards).
nextflow.enable.dsl = 2

// Run date (yyyyMMdd); only used to build the default output directory name.
date = new Date().format('yyyyMMdd')

// Pipeline parameter defaults.
params.outdir = "multiassem-${date}"
params.reads = "mini_DF5120.ccs.fasta.gz"

// Emit one (strain, reads file) tuple per input file. The strain id is the
// file name with any trailing .ccs / .fa / .fasta / .gz suffixes removed,
// e.g. "mini_DF5120.ccs.fasta.gz" -> "mini_DF5120".
// The closure parameter is deliberately not called `file`, so it does not
// shadow Nextflow's built-in file() helper.
reads = Channel
    .fromPath(params.reads, checkIfExists: true)
    .map { fasta -> tuple(fasta.name - ~/(\.ccs)?(\.fa)?(\.fasta)?(\.gz)?$/, fasta) }
/*
assemblies = Channel
.fromPath(params.assemblies, checkIfExists: true)
.map { file -> tuple(file.Name - ~/(\.fa)?(\.fasta)?(\.gz)?$/, file) }
dmnd_db = Channel.fromPath(params.dmnd_db, checkIfExists: true).collect()
*/
// Assemble the reads with hifiasm and convert the primary-contig graph (GFA)
// into a gzipped FASTA.
//
// Input : (strain, reads) tuple.
// Output: (strain, "${strain}.hifiasm.fasta.gz") tuple, copied to
//         "${params.outdir}/hifiasm".
process hifiasm {
    tag "${strain}"
    publishDir "$params.outdir/hifiasm", mode: 'copy'

    input:
    tuple val(strain), path(reads)

    output:
    tuple val(strain), path("${strain}.hifiasm.fasta.gz")

    script:
    // The primary-contig graph is named <prefix>.bp.p_ctg.gfa by newer hifiasm
    // releases and <prefix>.p_ctg.gfa by older ones, so accept either.
    // The explicit existence check matters: in an `awk | fold | gzip` pipeline
    // the exit status is gzip's (unless pipefail is enabled), so a missing GFA
    // would otherwise yield an empty archive and a "successful" task.
    // An existing-but-empty GFA still produces an empty FASTA, as before.
    """
    /software/team301/hifiasm/hifiasm $reads -o $strain -t ${task.cpus}

    gfa="${strain}.bp.p_ctg.gfa"
    [ -e "\$gfa" ] || gfa="${strain}.p_ctg.gfa"
    if [ ! -e "\$gfa" ]; then
        echo "hifiasm: no primary contig GFA found for ${strain}" >&2
        exit 1
    fi

    awk '/^S/{print ">"\$2"\\n"\$3}' "\$gfa" | fold | gzip -c > ${strain}.hifiasm.fasta.gz
    """
}
// Assemble the reads with Flye (--pacbio-hifi, --meta) and publish the
// resulting assembly as a bgzip-compressed FASTA.
//
// Input : (strain, reads) tuple.
// Output: (strain, "${strain}.flye.fasta.gz") tuple, copied to params.outdir.
process flye {
    tag "${strain}"
    label 'btk'
    publishDir "$params.outdir", mode: 'copy'

    input:
    tuple val(strain), path(reads)

    output:
    tuple val(strain), path("${strain}.flye.fasta.gz")

    script:
    // bgzip reads the assembly directly instead of through `cat ... |`:
    // with the pipe, a missing assembly.fasta only fails `cat`, the pipeline
    // status comes from bgzip, and an empty archive would be published.
    """
    /software/team301/Flye-2.8.2/Flye/bin/flye --threads ${task.cpus} \
        --pacbio-hifi $reads --meta -o flyemeta
    bgzip -c flyemeta/assembly.fasta > ${strain}.flye.fasta.gz
    """
}
// Run the wtdbg2.pl wrapper on the reads (preset `ccs`, genome size 100m),
// then bgzip the consensus FASTA and rename it to the published file name.
// NOTE(review): the genome size here (100m) differs from the canu process
// (genomeSize=60M) — confirm which value is intended.
//
// Input : (strain, reads) tuple.
// Output: (strain, "${strain}.wtdbg2.fasta.gz") tuple, copied to params.outdir.
process wtdbg2 {
    label 'btk'
    tag "$strain"
    publishDir "${params.outdir}", mode: 'copy'

    input:
    tuple val(strain), path(reads)

    output:
    tuple val(strain), path("${strain}.wtdbg2.fasta.gz")

    script:
    // Consensus file that the commands below compress and rename.
    def consensus = "${strain}.cns.fa"
    """
    /software/team301/wtdbg2/wtdbg2.pl -t ${task.cpus} -x ccs -g 100m -o ${strain} ${reads}
    bgzip ${consensus}
    mv ${consensus}.gz ${strain}.wtdbg2.fasta.gz
    """
}
// Run canu on the reads (-pacbio-hifi, genomeSize=60M), writing into a
// directory named after the strain. The whole directory is the process
// output and is published to params.outdir as a symlink rather than a copy.
// NOTE(review): gridEngineResourceOption implies canu may hand work to a
// batch scheduler; confirm the task does not finish before canu's own jobs do.
//
// Input : (strain, reads) tuple.
// Output: (strain, directory "${strain}") tuple.
process canu {
    tag "$strain"
    publishDir "${params.outdir}", mode: 'symlink'

    input:
    tuple val(strain), path(reads)

    output:
    tuple val(strain), path("${strain}")

    script:
    // Scheduler resource string handed to canu verbatim. MEMORY and THREADS
    // are literal text here — presumably placeholders canu fills in; verify
    // against the canu documentation.
    def gridOpts = '-M MEMORY -R "select[mem>MEMORY] rusage[mem=MEMORY]" -n THREADS -R "span[hosts=1]"'
    """
    /software/tola/bin/canu-2.1.1/bin/canu -d ${strain} \
        -p ${strain} gridEngineResourceOption='${gridOpts}' \
        genomeSize=60M -pacbio-hifi ${reads}
    """
}
// Entry workflow: passes the same `reads` channel of (strain, reads file)
// tuples to the flye, wtdbg2 and canu processes; the three calls are
// independent of one another and their outputs are not used further here.
// NOTE(review): the hifiasm process defined in this file is never invoked —
// confirm whether that is intentional.
workflow {
flye(reads)
wtdbg2(reads)
canu(reads)
// Disabled downstream steps; the processes and channels they name
// (mask_assembly, chunk_assembly, diamond_search, unchunk_hits, dmnd_db)
// are not defined in the visible part of this file.
/*mask_assembly(assemblies) | chunk_assembly
diamond_search(chunk_assembly.out, dmnd_db) | unchunk_hits
*/
}