-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathconcat_size_filt_nanopore.nf
62 lines (48 loc) · 1.41 KB
/
concat_size_filt_nanopore.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
// Use the stable DSL2 switch; `nextflow.preview.dsl` is the deprecated
// preview-era flag (requires Nextflow >= 20.07).
nextflow.enable.dsl=2

// Run date stamp (yyyyMMdd), used to build the default output directory name.
date = new Date().format( 'yyyyMMdd' )

// Pipeline parameters (all overridable on the command line).
params.outdir = "concat_ont-${date}"      // where merged files are published
params.fofn_fastq = "samples.tsv"         // headerless TSV: <sample>\t<fastq path>
params.min_seq_size = 4000                // minimum read length for the (currently disabled) seqkit filter

// One tuple per TSV row: (sample, file number, fastq path).
// The file number is the digit run between the last '_' and the
// '.fastq.gz' suffix of the path, e.g. 'run_12.fastq.gz' -> '12'.
fastqs = Channel.fromPath(params.fofn_fastq, checkIfExists: true)
    .splitCsv( header: ['sample', 'fastq'], sep: '\t' )
    .map { row ->
        // Dots are escaped so that only a literal '.fastq.gz' suffix matches.
        def numMatch = row.fastq ? (row.fastq =~ /.+_(\d+)\.fastq\.gz/) : null
        if( numMatch == null || !numMatch.find() ) {
            // Fail with the offending row instead of an opaque index error.
            throw new IllegalArgumentException(
                "Cannot extract file number from fastq path '${row.fastq}' " +
                "(sample '${row.sample}') in ${params.fofn_fastq}; " +
                "expected a name like <prefix>_<number>.fastq.gz" )
        }
        tuple(row.sample, numMatch.group(1), row.fastq)
    }
// Fetch one read file with `iget` and store it as <strain>_<fileNum>.fasta.gz.
//
// Input : (strain, fileNum, reads) where `reads` is passed as a plain value
//         (not staged by Nextflow) and handed to `iget` as the source.
// Output: (strain, fileNum, downloaded file).
//
// NOTE(review): despite the process name, the seqkit length filter is
// commented out inside the script, so the file is fetched unmodified and
// params.min_seq_size has no effect here; the output keeps a .fasta.gz name
// even though no conversion is performed. Confirm this is intentional.
// NOTE(review): the loaded module is minimap2, while the active command only
// calls `iget` -- confirm the module provides what the script needs.
process size_filter {

    tag "${strain}_${fileNum}"
    //label 'btk'
    module 'minimap2/2.24--h7132678_1'

    input:
        tuple val(strain), val(fileNum), val(reads)

    output:
        tuple val(strain), val(fileNum), path("${strain}_${fileNum}.fasta.gz")

    script:
        """
        #iget $reads - | seqkit seq --min-len ${params.min_seq_size} \
        # | gzip -c > ${strain}_${fileNum}.fasta.gz
        iget $reads ${strain}_${fileNum}.fasta.gz
        """
}
// Concatenate all per-file outputs of one strain into a single
// <strain>.merged.fasta.gz (plain `cat` of the gzip files, in
// lexicographic file-name order).
//
// Input : (strain, reads) where `reads` is one staged file or a list of them.
// Output: (strain, merged file).
//
// The merged file is published with mode 'move', i.e. it is moved out of the
// task work directory into ${params.outdir}/concatFasta; nothing in this
// script consumes the output channel afterwards.
process concat_fasta {
    tag "$strain"
    publishDir "${params.outdir}/concatFasta",
        mode: 'move'

    input:
    tuple val(strain), path(reads)

    output:
    tuple val(strain), path("*.merged.fasta.gz")

    script:
    // A `path` input holding a single file is delivered as one path object
    // rather than a list, so normalise to a list before collecting names;
    // otherwise collect{} would iterate over the path itself.
    def stagedFiles = (reads instanceof Collection) ? reads : [reads]
    // `def` keeps the variable local to this script block.
    def readList = stagedFiles.collect { it.toString() }
    """
    cat ${readList.sort().join(' ')} > ${strain}.merged.fasta.gz
    """
}
// Entry workflow: fetch every listed file, gather the results per strain,
// then concatenate each strain's files into one merged archive.
workflow {
    // One task per (sample, file number, path) tuple from the sample sheet.
    size_filter(fastqs)

    // Group the per-file outputs by strain (tuple element 0) and keep only
    // the strain name together with its flat list of files; the file numbers
    // are dropped at this point.
    ch_cat_fasta = size_filter.out
        .groupTuple(by: 0)
        .map { strain, fileNums, fastas -> [ strain, fastas.flatten() ] }

    concat_fasta(ch_cat_fasta)
}