generated from Wang-Bioinformatics-Lab/Nextflow_Workflow_Template
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnf_workflow.nf
108 lines (79 loc) · 2.33 KB
/
nf_workflow.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/usr/bin/env nextflow
nextflow.enable.dsl=2

// Input locations. The workflow globs "*.mgf" inside `inputlibraries`
// and every file (recursively) inside `inputspectra`.
params.inputlibraries = "data/libraries"
params.inputspectra = "data/spectra"

// Parameters
params.topk = 1                        // passed to tsv_merger.py as --topk
params.fragment_tolerance = 0.05       // passed to blink_analog_prefilter.py as --tolerance
params.library_min_cosine = 0.7        // passed to blink_analog_prefilter.py as --min_score
params.library_min_matched_peaks = 6   // passed to blink_analog_prefilter.py as --min_matches
params.analog_max_shift = 400          // passed to blink_analog_prefilter.py as --max_shift

// Blink Parameters
params.blink_ionization = "positive"   // positional argument of blink_analog_prefilter.py
params.blink_binwidth = 0.01           // passed as --bin_width
params.blink_numcores = 4              // passed as --num_cores

// Folder holding the helper scripts and the conda environment file used by
// every process. `projectDir` is the supported replacement for the deprecated
// `baseDir` implicit variable and points at the same directory (the one that
// contains this script).
TOOL_FOLDER = "$projectDir/bin"
/*
 * searchDataBLINK
 *
 * Runs bin/blink_analog_prefilter.py once for every combination of library
 * file and spectrum file and writes the result to a uniquely named CSV under
 * `search_results/`.
 *
 * Inputs:
 *   input_library   - a library file (repeated via `each`)
 *   input_spectrum  - a spectrum file (repeated via `each`)
 * Output:
 *   search_results/*.csv - optional; a task that produces no CSV emits nothing
 *                          instead of failing.
 */
process searchDataBLINK {
    //publishDir "./nf_output", mode: 'copy'

    conda "$TOOL_FOLDER/conda_env.yml"

    input:
    // `path` is the DSL2 qualifier for file inputs (the other processes in
    // this file already use it); `each` makes the process run for every
    // library x spectrum combination.
    each path(input_library)
    each path(input_spectrum)

    output:
    path 'search_results/*.csv', optional: true

    script:
    // Random file name per task; presumably so that the CSVs from different
    // tasks do not clash when they are staged together downstream.
    def randomFilename = UUID.randomUUID().toString()
    // The script is handed the real (symlink-resolved) location of each input
    // rather than the name staged in the task directory.
    def input_spectrum_abs = input_spectrum.toRealPath()
    def input_library_abs = input_library.toRealPath()
    """
    mkdir -p search_results
    echo $workDir
    previous_cwd=\$(pwd)
    echo \$previous_cwd
    python $TOOL_FOLDER/blink_analog_prefilter.py \
        "$input_spectrum_abs" \
        "$input_library_abs" \
        "\$previous_cwd/search_results/${randomFilename}.csv" \
        $params.blink_ionization \
        --tolerance $params.fragment_tolerance \
        --bin_width $params.blink_binwidth \
        --min_score $params.library_min_cosine \
        --min_matches $params.library_min_matched_peaks \
        --max_shift $params.analog_max_shift \
        --num_cores $params.blink_numcores
    """
}
/*
 * mergeResults
 *
 * Stages every incoming file under `results/`, runs bin/tsv_merger.py on that
 * folder and emits the single file `merged_results.tsv`, which is also copied
 * to ./nf_output.
 */
process mergeResults {
    publishDir "./nf_output", mode: 'copy'

    conda "${TOOL_FOLDER}/conda_env.yml"

    input:
    path "results/*"

    output:
    path 'merged_results.tsv'

    script:
    """
    python ${TOOL_FOLDER}/tsv_merger.py \
        results \
        merged_results.tsv \
        --topk ${params.topk}
    """
}
/*
 * getGNPSAnnotations
 *
 * Takes the merged results table (staged as `merged_results.tsv`), runs
 * bin/getGNPS_library_annotations.py on it and emits
 * `merged_results_with_gnps.tsv`, which is also copied to ./nf_output.
 */
process getGNPSAnnotations {
    publishDir "./nf_output", mode: 'copy'

    conda "${TOOL_FOLDER}/conda_env.yml"

    input:
    path "merged_results.tsv"

    output:
    path 'merged_results_with_gnps.tsv'

    script:
    """
    python ${TOOL_FOLDER}/getGNPS_library_annotations.py \
        merged_results.tsv \
        merged_results_with_gnps.tsv
    """
}
/*
 * Entry workflow: search every spectrum file against every library file,
 * merge all per-task result files into one table, then annotate that table.
 */
workflow {
    // Libraries: the *.mgf files directly inside params.inputlibraries.
    library_files = Channel.fromPath("${params.inputlibraries}/*.mgf")
    // Spectra: everything matched by the recursive glob under params.inputspectra.
    spectrum_files = Channel.fromPath("${params.inputspectra}/**")

    searchDataBLINK(library_files, spectrum_files)

    // collect() gathers all result files so the merge runs once over the full set.
    mergeResults(searchDataBLINK.out.collect())

    getGNPSAnnotations(mergeResults.out)
}