# The main entry point of your workflow.
# After configuring, running snakemake -n in a clone of this repository should successfully execute a dry-run of the workflow.
import os
import sys
import pandas as pd
from snakemake.utils import min_version, validate
### pipeline version
VERSION = '1.0.0'
### set minimum snakemake version ###
min_version('4.8.0')
if config['Workdir']:
    workdir: config['Workdir']
Df = pd.read_table(config['Metadata']).set_index('ID', drop=False)
Df = Df.astype(str)
validate(Df, 'schemas/metadata.schema.yaml')
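# A sketch of the expected metadata layout (illustrative only): the table must be
# tab-delimited with an 'ID' column, which is used as the index above; any other
# columns shown here (e.g. 'Fastq') are hypothetical placeholders, and the real
# required fields are defined in schemas/metadata.schema.yaml.
#
#   ID        Fastq
#   sample1   data/sample1.fastq.gz
#   sample2   data/sample2.fastq.gz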
Srcdir = srcdir('.') # directory where this Snakefile is located
### check whether SSUsearch_db has been downloaded
if config['Refdir']:
    Refdir = config['Refdir']
else:
    Refdir = Srcdir
Refdir = os.path.abspath(Refdir)
Downdir = Refdir
Refdir = '{}/SSUsearch_db'.format(Refdir)
if not os.path.exists(Refdir):
    mes = (
        '*** SSUsearch_db has not been downloaded yet..\n'
        '*** Downloading to {} ..\n'
        '*** Make sure you have write permission..\n'
        '*** or else change "Refdir" in the config file..\n'
    ).format(Downdir)
    print(mes)
    shell(
        """
        cd {Downdir} && curl 'https://zenodo.org/record/1492910/files/SSUsearch_db.tgz?download=1' -o SSUsearch_db.tgz && tar -xzvf SSUsearch_db.tgz
        """
    )
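# A manual alternative (optional): the same archive can be fetched ahead of time
# and unpacked into Refdir, which skips the automatic download above. This uses
# the same URL and tools as the shell command:
#
#   curl 'https://zenodo.org/record/1492910/files/SSUsearch_db.tgz?download=1' -o SSUsearch_db.tgz
#   tar -xzvf SSUsearch_db.tgz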
Gene = config['Gene']
Hmm = '{}/Hmm.{}.hmm'.format(Refdir, Gene) # hmm model for ssu
Gene_model_org = '{}/Gene_model_org.{}.fasta'.format(Refdir, Gene)
Ali_template = '{}/Ali_template.{}.silva.fasta'.format(Refdir, Gene)
Gene_tax = '{}/Gene_tax.{}.silva_108_rep_family.tax'.format(Refdir, Gene) # silva ref
Gene_db = '{}/Gene_db.{}.silva_108_rep.fasta'.format(Refdir, Gene)
Gene_tax_cc = '{}/Gene_tax_cc.{}.greengene_rep.tax'.format(Refdir, Gene) # greengene 2012.10 ref for copy correction
Gene_db_cc = '{}/Gene_db_cc.{}.greengene_rep.fasta'.format(Refdir, Gene)
Copy_db_cc = '{}/Copy_db_cc.{}.copyrighter.txt'.format(Refdir, Gene)
Samples = Df.index
Start = config['Start']
End = config['End']
Len_cutoff = config['Len_cutoff']
Java_xmx = config['Java_xmx'] # increase this if your machine has more memory
Otu_dist_cutoff = config['Otu_dist_cutoff']
Project = config['Project']
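# A sketch of a matching config.yaml (illustrative values only); each key below is
# read somewhere in this Snakefile, but the example values and comments are
# assumptions, not defaults shipped with the pipeline:
#
#   Workdir: ''                # optional working directory; empty means run in place
#   Metadata: metadata.tsv     # tab-delimited sample table with an 'ID' column
#   Refdir: ''                 # empty means download SSUsearch_db next to this Snakefile
#   Gene: ssu                  # gene label used to pick reference files from SSUsearch_db
#   Start: 577                 # start coordinate of the target region
#   End: 727                   # end coordinate of the target region
#   Len_cutoff: 100            # minimum length for a read to be kept
#   Java_xmx: 2g               # JVM heap size; increase on machines with more memory
#   Otu_dist_cutoff: 0.05      # distance cutoff used for OTU clustering
#   Project: my_project        # name of the top-level output directory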
print('Please double check the metadata info:')
print(Df.to_string(index=False, justify='right'))
localrules: taxa_summary, make_biom, copy_correction, all
include: 'rules/convert_seqname.smk'
### search step
include: 'rules/hmmsearch.smk'
include: 'rules/region_cut.smk'
### classification
include: 'rules/classify.smk'
include: 'rules/taxa_summary.smk'
### clustering
include: 'rules/clust.smk'
include: 'rules/make_biom.smk'
### copy correction
include: 'rules/copy_correction.smk'
rule all:
    input:
        # The first rule should define the default target files
        # Subsequent target rules can be specified below. They should start with all_*.
        expand('{project}/seqname_convert/{sample}.fa', project=Project, sample=Samples),
        expand('{project}/search/{sample}/{sample}.forclust', project=Project, sample=Samples),
        expand('{project}/search/{sample}/{sample}.align.filter.wang.gg.taxonomy.count', project=Project, sample=Samples),
        expand('{project}/search/{sample}/{sample}.align.filter.wang.silva.taxonomy.count', project=Project, sample=Samples),
        expand('{project}/taxa_summary/level.{i}.tsv', project=Project, i=[1, 2, 3, 4, 5]),
        expand('{project}/{project}.biom', project=Project),
        expand('{project}/{project}.cc.biom', project=Project),
##### singularity #####
# this container defines the underlying OS for each job when using the workflow
# with --use-conda --use-singularity
#singularity: "docker://continuumio/miniconda3"
##### report #####
report: "report/workflow.rst"
onstart:
    print('----------------------------------')
    print('Welcome to the SSUsearch pipeline.')
    print('----------------------------------')

onsuccess:
    print("\n--- SSUsearch Workflow executed successfully! ---\n")