-
Notifications
You must be signed in to change notification settings - Fork 1
/
Makefile
258 lines (204 loc) · 21.8 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
#################################################################################
# GLOBALS #
#################################################################################
# https://stackoverflow.com/questions/322936/common-gnu-makefile-directory-path
# http://andylinuxblog.blogspot.com/2015/06/what-is-colon-equals-sign-in-makefiles.html
# ":=" makes the expansion happen immediately, when the makefile is read,
# instead of each time the variable is used in one of the make commands.
# Absolute directory containing this Makefile, resolved once at parse time.
PROJECT_ROOT := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
# Used as the virtualenv directory name and in the conda activation hint.
PROJECT_NAME := deepTCR
# Interpreter used by the recipes that run the project's Python scripts.
PYTHON_INTERPRETER := python3

# HAS_CONDA is True when a `conda` executable is found on PATH, False otherwise.
# `command -v` is the POSIX way to look up a command; the redirection hides any
# lookup noise and forces make to run the probe through the shell, where
# `command` is a builtin.
ifeq (,$(shell command -v conda 2>/dev/null))
HAS_CONDA := False
else
HAS_CONDA := True
endif
#################################################################################
# SETUP COMMANDS #
#################################################################################
.PHONY: requirements
## Install Python Dependencies
# Runs test_environment first, then installs into a new conda environment when
# conda was detected (HAS_CONDA), or into a virtualenv named $(PROJECT_NAME).
requirements: test_environment
ifeq (True,$(HAS_CONDA))
	@echo ">>> Detected conda, installing requirements inside conda environment."
	conda env create -f environment.yml
# printf instead of echo: whether echo expands "\n" depends on the shell.
	@printf '%s\n' ">>> New conda env created. Activate with:" "source activate $(PROJECT_NAME)"
else
	@echo ">>> Conda not detected, installing requirements inside virtualenv. NOTE: cudatoolkit, cudnn and hdf5 need to be installed manually."
#$(PYTHON_INTERPRETER) -m pip install -U pip setuptools wheel
	virtualenv $(PROJECT_NAME)
# Every recipe line runs in its own shell, so the activation and the install
# must share one line; otherwise pip installs into the ambient environment.
	. $(PROJECT_NAME)/bin/activate && pip install -r requirements.txt
endif
# Phony: this names a command, not a file to build.
.PHONY: test_environment
## Test python environment is setup correctly
test_environment:
	$(PYTHON_INTERPRETER) test_environment.py
# Phony: a file or directory called "format" must not disable this target.
.PHONY: format
## Format src code with black
format:
	black src
# Phony: a file or directory called "lint" must not disable this target.
.PHONY: lint
## Lint using flake8
lint:
	flake8 src
# Phony: without this, an existing file or directory named "test" would make
# the target look up to date and pytest would never run.
.PHONY: test
## Run tests
test:
	pytest src/tests
.PHONY: clean
## Delete all compiled Python files
clean:
	find . -type f -name "*.py[co]" -delete
# `-delete` cannot remove a directory that still contains files, so the cache
# directories are removed recursively instead; `-prune` stops find from
# descending into a directory that is about to be removed.
	find . -type d -name "__pycache__" -prune -exec rm -rf {} +
#################################################################################
# ANALYSIS #
#################################################################################
# Phony: always run the download/summary scripts when asked.
.PHONY: data-vdjdb-aug-2019
## Download VDJdb data to correct directories
data-vdjdb-aug-2019:
	@echo ">>> Downloading raw VDJdb data."
	bash ./src/scripts/data_scripts/retrieve_data_vdjdb.sh
	@echo ">>> Creating summary statistics."
# Each analyser call takes the summary file first and the data file second
# (argument roles inferred from the file names — confirm against the scripts).
	bash ./src/scripts/data_scripts/vdjdb-content-analyser.sh data/raw/vdjdb/vdjdb-2019-08-08-vdjdb-summary.md data/raw/vdjdb/vdjdb-2019-08-08/vdjdb.txt
	bash ./src/scripts/data_scripts/vdjdb-content-analyser-slim.sh data/raw/vdjdb/vdjdb-2019-08-08-vdjdb-slim-summary.md data/raw/vdjdb/vdjdb-2019-08-08/vdjdb.slim.txt
	bash ./src/scripts/data_scripts/vdjdb-content-analyser.sh data/raw/vdjdb/vdjdb-browser-summary.md data/raw/vdjdb/vdjdb-browser.tsv
.PHONY: preprocess-vdjdb-aug-2019

# Building blocks shared by the command lines of preprocess-vdjdb-aug-2019
# (2019-08-08 VDJdb release). They expand to exactly the arguments that used to
# be spelled out on every line.
vdjdb_raw := data/raw/vdjdb/vdjdb-2019-08-08/vdjdb.txt
vdjdb_interim := data/interim/vdjdb-2019-08-08
# Recursive (=) so that PYTHON_INTERPRETER is looked up when the recipe runs.
vdjdb_preprocess = $(PYTHON_INTERPRETER) ./src/scripts/preprocessing/preprocess_vdjdb.py -i $(vdjdb_raw)
# $(call vdjdb_make_decoy,NAME) reads $(vdjdb_interim)/NAME.csv and writes NAME-decoy.csv.
vdjdb_make_decoy = $(PYTHON_INTERPRETER) ./src/scripts/preprocessing/decoy_epitopes.py -i $(vdjdb_interim)/$(1).csv -o $(vdjdb_interim)/$(1)-decoy.csv
# Filters behind the "no10x-size" file names: drop spurious sequences, remove
# 10xgenomics entries, apply length restrictions (presumably CDR3 10-20 and
# epitope 8-11 — confirm against preprocess_vdjdb.py).
vdjdb_no10x_size := --drop-spurious --remove-specific-reference 10xgenomics --length-restriction 10 20 8 11
vdjdb_trb_mhci := --species human --tcr-chain TRB --mhc MHCI $(vdjdb_no10x_size)
vdjdb_tra_mhci := --species human --tcr-chain TRA --mhc MHCI $(vdjdb_no10x_size)
vdjdb_tra_trb_mhci := --species human --mhc MHCI $(vdjdb_no10x_size)
vdjdb_down1000 := --downsample NLVPMVATV 1000 GILGFVFTL 1000
# Epitopes kept for the 10x-only validation set: taken from the set of unique
# epitopes compared to the non-10x data. Do not put quotes around epitopes.
vdjdb_10x_epitopes := AYAQKIFKI CLLGTYTQDV CLLWSFQTSA CYTWNQMNL FLASKIGRLV FLYALALLL \
	IMDQVPFSV KLGGALQAK KLQCVDLHV KTWGQYWQV KVAELVHFL KVLEYVIKV \
	LLDFVRFMGV MLDLQPETT QPRAPIRPI RIAAWMATY RLRAEAQVK RTLNAWVKV \
	SLFNTVATL SLFNTVATLY SLYNTVATLY YLLEMLWRL YLNDHLEPWI

## Filter the VDJdb data and preprocess it into the correct format.
preprocess-vdjdb-aug-2019:
	mkdir -p $(vdjdb_interim)/
# Full untouched dataset for negative data generation through shuffling.
	$(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human.csv --species human --drop-spurious
#
# ---- NO DOWNSAMPLING ----
# MHCI: human TRB, TRA and TRA+TRB; without spurious sequences, without any
# 10xgenomics entries, with length restrictions.
	$(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-trb-mhci-no10x-size.csv $(vdjdb_trb_mhci)
	$(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-tra-mhci-no10x-size.csv $(vdjdb_tra_mhci)
	$(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-tra-trb-mhci-no10x-size.csv $(vdjdb_tra_trb_mhci)
# Disabled, MHCII (TRB / TRA / TRA+TRB):
#   $(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-trb-mhcii-no10x-size.csv --species human --tcr-chain TRB --mhc MHCII $(vdjdb_no10x_size)
#   $(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-tra-mhcii-no10x-size.csv --species human --tcr-chain TRA --mhc MHCII $(vdjdb_no10x_size)
#   $(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-tra-trb-mhcii-no10x-size.csv --species human --mhc MHCII $(vdjdb_no10x_size)
# Disabled, MHCI+II (TRB / TRB preprint copy / TRA / TRA+TRB):
#   $(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-trb-no10x-size.csv --species human --tcr-chain TRB $(vdjdb_no10x_size)
#   $(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-trb-no10x-size-preprint.csv --species human --tcr-chain TRB $(vdjdb_no10x_size)
#   $(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-tra-no10x-size.csv --species human --tcr-chain TRA $(vdjdb_no10x_size)
#   $(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-tra-trb-no10x-size.csv --species human $(vdjdb_no10x_size)
#
# ---- DOWNSAMPLING ----
# MHCI, human TRB (same filters as above).
	$(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-trb-mhci-no10x-size-down.csv $(vdjdb_trb_mhci) $(vdjdb_down1000)
# Most frequent epitopes recorded for the non-downsampled TRB file:
# cut -f2 vdjdb-human-trb-mhci-no10x-size.csv -d';' | sort | uniq -c | sort -n
# 171 KAFSPEVIPMF
# 172 RAKFKQLL
# 191 TPRVTGGGAM
# 199 VTEHDTLLY
# 231 LLLGIGILV
# 316 KRWIILGLNK
# 404 LLWNGPMAV
# 883 GLCTLVAML
# 960 ELAGIGILTV
# 2856 GILGFVFTL
# 4387 NLVPMVATV
	$(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-trb-mhci-no10x-size-down400.csv $(vdjdb_trb_mhci) --downsample NLVPMVATV 400 GILGFVFTL 400 ELAGIGILTV 400 GLCTLVAML 400
# MHCI, human TRA (same filters as above).
	$(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-tra-mhci-no10x-size-down.csv $(vdjdb_tra_mhci) --downsample NLVPMVATV 300 GILGFVFTL 300
# Most frequent epitopes recorded for the non-downsampled TRA file:
# cut -f2 vdjdb-human-tra-mhci-no10x-size.csv -d';' | sort | uniq -c | sort -n
# 22 KAFSPEVIPMF
# 25 KLSALGINAV
# 42 ELAGIGILTV
# 43 KLVALGINAV
# 48 NEGVKAAW
# 69 CINGVCWTV
# 108 GLCTLVAML
# 245 LLWNGPMAV
# 330 LLLGIGILV
# 2065 NLVPMVATV
# 2433 GILGFVFTL
# MHCI, human TRA+TRB (same filters as above).
	$(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-tra-trb-mhci-no10x-size-down.csv $(vdjdb_tra_trb_mhci) $(vdjdb_down1000)
# Most frequent epitopes recorded for the non-downsampled TRA+TRB file:
# cut -f2 vdjdb-human-tra-trb-mhci-no10x-size.csv -d';' | sort | uniq -c | sort -n
# 172 RAKFKQLL
# 193 KAFSPEVIPMF
# 197 TPRVTGGGAM
# 199 CINGVCWTV
# 199 VTEHDTLLY
# 322 KRWIILGLNK
# 561 LLLGIGILV
# 649 LLWNGPMAV
# 991 GLCTLVAML
# 1002 ELAGIGILTV
# 5289 GILGFVFTL
# 6452 NLVPMVATV
	$(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-tra-trb-mhci-no10x-size-down400.csv $(vdjdb_tra_trb_mhci) --downsample NLVPMVATV 400 GILGFVFTL 400 ELAGIGILTV 400 GLCTLVAML 400 LLWNGPMAV 400 LLLGIGILV 400
# Disabled, MHCII with downsampling (TRB / TRA / TRA+TRB):
#   $(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-trb-mhcii-no10x-size-down.csv --species human --tcr-chain TRB --mhc MHCII $(vdjdb_no10x_size) --downsample NLVPMVATV 0.84 GILGFVFTL 0.80
#   $(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-tra-mhcii-no10x-size-down.csv --species human --tcr-chain TRA --mhc MHCII $(vdjdb_no10x_size) --downsample NLVPMVATV 0.84 GILGFVFTL 0.80
#   $(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-tra-trb-mhcii-no10x-size-down.csv --species human --mhc MHCII $(vdjdb_no10x_size) --downsample NLVPMVATV 0.84 GILGFVFTL 0.80
# Disabled, MHCI+II with downsampling (TRB / TRA / TRA+TRB):
#   $(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-trb-no10x-size-down.csv --species human --tcr-chain TRB $(vdjdb_no10x_size) --downsample NLVPMVATV 0.84 GILGFVFTL 0.80
#   $(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-tra-no10x-size-down.csv --species human --tcr-chain TRA $(vdjdb_no10x_size) --downsample NLVPMVATV 0.84 GILGFVFTL 0.80
#   $(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-tra-trb-no10x-size-down.csv --species human $(vdjdb_no10x_size) --downsample NLVPMVATV 0.84 GILGFVFTL 0.80
#
# ---- DECOY ----
# Each call reads one of the files produced above and writes <name>-decoy.csv.
	$(call vdjdb_make_decoy,vdjdb-human-trb-mhci-no10x-size)
	$(call vdjdb_make_decoy,vdjdb-human-trb-mhci-no10x-size-down)
	$(call vdjdb_make_decoy,vdjdb-human-trb-mhci-no10x-size-down400)
	$(call vdjdb_make_decoy,vdjdb-human-tra-trb-mhci-no10x-size)
	$(call vdjdb_make_decoy,vdjdb-human-tra-trb-mhci-no10x-size-down)
#
# Disabled, SINGLE EPITOPES:
#   $(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-trb-size-NLVPMVATV.csv $(vdjdb_trb_mhci) --keep-specific-epitopes NLVPMVATV
#   $(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-trb-size-GILGFVFTL.csv $(vdjdb_trb_mhci) --keep-specific-epitopes GILGFVFTL
#
# ---- REPLACED ----
# TRB MHCI variants, each produced with and without downsampling.
	$(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-trb-mhci-no10x-size-terminal-replaced.csv $(vdjdb_trb_mhci) --terminal_replaced G
	$(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-trb-mhci-no10x-size-terminal-replaced-down.csv $(vdjdb_trb_mhci) $(vdjdb_down1000) --terminal_replaced G
	$(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-trb-mhci-no10x-size-terminal-only.csv $(vdjdb_trb_mhci) --terminal_only
	$(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-trb-mhci-no10x-size-terminal-only-down.csv $(vdjdb_trb_mhci) $(vdjdb_down1000) --terminal_only
	$(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-trb-mhci-no10x-size-middle-replaced.csv $(vdjdb_trb_mhci) --middle_replaced G
	$(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-trb-mhci-no10x-size-middle-replaced-down.csv $(vdjdb_trb_mhci) $(vdjdb_down1000) --middle_replaced G
	$(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-trb-mhci-no10x-size-middle-only.csv $(vdjdb_trb_mhci) --middle_only
	$(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-trb-mhci-no10x-size-middle-only-down.csv $(vdjdb_trb_mhci) $(vdjdb_down1000) --middle_only
#
# ---- 10x only validation ----
# Keeps (rather than removes) the 10xgenomics reference, restricted to the
# epitopes listed in vdjdb_10x_epitopes.
	$(vdjdb_preprocess) -o $(vdjdb_interim)/vdjdb-human-trb-mhci-10x-size.csv --species human --tcr-chain TRB --mhc MHCI --drop-spurious --keep-specific-reference 10xgenomics --length-restriction 10 20 8 11 --keep-specific-epitopes $(vdjdb_10x_epitopes)
# ## Download 2020 VDJdb dataset (not used)
# data-vdjdb-jan-2020:
# @echo ">>> Downloading raw data."
# bash ./src/scripts/data_scripts/retrieve_data_vdjdb-2020-01-20.sh
# @echo ">>> Creating summary statistics."
# bash ./src/scripts/data_scripts/vdjdb-content-analyser.sh data/raw/vdjdb/vdjdb-2020-01-20-vdjdb-summary.md data/raw/vdjdb/vdjdb-2020-01-20/vdjdb.txt
# bash ./src/scripts/data_scripts/vdjdb-content-analyser-slim.sh data/raw/vdjdb/vdjdb-2020-01-20-vdjdb-slim-summary.md data/raw/vdjdb/vdjdb-2020-01-20/vdjdb.slim.txt
# bash ./src/scripts/data_scripts/vdjdb-content-analyser.sh data/raw/vdjdb/vdjdb-browser-summary.md data/raw/vdjdb/vdjdb-browser.tsv
# Phony: always run the download script when asked.
.PHONY: data-adaptive
## Download Adaptive ImmuneCODE sars-covid dataset
data-adaptive:
	bash ./src/scripts/data_scripts/retrieve_data_adaptive.sh
.PHONY: preprocess-adaptive
## Preprocess Adaptive ImmuneCODE data
preprocess-adaptive:
# Create the output directory up front, as preprocess-vdjdb-aug-2019 does for
# its own output; whether the script creates it itself is not visible here.
	mkdir -p ./data/interim/immunecode-adaptive
	$(PYTHON_INTERPRETER) ./src/scripts/preprocessing/preprocess_immunecode.py -i ./data/raw/immunecode-adaptive/ImmuneCODE-Release001.1/peptide-detail.csv -o ./data/interim/immunecode-adaptive/adaptive-sars-cov.csv --length-restriction 10 20 8 11
.PHONY: preprocess-mcpas
## Preprocess McPAS data
# Reads the interim VDJdb TRB MHCI file via --vdjdb_dataset, so
# preprocess-vdjdb-aug-2019 has to have been run beforehand.
preprocess-mcpas:
# Create the output directory up front, as preprocess-vdjdb-aug-2019 does for
# its own output; whether the script creates it itself is not visible here.
	mkdir -p ./data/interim/mcpas
	$(PYTHON_INTERPRETER) ./src/scripts/preprocessing/preprocess_mcpas.py -i ./data/raw/McPAS-TCR.csv -o ./data/interim/mcpas/mcpas-human-trb-mhci-size.csv --length-restriction 10 20 8 11 --vdjdb_dataset ./data/interim/vdjdb-2019-08-08/vdjdb-human-trb-mhci-no10x-size.csv
.PHONY: metrics
## Calculate metrics across train/test folds and create figures
# Runs visualize.py once for every direct subdirectory of models/models, using
# the same interpreter as the other targets instead of a bare `python`.
metrics:
	find models/models -maxdepth 1 -mindepth 1 -type d -exec $(PYTHON_INTERPRETER) ./src/scripts/evaluate/visualize.py metrics --force True {} \;
# optionally use --y_lim_loss 2 to reduce y axis to a max of 2 for a better view of loss curves
.PHONY: metrics-compare
# Only prints the command to run by hand; nothing is executed by this target.
metrics-compare:
	@echo "Use the following one-liner to compare two directories with trained models."
	@echo 'python ./src/scripts/evaluate/visualize.py compare --force True parent_directory'
# optionally use --y_lim_loss 2 to reduce y axis to a max of 2 for a better view of loss curves
.PHONY: evaluate_self
## Per-epitope evaluation
# Only prints template one-liners (to be adjusted and run by hand), followed by
# the project root directory; no evaluation is started by this target.
evaluate_self:
	@echo "Adjust the following one-liners. -name: should contain the name of the models of the same type. --model_type: padded or separated. --features: the features used to construct the padded model, order matters!"
	@echo 'find models/models -maxdepth 1 -mindepth 1 -name "*padded*" -type d -exec python ./src/scripts/evaluate/evaluate_self.py --input {} --model_type padded --min_length_cdr3 10 --max_length_cdr3 20 --min_length_epitope 8 --max_length_epitope 11 --features "atchley1,atchley2,atchley3,atchley4,atchley5" \;'
	@echo 'find models/models -maxdepth 1 -mindepth 1 -name "*nettcr*" -type d -exec python ./src/scripts/evaluate/evaluate_self.py --input {} --model_type separated --min_length_cdr3 10 --max_length_cdr3 20 --min_length_epitope 8 --max_length_epitope 11 \;'
	@echo $(PROJECT_ROOT)
# Phony: this names a command, not a file to build.
.PHONY: example-figure
## Create interaction-map example
# Calls visualize.py in "peptide" mode for one fixed epitope / CDR3 pair.
example-figure:
	$(PYTHON_INTERPRETER) ./src/scripts/evaluate/visualize.py peptide --epitope ELAGIGILTV --cdr3 CASSPGEGLYEQYF --operator absdiff --cmyk True