-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcount_clonotypes.py
51 lines (37 loc) · 1.15 KB
/
count_clonotypes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import sys
def count_clonotypes_in_files(link_path):
    """Count, for every clonotype sequence, how many samples contain it.

    ``link_path`` names a text "link file" that is read line by line:

    * a line of the form ``#<name>`` closes the sample collected so far and
      starts a new one (a bare ``#`` with no name is ignored and does not
      end the current sample);
    * any other non-blank line is taken as the path of a clonotype table
      belonging to the current sample;
    * blank lines are skipped.

    Every table is read as whitespace-separated rows. The sequence of a row
    is its third field, unless that field parses as a number and a fourth
    field exists, in which case the fourth field is used. Rows with fewer
    than three fields are skipped.

    A sequence is counted once per sample, no matter how many rows or tables
    of that sample contain it.

    Args:
        link_path: Path of the link file.

    Returns:
        A dict mapping each sequence to the number of samples it occurs in.

    Raises:
        OSError: If the link file or any listed table cannot be opened.
    """

    def get_sequence(words):
        """Return the sequence field of one split row, or None if too short."""
        if len(words) < 3:
            return None
        try:
            float(words[2])
        except ValueError:
            # Third field is not numeric, so it is the sequence itself.
            return words[2]
        # Third field is numeric: the sequence sits one column further right,
        # when that column exists.
        return words[3] if len(words) > 3 else words[2]

    global_sequences = {}
    local_sequences = set()

    def flush_sample():
        """Credit every sequence of the finished sample with one more sample."""
        for seq in local_sequences:
            global_sequences[seq] = global_sequences.get(seq, 0) + 1

    with open(link_path) as infile:
        for line in infile:
            line = line.strip()
            if line.startswith('#'):
                # startswith() is safe on blank lines, unlike line[0].
                if line[1:]:
                    flush_sample()
                    local_sequences = set()
            elif line:
                target_file = line
                print("Searching in", target_file)
                # Open the listed table; iterating the path string itself
                # would only walk over its characters.
                with open(target_file) as table:
                    for target_line in table:
                        seq = get_sequence(target_line.split())
                        if seq is not None:
                            local_sequences.add(seq)

    # The last sample has no following '#' line to close it.
    flush_sample()
    return global_sequences
if __name__ == '__main__':
    # Usage: python count_clonotypes.py <link_file>
    sample_counts = list(count_clonotypes_in_files(sys.argv[1]).values())
    # Sequences seen in exactly one sample are reported as "unique",
    # those seen in more than one sample as "public".
    unique_total = sample_counts.count(1)
    public_total = len([n for n in sample_counts if n > 1])
    print("# Unique clonotypes: ", unique_total)
    print("# Public clonotypes: ", public_total)