-
Notifications
You must be signed in to change notification settings - Fork 0
/
target_pam_handler.py
121 lines (99 loc) · 2.9 KB
/
target_pam_handler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# Extract the downstream PAM of the target sequence from the topological map
import re
from Bio.Seq import Seq
def extract_downstream_pam(
    pam,
    tar_start,
    tar_end,
    chrom,
    fasta,
    dir,
    true_chrom_lengths,
    topological_chrom_lengths,
):
    """Fetch the bases lying immediately downstream (3') of a target.

    The window is ``len(pam)`` bases wide. For a forward ("F") target it is
    ``[tar_end, tar_end + len(pam))``; for a reverse ("R") target it is
    ``[tar_start - len(pam), tar_start)`` and the fetched bases are
    reverse-complemented so the result reads along the target's strand.

    Args:
        pam: PAM pattern; only its length is used here.
        tar_start: Start coordinate of the target on ``chrom``.
        tar_end: End coordinate of the target on ``chrom``.
        chrom: Reference name, used as the key into both length mappings and
            as ``reference=`` for ``fasta.fetch``.
        fasta: Object exposing ``fetch(reference=..., start=..., end=...)``
            returning a string (presumably a pysam ``FastaFile`` with
            0-based, half-open coordinates -- TODO confirm).
        dir: Strand of the target, ``"F"`` or ``"R"``.
        true_chrom_lengths: Mapping of chrom -> length; only checked for
            presence of ``chrom``.
        topological_chrom_lengths: Mapping of chrom -> length used as the
            upper bound for the forward-strand window.

    Returns:
        The upper-cased window sequence, or ``None`` when ``pam`` is empty,
        any input (or either length lookup) is ``None``, the window would
        fall outside ``[0, topological length]``, or ``dir`` is neither
        ``"F"`` nor ``"R"``.
    """
    true_len = true_chrom_lengths.get(chrom, None)
    topo_len = topological_chrom_lengths.get(chrom, None)

    # An empty PAM means there is nothing to extract.
    if pam == "":
        return None

    required = (pam, tar_start, tar_end, chrom, fasta, dir, true_len, topo_len)
    if None in required:
        return None

    if dir == "F":
        window_end = tar_end + len(pam)
        # The window must not run past the end of the topological sequence.
        if window_end > topo_len:
            return None
        return fasta.fetch(reference=chrom, start=tar_end, end=window_end).upper()

    if dir == "R":
        window_start = tar_start - len(pam)
        # The window must not start before the first base.
        if window_start < 0:
            return None
        plus_strand = fasta.fetch(
            reference=chrom, start=window_start, end=tar_start
        ).upper()
        # Report the PAM as read on the target's own (reverse) strand.
        return str(Seq(plus_strand).reverse_complement())

    return None
# Extract the upstream PAM of the target sequence from the topological map
def extract_upstream_pam(
    pam,
    tar_start,
    tar_end,
    chrom,
    fasta,
    dir,
    true_chrom_lengths,
    topological_chrom_lengths,
):
    """Fetch the bases lying immediately upstream (5') of a target.

    The window is ``len(pam)`` bases wide. For a forward ("F") target it is
    ``[tar_start - len(pam), tar_start)``; for a reverse ("R") target it is
    ``[tar_end, tar_end + len(pam))`` and the fetched bases are
    reverse-complemented so the result reads along the target's strand.

    Args:
        pam: PAM pattern; only its length is used here.
        tar_start: Start coordinate of the target on ``chrom``.
        tar_end: End coordinate of the target on ``chrom``.
        chrom: Reference name, used as the key into both length mappings and
            as ``reference=`` for ``fasta.fetch``.
        fasta: Object exposing ``fetch(reference=..., start=..., end=...)``
            returning a string (presumably a pysam ``FastaFile`` with
            0-based, half-open coordinates -- TODO confirm).
        dir: Strand of the target, ``"F"`` or ``"R"``.
        true_chrom_lengths: Mapping of chrom -> length; only checked for
            presence of ``chrom``.
        topological_chrom_lengths: Mapping of chrom -> length used as the
            upper bound for the reverse-strand window.

    Returns:
        The upper-cased window sequence, or ``None`` when ``pam`` is empty,
        any input (or either length lookup) is ``None``, the window would
        fall outside ``[0, topological length]``, or ``dir`` is neither
        ``"F"`` nor ``"R"``.
    """
    true_len = true_chrom_lengths.get(chrom, None)
    topo_len = topological_chrom_lengths.get(chrom, None)

    # An empty PAM means there is nothing to extract.
    if pam == "":
        return None

    required = (pam, tar_start, tar_end, chrom, fasta, dir, true_len, topo_len)
    if None in required:
        return None

    if dir == "F":
        window_start = tar_start - len(pam)
        # The window must not start before the first base.
        if window_start < 0:
            return None
        return fasta.fetch(
            reference=chrom, start=window_start, end=tar_start
        ).upper()

    if dir == "R":
        window_end = tar_end + len(pam)
        # The window must not run past the end of the topological sequence.
        if window_end > topo_len:
            return None
        plus_strand = fasta.fetch(
            reference=chrom, start=tar_end, end=window_end
        ).upper()
        # Report the PAM as read on the target's own (reverse) strand.
        return str(Seq(plus_strand).reverse_complement())

    return None
# Drop target-less rows for spacers that have a target in some other row
def filter_offtargets_by_pam(df):
    """Remove target-less rows belonging to spacers that do have a target.

    A row is kept when its ``target`` is non-null, or when its ``spacer``
    never appears with a non-null ``target`` anywhere in ``df``.

    NOTE(review): nothing PAM-specific happens here; presumably an earlier
    step nulls ``target`` for hits that fail the PAM check -- confirm with
    the caller.

    Args:
        df: DataFrame with ``"target"`` and ``"spacer"`` columns.

    Returns:
        The filtered DataFrame (rows selected by boolean mask; the original
        index labels are preserved).
    """
    has_target = df["target"].notna()
    spacers_with_target = df.loc[has_target, "spacer"].unique()
    # De Morgan form of "not (target missing and spacer targets elsewhere)".
    keep = has_target | ~df["spacer"].isin(spacers_with_target)
    return df[keep]
# Check if the extracted PAM matches the PAM pattern
def pam_matches(pam_pattern, extracted_pam):
    """Return whether ``extracted_pam`` satisfies ``pam_pattern``.

    Every ``N`` in the pattern is a single-base wildcard; all other
    characters are passed to the regex engine unchanged. The match is
    anchored at the start of ``extracted_pam`` only (``re.match``), so a
    sequence longer than the pattern matches when its prefix does.

    Args:
        pam_pattern: PAM pattern such as ``"NGG"``. ``None``, an empty
            string, or an all-``N`` pattern places no constraint.
        extracted_pam: Sequence to test, or ``None`` when no PAM could be
            extracted.

    Returns:
        ``False`` when ``extracted_pam`` is ``None``; ``True`` when the
        pattern is unconstrained; otherwise the result of the regex match.
    """
    if extracted_pam is None:
        return False
    # Test emptiness before calling len(): a None pattern used to raise
    # TypeError here instead of being treated as "no constraint".
    if not pam_pattern:
        return True
    if pam_pattern == "N" * len(pam_pattern):
        return True
    # Convert N to . for regex matching
    regex_pattern = pam_pattern.replace("N", ".")
    return bool(re.match(regex_pattern, extracted_pam))