Skip to content

Commit

Permalink
renames
Browse files Browse the repository at this point in the history
  • Loading branch information
Lior Zfira committed Jan 21, 2019
1 parent 44b7374 commit 782eec4
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 23 deletions.
18 changes: 9 additions & 9 deletions hack_proj/lior_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ def create_data_01(train_list, test_list):


def test_new_markov():
markov_order = 2
win_size = 51
markov_order = 3
win_size = 201
island_markov_path = r'C:\liorz\school\76558 Algorithms in Computational Biology\hackathon\data\tmp\create_data_01\markov_model_island_%d.pkl' % markov_order
other_markov_path = r'C:\liorz\school\76558 Algorithms in Computational Biology\hackathon\data\tmp\create_data_01\markov_model_other_%d.pkl' % markov_order
test_seq_pkl_path = r'C:\liorz\school\76558 Algorithms in Computational Biology\hackathon\data\tmp\create_data_01\chr1_125124_145563.pkl'
Expand All @@ -61,32 +61,32 @@ def test_new_markov():
llr_markov = pickle.load(open(island_markov_path, 'rb')) # type: MarkovModel
other_markov = pickle.load(open(other_markov_path, 'rb')) # type: MarkovModel
seq, labels = pickle.load(open(test_seq_pkl_path, 'rb'))
seq = seq[:100000]
labels = labels[:100000]
seq = seq[:300000]
labels = labels[:300000]
llr_markov.log_prob_mat = island_markov.log_prob_mat - other_markov.log_prob_mat
llr_markov.log_transition_mat = island_markov.log_transition_mat - other_markov.log_transition_mat
print_transition_mat(island_markov)
# print_transition_mat(island_markov)

llr_score = llr_markov.get_ll([seq])
llr_score_island = island_markov.get_ll([seq])
llr_score_other = other_markov.get_ll([seq])

print('N Count: %d' % count_substr(seq, 'N'))
new_labels = labels[llr_markov.order:]
window_score, window_labels = apply_window(llr_score, new_labels, win_size=win_size)

show_roc_curve(window_labels, window_score, print_data=False)
y_pred = np.zeros_like(window_score)
y_pred[window_score > 17.3] = 1
y_pred[window_score > 30] = 1
print_prediction_stats(window_labels, y_pred)
idx = np.array(range(window_score.size))

plt.figure()
plot_scores_and_labels(window_score, window_labels, 'LLR', 'Log Likeklihood Ratio - Markov order = %d, Win size = %d' % (markov_order, win_size))
# plt.figure()
# llr_score_other = other_markov.get_ll([seq])
# window_score_other, window_labels_other = apply_window(llr_score_other, new_labels, win_size=win_size)
# plot_scores_and_labels(window_score_other, window_labels, 'Other LL', 'Other Sequence Log Likelihood - Markov order %d, Win size = %d' % (markov_order, win_size))
# plt.figure()
# llr_score_island = island_markov.get_ll([seq])
# window_score_island, window_labels_island = apply_window(llr_score_island, new_labels, win_size=win_size)
# plot_scores_and_labels(window_score_island, window_labels, 'Island LL', 'Island Sequence Log Likelihood - Markov order %d, Win size = %d' % (markov_order, win_size))
plt.show()
Expand All @@ -110,7 +110,7 @@ def train_markov_models():
r'C:\liorz\school\76558 Algorithms in Computational Biology\hackathon\data\tmp\create_data_01\train_near.pkl']
output_dir = r'C:\liorz\school\76558 Algorithms in Computational Biology\hackathon\data\tmp\create_data_01'

markov_order = 5
markov_order = 6
print('----------------Island data--------------')
island_data = []
for p in island_paths:
Expand Down
28 changes: 14 additions & 14 deletions hack_proj/markov_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@ def __init__(self, valid_states, order):
self.valid_states = valid_states
self.valid_states_dict = list2dict(self.valid_states)
self.states_num = len(self.valid_states)
self.count_states = self._create_order_states(self.order + 1)
self.count_states_dict = list2dict(self.count_states)
self.base_states = self._create_order_states(self.order)
self.full_states = self._create_combined_states(self.order + 1)
self.full_states_dict = list2dict(self.full_states)
self.base_states = self._create_combined_states(self.order)
self.base_states_dict = list2dict(self.base_states)

self.log_prob_mat = np.log(np.ones(len(self.count_states), dtype=float) / len(self.count_states))
self.log_prob_mat = np.log(np.ones(len(self.full_states), dtype=float) / len(self.full_states))
self.log_transition_mat = np.log(np.ones((pow(self.states_num, self.order), self.states_num), dtype=float) / self.states_num)

def get_prob_mat(self):
Expand All @@ -31,12 +31,12 @@ def get_prob_mat(self):
# Return the element-wise exponential of self.log_transition_mat,
# i.e. the stored log-space transition matrix converted out of log space.
def get_transition_mat(self):
return np.exp(self.log_transition_mat)

# Build the list of every string obtained by joining `len` symbols taken from
# self.valid_states, in the order produced by itertools.product(..., repeat=len).
# NOTE(review): the parameter name `len` shadows the builtin inside this method.
# NOTE(review): the two `def` lines below look like the pre- and post-rename
# signatures from the diff view — confirm only one exists in the real file.
def _create_order_states(self, len):
def _create_combined_states(self, len):
return list([''.join(p) for p in itertools.product(self.valid_states, repeat=len)])

def _count_order_states(self, x, smooth=1):
def _count_full_states(self, x, smooth=1):
count_dict = {}
for k in self.count_states:
for k in self.full_states:
count_dict[k] = smooth

for val in x:
Expand All @@ -48,11 +48,11 @@ def _count_order_states(self, x, smooth=1):
return count_dict

def fit_transition(self, x):
prob_mat = np.ones(len(self.count_states), dtype=float) / len(self.count_states)
prob_mat = np.ones(len(self.full_states), dtype=float) / len(self.full_states)
transition_mat = np.ones((pow(self.states_num, self.order), self.states_num), dtype=float) / self.states_num

# Count all count states
count_dict = self._count_order_states(x)
# Count all full states
count_dict = self._count_full_states(x)

# Count base states to normalize by
norm_dict = {}
Expand All @@ -62,7 +62,7 @@ def fit_transition(self, x):
norm_dict[k[:-1]] += v

# Create probability mat for the full states
for i, k in enumerate(self.count_states):
for i, k in enumerate(self.full_states):
prob_mat[i] = count_dict[k] / norm_dict[k[:-1]]

# Create transition mat from base states
Expand All @@ -79,7 +79,7 @@ def str_to_order_states(self, x):
curr_ret = []
for i in range(len(val) - self.order):
try:
curr_ret.append(self.count_states_dict[val[i:i + self.order + 1]])
curr_ret.append(self.full_states_dict[val[i:i + self.order + 1]])
except:
curr_ret.append(MarkovModel.UNKNOWN_VAL)
ret.append(np.array(curr_ret))
Expand All @@ -90,7 +90,7 @@ def get_ll(self, x):
all_ll = []
for arr in x_states:
curr_ll = np.full_like(arr, fill_value=np.nan, dtype=float)
for i in range(len(self.count_states)):
for i in range(len(self.full_states)):
curr_ll[arr == i] = self.log_prob_mat[i]
self.nan_to_prev(curr_ll, np.mean(self.log_prob_mat))

Expand All @@ -114,7 +114,7 @@ def nan_to_prev(self, arr, start_val):

# Print one "<state>: <probability>" line per state of the given model.
# The i-th state is paired with the i-th entry of the array returned by
# markov_model.get_prob_mat().
# NOTE(review): the two `for` lines below look like the pre- and post-rename
# versions from the diff view (count_states -> full_states) — confirm only one
# exists in the real file.
def print_prob_mat(markov_model: MarkovModel):
prob_mat = markov_model.get_prob_mat()
for i, k in enumerate(markov_model.count_states):
for i, k in enumerate(markov_model.full_states):
print('%s: %f' % (k, prob_mat[i]))


Expand Down

0 comments on commit 782eec4

Please sign in to comment.