Skip to content

Commit

Permalink
Refactored extract_best_flank function to return Optional[str] and simplified logic for determining the most common flank sequence
Browse files Browse the repository at this point in the history
  • Loading branch information
ryandward committed Dec 16, 2024
1 parent d435ee1 commit 9faefd5
Showing 1 changed file with 11 additions and 20 deletions.
31 changes: 11 additions & 20 deletions heuristicount.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from contextlib import nullcontext
from datetime import datetime
from multiprocessing import Pool, cpu_count
from typing import Generator, List, Set, Tuple
from typing import Generator, List, Optional, Set, Tuple

import rich
import zstandard as zstd
Expand Down Expand Up @@ -402,32 +402,23 @@ def update_flanks(side: str, seq: str, max_len: int):
update_flanks("L_flank", L_flank, len(L_flank))
update_flanks("R_flank", R_flank, len(R_flank))

def extract_best_flank(counts: Counter) -> str:
def extract_best_flank(counts: Counter) -> Optional[str]:
most_common_prev = None
for fl_len in range(max_flank, 0, -1):
# print(f"Potential flank sequences: {counts}", file=sys.stderr)

potential_seqs = [seq for seq in counts if len(seq) == fl_len]
if not potential_seqs:
continue

most_common = max(potential_seqs, key=lambda x: counts[x])
if fl_len == max_flank:
if (
most_common_prev is None
or counts[most_common] > 3 * counts[most_common_prev]
):
return most_common

if most_common_prev is None:
most_common_prev = most_common
elif counts[most_common] > 3 * counts[most_common_prev]:
most_common_prev = most_common
else:
if (
most_common_prev is not None
and counts[most_common] > 3 * counts[most_common_prev]
):
return most_common
if most_common_prev is None or (
counts[most_common] * 3 < counts[most_common_prev]
):
most_common_prev = most_common
return None
continue

return most_common_prev

L_most_common = extract_best_flank(L_flanks)
R_most_common = extract_best_flank(R_flanks)
Expand Down

0 comments on commit 9faefd5

Please sign in to comment.