Skip to content

Commit

Permalink
✨ Add path pattern exclusion as option to read_gfa_line and propagation
Browse files Browse the repository at this point in the history
  • Loading branch information
dubssieg committed Jan 19, 2024
1 parent c31bed0 commit fc6aa63
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 7 deletions.
8 changes: 5 additions & 3 deletions pgGraphs/gfaparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def supplementary_datas(datas: list, length_condition: int) -> dict:
return mapping

@staticmethod
def read_gfa_line(datas: list[str], load_sequence_in_memory: bool = True) -> tuple[str, GFALine, dict]:
def read_gfa_line(datas: list[str], load_sequence_in_memory: bool = True, exclude_end_path_pattern: str = "") -> tuple[str, GFALine, dict]:
"""Calls methods to parse a GFA line,
according to its fields as described in the GFA spec on GitHub.
Expand All @@ -203,7 +203,8 @@ def read_gfa_line(datas: list[str], load_sequence_in_memory: bool = True) -> tup
line_datas["orientation"] = f"{datas[2]}/{datas[4]}"
return ((line_datas['start'], line_datas['end']), line_type, {**line_datas, **GFAParser.supplementary_datas(datas, 5)})
case GFALine.WALK:
line_datas["id"] = datas[3]
line_datas["id"] = (datas[3].upper(), datas[3].upper()[
:-len(exclude_end_path_pattern)])[datas[3].upper().endswith(exclude_end_path_pattern)]
line_datas["origin"] = int(datas[2])
line_datas["start_offset"] = datas[4]
line_datas["stop_offset"] = datas[5]
Expand All @@ -216,7 +217,8 @@ def read_gfa_line(datas: list[str], load_sequence_in_memory: bool = True) -> tup
]
return (datas[1], line_type, {**line_datas, **GFAParser.supplementary_datas(datas, 7)})
case GFALine.PATH:
line_datas["id"] = datas[1]
line_datas["id"] = (datas[1].upper(), datas[1].upper()[
:-len(exclude_end_path_pattern)])[datas[1].upper().endswith(exclude_end_path_pattern)]
line_datas["origin"] = None
line_datas["start_offset"] = None
line_datas["stop_offset"] = None
Expand Down
8 changes: 5 additions & 3 deletions pgGraphs/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ def __init__(
self,
gfa_file: str | None = None,
with_sequence: bool = True,
low_memory: bool = False
low_memory: bool = False,
end_pattern_exclusion: str = ""
) -> None:
"""Constructor for GFA Graph object.
Expand Down Expand Up @@ -53,8 +54,9 @@ def __init__(
)

name, line_type, datas = GFAParser.read_gfa_line(
[__.strip() for __ in gfa_line.split('\t')],
with_sequence and not low_memory
datas=[__.strip() for __ in gfa_line.split('\t')],
load_sequence_in_memory=with_sequence and not low_memory,
exclude_end_path_pattern=end_pattern_exclusion,
)
match line_type:
case GFALine.SEGMENT:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

[project]
name = "gfagraphs"
version = "0.2.19"
version = "0.2.20"
authors = [
{ name="Siegfried Dubois", email="siegfried.dubois@inria.fr" },
]
Expand Down

0 comments on commit fc6aa63

Please sign in to comment.