Skip to content

Commit

Permalink
🐛 Fixes on load_graph and save_graph for JSON strings
Browse files Browse the repository at this point in the history
  • Loading branch information
dubssieg committed Dec 8, 2023
1 parent 17e960f commit ceef6c1
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 25 deletions.
29 changes: 24 additions & 5 deletions pgGraphs/gfaparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,25 @@ def get_gfa_type(tag_type: str) -> type | Callable:
raise ValueError(
f"Type identifier {tag_type} is not in the GFA standard")

@staticmethod
def set_gfa_type(tag_type: str) -> type | Callable:
"""Interprets tags of GFA as a Python-compatible format
Args:
tag_type (str): the letter that identifies the GFA data type
Raises:
NotImplementedError: happens if its an array or byte array (needs doc)
ValueError: happens if format is not in GFA standards
Returns:
type | Callable: the cast method or type to apply
"""
if tag_type == 'J':
return dumps
else:
return str

@staticmethod
def get_python_type(data: object) -> str:
"""Interprets tags of GFA as a Python-compatible format
Expand Down Expand Up @@ -224,16 +243,16 @@ def save_graph(graph, output_path: str) -> None:
if graph.headers:
for header in graph.headers:
gfa_writer.write(
"H\t"+'\t'.join([f"{key}:{GFAParser.get_python_type(value)}:{value}" if not key.startswith('ARG') else str(value) for key, value in header.items()])+"\n")
"H\t"+'\t'.join([f"{key}:{GFAParser.get_python_type(value)}:{GFAParser.set_gfa_type(GFAParser.get_python_type(value))(value)}" if not key.startswith('ARG') else str(value) for key, value in header.items()])+"\n")
if graph.segments:
for segment_name, segment_datas in graph.segments.items():
gfa_writer.write("S\t"+f"{segment_name}\t{segment_datas['seq'] if 'seq' in segment_datas else 'N'*segment_datas['length']}\t" + '\t'.join(
[f"{key}:{GFAParser.get_python_type(value)}:{value}" if not key.startswith('ARG') else str(value) for key, value in segment_datas.items() if key not in ['length', 'seq']])+"\n")
[f"{key}:{GFAParser.get_python_type(value)}:{GFAParser.set_gfa_type(GFAParser.get_python_type(value))(value)}" if not key.startswith('ARG') else str(value) for key, value in segment_datas.items() if key not in ['length', 'seq']])+"\n")
if graph.lines:
for line in graph.lines:
for line in graph.lines.values():
ori1, ori2 = line['orientation'].split('/')
gfa_writer.write(f"L\t"+f"{line['start']}\t{ori1}\t{line['end']}\t{ori2}\t" + '\t'.join(
[f"{key}:{GFAParser.get_python_type(value)}:{value}" if not key.startswith('ARG') else str(value) for key, value in line.items() if key not in ['orientation', 'start', 'end']])+"\n")
[f"{key}:{GFAParser.get_python_type(value)}:{GFAParser.set_gfa_type(GFAParser.get_python_type(value))(value)}" if not key.startswith('ARG') else str(value) for key, value in line.items() if key not in ['orientation', 'start', 'end']])+"\n")
if graph.paths:
for path_name, path_datas in graph.paths.items():
if graph.metadata['version'] == GFAFormat.GFA1: # P-line
Expand All @@ -245,5 +264,5 @@ def save_graph(graph, output_path: str) -> None:
offset_stop: int | str = path_datas['stop_offset'] if 'stop_offset' in path_datas else '?'
strpath: str = ''.join(
[f"{'>' if orient == Orientation.FORWARD else '<'}{node_name}" for node_name, orient in path_datas['path']])
return f"W\t{path_name}\t{path_datas['origin'] if 'origin' in path_datas else line_number}\t{path_datas['name']}\t{offset_start}\t{offset_stop}\t{strpath}\t*\n"
return f"W\t{path_name}\t{path_datas['origin'] if 'origin' in path_datas else line_number}\t{path_name}\t{offset_start}\t{offset_stop}\t{strpath}\t*\n"
line_number += 1
38 changes: 20 additions & 18 deletions pgGraphs/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def __init__(self, gfa_file: str | None = None, with_sequence: bool = True) -> N
)

name, line_type, datas = GFAParser.read_gfa_line(
gfa_line.split(), with_sequence)
gfa_line.split('\t'), with_sequence)
match line_type:
case GFALine.SEGMENT:
self.segments[name] = datas
Expand Down Expand Up @@ -340,7 +340,7 @@ def merge_segments(

############### POsitional tag ###############

def sequence_offsets(self) -> None:
def sequence_offsets(self, recalculate: bool = False) -> None:
"""
Calculates the offsets within each path for each node
Here, we aim to extend the current GFA tag format by adding tags
Expand All @@ -353,19 +353,21 @@ def sequence_offsets(self) -> None:
Note that any non-referenced walk in this field means that the node
is not inside the given walk.
"""
for walk_name, walk_datas in self.paths.items():
start_offset: int = int(
walk_datas['start_offset']) if 'start_offset' in walk_datas.keys() else 0
for node, vect in walk_datas["path"]:
if 'PO' not in self.segments[node]:
self.segments[node]['PO']: dict[str,
list[tuple[int, int, Orientation]]] = dict()
if walk_name in self.segments[node]['PO']:
# We already encountered the node in this path
self.segments[node]['PO'][walk_name].append(
(start_offset, start_offset+self.segments[node]['length'], vect.value))
else:
# First time we encounter this node for this path
self.segments[node]['PO'][walk_name] = [
(start_offset, start_offset+self.segments[node]['length'], vect.value)]
start_offset += self.segments[node]['length']
if not 'PO' in self.metadata or recalculate:
for walk_name, walk_datas in self.paths.items():
start_offset: int = int(
walk_datas['start_offset']) if 'start_offset' in walk_datas.keys() and isinstance(walk_datas['start_offset'], int) is None else 0
for node, vect in walk_datas["path"]:
if 'PO' not in self.segments[node]:
self.segments[node]['PO']: dict[str,
list[tuple[int, int, Orientation]]] = dict()
if walk_name in self.segments[node]['PO']:
# We already encountered the node in this path
self.segments[node]['PO'][walk_name].append(
(start_offset, start_offset+self.segments[node]['length'], vect.value))
else:
# First time we encounter this node for this path
self.segments[node]['PO'][walk_name] = [
(start_offset, start_offset+self.segments[node]['length'], vect.value)]
start_offset += self.segments[node]['length']
self.metadata['PO'] = True
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

[project]
name = "gfagraphs"
version = "0.2.2"
version = "0.2.9"
authors = [
{ name="Siegfried Dubois", email="siegfried.dubois@inria.fr" },
]
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
DESCRIPTION: str = "Library to parse, edit and handle in memory GFA graphs"
REQUIRED_PYTHON: tuple = (3, 10)
OVERRIDE_VN: bool = True
VN: str = "0.2.2"
VN: str = "0.2.9"
URL: str = "https://github.com/Tharos-ux/gfagraphs"
REQUIREMENTS: list[str] = ['networkx', 'tharos-pytools']

Expand Down

0 comments on commit ceef6c1

Please sign in to comment.