Skip to content

Commit

Permalink
✨ adding self method 'sequence_offsets'
Browse files Browse the repository at this point in the history
  • Loading branch information
dubssieg committed Dec 1, 2023
1 parent 6f3f575 commit 17e960f
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 6 deletions.
7 changes: 4 additions & 3 deletions gfagraphs/gfagraphs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"Tools to represent GFA format"
from os import path, stat
from os import stat
from os.path import exists
from enum import Enum
from re import sub, match
from typing import Callable
Expand All @@ -24,7 +25,7 @@ def get_gfa_subtype(gfa_file_path: str | list[str]) -> str | list[str]:
gfa_file_path = [gfa_file_path]
for gfa_file in gfa_file_path:
# Checking if path exists
if not path.exists(gfa_file):
if not exists(gfa_file):
raise OSError(
"Specified file does not exists. Please check provided path."
)
Expand Down Expand Up @@ -466,7 +467,7 @@ def __init__(self, gfa_file: str | None = None, gfa_type: str = 'unknown', with_
if gfa_file:
# We try to load file from disk
# Checking if path exists
if not path.exists(gfa_file):
if not exists(gfa_file):
raise OSError(
"Specified file does not exists. Please check provided path."
)
Expand Down
32 changes: 32 additions & 0 deletions pgGraphs/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,3 +337,35 @@ def merge_segments(
# We go backwards to dodge index collisions
for pos in positions:
path['path'][pos:pos-len(segs)+1] = [merge_name]

############### Positional (PO) tag ###############

def sequence_offsets(self) -> None:
    """
    Compute, for every node, its offsets within each path/walk that uses it.

    The result is stored in place on each segment under the 'PO' (Path
    Offset) key, extending the GFA tag set while respecting its naming
    convention. The value is a mapping from walk/path name to a list of
    (start, end, orientation) tuples, one tuple per occurrence of the node
    in that walk, where end is start plus the segment's 'length'.

    Example: PO:J:{'w1':[(334,335,'+')],'w2':[(245,247,'-')]} tells that the
    walk/path w1 contains the sequence from offset 334 to 335, that the
    walk/path w2 contains it from offset 245 to 247, and that the two
    occurrences are reversed relative to each other.
    A walk that is absent from a node's PO mapping does not contain the node.

    Offsets of a walk start at its 'start_offset' entry (converted with
    int) when present, and at 0 otherwise.

    The method can be called repeatedly: entries for the walks currently in
    self.paths are recomputed from scratch rather than appended to, so no
    duplicated or stale offsets accumulate. Entries stored under other walk
    names are left untouched.

    Returns nothing; self.segments is modified in place.
    """
    # Forget what a previous call stored for the walks we are about to
    # recompute; otherwise each call would append duplicate tuples.
    for segment in self.segments.values():
        previous = segment.get('PO')
        if previous:
            for walk_name in self.paths:
                previous.pop(walk_name, None)

    for walk_name, walk_datas in self.paths.items():
        offset: int = int(walk_datas.get('start_offset', 0))
        for node, vect in walk_datas["path"]:
            segment = self.segments[node]
            end: int = offset + segment['length']
            # Create the PO mapping and the per-walk list on first use, then
            # record this occurrence of the node.
            segment.setdefault('PO', {}).setdefault(walk_name, []).append(
                (offset, end, vect.value))
            # The next node of the walk starts where this one ends.
            offset = end
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

[project]
name = "gfagraphs"
version = "0.2.0"
version = "0.2.2"
authors = [
{ name="Siegfried Dubois", email="siegfried.dubois@inria.fr" },
]
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
LICENCE: str = "LICENCE"
DESCRIPTION: str = "Library to parse, edit and handle in memory GFA graphs"
REQUIRED_PYTHON: tuple = (3, 10)
OVERRIDE_VN: bool = False
VN: str = "0.2.0"
OVERRIDE_VN: bool = True
VN: str = "0.2.2"
URL: str = "https://github.com/Tharos-ux/gfagraphs"
REQUIREMENTS: list[str] = ['networkx', 'tharos-pytools']

Expand Down

0 comments on commit 17e960f

Please sign in to comment.