From 17e960fe3ecd4c7876804e8fc9444863975721d3 Mon Sep 17 00:00:00 2001 From: Tharos Date: Fri, 1 Dec 2023 10:48:18 +0100 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20adding=20self=20method=20'sequence?= =?UTF-8?q?=5Foffsets'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- gfagraphs/gfagraphs.py | 7 ++++--- pgGraphs/graph.py | 32 ++++++++++++++++++++++++++++++++ pyproject.toml | 2 +- setup.py | 4 ++-- 4 files changed, 39 insertions(+), 6 deletions(-) diff --git a/gfagraphs/gfagraphs.py b/gfagraphs/gfagraphs.py index cd198a9..9742af0 100644 --- a/gfagraphs/gfagraphs.py +++ b/gfagraphs/gfagraphs.py @@ -1,5 +1,6 @@ "Tools to represent GFA format" -from os import path, stat +from os import stat +from os.path import exists from enum import Enum from re import sub, match from typing import Callable @@ -24,7 +25,7 @@ def get_gfa_subtype(gfa_file_path: str | list[str]) -> str | list[str]: gfa_file_path = [gfa_file_path] for gfa_file in gfa_file_path: # Checking if path exists - if not path.exists(gfa_file): + if not exists(gfa_file): raise OSError( "Specified file does not exists. Please check provided path." ) @@ -466,7 +467,7 @@ def __init__(self, gfa_file: str | None = None, gfa_type: str = 'unknown', with_ if gfa_file: # We try to load file from disk # Checking if path exists - if not path.exists(gfa_file): + if not exists(gfa_file): raise OSError( "Specified file does not exists. Please check provided path." 
def sequence_offsets(self) -> None:
    """Compute, for every segment, its offsets inside each path visiting it.

    Every entry of ``self.paths`` is traversed in order while a running
    position is maintained. The position starts at the path's
    ``'start_offset'`` (converted with ``int``; ``0`` when the key is
    absent) and grows by the ``'length'`` of each visited segment.

    The result is stored under the ``'PO'`` (Path Offset) key of each
    visited segment, as a mapping::

        {path_name: [(start, end, orientation), ...]}

    with one tuple per visit, in visiting order. ``end`` equals
    ``start + length`` and ``orientation`` is the ``.value`` of the
    orientation object paired with the segment in the path.

    For instance ``{'w1': [(334, 335, '+')], 'w2': [(245, 247, '-')]}``
    says the segment spans offsets 334-335 of ``w1`` and 245-247 of
    ``w2``, read in opposite orientations. A path name missing from the
    mapping means that path does not go through the segment. Segments
    visited by no path are left untouched.

    The mapping is rebuilt from scratch on every call and overwrites the
    previous ``'PO'`` value of the visited segments, so calling the method
    repeatedly never accumulates duplicated offsets.

    NOTE(review): this is presumably meant to be written out as a GFA
    ``PO:J:`` JSON tag; serialisation is not done here - confirm where it
    happens (tuples would become JSON arrays).

    Raises:
        KeyError: a path references a segment absent from
            ``self.segments``, or a visited segment has no ``'length'``.
        ValueError, TypeError: a ``'start_offset'`` value cannot be
            converted by ``int``.
    """
    # Offsets are collected aside and only written back at the end: this
    # keeps the method idempotent and avoids leaving half-updated segments
    # behind if a lookup fails midway.
    collected: dict = {}
    for walk_name, walk_datas in self.paths.items():
        cursor: int = int(walk_datas.get('start_offset', 0))
        for node, vect in walk_datas["path"]:
            stop: int = cursor + self.segments[node]['length']
            # A segment may be visited several times by the same path,
            # hence a list of intervals per path name.
            collected.setdefault(node, {}).setdefault(walk_name, []).append(
                (cursor, stop, vect.value)
            )
            cursor = stop
    for node, per_walk in collected.items():
        self.segments[node]['PO'] = per_walk