Skip to content

Commit

Permalink
Refactor reference passing (#316)
Browse files Browse the repository at this point in the history
More consistently use the prefix/identifier handler function
  • Loading branch information
cthoyt authored Jan 15, 2025
1 parent a9abf86 commit 7119f9e
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 65 deletions.
34 changes: 16 additions & 18 deletions src/pyobo/api/alts.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,10 @@
import curies
from typing_extensions import Unpack

from .utils import get_version_from_kwargs
from .utils import _get_pi, get_version_from_kwargs
from ..constants import GetOntologyKwargs, check_should_cache, check_should_force
from ..getters import get_ontology
from ..identifier_utils import wrap_norm_prefix
from ..struct.reference import Reference
from ..utils.cache import cached_multidict
from ..utils.path import prefix_cache_join

Expand Down Expand Up @@ -62,18 +61,23 @@ def get_alts_to_id(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> Mapping[


def get_primary_curie(
curie: str,
prefix: str | curies.Reference | curies.ReferenceTuple,
identifier: str | None = None,
/,
**kwargs: Unpack[GetOntologyKwargs],
) -> str | None:
"""Get the primary curie for an entity."""
reference = Reference.from_curie_or_uri(curie, strict=kwargs.get("strict", True))
if reference is None:
reference = _get_pi(prefix, identifier)
try:
primary_identifier = get_primary_identifier(reference, **kwargs)
except ValueError:
if kwargs.get("strict"):
raise
# this happens on invalid prefix. maybe revise?
return None
primary_identifier = get_primary_identifier(reference, **kwargs)
return f"{reference.prefix}:{primary_identifier}"


@wrap_norm_prefix
def get_primary_identifier(
prefix: str | curies.Reference | curies.ReferenceTuple,
identifier: str | None = None,
Expand All @@ -88,14 +92,8 @@ def get_primary_identifier(
Returns the original identifier if there are no alts available or if there's no mapping.
"""
if isinstance(prefix, curies.ReferenceTuple | curies.Reference):
identifier = prefix.identifier
prefix = prefix.prefix
elif identifier is None:
raise ValueError("passed a prefix but no local unique identifier")

if prefix in NO_ALTS: # TODO later expand list to other namespaces with no alts
return identifier

alts_to_id = get_alts_to_id(prefix, **kwargs)
return alts_to_id.get(identifier, identifier)
t = _get_pi(prefix, identifier)
if t.prefix in NO_ALTS: # TODO later expand list to other namespaces with no alts
return t.identifier
alts_to_id = get_alts_to_id(t.prefix, **kwargs)
return alts_to_id.get(t.identifier, t.identifier)
63 changes: 30 additions & 33 deletions src/pyobo/api/names.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@
import subprocess
from collections.abc import Callable, Mapping
from functools import lru_cache
from typing import TypeVar
from typing import Any, TypeVar

from curies import Reference, ReferenceTuple
from typing_extensions import Unpack

from .alts import get_primary_identifier
from .utils import _get_pi, get_version, get_version_from_kwargs
from .utils import _get_pi, get_version_from_kwargs
from ..constants import GetOntologyKwargs, check_should_cache, check_should_force
from ..getters import NoBuildError, get_ontology
from ..identifier_utils import wrap_norm_prefix
Expand All @@ -35,12 +35,9 @@
logger = logging.getLogger(__name__)


def get_name_by_curie(curie: str, *, version: str | None = None) -> str | None:
def get_name_by_curie(curie: str, **kwargs: Any) -> str | None:
"""Get the name for a CURIE, if possible."""
reference = Reference.from_curie(curie)
if version is None:
version = get_version(reference.prefix)
return get_name(reference, version=version)
return get_name(curie, **kwargs)


X = TypeVar("X")
Expand All @@ -51,44 +48,42 @@ def get_name_by_curie(curie: str, *, version: str | None = None) -> str | None:

def _help_get(
f: Callable[[str, Unpack[GetOntologyKwargs]], Mapping[str, X]],
prefix: str,
identifier: str,
reference: ReferenceTuple,
**kwargs: Unpack[GetOntologyKwargs],
) -> X | None:
"""Get the result for an entity based on a mapping maker function ``f``."""
try:
mapping = f(prefix, **kwargs) # type:ignore
mapping = f(reference.prefix, **kwargs) # type:ignore
except NoBuildError:
if prefix not in NO_BUILD_PREFIXES:
logger.warning("[%s] unable to look up results with %s", prefix, f)
NO_BUILD_PREFIXES.add(prefix)
if reference.prefix not in NO_BUILD_PREFIXES:
logger.warning("[%s] unable to look up results with %s", reference, f)
NO_BUILD_PREFIXES.add(reference.prefix)
return None
except ValueError as e:
if prefix not in NO_BUILD_PREFIXES:
logger.warning("[%s] value error while looking up results with %s: %s", prefix, f, e)
NO_BUILD_PREFIXES.add(prefix)
if reference.prefix not in NO_BUILD_PREFIXES:
logger.warning("[%s] value error while looking up results with %s: %s", reference, f, e)
NO_BUILD_PREFIXES.add(reference.prefix)
return None

if not mapping:
if prefix not in NO_BUILD_PREFIXES:
logger.warning("[%s] no results produced with %s", prefix, f)
NO_BUILD_PREFIXES.add(prefix)
if reference.prefix not in NO_BUILD_PREFIXES:
logger.warning("[%s] no results produced with %s", reference, f)
NO_BUILD_PREFIXES.add(reference.prefix)
return None

primary_id = get_primary_identifier(prefix, identifier, **kwargs)
primary_id = get_primary_identifier(reference, **kwargs)
return mapping.get(primary_id)


@wrap_norm_prefix
def get_name(
prefix: str | Reference | ReferenceTuple,
identifier: str | None = None,
/,
**kwargs: Unpack[GetOntologyKwargs],
) -> str | None:
"""Get the name for an entity."""
t = _get_pi(prefix, identifier)
return _help_get(get_id_name_mapping, prefix=t.prefix, identifier=t.identifier, **kwargs)
reference = _get_pi(prefix, identifier)
return _help_get(get_id_name_mapping, reference, **kwargs)


@lru_cache
Expand Down Expand Up @@ -167,16 +162,15 @@ def get_name_id_mapping(
return {v: k for k, v in id_name.items()}


@wrap_norm_prefix
def get_definition(
prefix: str, identifier: str | None = None, **kwargs: Unpack[GetOntologyKwargs]
prefix: str | Reference | ReferenceTuple,
identifier: str | None = None,
/,
**kwargs: Unpack[GetOntologyKwargs],
) -> str | None:
"""Get the definition for an entity."""
if identifier is None:
prefix, _, identifier = prefix.rpartition(":")
if identifier is None:
raise ValueError
return _help_get(get_id_definition_mapping, prefix=prefix, identifier=identifier, **kwargs)
reference = _get_pi(prefix, identifier)
return _help_get(get_id_definition_mapping, reference, **kwargs)


def get_id_definition_mapping(
Expand Down Expand Up @@ -219,12 +213,15 @@ def _get_obsolete() -> list[str]:
return set(_get_obsolete())


@wrap_norm_prefix
def get_synonyms(
prefix: str, identifier: str, **kwargs: Unpack[GetOntologyKwargs]
prefix: str | Reference | ReferenceTuple,
identifier: str | None = None,
/,
**kwargs: Unpack[GetOntologyKwargs],
) -> list[str] | None:
"""Get the synonyms for an entity."""
return _help_get(get_id_synonyms_mapping, prefix=prefix, identifier=identifier, **kwargs)
reference = _get_pi(prefix, identifier)
return _help_get(get_id_synonyms_mapping, reference, **kwargs)


@wrap_norm_prefix
Expand Down
23 changes: 15 additions & 8 deletions src/pyobo/api/species.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@
from collections.abc import Mapping
from functools import lru_cache

import curies
from typing_extensions import Unpack

from .alts import get_primary_identifier
from .utils import get_version_from_kwargs
from .utils import _get_pi, get_version_from_kwargs
from ..constants import GetOntologyKwargs, check_should_force
from ..getters import NoBuildError, get_ontology
from ..identifier_utils import wrap_norm_prefix
Expand All @@ -22,23 +23,29 @@
logger = logging.getLogger(__name__)


@wrap_norm_prefix
def get_species(prefix: str, identifier: str, **kwargs: Unpack[GetOntologyKwargs]) -> str | None:
def get_species(
prefix: str | curies.Reference | curies.ReferenceTuple,
identifier: str | None = None,
/,
**kwargs: Unpack[GetOntologyKwargs],
) -> str | None:
"""Get the species."""
if prefix == "uniprot":
t = _get_pi(prefix, identifier)

if t.prefix == "uniprot":
raise NotImplementedError

try:
id_species = get_id_species_mapping(prefix, **kwargs)
id_species = get_id_species_mapping(t.prefix, **kwargs)
except NoBuildError:
logger.warning("unable to look up species for prefix %s", prefix)
logger.warning("unable to look up species for prefix %s", t.prefix)
return None

if not id_species:
logger.warning("no results produced for prefix %s", prefix)
logger.warning("no results produced for prefix %s", t.prefix)
return None

primary_id = get_primary_identifier(prefix, identifier, **kwargs)
primary_id = get_primary_identifier(t, **kwargs)
return id_species.get(primary_id)


Expand Down
2 changes: 2 additions & 0 deletions src/pyobo/api/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@ def _get_pi(
if identifier is not None:
raise ValueError("unexpected non-none value passed as second positional argument")
return prefix.pair
if isinstance(prefix, str) and identifier is None:
return ReferenceTuple.from_curie(prefix)
if identifier is None:
raise ValueError(
"prefix was given as a string, so an identifier was expected to be passed as a string as well"
Expand Down
4 changes: 3 additions & 1 deletion src/pyobo/struct/struct_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -675,9 +675,11 @@ def _ensure_ref(
return Reference(prefix=reference[0], identifier=reference[1])
if isinstance(reference, Reference):
return reference
if isinstance(reference, curies.Reference):
return Reference(prefix=reference.prefix, identifier=reference.identifier)
if ":" not in reference:
if not ontology_prefix:
raise ValueError(f"can't parse reference: {reference}")
raise ValueError(f"can't parse reference of type {type(reference)}: {reference}")
return default_reference(ontology_prefix, reference)
_rv = Reference.from_curie_or_uri(reference, strict=True, ontology_prefix=ontology_prefix)
if _rv is None:
Expand Down
10 changes: 5 additions & 5 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def test_get_primary(self, _, __):
primary_id = get_primary_identifier("go", "0001071")
self.assertIsNotNone(primary_id)
self.assertEqual("0003700", primary_id)
name = get_name("go", "0001071")
name = get_name(ReferenceTuple("go", "0001071"))
self.assertIsNotNone(name)
self.assertEqual("DNA-binding transcription factor activity", name)

Expand All @@ -96,10 +96,10 @@ def test_get_primary_by_curie(self, _, __):
@mock_id_names_mapping
def test_already_primary(self, _, __):
"""Test when you give a primary id."""
primary_id = get_primary_identifier("go", "0003700")
primary_id = get_primary_identifier(ReferenceTuple("go", "0003700"))
self.assertIsNotNone(primary_id)
self.assertEqual("0003700", primary_id)
name = get_name("go", "0003700")
name = get_name(ReferenceTuple("go", "0003700"))
self.assertIsNotNone(name)
self.assertEqual("DNA-binding transcription factor activity", name)

Expand Down Expand Up @@ -134,9 +134,9 @@ def test_already_primary_by_curie(self, _, __):
@mock_id_names_mapping
def test_no_alts(self, _, __):
"""Test alternate behavior for nomenclature source with no alts."""
primary_id = get_primary_identifier("ncbitaxon", "52818")
primary_id = get_primary_identifier(ReferenceTuple("ncbitaxon", "52818"))
self.assertEqual("52818", primary_id)
self.assertEqual("Allamanda cathartica", get_name("ncbitaxon", "52818"))
self.assertEqual("Allamanda cathartica", get_name(ReferenceTuple("ncbitaxon", "52818")))

def test_api(self) -> None:
"""Test getting the hierarchy."""
Expand Down

0 comments on commit 7119f9e

Please sign in to comment.