diff --git a/nmdc_runtime/minter/config.py b/nmdc_runtime/minter/config.py index b1a5ac0e..6957a944 100644 --- a/nmdc_runtime/minter/config.py +++ b/nmdc_runtime/minter/config.py @@ -2,8 +2,9 @@ from functools import lru_cache from typing import List -from nmdc_runtime.util import get_nmdc_jsonschema_dict +from nmdc_schema.id_helpers import get_typecode_for_future_ids +from nmdc_runtime.util import get_nmdc_jsonschema_dict from nmdc_runtime.api.db.mongo import get_mongo_db @@ -12,55 +13,24 @@ def minting_service_id() -> str | None: return os.getenv("MINTING_SERVICE_ID") -def extract_typecode_from_pattern(pattern: str) -> str: - r""" - Returns the typecode portion of the specified string. - - >>> extract_typecode_from_pattern("foo-123-456$") # original behavior - 'foo' - >>> extract_typecode_from_pattern("(foo)-123-456$") # returns first and only typecode - 'foo' - >>> extract_typecode_from_pattern("(foo|bar)-123-456$") # returns first of 2 typecodes - 'foo' - >>> extract_typecode_from_pattern("(foo|bar|baz)-123-456$") # returns first of > 2 typecodes - 'foo' - """ - - # Get the portion of the pattern preceding the first hyphen. - # e.g. "foo-bar-baz" → ["foo", "bar-baz"] → "foo" - typecode_sub_pattern = pattern.split("-", maxsplit=1)[0] - - # If that portion of the pattern is enclosed in parentheses, get the portion between the parentheses. - # e.g. "(apple|banana|carrot)" → "apple|banana|carrot" - if typecode_sub_pattern.startswith("(") and typecode_sub_pattern.endswith(")"): - inner_pattern = typecode_sub_pattern[1:-1] - - # Finally, get everything before the first `|`, if any. - # e.g. "apple|banana|carrot" → "apple" - # e.g. "apple" → "apple" - typecode = inner_pattern.split("|", maxsplit=1)[0] - else: - # Note: This is the original behavior, before we added support for multi-typecode patterns. - # e.g. "apple" → "apple" - typecode = typecode_sub_pattern - - return typecode - - @lru_cache() def typecodes() -> List[dict]: r""" Returns a list of dictionaries containing typecodes and associated information derived from the schema. - Preconditions about the schema: - - The typecode portion of the pattern is between the pattern prefix and the first subsequent hyphen. - - The typecode portion of the pattern either consists of a single typecode verbatim (e.g. "foo"); - or consists of multiple typecodes in a pipe-delimited list enclosed in parentheses (e.g. "(foo|bar|baz)"). - - The typecode portion of the pattern does not, itself, contain any hyphens. - - TODO: Get the typecodes in a different way than by extracting them from a larger string, which seems brittle to me. - Getting them a different way may require schema authors to _define_ them a different way (e.g. defining them - in a dedicated property of a class; for example, one named `typecode`). + Note: In this function, we rely on a helper function provided by the `nmdc-schema` package to extract—from a given + class's `id` slot's pattern—the typecode that the minter would use when generating an ID for an instance of + that class _today_; regardless of what it may have used in the past. + + >>> typecode_descriptors = typecodes() + # Test #1: We get the typecode we expect, for a class whose pattern contains only one typecode. + >>> any((td["name"] == "sty" and td["schema_class"] == "nmdc:Study") for td in typecode_descriptors) + True + # Tests #2 and #3: We get only the typecode we expect, for a class whose pattern contains multiple typecodes. + >>> any((td["name"] == "dgms" and td["schema_class"] == "nmdc:MassSpectrometry") for td in typecode_descriptors) + True + >>> any((td["name"] == "omprc" and td["schema_class"] == "nmdc:MassSpectrometry") for td in typecode_descriptors) + False """ id_pattern_prefix = r"^(nmdc):" @@ -69,16 +39,14 @@ def typecodes() -> List[dict]: for cls_name, defn in schema_dict["$defs"].items(): match defn.get("properties"): case {"id": {"pattern": p}} if p.startswith(id_pattern_prefix): - # Get the portion of the pattern following the prefix. - # e.g. "^(nmdc):foo-bar-baz" → "foo-bar-baz" - index_of_first_character_following_prefix = len(id_pattern_prefix) - pattern_without_prefix = p[index_of_first_character_following_prefix:] + # Extract the typecode from the pattern. + typecode_for_future_ids = get_typecode_for_future_ids(slot_pattern=p) rv.append( { "id": "nmdc:" + cls_name + "_" + "typecode", "schema_class": "nmdc:" + cls_name, - "name": extract_typecode_from_pattern(pattern_without_prefix), + "name": typecode_for_future_ids, } ) case _: