Skip to content

Commit

Permalink
Merge branch 'main' into annotator-side-effect
Browse files Browse the repository at this point in the history
  • Loading branch information
jsstevenson authored Feb 14, 2025
2 parents 80f349c + c99eaad commit 33ca72f
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 73 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
id: get_branch_name
run: |
raw=$(git branch -r --contains ${{ github.ref }})
branch=$(echo "$raw" | grep "origin/main" | sed "s|origin/||" | xargs)
branch=$(echo "$raw" | grep "origin/main" | grep -v "HEAD" | sed "s|origin/||" | xargs)
echo "name=$branch" >> "$GITHUB_OUTPUT"
build:
name: Build distribution
Expand Down
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[submodule "submodules/vrs"]
path = submodules/vrs
url = https://github.com/ga4gh/vrs.git
branch = 2.0.0-ballot.2024-11
branch = 2.0.0-snapshot.2025-02
37 changes: 24 additions & 13 deletions src/ga4gh/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@
from ga4gh.core.identifiers import GA4GH_IR_REGEXP


class BaseModelForbidExtra(BaseModel):
"""Base Pydantic model class with extra attributes forbidden."""

model_config = ConfigDict(extra="forbid")


class Relation(str, Enum):
"""A mapping relation between concepts as defined by the Simple Knowledge
Organization System (SKOS).
Expand Down Expand Up @@ -95,11 +101,11 @@ class Entity(BaseModel, ABC):
...,
description="The name of the class that is instantiated by a data object representing the Entity.",
)
label: Optional[str] = Field(None, description="A primary name for the entity.")
name: Optional[str] = Field(None, description="A primary name for the entity.")
description: Optional[str] = Field(
None, description="A free-text description of the Entity."
)
alternativeLabels: Optional[list[str]] = Field( # noqa: N815
aliases: Optional[list[str]] = Field(
None, description="Alternative name(s) for the Entity."
)
extensions: Optional[list[Extension]] = Field(
Expand Down Expand Up @@ -129,12 +135,12 @@ class Element(BaseModel, ABC):
#########################################


class Coding(Element):
class Coding(Element, BaseModelForbidExtra):
"""A structured representation of a code for a defined concept in a terminology or
code system.
"""

label: Optional[str] = Field(
name: Optional[str] = Field(
None,
description="The human-readable name for the coded concept, as defined by the code system.",
)
Expand All @@ -149,7 +155,7 @@ class Coding(Element):
code: code # Cannot use Field due to PydanticUserError: field name and type annotation must not clash.


class ConceptMapping(Element):
class ConceptMapping(Element, BaseModelForbidExtra):
"""A mapping to a concept in a terminology or code system."""

model_config = ConfigDict(use_enum_values=True)
Expand All @@ -164,7 +170,7 @@ class ConceptMapping(Element):
)


class Extension(Element):
class Extension(Element, BaseModelForbidExtra):
"""The Extension class provides entities with a means to include additional
attributes that are outside of the specified standard but needed by a given content
provider or system implementer. These extensions are not expected to be natively
Expand All @@ -186,14 +192,14 @@ class Extension(Element):
)


class MappableConcept(Element):
"""A concept label that may be mapped to one or more `Codings`."""
class MappableConcept(Element, BaseModelForbidExtra):
"""A concept name that may be mapped to one or more `Codings`."""

conceptType: Optional[str] = Field( # noqa: N815
None,
description="A term indicating the type of concept being represented by the MappableConcept.",
)
label: Optional[str] = Field(None, description="A primary name for the concept.")
name: Optional[str] = Field(None, description="A primary name for the concept.")
primaryCode: Optional[code] = Field( # noqa: N815
None,
description="A primary code for the concept that is used to identify the concept in a terminology or code system. If there is a public code system for the primaryCode then it should also be specified in the mappings array with a relation of 'exactMatch'. This attribute is provided to both allow a more technical code to be used when a public Coding with a system is not available as well as when it is available but should be identified as the primary code.",
Expand All @@ -203,11 +209,16 @@ class MappableConcept(Element):
description="A list of mappings to concepts in terminologies or code systems. Each mapping should include a coding and a relation.",
)

class ga4gh: # noqa: N801
"""Contain properties used for computing digests"""

inherent: tuple[str] = ("primaryCode",)

@model_validator(mode="after")
def require_label_or_primary_code(cls, v): # noqa: ANN001 N805 ANN201
"""Ensure that ``label`` or ``primaryCode`` is provided"""
if v.primaryCode is None and v.label is None:
err_msg = "`One of label` or `primaryCode` must be provided."
def require_name_or_primary_code(cls, v): # noqa: ANN001 N805 ANN201
"""Ensure that ``name`` or ``primaryCode`` is provided"""
if v.primaryCode is None and v.name is None:
err_msg = "`One of name` or `primaryCode` must be provided."
raise ValueError(err_msg)
return v

Expand Down
87 changes: 47 additions & 40 deletions src/ga4gh/vrs/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,13 @@
PrevVrsVersion,
sha512t24u,
)
from ga4gh.core.models import Element, Entity, MappableConcept, iriReference
from ga4gh.core.models import (
BaseModelForbidExtra,
Element,
Entity,
MappableConcept,
iriReference,
)
from ga4gh.core.pydantic import get_pydantic_root, getattr_in


Expand Down Expand Up @@ -137,12 +143,12 @@ def pydantic_class_refatt_map():
class_reffable_fields.append(fieldname)
if len(class_reffable_fields) > 0:
reffable_fields[model_class.__name__] = class_reffable_fields
class_keys = {}
class_inherent = {}
for model_class in model_classes:
keys = getattr_in(model_class, ["ga4gh", "keys"])
if keys and len(keys) > 0:
class_keys[model_class.__name__] = keys
return (reffable_classes, union_reffable_classes, reffable_fields, class_keys)
inherent = getattr_in(model_class, ["ga4gh", "inherent"])
if inherent and len(inherent) > 0:
class_inherent[model_class.__name__] = inherent
return (reffable_classes, union_reffable_classes, reffable_fields, class_inherent)


class VrsType(str, Enum):
Expand Down Expand Up @@ -242,13 +248,13 @@ def __hash__(self):

def ga4gh_serialize(self) -> Dict:
out = OrderedDict()
for k in self.ga4gh.keys:
for k in self.ga4gh.inherent:
v = getattr(self, k)
out[k] = _recurse_ga4gh_serialize(v)
return out

class ga4gh: # noqa: N801
keys: List[str]
inherent: List[str]

@staticmethod
def is_ga4gh_identifiable() -> bool:
Expand Down Expand Up @@ -369,7 +375,7 @@ class ga4gh(_ValueObject.ga4gh): # noqa: N801
prefix: str


class Expression(Element):
class Expression(Element, BaseModelForbidExtra):
"""Representation of a variation by a specified nomenclature or syntax for a
Variation object. Common examples of expressions for the description of molecular
variation include the HGVS and ISCN nomenclatures.
Expand Down Expand Up @@ -465,7 +471,7 @@ class sequenceString(RootModel):
#########################################


class LengthExpression(_ValueObject):
class LengthExpression(_ValueObject, BaseModelForbidExtra):
"""A sequence expressed only by its length."""

type: Literal["LengthExpression"] = Field(
Expand All @@ -476,10 +482,10 @@ class LengthExpression(_ValueObject):
)

class ga4gh(_ValueObject.ga4gh):
keys = ["length", "type"]
inherent = ["length", "type"]


class ReferenceLengthExpression(_ValueObject):
class ReferenceLengthExpression(_ValueObject, BaseModelForbidExtra):
"""An expression of a length of a sequence from a repeating reference."""

type: Literal["ReferenceLengthExpression"] = Field(
Expand All @@ -498,10 +504,10 @@ class ReferenceLengthExpression(_ValueObject):
)

class ga4gh(_ValueObject.ga4gh):
keys = ["length", "repeatSubunitLength", "type"]
inherent = ["length", "repeatSubunitLength", "type"]


class LiteralSequenceExpression(_ValueObject):
class LiteralSequenceExpression(_ValueObject, BaseModelForbidExtra):
"""An explicit expression of a Sequence."""

type: Literal["LiteralSequenceExpression"] = Field(
Expand All @@ -511,15 +517,15 @@ class LiteralSequenceExpression(_ValueObject):
sequence: sequenceString = Field(..., description="the literal sequence")

class ga4gh(_ValueObject.ga4gh):
keys = ["sequence", "type"]
inherent = ["sequence", "type"]


#########################################
# vrs location
#########################################


class SequenceReference(_ValueObject):
class SequenceReference(_ValueObject, BaseModelForbidExtra):
"""A sequence of nucleic or amino acid character codes."""

model_config = ConfigDict(use_enum_values=True)
Expand Down Expand Up @@ -551,25 +557,26 @@ class SequenceReference(_ValueObject):
)

class ga4gh(_ValueObject.ga4gh):
keys = ["refgetAccession", "type"]
inherent = ["refgetAccession", "type"]


class SequenceLocation(Ga4ghIdentifiableObject):
"""A `Location` defined by an interval on a referenced `Sequence`."""
class SequenceLocation(Ga4ghIdentifiableObject, BaseModelForbidExtra):
"""A `Location` defined by an interval on a `Sequence`."""

type: Literal["SequenceLocation"] = Field(
VrsType.SEQ_LOC.value, description=f'MUST be "{VrsType.SEQ_LOC.value}"'
)
sequenceReference: Optional[Union[iriReference, SequenceReference]] = Field(
None, description="A reference to a Sequence on which the location is defined."
None,
description="A reference to a SequenceReference on which the location is defined.",
)
start: Optional[Union[Range, int]] = Field(
None,
description="The start coordinate or range of the SequenceLocation. The minimum value of this coordinate or range is 0. For locations on linear sequences, this MUST represent a coordinate or range less than or equal to the value of `end`. For circular sequences, `start` is greater than `end` when the location spans the sequence 0 coordinate.",
description="The start coordinate or range of the SequenceLocation. The minimum value of this coordinate or range is 0. For locations on linear sequences, this MUST represent a coordinate or range less than or equal to the value of `end`. For circular sequences, `start` is greater than `end` when the location spans the sequence 0 coordinate.",
)
end: Optional[Union[Range, int]] = Field(
None,
description="The end coordinate or range of the SequenceLocation. The minimum value of this coordinate or range is 0. For locations on linear sequences, this MUST represent a coordinate or range grater than or equal to the value of `start`. For circular sequences, `end` is less than `start` when the location spans the sequence 0 coordinate.",
description="The end coordinate or range of the SequenceLocation. The minimum value of this coordinate or range is 0. For locations on linear sequences, this MUST represent a coordinate or range greater than or equal to the value of `start`. For circular sequences, `end` is less than `start` when the location spans the sequence 0 coordinate.",
)
sequence: Optional[sequenceString] = Field(
None,
Expand Down Expand Up @@ -640,7 +647,7 @@ def get_refget_accession(self):
class ga4gh(Ga4ghIdentifiableObject.ga4gh): # noqa: N801
prefix = "SL"
priorPrefix = {PrevVrsVersion.V1_3.value: "VSL"} # noqa: N815
keys = ["end", "sequenceReference", "start", "type"]
inherent = ["end", "sequenceReference", "start", "type"]


#########################################
Expand All @@ -659,7 +666,7 @@ class _VariationBase(Ga4ghIdentifiableObject, ABC):
#########################################


class Allele(_VariationBase):
class Allele(_VariationBase, BaseModelForbidExtra):
"""The state of a molecule at a `Location`."""

type: Literal["Allele"] = Field(
Expand Down Expand Up @@ -700,10 +707,10 @@ def ga4gh_serialize_as_version(self, as_version: PrevVrsVersion):
class ga4gh(Ga4ghIdentifiableObject.ga4gh): # noqa: N801
prefix = "VA"
priorPrefix = {PrevVrsVersion.V1_3.value: "VA"} # noqa: N815
keys = ["location", "state", "type"]
inherent = ["location", "state", "type"]


class CisPhasedBlock(_VariationBase):
class CisPhasedBlock(_VariationBase, BaseModelForbidExtra):
"""An ordered set of co-occurring `Variation` on the same molecule."""

type: Literal["CisPhasedBlock"] = Field(
Expand All @@ -727,15 +734,15 @@ def ga4gh_serialize(self) -> Dict:

class ga4gh(Ga4ghIdentifiableObject.ga4gh):
prefix = "CPB"
keys = ["members", "type"]
inherent = ["members", "type"]


#########################################
# vrs structural variation (under active discussion)
#########################################


class Adjacency(_VariationBase):
class Adjacency(_VariationBase, BaseModelForbidExtra):
"""The `Adjacency` class represents the adjoining of the end of a sequence with the
beginning of an adjacent sequence, potentially with an intervening linker sequence.
"""
Expand Down Expand Up @@ -775,10 +782,10 @@ def validate_adjoined_sequences(

class ga4gh(Ga4ghIdentifiableObject.ga4gh):
prefix = "AJ"
keys = ["adjoinedSequences", "linker", "type"]
inherent = ["adjoinedSequences", "linker", "type"]


class Terminus(_VariationBase):
class Terminus(_VariationBase, BaseModelForbidExtra):
"""The `Terminus` data class provides a structure for describing the end
(terminus) of a sequence. Structurally similar to Adjacency but the linker sequence
is not allowed and it removes the unnecessary array structure.
Expand All @@ -793,10 +800,10 @@ class Terminus(_VariationBase):

class ga4gh(Ga4ghIdentifiableObject.ga4gh): # noqa: N815
prefix = "TM"
keys = ["location", "type"]
inherent = ["location", "type"]


class TraversalBlock(_ValueObject):
class TraversalBlock(_ValueObject, BaseModelForbidExtra):
"""A component used to describe the orientation of applicable molecular variation
within a DerivativeMolecule.
"""
Expand All @@ -816,10 +823,10 @@ class TraversalBlock(_ValueObject):
)

class ga4gh(_ValueObject.ga4gh):
keys = ["component", "orientation", "type"]
inherent = ["component", "orientation", "type"]


class DerivativeMolecule(_VariationBase):
class DerivativeMolecule(_VariationBase, BaseModelForbidExtra):
"""The "Derivative Molecule" data class is a structure for describing a derivate
molecule composed from multiple sequence components.
"""
Expand All @@ -842,15 +849,15 @@ class DerivativeMolecule(_VariationBase):

class ga4gh(Ga4ghIdentifiableObject.ga4gh): # noqa: N815
prefix = "DM"
keys = ["components", "type"]
inherent = ["components", "type"]


#########################################
# vrs systemic variation
#########################################


class CopyNumberCount(_VariationBase):
class CopyNumberCount(_VariationBase, BaseModelForbidExtra):
"""The absolute count of discrete copies of a `Location`, within a system
(e.g. genome, cell, etc.).
"""
Expand All @@ -868,10 +875,10 @@ class CopyNumberCount(_VariationBase):

class ga4gh(Ga4ghIdentifiableObject.ga4gh): # noqa: N815
prefix = "CN"
keys = ["copies", "location", "type"]
inherent = ["copies", "location", "type"]


class CopyNumberChange(_VariationBase):
class CopyNumberChange(_VariationBase, BaseModelForbidExtra):
"""An assessment of the copy number of a `Location` within a system
(e.g. genome, cell, etc.) relative to a baseline ploidy.
"""
Expand Down Expand Up @@ -912,7 +919,7 @@ def validate_copy_change(cls, v) -> MappableConcept:

class ga4gh(Ga4ghIdentifiableObject.ga4gh):
prefix = "CX"
keys = ["copyChange", "location", "type"]
inherent = ["copyChange", "location", "type"]


#########################################
Expand Down Expand Up @@ -993,6 +1000,6 @@ class SystemicVariation(RootModel):


# Computed at the end of the module so that every model class above is already defined
(reffable_classes, union_reffable_classes, class_refatt_map, class_keys) = (
(reffable_classes, union_reffable_classes, class_refatt_map, class_inherent) = (
pydantic_class_refatt_map()
)
2 changes: 1 addition & 1 deletion submodules/vrs
Submodule vrs updated 97 files
+24 −0 .github/workflows/cqa.yaml
+1 −0 .gitmodules
+2 −0 .pre-commit-config.yaml
+1 −1 .readthedocs.yaml
+8 −8 CONTRIBUTING.md
+1 −1 CONTRIBUTORS.md
+1 −1 README.md
+2 −2 TODO
+15 −16 docs/source/appendices/design_decisions.rst
+3 −3 docs/source/appendices/ga4gh_identifiers.rst
+77 −77 docs/source/appendices/maturity_model.rst
+4 −4 docs/source/appendices/resource_identifiers.rst
+0 −6 docs/source/concepts/AdditionalDataTypes/Element.rst
+0 −6 docs/source/concepts/AdditionalDataTypes/Entity.rst
+0 −6 docs/source/concepts/AdditionalDataTypes/Extension.rst
+71 −11 docs/source/concepts/AdditionalDataTypes/index.rst
+0 −6 docs/source/concepts/AdditionalDataTypes/iriReference.rst
+5 −5 docs/source/concepts/LocationAndReference/SequenceLocation.rst
+1 −1 docs/source/concepts/LocationAndReference/SequenceReference.rst
+12 −5 docs/source/concepts/LocationAndReference/index.rst
+1 −1 docs/source/concepts/MolecularVariation/Allele.rst
+1 −1 docs/source/concepts/MolecularVariation/CisPhasedBlock.rst
+1 −1 docs/source/concepts/MolecularVariation/DerivativeMolecule.rst
+1 −1 docs/source/concepts/MolecularVariation/Terminus.rst
+4 −1 docs/source/concepts/SequenceExpression/LengthExpression.rst
+1 −1 docs/source/concepts/SequenceExpression/LiteralSequenceExpression.rst
+1 −1 docs/source/concepts/SequenceExpression/ReferenceLengthExpression.rst
+0 −72 docs/source/concepts/SystemicVariation/CopyNumber.rst
+31 −0 docs/source/concepts/SystemicVariation/CopyNumberChange.rst
+33 −0 docs/source/concepts/SystemicVariation/CopyNumberCount.rst
+5 −5 docs/source/concepts/SystemicVariation/index.rst
+0 −1 docs/source/concepts/index.rst
+7 −9 docs/source/conventions/computed_identifiers.rst
+1 −1 docs/source/conventions/example.rst
+0 −1 docs/source/conventions/index.rst
+35 −35 docs/source/images/GA-logo.svg
+0 −3 docs/source/introduction.rst
+1 −1 docs/source/quickstart.rst
+2 −2 docs/source/releases/index.rst
+1 −1 docs/source/requirements.txt
+0 −3 docs/source/rst_epilog
+1 −1 examples/SPDI_expansion.yaml
+1 −1 examples/ambiguous_linker.yaml
+1 −1 examples/invalid_adjacency.yaml
+1 −1 examples/precise_linker.yaml
+1 −1 examples/revcomp_breakpoint.yaml
+1 −1 examples/sequence_homology.yaml
+1 −1 examples/simple_breakpoint.yaml
+3 −8 examples/sv_derivative_molecule.yaml
+1 −1 examples/terminal_breakend.yaml
+1 −1 schema/Makefile
+14 −14 schema/vrs/def/Adjacency.rst
+13 −13 schema/vrs/def/Allele.rst
+13 −13 schema/vrs/def/CisPhasedBlock.rst
+13 −13 schema/vrs/def/CopyNumberChange.rst
+13 −13 schema/vrs/def/CopyNumberCount.rst
+13 −13 schema/vrs/def/DerivativeMolecule.rst
+5 −5 schema/vrs/def/Expression.rst
+10 −10 schema/vrs/def/Ga4ghIdentifiableObject.rst
+9 −9 schema/vrs/def/LengthExpression.rst
+9 −9 schema/vrs/def/LiteralSequenceExpression.rst
+11 −11 schema/vrs/def/ReferenceLengthExpression.rst
+8 −8 schema/vrs/def/SequenceExpression.rst
+17 −17 schema/vrs/def/SequenceLocation.rst
+13 −13 schema/vrs/def/SequenceReference.rst
+12 −12 schema/vrs/def/Terminus.rst
+10 −10 schema/vrs/def/TraversalBlock.rst
+12 −12 schema/vrs/def/Variation.rst
+11 −11 schema/vrs/json/Adjacency
+11 −11 schema/vrs/json/Allele
+9 −9 schema/vrs/json/CisPhasedBlock
+9 −9 schema/vrs/json/CopyNumberChange
+9 −9 schema/vrs/json/CopyNumberCount
+11 −11 schema/vrs/json/DerivativeMolecule
+3 −3 schema/vrs/json/Expression
+7 −7 schema/vrs/json/LengthExpression
+7 −7 schema/vrs/json/LiteralSequenceExpression
+7 −3 schema/vrs/json/Location
+7 −7 schema/vrs/json/MolecularVariation
+2 −2 schema/vrs/json/Range
+8 −8 schema/vrs/json/ReferenceLengthExpression
+5 −5 schema/vrs/json/SequenceExpression
+14 −14 schema/vrs/json/SequenceLocation
+7 −7 schema/vrs/json/SequenceReference
+4 −4 schema/vrs/json/SystemicVariation
+8 −8 schema/vrs/json/Terminus
+7 −7 schema/vrs/json/TraversalBlock
+9 −9 schema/vrs/json/Variation
+2 −2 schema/vrs/json/residue
+2 −2 schema/vrs/json/sequenceString
+1 −1 schema/vrs/prune.mk
+10 −9 schema/vrs/vrs-source.yaml
+1 −1 submodules/gks-core
+1 −1 tests/config.py
+1 −1 tests/test_definitions.yaml
+1 −1 tests/test_examples.py
+1 −1 validation/models.yaml
Loading

0 comments on commit 33ca72f

Please sign in to comment.