Skip to content

Commit

Permalink
Merge pull request #261 from microbiomedata/issue-249
Browse files: browse the repository at this point in the history
Issue 249: require part of slot for biosamples
  • Loading branch information
wdduncan authored Feb 8, 2022
2 parents cb46055 + 9367b84 commit 46e62c3
Show file tree
Hide file tree
Showing 11 changed files with 90 additions and 17 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Changes to the schema are documented in this file.
### Fixed
- N/A
### Changed
- N/A
- Made the `part of` slot required for `biosample` (#249)
### Removed
- N/A

Expand Down
2 changes: 1 addition & 1 deletion jsonld-context/nmdc.context.jsonld
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"_comments": "Auto generated from nmdc.yaml by jsonldcontextgen.py version: 0.1.1\n Generation date: 2022-01-26T15:10:03\n Schema: NMDC\n \n id: https://microbiomedata/schema\n description: Schema for National Microbiome Data Collaborative (NMDC).\n \nThis schema is organized into distinct modules:\n \n * a set of core types for representing data values\n * the mixs schema (auto-translated from mixs excel)\n * annotation schema\n * the NMDC schema itself\n license: https://creativecommons.org/publicdomain/zero/1.0/\n ",
"_comments": "Auto generated from nmdc.yaml by jsonldcontextgen.py version: 0.1.1\n Generation date: 2022-02-08T14:39:14\n Schema: NMDC\n \n id: https://microbiomedata/schema\n description: Schema for National Microbiome Data Collaborative (NMDC).\n \nThis schema is organized into distinct modules:\n \n * a set of core types for representing data values\n * the mixs schema (auto-translated from mixs excel)\n * annotation schema\n * the NMDC schema itself\n license: https://creativecommons.org/publicdomain/zero/1.0/\n ",
"@context": {
"CAS": "http://identifiers.org/cas/",
"CATH": "http://identifiers.org/cath/",
Expand Down
1 change: 1 addition & 0 deletions jsonschema/nmdc.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -743,6 +743,7 @@
},
"required": [
"id",
"part_of",
"env_broad_scale",
"env_local_scale",
"env_medium"
Expand Down
19 changes: 12 additions & 7 deletions python/nmdc.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Auto generated from nmdc.yaml by pythongen.py version: 0.9.0
# Generation date: 2022-01-26T15:10:35
# Generation date: 2022-02-08T14:39:50
# Schema: NMDC
#
# id: https://microbiomedata/schema
Expand Down Expand Up @@ -482,12 +482,12 @@ class Biosample(NamedThing):
class_model_uri: ClassVar[URIRef] = NMDC.Biosample

id: Union[str, BiosampleId] = None
part_of: Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]] = None
env_broad_scale: Union[dict, "ControlledTermValue"] = None
env_local_scale: Union[dict, "ControlledTermValue"] = None
env_medium: Union[dict, "ControlledTermValue"] = None
type: Optional[str] = None
alternative_identifiers: Optional[Union[str, List[str]]] = empty_list()
part_of: Optional[Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]]] = empty_list()
agrochem_addition: Optional[Union[dict, "QuantityValue"]] = None
alkalinity: Optional[Union[dict, "QuantityValue"]] = None
alkalinity_method: Optional[Union[dict, "TextValue"]] = None
Expand Down Expand Up @@ -633,6 +633,12 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
if not isinstance(self.id, BiosampleId):
self.id = BiosampleId(self.id)

if self._is_empty(self.part_of):
self.MissingRequiredField("part_of")
if not isinstance(self.part_of, list):
self.part_of = [self.part_of] if self.part_of is not None else []
self.part_of = [v if isinstance(v, NamedThingId) else NamedThingId(v) for v in self.part_of]

if self._is_empty(self.env_broad_scale):
self.MissingRequiredField("env_broad_scale")
if not isinstance(self.env_broad_scale, ControlledTermValue):
Expand All @@ -655,10 +661,6 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
self.alternative_identifiers = [self.alternative_identifiers] if self.alternative_identifiers is not None else []
self.alternative_identifiers = [v if isinstance(v, str) else str(v) for v in self.alternative_identifiers]

if not isinstance(self.part_of, list):
self.part_of = [self.part_of] if self.part_of is not None else []
self.part_of = [v if isinstance(v, NamedThingId) else NamedThingId(v) for v in self.part_of]

if self.agrochem_addition is not None and not isinstance(self.agrochem_addition, QuantityValue):
self.agrochem_addition = QuantityValue(**as_dict(self.agrochem_addition))

Expand Down Expand Up @@ -3026,7 +3028,7 @@ class FileTypeEnum(EnumDefinitionImpl):
def _addvals(cls):
setattr(cls, "FT ICR-MS Analysis Results",
PermissibleValue(text="FT ICR-MS Analysis Results",
description="FT ICR-MS-based metabolite assignment results table") )
description="FT ICR-MS-based molecular formula assignment results table") )
setattr(cls, "GC-MS Metabolomics Results",
PermissibleValue(text="GC-MS Metabolomics Results",
description="GC-MS-based metabolite assignment results table") )
Expand Down Expand Up @@ -6069,6 +6071,9 @@ class slots:
model_uri=NMDC.biosample_env_medium, domain=Biosample, range=Union[dict, "ControlledTermValue"], mappings = [MIXS.env_medium],
pattern=re.compile(r'.* \S+:\S+'))

slots.biosample_part_of = Slot(uri=DCTERMS.isPartOf, name="biosample_part of", curie=DCTERMS.curie('isPartOf'),
model_uri=NMDC.biosample_part_of, domain=Biosample, range=Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]])

slots.study_doi = Slot(uri=NMDC.doi, name="study_doi", curie=NMDC.curie('doi'),
model_uri=NMDC.study_doi, domain=Study, range=Optional[Union[dict, "AttributeValue"]])

Expand Down
2 changes: 2 additions & 0 deletions src/schema/nmdc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,8 @@ classes:
required: true
env_medium:
required: true
part of:
required: true
id_prefixes:
- GOLD
exact_mappings:
Expand Down
9 changes: 9 additions & 0 deletions test/data/biosample_test.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
"name": "Lithgow State Coal Mine Calcium nutrients (early)",
"description": "Bulk Aqueous phase filtered water",
"type": "nmdc:Biosample",
"part_of": [
"gold:Gs0110115"
],
"env_broad_scale": {
"has_raw_value": "ENVO:00002030"
},
Expand Down Expand Up @@ -40,6 +43,9 @@
"name": "Lithgow State Coal Mine Calcium nutrients Extra",
"description": "Bulk Aqueous phase filtered water",
"type": "nmdc:Biosample",
"part_of": [
"gold:Gs0110115"
],
"env_broad_scale": {
"has_raw_value": "ENVO:00002030"
},
Expand Down Expand Up @@ -75,6 +81,9 @@
"name": "Lithgow State Coal Mine Calcium nutrients",
"description": "Bulk Aqueous phase filtered water",
"type": "nmdc:Biosample",
"part_of": [
"gold:Gs0110115"
],
"env_broad_scale": {
"has_raw_value": "ENVO:00002030"
},
Expand Down
11 changes: 10 additions & 1 deletion test/data/invalid_schemas/biosample_invalid_range.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
"name": "Lithgow State Coal Mine Calcium nutrients (early)",
"description": "Bulk Aqueous phase filtered water",
"type": "nmdc:Biosample",
"part_of": [
"gold:Gs0128849"
],
"env_broad_scale": {
"has_raw_value": "ENVO:00002030"
},
Expand All @@ -22,7 +25,7 @@
"latitude": "-33.460524",
"longitude": "150.168149"
},
"ecosystem": "Environmental",
"ecosystem": "Environmental",
"ecosystem_category": "Aquatic",
"ecosystem_type": "Freshwater",
"ecosystem_subtype": "Groundwater",
Expand All @@ -40,6 +43,9 @@
"name": "Lithgow State Coal Mine Calcium nutrients Extra",
"description": "Bulk Aqueous phase filtered water",
"type": "nmdc:Biosample",
"part_of": [
"gold:Gs0128849"
],
"env_broad_scale": {
"has_raw_value": "ENVO:00002030"
},
Expand Down Expand Up @@ -75,6 +81,9 @@
"name": "Lithgow State Coal Mine Calcium nutrients",
"description": "Bulk Aqueous phase filtered water",
"type": "nmdc:Biosample",
"part_of": [
"gold:Gs0128849"
],
"env_broad_scale": {
"has_raw_value": "ENVO:00002030"
},
Expand Down
20 changes: 17 additions & 3 deletions test/data/invalid_schemas/biosample_mismatch_regex.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
"name": "Lithgow State Coal Mine Calcium nutrients (early)",
"description": "Bulk Aqueous phase filtered water",
"type": "nmdc:Biosample",
"part_of": [
"gold:Gs0128849"
],
"env_broad_scale": {
"has_raw_value": "ENVO:00002030"
},
Expand All @@ -23,7 +26,7 @@
"latitude": -33.460524,
"longitude": 150.168149
},
"ecosystem": "Environmental",
"ecosystem": "Environmental",
"ecosystem_category": "Aquatic",
"ecosystem_type": "Freshwater",
"ecosystem_subtype": "Groundwater",
Expand All @@ -37,11 +40,16 @@
"sample_collection_site": "Lithgow State Coal Mine"
},
{
"GOLD_sample_identifiers": ["GOLD:Gb1234"],
"GOLD_sample_identifiers": [
"GOLD:Gb1234"
],
"id": "gold:Gb0101225",
"name": "Lithgow State Coal Mine Calcium nutrients Extra",
"description": "Bulk Aqueous phase filtered water",
"type": "nmdc:Biosample",
"part_of": [
"gold:Gs0128849"
],
"env_broad_scale": {
"has_raw_value": "ENVO:00002030"
},
Expand Down Expand Up @@ -73,11 +81,17 @@
"sample_collection_site": "Lithgow State Coal Mine"
},
{
"GOLD_sample_identifiers": ["ABCD:Ab@#@", "WXYZ:Wx()"],
"GOLD_sample_identifiers": [
"ABCD:Ab@#@",
"WXYZ:Wx()"
],
"id": "gold:Gb0101226",
"name": "Lithgow State Coal Mine Calcium nutrients",
"description": "Bulk Aqueous phase filtered water",
"type": "nmdc:Biosample",
"part_of": [
"gold:Gs0128849"
],
"env_broad_scale": {
"has_raw_value": "ENVO:00002030"
},
Expand Down
21 changes: 18 additions & 3 deletions test/data/invalid_schemas/biosample_single_multi_value_mixup.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,19 @@
{
"biosample_set": [
{
"id": ["gold:Gb0101224", "gold:Gb0101225"],
"id": [
"gold:Gb0101224",
"gold:Gb0101225"
],
"name": "Lithgow State Coal Mine Calcium nutrients (early)",
"description": "Bulk Aqueous phase filtered water",
"type": ["nmdc:Biosample", "nmdc:FunctionalAnnotation"],
"type": [
"nmdc:Biosample",
"nmdc:FunctionalAnnotation"
],
"part_of": [
"gold:Gs0128849"
],
"env_broad_scale": {
"has_raw_value": "ENVO:00002030"
},
Expand All @@ -22,7 +31,7 @@
"latitude": -33.460524,
"longitude": 150.168149
},
"ecosystem": "Environmental",
"ecosystem": "Environmental",
"ecosystem_category": "Aquatic",
"ecosystem_type": "Freshwater",
"ecosystem_subtype": "Groundwater",
Expand All @@ -40,6 +49,9 @@
"name": "Lithgow State Coal Mine Calcium nutrients Extra",
"description": "Bulk Aqueous phase filtered water",
"type": "nmdc:Biosample",
"part_of": [
"gold:Gs0128849"
],
"env_broad_scale": {
"has_raw_value": "ENVO:00002030"
},
Expand Down Expand Up @@ -75,6 +87,9 @@
"name": "Lithgow State Coal Mine Calcium nutrients",
"description": "Bulk Aqueous phase filtered water",
"type": "nmdc:Biosample",
"part_of": [
"gold:Gs0128849"
],
"env_broad_scale": {
"has_raw_value": "ENVO:00002030"
},
Expand Down
11 changes: 10 additions & 1 deletion test/data/invalid_schemas/biosample_undeclared_slot.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
"name": "Lithgow State Coal Mine Calcium nutrients (early)",
"description": "Bulk Aqueous phase filtered water",
"type": "nmdc:Biosample",
"part_of": [
"gold:Gs0128849"
],
"env_broad_scale": {
"has_raw_value": "ENVO:00002030"
},
Expand All @@ -23,7 +26,7 @@
"latitude": -33.460524,
"longitude": 150.168149
},
"ecosystem": "Environmental",
"ecosystem": "Environmental",
"ecosystem_category": "Aquatic",
"ecosystem_type": "Freshwater",
"ecosystem_subtype": "Groundwater",
Expand All @@ -41,6 +44,9 @@
"name": "Lithgow State Coal Mine Calcium nutrients Extra",
"description": "Bulk Aqueous phase filtered water",
"type": "nmdc:Biosample",
"part_of": [
"gold:Gs0128849"
],
"env_broad_scale": {
"has_raw_value": "ENVO:00002030"
},
Expand Down Expand Up @@ -76,6 +82,9 @@
"name": "Lithgow State Coal Mine Calcium nutrients",
"description": "Bulk Aqueous phase filtered water",
"type": "nmdc:Biosample",
"part_of": [
"gold:Gs0128849"
],
"env_broad_scale": {
"has_raw_value": "ENVO:00002030"
},
Expand Down
9 changes: 9 additions & 0 deletions test/data/nmdc_example_database.json
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,9 @@
"name": "Permafrost microbial communities from Stordalen Mire, Sweden - 611E1M metaG",
"description": "Permafrost microbial communities from Stordalen Mire, Sweden",
"type": "nmdc:Biosample",
"part_of": [
"gold:Gs0128849"
],
"collection_date": {
"has_raw_value": "2011-06-15"
},
Expand Down Expand Up @@ -165,6 +168,9 @@
"name": "Forest soil microbial communities from Barre Woods Harvard Forest LTER site, Petersham, Massachusetts, United States - Inc-BW-C-14-O",
"description": "Forest soil from Barre Woods Harvard Forest LTER site was incubated at 10C with heavy water. Sample is from a control plot at ambient soil temperature, organic horizon - top 4cm of soil",
"type": "nmdc:Biosample",
"part_of": [
"gold:Gs0128849"
],
"collection_date": {
"has_raw_value": "2017-05-24"
},
Expand Down Expand Up @@ -204,6 +210,9 @@
"name": "Rhizosphere microbial communities from Carex aquatilis grown in University of Washington, Seatle, WA, United States - 4-1-23 metaG",
"description": "Rhizosphere microbial communities from Carex aquatilis grown in submerged peat from a thermokarst bog, University of Washington, Seatle, WA, United States",
"type": "nmdc:Biosample",
"part_of": [
"gold:Gs0128849"
],
"collection_date": {
"has_raw_value": "2018-01-29"
},
Expand Down

0 comments on commit 46e62c3

Please sign in to comment.