Merge pull request #261 from microbiomedata/issue-249

Issue 249: require `part of` slot for biosamples
microbiomedata · Feb 8, 2022 · 46e62c3
2 parents cb46055 + 9367b84
commit 46e62c3
Show file tree

Hide file tree

Showing 11 changed files with 90 additions and 17 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,7 +7,7 @@ Changes to the schema are documented in this file.
 ### Fixed
   - N/A
 ### Changed 
-  - N/A
+  - made `part of` slot required for `biosample` (#249)
 ### Removed
   - N/A
 

diff --git a/jsonld-context/nmdc.context.jsonld b/jsonld-context/nmdc.context.jsonld
@@ -1,5 +1,5 @@
 {
-   "_comments": "Auto generated from nmdc.yaml by jsonldcontextgen.py version: 0.1.1\n    Generation date: 2022-01-26T15:10:03\n    Schema: NMDC\n    \n    id: https://microbiomedata/schema\n    description: Schema for National Microbiome Data Collaborative (NMDC).\n  \nThis schema is organized into distinct modules:\n  \n * a set of core types for representing data values\n * the mixs schema (auto-translated from mixs excel)\n * annotation schema\n * the NMDC schema itself\n    license: https://creativecommons.org/publicdomain/zero/1.0/\n    ",
+   "_comments": "Auto generated from nmdc.yaml by jsonldcontextgen.py version: 0.1.1\n    Generation date: 2022-02-08T14:39:14\n    Schema: NMDC\n    \n    id: https://microbiomedata/schema\n    description: Schema for National Microbiome Data Collaborative (NMDC).\n  \nThis schema is organized into distinct modules:\n  \n * a set of core types for representing data values\n * the mixs schema (auto-translated from mixs excel)\n * annotation schema\n * the NMDC schema itself\n    license: https://creativecommons.org/publicdomain/zero/1.0/\n    ",
    "@context": {
       "CAS": "http://identifiers.org/cas/",
       "CATH": "http://identifiers.org/cath/",

diff --git a/jsonschema/nmdc.schema.json b/jsonschema/nmdc.schema.json
@@ -743,6 +743,7 @@
          },
          "required": [
             "id",
+            "part_of",
             "env_broad_scale",
             "env_local_scale",
             "env_medium"

diff --git a/python/nmdc.py b/python/nmdc.py
@@ -1,5 +1,5 @@
 # Auto generated from nmdc.yaml by pythongen.py version: 0.9.0
-# Generation date: 2022-01-26T15:10:35
+# Generation date: 2022-02-08T14:39:50
 # Schema: NMDC
 #
 # id: https://microbiomedata/schema
@@ -482,12 +482,12 @@ class Biosample(NamedThing):
     class_model_uri: ClassVar[URIRef] = NMDC.Biosample
 
     id: Union[str, BiosampleId] = None
+    part_of: Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]] = None
     env_broad_scale: Union[dict, "ControlledTermValue"] = None
     env_local_scale: Union[dict, "ControlledTermValue"] = None
     env_medium: Union[dict, "ControlledTermValue"] = None
     type: Optional[str] = None
     alternative_identifiers: Optional[Union[str, List[str]]] = empty_list()
-    part_of: Optional[Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]]] = empty_list()
     agrochem_addition: Optional[Union[dict, "QuantityValue"]] = None
     alkalinity: Optional[Union[dict, "QuantityValue"]] = None
     alkalinity_method: Optional[Union[dict, "TextValue"]] = None
@@ -633,6 +633,12 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
         if not isinstance(self.id, BiosampleId):
             self.id = BiosampleId(self.id)
 
+        if self._is_empty(self.part_of):
+            self.MissingRequiredField("part_of")
+        if not isinstance(self.part_of, list):
+            self.part_of = [self.part_of] if self.part_of is not None else []
+        self.part_of = [v if isinstance(v, NamedThingId) else NamedThingId(v) for v in self.part_of]
+
         if self._is_empty(self.env_broad_scale):
             self.MissingRequiredField("env_broad_scale")
         if not isinstance(self.env_broad_scale, ControlledTermValue):
@@ -655,10 +661,6 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
             self.alternative_identifiers = [self.alternative_identifiers] if self.alternative_identifiers is not None else []
         self.alternative_identifiers = [v if isinstance(v, str) else str(v) for v in self.alternative_identifiers]
 
-        if not isinstance(self.part_of, list):
-            self.part_of = [self.part_of] if self.part_of is not None else []
-        self.part_of = [v if isinstance(v, NamedThingId) else NamedThingId(v) for v in self.part_of]
-
         if self.agrochem_addition is not None and not isinstance(self.agrochem_addition, QuantityValue):
             self.agrochem_addition = QuantityValue(**as_dict(self.agrochem_addition))
 
@@ -3026,7 +3028,7 @@ class FileTypeEnum(EnumDefinitionImpl):
     def _addvals(cls):
         setattr(cls, "FT ICR-MS Analysis Results",
                 PermissibleValue(text="FT ICR-MS Analysis Results",
-                                 description="FT ICR-MS-based metabolite assignment results table") )
+                                 description="FT ICR-MS-based molecular formula assignment results table") )
         setattr(cls, "GC-MS Metabolomics Results",
                 PermissibleValue(text="GC-MS Metabolomics Results",
                                  description="GC-MS-based metabolite assignment results table") )
@@ -6069,6 +6071,9 @@ class slots:
                    model_uri=NMDC.biosample_env_medium, domain=Biosample, range=Union[dict, "ControlledTermValue"], mappings = [MIXS.env_medium],
                    pattern=re.compile(r'.* \S+:\S+'))
 
+slots.biosample_part_of = Slot(uri=DCTERMS.isPartOf, name="biosample_part of", curie=DCTERMS.curie('isPartOf'),
+                   model_uri=NMDC.biosample_part_of, domain=Biosample, range=Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]])
+
 slots.study_doi = Slot(uri=NMDC.doi, name="study_doi", curie=NMDC.curie('doi'),
                    model_uri=NMDC.study_doi, domain=Study, range=Optional[Union[dict, "AttributeValue"]])
 

diff --git a/src/schema/nmdc.yaml b/src/schema/nmdc.yaml
@@ -300,6 +300,8 @@ classes:
         required: true
       env_medium:
         required: true
+      part of:
+        required: true
     id_prefixes:
       - GOLD
     exact_mappings:

diff --git a/test/data/biosample_test.json b/test/data/biosample_test.json
@@ -5,6 +5,9 @@
       "name": "Lithgow State Coal Mine Calcium nutrients (early)",
       "description": "Bulk Aqueous phase filtered water",
       "type": "nmdc:Biosample",
+      "part_of": [
+        "gold:Gs0110115"
+      ],
       "env_broad_scale": {
         "has_raw_value": "ENVO:00002030"
       },
@@ -40,6 +43,9 @@
       "name": "Lithgow State Coal Mine Calcium nutrients Extra",
       "description": "Bulk Aqueous phase filtered water",
       "type": "nmdc:Biosample",
+      "part_of": [
+        "gold:Gs0110115"
+      ],
       "env_broad_scale": {
         "has_raw_value": "ENVO:00002030"
       },
@@ -75,6 +81,9 @@
       "name": "Lithgow State Coal Mine Calcium nutrients",
       "description": "Bulk Aqueous phase filtered water",
       "type": "nmdc:Biosample",
+      "part_of": [
+        "gold:Gs0110115"
+      ],
       "env_broad_scale": {
         "has_raw_value": "ENVO:00002030"
       },

diff --git a/test/data/invalid_schemas/biosample_invalid_range.json b/test/data/invalid_schemas/biosample_invalid_range.json
@@ -5,6 +5,9 @@
       "name": "Lithgow State Coal Mine Calcium nutrients (early)",
       "description": "Bulk Aqueous phase filtered water",
       "type": "nmdc:Biosample",
+      "part_of": [
+        "gold:Gs0128849"
+      ],
       "env_broad_scale": {
         "has_raw_value": "ENVO:00002030"
       },
@@ -22,7 +25,7 @@
         "latitude": "-33.460524",
         "longitude": "150.168149"
       },
-      "ecosystem": "Environmental", 
+      "ecosystem": "Environmental",
       "ecosystem_category": "Aquatic",
       "ecosystem_type": "Freshwater",
       "ecosystem_subtype": "Groundwater",
@@ -40,6 +43,9 @@
       "name": "Lithgow State Coal Mine Calcium nutrients Extra",
       "description": "Bulk Aqueous phase filtered water",
       "type": "nmdc:Biosample",
+      "part_of": [
+        "gold:Gs0128849"
+      ],
       "env_broad_scale": {
         "has_raw_value": "ENVO:00002030"
       },
@@ -75,6 +81,9 @@
       "name": "Lithgow State Coal Mine Calcium nutrients",
       "description": "Bulk Aqueous phase filtered water",
       "type": "nmdc:Biosample",
+      "part_of": [
+        "gold:Gs0128849"
+      ],
       "env_broad_scale": {
         "has_raw_value": "ENVO:00002030"
       },

diff --git a/test/data/invalid_schemas/biosample_mismatch_regex.json b/test/data/invalid_schemas/biosample_mismatch_regex.json
@@ -6,6 +6,9 @@
       "name": "Lithgow State Coal Mine Calcium nutrients (early)",
       "description": "Bulk Aqueous phase filtered water",
       "type": "nmdc:Biosample",
+      "part_of": [
+        "gold:Gs0128849"
+      ],
       "env_broad_scale": {
         "has_raw_value": "ENVO:00002030"
       },
@@ -23,7 +26,7 @@
         "latitude": -33.460524,
         "longitude": 150.168149
       },
-      "ecosystem": "Environmental", 
+      "ecosystem": "Environmental",
       "ecosystem_category": "Aquatic",
       "ecosystem_type": "Freshwater",
       "ecosystem_subtype": "Groundwater",
@@ -37,11 +40,16 @@
       "sample_collection_site": "Lithgow State Coal Mine"
     },
     {
-      "GOLD_sample_identifiers": ["GOLD:Gb1234"],
+      "GOLD_sample_identifiers": [
+        "GOLD:Gb1234"
+      ],
       "id": "gold:Gb0101225",
       "name": "Lithgow State Coal Mine Calcium nutrients Extra",
       "description": "Bulk Aqueous phase filtered water",
       "type": "nmdc:Biosample",
+      "part_of": [
+        "gold:Gs0128849"
+      ],
       "env_broad_scale": {
         "has_raw_value": "ENVO:00002030"
       },
@@ -73,11 +81,17 @@
       "sample_collection_site": "Lithgow State Coal Mine"
     },
     {
-      "GOLD_sample_identifiers": ["ABCD:Ab@#@", "WXYZ:Wx()"],
+      "GOLD_sample_identifiers": [
+        "ABCD:Ab@#@",
+        "WXYZ:Wx()"
+      ],
       "id": "gold:Gb0101226",
       "name": "Lithgow State Coal Mine Calcium nutrients",
       "description": "Bulk Aqueous phase filtered water",
       "type": "nmdc:Biosample",
+      "part_of": [
+        "gold:Gs0128849"
+      ],
       "env_broad_scale": {
         "has_raw_value": "ENVO:00002030"
       },

diff --git a/test/data/invalid_schemas/biosample_single_multi_value_mixup.json b/test/data/invalid_schemas/biosample_single_multi_value_mixup.json
@@ -1,10 +1,19 @@
 {
   "biosample_set": [
     {
-      "id": ["gold:Gb0101224", "gold:Gb0101225"],
+      "id": [
+        "gold:Gb0101224",
+        "gold:Gb0101225"
+      ],
       "name": "Lithgow State Coal Mine Calcium nutrients (early)",
       "description": "Bulk Aqueous phase filtered water",
-      "type": ["nmdc:Biosample", "nmdc:FunctionalAnnotation"],
+      "type": [
+        "nmdc:Biosample",
+        "nmdc:FunctionalAnnotation"
+      ],
+      "part_of": [
+        "gold:Gs0128849"
+      ],
       "env_broad_scale": {
         "has_raw_value": "ENVO:00002030"
       },
@@ -22,7 +31,7 @@
         "latitude": -33.460524,
         "longitude": 150.168149
       },
-      "ecosystem": "Environmental", 
+      "ecosystem": "Environmental",
       "ecosystem_category": "Aquatic",
       "ecosystem_type": "Freshwater",
       "ecosystem_subtype": "Groundwater",
@@ -40,6 +49,9 @@
       "name": "Lithgow State Coal Mine Calcium nutrients Extra",
       "description": "Bulk Aqueous phase filtered water",
       "type": "nmdc:Biosample",
+      "part_of": [
+        "gold:Gs0128849"
+      ],
       "env_broad_scale": {
         "has_raw_value": "ENVO:00002030"
       },
@@ -75,6 +87,9 @@
       "name": "Lithgow State Coal Mine Calcium nutrients",
       "description": "Bulk Aqueous phase filtered water",
       "type": "nmdc:Biosample",
+      "part_of": [
+        "gold:Gs0128849"
+      ],
       "env_broad_scale": {
         "has_raw_value": "ENVO:00002030"
       },

diff --git a/test/data/invalid_schemas/biosample_undeclared_slot.json b/test/data/invalid_schemas/biosample_undeclared_slot.json
@@ -6,6 +6,9 @@
       "name": "Lithgow State Coal Mine Calcium nutrients (early)",
       "description": "Bulk Aqueous phase filtered water",
       "type": "nmdc:Biosample",
+      "part_of": [
+        "gold:Gs0128849"
+      ],
       "env_broad_scale": {
         "has_raw_value": "ENVO:00002030"
       },
@@ -23,7 +26,7 @@
         "latitude": -33.460524,
         "longitude": 150.168149
       },
-      "ecosystem": "Environmental", 
+      "ecosystem": "Environmental",
       "ecosystem_category": "Aquatic",
       "ecosystem_type": "Freshwater",
       "ecosystem_subtype": "Groundwater",
@@ -41,6 +44,9 @@
       "name": "Lithgow State Coal Mine Calcium nutrients Extra",
       "description": "Bulk Aqueous phase filtered water",
       "type": "nmdc:Biosample",
+      "part_of": [
+        "gold:Gs0128849"
+      ],
       "env_broad_scale": {
         "has_raw_value": "ENVO:00002030"
       },
@@ -76,6 +82,9 @@
       "name": "Lithgow State Coal Mine Calcium nutrients",
       "description": "Bulk Aqueous phase filtered water",
       "type": "nmdc:Biosample",
+      "part_of": [
+        "gold:Gs0128849"
+      ],
       "env_broad_scale": {
         "has_raw_value": "ENVO:00002030"
       },

diff --git a/test/data/nmdc_example_database.json b/test/data/nmdc_example_database.json
@@ -126,6 +126,9 @@
       "name": "Permafrost microbial communities from Stordalen Mire, Sweden - 611E1M metaG",
       "description": "Permafrost microbial communities from Stordalen Mire, Sweden",
       "type": "nmdc:Biosample",
+      "part_of": [
+        "gold:Gs0128849"
+      ],
       "collection_date": {
         "has_raw_value": "2011-06-15"
       },
@@ -165,6 +168,9 @@
       "name": "Forest soil microbial communities from Barre Woods Harvard Forest LTER site, Petersham, Massachusetts, United States - Inc-BW-C-14-O",
       "description": "Forest soil from Barre Woods Harvard Forest LTER site was incubated at 10C with heavy water. Sample is from a control plot at ambient soil temperature, organic horizon - top 4cm of soil",
       "type": "nmdc:Biosample",
+      "part_of": [
+        "gold:Gs0128849"
+      ],
       "collection_date": {
         "has_raw_value": "2017-05-24"
       },
@@ -204,6 +210,9 @@
       "name": "Rhizosphere microbial communities from Carex aquatilis grown in University of Washington, Seatle, WA, United States - 4-1-23 metaG",
       "description": "Rhizosphere microbial communities from Carex aquatilis grown in submerged peat from a thermokarst bog, University of Washington, Seatle, WA, United States",
       "type": "nmdc:Biosample",
+      "part_of": [
+        "gold:Gs0128849"
+      ],
       "collection_date": {
         "has_raw_value": "2018-01-29"
       },