diff --git a/README.md b/README.md
index 558e885c8b..6ab72d206a 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
-
-
-
+
+

+
# National Microbiome Data Collaborative Schema
diff --git a/nmdc_schema/nmdc.py b/nmdc_schema/nmdc.py
index 2b3ebe2655..cf0df9699b 100644
--- a/nmdc_schema/nmdc.py
+++ b/nmdc_schema/nmdc.py
@@ -1,5 +1,5 @@
# Auto generated from nmdc.yaml by pythongen.py version: 0.0.1
-# Generation date: 2024-10-17T14:33:36
+# Generation date: 2024-11-13T01:04:03
# Schema: NMDC
#
# id: https://w3id.org/nmdc/nmdc
@@ -51,10 +51,12 @@
ENVO = CurieNamespace('ENVO', 'http://purl.obolibrary.org/obo/ENVO_')
FBCV = CurieNamespace('FBcv', 'http://purl.obolibrary.org/obo/FBcv_')
FMA = CurieNamespace('FMA', 'http://purl.obolibrary.org/obo/FMA_')
+GENEPIO = CurieNamespace('GENEPIO', 'http://purl.obolibrary.org/obo/GENEPIO_')  # OBO PURL base, same pattern as the other OBO prefixes in this table
GO = CurieNamespace('GO', 'http://purl.obolibrary.org/obo/GO_')
HMDB = CurieNamespace('HMDB', 'https://bioregistry.io/hmdb:')
ISA = CurieNamespace('ISA', 'http://example.org/isa/')
KEGG_COMPOUND = CurieNamespace('KEGG_COMPOUND', 'https://bioregistry.io/kegg.compound:')
+KEGG_MODULE = CurieNamespace('KEGG_MODULE', 'https://bioregistry.io/kegg.module:')
KEGG_ORTHOLOGY = CurieNamespace('KEGG_ORTHOLOGY', 'https://bioregistry.io/kegg.orthology:')
KEGG_PATHWAY = CurieNamespace('KEGG_PATHWAY', 'https://bioregistry.io/kegg.pathway:')
MASSIVE = CurieNamespace('MASSIVE', 'https://bioregistry.io/reference/massive:')
@@ -67,6 +69,7 @@
NCBITAXON = CurieNamespace('NCBITaxon', 'http://purl.obolibrary.org/obo/NCBITaxon_')
NCIT = CurieNamespace('NCIT', 'http://purl.obolibrary.org/obo/NCIT_')
OBI = CurieNamespace('OBI', 'http://purl.obolibrary.org/obo/OBI_')
+OMIT = CurieNamespace('OMIT', 'http://purl.obolibrary.org/obo/OMIT_')  # OBO PURL base, same pattern as the other OBO prefixes in this table
ORCID = CurieNamespace('ORCID', 'https://orcid.org/')
PANTHER_FAMILY = CurieNamespace('PANTHER_FAMILY', 'https://bioregistry.io/panther.family:')
PATO = CurieNamespace('PATO', 'http://purl.obolibrary.org/obo/PATO_')
@@ -295,6 +298,10 @@ class ChromatographyConfigurationId(ConfigurationId):
pass
+class ManifestId(InformationObjectId):
+ pass
+
+
class CalibrationInformationId(InformationObjectId):
pass
@@ -414,16 +421,16 @@ class FunctionalAnnotationAggMember(YAMLRoot):
class_name: ClassVar[str] = "FunctionalAnnotationAggMember"
class_model_uri: ClassVar[URIRef] = NMDC.FunctionalAnnotationAggMember
- metagenome_annotation_id: Union[str, WorkflowExecutionId] = None
+ was_generated_by: Union[str, WorkflowExecutionId] = None
gene_function_id: Union[str, URIorCURIE] = None
count: int = None
type: Union[str, URIorCURIE] = None
def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
- if self._is_empty(self.metagenome_annotation_id):
- self.MissingRequiredField("metagenome_annotation_id")
- if not isinstance(self.metagenome_annotation_id, WorkflowExecutionId):
- self.metagenome_annotation_id = WorkflowExecutionId(self.metagenome_annotation_id)
+ if self._is_empty(self.was_generated_by):
+ self.MissingRequiredField("was_generated_by")
+ if not isinstance(self.was_generated_by, WorkflowExecutionId):
+ self.was_generated_by = WorkflowExecutionId(self.was_generated_by)
if self._is_empty(self.gene_function_id):
self.MissingRequiredField("gene_function_id")
@@ -469,6 +476,7 @@ class Database(YAMLRoot):
functional_annotation_set: Optional[Union[Union[dict, "FunctionalAnnotation"], List[Union[dict, "FunctionalAnnotation"]]]] = empty_list()
genome_feature_set: Optional[Union[Union[dict, "GenomeFeature"], List[Union[dict, "GenomeFeature"]]]] = empty_list()
instrument_set: Optional[Union[Dict[Union[str, InstrumentId], Union[dict, "Instrument"]], List[Union[dict, "Instrument"]]]] = empty_dict()
+ manifest_set: Optional[Union[Dict[Union[str, ManifestId], Union[dict, "Manifest"]], List[Union[dict, "Manifest"]]]] = empty_dict()
material_processing_set: Optional[Union[Dict[Union[str, MaterialProcessingId], Union[dict, "MaterialProcessing"]], List[Union[dict, "MaterialProcessing"]]]] = empty_dict()
processed_sample_set: Optional[Union[Dict[Union[str, ProcessedSampleId], Union[dict, "ProcessedSample"]], List[Union[dict, "ProcessedSample"]]]] = empty_dict()
protocol_execution_set: Optional[Union[Dict[Union[str, ProtocolExecutionId], Union[dict, "ProtocolExecution"]], List[Union[dict, "ProtocolExecution"]]]] = empty_dict()
@@ -507,6 +515,8 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
self._normalize_inlined_as_list(slot_name="instrument_set", slot_type=Instrument, key_name="id", keyed=True)
+ self._normalize_inlined_as_list(slot_name="manifest_set", slot_type=Manifest, key_name="id", keyed=True)
+
self._normalize_inlined_as_list(slot_name="material_processing_set", slot_type=MaterialProcessing, key_name="id", keyed=True)
self._normalize_inlined_as_list(slot_name="processed_sample_set", slot_type=ProcessedSample, key_name="id", keyed=True)
@@ -718,9 +728,9 @@ class MagBin(YAMLRoot):
class_name: ClassVar[str] = "MagBin"
class_model_uri: ClassVar[URIRef] = NMDC.MagBin
+ bin_name: str = None
type: Union[str, URIorCURIE] = None
- bin_name: Optional[str] = None
- bin_quality: Optional[str] = None
+ bin_quality: Optional[Union[str, "BinQualityEnum"]] = None
completeness: Optional[float] = None
contamination: Optional[float] = None
gene_count: Optional[int] = None
@@ -741,15 +751,17 @@ class MagBin(YAMLRoot):
eukaryotic_evaluation: Optional[Union[dict, EukEval]] = None
def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
+ if self._is_empty(self.bin_name):
+ self.MissingRequiredField("bin_name")
+ if not isinstance(self.bin_name, str):
+ self.bin_name = str(self.bin_name)
+
if self._is_empty(self.type):
self.MissingRequiredField("type")
self.type = str(self.class_class_curie)
- if self.bin_name is not None and not isinstance(self.bin_name, str):
- self.bin_name = str(self.bin_name)
-
- if self.bin_quality is not None and not isinstance(self.bin_quality, str):
- self.bin_quality = str(self.bin_quality)
+ if self.bin_quality is not None and not isinstance(self.bin_quality, BinQualityEnum):
+ self.bin_quality = BinQualityEnum(self.bin_quality)
if self.completeness is not None and not isinstance(self.completeness, float):
self.completeness = float(self.completeness)
@@ -5412,6 +5424,39 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
self.type = str(self.class_class_curie)
+@dataclass(repr=False)
+class Manifest(InformationObject):
+ """
+ A qualified collection of DataObjects that can be analyzed together in the same experimental context.
+ """
+ _inherited_slots: ClassVar[List[str]] = []
+
+ class_class_uri: ClassVar[URIRef] = NMDC["Manifest"]
+ class_class_curie: ClassVar[str] = "nmdc:Manifest"
+ class_name: ClassVar[str] = "Manifest"
+ class_model_uri: ClassVar[URIRef] = NMDC.Manifest
+
+ id: Union[str, ManifestId] = None  # required; coerced to ManifestId in __post_init__
+ type: Union[str, URIorCURIE] = None  # assigned str(class_class_curie) in __post_init__
+ manifest_category: Union[str, "ManifestCategoryEnum"] = None  # required; coerced to ManifestCategoryEnum in __post_init__
+
+ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
+ if self._is_empty(self.id):  # an empty id is reported as a missing required field
+ self.MissingRequiredField("id")
+ if not isinstance(self.id, ManifestId):  # wrap plain values in ManifestId
+ self.id = ManifestId(self.id)
+
+ if self._is_empty(self.manifest_category):  # an empty category is reported as a missing required field
+ self.MissingRequiredField("manifest_category")
+ if not isinstance(self.manifest_category, ManifestCategoryEnum):  # wrap plain values in the enum type
+ self.manifest_category = ManifestCategoryEnum(self.manifest_category)
+
+ super().__post_init__(**kwargs)  # forward the remaining kwargs to the parent class
+ if self._is_empty(self.type):
+ self.MissingRequiredField("type")
+ self.type = str(self.class_class_curie)  # type holds the class CURIE as a plain string
+
+
@dataclass(repr=False)
class CalibrationInformation(InformationObject):
"""
@@ -5484,6 +5529,7 @@ class DataObject(InformationObject):
md5_checksum: Optional[str] = None
url: Optional[str] = None
was_generated_by: Optional[Union[str, WorkflowExecutionId]] = None
+ in_manifest: Optional[Union[Union[str, ManifestId], List[Union[str, ManifestId]]]] = empty_list()
def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
if self._is_empty(self.id):
@@ -5526,6 +5572,10 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
if self.was_generated_by is not None and not isinstance(self.was_generated_by, WorkflowExecutionId):
self.was_generated_by = WorkflowExecutionId(self.was_generated_by)
+ if not isinstance(self.in_manifest, list):
+ self.in_manifest = [self.in_manifest] if self.in_manifest is not None else []
+ self.in_manifest = [v if isinstance(v, ManifestId) else ManifestId(v) for v in self.in_manifest]
+
super().__post_init__(**kwargs)
if self._is_empty(self.type):
self.MissingRequiredField("type")
@@ -5672,7 +5722,7 @@ class MassSpectrometry(DataGeneration):
associated_studies: Union[Union[str, StudyId], List[Union[str, StudyId]]] = None
has_input: Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]] = None
eluent_introduction_category: Optional[Union[str, "EluentIntroductionCategoryEnum"]] = None
- has_calibration: Optional[str] = None
+ has_calibration: Optional[Union[str, CalibrationInformationId]] = None
has_chromatography_configuration: Optional[Union[str, ChromatographyConfigurationId]] = None
has_mass_spectrometry_configuration: Optional[Union[str, MassSpectrometryConfigurationId]] = None
@@ -5685,8 +5735,8 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
if self.eluent_introduction_category is not None and not isinstance(self.eluent_introduction_category, EluentIntroductionCategoryEnum):
self.eluent_introduction_category = EluentIntroductionCategoryEnum(self.eluent_introduction_category)
- if self.has_calibration is not None and not isinstance(self.has_calibration, str):
- self.has_calibration = str(self.has_calibration)
+ if self.has_calibration is not None and not isinstance(self.has_calibration, CalibrationInformationId):
+ self.has_calibration = CalibrationInformationId(self.has_calibration)
if self.has_chromatography_configuration is not None and not isinstance(self.has_chromatography_configuration, ChromatographyConfigurationId):
self.has_chromatography_configuration = ChromatographyConfigurationId(self.has_chromatography_configuration)
@@ -6364,7 +6414,6 @@ class MetabolomicsAnalysis(WorkflowExecution):
git_url: str = None
started_at_time: str = None
has_input: Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]] = None
- has_calibration: Optional[str] = None
has_metabolite_identifications: Optional[Union[Union[dict, MetaboliteIdentification], List[Union[dict, MetaboliteIdentification]]]] = empty_list()
was_informed_by: Optional[Union[str, DataGenerationId]] = None
@@ -6374,9 +6423,6 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
if not isinstance(self.id, MetabolomicsAnalysisId):
self.id = MetabolomicsAnalysisId(self.id)
- if self.has_calibration is not None and not isinstance(self.has_calibration, str):
- self.has_calibration = str(self.has_calibration)
-
if not isinstance(self.has_metabolite_identifications, list):
self.has_metabolite_identifications = [self.has_metabolite_identifications] if self.has_metabolite_identifications is not None else []
self.has_metabolite_identifications = [v if isinstance(v, MetaboliteIdentification) else MetaboliteIdentification(**as_dict(v)) for v in self.has_metabolite_identifications]
@@ -6405,7 +6451,6 @@ class MetaproteomicsAnalysis(WorkflowExecution):
git_url: str = None
started_at_time: str = None
has_input: Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]] = None
- has_peptide_quantifications: Optional[Union[Union[dict, PeptideQuantification], List[Union[dict, PeptideQuantification]]]] = empty_list()
was_informed_by: Optional[Union[str, DataGenerationId]] = None
def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
@@ -6414,10 +6459,6 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
if not isinstance(self.id, MetaproteomicsAnalysisId):
self.id = MetaproteomicsAnalysisId(self.id)
- if not isinstance(self.has_peptide_quantifications, list):
- self.has_peptide_quantifications = [self.has_peptide_quantifications] if self.has_peptide_quantifications is not None else []
- self.has_peptide_quantifications = [v if isinstance(v, PeptideQuantification) else PeptideQuantification(**as_dict(v)) for v in self.has_peptide_quantifications]
-
if self.was_informed_by is not None and not isinstance(self.was_informed_by, DataGenerationId):
self.was_informed_by = DataGenerationId(self.was_informed_by)
@@ -6442,7 +6483,6 @@ class NomAnalysis(WorkflowExecution):
git_url: str = None
started_at_time: str = None
has_input: Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]] = None
- has_calibration: Optional[str] = None
was_informed_by: Optional[Union[str, DataGenerationId]] = None
def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
@@ -6451,9 +6491,6 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
if not isinstance(self.id, NomAnalysisId):
self.id = NomAnalysisId(self.id)
- if self.has_calibration is not None and not isinstance(self.has_calibration, str):
- self.has_calibration = str(self.has_calibration)
-
if self.was_informed_by is not None and not isinstance(self.was_informed_by, DataGenerationId):
self.was_informed_by = DataGenerationId(self.was_informed_by)
@@ -6509,6 +6546,25 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
# Enumerations
+class CalibrationTargetEnum(EnumDefinitionImpl):
+
+ mass_charge_ratio = PermissibleValue(text="mass_charge_ratio")
+ retention_time = PermissibleValue(text="retention_time")
+ retention_index = PermissibleValue(text="retention_index")
+
+ _defn = EnumDefinition(
+ name="CalibrationTargetEnum",
+ )
+
+class CalibrationStandardEnum(EnumDefinitionImpl):
+
+ fames = PermissibleValue(text="fames")
+ alkanes = PermissibleValue(text="alkanes")
+
+ _defn = EnumDefinition(
+ name="CalibrationStandardEnum",
+ )
+
class StrandedOrientationEnum(EnumDefinitionImpl):
"""
This enumeration specifies information about stranded RNA library preparations.
@@ -6752,23 +6808,20 @@ class SamplePortionEnum(EnumDefinitionImpl):
name="SamplePortionEnum",
)
-class CalibrationTargetEnum(EnumDefinitionImpl):
-
- mass_charge_ratio = PermissibleValue(text="mass_charge_ratio")
- retention_time = PermissibleValue(text="retention_time")
- retention_index = PermissibleValue(text="retention_index")
-
- _defn = EnumDefinition(
- name="CalibrationTargetEnum",
- )
-
-class CalibrationStandardEnum(EnumDefinitionImpl):
+class BinQualityEnum(EnumDefinitionImpl):
- fames = PermissibleValue(text="fames")
- alkanes = PermissibleValue(text="alkanes")
+ HQ = PermissibleValue(
+ text="HQ",
+ description="""Metagenome-assembled genome is high quality based on MIMAG standards (https://doi.org/10.1038/nbt.3893)""")
+ MQ = PermissibleValue(
+ text="MQ",
+ description="""Metagenome-assembled genome is medium quality based on MIMAG standards (https://doi.org/10.1038/nbt.3893)""")
+ LQ = PermissibleValue(
+ text="LQ",
+ description="""Metagenome-assembled genome is low quality based on MIMAG standards (https://doi.org/10.1038/nbt.3893)""")
_defn = EnumDefinition(
- name="CalibrationStandardEnum",
+ name="BinQualityEnum",
)
class ChemicalConversionCategoryEnum(EnumDefinitionImpl):
@@ -6937,6 +6990,25 @@ class StudyCategoryEnum(EnumDefinitionImpl):
name="StudyCategoryEnum",
)
+class ManifestCategoryEnum(EnumDefinitionImpl):
+ """
+ A list of contexts in which some DataObjects can be analyzed together.
+ """
+ instrument_run = PermissibleValue(
+ text="instrument_run",
+ description="A collection of data objects from a single run of an instrument.")
+ poolable_replicates = PermissibleValue(
+ text="poolable_replicates",
+ description="A collection of data objects that can be pooled for downstream analyses.")
+ fractions = PermissibleValue(
+ text="fractions",
+ description="A collection of data objects that represent fractions of a single sample.")
+
+ _defn = EnumDefinition(
+ name="ManifestCategoryEnum",
+ description="A list of contexts in which some DataObjects can be analyzed together.",
+ )
+
class InstrumentModelEnum(EnumDefinitionImpl):
exploris_21T = PermissibleValue(text="exploris_21T")
@@ -7169,6 +7241,18 @@ class FileTypeEnum(EnumDefinitionImpl):
@classmethod
def _addvals(cls):
+ setattr(cls, "Virus Summary",
+ PermissibleValue(
+ text="Virus Summary",
+ description="Tab separated file listing the viruses found by geNomad."))
+ setattr(cls, "Plasmid Summary",
+ PermissibleValue(
+ text="Plasmid Summary",
+ description="Tab separated file listing the plasmids found by geNomad."))
+ setattr(cls, "GeNomad Aggregated Classification",
+ PermissibleValue(
+ text="GeNomad Aggregated Classification",
+ description="""Tab separated file which combines the results from neural network-based classification and marker-based classification for virus and plasmid detection with geNomad."""))
setattr(cls, "Reference Calibration File",
PermissibleValue(
text="Reference Calibration File",
@@ -7529,6 +7613,9 @@ class DoiProviderEnum(EnumDefinitionImpl):
edi = PermissibleValue(
text="edi",
meaning=None)
+ figshare = PermissibleValue(
+ text="figshare",
+ meaning=None)
_defn = EnumDefinition(
name="DoiProviderEnum",
@@ -7579,15 +7666,6 @@ class AnalyteCategoryEnum(EnumDefinitionImpl):
name="AnalyteCategoryEnum",
)
-class DirectInfusionEnum(EnumDefinitionImpl):
-
- direct_infusion = PermissibleValue(text="direct_infusion")
- autosampler = PermissibleValue(text="autosampler")
-
- _defn = EnumDefinition(
- name="DirectInfusionEnum",
- )
-
class ExtractionTargetEnum(EnumDefinitionImpl):
DNA = PermissibleValue(text="DNA")
@@ -7620,6 +7698,9 @@ class ProcessingInstitutionEnum(EnumDefinitionImpl):
UCD_Genome_Center = PermissibleValue(
text="UCD_Genome_Center",
meaning=None)
+ Azenta = PermissibleValue(
+ text="Azenta",
+ meaning=None)
_defn = EnumDefinition(
name="ProcessingInstitutionEnum",
@@ -9694,6 +9775,21 @@ class WindowVertPosEnum(EnumDefinitionImpl):
class slots:
pass
+slots.has_calibration = Slot(uri=NMDC.has_calibration, name="has_calibration", curie=NMDC.curie('has_calibration'),
+ model_uri=NMDC.has_calibration, domain=None, range=Optional[Union[str, CalibrationInformationId]])
+
+slots.calibration_object = Slot(uri=NMDC.calibration_object, name="calibration_object", curie=NMDC.curie('calibration_object'),
+ model_uri=NMDC.calibration_object, domain=None, range=Optional[Union[str, DataObjectId]])
+
+slots.internal_calibration = Slot(uri=NMDC.internal_calibration, name="internal_calibration", curie=NMDC.curie('internal_calibration'),
+ model_uri=NMDC.internal_calibration, domain=None, range=Optional[Union[bool, Bool]])
+
+slots.calibration_target = Slot(uri=NMDC.calibration_target, name="calibration_target", curie=NMDC.curie('calibration_target'),
+ model_uri=NMDC.calibration_target, domain=None, range=Optional[Union[str, "CalibrationTargetEnum"]])
+
+slots.calibration_standard = Slot(uri=NMDC.calibration_standard, name="calibration_standard", curie=NMDC.curie('calibration_standard'),
+ model_uri=NMDC.calibration_standard, domain=None, range=Optional[Union[str, "CalibrationStandardEnum"]])
+
slots.polarity_mode = Slot(uri=NMDC.polarity_mode, name="polarity_mode", curie=NMDC.curie('polarity_mode'),
model_uri=NMDC.polarity_mode, domain=None, range=Optional[Union[str, "PolarityModeEnum"]])
@@ -9824,6 +9920,9 @@ class slots:
slots.configuration_set = Slot(uri=NMDC.configuration_set, name="configuration_set", curie=NMDC.curie('configuration_set'),
model_uri=NMDC.configuration_set, domain=None, range=Optional[Union[Dict[Union[str, ConfigurationId], Union[dict, Configuration]], List[Union[dict, Configuration]]]])
+slots.manifest_set = Slot(uri=NMDC.manifest_set, name="manifest_set", curie=NMDC.curie('manifest_set'),
+ model_uri=NMDC.manifest_set, domain=None, range=Optional[Union[Dict[Union[str, ManifestId], Union[dict, Manifest]], List[Union[dict, Manifest]]]])
+
slots.protocol_execution_set = Slot(uri=NMDC.protocol_execution_set, name="protocol_execution_set", curie=NMDC.curie('protocol_execution_set'),
model_uri=NMDC.protocol_execution_set, domain=None, range=Optional[Union[Dict[Union[str, ProtocolExecutionId], Union[dict, ProtocolExecution]], List[Union[dict, ProtocolExecution]]]])
@@ -10068,21 +10167,6 @@ class slots:
slots.input_read_bases = Slot(uri=NMDC.input_read_bases, name="input_read_bases", curie=NMDC.curie('input_read_bases'),
model_uri=NMDC.input_read_bases, domain=None, range=Optional[float])
-slots.has_calibration = Slot(uri=NMDC.has_calibration, name="has_calibration", curie=NMDC.curie('has_calibration'),
- model_uri=NMDC.has_calibration, domain=None, range=Optional[str])
-
-slots.calibration_object = Slot(uri=NMDC.calibration_object, name="calibration_object", curie=NMDC.curie('calibration_object'),
- model_uri=NMDC.calibration_object, domain=None, range=Optional[Union[str, DataObjectId]])
-
-slots.internal_calibration = Slot(uri=NMDC.internal_calibration, name="internal_calibration", curie=NMDC.curie('internal_calibration'),
- model_uri=NMDC.internal_calibration, domain=None, range=Optional[Union[bool, Bool]])
-
-slots.calibration_target = Slot(uri=NMDC.calibration_target, name="calibration_target", curie=NMDC.curie('calibration_target'),
- model_uri=NMDC.calibration_target, domain=None, range=Optional[Union[str, "CalibrationTargetEnum"]])
-
-slots.calibration_standard = Slot(uri=NMDC.calibration_standard, name="calibration_standard", curie=NMDC.curie('calibration_standard'),
- model_uri=NMDC.calibration_standard, domain=None, range=Optional[Union[str, "CalibrationStandardEnum"]])
-
slots.has_metabolite_identifications = Slot(uri=NMDC.has_metabolite_identifications, name="has_metabolite_identifications", curie=NMDC.curie('has_metabolite_identifications'),
model_uri=NMDC.has_metabolite_identifications, domain=None, range=Optional[Union[Union[dict, MetaboliteIdentification], List[Union[dict, MetaboliteIdentification]]]])
@@ -10168,7 +10252,7 @@ class slots:
model_uri=NMDC.members_id, domain=None, range=Optional[Union[str, List[str]]])
slots.bin_name = Slot(uri=NMDC.bin_name, name="bin_name", curie=NMDC.curie('bin_name'),
- model_uri=NMDC.bin_name, domain=None, range=Optional[str])
+ model_uri=NMDC.bin_name, domain=None, range=str)
slots.number_of_contig = Slot(uri=NMDC.number_of_contig, name="number_of_contig", curie=NMDC.curie('number_of_contig'),
model_uri=NMDC.number_of_contig, domain=None, range=Optional[int])
@@ -10183,7 +10267,7 @@ class slots:
model_uri=NMDC.gene_count, domain=None, range=Optional[int])
slots.bin_quality = Slot(uri=NMDC.bin_quality, name="bin_quality", curie=NMDC.curie('bin_quality'),
- model_uri=NMDC.bin_quality, domain=None, range=Optional[str])
+ model_uri=NMDC.bin_quality, domain=None, range=Optional[Union[str, "BinQualityEnum"]])
slots.num_16s = Slot(uri=NMDC.num_16s, name="num_16s", curie=NMDC.curie('num_16s'),
model_uri=NMDC.num_16s, domain=None, range=Optional[int])
@@ -10499,6 +10583,12 @@ class slots:
slots.instrument_used = Slot(uri=NMDC['basic_classes/instrument_used'], name="instrument_used", curie=NMDC.curie('basic_classes/instrument_used'),
model_uri=NMDC.instrument_used, domain=None, range=Optional[Union[Union[str, InstrumentId], List[Union[str, InstrumentId]]]])
+slots.in_manifest = Slot(uri=NMDC['basic_classes/in_manifest'], name="in_manifest", curie=NMDC.curie('basic_classes/in_manifest'),
+ model_uri=NMDC.in_manifest, domain=None, range=Optional[Union[Union[str, ManifestId], List[Union[str, ManifestId]]]])
+
+slots.manifest_category = Slot(uri=NMDC['basic_classes/manifest_category'], name="manifest_category", curie=NMDC.curie('basic_classes/manifest_category'),
+ model_uri=NMDC.manifest_category, domain=None, range=Union[str, "ManifestCategoryEnum"])
+
slots.model = Slot(uri=NMDC['basic_classes/model'], name="model", curie=NMDC.curie('basic_classes/model'),
model_uri=NMDC.model, domain=None, range=Optional[Union[str, "InstrumentModelEnum"]])
@@ -10688,9 +10778,6 @@ class slots:
slots.analyte_category = Slot(uri=NMDC.analyte_category, name="analyte_category", curie=NMDC.curie('analyte_category'),
model_uri=NMDC.analyte_category, domain=None, range=Union[str, "AnalyteCategoryEnum"])
-slots.direct_infusion_category = Slot(uri=NMDC.direct_infusion_category, name="direct_infusion_category", curie=NMDC.curie('direct_infusion_category'),
- model_uri=NMDC.direct_infusion_category, domain=None, range=Optional[Union[str, "DirectInfusionEnum"]])
-
slots.type = Slot(uri=RDF.type, name="type", curie=RDF.curie('type'),
model_uri=NMDC.type, domain=None, range=Union[str, URIorCURIE])
@@ -12359,7 +12446,7 @@ class slots:
pattern=re.compile(r'^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$'))
slots.MassSpectrometry_has_calibration = Slot(uri=NMDC.has_calibration, name="MassSpectrometry_has_calibration", curie=NMDC.curie('has_calibration'),
- model_uri=NMDC.MassSpectrometry_has_calibration, domain=MassSpectrometry, range=Optional[str])
+ model_uri=NMDC.MassSpectrometry_has_calibration, domain=MassSpectrometry, range=Optional[Union[str, CalibrationInformationId]])
slots.MassSpectrometry_has_chromatography_configuration = Slot(uri=NMDC.has_chromatography_configuration, name="MassSpectrometry_has_chromatography_configuration", curie=NMDC.curie('has_chromatography_configuration'),
model_uri=NMDC.MassSpectrometry_has_chromatography_configuration, domain=MassSpectrometry, range=Optional[Union[str, ChromatographyConfigurationId]])
@@ -12387,8 +12474,25 @@ class slots:
model_uri=NMDC.ChromatographyConfiguration_id, domain=ChromatographyConfiguration, range=Union[str, ChromatographyConfigurationId],
pattern=re.compile(r'^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$'))
-slots.FunctionalAnnotationAggMember_metagenome_annotation_id = Slot(uri=NMDC.metagenome_annotation_id, name="FunctionalAnnotationAggMember_metagenome_annotation_id", curie=NMDC.curie('metagenome_annotation_id'),
- model_uri=NMDC.FunctionalAnnotationAggMember_metagenome_annotation_id, domain=FunctionalAnnotationAggMember, range=Union[str, WorkflowExecutionId])
+slots.Manifest_id = Slot(uri=NMDC.id, name="Manifest_id", curie=NMDC.curie('id'),
+ model_uri=NMDC.Manifest_id, domain=Manifest, range=Union[str, ManifestId],
+ pattern=re.compile(r'^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$'))
+
+slots.CalibrationInformation_internal_calibration = Slot(uri=NMDC.internal_calibration, name="CalibrationInformation_internal_calibration", curie=NMDC.curie('internal_calibration'),
+ model_uri=NMDC.CalibrationInformation_internal_calibration, domain=CalibrationInformation, range=Union[bool, Bool])
+
+slots.CalibrationInformation_calibration_target = Slot(uri=NMDC.calibration_target, name="CalibrationInformation_calibration_target", curie=NMDC.curie('calibration_target'),
+ model_uri=NMDC.CalibrationInformation_calibration_target, domain=CalibrationInformation, range=Union[str, "CalibrationTargetEnum"])
+
+slots.CalibrationInformation_id = Slot(uri=NMDC.id, name="CalibrationInformation_id", curie=NMDC.curie('id'),
+ model_uri=NMDC.CalibrationInformation_id, domain=CalibrationInformation, range=Union[str, CalibrationInformationId],
+ pattern=re.compile(r'^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$'))
+
+slots.FunctionalAnnotationAggMember_was_generated_by = Slot(uri=NMDC['basic_classes/was_generated_by'], name="FunctionalAnnotationAggMember_was_generated_by", curie=NMDC.curie('basic_classes/was_generated_by'),
+ model_uri=NMDC.FunctionalAnnotationAggMember_was_generated_by, domain=FunctionalAnnotationAggMember, range=Union[str, WorkflowExecutionId], mappings = [PROV["wasGeneratedBy"]])
+
+slots.FunctionalAnnotationAggMember_count = Slot(uri=NMDC.count, name="FunctionalAnnotationAggMember_count", curie=NMDC.curie('count'),
+ model_uri=NMDC.FunctionalAnnotationAggMember_count, domain=FunctionalAnnotationAggMember, range=int)
slots.Pooling_has_input = Slot(uri=NMDC['basic_classes/has_input'], name="Pooling_has_input", curie=NMDC.curie('basic_classes/has_input'),
model_uri=NMDC.Pooling_has_input, domain=Pooling, range=Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]])
@@ -12633,16 +12737,6 @@ class slots:
slots.NomAnalysis_was_informed_by = Slot(uri=NMDC['basic_classes/was_informed_by'], name="NomAnalysis_was_informed_by", curie=NMDC.curie('basic_classes/was_informed_by'),
model_uri=NMDC.NomAnalysis_was_informed_by, domain=NomAnalysis, range=Optional[Union[str, DataGenerationId]], mappings = [PROV["wasInformedBy"]])
-slots.CalibrationInformation_internal_calibration = Slot(uri=NMDC.internal_calibration, name="CalibrationInformation_internal_calibration", curie=NMDC.curie('internal_calibration'),
- model_uri=NMDC.CalibrationInformation_internal_calibration, domain=CalibrationInformation, range=Union[bool, Bool])
-
-slots.CalibrationInformation_calibration_target = Slot(uri=NMDC.calibration_target, name="CalibrationInformation_calibration_target", curie=NMDC.curie('calibration_target'),
- model_uri=NMDC.CalibrationInformation_calibration_target, domain=CalibrationInformation, range=Union[str, "CalibrationTargetEnum"])
-
-slots.CalibrationInformation_id = Slot(uri=NMDC.id, name="CalibrationInformation_id", curie=NMDC.curie('id'),
- model_uri=NMDC.CalibrationInformation_id, domain=CalibrationInformation, range=Union[str, CalibrationInformationId],
- pattern=re.compile(r'^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$'))
-
slots.ChemicalConversionProcess_id = Slot(uri=NMDC.id, name="ChemicalConversionProcess_id", curie=NMDC.curie('id'),
model_uri=NMDC.ChemicalConversionProcess_id, domain=ChemicalConversionProcess, range=Union[str, ChemicalConversionProcessId],
pattern=re.compile(r'^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$'))
@@ -12794,12 +12888,6 @@ class slots:
model_uri=NMDC.ProcessedSample_id, domain=ProcessedSample, range=Union[str, ProcessedSampleId],
pattern=re.compile(r'^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$'))
-slots.ProteinQuantification_best_protein = Slot(uri=NMDC.best_protein, name="ProteinQuantification_best_protein", curie=NMDC.curie('best_protein'),
- model_uri=NMDC.ProteinQuantification_best_protein, domain=ProteinQuantification, range=Optional[Union[str, GeneProductId]])
-
-slots.ProteinQuantification_all_proteins = Slot(uri=NMDC.all_proteins, name="ProteinQuantification_all_proteins", curie=NMDC.curie('all_proteins'),
- model_uri=NMDC.ProteinQuantification_all_proteins, domain=ProteinQuantification, range=Optional[Union[Union[str, GeneProductId], List[Union[str, GeneProductId]]]])
-
slots.QuantityValue_has_raw_value = Slot(uri=NMDC.has_raw_value, name="QuantityValue_has_raw_value", curie=NMDC.curie('has_raw_value'),
model_uri=NMDC.QuantityValue_has_raw_value, domain=QuantityValue, range=Optional[str])
diff --git a/nmdc_schema/nmdc.schema.json b/nmdc_schema/nmdc.schema.json
index d9d971d9fc..f86179a14f 100644
--- a/nmdc_schema/nmdc.schema.json
+++ b/nmdc_schema/nmdc.schema.json
@@ -37,6 +37,16 @@
"title": "ArchStrucEnum",
"type": "string"
},
+ "BinQualityEnum": {
+ "description": "",
+ "enum": [
+ "HQ",
+ "MQ",
+ "LQ"
+ ],
+ "title": "BinQualityEnum",
+ "type": "string"
+ },
"BiolStatEnum": {
"description": "",
"enum": [
@@ -6619,6 +6629,16 @@
"pattern": "^[a-zA-Z0-9][a-zA-Z0-9_\\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\\-\\/\\.,]*$",
"type": "string"
},
+ "in_manifest": {
+ "description": "one or more combinations of other DataObjects that can be analyzed together",
+ "items": {
+ "type": "string"
+ },
+ "type": [
+ "array",
+ "null"
+ ]
+ },
"insdc_experiment_identifiers": {
"items": {
"pattern": "^insdc.sra:(E|D|S)RX[0-9]{6,}$",
@@ -6812,6 +6832,16 @@
"null"
]
},
+ "manifest_set": {
+ "description": "This property links a database object to the set of manifests within it.",
+ "items": {
+ "$ref": "#/$defs/Manifest"
+ },
+ "type": [
+ "array",
+ "null"
+ ]
+ },
"material_processing_set": {
"description": "This property links a database object to the set of material processing within it.",
"items": {
@@ -6964,15 +6994,6 @@
"title": "DeposEnvEnum",
"type": "string"
},
- "DirectInfusionEnum": {
- "description": "",
- "enum": [
- "direct_infusion",
- "autosampler"
- ],
- "title": "DirectInfusionEnum",
- "type": "string"
- },
"DissolvingProcess": {
"additionalProperties": false,
"description": "A mixing step where a soluble component is mixed with a liquid component.",
@@ -7220,7 +7241,8 @@
"massive",
"gsc",
"zenodo",
- "edi"
+ "edi",
+ "figshare"
],
"title": "DoiProviderEnum",
"type": "string"
@@ -7431,12 +7453,16 @@
"description": "This class contains information pertaining to evaluating if a Metagenome-Assembled Genome (MAG) is eukaryotic.",
"properties": {
"completeness": {
+ "description": "Estimate of the completeness of the metagenome-assembled genome, estimated by a tool like CheckM.",
+ "minimum": 0,
"type": [
"number",
"null"
]
},
"contamination": {
+ "description": "Estimate of the contamination of the metagenome-assembled genome, estimated by a tool like CheckM.",
+ "minimum": 0,
"type": [
"number",
"null"
@@ -7923,6 +7949,9 @@
"FileTypeEnum": {
"description": "",
"enum": [
+ "Virus Summary",
+ "Plasmid Summary",
+ "GeNomad Aggregated Classification",
"Reference Calibration File",
"Metagenome Raw Reads",
"Metagenome Raw Read 1",
@@ -8369,13 +8398,21 @@
"description": "",
"properties": {
"count": {
+ "description": "The number of sequences (for a metagenome or metatranscriptome) or spectra (for metaproteomics) associated with the specified function.",
"type": "integer"
},
"gene_function_id": {
"description": "The identifier for the gene function.",
"type": "string"
},
- "metagenome_annotation_id": {
+ "type": {
+ "description": "the class_uri of the class that has been instantiated",
+ "enum": [
+ "nmdc:FunctionalAnnotationAggMember"
+ ],
+ "type": "string"
+ },
+ "was_generated_by": {
"anyOf": [
{
"type": "string"
@@ -8384,19 +8421,11 @@
"type": "string"
}
],
- "description": "The identifier for the analysis activity that generated the functional annotation results, where the analysis activity is an instance of the/an appropriate subclass of WorkflowExecution",
- "type": "string"
- },
- "type": {
- "description": "the class_uri of the class that has been instantiated",
- "enum": [
- "nmdc:FunctionalAnnotationAggMember"
- ],
"type": "string"
}
},
"required": [
- "metagenome_annotation_id",
+ "was_generated_by",
"gene_function_id",
"count",
"type"
@@ -9158,24 +9187,24 @@
"description": "",
"properties": {
"bin_name": {
- "type": [
- "string",
- "null"
- ]
+ "description": "Name of the metagenome-assembled genome.",
+ "type": "string"
},
"bin_quality": {
- "type": [
- "string",
- "null"
- ]
+ "$ref": "#/$defs/BinQualityEnum",
+ "description": "The quality of the metagenome-assembled genome based on MIMAG standards (https://doi.org/10.1038/nbt.3893)."
},
"completeness": {
+ "description": "Estimate of the completeness of the metagenome-assembled genome, estimated by a tool like CheckM.",
+ "minimum": 0,
"type": [
"number",
"null"
]
},
"contamination": {
+ "description": "Estimate of the contamination of the metagenome-assembled genome, estimated by a tool like CheckM.",
+ "minimum": 0,
"type": [
"number",
"null"
@@ -9193,54 +9222,64 @@
"description": "Contains results from evaluating if a Metagenome-Assembled Genome is of eukaryotic lineage."
},
"gene_count": {
+ "description": "Number of genes.",
+ "minimum": 0,
"type": [
"integer",
"null"
]
},
"gtdbtk_class": {
+ "description": "Taxonomic class assigned by GTDB-Tk.",
"type": [
"string",
"null"
]
},
"gtdbtk_domain": {
+ "description": "Taxonomic domain assigned by GTDB-Tk.",
"type": [
"string",
"null"
]
},
"gtdbtk_family": {
+ "description": "Taxonomic family assigned by GTDB-Tk.",
"type": [
"string",
"null"
]
},
"gtdbtk_genus": {
+ "description": "Taxonomic genus assigned by GTDB-Tk.",
"type": [
"string",
"null"
]
},
"gtdbtk_order": {
+ "description": "Taxonomic order assigned by GTDB-Tk.",
"type": [
"string",
"null"
]
},
"gtdbtk_phylum": {
+ "description": "Taxonomic phylum assigned by GTDB-Tk.",
"type": [
"string",
"null"
]
},
"gtdbtk_species": {
+ "description": "Taxonomic species assigned by GTDB-Tk.",
"type": [
"string",
"null"
]
},
"members_id": {
+ "description": "Names of the contigs that make up a metagenome-assembled genome.",
"items": {
"type": "string"
},
@@ -9250,36 +9289,48 @@
]
},
"num_16s": {
+ "description": "Number of 16s sequences detected, a subunit of prokaryotic ribosomes.",
+ "minimum": 0,
"type": [
"integer",
"null"
]
},
"num_23s": {
+ "description": "Number of 23s sequences detected, a subunit of ribosomes.",
+ "minimum": 0,
"type": [
"integer",
"null"
]
},
"num_5s": {
+ "description": "Number of 5s sequences detected, a subunit of ribosomes.",
+ "minimum": 0,
"type": [
"integer",
"null"
]
},
"num_t_rna": {
+ "description": "Number of transfer RNAs.",
+ "minimum": 0,
"type": [
"integer",
"null"
]
},
"number_of_contig": {
+ "description": "Number of contigs",
+ "minimum": 0,
"type": [
"integer",
"null"
]
},
"total_bases": {
+ "description": "Total number of basepairs.",
+ "minimum": 0,
"type": [
"integer",
"null"
@@ -9294,6 +9345,7 @@
}
},
"required": [
+ "bin_name",
"type"
],
"title": "MagBin",
@@ -9357,6 +9409,8 @@
]
},
"binned_contig_num": {
+ "description": "Number of contigs that ended up in a medium or high quality bin.",
+ "minimum": 0,
"type": [
"integer",
"null"
@@ -9435,18 +9489,23 @@
]
},
"input_contig_num": {
+ "description": "Total number of input contigs.",
+ "minimum": 0,
"type": [
"integer",
"null"
]
},
"low_depth_contig_num": {
+ "description": "Number of contigs which were excluded from binning for depth of coverage.",
+ "minimum": 0,
"type": [
"integer",
"null"
]
},
"mags_list": {
+ "description": "Contains detailed information about each metagenome-assembled genome.",
"items": {
"$ref": "#/$defs/MagBin"
},
@@ -9499,6 +9558,8 @@
"type": "string"
},
"too_short_contig_num": {
+ "description": "Number of contigs which were excluded from binning for length.",
+ "minimum": 0,
"type": [
"integer",
"null"
@@ -9512,6 +9573,8 @@
"type": "string"
},
"unbinned_contig_num": {
+ "description": "Number of contigs which did not end up in a medium or high quality bin.",
+ "minimum": 0,
"type": [
"integer",
"null"
@@ -9539,6 +9602,70 @@
"title": "MagsAnalysis",
"type": "object"
},
+ "Manifest": {
+ "additionalProperties": false,
+ "description": "A qualified collection of DataObjects that can be analyzed together in the same experimental context.",
+ "properties": {
+ "alternative_identifiers": {
+ "description": "A list of alternative identifiers for the entity.",
+ "items": {
+ "pattern": "^[a-zA-Z0-9][a-zA-Z0-9_\\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\\-\\/\\.,]*$",
+ "type": "string"
+ },
+ "type": [
+ "array",
+ "null"
+ ]
+ },
+ "description": {
+ "description": "a human-readable description of a thing",
+ "type": [
+ "string",
+ "null"
+ ]
+ },
+ "id": {
+ "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI",
+ "pattern": "^[a-zA-Z0-9][a-zA-Z0-9_\\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\\-\\/\\.,]*$",
+ "type": "string"
+ },
+ "manifest_category": {
+ "$ref": "#/$defs/ManifestCategoryEnum",
+ "description": "The type of context in which the constituent DataObjects can be analyzed together."
+ },
+ "name": {
+ "description": "A human readable label for an entity",
+ "type": [
+ "string",
+ "null"
+ ]
+ },
+ "type": {
+ "description": "the class_uri of the class that has been instantiated",
+ "enum": [
+ "nmdc:Manifest"
+ ],
+ "type": "string"
+ }
+ },
+ "required": [
+ "manifest_category",
+ "id",
+ "type"
+ ],
+ "title": "Manifest",
+ "type": "object"
+ },
+ "ManifestCategoryEnum": {
+ "description": "A list of contexts in which some DataObjects can be analyzed together.",
+ "enum": [
+ "instrument_run",
+ "poolable_replicates",
+ "fractions"
+ ],
+ "title": "ManifestCategoryEnum",
+ "type": "string"
+ },
"MassAnalyzerEnum": {
"description": "",
"enum": [
@@ -9652,19 +9779,11 @@
]
},
"has_calibration": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
"description": "a calibration instance associated with a process",
- "type": "string"
+ "type": [
+ "string",
+ "null"
+ ]
},
"has_chromatography_configuration": {
"description": "The identifier of the associated ChromatographyConfiguration, providing information about how a sample was introduced into the mass spectrometer.",
@@ -10055,21 +10174,6 @@
"description": "The url that points to the exact github location of a workflow.",
"type": "string"
},
- "has_calibration": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "description": "a calibration instance associated with a process",
- "type": "string"
- },
"has_failure_categorization": {
"items": {
"$ref": "#/$defs/FailureCategorization"
@@ -11080,15 +11184,6 @@
"null"
]
},
- "has_peptide_quantifications": {
- "items": {
- "$ref": "#/$defs/PeptideQuantification"
- },
- "type": [
- "array",
- "null"
- ]
- },
"id": {
"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI",
"pattern": "^[a-zA-Z0-9][a-zA-Z0-9_\\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\\-\\/\\.,]*$",
@@ -12246,21 +12341,6 @@
"description": "The url that points to the exact github location of a workflow.",
"type": "string"
},
- "has_calibration": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "description": "a calibration instance associated with a process",
- "type": "string"
- },
"has_failure_categorization": {
"items": {
"$ref": "#/$defs/FailureCategorization"
@@ -13263,7 +13343,8 @@
"EMSL",
"Battelle",
"ANL",
- "UCD_Genome_Center"
+ "UCD_Genome_Center",
+ "Azenta"
],
"title": "ProcessingInstitutionEnum",
"type": "string"
@@ -13285,7 +13366,7 @@
"description": "This is used to link a metaproteomics analysis workflow to a specific protein",
"properties": {
"all_proteins": {
- "description": "the grouped list of protein identifiers associated with the peptide sequences that were grouped to a best protein",
+ "description": "the list of protein identifiers that are associated with the peptide sequence",
"items": {
"type": "string"
},
@@ -13295,7 +13376,7 @@
]
},
"best_protein": {
- "description": "the specific protein identifier most correctly grouped to its associated peptide sequences",
+ "description": "the specific protein identifier most correctly associated with the peptide sequence",
"type": [
"string",
"null"
@@ -15626,6 +15707,16 @@
"null"
]
},
+ "manifest_set": {
+ "description": "This property links a database object to the set of manifests within it.",
+ "items": {
+ "$ref": "#/$defs/Manifest"
+ },
+ "type": [
+ "array",
+ "null"
+ ]
+ },
"material_processing_set": {
"description": "This property links a database object to the set of material processing within it.",
"items": {
diff --git a/nmdc_schema/nmdc_materialized_patterns.schema.json b/nmdc_schema/nmdc_materialized_patterns.schema.json
index 1d2f9ed942..13ac19cc4a 100644
--- a/nmdc_schema/nmdc_materialized_patterns.schema.json
+++ b/nmdc_schema/nmdc_materialized_patterns.schema.json
@@ -37,6 +37,16 @@
"title": "ArchStrucEnum",
"type": "string"
},
+ "BinQualityEnum": {
+ "description": "",
+ "enum": [
+ "HQ",
+ "MQ",
+ "LQ"
+ ],
+ "title": "BinQualityEnum",
+ "type": "string"
+ },
"BiolStatEnum": {
"description": "",
"enum": [
@@ -6627,6 +6637,16 @@
"pattern": "^(nmdc):dobj-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$",
"type": "string"
},
+ "in_manifest": {
+ "description": "one or more combinations of other DataObjects that can be analyzed together",
+ "items": {
+ "type": "string"
+ },
+ "type": [
+ "array",
+ "null"
+ ]
+ },
"insdc_experiment_identifiers": {
"items": {
"pattern": "^insdc.sra:(E|D|S)RX[0-9]{6,}$",
@@ -6821,6 +6841,16 @@
"null"
]
},
+ "manifest_set": {
+ "description": "This property links a database object to the set of manifests within it.",
+ "items": {
+ "$ref": "#/$defs/Manifest"
+ },
+ "type": [
+ "array",
+ "null"
+ ]
+ },
"material_processing_set": {
"description": "This property links a database object to the set of material processing within it.",
"items": {
@@ -6973,15 +7003,6 @@
"title": "DeposEnvEnum",
"type": "string"
},
- "DirectInfusionEnum": {
- "description": "",
- "enum": [
- "direct_infusion",
- "autosampler"
- ],
- "title": "DirectInfusionEnum",
- "type": "string"
- },
"DissolvingProcess": {
"additionalProperties": false,
"description": "A mixing step where a soluble component is mixed with a liquid component.",
@@ -7231,7 +7252,8 @@
"massive",
"gsc",
"zenodo",
- "edi"
+ "edi",
+ "figshare"
],
"title": "DoiProviderEnum",
"type": "string"
@@ -7442,12 +7464,16 @@
"description": "This class contains information pertaining to evaluating if a Metagenome-Assembled Genome (MAG) is eukaryotic.",
"properties": {
"completeness": {
+ "description": "Estimate of the completeness of the metagenome-assembled genome, estimated by a tool like CheckM.",
+ "minimum": 0,
"type": [
"number",
"null"
]
},
"contamination": {
+ "description": "Estimate of the contamination of the metagenome-assembled genome, estimated by a tool like CheckM.",
+ "minimum": 0,
"type": [
"number",
"null"
@@ -7937,6 +7963,9 @@
"FileTypeEnum": {
"description": "",
"enum": [
+ "Virus Summary",
+ "Plasmid Summary",
+ "GeNomad Aggregated Classification",
"Reference Calibration File",
"Metagenome Raw Reads",
"Metagenome Raw Read 1",
@@ -8386,13 +8415,21 @@
"description": "",
"properties": {
"count": {
+ "description": "The number of sequences (for a metagenome or metatranscriptome) or spectra (for metaproteomics) associated with the specified function.",
"type": "integer"
},
"gene_function_id": {
"description": "The identifier for the gene function.",
"type": "string"
},
- "metagenome_annotation_id": {
+ "type": {
+ "description": "the class_uri of the class that has been instantiated",
+ "enum": [
+ "nmdc:FunctionalAnnotationAggMember"
+ ],
+ "type": "string"
+ },
+ "was_generated_by": {
"anyOf": [
{
"type": "string"
@@ -8401,20 +8438,12 @@
"type": "string"
}
],
- "description": "The identifier for the analysis activity that generated the functional annotation results, where the analysis activity is an instance of the/an appropriate subclass of WorkflowExecution",
- "pattern": "^(nmdc):(wfmgan|wfmtan)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[0-9]{1,})$",
- "type": "string"
- },
- "type": {
- "description": "the class_uri of the class that has been instantiated",
- "enum": [
- "nmdc:FunctionalAnnotationAggMember"
- ],
+ "pattern": "^(nmdc):(wfmgan|wfmp|wfmtan)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[0-9]{1,})$",
"type": "string"
}
},
"required": [
- "metagenome_annotation_id",
+ "was_generated_by",
"gene_function_id",
"count",
"type"
@@ -9178,24 +9207,24 @@
"description": "",
"properties": {
"bin_name": {
- "type": [
- "string",
- "null"
- ]
+ "description": "Name of the metagenome-assembled genome.",
+ "type": "string"
},
"bin_quality": {
- "type": [
- "string",
- "null"
- ]
+ "$ref": "#/$defs/BinQualityEnum",
+ "description": "The quality of the metagenome-assembled genome based on MIMAG standards (https://doi.org/10.1038/nbt.3893)."
},
"completeness": {
+ "description": "Estimate of the completeness of the metagenome-assembled genome, estimated by a tool like CheckM.",
+ "minimum": 0,
"type": [
"number",
"null"
]
},
"contamination": {
+ "description": "Estimate of the contamination of the metagenome-assembled genome, estimated by a tool like CheckM.",
+ "minimum": 0,
"type": [
"number",
"null"
@@ -9213,54 +9242,64 @@
"description": "Contains results from evaluating if a Metagenome-Assembled Genome is of eukaryotic lineage."
},
"gene_count": {
+ "description": "Number of genes.",
+ "minimum": 0,
"type": [
"integer",
"null"
]
},
"gtdbtk_class": {
+ "description": "Taxonomic class assigned by GTDB-Tk.",
"type": [
"string",
"null"
]
},
"gtdbtk_domain": {
+ "description": "Taxonomic domain assigned by GTDB-Tk.",
"type": [
"string",
"null"
]
},
"gtdbtk_family": {
+ "description": "Taxonomic family assigned by GTDB-Tk.",
"type": [
"string",
"null"
]
},
"gtdbtk_genus": {
+ "description": "Taxonomic genus assigned by GTDB-Tk.",
"type": [
"string",
"null"
]
},
"gtdbtk_order": {
+ "description": "Taxonomic order assigned by GTDB-Tk.",
"type": [
"string",
"null"
]
},
"gtdbtk_phylum": {
+ "description": "Taxonomic phylum assigned by GTDB-Tk.",
"type": [
"string",
"null"
]
},
"gtdbtk_species": {
+ "description": "Taxonomic species assigned by GTDB-Tk.",
"type": [
"string",
"null"
]
},
"members_id": {
+ "description": "Names of the contigs that make up a metagenome-assembled genome.",
"items": {
"type": "string"
},
@@ -9270,36 +9309,48 @@
]
},
"num_16s": {
+ "description": "Number of 16s sequences detected, a subunit of prokaryotic ribosomes.",
+ "minimum": 0,
"type": [
"integer",
"null"
]
},
"num_23s": {
+ "description": "Number of 23s sequences detected, a subunit of ribosomes.",
+ "minimum": 0,
"type": [
"integer",
"null"
]
},
"num_5s": {
+ "description": "Number of 5s sequences detected, a subunit of ribosomes.",
+ "minimum": 0,
"type": [
"integer",
"null"
]
},
"num_t_rna": {
+ "description": "Number of transfer RNAs.",
+ "minimum": 0,
"type": [
"integer",
"null"
]
},
"number_of_contig": {
+ "description": "Number of contigs",
+ "minimum": 0,
"type": [
"integer",
"null"
]
},
"total_bases": {
+ "description": "Total number of basepairs.",
+ "minimum": 0,
"type": [
"integer",
"null"
@@ -9314,6 +9365,7 @@
}
},
"required": [
+ "bin_name",
"type"
],
"title": "MagBin",
@@ -9377,6 +9429,8 @@
]
},
"binned_contig_num": {
+ "description": "Number of contigs that ended up in a medium or high quality bin.",
+ "minimum": 0,
"type": [
"integer",
"null"
@@ -9457,18 +9511,23 @@
]
},
"input_contig_num": {
+ "description": "Total number of input contigs.",
+ "minimum": 0,
"type": [
"integer",
"null"
]
},
"low_depth_contig_num": {
+ "description": "Number of contigs which were excluded from binning for depth of coverage.",
+ "minimum": 0,
"type": [
"integer",
"null"
]
},
"mags_list": {
+ "description": "Contains detailed information about each metagenome-assembled genome.",
"items": {
"$ref": "#/$defs/MagBin"
},
@@ -9521,6 +9580,8 @@
"type": "string"
},
"too_short_contig_num": {
+ "description": "Number of contigs which were excluded from binning for length.",
+ "minimum": 0,
"type": [
"integer",
"null"
@@ -9534,6 +9595,8 @@
"type": "string"
},
"unbinned_contig_num": {
+ "description": "Number of contigs which did not end up in a medium or high quality bin.",
+ "minimum": 0,
"type": [
"integer",
"null"
@@ -9562,6 +9625,70 @@
"title": "MagsAnalysis",
"type": "object"
},
+ "Manifest": {
+ "additionalProperties": false,
+ "description": "A qualified collection of DataObjects that can be analyzed together in the same experimental context.",
+ "properties": {
+ "alternative_identifiers": {
+ "description": "A list of alternative identifiers for the entity.",
+ "items": {
+ "pattern": "^[a-zA-Z0-9][a-zA-Z0-9_\\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\\-\\/\\.,]*$",
+ "type": "string"
+ },
+ "type": [
+ "array",
+ "null"
+ ]
+ },
+ "description": {
+ "description": "a human-readable description of a thing",
+ "type": [
+ "string",
+ "null"
+ ]
+ },
+ "id": {
+ "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI",
+ "pattern": "^(nmdc):manif-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$",
+ "type": "string"
+ },
+ "manifest_category": {
+ "$ref": "#/$defs/ManifestCategoryEnum",
+ "description": "The type of context in which the constituent DataObjects can be analyzed together."
+ },
+ "name": {
+ "description": "A human readable label for an entity",
+ "type": [
+ "string",
+ "null"
+ ]
+ },
+ "type": {
+ "description": "the class_uri of the class that has been instantiated",
+ "enum": [
+ "nmdc:Manifest"
+ ],
+ "type": "string"
+ }
+ },
+ "required": [
+ "manifest_category",
+ "id",
+ "type"
+ ],
+ "title": "Manifest",
+ "type": "object"
+ },
+ "ManifestCategoryEnum": {
+ "description": "A list of contexts in which some DataObjects can be analyzed together.",
+ "enum": [
+ "instrument_run",
+ "poolable_replicates",
+ "fractions"
+ ],
+ "title": "ManifestCategoryEnum",
+ "type": "string"
+ },
"MassAnalyzerEnum": {
"description": "",
"enum": [
@@ -9676,20 +9803,12 @@
]
},
"has_calibration": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
"description": "a calibration instance associated with a process",
"pattern": "^(nmdc):calib-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$",
- "type": "string"
+ "type": [
+ "string",
+ "null"
+ ]
},
"has_chromatography_configuration": {
"description": "The identifier of the associated ChromatographyConfiguration, providing information about how a sample was introduced into the mass spectrometer.",
@@ -10084,21 +10203,6 @@
"description": "The url that points to the exact github location of a workflow.",
"type": "string"
},
- "has_calibration": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "description": "a calibration instance associated with a process",
- "type": "string"
- },
"has_failure_categorization": {
"items": {
"$ref": "#/$defs/FailureCategorization"
@@ -11123,15 +11227,6 @@
"null"
]
},
- "has_peptide_quantifications": {
- "items": {
- "$ref": "#/$defs/PeptideQuantification"
- },
- "type": [
- "array",
- "null"
- ]
- },
"id": {
"description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI",
"pattern": "^(nmdc):wfmp-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[0-9]{1,})$",
@@ -12301,21 +12396,6 @@
"description": "The url that points to the exact github location of a workflow.",
"type": "string"
},
- "has_calibration": {
- "anyOf": [
- {
- "type": "string"
- },
- {
- "type": "string"
- },
- {
- "type": "null"
- }
- ],
- "description": "a calibration instance associated with a process",
- "type": "string"
- },
"has_failure_categorization": {
"items": {
"$ref": "#/$defs/FailureCategorization"
@@ -13326,7 +13406,8 @@
"EMSL",
"Battelle",
"ANL",
- "UCD_Genome_Center"
+ "UCD_Genome_Center",
+ "Azenta"
],
"title": "ProcessingInstitutionEnum",
"type": "string"
@@ -13348,7 +13429,7 @@
"description": "This is used to link a metaproteomics analysis workflow to a specific protein",
"properties": {
"all_proteins": {
- "description": "the grouped list of protein identifiers associated with the peptide sequences that were grouped to a best protein",
+ "description": "the list of protein identifiers that are associated with the peptide sequence",
"items": {
"type": "string"
},
@@ -13358,7 +13439,7 @@
]
},
"best_protein": {
- "description": "the specific protein identifier most correctly grouped to its associated peptide sequences",
+ "description": "the specific protein identifier most correctly associated with the peptide sequence",
"type": [
"string",
"null"
@@ -15703,6 +15784,16 @@
"null"
]
},
+ "manifest_set": {
+ "description": "This property links a database object to the set of manifests within it.",
+ "items": {
+ "$ref": "#/$defs/Manifest"
+ },
+ "type": [
+ "array",
+ "null"
+ ]
+ },
"material_processing_set": {
"description": "This property links a database object to the set of material processing within it.",
"items": {
diff --git a/nmdc_schema/nmdc_materialized_patterns.yaml b/nmdc_schema/nmdc_materialized_patterns.yaml
index 1b672cceec..c1f85dd9db 100644
--- a/nmdc_schema/nmdc_materialized_patterns.yaml
+++ b/nmdc_schema/nmdc_materialized_patterns.yaml
@@ -179,6 +179,9 @@ prefixes:
kegg:
prefix_prefix: kegg
prefix_reference: 'https://bioregistry.io/kegg:'
+ KEGG.MODULE:
+ prefix_prefix: KEGG.MODULE
+ prefix_reference: 'https://bioregistry.io/kegg.module:'
linkml:
prefix_prefix: linkml
prefix_reference: https://w3id.org/linkml/
@@ -621,6 +624,36 @@ types:
base: str
uri: xsd:string
enums:
+ CalibrationTargetEnum:
+ name: CalibrationTargetEnum
+ from_schema: https://w3id.org/nmdc/nmdc
+ permissible_values:
+ mass_charge_ratio:
+ text: mass_charge_ratio
+ title: m/z
+ aliases:
+ - Mass
+ - m/z
+ retention_time:
+ text: retention_time
+ aliases:
+ - RT
+ retention_index:
+ text: retention_index
+ aliases:
+ - RI
+ CalibrationStandardEnum:
+ name: CalibrationStandardEnum
+ from_schema: https://w3id.org/nmdc/nmdc
+ permissible_values:
+ fames:
+ text: fames
+ aliases:
+ - FAMES
+ alkanes:
+ text: alkanes
+ aliases:
+ - Alkanes
StrandedOrientationEnum:
name: StrandedOrientationEnum
description: This enumeration specifies information about stranded RNA library
@@ -955,6 +988,28 @@ enums:
text: aqueous_layer
non_polar_layer:
text: non_polar_layer
+ BinQualityEnum:
+ name: BinQualityEnum
+ from_schema: https://w3id.org/nmdc/nmdc
+ permissible_values:
+ HQ:
+ text: HQ
+ description: Metagenome-assembled genome is high quality based on MIMAG standards
+ (https://doi.org/10.1038/nbt.3893)
+ aliases:
+ - high quality
+ MQ:
+ text: MQ
+ description: Metagenome-assembled genome is medium quality based on MIMAG
+ standards (https://doi.org/10.1038/nbt.3893)
+ aliases:
+ - medium quality
+ LQ:
+ text: LQ
+ description: Metagenome-assembled genome is low quality based on MIMAG standards
+ (https://doi.org/10.1038/nbt.3893)
+ aliases:
+ - low quality
ChemicalConversionCategoryEnum:
name: ChemicalConversionCategoryEnum
from_schema: https://w3id.org/nmdc/nmdc
@@ -3189,6 +3244,22 @@ enums:
experiment.
exact_mappings:
- NCIT:C61538
+ ManifestCategoryEnum:
+ name: ManifestCategoryEnum
+ description: A list of contexts in which some DataObjects can be analyzed together.
+ from_schema: https://w3id.org/nmdc/nmdc
+ permissible_values:
+ instrument_run:
+ text: instrument_run
+ description: A collection of data objects from a single run of an instrument.
+ poolable_replicates:
+ text: poolable_replicates
+ description: A collection of data objects that can be pooled for downstream
+ analyses.
+ fractions:
+ text: fractions
+ description: A collection of data objects that represent fractions of a single
+ sample.
InstrumentModelEnum:
name: InstrumentModelEnum
from_schema: https://w3id.org/nmdc/nmdc
@@ -3604,6 +3675,35 @@ enums:
name: FileTypeEnum
from_schema: https://w3id.org/nmdc/nmdc
permissible_values:
+ Virus Summary:
+ text: Virus Summary
+ description: Tab separated file listing the viruses found by geNomad.
+ annotations:
+ file_name_pattern:
+ tag: file_name_pattern
+ value: ^_virus_summary\.tsv?$
+ see_also:
+ - https://portal.nersc.gov/genomad/
+ Plasmid Summary:
+ text: Plasmid Summary
+ description: Tab separated file listing the plasmids found by geNomad.
+ annotations:
+ file_name_pattern:
+ tag: file_name_pattern
+ value: ^_plasmid_summary\.tsv?$
+ see_also:
+ - https://portal.nersc.gov/genomad/
+ GeNomad Aggregated Classification:
+ text: GeNomad Aggregated Classification
+ description: Tab separated file which combines the results from neural network-based
+ classification and marker-based classification for virus and plasmid detection
+ with geNomad.
+ annotations:
+ file_name_pattern:
+ tag: file_name_pattern
+ value: ^_aggregated_classification\.tsv?$
+ see_also:
+ - https://portal.nersc.gov/genomad/
Reference Calibration File:
text: Reference Calibration File
description: A file that contains data used to calibrate a natural organic
@@ -4150,6 +4250,10 @@ enums:
aliases:
- EDI
- Environmental Data Initiative
+ figshare:
+ text: figshare
+ meaning: https://ror.org/041mxqs23
+ title: Figshare
DoiCategoryEnum:
name: DoiCategoryEnum
comments:
@@ -4206,16 +4310,6 @@ enums:
text: nom
aliases:
- natural organic matter
- DirectInfusionEnum:
- name: DirectInfusionEnum
- from_schema: https://w3id.org/nmdc/nmdc
- permissible_values:
- direct_infusion:
- text: direct_infusion
- aliases:
- - DI
- autosampler:
- text: autosampler
ExtractionTargetEnum:
name: ExtractionTargetEnum
from_schema: https://w3id.org/nmdc/nmdc
@@ -4263,6 +4357,10 @@ enums:
text: UCD_Genome_Center
meaning: https://genomecenter.ucdavis.edu/
title: University of California, Davis Genome Center
+ Azenta:
+ text: Azenta
+ meaning: https://www.azenta.com/
+ title: Azenta Life Sciences
DataCategoryEnum:
name: DataCategoryEnum
from_schema: https://w3id.org/nmdc/nmdc
@@ -4387,37 +4485,33 @@ enums:
text: natural organic matter
bulk chemistry:
text: bulk chemistry
- CalibrationTargetEnum:
- name: CalibrationTargetEnum
+slots:
+ has_calibration:
+ name: has_calibration
+ description: a calibration instance associated with a process
from_schema: https://w3id.org/nmdc/nmdc
- permissible_values:
- mass_charge_ratio:
- text: mass_charge_ratio
- title: m/z
- aliases:
- - Mass
- - m/z
- retention_time:
- text: retention_time
- aliases:
- - RT
- retention_index:
- text: retention_index
- aliases:
- - RI
- CalibrationStandardEnum:
- name: CalibrationStandardEnum
+ range: CalibrationInformation
+ calibration_object:
+ name: calibration_object
+ description: the file containing calibration data object
from_schema: https://w3id.org/nmdc/nmdc
- permissible_values:
- fames:
- text: fames
- aliases:
- - FAMES
- alkanes:
- text: alkanes
- aliases:
- - Alkanes
-slots:
+ range: DataObject
+ internal_calibration:
+ name: internal_calibration
+ description: whether internal calibration was used, if false, external calibration
+ was used
+ from_schema: https://w3id.org/nmdc/nmdc
+ range: boolean
+ calibration_target:
+ name: calibration_target
+ description: the target measurement of the calibration
+ from_schema: https://w3id.org/nmdc/nmdc
+ range: CalibrationTargetEnum
+ calibration_standard:
+ name: calibration_standard
+ description: the reference standard(s) used for calibration
+ from_schema: https://w3id.org/nmdc/nmdc
+ range: CalibrationStandardEnum
polarity_mode:
name: polarity_mode
description: the polarity of which ions are generated and detected
@@ -4514,12 +4608,10 @@ slots:
description: The identifier for the analysis activity that generated the functional
annotation results, where the analysis activity is an instance of the/an appropriate
subclass of WorkflowExecution
+ deprecated: not used. 2024-10 https://github.com/microbiomedata/nmdc-schema/issues/1253
from_schema: https://w3id.org/nmdc/nmdc
range: WorkflowExecution
required: true
- any_of:
- - range: MetagenomeAnnotation
- - range: MetatranscriptomeAnnotation
gene_function_id:
name: gene_function_id
description: The identifier for the gene function.
@@ -4717,6 +4809,14 @@ slots:
mixins:
- object_set
range: Configuration
+ manifest_set:
+ name: manifest_set
+ description: This property links a database object to the set of manifests within
+ it.
+ from_schema: https://w3id.org/nmdc/nmdc
+ mixins:
+ - object_set
+ range: Manifest
protocol_execution_set:
name: protocol_execution_set
description: This property links a database object to the set of protocol executions
@@ -5204,84 +5304,152 @@ slots:
range: QuantityValue
total_bases:
name: total_bases
- todos:
- - this slot needs some basic textual annotations and constraints
+ description: Total number of basepairs.
from_schema: https://w3id.org/nmdc/nmdc
range: integer
+ minimum_value: 0
members_id:
name: members_id
- todos:
- - this slot needs some basic textual annotations and constraints
+ description: Names of the contigs that make up a metagenome-assembled genome.
from_schema: https://w3id.org/nmdc/nmdc
+ close_mappings:
+ - GENEPIO:0100596
range: string
multivalued: true
bin_name:
name: bin_name
+ description: Name of the metagenome-assembled genome.
from_schema: https://w3id.org/nmdc/nmdc
range: string
+ required: true
number_of_contig:
name: number_of_contig
+ description: Number of contigs
from_schema: https://w3id.org/nmdc/nmdc
+ exact_mappings:
+ - GENEPIO:0000093
range: integer
+ minimum_value: 0
completeness:
name: completeness
+ description: Estimate of the completeness of the metagenome-assembled genome,
+ estimated by a tool like CheckM.
from_schema: https://w3id.org/nmdc/nmdc
range: float
+ minimum_value: 0
contamination:
name: contamination
+ description: Estimate of the contamination of the metagenome-assembled genome,
+ estimated by a tool like CheckM.
from_schema: https://w3id.org/nmdc/nmdc
range: float
+ minimum_value: 0
gene_count:
name: gene_count
+ description: Number of genes.
from_schema: https://w3id.org/nmdc/nmdc
range: integer
+ minimum_value: 0
bin_quality:
name: bin_quality
+ description: The quality of the metagenome-assembled genome based on MIMAG standards
+ (https://doi.org/10.1038/nbt.3893).
from_schema: https://w3id.org/nmdc/nmdc
- range: string
+ range: BinQualityEnum
num_16s:
name: num_16s
+ description: Number of 16s sequences detected, a subunit of prokaryotic ribosomes.
from_schema: https://w3id.org/nmdc/nmdc
+ broad_mappings:
+ - NCIT:C105370
range: integer
+ minimum_value: 0
num_5s:
name: num_5s
+ description: Number of 5s sequences detected, a subunit of ribosomes.
from_schema: https://w3id.org/nmdc/nmdc
+ broad_mappings:
+ - OMIT:0013248
range: integer
+ minimum_value: 0
num_23s:
name: num_23s
+ description: Number of 23s sequences detected, a subunit of ribosomes.
from_schema: https://w3id.org/nmdc/nmdc
+ broad_mappings:
+ - OMIT:0013245
range: integer
+ minimum_value: 0
num_t_rna:
name: num_t_rna
+ description: Number of transfer RNAs.
from_schema: https://w3id.org/nmdc/nmdc
+ broad_mappings:
+ - NCIT:C816
range: integer
+ minimum_value: 0
gtdbtk_domain:
name: gtdbtk_domain
+ description: Taxonomic domain assigned by GTDB-Tk.
from_schema: https://w3id.org/nmdc/nmdc
+ see_also:
+ - https://doi.org/10.1093/bioinformatics/btz848
+ exact_mappings:
+ - NCBITaxon:superkingdom
range: string
gtdbtk_phylum:
name: gtdbtk_phylum
+ description: Taxonomic phylum assigned by GTDB-Tk.
from_schema: https://w3id.org/nmdc/nmdc
+ see_also:
+ - https://doi.org/10.1093/bioinformatics/btz848
+ exact_mappings:
+ - NCBITaxon:phylum
range: string
gtdbtk_class:
name: gtdbtk_class
+ description: Taxonomic class assigned by GTDB-Tk.
from_schema: https://w3id.org/nmdc/nmdc
+ see_also:
+ - https://doi.org/10.1093/bioinformatics/btz848
+ exact_mappings:
+ - NCBITaxon:class
range: string
gtdbtk_order:
name: gtdbtk_order
+ description: Taxonomic order assigned by GTDB-Tk.
from_schema: https://w3id.org/nmdc/nmdc
+ see_also:
+ - https://doi.org/10.1093/bioinformatics/btz848
+ exact_mappings:
+ - NCBITaxon:order
range: string
gtdbtk_family:
name: gtdbtk_family
+ description: Taxonomic family assigned by GTDB-Tk.
from_schema: https://w3id.org/nmdc/nmdc
+ see_also:
+ - https://doi.org/10.1093/bioinformatics/btz848
+ exact_mappings:
+ - NCBITaxon:family
range: string
gtdbtk_genus:
name: gtdbtk_genus
+ description: Taxonomic genus assigned by GTDB-Tk.
from_schema: https://w3id.org/nmdc/nmdc
+ see_also:
+ - https://doi.org/10.1093/bioinformatics/btz848
+ exact_mappings:
+ - NCBITaxon:genus
range: string
gtdbtk_species:
name: gtdbtk_species
+ description: Taxonomic species assigned by GTDB-Tk.
from_schema: https://w3id.org/nmdc/nmdc
+ see_also:
+ - https://doi.org/10.1093/bioinformatics/btz848
+ exact_mappings:
+ - NCBITaxon:species
range: string
highest_similarity_score:
name: highest_similarity_score
@@ -5298,6 +5466,7 @@ slots:
name: all_proteins
description: the list of protein identifiers that are associated with the peptide
sequence
+ deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250
from_schema: https://w3id.org/nmdc/nmdc
range: GeneProduct
multivalued: true
@@ -5305,24 +5474,28 @@ slots:
name: best_protein
description: the specific protein identifier most correctly associated with the
peptide sequence
+ deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250
from_schema: https://w3id.org/nmdc/nmdc
range: GeneProduct
min_q_value:
name: min_q_value
description: smallest Q-Value associated with the peptide sequence as provided
by MSGFPlus tool
+ deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250
from_schema: https://w3id.org/nmdc/nmdc
see_also:
- OBI:0001442
range: float
peptide_sequence:
name: peptide_sequence
+ deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250
from_schema: https://w3id.org/nmdc/nmdc
range: string
peptide_spectral_count:
name: peptide_spectral_count
description: sum of filter passing MS2 spectra associated with the peptide sequence
within a given LC-MS/MS data file
+ deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250
from_schema: https://w3id.org/nmdc/nmdc
range: integer
peptide_sum_masic_abundance:
@@ -5330,6 +5503,7 @@ slots:
description: combined MS1 extracted ion chromatograms derived from MS2 spectra
associated with the peptide sequence from a given LC-MS/MS data file using the
MASIC tool
+ deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250
from_schema: https://w3id.org/nmdc/nmdc
range: integer
chemical_formula:
@@ -5352,12 +5526,14 @@ slots:
peptide_sequence_count:
name: peptide_sequence_count
description: count of peptide sequences grouped to the best_protein
+ deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250
from_schema: https://w3id.org/nmdc/nmdc
range: integer
protein_spectral_count:
name: protein_spectral_count
description: sum of filter passing MS2 spectra associated with the best protein
within a given LC-MS/MS data file
+ deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250
from_schema: https://w3id.org/nmdc/nmdc
range: integer
protein_sum_masic_abundance:
@@ -5365,6 +5541,7 @@ slots:
description: combined MS1 extracted ion chromatograms derived from MS2 spectra
associated with the best protein from a given LC-MS/MS data file using the MASIC
tool
+ deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250
from_schema: https://w3id.org/nmdc/nmdc
range: integer
smiles:
@@ -17113,6 +17290,24 @@ slots:
from_schema: https://w3id.org/nmdc/nmdc
range: Instrument
multivalued: true
+ in_manifest:
+ name: in_manifest
+ description: one or more combinations of other DataObjects that can be analyzed
+ together
+ comments:
+ - A DataObject can be part of multiple manifests, for example, a DataObject could
+ be part of a manifest for a single run of an instrument and a manifest for technical
+ replicates of a single sample.
+ from_schema: https://w3id.org/nmdc/nmdc
+ range: Manifest
+ multivalued: true
+ manifest_category:
+ name: manifest_category
+ description: The type of context in which the constituent DataObjects can be analyzed
+ together.
+ from_schema: https://w3id.org/nmdc/nmdc
+ range: ManifestCategoryEnum
+ required: true
model:
name: model
from_schema: https://w3id.org/nmdc/nmdc
@@ -17600,12 +17795,6 @@ slots:
from_schema: https://w3id.org/nmdc/nmdc
range: AnalyteCategoryEnum
required: true
- direct_infusion_category:
- name: direct_infusion_category
- description: used when a processed sample is introduced into a mass spectrometer
- without chromatographic separation
- from_schema: https://w3id.org/nmdc/nmdc
- range: DirectInfusionEnum
type:
name: type
description: the class_uri of the class that has been instantiated
@@ -18635,6 +18824,7 @@ slots:
abstract: true
has_peptide_quantifications:
name: has_peptide_quantifications
+ deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250
from_schema: https://w3id.org/nmdc/nmdc
range: PeptideQuantification
multivalued: true
@@ -18825,30 +19015,42 @@ slots:
abstract: true
mags_list:
name: mags_list
+ description: Contains detailed information about each metagenome-assembled genome.
from_schema: https://w3id.org/nmdc/nmdc
range: MagBin
multivalued: true
inlined_as_list: true
too_short_contig_num:
name: too_short_contig_num
+ description: Number of contigs which were excluded from binning for length.
from_schema: https://w3id.org/nmdc/nmdc
range: integer
+ minimum_value: 0
binned_contig_num:
name: binned_contig_num
+ description: Number of contigs that ended up in a medium or high quality bin.
from_schema: https://w3id.org/nmdc/nmdc
range: integer
+ minimum_value: 0
input_contig_num:
name: input_contig_num
+ description: Total number of input contigs.
from_schema: https://w3id.org/nmdc/nmdc
range: integer
+ minimum_value: 0
unbinned_contig_num:
name: unbinned_contig_num
+ description: Number of contigs which did not end up in a medium or high quality
+ bin.
from_schema: https://w3id.org/nmdc/nmdc
range: integer
+ minimum_value: 0
low_depth_contig_num:
name: low_depth_contig_num
+ description: Number of contigs which were excluded from binning for depth of coverage.
from_schema: https://w3id.org/nmdc/nmdc
range: integer
+ minimum_value: 0
input_read_count:
name: input_read_count
description: The sequence count number of input reads for QC analysis.
@@ -18883,40 +19085,6 @@ slots:
description: 'TODO '
from_schema: https://w3id.org/nmdc/nmdc
range: float
- has_calibration:
- name: has_calibration
- description: a calibration instance associated with a process
- notes:
- - 'has_calibration slot will be removed from all WorkflowExecution classes but
- remain on the MassSpectrometry class after an ingest of the appropriate set
- has occurred. Once this has occurred, this slot''s range can be updated to
- CalibrationInformation and class/slot definitions can move to nmdc.yaml. See
- PR #29 in Berkeley schema.'
- from_schema: https://w3id.org/nmdc/nmdc
- any_of:
- - range: CalibrationInformation
- - range: string
- calibration_object:
- name: calibration_object
- description: the file containing calibration data object
- from_schema: https://w3id.org/nmdc/nmdc
- range: DataObject
- internal_calibration:
- name: internal_calibration
- description: whether internal calibration was used, if false, external calibration
- was used
- from_schema: https://w3id.org/nmdc/nmdc
- range: boolean
- calibration_target:
- name: calibration_target
- description: the target measurement of the calibration
- from_schema: https://w3id.org/nmdc/nmdc
- range: CalibrationTargetEnum
- calibration_standard:
- name: calibration_standard
- description: the reference standard(s) used for calibration
- from_schema: https://w3id.org/nmdc/nmdc
- range: CalibrationStandardEnum
has_metabolite_identifications:
name: has_metabolite_identifications
from_schema: https://w3id.org/nmdc/nmdc
@@ -19099,21 +19267,96 @@ classes:
syntax: '{id_nmdc_prefix}:chrcon-{id_shoulder}-{id_blade}$'
interpolated: true
class_uri: nmdc:ChromatographyConfiguration
+ Manifest:
+ name: Manifest
+ description: A qualified collection of DataObjects that can be analyzed together
+ in the same experimental context.
+ comments:
+ - Manifests are currently uncoupled from other modelling. For example, there is
+ no schema requirement that DataObjects in a fractions Manifest were all obtained
+ by analyzing the same ProcessedSample.
+ from_schema: https://w3id.org/nmdc/nmdc
+ is_a: InformationObject
+ slots:
+ - manifest_category
+ slot_usage:
+ id:
+ name: id
+ pattern: ^(nmdc):manif-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$
+ structured_pattern:
+ syntax: '{id_nmdc_prefix}:manif-{id_shoulder}-{id_blade}$'
+ class_uri: nmdc:Manifest
+ CalibrationInformation:
+ name: CalibrationInformation
+ description: A calibration object that is associated with a process.
+ from_schema: https://w3id.org/nmdc/nmdc
+ is_a: InformationObject
+ slots:
+ - calibration_object
+ - internal_calibration
+ - calibration_target
+ - calibration_standard
+ slot_usage:
+ internal_calibration:
+ name: internal_calibration
+ required: true
+ calibration_target:
+ name: calibration_target
+ required: true
+ id:
+ name: id
+ pattern: ^(nmdc):calib-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$
+ structured_pattern:
+ syntax: '{id_nmdc_prefix}:calib-{id_shoulder}-{id_blade}$'
+ interpolated: true
+ class_uri: nmdc:CalibrationInformation
+ rules:
+ - preconditions:
+ slot_conditions:
+ calibration_target:
+ name: calibration_target
+ equals_string: retention_index
+ postconditions:
+ slot_conditions:
+ calibration_standard:
+ name: calibration_standard
+ required: true
+ description: If the calibration_target is retention_index, a calibration_standard
+ is required.
+ title: calibration_standard_if_rt
+ - preconditions:
+ slot_conditions:
+ internal_calibration:
+ name: internal_calibration
+ id_prefixes:
+ - 'False'
+ postconditions:
+ slot_conditions:
+ calibration_object:
+ name: calibration_object
+ required: true
+ description: If internal_calibration is false, a calibration_object is required.
+ title: calibration_object_if_not_internal_calibration
FunctionalAnnotationAggMember:
name: FunctionalAnnotationAggMember
from_schema: https://w3id.org/nmdc/nmdc
slots:
- - metagenome_annotation_id
+ - was_generated_by
- gene_function_id
- count
- type
slot_usage:
- metagenome_annotation_id:
- name: metagenome_annotation_id
- pattern: ^(nmdc):(wfmgan|wfmtan)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\.[0-9]{1,})$
+ was_generated_by:
+ name: was_generated_by
+ required: true
+ pattern: ^(nmdc):(wfmgan|wfmp|wfmtan)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\.[0-9]{1,})$
structured_pattern:
- syntax: '{id_nmdc_prefix}:(wfmgan|wfmtan)-{id_shoulder}-{id_blade}{id_version}$'
+ syntax: '{id_nmdc_prefix}:(wfmgan|wfmp|wfmtan)-{id_shoulder}-{id_blade}{id_version}$'
interpolated: true
+ count:
+ name: count
+ description: The number of sequences (for a metagenome or metatranscriptome)
+ or spectra (for metaproteomics) associated with the specified function.
class_uri: nmdc:FunctionalAnnotationAggMember
Database:
name: Database
@@ -19138,6 +19381,7 @@ classes:
- functional_annotation_set
- genome_feature_set
- instrument_set
+ - manifest_set
- material_processing_set
- processed_sample_set
- protocol_execution_set
@@ -20886,6 +21130,7 @@ classes:
name: PeptideQuantification
description: This is used to link a metaproteomics analysis workflow to a specific
peptide sequence and related information
+ deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250
from_schema: https://w3id.org/nmdc/nmdc
slots:
- type
@@ -20900,6 +21145,7 @@ classes:
name: ProteinQuantification
description: This is used to link a metaproteomics analysis workflow to a specific
protein
+ deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250
from_schema: https://w3id.org/nmdc/nmdc
slots:
- all_proteins
@@ -20908,15 +21154,6 @@ classes:
- protein_spectral_count
- protein_sum_masic_abundance
- type
- slot_usage:
- best_protein:
- name: best_protein
- description: the specific protein identifier most correctly grouped to its
- associated peptide sequences
- all_proteins:
- name: all_proteins
- description: the grouped list of protein identifiers associated with the peptide
- sequences that were grouped to a best protein
class_uri: nmdc:ProteinQuantification
ChemicalEntity:
name: ChemicalEntity
@@ -21575,6 +21812,7 @@ classes:
- md5_checksum
- url
- was_generated_by
+ - in_manifest
slot_usage:
name:
name: name
@@ -22036,7 +22274,6 @@ classes:
from_schema: https://w3id.org/nmdc/nmdc
is_a: WorkflowExecution
slots:
- - has_calibration
- has_metabolite_identifications
slot_usage:
id:
@@ -22059,8 +22296,6 @@ classes:
- workflow subset
from_schema: https://w3id.org/nmdc/nmdc
is_a: WorkflowExecution
- slots:
- - has_peptide_quantifications
slot_usage:
id:
name: id
@@ -22082,8 +22317,6 @@ classes:
- workflow subset
from_schema: https://w3id.org/nmdc/nmdc
is_a: WorkflowExecution
- slots:
- - has_calibration
slot_usage:
id:
name: id
@@ -22099,57 +22332,6 @@ classes:
syntax: '{id_nmdc_prefix}:(omprc|dgms)-{id_shoulder}-{id_blade}$'
interpolated: true
class_uri: nmdc:NomAnalysis
- CalibrationInformation:
- name: CalibrationInformation
- description: A calibration object that is associated with a process.
- from_schema: https://w3id.org/nmdc/nmdc
- is_a: InformationObject
- slots:
- - calibration_object
- - internal_calibration
- - calibration_target
- - calibration_standard
- slot_usage:
- internal_calibration:
- name: internal_calibration
- required: true
- calibration_target:
- name: calibration_target
- required: true
- id:
- name: id
- pattern: ^(nmdc):calib-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$
- structured_pattern:
- syntax: '{id_nmdc_prefix}:calib-{id_shoulder}-{id_blade}$'
- interpolated: true
- class_uri: nmdc:CalibrationInformation
- rules:
- - preconditions:
- slot_conditions:
- calibration_target:
- name: calibration_target
- equals_string: retention_index
- postconditions:
- slot_conditions:
- calibration_standard:
- name: calibration_standard
- required: true
- description: If the calibration_target is retention_index, a calibration_standard
- is required.
- title: calibration_standard_if_rt
- - preconditions:
- slot_conditions:
- internal_calibration:
- name: internal_calibration
- id_prefixes:
- - 'False'
- postconditions:
- slot_conditions:
- calibration_object:
- name: calibration_object
- required: true
- description: If internal_calibration is false, a calibration_object is required.
- title: calibration_object_if_not_internal_calibration
source_file: src/schema/nmdc.yaml
settings:
id_nmdc_prefix: