diff --git a/README.md b/README.md index 558e885c8b..6ab72d206a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -

- -

+
+ Long NMDC logo +
# National Microbiome Data Collaborative Schema diff --git a/nmdc_schema/nmdc.py b/nmdc_schema/nmdc.py index 2b3ebe2655..cf0df9699b 100644 --- a/nmdc_schema/nmdc.py +++ b/nmdc_schema/nmdc.py @@ -1,5 +1,5 @@ # Auto generated from nmdc.yaml by pythongen.py version: 0.0.1 -# Generation date: 2024-10-17T14:33:36 +# Generation date: 2024-11-13T01:04:03 # Schema: NMDC # # id: https://w3id.org/nmdc/nmdc @@ -51,10 +51,12 @@ ENVO = CurieNamespace('ENVO', 'http://purl.obolibrary.org/obo/ENVO_') FBCV = CurieNamespace('FBcv', 'http://purl.obolibrary.org/obo/FBcv_') FMA = CurieNamespace('FMA', 'http://purl.obolibrary.org/obo/FMA_') +GENEPIO = CurieNamespace('GENEPIO', 'http://example.org/UNKNOWN/GENEPIO/') GO = CurieNamespace('GO', 'http://purl.obolibrary.org/obo/GO_') HMDB = CurieNamespace('HMDB', 'https://bioregistry.io/hmdb:') ISA = CurieNamespace('ISA', 'http://example.org/isa/') KEGG_COMPOUND = CurieNamespace('KEGG_COMPOUND', 'https://bioregistry.io/kegg.compound:') +KEGG_MODULE = CurieNamespace('KEGG_MODULE', 'https://bioregistry.io/kegg.module:') KEGG_ORTHOLOGY = CurieNamespace('KEGG_ORTHOLOGY', 'https://bioregistry.io/kegg.orthology:') KEGG_PATHWAY = CurieNamespace('KEGG_PATHWAY', 'https://bioregistry.io/kegg.pathway:') MASSIVE = CurieNamespace('MASSIVE', 'https://bioregistry.io/reference/massive:') @@ -67,6 +69,7 @@ NCBITAXON = CurieNamespace('NCBITaxon', 'http://purl.obolibrary.org/obo/NCBITaxon_') NCIT = CurieNamespace('NCIT', 'http://purl.obolibrary.org/obo/NCIT_') OBI = CurieNamespace('OBI', 'http://purl.obolibrary.org/obo/OBI_') +OMIT = CurieNamespace('OMIT', 'http://example.org/UNKNOWN/OMIT/') ORCID = CurieNamespace('ORCID', 'https://orcid.org/') PANTHER_FAMILY = CurieNamespace('PANTHER_FAMILY', 'https://bioregistry.io/panther.family:') PATO = CurieNamespace('PATO', 'http://purl.obolibrary.org/obo/PATO_') @@ -295,6 +298,10 @@ class ChromatographyConfigurationId(ConfigurationId): pass +class ManifestId(InformationObjectId): + pass + + class 
CalibrationInformationId(InformationObjectId): pass @@ -414,16 +421,16 @@ class FunctionalAnnotationAggMember(YAMLRoot): class_name: ClassVar[str] = "FunctionalAnnotationAggMember" class_model_uri: ClassVar[URIRef] = NMDC.FunctionalAnnotationAggMember - metagenome_annotation_id: Union[str, WorkflowExecutionId] = None + was_generated_by: Union[str, WorkflowExecutionId] = None gene_function_id: Union[str, URIorCURIE] = None count: int = None type: Union[str, URIorCURIE] = None def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): - if self._is_empty(self.metagenome_annotation_id): - self.MissingRequiredField("metagenome_annotation_id") - if not isinstance(self.metagenome_annotation_id, WorkflowExecutionId): - self.metagenome_annotation_id = WorkflowExecutionId(self.metagenome_annotation_id) + if self._is_empty(self.was_generated_by): + self.MissingRequiredField("was_generated_by") + if not isinstance(self.was_generated_by, WorkflowExecutionId): + self.was_generated_by = WorkflowExecutionId(self.was_generated_by) if self._is_empty(self.gene_function_id): self.MissingRequiredField("gene_function_id") @@ -469,6 +476,7 @@ class Database(YAMLRoot): functional_annotation_set: Optional[Union[Union[dict, "FunctionalAnnotation"], List[Union[dict, "FunctionalAnnotation"]]]] = empty_list() genome_feature_set: Optional[Union[Union[dict, "GenomeFeature"], List[Union[dict, "GenomeFeature"]]]] = empty_list() instrument_set: Optional[Union[Dict[Union[str, InstrumentId], Union[dict, "Instrument"]], List[Union[dict, "Instrument"]]]] = empty_dict() + manifest_set: Optional[Union[Dict[Union[str, ManifestId], Union[dict, "Manifest"]], List[Union[dict, "Manifest"]]]] = empty_dict() material_processing_set: Optional[Union[Dict[Union[str, MaterialProcessingId], Union[dict, "MaterialProcessing"]], List[Union[dict, "MaterialProcessing"]]]] = empty_dict() processed_sample_set: Optional[Union[Dict[Union[str, ProcessedSampleId], Union[dict, "ProcessedSample"]], List[Union[dict, 
"ProcessedSample"]]]] = empty_dict() protocol_execution_set: Optional[Union[Dict[Union[str, ProtocolExecutionId], Union[dict, "ProtocolExecution"]], List[Union[dict, "ProtocolExecution"]]]] = empty_dict() @@ -507,6 +515,8 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): self._normalize_inlined_as_list(slot_name="instrument_set", slot_type=Instrument, key_name="id", keyed=True) + self._normalize_inlined_as_list(slot_name="manifest_set", slot_type=Manifest, key_name="id", keyed=True) + self._normalize_inlined_as_list(slot_name="material_processing_set", slot_type=MaterialProcessing, key_name="id", keyed=True) self._normalize_inlined_as_list(slot_name="processed_sample_set", slot_type=ProcessedSample, key_name="id", keyed=True) @@ -718,9 +728,9 @@ class MagBin(YAMLRoot): class_name: ClassVar[str] = "MagBin" class_model_uri: ClassVar[URIRef] = NMDC.MagBin + bin_name: str = None type: Union[str, URIorCURIE] = None - bin_name: Optional[str] = None - bin_quality: Optional[str] = None + bin_quality: Optional[Union[str, "BinQualityEnum"]] = None completeness: Optional[float] = None contamination: Optional[float] = None gene_count: Optional[int] = None @@ -741,15 +751,17 @@ class MagBin(YAMLRoot): eukaryotic_evaluation: Optional[Union[dict, EukEval]] = None def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): + if self._is_empty(self.bin_name): + self.MissingRequiredField("bin_name") + if not isinstance(self.bin_name, str): + self.bin_name = str(self.bin_name) + if self._is_empty(self.type): self.MissingRequiredField("type") self.type = str(self.class_class_curie) - if self.bin_name is not None and not isinstance(self.bin_name, str): - self.bin_name = str(self.bin_name) - - if self.bin_quality is not None and not isinstance(self.bin_quality, str): - self.bin_quality = str(self.bin_quality) + if self.bin_quality is not None and not isinstance(self.bin_quality, BinQualityEnum): + self.bin_quality = BinQualityEnum(self.bin_quality) if 
self.completeness is not None and not isinstance(self.completeness, float): self.completeness = float(self.completeness) @@ -5412,6 +5424,39 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): self.type = str(self.class_class_curie) +@dataclass(repr=False) +class Manifest(InformationObject): + """ + A qualified collection of DataObjects that can be analyzed together in the same experimental context. + """ + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = NMDC["Manifest"] + class_class_curie: ClassVar[str] = "nmdc:Manifest" + class_name: ClassVar[str] = "Manifest" + class_model_uri: ClassVar[URIRef] = NMDC.Manifest + + id: Union[str, ManifestId] = None + type: Union[str, URIorCURIE] = None + manifest_category: Union[str, "ManifestCategoryEnum"] = None + + def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): + if self._is_empty(self.id): + self.MissingRequiredField("id") + if not isinstance(self.id, ManifestId): + self.id = ManifestId(self.id) + + if self._is_empty(self.manifest_category): + self.MissingRequiredField("manifest_category") + if not isinstance(self.manifest_category, ManifestCategoryEnum): + self.manifest_category = ManifestCategoryEnum(self.manifest_category) + + super().__post_init__(**kwargs) + if self._is_empty(self.type): + self.MissingRequiredField("type") + self.type = str(self.class_class_curie) + + @dataclass(repr=False) class CalibrationInformation(InformationObject): """ @@ -5484,6 +5529,7 @@ class DataObject(InformationObject): md5_checksum: Optional[str] = None url: Optional[str] = None was_generated_by: Optional[Union[str, WorkflowExecutionId]] = None + in_manifest: Optional[Union[Union[str, ManifestId], List[Union[str, ManifestId]]]] = empty_list() def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): if self._is_empty(self.id): @@ -5526,6 +5572,10 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): if self.was_generated_by is not None and not 
isinstance(self.was_generated_by, WorkflowExecutionId): self.was_generated_by = WorkflowExecutionId(self.was_generated_by) + if not isinstance(self.in_manifest, list): + self.in_manifest = [self.in_manifest] if self.in_manifest is not None else [] + self.in_manifest = [v if isinstance(v, ManifestId) else ManifestId(v) for v in self.in_manifest] + super().__post_init__(**kwargs) if self._is_empty(self.type): self.MissingRequiredField("type") @@ -5672,7 +5722,7 @@ class MassSpectrometry(DataGeneration): associated_studies: Union[Union[str, StudyId], List[Union[str, StudyId]]] = None has_input: Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]] = None eluent_introduction_category: Optional[Union[str, "EluentIntroductionCategoryEnum"]] = None - has_calibration: Optional[str] = None + has_calibration: Optional[Union[str, CalibrationInformationId]] = None has_chromatography_configuration: Optional[Union[str, ChromatographyConfigurationId]] = None has_mass_spectrometry_configuration: Optional[Union[str, MassSpectrometryConfigurationId]] = None @@ -5685,8 +5735,8 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): if self.eluent_introduction_category is not None and not isinstance(self.eluent_introduction_category, EluentIntroductionCategoryEnum): self.eluent_introduction_category = EluentIntroductionCategoryEnum(self.eluent_introduction_category) - if self.has_calibration is not None and not isinstance(self.has_calibration, str): - self.has_calibration = str(self.has_calibration) + if self.has_calibration is not None and not isinstance(self.has_calibration, CalibrationInformationId): + self.has_calibration = CalibrationInformationId(self.has_calibration) if self.has_chromatography_configuration is not None and not isinstance(self.has_chromatography_configuration, ChromatographyConfigurationId): self.has_chromatography_configuration = ChromatographyConfigurationId(self.has_chromatography_configuration) @@ -6364,7 +6414,6 @@ class 
MetabolomicsAnalysis(WorkflowExecution): git_url: str = None started_at_time: str = None has_input: Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]] = None - has_calibration: Optional[str] = None has_metabolite_identifications: Optional[Union[Union[dict, MetaboliteIdentification], List[Union[dict, MetaboliteIdentification]]]] = empty_list() was_informed_by: Optional[Union[str, DataGenerationId]] = None @@ -6374,9 +6423,6 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): if not isinstance(self.id, MetabolomicsAnalysisId): self.id = MetabolomicsAnalysisId(self.id) - if self.has_calibration is not None and not isinstance(self.has_calibration, str): - self.has_calibration = str(self.has_calibration) - if not isinstance(self.has_metabolite_identifications, list): self.has_metabolite_identifications = [self.has_metabolite_identifications] if self.has_metabolite_identifications is not None else [] self.has_metabolite_identifications = [v if isinstance(v, MetaboliteIdentification) else MetaboliteIdentification(**as_dict(v)) for v in self.has_metabolite_identifications] @@ -6405,7 +6451,6 @@ class MetaproteomicsAnalysis(WorkflowExecution): git_url: str = None started_at_time: str = None has_input: Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]] = None - has_peptide_quantifications: Optional[Union[Union[dict, PeptideQuantification], List[Union[dict, PeptideQuantification]]]] = empty_list() was_informed_by: Optional[Union[str, DataGenerationId]] = None def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): @@ -6414,10 +6459,6 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): if not isinstance(self.id, MetaproteomicsAnalysisId): self.id = MetaproteomicsAnalysisId(self.id) - if not isinstance(self.has_peptide_quantifications, list): - self.has_peptide_quantifications = [self.has_peptide_quantifications] if self.has_peptide_quantifications is not None else [] - self.has_peptide_quantifications = [v 
if isinstance(v, PeptideQuantification) else PeptideQuantification(**as_dict(v)) for v in self.has_peptide_quantifications] - if self.was_informed_by is not None and not isinstance(self.was_informed_by, DataGenerationId): self.was_informed_by = DataGenerationId(self.was_informed_by) @@ -6442,7 +6483,6 @@ class NomAnalysis(WorkflowExecution): git_url: str = None started_at_time: str = None has_input: Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]] = None - has_calibration: Optional[str] = None was_informed_by: Optional[Union[str, DataGenerationId]] = None def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): @@ -6451,9 +6491,6 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): if not isinstance(self.id, NomAnalysisId): self.id = NomAnalysisId(self.id) - if self.has_calibration is not None and not isinstance(self.has_calibration, str): - self.has_calibration = str(self.has_calibration) - if self.was_informed_by is not None and not isinstance(self.was_informed_by, DataGenerationId): self.was_informed_by = DataGenerationId(self.was_informed_by) @@ -6509,6 +6546,25 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): # Enumerations +class CalibrationTargetEnum(EnumDefinitionImpl): + + mass_charge_ratio = PermissibleValue(text="mass_charge_ratio") + retention_time = PermissibleValue(text="retention_time") + retention_index = PermissibleValue(text="retention_index") + + _defn = EnumDefinition( + name="CalibrationTargetEnum", + ) + +class CalibrationStandardEnum(EnumDefinitionImpl): + + fames = PermissibleValue(text="fames") + alkanes = PermissibleValue(text="alkanes") + + _defn = EnumDefinition( + name="CalibrationStandardEnum", + ) + class StrandedOrientationEnum(EnumDefinitionImpl): """ This enumeration specifies information about stranded RNA library preparations. 
@@ -6752,23 +6808,20 @@ class SamplePortionEnum(EnumDefinitionImpl): name="SamplePortionEnum", ) -class CalibrationTargetEnum(EnumDefinitionImpl): - - mass_charge_ratio = PermissibleValue(text="mass_charge_ratio") - retention_time = PermissibleValue(text="retention_time") - retention_index = PermissibleValue(text="retention_index") - - _defn = EnumDefinition( - name="CalibrationTargetEnum", - ) - -class CalibrationStandardEnum(EnumDefinitionImpl): +class BinQualityEnum(EnumDefinitionImpl): - fames = PermissibleValue(text="fames") - alkanes = PermissibleValue(text="alkanes") + HQ = PermissibleValue( + text="HQ", + description="""Metagenome-assembled genome is high quality based on MIMAG standards (https://doi.org/10.1038/nbt.3893)""") + MQ = PermissibleValue( + text="MQ", + description="""Metagenome-assembled genome is medium quality based on MIMAG standards (https://doi.org/10.1038/nbt.3893)""") + LQ = PermissibleValue( + text="LQ", + description="""Metagenome-assembled genome is low quality based on MIMAG standards (https://doi.org/10.1038/nbt.3893)""") _defn = EnumDefinition( - name="CalibrationStandardEnum", + name="BinQualityEnum", ) class ChemicalConversionCategoryEnum(EnumDefinitionImpl): @@ -6937,6 +6990,25 @@ class StudyCategoryEnum(EnumDefinitionImpl): name="StudyCategoryEnum", ) +class ManifestCategoryEnum(EnumDefinitionImpl): + """ + A list of contexts in which some DataObjects can be analyzed together. 
+ """ + instrument_run = PermissibleValue( + text="instrument_run", + description="A collection of data objects from a single run of an instrument.") + poolable_replicates = PermissibleValue( + text="poolable_replicates", + description="A collection of data objects that can be pooled for downstream analyses.") + fractions = PermissibleValue( + text="fractions", + description="A collection of data objects that represent fractions of a single sample.") + + _defn = EnumDefinition( + name="ManifestCategoryEnum", + description="A list of contexts in which some DataObjects can be analyzed together.", + ) + class InstrumentModelEnum(EnumDefinitionImpl): exploris_21T = PermissibleValue(text="exploris_21T") @@ -7169,6 +7241,18 @@ class FileTypeEnum(EnumDefinitionImpl): @classmethod def _addvals(cls): + setattr(cls, "Virus Summary", + PermissibleValue( + text="Virus Summary", + description="Tab separated file listing the viruses found by geNomad.")) + setattr(cls, "Plasmid Summary", + PermissibleValue( + text="Plasmid Summary", + description="Tab separated file listing the plasmids found by geNomad.")) + setattr(cls, "GeNomad Aggregated Classification", + PermissibleValue( + text="GeNomad Aggregated Classification", + description="""Tab separated file which combines the results from neural network-based classification and marker-based classification for virus and plasmid detection with geNomad.""")) setattr(cls, "Reference Calibration File", PermissibleValue( text="Reference Calibration File", @@ -7529,6 +7613,9 @@ class DoiProviderEnum(EnumDefinitionImpl): edi = PermissibleValue( text="edi", meaning=None) + figshare = PermissibleValue( + text="figshare", + meaning=None) _defn = EnumDefinition( name="DoiProviderEnum", @@ -7579,15 +7666,6 @@ class AnalyteCategoryEnum(EnumDefinitionImpl): name="AnalyteCategoryEnum", ) -class DirectInfusionEnum(EnumDefinitionImpl): - - direct_infusion = PermissibleValue(text="direct_infusion") - autosampler = 
PermissibleValue(text="autosampler") - - _defn = EnumDefinition( - name="DirectInfusionEnum", - ) - class ExtractionTargetEnum(EnumDefinitionImpl): DNA = PermissibleValue(text="DNA") @@ -7620,6 +7698,9 @@ class ProcessingInstitutionEnum(EnumDefinitionImpl): UCD_Genome_Center = PermissibleValue( text="UCD_Genome_Center", meaning=None) + Azenta = PermissibleValue( + text="Azenta", + meaning=None) _defn = EnumDefinition( name="ProcessingInstitutionEnum", @@ -9694,6 +9775,21 @@ class WindowVertPosEnum(EnumDefinitionImpl): class slots: pass +slots.has_calibration = Slot(uri=NMDC.has_calibration, name="has_calibration", curie=NMDC.curie('has_calibration'), + model_uri=NMDC.has_calibration, domain=None, range=Optional[Union[str, CalibrationInformationId]]) + +slots.calibration_object = Slot(uri=NMDC.calibration_object, name="calibration_object", curie=NMDC.curie('calibration_object'), + model_uri=NMDC.calibration_object, domain=None, range=Optional[Union[str, DataObjectId]]) + +slots.internal_calibration = Slot(uri=NMDC.internal_calibration, name="internal_calibration", curie=NMDC.curie('internal_calibration'), + model_uri=NMDC.internal_calibration, domain=None, range=Optional[Union[bool, Bool]]) + +slots.calibration_target = Slot(uri=NMDC.calibration_target, name="calibration_target", curie=NMDC.curie('calibration_target'), + model_uri=NMDC.calibration_target, domain=None, range=Optional[Union[str, "CalibrationTargetEnum"]]) + +slots.calibration_standard = Slot(uri=NMDC.calibration_standard, name="calibration_standard", curie=NMDC.curie('calibration_standard'), + model_uri=NMDC.calibration_standard, domain=None, range=Optional[Union[str, "CalibrationStandardEnum"]]) + slots.polarity_mode = Slot(uri=NMDC.polarity_mode, name="polarity_mode", curie=NMDC.curie('polarity_mode'), model_uri=NMDC.polarity_mode, domain=None, range=Optional[Union[str, "PolarityModeEnum"]]) @@ -9824,6 +9920,9 @@ class slots: slots.configuration_set = Slot(uri=NMDC.configuration_set, 
name="configuration_set", curie=NMDC.curie('configuration_set'), model_uri=NMDC.configuration_set, domain=None, range=Optional[Union[Dict[Union[str, ConfigurationId], Union[dict, Configuration]], List[Union[dict, Configuration]]]]) +slots.manifest_set = Slot(uri=NMDC.manifest_set, name="manifest_set", curie=NMDC.curie('manifest_set'), + model_uri=NMDC.manifest_set, domain=None, range=Optional[Union[Dict[Union[str, ManifestId], Union[dict, Manifest]], List[Union[dict, Manifest]]]]) + slots.protocol_execution_set = Slot(uri=NMDC.protocol_execution_set, name="protocol_execution_set", curie=NMDC.curie('protocol_execution_set'), model_uri=NMDC.protocol_execution_set, domain=None, range=Optional[Union[Dict[Union[str, ProtocolExecutionId], Union[dict, ProtocolExecution]], List[Union[dict, ProtocolExecution]]]]) @@ -10068,21 +10167,6 @@ class slots: slots.input_read_bases = Slot(uri=NMDC.input_read_bases, name="input_read_bases", curie=NMDC.curie('input_read_bases'), model_uri=NMDC.input_read_bases, domain=None, range=Optional[float]) -slots.has_calibration = Slot(uri=NMDC.has_calibration, name="has_calibration", curie=NMDC.curie('has_calibration'), - model_uri=NMDC.has_calibration, domain=None, range=Optional[str]) - -slots.calibration_object = Slot(uri=NMDC.calibration_object, name="calibration_object", curie=NMDC.curie('calibration_object'), - model_uri=NMDC.calibration_object, domain=None, range=Optional[Union[str, DataObjectId]]) - -slots.internal_calibration = Slot(uri=NMDC.internal_calibration, name="internal_calibration", curie=NMDC.curie('internal_calibration'), - model_uri=NMDC.internal_calibration, domain=None, range=Optional[Union[bool, Bool]]) - -slots.calibration_target = Slot(uri=NMDC.calibration_target, name="calibration_target", curie=NMDC.curie('calibration_target'), - model_uri=NMDC.calibration_target, domain=None, range=Optional[Union[str, "CalibrationTargetEnum"]]) - -slots.calibration_standard = Slot(uri=NMDC.calibration_standard, 
name="calibration_standard", curie=NMDC.curie('calibration_standard'), - model_uri=NMDC.calibration_standard, domain=None, range=Optional[Union[str, "CalibrationStandardEnum"]]) - slots.has_metabolite_identifications = Slot(uri=NMDC.has_metabolite_identifications, name="has_metabolite_identifications", curie=NMDC.curie('has_metabolite_identifications'), model_uri=NMDC.has_metabolite_identifications, domain=None, range=Optional[Union[Union[dict, MetaboliteIdentification], List[Union[dict, MetaboliteIdentification]]]]) @@ -10168,7 +10252,7 @@ class slots: model_uri=NMDC.members_id, domain=None, range=Optional[Union[str, List[str]]]) slots.bin_name = Slot(uri=NMDC.bin_name, name="bin_name", curie=NMDC.curie('bin_name'), - model_uri=NMDC.bin_name, domain=None, range=Optional[str]) + model_uri=NMDC.bin_name, domain=None, range=str) slots.number_of_contig = Slot(uri=NMDC.number_of_contig, name="number_of_contig", curie=NMDC.curie('number_of_contig'), model_uri=NMDC.number_of_contig, domain=None, range=Optional[int]) @@ -10183,7 +10267,7 @@ class slots: model_uri=NMDC.gene_count, domain=None, range=Optional[int]) slots.bin_quality = Slot(uri=NMDC.bin_quality, name="bin_quality", curie=NMDC.curie('bin_quality'), - model_uri=NMDC.bin_quality, domain=None, range=Optional[str]) + model_uri=NMDC.bin_quality, domain=None, range=Optional[Union[str, "BinQualityEnum"]]) slots.num_16s = Slot(uri=NMDC.num_16s, name="num_16s", curie=NMDC.curie('num_16s'), model_uri=NMDC.num_16s, domain=None, range=Optional[int]) @@ -10499,6 +10583,12 @@ class slots: slots.instrument_used = Slot(uri=NMDC['basic_classes/instrument_used'], name="instrument_used", curie=NMDC.curie('basic_classes/instrument_used'), model_uri=NMDC.instrument_used, domain=None, range=Optional[Union[Union[str, InstrumentId], List[Union[str, InstrumentId]]]]) +slots.in_manifest = Slot(uri=NMDC['basic_classes/in_manifest'], name="in_manifest", curie=NMDC.curie('basic_classes/in_manifest'), + model_uri=NMDC.in_manifest, 
domain=None, range=Optional[Union[Union[str, ManifestId], List[Union[str, ManifestId]]]]) + +slots.manifest_category = Slot(uri=NMDC['basic_classes/manifest_category'], name="manifest_category", curie=NMDC.curie('basic_classes/manifest_category'), + model_uri=NMDC.manifest_category, domain=None, range=Union[str, "ManifestCategoryEnum"]) + slots.model = Slot(uri=NMDC['basic_classes/model'], name="model", curie=NMDC.curie('basic_classes/model'), model_uri=NMDC.model, domain=None, range=Optional[Union[str, "InstrumentModelEnum"]]) @@ -10688,9 +10778,6 @@ class slots: slots.analyte_category = Slot(uri=NMDC.analyte_category, name="analyte_category", curie=NMDC.curie('analyte_category'), model_uri=NMDC.analyte_category, domain=None, range=Union[str, "AnalyteCategoryEnum"]) -slots.direct_infusion_category = Slot(uri=NMDC.direct_infusion_category, name="direct_infusion_category", curie=NMDC.curie('direct_infusion_category'), - model_uri=NMDC.direct_infusion_category, domain=None, range=Optional[Union[str, "DirectInfusionEnum"]]) - slots.type = Slot(uri=RDF.type, name="type", curie=RDF.curie('type'), model_uri=NMDC.type, domain=None, range=Union[str, URIorCURIE]) @@ -12359,7 +12446,7 @@ class slots: pattern=re.compile(r'^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$')) slots.MassSpectrometry_has_calibration = Slot(uri=NMDC.has_calibration, name="MassSpectrometry_has_calibration", curie=NMDC.curie('has_calibration'), - model_uri=NMDC.MassSpectrometry_has_calibration, domain=MassSpectrometry, range=Optional[str]) + model_uri=NMDC.MassSpectrometry_has_calibration, domain=MassSpectrometry, range=Optional[Union[str, CalibrationInformationId]]) slots.MassSpectrometry_has_chromatography_configuration = Slot(uri=NMDC.has_chromatography_configuration, name="MassSpectrometry_has_chromatography_configuration", curie=NMDC.curie('has_chromatography_configuration'), model_uri=NMDC.MassSpectrometry_has_chromatography_configuration, domain=MassSpectrometry, 
range=Optional[Union[str, ChromatographyConfigurationId]]) @@ -12387,8 +12474,25 @@ class slots: model_uri=NMDC.ChromatographyConfiguration_id, domain=ChromatographyConfiguration, range=Union[str, ChromatographyConfigurationId], pattern=re.compile(r'^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$')) -slots.FunctionalAnnotationAggMember_metagenome_annotation_id = Slot(uri=NMDC.metagenome_annotation_id, name="FunctionalAnnotationAggMember_metagenome_annotation_id", curie=NMDC.curie('metagenome_annotation_id'), - model_uri=NMDC.FunctionalAnnotationAggMember_metagenome_annotation_id, domain=FunctionalAnnotationAggMember, range=Union[str, WorkflowExecutionId]) +slots.Manifest_id = Slot(uri=NMDC.id, name="Manifest_id", curie=NMDC.curie('id'), + model_uri=NMDC.Manifest_id, domain=Manifest, range=Union[str, ManifestId], + pattern=re.compile(r'^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$')) + +slots.CalibrationInformation_internal_calibration = Slot(uri=NMDC.internal_calibration, name="CalibrationInformation_internal_calibration", curie=NMDC.curie('internal_calibration'), + model_uri=NMDC.CalibrationInformation_internal_calibration, domain=CalibrationInformation, range=Union[bool, Bool]) + +slots.CalibrationInformation_calibration_target = Slot(uri=NMDC.calibration_target, name="CalibrationInformation_calibration_target", curie=NMDC.curie('calibration_target'), + model_uri=NMDC.CalibrationInformation_calibration_target, domain=CalibrationInformation, range=Union[str, "CalibrationTargetEnum"]) + +slots.CalibrationInformation_id = Slot(uri=NMDC.id, name="CalibrationInformation_id", curie=NMDC.curie('id'), + model_uri=NMDC.CalibrationInformation_id, domain=CalibrationInformation, range=Union[str, CalibrationInformationId], + pattern=re.compile(r'^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$')) + +slots.FunctionalAnnotationAggMember_was_generated_by = Slot(uri=NMDC['basic_classes/was_generated_by'], 
name="FunctionalAnnotationAggMember_was_generated_by", curie=NMDC.curie('basic_classes/was_generated_by'), + model_uri=NMDC.FunctionalAnnotationAggMember_was_generated_by, domain=FunctionalAnnotationAggMember, range=Union[str, WorkflowExecutionId], mappings = [PROV["wasGeneratedBy"]]) + +slots.FunctionalAnnotationAggMember_count = Slot(uri=NMDC.count, name="FunctionalAnnotationAggMember_count", curie=NMDC.curie('count'), + model_uri=NMDC.FunctionalAnnotationAggMember_count, domain=FunctionalAnnotationAggMember, range=int) slots.Pooling_has_input = Slot(uri=NMDC['basic_classes/has_input'], name="Pooling_has_input", curie=NMDC.curie('basic_classes/has_input'), model_uri=NMDC.Pooling_has_input, domain=Pooling, range=Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]]) @@ -12633,16 +12737,6 @@ class slots: slots.NomAnalysis_was_informed_by = Slot(uri=NMDC['basic_classes/was_informed_by'], name="NomAnalysis_was_informed_by", curie=NMDC.curie('basic_classes/was_informed_by'), model_uri=NMDC.NomAnalysis_was_informed_by, domain=NomAnalysis, range=Optional[Union[str, DataGenerationId]], mappings = [PROV["wasInformedBy"]]) -slots.CalibrationInformation_internal_calibration = Slot(uri=NMDC.internal_calibration, name="CalibrationInformation_internal_calibration", curie=NMDC.curie('internal_calibration'), - model_uri=NMDC.CalibrationInformation_internal_calibration, domain=CalibrationInformation, range=Union[bool, Bool]) - -slots.CalibrationInformation_calibration_target = Slot(uri=NMDC.calibration_target, name="CalibrationInformation_calibration_target", curie=NMDC.curie('calibration_target'), - model_uri=NMDC.CalibrationInformation_calibration_target, domain=CalibrationInformation, range=Union[str, "CalibrationTargetEnum"]) - -slots.CalibrationInformation_id = Slot(uri=NMDC.id, name="CalibrationInformation_id", curie=NMDC.curie('id'), - model_uri=NMDC.CalibrationInformation_id, domain=CalibrationInformation, range=Union[str, CalibrationInformationId], - 
pattern=re.compile(r'^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$')) - slots.ChemicalConversionProcess_id = Slot(uri=NMDC.id, name="ChemicalConversionProcess_id", curie=NMDC.curie('id'), model_uri=NMDC.ChemicalConversionProcess_id, domain=ChemicalConversionProcess, range=Union[str, ChemicalConversionProcessId], pattern=re.compile(r'^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$')) @@ -12794,12 +12888,6 @@ class slots: model_uri=NMDC.ProcessedSample_id, domain=ProcessedSample, range=Union[str, ProcessedSampleId], pattern=re.compile(r'^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$')) -slots.ProteinQuantification_best_protein = Slot(uri=NMDC.best_protein, name="ProteinQuantification_best_protein", curie=NMDC.curie('best_protein'), - model_uri=NMDC.ProteinQuantification_best_protein, domain=ProteinQuantification, range=Optional[Union[str, GeneProductId]]) - -slots.ProteinQuantification_all_proteins = Slot(uri=NMDC.all_proteins, name="ProteinQuantification_all_proteins", curie=NMDC.curie('all_proteins'), - model_uri=NMDC.ProteinQuantification_all_proteins, domain=ProteinQuantification, range=Optional[Union[Union[str, GeneProductId], List[Union[str, GeneProductId]]]]) - slots.QuantityValue_has_raw_value = Slot(uri=NMDC.has_raw_value, name="QuantityValue_has_raw_value", curie=NMDC.curie('has_raw_value'), model_uri=NMDC.QuantityValue_has_raw_value, domain=QuantityValue, range=Optional[str]) diff --git a/nmdc_schema/nmdc.schema.json b/nmdc_schema/nmdc.schema.json index d9d971d9fc..f86179a14f 100644 --- a/nmdc_schema/nmdc.schema.json +++ b/nmdc_schema/nmdc.schema.json @@ -37,6 +37,16 @@ "title": "ArchStrucEnum", "type": "string" }, + "BinQualityEnum": { + "description": "", + "enum": [ + "HQ", + "MQ", + "LQ" + ], + "title": "BinQualityEnum", + "type": "string" + }, "BiolStatEnum": { "description": "", "enum": [ @@ -6619,6 +6629,16 @@ "pattern": "^[a-zA-Z0-9][a-zA-Z0-9_\\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\\-\\/\\.,]*$", "type": "string" 
}, + "in_manifest": { + "description": "one or more combinations of other DataObjects that can be analyzed together", + "items": { + "type": "string" + }, + "type": [ + "array", + "null" + ] + }, "insdc_experiment_identifiers": { "items": { "pattern": "^insdc.sra:(E|D|S)RX[0-9]{6,}$", @@ -6812,6 +6832,16 @@ "null" ] }, + "manifest_set": { + "description": "This property links a database object to the set of manifests within it.", + "items": { + "$ref": "#/$defs/Manifest" + }, + "type": [ + "array", + "null" + ] + }, "material_processing_set": { "description": "This property links a database object to the set of material processing within it.", "items": { @@ -6964,15 +6994,6 @@ "title": "DeposEnvEnum", "type": "string" }, - "DirectInfusionEnum": { - "description": "", - "enum": [ - "direct_infusion", - "autosampler" - ], - "title": "DirectInfusionEnum", - "type": "string" - }, "DissolvingProcess": { "additionalProperties": false, "description": "A mixing step where a soluble component is mixed with a liquid component.", @@ -7220,7 +7241,8 @@ "massive", "gsc", "zenodo", - "edi" + "edi", + "figshare" ], "title": "DoiProviderEnum", "type": "string" @@ -7431,12 +7453,16 @@ "description": "This class contains information pertaining to evaluating if a Metagenome-Assembled Genome (MAG) is eukaryotic.", "properties": { "completeness": { + "description": "Estimate of the completeness of the metagenome-assembled genome, estimated by a tool like CheckM.", + "minimum": 0, "type": [ "number", "null" ] }, "contamination": { + "description": "Estimate of the contamination of the metagenome-assembled genome, estimated by a tool like CheckM.", + "minimum": 0, "type": [ "number", "null" @@ -7923,6 +7949,9 @@ "FileTypeEnum": { "description": "", "enum": [ + "Virus Summary", + "Plasmid Summary", + "GeNomad Aggregated Classification", "Reference Calibration File", "Metagenome Raw Reads", "Metagenome Raw Read 1", @@ -8369,13 +8398,21 @@ "description": "", "properties": { "count": { + 
"description": "The number of sequences (for a metagenome or metatranscriptome) or spectra (for metaproteomics) associated with the specified function.", "type": "integer" }, "gene_function_id": { "description": "The identifier for the gene function.", "type": "string" }, - "metagenome_annotation_id": { + "type": { + "description": "the class_uri of the class that has been instantiated", + "enum": [ + "nmdc:FunctionalAnnotationAggMember" + ], + "type": "string" + }, + "was_generated_by": { "anyOf": [ { "type": "string" @@ -8384,19 +8421,11 @@ "type": "string" } ], - "description": "The identifier for the analysis activity that generated the functional annotation results, where the analysis activity is an instance of the/an appropriate subclass of WorkflowExecution", - "type": "string" - }, - "type": { - "description": "the class_uri of the class that has been instantiated", - "enum": [ - "nmdc:FunctionalAnnotationAggMember" - ], "type": "string" } }, "required": [ - "metagenome_annotation_id", + "was_generated_by", "gene_function_id", "count", "type" @@ -9158,24 +9187,24 @@ "description": "", "properties": { "bin_name": { - "type": [ - "string", - "null" - ] + "description": "Name of the metagenome-assembled genome.", + "type": "string" }, "bin_quality": { - "type": [ - "string", - "null" - ] + "$ref": "#/$defs/BinQualityEnum", + "description": "The quality of the metagenome-assembled genome based on MIMAG standards (https://doi.org/10.1038/nbt.3893)." }, "completeness": { + "description": "Estimate of the completeness of the metagenome-assembled genome, estimated by a tool like CheckM.", + "minimum": 0, "type": [ "number", "null" ] }, "contamination": { + "description": "Estimate of the completeness of the metagenome-assembled genome, estimated by a tool like CheckM.", + "minimum": 0, "type": [ "number", "null" @@ -9193,54 +9222,64 @@ "description": "Contains results from evaluating if a Metagenome-Assembled Genome is of eukaryotic lineage." 
}, "gene_count": { + "description": "Number of genes.", + "minimum": 0, "type": [ "integer", "null" ] }, "gtdbtk_class": { + "description": "Taxonomic class assigned by GTDB-Tk.", "type": [ "string", "null" ] }, "gtdbtk_domain": { + "description": "Taxonomic domain assigned by GTDB-Tk.", "type": [ "string", "null" ] }, "gtdbtk_family": { + "description": "Taxonomic family assigned by GTDB-Tk.", "type": [ "string", "null" ] }, "gtdbtk_genus": { + "description": "Taxonomic genus assigned by GTDB-Tk.", "type": [ "string", "null" ] }, "gtdbtk_order": { + "description": "Taxonomic order assigned by GTDB-Tk.", "type": [ "string", "null" ] }, "gtdbtk_phylum": { + "description": "Taxonomic phylum assigned by GTDB-Tk.", "type": [ "string", "null" ] }, "gtdbtk_species": { + "description": "Taxonomic species assigned by GTDB-Tk.", "type": [ "string", "null" ] }, "members_id": { + "description": "Names of the contigs that make up a metagenome-assembled genome.", "items": { "type": "string" }, @@ -9250,36 +9289,48 @@ ] }, "num_16s": { + "description": "Number of 16s sequences detected, a subunit of prokaryotic ribosomes.", + "minimum": 0, "type": [ "integer", "null" ] }, "num_23s": { + "description": "Number of 23s sequences detected, a subunit of ribosomes.", + "minimum": 0, "type": [ "integer", "null" ] }, "num_5s": { + "description": "Number of 5s sequences detected, a subunit of ribosomes.", + "minimum": 0, "type": [ "integer", "null" ] }, "num_t_rna": { + "description": "Number of transfer RNAs.", + "minimum": 0, "type": [ "integer", "null" ] }, "number_of_contig": { + "description": "Number of contigs", + "minimum": 0, "type": [ "integer", "null" ] }, "total_bases": { + "description": "Total number of basepairs.", + "minimum": 0, "type": [ "integer", "null" @@ -9294,6 +9345,7 @@ } }, "required": [ + "bin_name", "type" ], "title": "MagBin", @@ -9357,6 +9409,8 @@ ] }, "binned_contig_num": { + "description": "Number of contigs that ended up in a medium or high
quality bin.", + "minimum": 0, "type": [ "integer", "null" @@ -9435,18 +9489,23 @@ ] }, "input_contig_num": { + "description": "Total number of input contigs.", + "minimum": 0, "type": [ "integer", "null" ] }, "low_depth_contig_num": { + "description": "Number of contigs which were excluded from binning for depth of coverage.", + "minimum": 0, "type": [ "integer", "null" ] }, "mags_list": { + "description": "Contains detailed information about each metagenome-assembled genome.", "items": { "$ref": "#/$defs/MagBin" }, @@ -9499,6 +9558,8 @@ "type": "string" }, "too_short_contig_num": { + "description": "Number of contigs which were excluded from binning for length.", + "minimum": 0, "type": [ "integer", "null" @@ -9512,6 +9573,8 @@ "type": "string" }, "unbinned_contig_num": { + "description": "Number of contigs which did not end up in a medium or high quality bin.", + "minimum": 0, "type": [ "integer", "null" @@ -9539,6 +9602,70 @@ "title": "MagsAnalysis", "type": "object" }, + "Manifest": { + "additionalProperties": false, + "description": "A qualified collection of DataObjects that can be analyzed together in the same experimental context.", + "properties": { + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "pattern": "^[a-zA-Z0-9][a-zA-Z0-9_\\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\\-\\/\\.,]*$", + "type": "string" + }, + "type": [ + "array", + "null" + ] + }, + "description": { + "description": "a human-readable description of a thing", + "type": [ + "string", + "null" + ] + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "pattern": "^[a-zA-Z0-9][a-zA-Z0-9_\\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\\-\\/\\.,]*$", + "type": "string" + }, + "manifest_category": { + "$ref": "#/$defs/ManifestCategoryEnum", + "description": "The type of context in which the constituent DataObjects can be analyzed together." 
+ }, + "name": { + "description": "A human readable label for an entity", + "type": [ + "string", + "null" + ] + }, + "type": { + "description": "the class_uri of the class that has been instantiated", + "enum": [ + "nmdc:Manifest" + ], + "type": "string" + } + }, + "required": [ + "manifest_category", + "id", + "type" + ], + "title": "Manifest", + "type": "object" + }, + "ManifestCategoryEnum": { + "description": "A list of contexts in which some DataObjects can be analyzed together.", + "enum": [ + "instrument_run", + "poolable_replicates", + "fractions" + ], + "title": "ManifestCategoryEnum", + "type": "string" + }, "MassAnalyzerEnum": { "description": "", "enum": [ @@ -9652,19 +9779,11 @@ ] }, "has_calibration": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" - }, - { - "type": "null" - } - ], "description": "a calibration instance associated with a process", - "type": "string" + "type": [ + "string", + "null" + ] }, "has_chromatography_configuration": { "description": "The identifier of the associated ChromatographyConfiguration, providing information about how a sample was introduced into the mass spectrometer.", @@ -10055,21 +10174,6 @@ "description": "The url that points to the exact github location of a workflow.", "type": "string" }, - "has_calibration": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" - }, - { - "type": "null" - } - ], - "description": "a calibration instance associated with a process", - "type": "string" - }, "has_failure_categorization": { "items": { "$ref": "#/$defs/FailureCategorization" @@ -11080,15 +11184,6 @@ "null" ] }, - "has_peptide_quantifications": { - "items": { - "$ref": "#/$defs/PeptideQuantification" - }, - "type": [ - "array", - "null" - ] - }, "id": { "description": "A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI", "pattern": "^[a-zA-Z0-9][a-zA-Z0-9_\\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\\-\\/\\.,]*$", @@ -12246,21 +12341,6 @@ "description": "The url that points to the exact github location of a workflow.", "type": "string" }, - "has_calibration": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" - }, - { - "type": "null" - } - ], - "description": "a calibration instance associated with a process", - "type": "string" - }, "has_failure_categorization": { "items": { "$ref": "#/$defs/FailureCategorization" @@ -13263,7 +13343,8 @@ "EMSL", "Battelle", "ANL", - "UCD_Genome_Center" + "UCD_Genome_Center", + "Azenta" ], "title": "ProcessingInstitutionEnum", "type": "string" @@ -13285,7 +13366,7 @@ "description": "This is used to link a metaproteomics analysis workflow to a specific protein", "properties": { "all_proteins": { - "description": "the grouped list of protein identifiers associated with the peptide sequences that were grouped to a best protein", + "description": "the list of protein identifiers that are associated with the peptide sequence", "items": { "type": "string" }, @@ -13295,7 +13376,7 @@ ] }, "best_protein": { - "description": "the specific protein identifier most correctly grouped to its associated peptide sequences", + "description": "the specific protein identifier most correctly associated with the peptide sequence", "type": [ "string", "null" @@ -15626,6 +15707,16 @@ "null" ] }, + "manifest_set": { + "description": "This property links a database object to the set of manifests within it.", + "items": { + "$ref": "#/$defs/Manifest" + }, + "type": [ + "array", + "null" + ] + }, "material_processing_set": { "description": "This property links a database object to the set of material processing within it.", "items": { diff --git a/nmdc_schema/nmdc_materialized_patterns.schema.json b/nmdc_schema/nmdc_materialized_patterns.schema.json index 1d2f9ed942..13ac19cc4a 100644 --- 
a/nmdc_schema/nmdc_materialized_patterns.schema.json +++ b/nmdc_schema/nmdc_materialized_patterns.schema.json @@ -37,6 +37,16 @@ "title": "ArchStrucEnum", "type": "string" }, + "BinQualityEnum": { + "description": "", + "enum": [ + "HQ", + "MQ", + "LQ" + ], + "title": "BinQualityEnum", + "type": "string" + }, "BiolStatEnum": { "description": "", "enum": [ @@ -6627,6 +6637,16 @@ "pattern": "^(nmdc):dobj-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$", "type": "string" }, + "in_manifest": { + "description": "one or more combinations of other DataObjects that can be analyzed together", + "items": { + "type": "string" + }, + "type": [ + "array", + "null" + ] + }, "insdc_experiment_identifiers": { "items": { "pattern": "^insdc.sra:(E|D|S)RX[0-9]{6,}$", @@ -6821,6 +6841,16 @@ "null" ] }, + "manifest_set": { + "description": "This property links a database object to the set of manifests within it.", + "items": { + "$ref": "#/$defs/Manifest" + }, + "type": [ + "array", + "null" + ] + }, "material_processing_set": { "description": "This property links a database object to the set of material processing within it.", "items": { @@ -6973,15 +7003,6 @@ "title": "DeposEnvEnum", "type": "string" }, - "DirectInfusionEnum": { - "description": "", - "enum": [ - "direct_infusion", - "autosampler" - ], - "title": "DirectInfusionEnum", - "type": "string" - }, "DissolvingProcess": { "additionalProperties": false, "description": "A mixing step where a soluble component is mixed with a liquid component.", @@ -7231,7 +7252,8 @@ "massive", "gsc", "zenodo", - "edi" + "edi", + "figshare" ], "title": "DoiProviderEnum", "type": "string" @@ -7442,12 +7464,16 @@ "description": "This class contains information pertaining to evaluating if a Metagenome-Assembled Genome (MAG) is eukaryotic.", "properties": { "completeness": { + "description": "Estimate of the completeness of the metagenome-assembled genome, estimated by a tool like CheckM.", + "minimum": 0, "type": [ "number", "null" ] }, "contamination": 
{ + "description": "Estimate of the completeness of the metagenome-assembled genome, estimated by a tool like CheckM.", + "minimum": 0, "type": [ "number", "null" @@ -7937,6 +7963,9 @@ "FileTypeEnum": { "description": "", "enum": [ + "Virus Summary", + "Plasmid Summary", + "GeNomad Aggregated Classification", "Reference Calibration File", "Metagenome Raw Reads", "Metagenome Raw Read 1", @@ -8386,13 +8415,21 @@ "description": "", "properties": { "count": { + "description": "The number of sequences (for a metagenome or metatranscriptome) or spectra (for metaproteomics) associated with the specified function.", "type": "integer" }, "gene_function_id": { "description": "The identifier for the gene function.", "type": "string" }, - "metagenome_annotation_id": { + "type": { + "description": "the class_uri of the class that has been instantiated", + "enum": [ + "nmdc:FunctionalAnnotationAggMember" + ], + "type": "string" + }, + "was_generated_by": { "anyOf": [ { "type": "string" @@ -8401,20 +8438,12 @@ "type": "string" } ], - "description": "The identifier for the analysis activity that generated the functional annotation results, where the analysis activity is an instance of the/an appropriate subclass of WorkflowExecution", - "pattern": "^(nmdc):(wfmgan|wfmtan)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[0-9]{1,})$", - "type": "string" - }, - "type": { - "description": "the class_uri of the class that has been instantiated", - "enum": [ - "nmdc:FunctionalAnnotationAggMember" - ], + "pattern": "^(nmdc):(wfmgan|wfmp|wfmtan)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[0-9]{1,})$", "type": "string" } }, "required": [ - "metagenome_annotation_id", + "was_generated_by", "gene_function_id", "count", "type" @@ -9178,24 +9207,24 @@ "description": "", "properties": { "bin_name": { - "type": [ - "string", - "null" - ] + "description": "Name of the metagenome-assembled genome.", + "type": "string" }, "bin_quality": { - "type": [ - "string", - "null" - ] + "$ref": 
"#/$defs/BinQualityEnum", + "description": "The quality of the metagenome-assembled genome based on MIMAG standards (https://doi.org/10.1038/nbt.3893)." }, "completeness": { + "description": "Estimate of the completeness of the metagenome-assembled genome, estimated by a tool like CheckM.", + "minimum": 0, "type": [ "number", "null" ] }, "contamination": { + "description": "Estimate of the completeness of the metagenome-assembled genome, estimated by a tool like CheckM.", + "minimum": 0, "type": [ "number", "null" @@ -9213,54 +9242,64 @@ "description": "Contains results from evaluating if a Metagenome-Assembled Genome is of eukaryotic lineage." }, "gene_count": { + "description": "Number of genes.", + "minimum": 0, "type": [ "integer", "null" ] }, "gtdbtk_class": { + "description": "Taxonomic class assigned by GTDB-Tk.", "type": [ "string", "null" ] }, "gtdbtk_domain": { + "description": "Taxonomic domain assigned by GTDB-Tk.", "type": [ "string", "null" ] }, "gtdbtk_family": { + "description": "Taxonomic family assigned by GTDB-Tk.", "type": [ "string", "null" ] }, "gtdbtk_genus": { + "description": "Taxonomic genus assigned by GTDB-Tk.", "type": [ "string", "null" ] }, "gtdbtk_order": { + "description": "Taxonomic order assigned by GTDB-Tk.", "type": [ "string", "null" ] }, "gtdbtk_phylum": { + "description": "Taxonomic phylum assigned by GTDB-Tk.", "type": [ "string", "null" ] }, "gtdbtk_species": { + "description": "Taxonomic genus assigned by GTDB-Tk.", "type": [ "string", "null" ] }, "members_id": { + "description": "Names of the contigs that make up a metagenome-assembled genome.", "items": { "type": "string" }, @@ -9270,36 +9309,48 @@ ] }, "num_16s": { + "description": "Number of 16s sequences detected, a subunit of prokaryotic ribosomes.", + "minimum": 0, "type": [ "integer", "null" ] }, "num_23s": { + "description": "Number of 23 seqeuences detected, a subunit of ribosomes.", + "minimum": 0, "type": [ "integer", "null" ] }, "num_5s": { + "description": 
"Number of 5s seqeuences detected, a subunit of ribosomes.", + "minimum": 0, "type": [ "integer", "null" ] }, "num_t_rna": { + "description": "Number of transfer transfer RNAs.", + "minimum": 0, "type": [ "integer", "null" ] }, "number_of_contig": { + "description": "Number of contigs", + "minimum": 0, "type": [ "integer", "null" ] }, "total_bases": { + "description": "Total number of basepairs.", + "minimum": 0, "type": [ "integer", "null" @@ -9314,6 +9365,7 @@ } }, "required": [ + "bin_name", "type" ], "title": "MagBin", @@ -9377,6 +9429,8 @@ ] }, "binned_contig_num": { + "description": "Number of contigs that ended up in a medium or high quality bin.", + "minimum": 0, "type": [ "integer", "null" @@ -9457,18 +9511,23 @@ ] }, "input_contig_num": { + "description": "Total number of input contigs.", + "minimum": 0, "type": [ "integer", "null" ] }, "low_depth_contig_num": { + "description": "Number of contigs which were excluded from binning for depth of coverage.", + "minimum": 0, "type": [ "integer", "null" ] }, "mags_list": { + "description": "Contains detailed information about each metagenome-assembled genome.", "items": { "$ref": "#/$defs/MagBin" }, @@ -9521,6 +9580,8 @@ "type": "string" }, "too_short_contig_num": { + "description": "Number of contigs which were excluded from binning for length.", + "minimum": 0, "type": [ "integer", "null" @@ -9534,6 +9595,8 @@ "type": "string" }, "unbinned_contig_num": { + "description": "Number of contigs which did not end up in a medium or high quality bin.", + "minimum": 0, "type": [ "integer", "null" @@ -9562,6 +9625,70 @@ "title": "MagsAnalysis", "type": "object" }, + "Manifest": { + "additionalProperties": false, + "description": "A qualified collection of DataObjects that can be analyzed together in the same experimental context.", + "properties": { + "alternative_identifiers": { + "description": "A list of alternative identifiers for the entity.", + "items": { + "pattern": 
"^[a-zA-Z0-9][a-zA-Z0-9_\\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\\-\\/\\.,]*$", + "type": "string" + }, + "type": [ + "array", + "null" + ] + }, + "description": { + "description": "a human-readable description of a thing", + "type": [ + "string", + "null" + ] + }, + "id": { + "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", + "pattern": "^(nmdc):manif-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$", + "type": "string" + }, + "manifest_category": { + "$ref": "#/$defs/ManifestCategoryEnum", + "description": "The type of context in which the constituent DataObjects can be analyzed together." + }, + "name": { + "description": "A human readable label for an entity", + "type": [ + "string", + "null" + ] + }, + "type": { + "description": "the class_uri of the class that has been instantiated", + "enum": [ + "nmdc:Manifest" + ], + "type": "string" + } + }, + "required": [ + "manifest_category", + "id", + "type" + ], + "title": "Manifest", + "type": "object" + }, + "ManifestCategoryEnum": { + "description": "A list of contexts in which some DataObjects can be analyzed together.", + "enum": [ + "instrument_run", + "poolable_replicates", + "fractions" + ], + "title": "ManifestCategoryEnum", + "type": "string" + }, "MassAnalyzerEnum": { "description": "", "enum": [ @@ -9676,20 +9803,12 @@ ] }, "has_calibration": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" - }, - { - "type": "null" - } - ], "description": "a calibration instance associated with a process", "pattern": "^(nmdc):calib-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$", - "type": "string" + "type": [ + "string", + "null" + ] }, "has_chromatography_configuration": { "description": "The identifier of the associated ChromatographyConfiguration, providing information about how a sample was introduced into the mass spectrometer.", @@ -10084,21 +10203,6 @@ "description": "The url that points to the exact github location of a workflow.", "type": "string" }, - 
"has_calibration": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" - }, - { - "type": "null" - } - ], - "description": "a calibration instance associated with a process", - "type": "string" - }, "has_failure_categorization": { "items": { "$ref": "#/$defs/FailureCategorization" @@ -11123,15 +11227,6 @@ "null" ] }, - "has_peptide_quantifications": { - "items": { - "$ref": "#/$defs/PeptideQuantification" - }, - "type": [ - "array", - "null" - ] - }, "id": { "description": "A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI", "pattern": "^(nmdc):wfmp-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[0-9]{1,})$", @@ -12301,21 +12396,6 @@ "description": "The url that points to the exact github location of a workflow.", "type": "string" }, - "has_calibration": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" - }, - { - "type": "null" - } - ], - "description": "a calibration instance associated with a process", - "type": "string" - }, "has_failure_categorization": { "items": { "$ref": "#/$defs/FailureCategorization" @@ -13326,7 +13406,8 @@ "EMSL", "Battelle", "ANL", - "UCD_Genome_Center" + "UCD_Genome_Center", + "Azenta" ], "title": "ProcessingInstitutionEnum", "type": "string" @@ -13348,7 +13429,7 @@ "description": "This is used to link a metaproteomics analysis workflow to a specific protein", "properties": { "all_proteins": { - "description": "the grouped list of protein identifiers associated with the peptide sequences that were grouped to a best protein", + "description": "the list of protein identifiers that are associated with the peptide sequence", "items": { "type": "string" }, @@ -13358,7 +13439,7 @@ ] }, "best_protein": { - "description": "the specific protein identifier most correctly grouped to its associated peptide sequences", + "description": "the specific protein identifier most correctly associated with the peptide sequence", "type": [ "string", "null" @@ -15703,6 +15784,16 @@ "null" 
] }, + "manifest_set": { + "description": "This property links a database object to the set of manifests within it.", + "items": { + "$ref": "#/$defs/Manifest" + }, + "type": [ + "array", + "null" + ] + }, "material_processing_set": { "description": "This property links a database object to the set of material processing within it.", "items": { diff --git a/nmdc_schema/nmdc_materialized_patterns.yaml b/nmdc_schema/nmdc_materialized_patterns.yaml index 1b672cceec..c1f85dd9db 100644 --- a/nmdc_schema/nmdc_materialized_patterns.yaml +++ b/nmdc_schema/nmdc_materialized_patterns.yaml @@ -179,6 +179,9 @@ prefixes: kegg: prefix_prefix: kegg prefix_reference: 'https://bioregistry.io/kegg:' + KEGG.MODULE: + prefix_prefix: KEGG.MODULE + prefix_reference: 'https://bioregistry.io/kegg.module:' linkml: prefix_prefix: linkml prefix_reference: https://w3id.org/linkml/ @@ -621,6 +624,36 @@ types: base: str uri: xsd:string enums: + CalibrationTargetEnum: + name: CalibrationTargetEnum + from_schema: https://w3id.org/nmdc/nmdc + permissible_values: + mass_charge_ratio: + text: mass_charge_ratio + title: m/z + aliases: + - Mass + - m/z + retention_time: + text: retention_time + aliases: + - RT + retention_index: + text: retention_index + aliases: + - RI + CalibrationStandardEnum: + name: CalibrationStandardEnum + from_schema: https://w3id.org/nmdc/nmdc + permissible_values: + fames: + text: fames + aliases: + - FAMES + alkanes: + text: alkanes + aliases: + - Alkanes StrandedOrientationEnum: name: StrandedOrientationEnum description: This enumeration specifies information about stranded RNA library @@ -955,6 +988,28 @@ enums: text: aqueous_layer non_polar_layer: text: non_polar_layer + BinQualityEnum: + name: BinQualityEnum + from_schema: https://w3id.org/nmdc/nmdc + permissible_values: + HQ: + text: HQ + description: Metagenome-assembled genome is high quality based on MIMAG standards + (https://doi.org/10.1038/nbt.3893) + aliases: + - high quality + MQ: + text: MQ + description: 
Metagenome-assembled genome is medium quality based on MIMAG + standards (https://doi.org/10.1038/nbt.3893) + aliases: + - medium quality + LQ: + text: LQ + description: Metagenome-assembled genome is low quality based on MIMAG standards + (https://doi.org/10.1038/nbt.3893) + aliases: + - low quality ChemicalConversionCategoryEnum: name: ChemicalConversionCategoryEnum from_schema: https://w3id.org/nmdc/nmdc @@ -3189,6 +3244,22 @@ enums: experiment. exact_mappings: - NCIT:C61538 + ManifestCategoryEnum: + name: ManifestCategoryEnum + description: A list of contexts in which some DataObjects can be analyzed together. + from_schema: https://w3id.org/nmdc/nmdc + permissible_values: + instrument_run: + text: instrument_run + description: A collection of data objects from a single run of an instrument. + poolable_replicates: + text: poolable_replicates + description: A collection of data objects that can be pooled for downstream + analyses. + fractions: + text: fractions + description: A collection of data objects that represent fractions of a single + sample. InstrumentModelEnum: name: InstrumentModelEnum from_schema: https://w3id.org/nmdc/nmdc @@ -3604,6 +3675,35 @@ enums: name: FileTypeEnum from_schema: https://w3id.org/nmdc/nmdc permissible_values: + Virus Summary: + text: Virus Summary + description: Tab separated file listing the viruses found by geNomad. + annotations: + file_name_pattern: + tag: file_name_pattern + value: ^_virus_summary\.tsv?$ + see_also: + - https://portal.nersc.gov/genomad/ + Plasmid Summary: + text: Plasmid Summary + description: Tab separated file listing the plasmids found by geNomad.
+ annotations: + file_name_pattern: + tag: file_name_pattern + value: ^_plasmid_summary\.tsv?$ + see_also: + - https://portal.nersc.gov/genomad/ + GeNomad Aggregated Classification: + text: GeNomad Aggregated Classification + description: Tab separated file which combines the results from neural network-based + classification and marker-based classification for virus and plasmid detection + with geNomad. + annotations: + file_name_pattern: + tag: file_name_pattern + value: ^_aggregated_classification\.tsv?$ + see_also: + - https://portal.nersc.gov/genomad/ Reference Calibration File: text: Reference Calibration File description: A file that contains data used to calibrate a natural organic @@ -4150,6 +4250,10 @@ enums: aliases: - EDI - Environmental Data Initiative + figshare: + text: figshare + meaning: https://ror.org/041mxqs23 + title: Figshare DoiCategoryEnum: name: DoiCategoryEnum comments: @@ -4206,16 +4310,6 @@ enums: text: nom aliases: - natural organic matter - DirectInfusionEnum: - name: DirectInfusionEnum - from_schema: https://w3id.org/nmdc/nmdc - permissible_values: - direct_infusion: - text: direct_infusion - aliases: - - DI - autosampler: - text: autosampler ExtractionTargetEnum: name: ExtractionTargetEnum from_schema: https://w3id.org/nmdc/nmdc @@ -4263,6 +4357,10 @@ enums: text: UCD_Genome_Center meaning: https://genomecenter.ucdavis.edu/ title: University of California, Davis Genome Center + Azenta: + text: Azenta + meaning: https://www.azenta.com/ + title: Azenta Life Sciences DataCategoryEnum: name: DataCategoryEnum from_schema: https://w3id.org/nmdc/nmdc @@ -4387,37 +4485,33 @@ enums: text: natural organic matter bulk chemistry: text: bulk chemistry - CalibrationTargetEnum: - name: CalibrationTargetEnum +slots: + has_calibration: + name: has_calibration + description: a calibration instance associated with a process from_schema: https://w3id.org/nmdc/nmdc - permissible_values: - mass_charge_ratio: - text: mass_charge_ratio - title: m/z - 
aliases: - - Mass - - m/z - retention_time: - text: retention_time - aliases: - - RT - retention_index: - text: retention_index - aliases: - - RI - CalibrationStandardEnum: - name: CalibrationStandardEnum + range: CalibrationInformation + calibration_object: + name: calibration_object + description: the file containing calibration data object from_schema: https://w3id.org/nmdc/nmdc - permissible_values: - fames: - text: fames - aliases: - - FAMES - alkanes: - text: alkanes - aliases: - - Alkanes -slots: + range: DataObject + internal_calibration: + name: internal_calibration + description: whether internal calibration was used, if false, external calibration + was used + from_schema: https://w3id.org/nmdc/nmdc + range: boolean + calibration_target: + name: calibration_target + description: the target measurement of the calibration + from_schema: https://w3id.org/nmdc/nmdc + range: CalibrationTargetEnum + calibration_standard: + name: calibration_standard + description: the reference standard(s) used for calibration + from_schema: https://w3id.org/nmdc/nmdc + range: CalibrationStandardEnum polarity_mode: name: polarity_mode description: the polarity of which ions are generated and detected @@ -4514,12 +4608,10 @@ slots: description: The identifier for the analysis activity that generated the functional annotation results, where the analysis activity is an instance of the/an appropriate subclass of WorkflowExecution + deprecated: not used. 2024-10 https://github.com/microbiomedata/nmdc-schema/issues/1253 from_schema: https://w3id.org/nmdc/nmdc range: WorkflowExecution required: true - any_of: - - range: MetagenomeAnnotation - - range: MetatranscriptomeAnnotation gene_function_id: name: gene_function_id description: The identifier for the gene function. @@ -4717,6 +4809,14 @@ slots: mixins: - object_set range: Configuration + manifest_set: + name: manifest_set + description: This property links a database object to the set of manifests within + it. 
+ from_schema: https://w3id.org/nmdc/nmdc + mixins: + - object_set + range: Manifest protocol_execution_set: name: protocol_execution_set description: This property links a database object to the set of protocol executions @@ -5204,84 +5304,152 @@ slots: range: QuantityValue total_bases: name: total_bases - todos: - - this slot needs some basic textual annotations and constraints + description: Total number of basepairs. from_schema: https://w3id.org/nmdc/nmdc range: integer + minimum_value: 0 members_id: name: members_id - todos: - - this slot needs some basic textual annotations and constraints + description: Names of the contigs that make up a metagenome-assembled genome. from_schema: https://w3id.org/nmdc/nmdc + close_mappings: + - GENEPIO:0100596 range: string multivalued: true bin_name: name: bin_name + description: Name of the metagenome-assembled genome. from_schema: https://w3id.org/nmdc/nmdc range: string + required: true number_of_contig: name: number_of_contig + description: Number of contigs from_schema: https://w3id.org/nmdc/nmdc + exact_mappings: + - GENEPIO:0000093 range: integer + minimum_value: 0 completeness: name: completeness + description: Estimate of the completeness of the metagenome-assembled genome, + estimated by a tool like CheckM. from_schema: https://w3id.org/nmdc/nmdc range: float + minimum_value: 0 contamination: name: contamination + description: Estimate of the contamination of the metagenome-assembled genome, + estimated by a tool like CheckM. from_schema: https://w3id.org/nmdc/nmdc range: float + minimum_value: 0 gene_count: name: gene_count + description: Number of genes. from_schema: https://w3id.org/nmdc/nmdc range: integer + minimum_value: 0 bin_quality: name: bin_quality + description: The quality of the metagenome-assembled genome based on MIMAG standards + (https://doi.org/10.1038/nbt.3893).
from_schema: https://w3id.org/nmdc/nmdc - range: string + range: BinQualityEnum num_16s: name: num_16s + description: Number of 16s sequences detected, a subunit of prokaryotic ribosomes. from_schema: https://w3id.org/nmdc/nmdc + broad_mappings: + - NCIT:C105370 range: integer + minimum_value: 0 num_5s: name: num_5s + description: Number of 5s sequences detected, a subunit of ribosomes. from_schema: https://w3id.org/nmdc/nmdc + broad_mappings: + - OMIT:0013248 range: integer + minimum_value: 0 num_23s: name: num_23s + description: Number of 23s sequences detected, a subunit of ribosomes. from_schema: https://w3id.org/nmdc/nmdc + broad_mappings: + - OMIT:0013245 range: integer + minimum_value: 0 num_t_rna: name: num_t_rna + description: Number of transfer RNAs. from_schema: https://w3id.org/nmdc/nmdc + broad_mappings: + - NCIT:C816 range: integer + minimum_value: 0 gtdbtk_domain: name: gtdbtk_domain + description: Taxonomic domain assigned by GTDB-Tk. from_schema: https://w3id.org/nmdc/nmdc + see_also: + - https://doi.org/10.1093/bioinformatics/btz848 + exact_mappings: + - NCBITaxon:superkingdom range: string gtdbtk_phylum: name: gtdbtk_phylum + description: Taxonomic phylum assigned by GTDB-Tk. from_schema: https://w3id.org/nmdc/nmdc + see_also: + - https://doi.org/10.1093/bioinformatics/btz848 + exact_mappings: + - NCBITaxon:phylum range: string gtdbtk_class: name: gtdbtk_class + description: Taxonomic class assigned by GTDB-Tk. from_schema: https://w3id.org/nmdc/nmdc + see_also: + - https://doi.org/10.1093/bioinformatics/btz848 + exact_mappings: + - NCBITaxon:class range: string gtdbtk_order: name: gtdbtk_order + description: Taxonomic order assigned by GTDB-Tk. from_schema: https://w3id.org/nmdc/nmdc + see_also: + - https://doi.org/10.1093/bioinformatics/btz848 + exact_mappings: + - NCBITaxon:order range: string gtdbtk_family: name: gtdbtk_family + description: Taxonomic family assigned by GTDB-Tk.
from_schema: https://w3id.org/nmdc/nmdc + see_also: + - https://doi.org/10.1093/bioinformatics/btz848 + exact_mappings: + - NCBITaxon:family range: string gtdbtk_genus: name: gtdbtk_genus + description: Taxonomic genus assigned by GTDB-Tk. from_schema: https://w3id.org/nmdc/nmdc + see_also: + - https://doi.org/10.1093/bioinformatics/btz848 + exact_mappings: + - NCBITaxon:genus range: string gtdbtk_species: name: gtdbtk_species + description: Taxonomic species assigned by GTDB-Tk. from_schema: https://w3id.org/nmdc/nmdc + see_also: + - https://doi.org/10.1093/bioinformatics/btz848 + exact_mappings: + - NCBITaxon:species range: string highest_similarity_score: name: highest_similarity_score @@ -5298,6 +5466,7 @@ slots: name: all_proteins description: the list of protein identifiers that are associated with the peptide sequence + deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250 from_schema: https://w3id.org/nmdc/nmdc range: GeneProduct multivalued: true @@ -5305,24 +5474,28 @@ slots: name: best_protein description: the specific protein identifier most correctly associated with the peptide sequence + deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250 from_schema: https://w3id.org/nmdc/nmdc range: GeneProduct min_q_value: name: min_q_value description: smallest Q-Value associated with the peptide sequence as provided by MSGFPlus tool + deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250 from_schema: https://w3id.org/nmdc/nmdc see_also: - OBI:0001442 range: float peptide_sequence: name: peptide_sequence + deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250 from_schema: https://w3id.org/nmdc/nmdc range: string peptide_spectral_count: name: peptide_spectral_count description: sum of filter passing MS2 spectra associated with the peptide sequence within a given LC-MS/MS data file + deprecated: not used.
2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250 from_schema: https://w3id.org/nmdc/nmdc range: integer peptide_sum_masic_abundance: @@ -5330,6 +5503,7 @@ slots: description: combined MS1 extracted ion chromatograms derived from MS2 spectra associated with the peptide sequence from a given LC-MS/MS data file using the MASIC tool + deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250 from_schema: https://w3id.org/nmdc/nmdc range: integer chemical_formula: @@ -5352,12 +5526,14 @@ slots: peptide_sequence_count: name: peptide_sequence_count description: count of peptide sequences grouped to the best_protein + deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250 from_schema: https://w3id.org/nmdc/nmdc range: integer protein_spectral_count: name: protein_spectral_count description: sum of filter passing MS2 spectra associated with the best protein within a given LC-MS/MS data file + deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250 from_schema: https://w3id.org/nmdc/nmdc range: integer protein_sum_masic_abundance: @@ -5365,6 +5541,7 @@ slots: description: combined MS1 extracted ion chromatograms derived from MS2 spectra associated with the best protein from a given LC-MS/MS data file using the MASIC tool + deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250 from_schema: https://w3id.org/nmdc/nmdc range: integer smiles: @@ -17113,6 +17290,24 @@ slots: from_schema: https://w3id.org/nmdc/nmdc range: Instrument multivalued: true + in_manifest: + name: in_manifest + description: one or more combinations of other DataObjects that can be analyzed + together + comments: + - A DataObject can be part of multiple manifests, for example, a DataObject could + be part of a manifest for a single run of an instrument and a manifest for technical + replicates of a single sample. 
+ from_schema: https://w3id.org/nmdc/nmdc + range: Manifest + multivalued: true + manifest_category: + name: manifest_category + description: The type of context in which the constituent DataObjects can be analyzed + together. + from_schema: https://w3id.org/nmdc/nmdc + range: ManifestCategoryEnum + required: true model: name: model from_schema: https://w3id.org/nmdc/nmdc @@ -17600,12 +17795,6 @@ slots: from_schema: https://w3id.org/nmdc/nmdc range: AnalyteCategoryEnum required: true - direct_infusion_category: - name: direct_infusion_category - description: used when a processed sample is introduced into a mass spectrometer - without chromatographic separation - from_schema: https://w3id.org/nmdc/nmdc - range: DirectInfusionEnum type: name: type description: the class_uri of the class that has been instantiated @@ -18635,6 +18824,7 @@ slots: abstract: true has_peptide_quantifications: name: has_peptide_quantifications + deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250 from_schema: https://w3id.org/nmdc/nmdc range: PeptideQuantification multivalued: true @@ -18825,30 +19015,42 @@ slots: abstract: true mags_list: name: mags_list + description: Contains detailed information about each metagenome-assembled genome. from_schema: https://w3id.org/nmdc/nmdc range: MagBin multivalued: true inlined_as_list: true too_short_contig_num: name: too_short_contig_num + description: Number of contigs which were excluded from binning for length. from_schema: https://w3id.org/nmdc/nmdc range: integer + minimum_value: 0 binned_contig_num: name: binned_contig_num + description: Number of contigs that ended up in a medium or high quality bin. from_schema: https://w3id.org/nmdc/nmdc range: integer + minimum_value: 0 input_contig_num: name: input_contig_num + description: Total number of input contigs. 
from_schema: https://w3id.org/nmdc/nmdc range: integer + minimum_value: 0 unbinned_contig_num: name: unbinned_contig_num + description: Number of contigs which did not end up in a medium or high quality + bin. from_schema: https://w3id.org/nmdc/nmdc range: integer + minimum_value: 0 low_depth_contig_num: name: low_depth_contig_num + description: Number of contigs which were excluded from binning for depth of coverage. from_schema: https://w3id.org/nmdc/nmdc range: integer + minimum_value: 0 input_read_count: name: input_read_count description: The sequence count number of input reads for QC analysis. @@ -18883,40 +19085,6 @@ slots: description: 'TODO ' from_schema: https://w3id.org/nmdc/nmdc range: float - has_calibration: - name: has_calibration - description: a calibration instance associated with a process - notes: - - 'has_calibration slot will be removed from all WorkflowExecution classes but - remain on the MassSpectrometry class after an ingest of the appropriate set - has occurred. Once this has occurred, this slot''s range can be updated to - CalibrationInformation and class/slot definitions can move to nmdc.yaml. See - PR #29 in Berkeley schema.' 
- from_schema: https://w3id.org/nmdc/nmdc - any_of: - - range: CalibrationInformation - - range: string - calibration_object: - name: calibration_object - description: the file containing calibration data object - from_schema: https://w3id.org/nmdc/nmdc - range: DataObject - internal_calibration: - name: internal_calibration - description: whether internal calibration was used, if false, external calibration - was used - from_schema: https://w3id.org/nmdc/nmdc - range: boolean - calibration_target: - name: calibration_target - description: the target measurement of the calibration - from_schema: https://w3id.org/nmdc/nmdc - range: CalibrationTargetEnum - calibration_standard: - name: calibration_standard - description: the reference standard(s) used for calibration - from_schema: https://w3id.org/nmdc/nmdc - range: CalibrationStandardEnum has_metabolite_identifications: name: has_metabolite_identifications from_schema: https://w3id.org/nmdc/nmdc @@ -19099,21 +19267,96 @@ classes: syntax: '{id_nmdc_prefix}:chrcon-{id_shoulder}-{id_blade}$' interpolated: true class_uri: nmdc:ChromatographyConfiguration + Manifest: + name: Manifest + description: A qualified collection of DataObjects that can be analyzed together + in the same experimental context. + comments: + - Manifests are currently uncoupled from other modelling. For example, there is + no schema requirement that DataObjects in a fractions Manifest were all obtained + by analyzing the same ProcessedSample. + from_schema: https://w3id.org/nmdc/nmdc + is_a: InformationObject + slots: + - manifest_category + slot_usage: + id: + name: id + pattern: ^(nmdc):manif-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$ + structured_pattern: + syntax: '{id_nmdc_prefix}:manif-{id_shoulder}-{id_blade}$' + class_uri: nmdc:Manifest + CalibrationInformation: + name: CalibrationInformation + description: A calibration object that is associated with a process.
+ from_schema: https://w3id.org/nmdc/nmdc + is_a: InformationObject + slots: + - calibration_object + - internal_calibration + - calibration_target + - calibration_standard + slot_usage: + internal_calibration: + name: internal_calibration + required: true + calibration_target: + name: calibration_target + required: true + id: + name: id + pattern: ^(nmdc):calib-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$ + structured_pattern: + syntax: '{id_nmdc_prefix}:calib-{id_shoulder}-{id_blade}$' + interpolated: true + class_uri: nmdc:CalibrationInformation + rules: + - preconditions: + slot_conditions: + calibration_target: + name: calibration_target + equals_string: retention_index + postconditions: + slot_conditions: + calibration_standard: + name: calibration_standard + required: true + description: If the calibration_target is retention_index, a calibration_standard + is required. + title: calibration_standard_if_rt + - preconditions: + slot_conditions: + internal_calibration: + name: internal_calibration + id_prefixes: + - 'False' + postconditions: + slot_conditions: + calibration_object: + name: calibration_object + required: true + description: If internal_calibration is false, a calibration_object is required. 
+ title: calibration_object_if_not_internal_calibration FunctionalAnnotationAggMember: name: FunctionalAnnotationAggMember from_schema: https://w3id.org/nmdc/nmdc slots: - - metagenome_annotation_id + - was_generated_by - gene_function_id - count - type slot_usage: - metagenome_annotation_id: - name: metagenome_annotation_id - pattern: ^(nmdc):(wfmgan|wfmtan)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\.[0-9]{1,})$ + was_generated_by: + name: was_generated_by + required: true + pattern: ^(nmdc):(wfmgan|wfmp|wfmtan)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\.[0-9]{1,})$ structured_pattern: - syntax: '{id_nmdc_prefix}:(wfmgan|wfmtan)-{id_shoulder}-{id_blade}{id_version}$' + syntax: '{id_nmdc_prefix}:(wfmgan|wfmp|wfmtan)-{id_shoulder}-{id_blade}{id_version}$' interpolated: true + count: + name: count + description: The number of sequences (for a metagenome or metatranscriptome) + or spectra (for metaproteomics) associated with the specified function. class_uri: nmdc:FunctionalAnnotationAggMember Database: name: Database @@ -19138,6 +19381,7 @@ classes: - functional_annotation_set - genome_feature_set - instrument_set + - manifest_set - material_processing_set - processed_sample_set - protocol_execution_set @@ -20886,6 +21130,7 @@ classes: name: PeptideQuantification description: This is used to link a metaproteomics analysis workflow to a specific peptide sequence and related information + deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250 from_schema: https://w3id.org/nmdc/nmdc slots: - type @@ -20900,6 +21145,7 @@ classes: name: ProteinQuantification description: This is used to link a metaproteomics analysis workflow to a specific protein + deprecated: not used. 
2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250 from_schema: https://w3id.org/nmdc/nmdc slots: - all_proteins @@ -20908,15 +21154,6 @@ classes: - protein_spectral_count - protein_sum_masic_abundance - type - slot_usage: - best_protein: - name: best_protein - description: the specific protein identifier most correctly grouped to its - associated peptide sequences - all_proteins: - name: all_proteins - description: the grouped list of protein identifiers associated with the peptide - sequences that were grouped to a best protein class_uri: nmdc:ProteinQuantification ChemicalEntity: name: ChemicalEntity @@ -21575,6 +21812,7 @@ classes: - md5_checksum - url - was_generated_by + - in_manifest slot_usage: name: name: name @@ -22036,7 +22274,6 @@ classes: from_schema: https://w3id.org/nmdc/nmdc is_a: WorkflowExecution slots: - - has_calibration - has_metabolite_identifications slot_usage: id: @@ -22059,8 +22296,6 @@ classes: - workflow subset from_schema: https://w3id.org/nmdc/nmdc is_a: WorkflowExecution - slots: - - has_peptide_quantifications slot_usage: id: name: id @@ -22082,8 +22317,6 @@ classes: - workflow subset from_schema: https://w3id.org/nmdc/nmdc is_a: WorkflowExecution - slots: - - has_calibration slot_usage: id: name: id @@ -22099,57 +22332,6 @@ classes: syntax: '{id_nmdc_prefix}:(omprc|dgms)-{id_shoulder}-{id_blade}$' interpolated: true class_uri: nmdc:NomAnalysis - CalibrationInformation: - name: CalibrationInformation - description: A calibration object that is associated with a process. 
- from_schema: https://w3id.org/nmdc/nmdc - is_a: InformationObject - slots: - - calibration_object - - internal_calibration - - calibration_target - - calibration_standard - slot_usage: - internal_calibration: - name: internal_calibration - required: true - calibration_target: - name: calibration_target - required: true - id: - name: id - pattern: ^(nmdc):calib-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$ - structured_pattern: - syntax: '{id_nmdc_prefix}:calib-{id_shoulder}-{id_blade}$' - interpolated: true - class_uri: nmdc:CalibrationInformation - rules: - - preconditions: - slot_conditions: - calibration_target: - name: calibration_target - equals_string: retention_index - postconditions: - slot_conditions: - calibration_standard: - name: calibration_standard - required: true - description: If the calibration_target is retention_index, a calibration_standard - is required. - title: calibration_standard_if_rt - - preconditions: - slot_conditions: - internal_calibration: - name: internal_calibration - id_prefixes: - - 'False' - postconditions: - slot_conditions: - calibration_object: - name: calibration_object - required: true - description: If internal_calibration is false, a calibration_object is required. - title: calibration_object_if_not_internal_calibration source_file: src/schema/nmdc.yaml settings: id_nmdc_prefix: