Skip to content

Commit

Permalink
Improvements for pathology report extractions (#452)
Browse files Browse the repository at this point in the history
  • Loading branch information
caufieldjh authored Sep 18, 2024
2 parents 2035bb5 + f53238e commit 8a66db6
Show file tree
Hide file tree
Showing 2 changed files with 147 additions and 16 deletions.
67 changes: 61 additions & 6 deletions src/ontogpt/templates/pathology.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,34 @@ class NullDataOptions(str, Enum):
NOT_MENTIONED = "NOT_MENTIONED"


class SeverityLevel(str, Enum):
    """
    The severity of a pathology.
    """

    # Graded severities, listed from least to most severe.
    mild = "mild"
    moderate = "moderate"
    severe = "severe"

    # Fallback for a pathology whose severity is not specified. The value
    # contains a space, so the member name uses an underscore instead.
    Not_Specified = "Not Specified"


class PathologyClassification(str, Enum):
    """
    The final classification of the overall pathology.
    """

    # Used when the final classification of the overall pathology is unclear.
    Unclear = "Unclear"

    # Definite final classifications of the overall pathology.
    Benign = "Benign"
    Malignant = "Malignant"
    Inflammation = "Inflammation"



class ExtractionResult(ConfiguredBaseModel):
"""
Expand Down Expand Up @@ -203,6 +231,12 @@ class PathologyReport(ConfiguredBaseModel):
linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'http://w3id.org/ontogpt/pathology', 'tree_root': True})

pathology_statements: Optional[List[PathologyStatement]] = Field(None, description="""A semicolon-delimited list of pathology statements, each describing a pathology, including any diagnoses, one or more specific qualities being measured and the anatomical location or tissue the pathology is measured in. If any of the pathology statements are negative, the negation should be included in each statment, e.g., \"no granulomas or viropathic changes\" should become \"no granulomas\" and \"no viropathic changes\".""", json_schema_extra = { "linkml_meta": {'alias': 'pathology_statements', 'domain_of': ['PathologyReport']} })
# Overall benign/malignant flag for the report. It is a string slot; the
# prompt example offers the answers "true, false".
# NOTE(review): this module looks generated from the LinkML schema - mirror
# any description change in the schema source as well.
is_benign: Optional[str] = Field(None, description="""Whether the overall pathology appears to be benign and not malignant. Other pathologies may be present, but if tissue is described as benign and/or if a carcinoma is explicitly excluded, this value should be true. A statement of \"no significant pathologic abnormality\" or the short form \"nspa\" would also have a value of true. It is otherwise false.""", json_schema_extra = { "linkml_meta": {'alias': 'is_benign',
'annotations': {'prompt.example': {'tag': 'prompt.example',
'value': 'true, false'}},
'domain_of': ['PathologyReport']} })
risks: Optional[List[str]] = Field(None, description="""A semicolon-delimited list of risks for development of more severe pathologies. If not specified, this value must be \"Not Specified\". Examples: gastric intestinal metaplasia, ulceration, lymphangiectasia""", json_schema_extra = { "linkml_meta": {'alias': 'risks', 'domain_of': ['PathologyReport']} })
overall_classification: Optional[PathologyClassification] = Field(None, description="""The final classification of the overall pathology. This must be one of the following: \"Unclear\", \"Benign\", \"Malignant\", or \"Inflammation\".""", json_schema_extra = { "linkml_meta": {'alias': 'overall_classification', 'domain_of': ['PathologyReport']} })


class PathologyStatement(ConfiguredBaseModel):
Expand All @@ -211,16 +245,16 @@ class PathologyStatement(ConfiguredBaseModel):
"""
linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'http://w3id.org/ontogpt/pathology'})

diagnosis: Optional[str] = Field(None, description="""The diagnosis or pathology being described. This may include full diagnoses or observations, for example, \"colitis\", \"inflammation\", \"dysplasia\", \"polyp\". If not specified or cannot be identified (e.g., due to lack of tissue sample), this value must be \"N/A\".""", json_schema_extra = { "linkml_meta": {'alias': 'diagnosis',
diagnosis: Optional[str] = Field(None, description="""The diagnosis or pathology being described. This may include full diagnoses or observations, for example, \"colitis\", \"inflammation\", \"dysplasia\", \"polyp\". If not specified, this value must be \"Not Specified\". If a diagnosis cannot be reached (e.g., due to lack of tissue sample), this value must be \"No Diagnosis\". Do not include qualifiers in this field, e.g., \"active colitis\" should be \"colitis\".""", json_schema_extra = { "linkml_meta": {'alias': 'diagnosis',
'annotations': {'prompt.example': {'tag': 'prompt.example',
'value': 'colitis, inflammation, '
'dysplasia'}},
'domain_of': ['PathologyStatement']} })
qualifiers: Optional[List[str]] = Field(None, description="""A semicolon-delimited list of descriptors other than those for severity. If not specified, this value must be \"N/A\".""", json_schema_extra = { "linkml_meta": {'alias': 'qualifiers',
qualifiers: Optional[List[str]] = Field(None, description="""A semicolon-delimited list of descriptors other than those for severity. If not specified, this value must be \"Not Specified\".""", json_schema_extra = { "linkml_meta": {'alias': 'qualifiers',
'annotations': {'prompt.example': {'tag': 'prompt.example',
'value': 'active, chronic, focal'}},
'domain_of': ['PathologyStatement']} })
severity: Optional[str] = Field(None, description="""The severity of the pathology, for example, mild, moderate, or severe. If not specified, this value must be \"N/A\".""", json_schema_extra = { "linkml_meta": {'alias': 'severity',
# Severity is validated against the SeverityLevel enum, so the prompt must
# name a permissible fallback value ("Not Specified"); "N/A" is not a member
# of that enum and would not validate.
# NOTE(review): this module looks generated from the LinkML schema - mirror
# any description change in the schema source as well.
severity: Optional[SeverityLevel] = Field(None, description="""The severity of the pathology, for example, mild, moderate, or severe. If not specified, this value must be \"Not Specified\".""", json_schema_extra = { "linkml_meta": {'alias': 'severity',
'annotations': {'prompt.example': {'tag': 'prompt.example',
'value': 'mild, moderate, severe'}},
'domain_of': ['PathologyStatement']} })
Expand All @@ -229,17 +263,19 @@ class PathologyStatement(ConfiguredBaseModel):
'value': 'duodenum, colonic mucosa, '
'liver'}},
'domain_of': ['PathologyStatement']} })
negative: Optional[str] = Field(None, description="""Whether the pathology is negative or not present. A statement of \"no significant pathologic abnormality\" or the short form \"nspa\" would have a value of true.""", json_schema_extra = { "linkml_meta": {'alias': 'negative',
negative: Optional[str] = Field(None, description="""Whether the pathology is negative or not present. This must be explicitly stated in the input, e.g., \"no granulomas\", in order to be true. It is otherwise false.""", json_schema_extra = { "linkml_meta": {'alias': 'negative',
'annotations': {'prompt.example': {'tag': 'prompt.example',
'value': 'true, false'}},
'domain_of': ['PathologyStatement']} })


class Diagnosis(NamedEntity):
linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'annotations': {'annotators': {'tag': 'annotators',
'value': 'bioportal:SNOMEDCT'}},
'value': 'bioportal:SNOMEDCT, '
'bioportal:ICD10CM, sqlite:obo:ncit, '
'sqlite:obo:mesh, sqlite:obo:mondo'}},
'from_schema': 'http://w3id.org/ontogpt/pathology',
'id_prefixes': ['SNOMEDCT']})
'id_prefixes': ['SNOMEDCT', 'ICD10CM']})

id: str = Field(..., description="""A unique identifier for the named entity""", json_schema_extra = { "linkml_meta": {'alias': 'id',
'annotations': {'prompt.skip': {'tag': 'prompt.skip', 'value': 'true'}},
Expand Down Expand Up @@ -271,6 +307,24 @@ class AnatomicalEntity(NamedEntity):
'slot_uri': 'rdfs:label'} })


class Qualifier(NamedEntity):
    # Named entity for qualifier terms: annotated with the sqlite:obo:pato
    # annotator and limited to the PATO identifier prefix.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {
            'annotations': {
                'annotators': {'tag': 'annotators', 'value': 'sqlite:obo:pato'},
            },
            'from_schema': 'http://w3id.org/ontogpt/pathology',
            'id_prefixes': ['PATO'],
        }
    )

    # Required identifier. It is flagged prompt.skip, and per its schema
    # comment it is populated during the grounding and normalization step.
    id: str = Field(
        ...,
        description="""A unique identifier for the named entity""",
        json_schema_extra={
            "linkml_meta": {
                'alias': 'id',
                'annotations': {
                    'prompt.skip': {'tag': 'prompt.skip', 'value': 'true'},
                },
                'comments': ['this is populated during the grounding and normalization step'],
                'domain_of': ['NamedEntity', 'Publication'],
            }
        },
    )

    # Optional human-readable label (alias "name"), mapped to rdfs:label.
    label: Optional[str] = Field(
        None,
        description="""The label (name) of the named thing""",
        json_schema_extra={
            "linkml_meta": {
                'alias': 'label',
                'aliases': ['name'],
                'annotations': {
                    'owl': {
                        'tag': 'owl',
                        'value': 'AnnotationProperty, AnnotationAssertion',
                    },
                },
                'domain_of': ['NamedEntity'],
                'slot_uri': 'rdfs:label',
            }
        },
    )


# Model rebuild
# see https://pydantic-docs.helpmanual.io/usage/models/#rebuilding-a-model
ExtractionResult.model_rebuild()
Expand All @@ -286,4 +340,5 @@ class AnatomicalEntity(NamedEntity):
PathologyStatement.model_rebuild()
Diagnosis.model_rebuild()
AnatomicalEntity.model_rebuild()
Qualifier.model_rebuild()

96 changes: 86 additions & 10 deletions src/ontogpt/templates/pathology.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ imports:
# This may require instructions to reprocess the text to be more like SNOMED, e.g.,
# "hepatitis" doesn't have a specific term but maybe inflammation is appropriate

# TODO: tune the risk factor list, it seems to identify negative instances as present
# e.g. "gastric antral-type mucosa with reactive gastropathy; no helicobacter organisms, intestinal metaplasia or dysplasia; see comment."
# identifies "gastric intestinal metaplasia" as a risk factor

classes:

PathologyReport:
Expand All @@ -48,6 +52,30 @@ classes:
changes".
range: PathologyStatement
multivalued: true
is_benign:
description: >-
Whether the overall pathology appears to be benign and not malignant.
Other pathologies may be present, but if tissue is described as
benign and/or if a carcinoma is explicitly excluded, this value should
be true. A statement of "no significant pathologic abnormality"
or the short form "nspa" would also have a value of true.
It is otherwise false.
range: string
annotations:
prompt.example: true, false
risks:
description: >-
A semicolon-delimited list of risks for development of
more severe pathologies. If not specified, this value must be
"Not Specified". Examples: gastric intestinal metaplasia,
ulceration, lymphangiectasia
range: string
multivalued: true
overall_classification:
description: >-
The final classification of the overall pathology. This must be one of
the following: "Unclear", "Benign", "Malignant", or "Inflammation".
range: PathologyClassification

PathologyStatement:
description: >-
Expand All @@ -59,25 +87,27 @@ classes:
description: >-
The diagnosis or pathology being described. This may include
full diagnoses or observations, for example, "colitis",
"inflammation", "dysplasia", "polyp". If not specified or cannot be
identified (e.g., due to lack of tissue sample), this value must be
"N/A".
"inflammation", "dysplasia", "polyp". If not specified, this value
must be "Not Specified". If a diagnosis cannot be reached
(e.g., due to lack of tissue sample), this value must be
"No Diagnosis". Do not include qualifiers in this field,
e.g., "active colitis" should be "colitis".
range: Diagnosis
annotations:
prompt.example: colitis, inflammation, dysplasia
qualifiers:
description: >-
A semicolon-delimited list of descriptors other than those for
severity. If not specified, this value must be "N/A".
range: string
severity. If not specified, this value must be "Not Specified".
range: Qualifier
annotations:
prompt.example: active, chronic, focal
multivalued: true
severity:
description: >-
The severity of the pathology, for example, mild, moderate, or severe.
If not specified, this value must be "Not Specified".
range: string
range: SeverityLevel
annotations:
prompt.example: mild, moderate, severe
anatomical_entities:
Expand All @@ -95,9 +125,9 @@ classes:
multivalued: true
negative:
description: >-
Whether the pathology is negative or not present. A statement of
"no significant pathologic abnormality" or the short form "nspa"
would have a value of true.
Whether the pathology is negative or not present. This must be
explicitly stated in the input, e.g., "no granulomas", in order
to be true. It is otherwise false.
range: string
annotations:
prompt.example: true, false
Expand All @@ -106,12 +136,58 @@ classes:
is_a: NamedEntity
id_prefixes:
- SNOMEDCT
- ICD10CM
annotations:
annotators: bioportal:SNOMEDCT
annotators: bioportal:SNOMEDCT, bioportal:ICD10CM, sqlite:obo:ncit, sqlite:obo:mesh, sqlite:obo:mondo

AnatomicalEntity:
is_a: NamedEntity
id_prefixes:
- UBERON
annotations:
annotators: sqlite:obo:uberon

Qualifier:
is_a: NamedEntity
id_prefixes:
- PATO
annotations:
annotators: sqlite:obo:pato

enums:
SeverityLevel:
description: >-
The severity of a pathology.
permissible_values:
mild:
description: >-
A pathology that is mild in severity.
meaning: PATO:0000394
moderate:
description: >-
A pathology that is moderate in severity.
meaning: PATO:0000395
severe:
description: >-
A pathology that is severe in severity.
meaning: PATO:0000396
Not Specified:
description: >-
The severity of the pathology is not specified.
PathologyClassification:
description: >-
The final classification of the overall pathology.
permissible_values:
Unclear:
description: >-
The final classification of the overall pathology is unclear.
Benign:
description: >-
The final classification of the overall pathology is benign.
Malignant:
description: >-
The final classification of the overall pathology is malignant.
Inflammation:
description: >-
The final classification of the overall pathology is inflammation.

0 comments on commit 8a66db6

Please sign in to comment.