Skip to content

Commit

Permalink
Updates for dietitian notes extraction (#313)
Browse files Browse the repository at this point in the history
This includes changing the spelling from "dietician" to "dietitian".
  • Loading branch information
caufieldjh authored Jan 26, 2024
2 parents 449b081 + fc1d89b commit 450c11a
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 31 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations
from datetime import datetime, date
from enum import Enum

from typing import List, Dict, Optional, Any, Union
from pydantic import BaseModel as BaseModel, ConfigDict, Field, field_validator
import re
Expand All @@ -21,6 +22,7 @@ class ConfiguredBaseModel(BaseModel):
extra = 'forbid',
arbitrary_types_allowed=True,
use_enum_values = True)
pass


class NullDataOptions(str, Enum):
Expand All @@ -40,15 +42,16 @@ class ClinicalObservationSet(ConfiguredBaseModel):
"""
observations: Optional[List[str]] = Field(default_factory=list)



# Model holding malnutrition-related observations; every field is an optional
# string that defaults to None.
class MalnutritionObservations(ConfiguredBaseModel):

# NOTE(review): the description speaks of True/False, but the annotation is Optional[str].
malnutrition_presence: Optional[str] = Field(None, description="""True if the patient is malnourished, False otherwise.""")
# NOTE(review): `severity` and `diagnosis` are each declared twice in this span
# (short description first, longer description second). This looks like the
# removed and added lines of a diff shown without +/- markers; confirm which
# pair is current before relying on either.
severity: Optional[str] = Field(None, description="""The severity of the patient's malnutrition, if present.""")
diagnosis: Optional[str] = Field(None, description="""The patient's malnutrition diagnosis, if present.""")
malnutrition_risk: Optional[str] = Field(None, description="""True if the patient has a demonstrable risk for malnutrition, False otherwise.""")
severity: Optional[str] = Field(None, description="""The severity of the patient's malnutrition, if present. This may be Mild, Moderate, or Severe. In general, a patient receiving less than 50% of their estimated energy requirement for greater than 5 days is considered to have severe malnutrition.""")
diagnosis: Optional[str] = Field(None, description="""The patient's malnutrition diagnosis, if present. This should not include modifiers like 'severe'.""")




class ExtractionResult(ConfiguredBaseModel):
"""
Expand All @@ -62,14 +65,14 @@ class ExtractionResult(ConfiguredBaseModel):
extracted_object: Optional[Any] = Field(None, description="""The complex objects extracted from the text""")
named_entities: Optional[List[Any]] = Field(default_factory=list, description="""Named entities extracted from the text""")



# Base model for a named thing: an identifier plus an optional label.
class NamedEntity(ConfiguredBaseModel):

# `id` is required (Field(...)); `label` is optional and defaults to None.
id: str = Field(..., description="""A unique identifier for the named entity""")
label: Optional[str] = Field(None, description="""The label (name) of the named thing""")



class ClinicalObservations(NamedEntity):
"""
Expand All @@ -84,7 +87,7 @@ class ClinicalObservations(NamedEntity):
id: str = Field(..., description="""A unique identifier for the named entity""")
label: Optional[str] = Field(None, description="""The label (name) of the named thing""")



class DietSupplementation(NamedEntity):
"""
Expand All @@ -93,7 +96,7 @@ class DietSupplementation(NamedEntity):
id: str = Field(..., description="""A unique identifier for the named entity""")
label: Optional[str] = Field(None, description="""The label (name) of the named thing""")



class NutritionSupport(NamedEntity):
"""
Expand All @@ -102,34 +105,34 @@ class NutritionSupport(NamedEntity):
id: str = Field(..., description="""A unique identifier for the named entity""")
label: Optional[str] = Field(None, description="""The label (name) of the named thing""")



# NamedEntity subclass for diseases. It redeclares `id` and `label` with the
# same types, defaults and descriptions as NamedEntity and adds no new fields.
class Disease(NamedEntity):

id: str = Field(..., description="""A unique identifier for the named entity""")
label: Optional[str] = Field(None, description="""The label (name) of the named thing""")



# NamedEntity subclass for units. It redeclares `id` and `label` with the
# same types, defaults and descriptions as NamedEntity and adds no new fields.
class Unit(NamedEntity):

id: str = Field(..., description="""A unique identifier for the named entity""")
label: Optional[str] = Field(None, description="""The label (name) of the named thing""")



# Field-less model used as the parent class of QuantitativeValue and Triple.
class CompoundExpression(ConfiguredBaseModel):

# Bare `None` expression serves as the placeholder class body; no fields are declared.
None



# A quantity expressed as a value plus a unit; both fields are optional and default to None.
class QuantitativeValue(CompoundExpression):

# NOTE(review): `value` is declared twice in this span, first as Optional[float]
# and then as Optional[str]. This looks like the removed and added lines of a
# diff shown without +/- markers; the str form matches the "N/A if not
# provided" description. Confirm which declaration is current.
value: Optional[float] = Field(None, description="""The value of the quantity.""")
value: Optional[str] = Field(None, description="""The value of the quantity, or N/A if not provided.""")
unit: Optional[str] = Field(None, description="""The unit of the quantity.""")



class Triple(CompoundExpression):
"""
Expand All @@ -142,7 +145,7 @@ class Triple(CompoundExpression):
subject_qualifier: Optional[str] = Field(None, description="""An optional qualifier or modifier for the subject of the statement, e.g. \"high dose\" or \"intravenously administered\"""")
object_qualifier: Optional[str] = Field(None, description="""An optional qualifier or modifier for the object of the statement, e.g. \"severe\" or \"with additional complications\"""")



class TextWithTriples(ConfiguredBaseModel):
"""
Expand All @@ -151,7 +154,7 @@ class TextWithTriples(ConfiguredBaseModel):
publication: Optional[Publication] = Field(None)
triples: Optional[List[Triple]] = Field(default_factory=list)



class TextWithEntity(ConfiguredBaseModel):
"""
Expand All @@ -160,14 +163,14 @@ class TextWithEntity(ConfiguredBaseModel):
publication: Optional[Publication] = Field(None)
entities: Optional[List[str]] = Field(default_factory=list)



# NamedEntity subclass for relationship types. It redeclares `id` and `label`
# with the same types, defaults and descriptions as NamedEntity and adds no new fields.
class RelationshipType(NamedEntity):

id: str = Field(..., description="""A unique identifier for the named entity""")
label: Optional[str] = Field(None, description="""The label (name) of the named thing""")



class Publication(ConfiguredBaseModel):

Expand All @@ -177,15 +180,15 @@ class Publication(ConfiguredBaseModel):
combined_text: Optional[str] = Field(None)
full_text: Optional[str] = Field(None, description="""The full text of the publication""")



# Model with three optional string fields, each defaulting to None:
# the subject's text, and the object's identifier and text.
class AnnotatorResult(ConfiguredBaseModel):

subject_text: Optional[str] = Field(None)
object_id: Optional[str] = Field(None)
object_text: Optional[str] = Field(None)




# Model rebuild
Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
id: http://w3id.org/ontogpt/dietician_notes
name: dietician_notes
title: Dietician Notes Extraction Template
name: dietitian_notes
title: Dietitian Notes Extraction Template
description: >-
A template for extracting clinical observations from dietician notes
A template for extracting clinical observations from dietitian notes
license: https://creativecommons.org/publicdomain/zero/1.0/
prefixes:
linkml: https://w3id.org/linkml/
dietician_notes: http://w3id.org/ontogpt/dietician_notes
dietitian_notes: http://w3id.org/ontogpt/dietitian_notes
HP: http://purl.obolibrary.org/obo/HP_

default_prefix: dietician_notes
default_prefix: dietitian_notes
default_range: string

# TODO: address extracted chunks that are too large
# TODO: find additional grounding strategies
# TODO: provide additional examples for each class

imports:
- linkml:types
- core
Expand All @@ -25,6 +29,8 @@ classes:
range: ClinicalObservations
multivalued: true

# TODO: capture the patient's risk for refeeding syndrome;
# this can be grounded to MONDO:0400005
ClinicalObservations:
tree_root: true
description: >-
Expand All @@ -40,11 +46,13 @@ classes:
annotations:
prompt: >-
The value and units of the patient's height.
'Not provided' if not provided.
range: QuantitativeValue
patient_weight:
annotations:
prompt: >-
The value and units of the patient's weight.
'Not provided' if not provided.
range: QuantitativeValue
malnutrition_status:
annotations:
Expand All @@ -64,16 +72,17 @@ classes:
range: NutritionSupport
multivalued: true

# TODO: Resolve validation errors if the values can't be parsed as floats
# This may happen if no value is provided.
QuantitativeValue:
is_a: CompoundExpression
annotations:
owl: IntersectionOf
attributes:
value:
description: The value of the quantity.
range: float
description: >-
The value of the quantity,
or N/A if not provided.
# A float range would be preferable, but validation fails when no value is provided, so a string is used instead.
range: string
annotations:
owl: DataProperty, DataHasValue
unit:
Expand All @@ -83,18 +92,32 @@ classes:
annotations:
owl: ObjectProperty, ObjectSomeValuesFrom

# TODO: make distinction between presence vs risk for malnutrition
# TODO: note that there are specific criteria for malnutrition diagnosis
MalnutritionObservations:
attributes:
malnutrition_presence:
description: True if the patient is malnourished, False otherwise.
range: string
malnutrition_risk:
description: >-
True if the patient has a demonstrable risk for malnutrition,
False otherwise.
range: string
severity:
description: >-
The severity of the patient's malnutrition, if present.
This may be Mild, Moderate, or Severe.
In general, a patient receiving less than 50% of their
estimated energy requirement for greater than 5 days
is considered to have severe malnutrition.
range: string
diagnosis:
# For MONDO this may often be MONDO:0001371,
# for protein-energy malnutrition
description: >-
The patient's malnutrition diagnosis, if present.
This should not include modifiers like 'severe'.
range: Disease

# TODO: distinguish whether this is currently active therapy
Expand All @@ -115,14 +138,20 @@ classes:
# TODO: consider limiting to children of MAXO:0000009
# or a more specific group of terms
# TODO: consider adding dosages + frequency
# TODO: may need more examples
# TODO: consider grounding vs CHEBI? Maybe to MESH, but be careful of
# synonyms - they may only be technically correct
# TODO: consider LOINC grounding for nutrition
NutritionSupport:
description: >-
A nutrition support therapy used to treat or prevent malnutrition.
is_a: NamedEntity
id_prefixes:
- MAXO
- EFO
annotations:
annotators: sqlite:obo:maxo
annotators: sqlite:obo:maxo, sqlite:obo:efo, sqlite:obo:mesh
prompt.examples: >-
enteral nutrition intake, gavage nutrition intake,
parenteral nutrition intake, partial parenteral nutrition intake
Expand All @@ -131,8 +160,9 @@ classes:
is_a: NamedEntity
id_prefixes:
- MONDO
- EFO
annotations:
annotators: sqlite:obo:mondo
annotators: sqlite:obo:mondo, sqlite:obo:mesh, sqlite:obo:efo
prompt.examples: cardiac asystole, COVID-19, Headache, cancer

Unit:
Expand Down

0 comments on commit 450c11a

Please sign in to comment.