Skip to content

Commit

Permalink
Add sex, age_band, imd, ethnicity and region to dataset definition, w…
Browse files Browse the repository at this point in the history
…ith functions.
  • Loading branch information
viv3ckj committed Jan 22, 2025
1 parent b0563db commit c7ffe62
Show file tree
Hide file tree
Showing 3 changed files with 181 additions and 37 deletions.
190 changes: 162 additions & 28 deletions analysis/pf_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,33 @@ def get_acute_otitis_media_denominator(

return inclusion_criteria & ~exclusion_criteria

def get_age_band(patients, index_date):
    """Label each patient's age on ``index_date`` with an age band.

    Args:
        patients: table object providing ``age_on(date)``.
        index_date: date on which the age is evaluated.

    Returns:
        A ``case`` expression yielding "0-19", "20-39", "40-59", "60-79"
        or "80+", and "Missing" when the age is null. No ``otherwise``
        value is supplied, so an age matching none of the branches
        (e.g. a negative one) receives none of these labels.
    """
    years = patients.age_on(index_date)

    # (inclusive lower bound, exclusive upper bound, label)
    bounded_bands = (
        (0, 20, "0-19"),
        (20, 40, "20-39"),
        (40, 60, "40-59"),
        (60, 80, "60-79"),
    )
    branches = [
        when((years >= lower) & (years < upper)).then(label)
        for lower, upper, label in bounded_bands
    ]
    # Open-ended top band, then the explicit null check, in the same order
    # as the bounded bands above.
    branches.append(when(years >= 80).then("80+"))
    branches.append(when(years.is_null()).then("Missing"))

    return case(*branches)

def get_imd(addresses, index_date):
    """Label each patient's IMD quintile from their address on ``index_date``.

    Args:
        addresses: table object providing ``for_patient_on(date)``, whose
            result exposes ``imd_rounded``.
        index_date: date used to select the patient's address.

    Returns:
        A ``case`` expression yielding "1 (Most Deprived)", "2", "3", "4"
        or "5 (Least Deprived)", and "Missing" when no branch matches.
    """
    # Presumably the highest possible IMD rank -- TODO confirm against the
    # IMD release this study uses.
    max_imd = 32844
    rank = addresses.for_patient_on(index_date).imd_rounded

    # Exclusive upper bounds of quintiles 1-4: one to four fifths of
    # max_imd, truncated to an integer.
    upper_1, upper_2, upper_3, upper_4 = (
        int(max_imd * fifth / 5) for fifth in range(1, 5)
    )

    # NOTE(review): only the first branch checks the lower bound. Assuming
    # first-match semantics, a negative value would fail it and then satisfy
    # ``rank < upper_2`` -- confirm imd_rounded can never be negative.
    return case(
        when((rank >= 0) & (rank < upper_1)).then("1 (Most Deprived)"),
        when(rank < upper_2).then("2"),
        when(rank < upper_3).then("3"),
        when(rank < upper_4).then("4"),
        when(rank <= max_imd).then("5 (Least Deprived)"),
        otherwise="Missing",
    )

def get_latest_ethnicity(
index_date, clinical_events, ethnicity_codelist, ethnicity_from_sus
index_date, clinical_events, ethnicity_codelist, ethnicity_from_sus, grouping=6
):
latest_ethnicity_from_codes_category_num = (
clinical_events.where(clinical_events.snomedct_code.is_in(ethnicity_codelist))
Expand All @@ -137,33 +161,143 @@ def get_latest_ethnicity(
.snomedct_code.to_category(ethnicity_codelist)
)

latest_ethnicity_from_codes = case(
when(latest_ethnicity_from_codes_category_num == "1").then("White"),
when(latest_ethnicity_from_codes_category_num == "2").then("Mixed"),
when(latest_ethnicity_from_codes_category_num == "3").then(
"Asian or Asian British"
),
when(latest_ethnicity_from_codes_category_num == "4").then(
"Black or Black British"
),
when(latest_ethnicity_from_codes_category_num == "5").then(
"Chinese or Other Ethnic Groups"
),
)

ethnicity_from_sus = case(
when(ethnicity_from_sus.code.is_in(["A", "B", "C"])).then("White"),
when(ethnicity_from_sus.code.is_in(["D", "E", "F", "G"])).then("Mixed"),
when(ethnicity_from_sus.code.is_in(["H", "J", "K", "L"])).then(
"Asian or Asian British"
),
when(ethnicity_from_sus.code.is_in(["M", "N", "P"])).then(
"Black or Black British"
),
when(ethnicity_from_sus.code.is_in(["R", "S"])).then(
"Chinese or Other Ethnic Groups"
),
)
if grouping == 6:
latest_ethnicity_from_codes = case(
when(latest_ethnicity_from_codes_category_num == "1").then("White"),
when(latest_ethnicity_from_codes_category_num == "2").then("Mixed"),
when(latest_ethnicity_from_codes_category_num == "3").then(
"Asian or Asian British"
),
when(latest_ethnicity_from_codes_category_num == "4").then(
"Black or Black British"
),
when(latest_ethnicity_from_codes_category_num == "5").then(
"Chinese or Other Ethnic Groups"
),
)

ethnicity_from_sus = case(
when(ethnicity_from_sus.code.is_in(["A", "B", "C"])).then("White"),
when(ethnicity_from_sus.code.is_in(["D", "E", "F", "G"])).then("Mixed"),
when(ethnicity_from_sus.code.is_in(["H", "J", "K", "L"])).then(
"Asian or Asian British"
),
when(ethnicity_from_sus.code.is_in(["M", "N", "P"])).then(
"Black or Black British"
),
when(ethnicity_from_sus.code.is_in(["R", "S"])).then(
"Chinese or Other Ethnic Groups"
),
)
elif grouping == 16:
latest_ethnicity_from_codes = case(
when(latest_ethnicity_from_codes_category_num == "1").then(
"White British"
),
when(latest_ethnicity_from_codes_category_num == "2").then(
"White Irish"
),
when(latest_ethnicity_from_codes_category_num == "3").then(
"Other White"
),
when(latest_ethnicity_from_codes_category_num == "4").then(
"White and Caribbean"
),
when(latest_ethnicity_from_codes_category_num == "5").then(
"White and African"
),
when(latest_ethnicity_from_codes_category_num == "6").then(
"White and Asian"
),
when(latest_ethnicity_from_codes_category_num == "7").then(
"Other Mixed"
),
when(latest_ethnicity_from_codes_category_num == "8").then(
"Indian"
),
when(latest_ethnicity_from_codes_category_num == "9").then(
"Pakistani"
),
when(latest_ethnicity_from_codes_category_num == "10").then(
"Bangladeshi"
),
when(latest_ethnicity_from_codes_category_num == "11").then(
"Other South Asian"
),
when(latest_ethnicity_from_codes_category_num == "12").then(
"Caribbean"
),
when(latest_ethnicity_from_codes_category_num == "13").then(
"African"
),
when(latest_ethnicity_from_codes_category_num == "14").then(
"Other Black"
),
when(latest_ethnicity_from_codes_category_num == "15").then(
"Chinese"
),
when(latest_ethnicity_from_codes_category_num == "16").then(
"All other ethnic groups"
),
when(latest_ethnicity_from_codes_category_num == "17").then(
"Not stated"
),

)

ethnicity_from_sus = case(
when(ethnicity_from_sus.code == "A").then(
"White British"
),
when(ethnicity_from_sus.code == "B").then(
"White Irish"
),
when(ethnicity_from_sus.code == "C").then(
"Other White"
),
when(ethnicity_from_sus.code == "D").then(
"White and Caribbean"
),
when(ethnicity_from_sus.code == "E").then(
"White and African"
),
when(ethnicity_from_sus.code == "F").then(
"White and Asian"
),
when(ethnicity_from_sus.code == "G").then(
"Other Mixed"
),
when(ethnicity_from_sus.code == "H").then(
"Indian"
),
when(ethnicity_from_sus.code == "J").then(
"Pakistani"
),
when(ethnicity_from_sus.code == "K").then(
"Bangladeshi"
),
when(ethnicity_from_sus.code == "L").then(
"Other South Asian"
),
when(ethnicity_from_sus.code == "M").then(
"Caribbean"
),
when(ethnicity_from_sus.code == "N").then(
"African"
),
when(ethnicity_from_sus.code == "P").then(
"Other Black"
),
when(ethnicity_from_sus.code == "R").then(
"Chinese"
),
when(ethnicity_from_sus.code == "S").then(
"All other ethnic groups"
),
when(ethnicity_from_sus.code == "Z").then(
"Not stated"
),
)

ethnicity_combined = case(
when(latest_ethnicity_from_codes.is_not_null()).then(
Expand Down
25 changes: 16 additions & 9 deletions analysis/pf_dataset_definition.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from ehrql import create_dataset
from ehrql import create_dataset, case, when
from ehrql.tables.tpp import (
patients,
clinical_events,
practice_registrations,
addresses,
ethnicity_from_sus,
)
from pf_variables_library import check_pregnancy_status
from pf_dataset import (
Expand All @@ -13,6 +15,9 @@
get_acute_sore_throat_denominator,
get_acute_sinusitis_denominator,
get_acute_otitis_media_denominator,
get_latest_ethnicity,
get_age_band,
get_imd
)

import codelists
Expand All @@ -22,35 +27,37 @@

registration = practice_registrations.for_patient_on(index_date)

# Create new columns for each denominator rule
# Columns for demographics table
# Sex is read straight from the patients table.
dataset.sex = patients.sex
# Age band on index_date, labelled by get_age_band.
dataset.age_band = get_age_band(patients, index_date)
# Region comes from the practice registration selected for index_date.
dataset.region = registration.practice_nuts1_region_name
# IMD label derived by get_imd from the address on index_date.
dataset.imd = get_imd(addresses, index_date)
# Ethnicity uses the 16-group codelist together with grouping=16, so the
# codelist categories and the label mapping refer to the same grouping.
dataset.ethnicity = get_latest_ethnicity(
index_date, clinical_events, codelists.ethnicity_group16_codelist, ethnicity_from_sus, grouping=16
)

# Create new columns for each denominator rule for clinical conditions table
# Every denominator helper below is called with the same four arguments:
# the index date, the patients table, the clinical events table and the
# pregnancy codelist.
dataset.uti_denominator = get_uncomplicated_uti_denominator(
index_date, patients, clinical_events, codelists.pregnancy_codelist
)

dataset.shingles_denominator = get_shingles_denominator(
index_date, patients, clinical_events, codelists.pregnancy_codelist
)

dataset.impetigo_denominator = get_impetigo_denominator(
index_date, patients, clinical_events, codelists.pregnancy_codelist
)

dataset.insectbite_denominator = get_infected_insect_bites_denominator(
index_date, patients, clinical_events, codelists.pregnancy_codelist
)

dataset.sorethroat_denominator = get_acute_sore_throat_denominator(
index_date, patients, clinical_events, codelists.pregnancy_codelist
)

dataset.sinusitis_denominator = get_acute_sinusitis_denominator(
index_date, patients, clinical_events, codelists.pregnancy_codelist
)

dataset.otitismedia_denominator = get_acute_otitis_media_denominator(
index_date, patients, clinical_events, codelists.pregnancy_codelist
)

# Pregnancy status is also stored as its own column; unlike the helpers
# above, check_pregnancy_status does not take the patients table.
dataset.pregnancy_status = check_pregnancy_status(
index_date, clinical_events, codelists.pregnancy_codelist
)
Expand Down
3 changes: 3 additions & 0 deletions analysis/test_pf_dataset_definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
"clinical_events": [
{},
],
"addresses": {},
"ethnicity_from_sus": {},
"practice_registrations": [{"start_date": date(2024, 3, 1)}],
"expected_in_population": True,
Expand All @@ -37,6 +38,7 @@
"snomedct_code": "77386006",
},
],
"addresses": {},
"ethnicity_from_sus": {},
"practice_registrations": [{"start_date": date(2024, 3, 1)}],
"expected_in_population": True,
Expand Down Expand Up @@ -72,6 +74,7 @@
"snomedct_code": "1090711000000102",
},
],
"addresses": {},
"ethnicity_from_sus": {},
"practice_registrations": [{"start_date": date(2024, 3, 1)}],
"expected_in_population": True,
Expand Down

0 comments on commit c7ffe62

Please sign in to comment.