diff --git a/analysis/pf_dataset.py b/analysis/pf_dataset.py
index 85288e4..e3aa020 100644
--- a/analysis/pf_dataset.py
+++ b/analysis/pf_dataset.py
@@ -125,9 +125,52 @@ def get_acute_otitis_media_denominator(
 
     return inclusion_criteria & ~exclusion_criteria
 
+
+def get_age_band(patients, index_date):
+    """Return the patient's age band ("0-19" to "80+") on ``index_date``.
+
+    Patients whose age is null are labelled "Missing".
+    """
+    age = patients.age_on(index_date)
+    age_band = case(
+        when((age >= 0) & (age < 20)).then("0-19"),
+        when((age >= 20) & (age < 40)).then("20-39"),
+        when((age >= 40) & (age < 60)).then("40-59"),
+        when((age >= 60) & (age < 80)).then("60-79"),
+        when(age >= 80).then("80+"),
+        when(age.is_null()).then("Missing"),
+    )
+    return age_band
+
+
+def get_imd(addresses, index_date):
+    """Return the IMD quintile label for the patient's address on ``index_date``.
+
+    ``imd_rounded`` is cut into five equal-width bands of ``max_imd``; values
+    matching none of the bands (such as nulls) are labelled "Missing".
+    """
+    imd_rounded = addresses.for_patient_on(index_date).imd_rounded
+    max_imd = 32844
+    imd_quintile = case(
+        when((imd_rounded >= 0) & (imd_rounded < int(max_imd * 1 / 5))).then(
+            "1 (Most Deprived)"
+        ),
+        when(imd_rounded < int(max_imd * 2 / 5)).then("2"),
+        when(imd_rounded < int(max_imd * 3 / 5)).then("3"),
+        when(imd_rounded < int(max_imd * 4 / 5)).then("4"),
+        when(imd_rounded <= max_imd).then("5 (Least Deprived)"),
+        otherwise="Missing",
+    )
+    return imd_quintile
+
 
 def get_latest_ethnicity(
-    index_date, clinical_events, ethnicity_codelist, ethnicity_from_sus
+    index_date, clinical_events, ethnicity_codelist, ethnicity_from_sus, grouping=6
 ):
+    """Derive ethnicity labels from the latest coded event and from SUS.
+
+    ``grouping`` selects the label set: 6 (the default) or 16. Any other
+    value raises ``ValueError``.
+    """
     latest_ethnicity_from_codes_category_num = (
         clinical_events.where(clinical_events.snomedct_code.is_in(ethnicity_codelist))
@@ -137,33 +180,146 @@ def get_latest_ethnicity(
         .snomedct_code.to_category(ethnicity_codelist)
     )
 
-    latest_ethnicity_from_codes = case(
-        when(latest_ethnicity_from_codes_category_num == "1").then("White"),
-        when(latest_ethnicity_from_codes_category_num == "2").then("Mixed"),
-        when(latest_ethnicity_from_codes_category_num == "3").then(
-            "Asian or Asian British"
-        ),
-        when(latest_ethnicity_from_codes_category_num == "4").then(
-            "Black or Black British"
-        ),
-        when(latest_ethnicity_from_codes_category_num == "5").then(
-            "Chinese or Other Ethnic Groups"
-        ),
-    )
-
-    ethnicity_from_sus = case(
-        when(ethnicity_from_sus.code.is_in(["A", "B", "C"])).then("White"),
-        when(ethnicity_from_sus.code.is_in(["D", "E", "F", "G"])).then("Mixed"),
-        when(ethnicity_from_sus.code.is_in(["H", "J", "K", "L"])).then(
-            "Asian or Asian British"
-        ),
-        when(ethnicity_from_sus.code.is_in(["M", "N", "P"])).then(
-            "Black or Black British"
-        ),
-        when(ethnicity_from_sus.code.is_in(["R", "S"])).then(
-            "Chinese or Other Ethnic Groups"
-        ),
-    )
+    if grouping == 6:
+        latest_ethnicity_from_codes = case(
+            when(latest_ethnicity_from_codes_category_num == "1").then("White"),
+            when(latest_ethnicity_from_codes_category_num == "2").then("Mixed"),
+            when(latest_ethnicity_from_codes_category_num == "3").then(
+                "Asian or Asian British"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "4").then(
+                "Black or Black British"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "5").then(
+                "Chinese or Other Ethnic Groups"
+            ),
+        )
+
+        ethnicity_from_sus = case(
+            when(ethnicity_from_sus.code.is_in(["A", "B", "C"])).then("White"),
+            when(ethnicity_from_sus.code.is_in(["D", "E", "F", "G"])).then("Mixed"),
+            when(ethnicity_from_sus.code.is_in(["H", "J", "K", "L"])).then(
+                "Asian or Asian British"
+            ),
+            when(ethnicity_from_sus.code.is_in(["M", "N", "P"])).then(
+                "Black or Black British"
+            ),
+            when(ethnicity_from_sus.code.is_in(["R", "S"])).then(
+                "Chinese or Other Ethnic Groups"
+            ),
+        )
+    elif grouping == 16:
+        latest_ethnicity_from_codes = case(
+            when(latest_ethnicity_from_codes_category_num == "1").then(
+                "White British"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "2").then(
+                "White Irish"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "3").then(
+                "Other White"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "4").then(
+                "White and Caribbean"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "5").then(
+                "White and African"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "6").then(
+                "White and Asian"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "7").then(
+                "Other Mixed"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "8").then(
+                "Indian"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "9").then(
+                "Pakistani"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "10").then(
+                "Bangladeshi"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "11").then(
+                "Other South Asian"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "12").then(
+                "Caribbean"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "13").then(
+                "African"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "14").then(
+                "Other Black"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "15").then(
+                "Chinese"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "16").then(
+                "All other ethnic groups"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "17").then(
+                "Not stated"
+            ),
+        )
+
+        ethnicity_from_sus = case(
+            when(ethnicity_from_sus.code == "A").then(
+                "White British"
+            ),
+            when(ethnicity_from_sus.code == "B").then(
+                "White Irish"
+            ),
+            when(ethnicity_from_sus.code == "C").then(
+                "Other White"
+            ),
+            when(ethnicity_from_sus.code == "D").then(
+                "White and Caribbean"
+            ),
+            when(ethnicity_from_sus.code == "E").then(
+                "White and African"
+            ),
+            when(ethnicity_from_sus.code == "F").then(
+                "White and Asian"
+            ),
+            when(ethnicity_from_sus.code == "G").then(
+                "Other Mixed"
+            ),
+            when(ethnicity_from_sus.code == "H").then(
+                "Indian"
+            ),
+            when(ethnicity_from_sus.code == "J").then(
+                "Pakistani"
+            ),
+            when(ethnicity_from_sus.code == "K").then(
+                "Bangladeshi"
+            ),
+            when(ethnicity_from_sus.code == "L").then(
+                "Other South Asian"
+            ),
+            when(ethnicity_from_sus.code == "M").then(
+                "Caribbean"
+            ),
+            when(ethnicity_from_sus.code == "N").then(
+                "African"
+            ),
+            when(ethnicity_from_sus.code == "P").then(
+                "Other Black"
+            ),
+            when(ethnicity_from_sus.code == "R").then(
+                "Chinese"
+            ),
+            when(ethnicity_from_sus.code == "S").then(
+                "All other ethnic groups"
+            ),
+            when(ethnicity_from_sus.code == "Z").then(
+                "Not stated"
+            ),
+        )
+    else:
+        raise ValueError(
+            f"Unsupported ethnicity grouping {grouping!r}; expected 6 or 16"
+        )
 
     ethnicity_combined = case(
         when(latest_ethnicity_from_codes.is_not_null()).then(
diff --git a/analysis/pf_dataset_definition.py b/analysis/pf_dataset_definition.py
index 4ce8441..255207a 100644
--- a/analysis/pf_dataset_definition.py
+++ b/analysis/pf_dataset_definition.py
@@ -1,8 +1,10 @@
-from ehrql import create_dataset
+from ehrql import create_dataset, case, when
 from ehrql.tables.tpp import (
     patients,
     clinical_events,
     practice_registrations,
+    addresses,
+    ethnicity_from_sus,
 )
 from pf_variables_library import check_pregnancy_status
 from pf_dataset import (
@@ -13,6 +15,9 @@
     get_acute_sore_throat_denominator,
     get_acute_sinusitis_denominator,
     get_acute_otitis_media_denominator,
+    get_latest_ethnicity,
+    get_age_band,
+    get_imd,
 )
 import codelists
 
@@ -22,35 +27,37 @@
 
 registration = practice_registrations.for_patient_on(index_date)
 
-# Create new columns for each denominator rule
+# Columns for demographics table
+dataset.sex = patients.sex
+dataset.age_band = get_age_band(patients, index_date)
+dataset.region = registration.practice_nuts1_region_name
+dataset.imd = get_imd(addresses, index_date)
+dataset.ethnicity = get_latest_ethnicity(
+    index_date, clinical_events, codelists.ethnicity_group16_codelist, ethnicity_from_sus, grouping=16
+)
+
+# Create new columns for each denominator rule for clinical conditions table
 dataset.uti_denominator = get_uncomplicated_uti_denominator(
     index_date, patients, clinical_events, codelists.pregnancy_codelist
 )
-
 dataset.shingles_denominator = get_shingles_denominator(
     index_date, patients, clinical_events, codelists.pregnancy_codelist
 )
-
 dataset.impetigo_denominator = get_impetigo_denominator(
     index_date, patients, clinical_events, codelists.pregnancy_codelist
 )
-
 dataset.insectbite_denominator = get_infected_insect_bites_denominator(
     index_date, patients, clinical_events, codelists.pregnancy_codelist
 )
-
 dataset.sorethroat_denominator = get_acute_sore_throat_denominator(
     index_date, patients, clinical_events, codelists.pregnancy_codelist
 )
-
 dataset.sinusitis_denominator = get_acute_sinusitis_denominator(
     index_date, patients, clinical_events, codelists.pregnancy_codelist
 )
-
 dataset.otitismedia_denominator = get_acute_otitis_media_denominator(
     index_date, patients, clinical_events, codelists.pregnancy_codelist
 )
-
 dataset.pregnancy_status = check_pregnancy_status(
     index_date, clinical_events, codelists.pregnancy_codelist
 )
diff --git a/analysis/test_pf_dataset_definition.py b/analysis/test_pf_dataset_definition.py
index f1b146a..edc7077 100644
--- a/analysis/test_pf_dataset_definition.py
+++ b/analysis/test_pf_dataset_definition.py
@@ -12,6 +12,7 @@
         "clinical_events": [
             {},
         ],
+        "addresses": {},
         "ethnicity_from_sus": {},
         "practice_registrations": [{"start_date": date(2024, 3, 1)}],
         "expected_in_population": True,
@@ -37,6 +38,7 @@
                 "snomedct_code": "77386006",
             },
         ],
+        "addresses": {},
         "ethnicity_from_sus": {},
         "practice_registrations": [{"start_date": date(2024, 3, 1)}],
         "expected_in_population": True,
@@ -72,6 +74,7 @@
                 "snomedct_code": "1090711000000102",
             },
         ],
+        "addresses": {},
         "ethnicity_from_sus": {},
         "practice_registrations": [{"start_date": date(2024, 3, 1)}],
         "expected_in_population": True,