Merge pull request #105 from opensafely/viv3ckj/demographic-table

Viv3ckj/demographic table
opensafely · Jan 24, 2025 · bb49002 · bb49002
2 parents b841d36 + 494a2e6
commit bb49002
Show file tree

Hide file tree

Showing 9 changed files with 316 additions and 59 deletions.
diff --git a/analysis/codelists.py b/analysis/codelists.py
@@ -8,12 +8,19 @@
 )
 
 # Import ethnicity codelist
-ethnicity_codelist = codelist_from_csv(
+ethnicity_group6_codelist = codelist_from_csv(
     "codelists/opensafely-ethnicity-snomed-0removed.csv",
     column="snomedcode",
     category_column="Grouping_6",
 )
 
+# Import ethnicity codelist categorised by the "Grouping_16" column
+ethnicity_group16_codelist = codelist_from_csv(
+    "codelists/opensafely-ethnicity-snomed-0removed.csv",
+    column="snomedcode",
+    category_column="Grouping_16",
+)
+
 # Import pregnancy codelist
 pregnancy_codelist = codelist_from_csv(
     "codelists/nhsd-primary-care-domain-refsets-preg_cod.csv",

diff --git a/analysis/dataset_definition_tables.R b/analysis/dataset_definition_tables.R
@@ -0,0 +1,71 @@
+library(here)
+library(tidyverse)
+library(readr)
+library(gt)
+library(purrr)
+
+# Dataset definition file path output/population/pf_population.csv.gz
+df <- read_csv(here("output", "population", "pf_population.csv.gz"))
+
+df_demographics <- df %>%
+    select(sex, age_band, region, imd, ethnicity)
+
+# map_dfr applies the function to each element and row-binds the results into a single df
+df_demographics_counts <- map_dfr(
+    # Column names sex, age_band, region, imd, ethnicity are inputs (.x) 
+  names(df_demographics),
+  ~ df_demographics %>%
+  # Group by each column 
+    group_by(across(all_of(.x))) %>%
+    # summarises df with a new column which counts occurrences (n)
+    summarise(n = n()) %>%
+    mutate(category = .x) %>%
+    rename(subcategory = 1)
+)
+
+readr::write_csv(
+  df_demographics_counts,
+  here::here("output", "population", "pf_demographics.csv")
+)
+
+# gt_table <- df_demographics_counts[1:2] %>%
+#   gt() %>%
+#   tab_header(
+#     title = "Demographics Table",
+#     subtitle = "Counts of individuals by category and subcategory"
+#   ) %>%
+#   tab_row_group(
+#     group = "sex",
+#     rows = df_counts$Category == "sex"
+#   ) %>%
+#   tab_row_group(
+#     group = "age_band",
+#     rows = df_counts$Category == "age_band"
+#   ) %>%
+#   tab_row_group(
+#     group = "region",
+#     rows = df_counts$Category == "region"
+#   ) %>%
+#   tab_row_group(
+#     group = "imd",
+#     rows = df_counts$Category == "imd"
+#   ) %>%
+#   tab_row_group(
+#     group = "ethnicity",
+#     rows = df_counts$Category == "ethnicity"
+#   ) %>%
+#   tab_options(
+#     heading.title.font.size = "medium",
+#     heading.subtitle.font.size = "small",
+#     table.font.size = "small"
+#   ) %>%
+#   tab_style(
+#     style = cell_text(weight = "bold"),
+#     locations = cells_row_groups(groups = everything())
+#   )
+
+# # Display the table
+# gt_table
+
+
+#output/population/pf_population.csv.gz
diff --git a/analysis/measures_definition_pf_breakdown.py b/analysis/measures_definition_pf_breakdown.py
@@ -8,7 +8,7 @@
 )
 from codelists import (
     pf_conditions_codelist,
-    ethnicity_codelist,
+    ethnicity_group6_codelist,
 )
 
 from pf_dataset import get_latest_ethnicity
@@ -26,7 +26,7 @@
 ethnicity_combined = get_latest_ethnicity(
     index_date=INTERVAL.start_date,
     clinical_events=clinical_events,
-    ethnicity_codelist=ethnicity_codelist,
+    ethnicity_codelist=ethnicity_group6_codelist,
     ethnicity_from_sus=ethnicity_from_sus,
 )
 # Age bands for age breakdown

diff --git a/analysis/pf_dataset.py b/analysis/pf_dataset.py
@@ -125,9 +125,42 @@ def get_acute_otitis_media_denominator(
 
     return inclusion_criteria & ~exclusion_criteria
 
+def get_numerator(selected_events, condition_code, condition_denominator):
+    numerator_counts = selected_events.where(
+        selected_events.snomedct_code.is_in(condition_code)
+    ).where(
+        condition_denominator
+    ).count_for_patient()
+
+    return numerator_counts
+
+def get_age_band(patients, index_date):
+    age = patients.age_on(index_date)
+    age_band = case(
+    when((age >= 0) & (age < 20)).then("0-19"),
+    when((age >= 20) & (age < 40)).then("20-39"),
+    when((age >= 40) & (age < 60)).then("40-59"),
+    when((age >= 60) & (age < 80)).then("60-79"),
+    when(age >= 80).then("80+"),
+    when(age.is_null()).then("Missing"),
+)
+    return age_band
+
+def get_imd(addresses, index_date):
+    imd_rounded = addresses.for_patient_on(index_date).imd_rounded
+    max_imd = 32844
+    imd_quintile = case(
+        when((imd_rounded >= 0) & (imd_rounded < int(max_imd * 1 / 5))).then("1 (Most Deprived)"),
+        when(imd_rounded < int(max_imd * 2 / 5)).then("2"),
+        when(imd_rounded < int(max_imd * 3 / 5)).then("3"),
+        when(imd_rounded < int(max_imd * 4 / 5)).then("4"),
+        when(imd_rounded <= max_imd).then("5 (Least Deprived)"),
+        otherwise="Missing",
+    )
+    return imd_quintile
 
 def get_latest_ethnicity(
-    index_date, clinical_events, ethnicity_codelist, ethnicity_from_sus
+    index_date, clinical_events, ethnicity_codelist, ethnicity_from_sus, grouping=6
 ):
     latest_ethnicity_from_codes_category_num = (
         clinical_events.where(clinical_events.snomedct_code.is_in(ethnicity_codelist))
@@ -137,33 +170,143 @@ def get_latest_ethnicity(
         .snomedct_code.to_category(ethnicity_codelist)
     )
 
-    latest_ethnicity_from_codes = case(
-        when(latest_ethnicity_from_codes_category_num == "1").then("White"),
-        when(latest_ethnicity_from_codes_category_num == "2").then("Mixed"),
-        when(latest_ethnicity_from_codes_category_num == "3").then(
-            "Asian or Asian British"
-        ),
-        when(latest_ethnicity_from_codes_category_num == "4").then(
-            "Black or Black British"
-        ),
-        when(latest_ethnicity_from_codes_category_num == "5").then(
-            "Chinese or Other Ethnic Groups"
-        ),
-    )
-
-    ethnicity_from_sus = case(
-        when(ethnicity_from_sus.code.is_in(["A", "B", "C"])).then("White"),
-        when(ethnicity_from_sus.code.is_in(["D", "E", "F", "G"])).then("Mixed"),
-        when(ethnicity_from_sus.code.is_in(["H", "J", "K", "L"])).then(
-            "Asian or Asian British"
-        ),
-        when(ethnicity_from_sus.code.is_in(["M", "N", "P"])).then(
-            "Black or Black British"
-        ),
-        when(ethnicity_from_sus.code.is_in(["R", "S"])).then(
-            "Chinese or Other Ethnic Groups"
-        ),
-    )
+    if grouping == 6:
+        latest_ethnicity_from_codes = case(
+            when(latest_ethnicity_from_codes_category_num == "1").then("White"),
+            when(latest_ethnicity_from_codes_category_num == "2").then("Mixed"),
+            when(latest_ethnicity_from_codes_category_num == "3").then(
+                "Asian or Asian British"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "4").then(
+                "Black or Black British"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "5").then(
+                "Chinese or Other Ethnic Groups"
+            ),
+        )
+
+        ethnicity_from_sus = case(
+            when(ethnicity_from_sus.code.is_in(["A", "B", "C"])).then("White"),
+            when(ethnicity_from_sus.code.is_in(["D", "E", "F", "G"])).then("Mixed"),
+            when(ethnicity_from_sus.code.is_in(["H", "J", "K", "L"])).then(
+                "Asian or Asian British"
+            ),
+            when(ethnicity_from_sus.code.is_in(["M", "N", "P"])).then(
+                "Black or Black British"
+            ),
+            when(ethnicity_from_sus.code.is_in(["R", "S"])).then(
+                "Chinese or Other Ethnic Groups"
+            ),
+        )
+    elif grouping == 16:
+        latest_ethnicity_from_codes = case(
+            when(latest_ethnicity_from_codes_category_num == "1").then(
+                "White British"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "2").then(
+                "White Irish"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "3").then(
+                "Other White"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "4").then(
+                "White and Caribbean"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "5").then(
+                "White and African"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "6").then(
+                "White and Asian"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "7").then(
+                "Other Mixed"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "8").then(
+                "Indian"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "9").then(
+                "Pakistani"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "10").then(
+                "Bangladeshi"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "11").then(
+                "Other South Asian"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "12").then(
+                "Caribbean"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "13").then(
+                "African"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "14").then(
+                "Other Black"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "15").then(
+                "Chinese"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "16").then(
+                "All other ethnic groups"
+            ),
+            when(latest_ethnicity_from_codes_category_num == "17").then(
+                "Not stated"
+            ),
+
+        )
+
+        ethnicity_from_sus = case(
+            when(ethnicity_from_sus.code == "A").then(
+                "White British"
+            ),
+            when(ethnicity_from_sus.code == "B").then(
+                "White Irish"
+            ),
+            when(ethnicity_from_sus.code == "C").then(
+                "Other White"
+            ),
+            when(ethnicity_from_sus.code == "D").then(
+                "White and Caribbean"
+            ),
+            when(ethnicity_from_sus.code == "E").then(
+                "White and African"
+            ),
+            when(ethnicity_from_sus.code == "F").then(
+                "White and Asian"
+            ),
+            when(ethnicity_from_sus.code == "G").then(
+                "Other Mixed"
+            ),
+            when(ethnicity_from_sus.code == "H").then(
+                "Indian"
+            ),
+            when(ethnicity_from_sus.code == "J").then(
+                "Pakistani"
+            ),
+            when(ethnicity_from_sus.code == "K").then(
+                "Bangladeshi"
+            ),
+            when(ethnicity_from_sus.code == "L").then(
+                "Other South Asian"
+            ),
+            when(ethnicity_from_sus.code == "M").then(
+                "Caribbean"
+            ),
+            when(ethnicity_from_sus.code == "N").then(
+                "African"
+            ),
+            when(ethnicity_from_sus.code == "P").then(
+                "Other Black"
+            ),
+            when(ethnicity_from_sus.code == "R").then(
+                "Chinese"
+            ),
+            when(ethnicity_from_sus.code == "S").then(
+                "All other ethnic groups"
+            ),
+            when(ethnicity_from_sus.code == "Z").then(
+                "Not stated"
+            ),
+        )
 
     ethnicity_combined = case(
         when(latest_ethnicity_from_codes.is_not_null()).then(