Skip to content

Commit

Permalink
Merge pull request #105 from opensafely/viv3ckj/demographic-table
Browse files Browse the repository at this point in the history
Viv3ckj/demographic table
  • Loading branch information
viv3ckj authored Jan 24, 2025
2 parents b841d36 + 494a2e6 commit bb49002
Show file tree
Hide file tree
Showing 9 changed files with 316 additions and 59 deletions.
9 changes: 8 additions & 1 deletion analysis/codelists.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,19 @@
)

# Import ethnicity codelist (categorised by the "Grouping_6" column)
ethnicity_codelist = codelist_from_csv(
ethnicity_group6_codelist = codelist_from_csv(
"codelists/opensafely-ethnicity-snomed-0removed.csv",
column="snomedcode",
category_column="Grouping_6",
)

# Import ethnicity codelist (categorised by the "Grouping_16" column)
ethnicity_group16_codelist = codelist_from_csv(
"codelists/opensafely-ethnicity-snomed-0removed.csv",
column="snomedcode",
category_column="Grouping_16",
)

# Import pregnancy codelist
pregnancy_codelist = codelist_from_csv(
"codelists/nhsd-primary-care-domain-refsets-preg_cod.csv",
Expand Down
71 changes: 71 additions & 0 deletions analysis/dataset_definition_tables.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
library(here)
library(tidyverse)
library(readr)
library(gt)
library(purrr)

# Input produced by the dataset definition:
# output/population/pf_population.csv.gz
df <- read_csv(here("output", "population", "pf_population.csv.gz"))

# Keep only the demographic columns that are tabulated below
df_demographics <- df %>%
  select(sex, age_band, region, imd, ethnicity)

# Count the rows in each level of every demographic column.
# map_dfr() applies the function to each element and row-binds the
# per-column results into a single data frame.
df_demographics_counts <- map_dfr(
  # Column names sex, age_band, region, imd, ethnicity are the inputs (.x)
  names(df_demographics),
  ~ df_demographics %>%
    # Group by the current column
    group_by(across(all_of(.x))) %>%
    # One row per level with its number of occurrences (n); drop the
    # grouping explicitly so an ungrouped tibble is returned
    summarise(n = n(), .groups = "drop") %>%
    # Record which demographic column the counts belong to
    mutate(category = .x) %>%
    # The first column is the demographic column that was grouped on
    rename(subcategory = 1) %>%
    # Columns may be parsed with different types (e.g. character vs
    # numeric); give subcategory one common type so that the per-column
    # results can be row-bound without a type clash
    mutate(subcategory = as.character(subcategory))
)

# Output columns: subcategory, n, category
readr::write_csv(
  df_demographics_counts,
  here::here("output", "population", "pf_demographics.csv")
)

# gt_table <- df_demographics_counts[1:2] %>%
# gt() %>%
# tab_header(
# title = "Demographics Table",
# subtitle = "Counts of individuals by category and subcategory"
# ) %>%
# tab_row_group(
# group = "sex",
# rows = df_counts$Category == "sex"
# ) %>%
# tab_row_group(
# group = "age_band",
# rows = df_counts$Category == "age_band"
# ) %>%
# tab_row_group(
# group = "region",
# rows = df_counts$Category == "region"
# ) %>%
# tab_row_group(
# group = "imd",
# rows = df_counts$Category == "imd"
# ) %>%
# tab_row_group(
# group = "ethnicity",
# rows = df_counts$Category == "ethnicity"
# ) %>%
# tab_options(
# heading.title.font.size = "medium",
# heading.subtitle.font.size = "small",
# table.font.size = "small"
# ) %>%
# tab_style(
# style = cell_text(weight = "bold"),
# locations = cells_row_groups(groups = everything())
# )

# # Display the table
# gt_table


#output/population/pf_population.csv.gz
4 changes: 2 additions & 2 deletions analysis/measures_definition_pf_breakdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
)
from codelists import (
pf_conditions_codelist,
ethnicity_codelist,
ethnicity_group6_codelist,
)

from pf_dataset import get_latest_ethnicity
Expand All @@ -26,7 +26,7 @@
ethnicity_combined = get_latest_ethnicity(
index_date=INTERVAL.start_date,
clinical_events=clinical_events,
ethnicity_codelist=ethnicity_codelist,
ethnicity_codelist=ethnicity_group6_codelist,
ethnicity_from_sus=ethnicity_from_sus,
)
# Age bands for age breakdown
Expand Down
199 changes: 171 additions & 28 deletions analysis/pf_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,42 @@ def get_acute_otitis_media_denominator(

return inclusion_criteria & ~exclusion_criteria

def get_numerator(selected_events, condition_code, condition_denominator):
    """Count, per patient, the selected events that match a condition.

    Args:
        selected_events: event frame exposing ``snomedct_code``, ``where`` and
            ``count_for_patient``.
        condition_code: collection of codes tested with ``is_in`` against
            ``snomedct_code`` (presumably a codelist; confirm against callers).
        condition_denominator: condition passed to ``where`` to restrict the
            matching events (presumably the condition's denominator criteria;
            confirm against callers).

    Returns:
        The result of ``count_for_patient()`` on the filtered events.
    """
    # Step 1: keep only events coded with one of the condition's codes.
    has_condition_code = selected_events.snomedct_code.is_in(condition_code)
    coded_events = selected_events.where(has_condition_code)

    # Step 2: restrict to events satisfying the denominator condition.
    eligible_events = coded_events.where(condition_denominator)

    return eligible_events.count_for_patient()

def get_age_band(patients, index_date):
    """Label each patient with a 20-year age band for their age on ``index_date``.

    Args:
        patients: object exposing ``age_on(date)``.
        index_date: date at which the age is evaluated.

    Returns:
        A ``case`` expression yielding "0-19", "20-39", "40-59", "60-79" or
        "80+", and "Missing" when the age is null.
    """
    patient_age = patients.age_on(index_date)

    # Half-open [lower, upper) bands, listed in ascending order.
    bounded_bands = [
        (0, 20, "0-19"),
        (20, 40, "20-39"),
        (40, 60, "40-59"),
        (60, 80, "60-79"),
    ]
    branches = [
        when((patient_age >= lower) & (patient_age < upper)).then(label)
        for lower, upper, label in bounded_bands
    ]

    # Open-ended top band, followed by an explicit label for a null age.
    branches.append(when(patient_age >= 80).then("80+"))
    branches.append(when(patient_age.is_null()).then("Missing"))

    return case(*branches)

def get_imd(addresses, index_date):
    """Label each patient with an IMD quintile for their address on ``index_date``.

    Args:
        addresses: object exposing ``for_patient_on(date)``, whose result has
            an ``imd_rounded`` attribute.
        index_date: date used to pick the patient's address.

    Returns:
        A ``case`` expression yielding "1 (Most Deprived)", "2", "3", "4" or
        "5 (Least Deprived)", and "Missing" when no branch matches.
    """
    max_imd = 32844

    # Exclusive upper bounds of quintiles 1 to 4 (fifths of the maximum value).
    q1_upper, q2_upper, q3_upper, q4_upper = [
        int(max_imd * fifth / 5) for fifth in (1, 2, 3, 4)
    ]

    imd_value = addresses.for_patient_on(index_date).imd_rounded

    # Branches are evaluated in order, so each later branch only needs its
    # upper bound; the first branch also requires a non-negative value.
    return case(
        when((imd_value >= 0) & (imd_value < q1_upper)).then("1 (Most Deprived)"),
        when(imd_value < q2_upper).then("2"),
        when(imd_value < q3_upper).then("3"),
        when(imd_value < q4_upper).then("4"),
        when(imd_value <= max_imd).then("5 (Least Deprived)"),
        otherwise="Missing",
    )

def get_latest_ethnicity(
index_date, clinical_events, ethnicity_codelist, ethnicity_from_sus
index_date, clinical_events, ethnicity_codelist, ethnicity_from_sus, grouping=6
):
latest_ethnicity_from_codes_category_num = (
clinical_events.where(clinical_events.snomedct_code.is_in(ethnicity_codelist))
Expand All @@ -137,33 +170,143 @@ def get_latest_ethnicity(
.snomedct_code.to_category(ethnicity_codelist)
)

latest_ethnicity_from_codes = case(
when(latest_ethnicity_from_codes_category_num == "1").then("White"),
when(latest_ethnicity_from_codes_category_num == "2").then("Mixed"),
when(latest_ethnicity_from_codes_category_num == "3").then(
"Asian or Asian British"
),
when(latest_ethnicity_from_codes_category_num == "4").then(
"Black or Black British"
),
when(latest_ethnicity_from_codes_category_num == "5").then(
"Chinese or Other Ethnic Groups"
),
)

ethnicity_from_sus = case(
when(ethnicity_from_sus.code.is_in(["A", "B", "C"])).then("White"),
when(ethnicity_from_sus.code.is_in(["D", "E", "F", "G"])).then("Mixed"),
when(ethnicity_from_sus.code.is_in(["H", "J", "K", "L"])).then(
"Asian or Asian British"
),
when(ethnicity_from_sus.code.is_in(["M", "N", "P"])).then(
"Black or Black British"
),
when(ethnicity_from_sus.code.is_in(["R", "S"])).then(
"Chinese or Other Ethnic Groups"
),
)
if grouping == 6:
latest_ethnicity_from_codes = case(
when(latest_ethnicity_from_codes_category_num == "1").then("White"),
when(latest_ethnicity_from_codes_category_num == "2").then("Mixed"),
when(latest_ethnicity_from_codes_category_num == "3").then(
"Asian or Asian British"
),
when(latest_ethnicity_from_codes_category_num == "4").then(
"Black or Black British"
),
when(latest_ethnicity_from_codes_category_num == "5").then(
"Chinese or Other Ethnic Groups"
),
)

ethnicity_from_sus = case(
when(ethnicity_from_sus.code.is_in(["A", "B", "C"])).then("White"),
when(ethnicity_from_sus.code.is_in(["D", "E", "F", "G"])).then("Mixed"),
when(ethnicity_from_sus.code.is_in(["H", "J", "K", "L"])).then(
"Asian or Asian British"
),
when(ethnicity_from_sus.code.is_in(["M", "N", "P"])).then(
"Black or Black British"
),
when(ethnicity_from_sus.code.is_in(["R", "S"])).then(
"Chinese or Other Ethnic Groups"
),
)
elif grouping == 16:
latest_ethnicity_from_codes = case(
when(latest_ethnicity_from_codes_category_num == "1").then(
"White British"
),
when(latest_ethnicity_from_codes_category_num == "2").then(
"White Irish"
),
when(latest_ethnicity_from_codes_category_num == "3").then(
"Other White"
),
when(latest_ethnicity_from_codes_category_num == "4").then(
"White and Caribbean"
),
when(latest_ethnicity_from_codes_category_num == "5").then(
"White and African"
),
when(latest_ethnicity_from_codes_category_num == "6").then(
"White and Asian"
),
when(latest_ethnicity_from_codes_category_num == "7").then(
"Other Mixed"
),
when(latest_ethnicity_from_codes_category_num == "8").then(
"Indian"
),
when(latest_ethnicity_from_codes_category_num == "9").then(
"Pakistani"
),
when(latest_ethnicity_from_codes_category_num == "10").then(
"Bangladeshi"
),
when(latest_ethnicity_from_codes_category_num == "11").then(
"Other South Asian"
),
when(latest_ethnicity_from_codes_category_num == "12").then(
"Caribbean"
),
when(latest_ethnicity_from_codes_category_num == "13").then(
"African"
),
when(latest_ethnicity_from_codes_category_num == "14").then(
"Other Black"
),
when(latest_ethnicity_from_codes_category_num == "15").then(
"Chinese"
),
when(latest_ethnicity_from_codes_category_num == "16").then(
"All other ethnic groups"
),
when(latest_ethnicity_from_codes_category_num == "17").then(
"Not stated"
),

)

ethnicity_from_sus = case(
when(ethnicity_from_sus.code == "A").then(
"White British"
),
when(ethnicity_from_sus.code == "B").then(
"White Irish"
),
when(ethnicity_from_sus.code == "C").then(
"Other White"
),
when(ethnicity_from_sus.code == "D").then(
"White and Caribbean"
),
when(ethnicity_from_sus.code == "E").then(
"White and African"
),
when(ethnicity_from_sus.code == "F").then(
"White and Asian"
),
when(ethnicity_from_sus.code == "G").then(
"Other Mixed"
),
when(ethnicity_from_sus.code == "H").then(
"Indian"
),
when(ethnicity_from_sus.code == "J").then(
"Pakistani"
),
when(ethnicity_from_sus.code == "K").then(
"Bangladeshi"
),
when(ethnicity_from_sus.code == "L").then(
"Other South Asian"
),
when(ethnicity_from_sus.code == "M").then(
"Caribbean"
),
when(ethnicity_from_sus.code == "N").then(
"African"
),
when(ethnicity_from_sus.code == "P").then(
"Other Black"
),
when(ethnicity_from_sus.code == "R").then(
"Chinese"
),
when(ethnicity_from_sus.code == "S").then(
"All other ethnic groups"
),
when(ethnicity_from_sus.code == "Z").then(
"Not stated"
),
)

ethnicity_combined = case(
when(latest_ethnicity_from_codes.is_not_null()).then(
Expand Down
Loading

0 comments on commit bb49002

Please sign in to comment.