Add script action and comment function

opensafely · Jan 23, 2025 · 7c08327 · 7c08327
1 parent e93c737
commit 7c08327
Show file tree

Hide file tree

Showing 2 changed files with 54 additions and 45 deletions.
diff --git a/analysis/dataset_definition_tables.R b/analysis/dataset_definition_tables.R
@@ -5,60 +5,62 @@ library(gt)
 library(purrr)
 
 # Dataset definition file path output/population/pf_population.csv.gz
-df <- read_csv(here("output", "population", "pf_population.csv"))
+df <- read_csv(here("output", "population", "pf_population.csv.gz"))
 
 df_demographics <- df %>%
     select(sex, age_band, region, imd, ethnicity)
 
 # map_dfr maps function to each elevent and combines result in single df
-df_counts <- map_dfr(
-    # Iterate over column names 
+df_demographics_counts <- map_dfr(
+    # Column names sex, age_band, region, imd, ethnicity are inputs (.x) 
   names(df_demographics),
   ~ df_demographics %>%
-    group_by(across(all_of(.x)), .drop = FALSE) %>%
-    summarise(Count = n(), .groups = "drop") %>%
-    mutate(Category = .x) %>%
-    rename(Value = 1)
+  # Group by each column 
+    group_by(across(all_of(.x))) %>%
+    # summarises df with a new column which counts occurences (n)
+    summarise(n = n()) %>%
+    mutate(category = .x) %>%
+    rename(subcategory = 1)
 )
 
-gt_table <- df_counts[1:2] %>%
-  gt() %>%
-  tab_header(
-    title = "Demographics Table",
-    subtitle = "Counts of individuals by category and subcategory"
-  ) %>%
-  tab_row_group(
-    group = "sex",
-    rows = df_counts$Category == "sex"
-  ) %>%
-  tab_row_group(
-    group = "age_band",
-    rows = df_counts$Category == "age_band"
-  ) %>%
-  tab_row_group(
-    group = "region",
-    rows = df_counts$Category == "region"
-  ) %>%
-  tab_row_group(
-    group = "imd",
-    rows = df_counts$Category == "imd"
-  ) %>%
-  tab_row_group(
-    group = "ethnicity",
-    rows = df_counts$Category == "ethnicity"
-  ) %>%
-  tab_options(
-    heading.title.font.size = "medium",
-    heading.subtitle.font.size = "small",
-    table.font.size = "small"
-  ) %>%
-  tab_style(
-    style = cell_text(weight = "bold"),
-    locations = cells_row_groups(groups = everything())
-  )
+# gt_table <- df_demographics_counts[1:2] %>%
+#   gt() %>%
+#   tab_header(
+#     title = "Demographics Table",
+#     subtitle = "Counts of individuals by category and subcategory"
+#   ) %>%
+#   tab_row_group(
+#     group = "sex",
+#     rows = df_counts$Category == "sex"
+#   ) %>%
+#   tab_row_group(
+#     group = "age_band",
+#     rows = df_counts$Category == "age_band"
+#   ) %>%
+#   tab_row_group(
+#     group = "region",
+#     rows = df_counts$Category == "region"
+#   ) %>%
+#   tab_row_group(
+#     group = "imd",
+#     rows = df_counts$Category == "imd"
+#   ) %>%
+#   tab_row_group(
+#     group = "ethnicity",
+#     rows = df_counts$Category == "ethnicity"
+#   ) %>%
+#   tab_options(
+#     heading.title.font.size = "medium",
+#     heading.subtitle.font.size = "small",
+#     table.font.size = "small"
+#   ) %>%
+#   tab_style(
+#     style = cell_text(weight = "bold"),
+#     locations = cells_row_groups(groups = everything())
+#   )
 
-# Display the table
-gt_table
+# # Display the table
+# gt_table
 
 
 #output/population/pf_population.csv.gz
diff --git a/project.yaml b/project.yaml
@@ -56,4 +56,11 @@ actions:
       --output output/measures/consultation_med_counts_measures.csv
     outputs:
       moderately_sensitive:
-        measure: output/measures/consultation_med_counts_measures.csv
+        measure: output/measures/consultation_med_counts_measures.csv
+
+  pf_demographics_table:
+    run: r:latest analysis/dataset_definition_tables.R
+    needs: [generate_pf_dataset_definition]
+    outputs:
+      moderately_sensitive:
+        dataset: output/population/pf_demographics.csv