analysis.rmd

---
title: ""
author: "James Goldie, 360info"
---

```{r setup}
library(tidyverse)
library(janitor)
library(lubridate)
library(glue)
library(themes360info)
library(ggtext)
library(here)
source(here("util.r"))
```

Download the surveys (CC-BY 4.0) from [the GitHub repo](https://github.com/Cool-infrastructures/Covid19-Heat). Extra info, like the survey coding framework, is available from [the published paper](https://doi.org/10.7488/ds/2961).

```{r import}
# Base URL of the raw survey data files in the GitHub repo linked above.
survey_remote_root <-
  "https://github.com/Cool-infrastructures/Covid19-Heat/raw/main/data/"

# Read the combined survey via import_survey(), which is not defined in this
# file (presumably it comes from util.r -- confirm).
all_survey <- import_survey(
  paste0(survey_remote_root, "Covid-19_Heat_combined_survey.csv")
)

# Summarise the factor columns of a data frame: returns one row per factor
# column, with the number of levels (`n`) and the levels themselves joined
# with "|" (`pos_values`).
get_factor_values <- function(survey_data) {
  level_summary <- summarise(
    survey_data,
    across(
      where(is.factor),
      list(
        n = function(column) nlevels(column),
        # could also pivot longer so that there's a row for each level...
        pos_values = function(column) paste(levels(column), collapse = "|")
      ),
      .names = "{.col}-{.fn}"
    )
  )

  # the summary columns are named "<column>-<stat>"; split on the "-" so each
  # source column becomes a row and each stat becomes its own column
  pivot_longer(
    level_summary,
    everything(),
    names_to = c("col_name", ".value"),
    names_sep = "-"
  )
}

# show every row of the summary (n = Inf)
print(get_factor_values(all_survey), n = Inf)
```

```{r comparerisk}
# NOTE(review): `cameroon` and `indonesia` are not created anywhere in this
# file -- presumably they are per-country tables defined elsewhere (util.r or
# an earlier session). Confirm before knitting from a clean environment.

# Quick look at the building-material columns in the Cameroon data.
cameroon %>%
  select(ends_with("material")) %>%
  summary()

# Classify each Indonesian respondent's roof as metal ("Yes"), one of the
# listed non-metal materials ("No"), or unknown (NA, which is then dropped).
test_numbers <- indonesia %>%
  mutate(
    metal_roof = case_when(
      roofing_material %in% c(
        "SHEET METAL [TIN/ZINC/CORRUGATED IRON]",
        "GALVANISED IRON"
      ) ~ "Yes",
      roofing_material %in% c(
        "DRIED CLAY",
        "WOODEN SHINGLES",
        "REINFORCED CONCRETE"
      ) ~ "No",
      TRUE ~ NA_character_
    )
  ) %>%
  select(metal_roof, experience_headache) %>%
  drop_na(metal_roof)

# Tally roof type against headache outcome, then add each roof group's total
# and each cell's share of that total for plotting. The result stays grouped
# by metal_roof.
test_numbers_vis <- test_numbers %>%
  count(metal_roof, experience_headache) %>%
  group_by(metal_roof) %>%
  mutate(
    predictor_n = sum(n),
    predictor_prop = n / predictor_n
  )

# Compare headache rates between the two roof groups.
rate_diff_plot(
  test_numbers_vis,
  predictor = metal_roof,
  outcome = experience_headache,
  bad_outcome = experience_headache == "Yes",
  good_outcome = experience_headache == "No",
  disadvantage = metal_roof == "Yes",
  advantage = metal_roof == "No",
  phrase_disadvantage = "with metal roofing",
  phrase_advantage = "with other roofing",
  phrase_bad_outcome = "experienced headaches in the last month"
) +
  labs(title = "Roofing and headaches in Indonesia")
```

Let's look at how hot, metal roofing affects people. For example, whether people are more likely to go outside to escape indoor heat:

```{r combinecountries}

# Respondent-level table of roof type against whether the respondent goes
# outside, built from the combined survey.
# some columns don't appear in all four countries:
roofing_gooutside <- all_survey %>%
  select(country, roofing_material, go_outside) %>%
  mutate(
    # relabel the "missing" level as "No"; "Yes" is left as it is
    go_outside = fct_recode(go_outside, "Yes" = "Yes", "No" = "missing"),
    # materials in neither list become NA and are dropped below
    metal_roof = case_when(
      roofing_material %in% c(
        "SHEET METAL [TIN/ZINC/CORRUGATED IRON]",
        "GALVANISED IRON"
      ) ~ "Metal roof",
      roofing_material %in% c(
        "DRIED CLAY",
        "WOODEN SHINGLES",
        "REINFORCED CONCRETE",
        "PALM FRONDS",
        "CERAMIC TILE",
        "BAMBOO",
        "T-GIRDER"
      ) ~ "Non-metal<br>roof",
      TRUE ~ NA_character_
    )
  ) %>%
  drop_na(metal_roof) %>%
  select(country, go_outside, metal_roof)

# Tally roof type against the go-outside answer, then add each roof group's
# total and each cell's share of that total for plotting. The result stays
# grouped by metal_roof.
test_numbers_vis <- roofing_gooutside %>%
  count(metal_roof, go_outside) %>%
  group_by(metal_roof) %>%
  mutate(
    predictor_n = sum(n),
    predictor_prop = n / predictor_n
  )

# Rate-difference chart with 360info titling and theme applied on top.
roof_gooutside_plot <-
  rate_diff_plot(
    test_numbers_vis,
    predictor = metal_roof,
    outcome = go_outside,
    bad_outcome = go_outside == "Yes",
    good_outcome = go_outside == "No",
    disadvantage = metal_roof == "Metal roof",
    advantage = metal_roof == "Non-metal<br>roof",
    phrase_disadvantage = "with metal roofing",
    phrase_advantage = "with other roofing",
    phrase_bad_outcome = "go outside to escape the heat"
  ) +
  labs(
    title = toupper("Escaping indoor heat"),
    caption = paste(
      "**CHART:** James Goldie, 360info",
      "**SOURCE:** Amir et al. 2020 [doi.org/10.7488/ds/2961]",
      sep = "<br>"
    )
  ) +
  guides(fill = "none") +
  theme_360() +
  theme(
    plot.subtitle = element_markdown(
      family = "Body 360info",
      face = "plain",
      size = rel(0.9),
      lineheight = rel(1.25),
      margin = margin(b = 20 * 1.15)
    ),
    axis.title = element_blank(),
    axis.text.y.left = element_markdown(face = "bold", size = rel(1.25)),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank()
  )

# Write the chart to out/roofing-gooutside.png.
save_360plot(
  roof_gooutside_plot,
  here("out", "roofing-gooutside.png"),
  shape = "phone-landscape",
  device = png
)
```

Now let's look at differences in bathing behaviour across countries:

```{r bathing}

# Share of respondents in each surveyed city who answered "Yes" to taking a
# bath or shower, drawn as a horizontal bar chart and written to
# out/bathing.png.
all_survey %>%
  select(city, country, take_a_bath_shower) %>%
  # drop rows without a location *before* building the label: glue() renders
  # NA as the literal text "NA", so the label itself is never missing and
  # dropping on it afterwards would remove nothing
  drop_na(city, country) %>%
  mutate(
    # markdown label for the axis: bold city name over the country
    geo_group = glue("**{str_to_sentence(city)}**<br>{country}"),
    # relabel the "missing" level as "No"; "Yes" is left as it is
    take_a_bath_shower = fct_recode(take_a_bath_shower,
      "Yes" = "Yes",
      "No" = "missing")) %>%
  select(-city, -country) %>%
  # now count for each area (.drop = FALSE keeps zero-count answer levels)
  group_by(geo_group) %>%
  count(take_a_bath_shower, .drop = FALSE) %>%
  group_by(geo_group) %>%
  # work out yes/no proportions for each area
  mutate(
    predictor_prop = n / sum(n),
    total_n = sum(n)) %>%
  ungroup() %>%
  filter(take_a_bath_shower == "Yes") %>%
  mutate(
    # sort for ggplot2 bar order
    geo_group = fct_reorder(geo_group, predictor_prop),
    # text shown on each bar; built once and shared by both label layers
    bar_label = glue(
      "**{scales::percent(predictor_prop, accuracy = 0.1)}**<br>",
      "({n} of {total_n} respondents)")) %>%
  {
    ggplot(.) +
    aes(x = geo_group, y = predictor_prop) +
    geom_col(fill = "black") +
    # split the labels at 10%: larger bars get a white label inside the bar...
    geom_richtext(
      aes(label = bar_label),
      data = filter(., predictor_prop > 0.1),
      hjust = "right", nudge_y = -0.005,
      colour = "white", fill = NA, label.colour = NA,
      family = "Body 360info", size = 5
    ) +
    # ...and small bars get a black label just past the end of the bar
    geom_richtext(
      aes(label = bar_label),
      data = filter(., predictor_prop <= 0.1),
      hjust = "left", nudge_y = 0.005,
      colour = "black", fill = NA, label.colour = NA,
      family = "Body 360info", size = 5
    ) +
    scale_y_continuous(labels = scales::label_percent(accuracy = 1)) +
    coord_flip() +
    theme_360() +
    theme(
      axis.text.y = element_markdown(size = rel(1.2)),
      axis.text.x = element_text(size = rel(1.2)),
      panel.grid.major.y = element_blank(),
      panel.grid.minor.y = element_blank(),
      plot.subtitle = element_markdown(
        family = "Body 360info",
        face = "plain",
        size = rel(0.9),
        lineheight = rel(1.25),
        margin = margin(b = 20 * 1.15))
    ) +
    labs(
      x = NULL, y = NULL,
      title = toupper("Cooling down with water"),
      subtitle =
        glue(
          "Survey respondents in Karachi, Pakistan **were more likely to cool down with a bath or shower**<br>",
          "than respondents in other surveyed places. The difference suggests that **local customs<br>",
          "and water scarcity** play a part in how people respond to extreme heat."),
      caption = paste(
        "**CHART:** James Goldie, 360info",
        "**SOURCE:** Amir et al. 2020 [doi.org/10.7488/ds/2961]",
        sep = "<br>"))
  } %>%
  save_360plot(here("out", "bathing.png"), shape = "sdtv-landscape")

```