From 9374054dad440c1a430be9907a2e4140580b2fc6 Mon Sep 17 00:00:00 2001
From: Andrew Pulsipher <45372570+apulsipher@users.noreply.github.com>
Date: Thu, 16 Jan 2025 12:26:13 -0700
Subject: [PATCH] Make forecasts customizable (#54)

* Start migrating code to separate R files

* Refactor first part of report (getting data)

* Refactor get_season_starts

* Refactor up until end of LFMCMC

* Refactor rest of index.qmd

* Finish refactor, compartmentalizing code into separate files

* Minor edits to report

* Document functions in forecast.pkg.R and plot-forecast.R

* Document functions in covid-forecast.R
---
 covid-forecast.R | 362 +++++++++++++++++++++++++++++++++++++++++++
 forecast-pkg.R   | 107 +++++++++++++
 index.qmd        | 390 ++---------------------------------------------
 plot-forecast.R  | 168 ++++++++++++++++++++
 4 files changed, 650 insertions(+), 377 deletions(-)
 create mode 100644 covid-forecast.R
 create mode 100644 forecast-pkg.R
 create mode 100644 plot-forecast.R

diff --git a/covid-forecast.R b/covid-forecast.R
new file mode 100644
index 0000000..aef672a
--- /dev/null
+++ b/covid-forecast.R
@@ -0,0 +1,362 @@
+## ----------------------------------
+## GENERAL FUNCTIONS
+## ----------------------------------
+source("forecast-pkg.R")
+source("plot-forecast.R")
+library(tidyr)
+
+## ----------------------------------
+## HELPER FUNCTIONS
+## ----------------------------------
+
+#' Get COVID-19 case counts from UDHHS
+#'
+#' @description
+#' `get_covid_data` downloads the COVID-19 dataset from UDHHS.
+#'
+#' @details
+#' This function downloads the COVID-19 daily case count dataset
+#' from UDHHS, returning the data from the last N days.
+#'
+#' @param n_days Integer value.
+#'
+#' @returns The COVID-19 case count data.
+get_covid_data <- function(n_days) {
+  # Download the Trends data from Utah DHSS
+  # - URL for UDHHS COVID-19 data (returns a zip archive with multiple files)
+  data_url <- "https://coronavirus-dashboard.utah.gov/Utah_COVID19_data.zip"
+  # - Target file from the above zip archive
+  target_file_regex <- "Trends_Epidemic+"
+  # - Perform data extraction
+  covid_data <- get_data_from_url(data_url, target_file_regex)
+  # - Format date column properly
+  covid_data$Date <- as.Date(covid_data$Date)
+
+  # Extract last n_days of data
+  last_date <- max(covid_data$Date)
+  covid_data <- covid_data[covid_data$Date > (last_date - n_days), ]
+  return(covid_data)
+}
+
+## ----------------------------------
+## LFMCMC FUNCTIONS
+## ----------------------------------
+
+#' Define the LFMCMC summary function
+#'
+#' @description
+#' `lfmcmc_summary_fun` defines the summary function for LFMCMC.
+#'
+#' @details
+#' In LFMCMC, the summary function computes summary statistics which
+#' are used to compare the simulation function output with the observed
+#' data. It must perform the same with the observed dataset
+#' as it does with the results of the LFMCMC simulation function.
+#' This function takes a vector of case counts and computes
+#' `time_to_peak` (how many days to the highest case count),
+#' `size_of_peak`, as well as the mean and standard deviation.
+#'
+#' @param case_counts Vector of integer case counts.
+#'
+#' @returns A vector of summary statistics.
+lfmcmc_summary_fun <- function(case_counts) {
+
+  time_to_peak <- which.max(case_counts)
+  size_of_peak <- case_counts[time_to_peak]
+  mean_cases <- mean(case_counts)
+  sd_cases <- sd(case_counts)
+
+  c(
+    time_to_peak,
+    size_of_peak,
+    mean_cases,
+    sd_cases
+  )
+}
+
+#' Define the LFMCMC proposal function
+#'
+#' @description
+#' `lfmcmc_proposal_fun` defines the proposal function for LFMCMC.
+#'
+#' @details
+#' In LFMCMC, the proposal function generates new model parameters
+#' to be used in the next run of the simulation function.
+#' This function takes a vector of parameters (the set used in
+#' the previous run) and takes a random step from those values
+#' to compute new ("proposed") values.
+#'
+#' @param params_prev Vector of numeric parameter values.
+#'
+#' @returns A vector of proposed parameter values.
+lfmcmc_proposal_fun <- function(params_prev) {
+  # Propose new model parameters
+  params_1_to_5 <- plogis(
+    qlogis(params_prev[1:5]) +
+      rnorm(length(params_prev[1:5]), mean = 0, sd = 0.025)
+  )
+
+  params_6_to_7 <- params_prev[6:7] +
+    rnorm(2, mean = 0, sd = 0.025)
+
+
+  # Reflect contact rates
+  if (params_6_to_7[1] < 0) {
+    params_6_to_7[1] <- params_prev[6] -
+      (params_6_to_7[1] - params_prev[6])
+  }
+  if (params_6_to_7[2] < 0) {
+    params_6_to_7[2] <- params_prev[7] -
+      (params_6_to_7[2] - params_prev[7])
+  }
+
+  # Return proposed parameters
+  c(params_1_to_5, params_6_to_7)
+}
+
+#' Define the LFMCMC kernel function
+#'
+#' @description
+#' `lfmcmc_kernel_fun` defines the kernel function for LFMCMC.
+#'
+#' @details
+#' In LFMCMC, the kernel function weighs the summary statistics from
+#' the latest run of the simulation function against the summary
+#' statistics of the observed data.
+#'
+#' @param simulated_stats Vector of numeric summary stats for simulated data.
+#' @param observed_stats Vector of numeric summary stats for observed data.
+#' @param epsilon Numeric epsilon value.
+#'
+#' @returns A numeric kernel score.
+lfmcmc_kernel_fun <- function(simulated_stats, observed_stats, epsilon) {
+  diff <- ((simulated_stats - observed_stats)^2)^epsilon
+  dnorm(sqrt(sum(diff)))
+}
+
+#' Get a sample of accepted parameters from LFMCMC
+#'
+#' @description
+#' `get_params_sample` gets a sample of accepted parameters from the
+#' LFMCMC run after the burn-in period.
+#'
+#' @param lfmcmc_obj Object of class [LFMCMC].
+#' @param total_samples Integer total samples from the LFMCMC object.
+#' @param burnin Integer burn-in period for the LFMCMC object.
+#' @param sample_size Integer number of samples to take.
+#'
+#' @returns An array of numeric vectors where each
+#' vector represents a single sample set of parameters.
+get_params_sample <- function(lfmcmc_obj, total_samples, burnin, sample_size) {
+  accepted_params <- get_accepted_params(lfmcmc_obj)
+  burnin_sample <- tail(accepted_params, n = (total_samples - burnin))
+  params_sample <- burnin_sample[sample(nrow(burnin_sample), sample_size), ]
+  return(params_sample)
+}
+
+## ----------------------------------
+## Setup Forecast
+## ----------------------------------
+# Define forecast parameters
+n_days <- 90 # Calibrate model last 90 days of data
+
+## ----------------------------------
+## Get Data
+## ----------------------------------
+
+# Get COVID-19 data
+covid_data <- get_covid_data(n_days)
+# Compute start date for each season
+seasons <- get_season_starts(covid_data$Date)
+# Get observed case counts
+covid_cases <- covid_data$Daily.Cases
+
+## ----------------------------------
+## Create SIR CONN Model
+## ----------------------------------
+
+# Define SIRCONN model parameters
+model_seed          <- 112      # Random seed
+model_ndays         <- n_days   # How many days to run the model
+model_n             <- 10000    # model population size
+
+# Define initial disease parameters
+init_prevalence <- covid_cases[1] / model_n
+init_contact_rate <- 10
+init_transmission_rate <- 0.05
+init_recovery_rate <- 1 / 7
+
+# Create the SIRCONN model
+covid_sirconn_model <- ModelSIRCONN(
+  name              = "COVID-19",
+  n                 = model_n,
+  prevalence        = init_prevalence,
+  contact_rate      = init_contact_rate,
+  transmission_rate = init_transmission_rate,
+  recovery_rate     = init_recovery_rate
+)
+
+## ----------------------------------
+## Setup LFMCMC Calibration
+## ----------------------------------
+
+# Define LFMCMC parameters
+lfmcmc_n_samples <- 6000   # number of LFMCMC iterations
+lfmcmc_burnin <- 2000    # burn-in period
+lfmcmc_epsilon <- 0.25
+
+init_lfmcmc_params <- c(
+  1 / 7,  # r_rate
+  0.05,   # t_rate_spring
+  0.04,   # t_rate_summer
+  0.06,   # t_rate_fall
+  0.07,   # t_rate_winter
+  10,     # c_rate_weekday
+  2       # c_rate_weekend
+)
+param_names <- c(
+  "Recovery rate",
+  "Transmission rate (spring)",
+  "Transmission rate (summer)",
+  "Transmission rate (fall)",
+  "Transmission rate (winter)",
+  "Contact rate (weekday)",
+  "Contact rate (weekend)"
+)
+
+stats_names <- c(
+  "Time to peak",
+  "Size of peak",
+  "Mean (cases)",
+  "Standard deviation (cases)"
+)
+
+#' Define the LFMCMC simulation function
+#'
+#' @description
+#' `lfmcmc_simulation_fun` defines the simulation function for LFMCMC.
+#'
+#' @details
+#' In LFMCMC, the simulation function run the model with the proposed
+#' parameters and returns a simulated dataset that looks like the
+#' observed dataset. This simulation function runs the SIR CONN
+#' model created in earlier steps with the given model parameters,
+#' adjusting contact rates for weekday vs weekend and adjusting
+#' transmission rates based on the season. It then returns a set
+#' of COVID-19 case counts for the same period as the observed case
+#' counts.
+#'
+#' @param params Vector of numeric model parameters.
+#'
+#' @returns A simulated set of COVID-19 case counts.
+lfmcmc_simulation_fun <- function(params) {
+  # Extract parameters
+  r_rate          <- params[1]
+  t_rate_spring   <- params[2]
+  t_rate_summer   <- params[3]
+  t_rate_fall     <- params[4]
+  t_rate_winter   <- params[5]
+  c_rate_weekday  <- params[6]
+  c_rate_weekend  <- params[7]
+
+  # Set recovery rate
+  set_param(covid_sirconn_model, "Recovery rate", r_rate)
+
+  # Global event to change contact and transmission rates
+  change_c_and_t_rates <- function(model) {
+    # Get the current model day (step)
+    current_model_day <- today(model)
+
+    ## Update contact rate based on weekday/weekend
+    if (any(c(6, 0) %in% (current_model_day %% 7L))) {
+      set_param(model, "Contact rate", c_rate_weekend)
+    } else {
+      set_param(model, "Contact rate", c_rate_weekday)
+    }
+
+    ## Update transmission rate each season
+    if (current_model_day == seasons[["spring"]]) {
+      set_param(model, "Transmission rate", t_rate_spring)
+    } else if (current_model_day == seasons[["summer"]]) {
+      set_param(model, "Transmission rate", t_rate_summer)
+    } else if (current_model_day == seasons[["fall"]]) {
+      set_param(model, "Transmission rate", t_rate_fall)
+    } else if (current_model_day == seasons[["winter"]]) {
+      set_param(model, "Transmission rate", t_rate_winter)
+    }
+
+    invisible(model)
+  }
+
+  # Add global event to the model
+  change_c_and_t_event_name <- "Change Contact and Transmission Rates"
+  globalevent_fun(change_c_and_t_rates, name = change_c_and_t_event_name) |>
+    add_globalevent(model = covid_sirconn_model)
+
+  # Run the model
+  verbose_off(covid_sirconn_model)
+  run(covid_sirconn_model, ndays = model_ndays)
+
+  # Remove global event (new event set each simulation run)
+  rm_globalevent(covid_sirconn_model, change_c_and_t_event_name)
+
+  # Get infected cases
+  hist_matrix <- get_hist_transition_matrix(covid_sirconn_model)
+  # - Drop the last day of data because the model returns data for (model_ndays + 1) days
+  infected_cases <- head(
+    hist_matrix[
+      hist_matrix$state_to == "Infected" &
+        hist_matrix$state_from == "Susceptible",
+      c("counts")],
+    -1
+  )
+
+  return(as.double(infected_cases))
+}
+
+# Create the LFMCMC object
+calibration_lfmcmc <- LFMCMC(covid_sirconn_model) |>
+  set_simulation_fun(lfmcmc_simulation_fun) |>
+  set_summary_fun(lfmcmc_summary_fun) |>
+  set_proposal_fun(lfmcmc_proposal_fun) |>
+  set_kernel_fun(lfmcmc_kernel_fun) |>
+  set_observed_data(covid_cases)
+
+# Run LFMCMC calibration
+run_lfmcmc(
+  lfmcmc = calibration_lfmcmc,
+  params_init_ = init_lfmcmc_params,
+  n_samples_ = lfmcmc_n_samples,
+  epsilon_ = lfmcmc_epsilon,
+  seed = model_seed
+)
+set_params_names(calibration_lfmcmc, param_names)
+set_stats_names(calibration_lfmcmc, stats_names)
+
+## ----------------------------------
+## RUN FORECAST
+## ----------------------------------
+# Create a new SIR CONN model
+# - Compute prevalance based on most recent day
+forecast_prevalence <- covid_cases[90] / model_n
+# - Init the model
+covid_sirconn_model <- ModelSIRCONN(
+  name              = "COVID-19",
+  n                 = model_n,
+  prevalence        = forecast_prevalence,
+  contact_rate      = init_contact_rate,
+  transmission_rate = init_transmission_rate,
+  recovery_rate     = init_recovery_rate
+)
+
+# Run the simulation for each set of params in the sample
+# - Select sample of accepted params from LFMCMC
+forecast_sample_n <- 200 # Sample size
+params_sample <- get_params_sample(calibration_lfmcmc,
+  lfmcmc_n_samples,
+  lfmcmc_burnin,
+  forecast_sample_n)
+# - Set forecast length
+model_ndays <- 14
+# - Run simulation function for each set of params from the sample
+forecast_dist <- apply(params_sample, 1, lfmcmc_simulation_fun)
diff --git a/forecast-pkg.R b/forecast-pkg.R
new file mode 100644
index 0000000..241084b
--- /dev/null
+++ b/forecast-pkg.R
@@ -0,0 +1,107 @@
+# Needed libraries
+library(epiworldR)
+
+#' Download data from a URL
+#'
+#' @description
+#' `get_data_from_url` downloads a CSV file from a URL and returns the file's data.
+#'
+#' @details
+#' Downloads a zip archive from the given 'data_url',
+#' selects the CSV file matching 'target_file' from the zip archive,
+#' extracts the data from the target CSV file, and returns the data.
+#'
+#' @param data_url String URL pointing to the data file (zip archive).
+#' @param target_file String filename of a CSV file in the zip archive.
+#'
+#' @returns The contents of the CSV data in `target_file` using `read.csv()`
+get_data_from_url <- function(data_url, target_file) {
+  tryCatch({
+    # Download the zipped data
+    temp_file <- tempfile()
+    download.file(data_url, temp_file)
+    # Select the appropriate data file
+    datafile_name <- grep(
+      target_file,
+      unzip(temp_file, list = TRUE)$Name,
+      ignore.case = TRUE,
+      value = TRUE
+    )
+    datafile <- unz(temp_file, datafile_name)
+    # Extract the data
+    collected_data <- read.csv(datafile)
+    return(collected_data)
+  },
+  error = function(cond) {
+    message("Error: ", conditionMessage(cond))
+    return(-1)
+  },
+  warning = function(cond) {
+    message("Warning: ", conditionMessage(cond))
+    return(-2)
+  },
+  finally = {
+    # Cleanup
+    unlink(temp_file)
+  }
+  )
+}
+
+#' Get the season of a given date
+#'
+#' @description
+#' `get_date_season` returns the season (spring, summer, fall, winter) for the given date.
+#'
+#' @param date Date object.
+#'
+#' @returns A string representing the season for the given date:
+#' "spring", "summer", "fall", or "winter"
+get_date_season <- function(date) {
+  date_month <- as.integer(format(as.Date(date, format = "%d/%m/%Y"), "%m"))
+
+  if (date_month >= 3 && date_month <= 5) {
+    return("spring")
+  } else if (date_month >= 6 && date_month <= 8) {
+    return("summer")
+  } else if (date_month >= 9 && date_month <= 11) {
+    return("fall")
+  } else {
+    return("winter")
+  }
+}
+
+#' Get start date of each season from a list of dates
+#'
+#' @description
+#' `get_season_starts` returns the start of each season (spring, summer,
+#' fall, winter) from a list of dates
+#'
+#' @details
+#' This function takes a list of Date objects in chronological order and uses
+#' the [get_date_season] function assign a season to each date in the list.
+#' The function then finds the position of the first instance of each season
+#' in the list, returning a dictionary of four integer values corresponding to
+#' the start of each season. The dictionary values are accessed using the
+#' lowercase string name of the season ("spring", "summer", "fall",
+#' or "winter"). If no match was found, the value of the season start
+#' will be '-1'.
+#'
+#' @param dates A list of Date objects in chronological order.
+#'
+#' @returns A dictionary containing the starting positiong of
+#' each season, accessed with the lowercase string name of
+#' the season ("spring", "summer", "fall", or "winter").
+get_season_starts <- function(dates) {
+  seasons <- mapply(get_date_season, dates)
+  season_names <- c("spring", "summer", "fall", "winter")
+
+  spring_start <- match(season_names[1], seasons, nomatch = -1)
+  summer_start <- match(season_names[2], seasons, nomatch = -1)
+  fall_start <- match(season_names[3], seasons, nomatch = -1)
+  winter_start <- match(season_names[4], seasons, nomatch = -1)
+
+  season_starts <- c(spring_start, summer_start, fall_start, winter_start)
+  names(season_starts) <- season_names
+
+  return(season_starts)
+}
diff --git a/index.qmd b/index.qmd
index a3acf9c..5f3da77 100644
--- a/index.qmd
+++ b/index.qmd
@@ -15,50 +15,16 @@ Our forecast is calibrated on case counts from the last 90 days.
 
 ```{r}
 #| label: get-data
-#| cache: true
-# Download the Trends data from Utah DHHS
-source("get-forecast-data.R")
-data_url <- "https://coronavirus-dashboard.utah.gov/Utah_COVID19_data.zip"
-target_file_regex <- "Trends_Epidemic+"
-forecast_data <- get_forecast_data(data_url, target_file_regex)
-forecast_data$Date <- as.Date(forecast_data$Date)
-
-# Get data from last 90 days
-last_date <- max(forecast_data$Date)
-forecast_data <- forecast_data[forecast_data$Date > (last_date - 90), ]
+# Load in needed files
+source("covid-forecast.R")
 ```
 
 ```{r}
-#| label: plot-data
+#| label: plot-observed-data
 #| fig-width: 7
 #| fig-height: 3
 #| fig-align: center
-library(ggplot2)
-ggplot(forecast_data, aes(x = Date, y = Daily.Cases)) +
-  geom_line() +
-  labs(x = "Date", y = "Daily Cases") +
-  theme_classic()
-
-# Identify the start date of each season (spring, summer, fall, winter) in data
-get_date_season <- function(date) {
-  date_month <- as.integer(format(as.Date(date, format = "%d/%m/%Y"), "%m"))
-
-  if (date_month >= 3 && date_month <= 5) {
-    return("spring")
-  } else if (date_month >= 6 && date_month <= 8) {
-    return("summer")
-  } else if (date_month >= 9 && date_month <= 11) {
-    return("fall")
-  } else {
-    return("winter")
-  }
-}
-forecast_seasons <- mapply(get_date_season, forecast_data$Date)
-
-spring_start <- match("spring", forecast_seasons, nomatch = -1)
-summer_start <- match("summer", forecast_seasons, nomatch = -1)
-fall_start <- match("fall", forecast_seasons, nomatch = -1)
-winter_start <- match("winter", forecast_seasons, nomatch = -1)
+plot_covid_data(covid_data)
 ```
 
 ## Calibrating the Forecasting Model
@@ -68,7 +34,7 @@ We use epiworldR's implementation of Likelihood-Free Markhov Chain Monte Carlo (
 * Transmission rates for each season (spring, summer, fall, winter)
 * Contact rates for weekdays and weekends
 
-The LFMCMC simulation runs for 3,000 iterations.
+The LFMCMC simulation runs for 6,000 iterations.
 In each iteration, it does the following:
 
 * Proposes a new set of parameter values
@@ -78,271 +44,18 @@ In each iteration, it does the following:
 We note that while the population of Utah over 3 million, we only need to run our model with 10,000 agents.
 This is because we don't expect more than 10,000 COVID-19 cases and the model automatically scales the contact rate to account for the difference between the model population and Utah's population.
 
-```{r}
-#| label: prep-model
-library(epiworldR)
-
-# Extract cases from observed data
-forecast_data_cases <- forecast_data$Daily.Cases
-
-# Set model parameters
-model_seed <- 112
-model_ndays <- 90
-model_n <- 10000    # model population size
-n_samples <- 6000   # number of LFMCMC iterations
-n_burnin <- 2000    # burn-in period
-lfmcmc_epsilon <- 0.25
-
-init_prevalence <- forecast_data_cases[1] / model_n
-init_contact_rate <- 10
-init_transmission_rate <- 0.05
-
-init_lfmcmc_params <- c(
-  1 / 7,  # recovery_rate
-  0.05,   # t_rate_spring
-  0.04,   # t_rate_summer
-  0.06,   # t_rate_fall
-  0.07,   # t_rate_winter
-  10,     # contact_rate_weekday
-  2       # contact_rate_weekend
-)
-
-# Create the base SIRCONN model
-ef_model <- ModelSIRCONN(
-  name              = "COVID-19",
-  n                 = model_n,
-  prevalence        = init_prevalence,
-  contact_rate      = init_contact_rate,
-  transmission_rate = init_transmission_rate,
-  recovery_rate     = init_lfmcmc_params[1]
-)
-```
-
-```{r}
-#| label: setup-lfmcmc
-# Define the LFMCMC simulation function
-lfmcmc_simulation_fun <- function(params) {
-  # Extract parameters
-  recovery_rate        <- params[1]
-  t_rate_spring        <- params[2]
-  t_rate_summer        <- params[3]
-  t_rate_fall          <- params[4]
-  t_rate_winter        <- params[5]
-  contact_rate_weekday <- params[6]
-  contact_rate_weekend <- params[7]
-
-  # Set recovery rate
-  set_param(ef_model, "Recovery rate", recovery_rate)
-
-  # Global event to change contact and transmission rates
-  change_c_and_t_rates <- function(model) {
-    # Get the current model day (step)
-    current_model_day <- today(model)
-
-    ## Update contact rate based on weekday/weekend
-    if (any(c(6, 0) %in% (current_model_day %% 7L))) {
-      set_param(model, "Contact rate", contact_rate_weekend)
-    } else {
-      set_param(model, "Contact rate", contact_rate_weekday)
-    }
-
-    ## Update transmission rate each season
-    if (current_model_day == spring_start) {
-      set_param(model, "Transmission rate", t_rate_spring)
-    } else if (current_model_day == summer_start) {
-      set_param(model, "Transmission rate", t_rate_summer)
-    } else if (current_model_day == fall_start) {
-      set_param(model, "Transmission rate", t_rate_fall)
-    } else if (current_model_day == winter_start) {
-      set_param(model, "Transmission rate", t_rate_winter)
-    }
-
-    invisible(model)
-  }
-
-  # Add global event to the model
-  change_c_and_t_event_name <- "Change Contact and Transmission Rates"
-  globalevent_fun(change_c_and_t_rates, name = change_c_and_t_event_name) |>
-    add_globalevent(model = ef_model)
-
-  # Run the model
-  verbose_off(ef_model)
-  run(ef_model, ndays = model_ndays)
-
-  # Remove global event (new event set each simulation run)
-  rm_globalevent(ef_model, change_c_and_t_event_name)
-
-  # Get infected cases
-  hist_matrix <- get_hist_transition_matrix(ef_model)
-  # - Drop the last day of data because the model returns data for (model_ndays + 1) days
-  infected_cases <- head(
-    hist_matrix[
-      hist_matrix$state_to == "Infected" &
-        hist_matrix$state_from == "Susceptible",
-      c("counts")],
-    -1
-  )
-
-  return(as.double(infected_cases))
-}
-
-# Define the LFMCMC summary function
-lfmcmc_summary_fun <- function(case_counts) {
-  # Extract summary statistics from the data
-  time_to_peak <- which.max(case_counts)
-  size_of_peak <- case_counts[time_to_peak]
-  mean_cases <- mean(case_counts)
-  sd_cases <- sd(case_counts)
-
-  c(
-    time_to_peak,
-    size_of_peak,
-    mean_cases,
-    sd_cases
-  )
-}
-
-# Define the LFMCMC summary function
-lfmcmc_proposal_fun <- function(params_prev) {
-  # Propose new model parameters
-  params_1_to_5 <- plogis(
-    qlogis(params_prev[1:5]) +
-      rnorm(length(params_prev[1:5]), mean = 0, sd = 0.025)
-  )
-
-  params_6_to_7 <- params_prev[6:7] +
-    rnorm(2, mean = 0, sd = 0.025)
-
-
-  # Reflect contact rates
-  if (params_6_to_7[1] < 0) {
-    params_6_to_7[1] <- params_prev[6] -
-      (params_6_to_7[1] - params_prev[6])
-  }
-  if (params_6_to_7[2] < 0) {
-    params_6_to_7[2] <- params_prev[7] -
-      (params_6_to_7[2] - params_prev[7])
-  }
-
-  # Return proposed parameters
-  c(params_1_to_5, params_6_to_7)
-}
-
-# Define the LFMCMC kernel function
-# - Weighs simulation results against observed data
-lfmcmc_kernel_fun <- function(simulated_stats, observed_stats, epsilon) {
-  diff <- ((simulated_stats - observed_stats)^2)^epsilon
-  dnorm(sqrt(sum(diff)))
-}
-
-# Create the LFMCMC model
-lfmcmc_model <- LFMCMC(ef_model) |>
-  set_simulation_fun(lfmcmc_simulation_fun) |>
-  set_summary_fun(lfmcmc_summary_fun) |>
-  set_proposal_fun(lfmcmc_proposal_fun) |>
-  set_kernel_fun(lfmcmc_kernel_fun) |>
-  set_observed_data(forecast_data_cases)
-```
-
-```{r}
-#| label: run-lfmcmc
-run_lfmcmc(
-  lfmcmc = lfmcmc_model,
-  params_init_ = init_lfmcmc_params,
-  n_samples_ = n_samples,
-  epsilon_ = lfmcmc_epsilon,
-  seed = model_seed
-)
-```
-
-When the simulation is finished, we use a burn-in period of n = 2000 (66% of simulation iterations).
+When the simulation is finished, we use a burn-in period of n = 2,000 (33% of simulation iterations).
 The epiworldR results printout (below) shows the mean parameter/statistic value, the 95% credible interval (in [ ]), and the initial/observed value (in ()).
 ```{r}
-#| label: print-results
-# Print the results with 50% burnin
-param_names <- c(
-  "Recovery rate",
-  "Transmission rate (spring)",
-  "Transmission rate (summer)",
-  "Transmission rate (fall)",
-  "Transmission rate (winter)",
-  "Contact rate (weekday)",
-  "Contact rate (weekend)"
-)
-set_params_names(lfmcmc_model, param_names)
-
-stats_names <- c(
-  "Time to peak",
-  "Size of peak",
-  "Mean (cases)",
-  "Standard deviation (cases)"
-)
-set_stats_names(lfmcmc_model, stats_names)
-
-print(lfmcmc_model, burnin = n_burnin)
+#| label: print-lfmcmc-results
+print(calibration_lfmcmc, burnin = lfmcmc_burnin)
 ```
-Here is the posterior distribution of the LFMCMC samples with vertical lines representing the initial parameter values.
-```{r}
-#| label: get-posterior-dist
-
-accepted_params <- get_accepted_params(lfmcmc_model)
-accepted_params <- lapply(seq_along(param_names), \(i) {
-  data.frame(
-    step  = seq_along(nrow(accepted_params)),
-    param = param_names[i],
-    value = accepted_params[, i]
-  )
-}) |> do.call(what = "rbind")
-
-# Select transmission rates to plot
-t_params_used <- character()
-t_values_used <- numeric()
-if (spring_start >= 0) {
-  t_params_used <- c(t_params_used, param_names[2])
-  t_values_used <- c(t_values_used, init_lfmcmc_params[2])
-}
-if (summer_start >= 0) {
-  t_params_used <- c(t_params_used, param_names[3])
-  t_values_used <- c(t_values_used, init_lfmcmc_params[3])
-}
-if (fall_start >= 0) {
-  t_params_used <- c(t_params_used, param_names[4])
-  t_values_used <- c(t_values_used, init_lfmcmc_params[4])
-}
-if (winter_start >= 0) {
-  t_params_used <- c(t_params_used, param_names[5])
-  t_values_used <- c(t_values_used, init_lfmcmc_params[5])
-}
-
-# Extract transmission, recovery, and contact rates
-t_rates <- accepted_params[accepted_params$param == t_params_used, ]
-r_rate <- accepted_params[accepted_params$param == param_names[1], ]
-c_rates <- accepted_params[accepted_params$param == param_names[6:7], ]
-t_r_c_rates <- rbind(r_rate, c_rates, t_rates)
-
-# Generate initial values for vertical lines in each plot
-init_values <- c(init_lfmcmc_params[6],
-  init_lfmcmc_params[7],
-  init_lfmcmc_params[1],
-  t_values_used)
 
-init_names <- c(param_names[6],
-  param_names[7],
-  param_names[1],
-  t_params_used)
-
-init_df <- data.frame(param = init_names,
-  value = init_values)
+Here is the posterior distribution of the LFMCMC samples with vertical lines representing the initial parameter values.
 
-# Plot parameter distributions
-ggplot(t_r_c_rates, aes(x = value,
-  fill = param,
-  y = after_stat(scaled))) +
-  geom_density(alpha = .3) +
-  geom_vline(aes(xintercept = value, color = param),
-    data = init_df) +
-  facet_wrap(~param, scales = "free", ncol = 1) +
-  theme_light()
+```{r}
+#| label: plot-posterior-dist
+plot_lfmcmc_post_dist(calibration_lfmcmc, init_lfmcmc_params, param_names, seasons)
 ```
 
 ## Epiworld Forecast
@@ -355,84 +68,7 @@ The forecast mean is shown below along with the 50% and 95% confidence intervals
 
 ```{r}
 #| label: run-forecast
-library(tidyr)
-
-forecast_sample_n <- 200
-
-# Run model for 2 weeks
-model_ndays <- 14
-
-# Compute prevalance based on most recent day
-forecast_prevalence <- forecast_data_cases[90] / model_n
-
-# Select parameters from second quantile of accepted params
-accepted_params <- get_accepted_params(lfmcmc_model)
-burnin_sample <- tail(accepted_params, n = (n_samples - n_burnin))
-params_sample <- burnin_sample[sample(nrow(burnin_sample), forecast_sample_n), ]
-
-# Create the new model
-ef_model <- ModelSIRCONN(
-  name              = "COVID-19",
-  n                 = model_n,
-  prevalence        = forecast_prevalence,
-  contact_rate      = init_contact_rate,
-  transmission_rate = init_transmission_rate,
-  recovery_rate     = init_lfmcmc_params[1]
-)
-
-# Run the simulation for each set of params in the sample
-forecast_dist <- apply(params_sample, 1, lfmcmc_simulation_fun)
-
-# Find 2.5%, 25%, 50%, 75%, and 97.5% quantiles
-forecast_quantiles <- apply(forecast_dist, 1, quantile, probs = c(0.025, 0.25, 0.5, 0.75, 0.975))
-
-# Combine observed data with sample median for plotting forecast
-observed_df <- data.frame(
-  date = forecast_data$Date[60:90],
-  counts = forecast_data$Daily.Cases[60:90],
-  observed = TRUE,
-  lb_95 = NA,
-  ub_95 = NA,
-  lb_50 = NA,
-  ub_50 = NA
-)
-sample_df <- data.frame(
-  date = forecast_data$Date[90] + 0:13,
-  counts = forecast_quantiles["50%", ],
-  observed = FALSE,
-  lb_95 = forecast_quantiles["2.5%", ],
-  ub_95 = forecast_quantiles["97.5%", ],
-  lb_50 = forecast_quantiles["25%", ],
-  ub_50 = forecast_quantiles["75%", ]
-)
-
-forecast_df <- rbind(observed_df, sample_df)
-```
-```{r}
-#| label: plot-forecast
-# Use color-blind-friendly palette:
-cbb_light_blue <- "#56B4E9"
-cbb_palette <- c(cbb_light_blue, "black")
-
-ggplot(forecast_df,
-  aes(x = date)) +
-  geom_ribbon(aes(ymin = lb_95, ymax = ub_95),
-    fill = cbb_light_blue,
-    alpha = 0.4,
-    na.rm = TRUE) +
-  geom_ribbon(aes(ymin = lb_50, ymax = ub_50),
-    fill = cbb_light_blue,
-    alpha = 0.4,
-    na.rm = TRUE) +
-  geom_point(aes(y = counts,
-    color = observed)) +
-  geom_line(aes(y = counts,
-    color = observed)) +
-  labs(x = "Date", y = "Daily Cases") +
-  scale_colour_manual(values = cbb_palette,
-    labels = c("Forecasted Cases", "Observed Cases")) +
-  scale_y_continuous(n.breaks = 20) +
-  theme_bw()
+plot_forecast(forecast_dist, covid_data)
 ```
 
 ## Acknowledgements
diff --git a/plot-forecast.R b/plot-forecast.R
new file mode 100644
index 0000000..7d8ed33
--- /dev/null
+++ b/plot-forecast.R
@@ -0,0 +1,168 @@
+# Needed libraries
+library(ggplot2)
+
+#' Plot observed COVID-19 data from UDHHS
+#'
+#' @description
+#' `plot_covid_data` plots the COVID-19 dataset downloaded from UDHHS.
+#'
+#' @details
+#' This function uses `ggplot` to plot the COVID-19 data
+#' as a time-series with Date along the x-axis and
+#' case counts along the y-axis.
+#'
+#' @param data The data to plot.
+#'
+#' @returns The plot of the data.
+plot_covid_data <- function(data) {
+  ggplot(data, aes(x = Date, y = Daily.Cases)) +
+    geom_line() +
+    labs(x = "Date", y = "Daily Cases") +
+    theme_classic()
+}
+
+#' Plot posterior distribution of model parameters
+#'
+#' @description
+#' `plot_lfmcmc_post_dist` plots posterior distribution of the
+#' model parameters accepted during calibration.
+#'
+#' @details
+#' This function plots the distribution of accepted parameters
+#' from the calibration run of LFMCMC. It also plots a
+#' vertical line representing the initial parameter value.
+#'
+#' @param lfmcmc_object Object of class [LFMCMC] which performed the simulation.
+#' @param init_params Vector of the initial parameter values.
+#' @param param_names The string names of the parameters.
+#' @param seasons The dictionary of season start positions.
+#'
+#' @returns The plot of the posterior distribution of model parameters.
+plot_lfmcmc_post_dist <- function(lfmcmc_object, init_params, param_names, seasons) {
+
+  accepted_params <- get_accepted_params(lfmcmc_object)
+  accepted_params <- lapply(seq_along(param_names), \(i) {
+    data.frame(
+      step  = seq_len(nrow(accepted_params)),
+      param = param_names[i],
+      value = accepted_params[, i]
+    )
+  }) |> do.call(what = "rbind")
+
+  # Select transmission rates to plot
+  t_params_used <- character()
+  t_values_used <- numeric()
+  if (seasons[["spring"]] >= 0) {
+    t_params_used <- c(t_params_used, param_names[2])
+    t_values_used <- c(t_values_used, init_params[2])
+  }
+  if (seasons[["summer"]] >= 0) {
+    t_params_used <- c(t_params_used, param_names[3])
+    t_values_used <- c(t_values_used, init_params[3])
+  }
+  if (seasons[["fall"]] >= 0) {
+    t_params_used <- c(t_params_used, param_names[4])
+    t_values_used <- c(t_values_used, init_params[4])
+  }
+  if (seasons[["winter"]] >= 0) {
+    t_params_used <- c(t_params_used, param_names[5])
+    t_values_used <- c(t_values_used, init_params[5])
+  }
+
+  # Extract transmission, recovery, and contact rates
+  t_rates <- accepted_params[accepted_params$param == t_params_used, ]
+  r_rate <- accepted_params[accepted_params$param == param_names[1], ]
+  c_rates <- accepted_params[accepted_params$param == param_names[6:7], ]
+  t_r_c_rates <- rbind(r_rate, c_rates, t_rates)
+
+  # Generate initial values for vertical lines in each plot
+  init_values <- c(init_params[6],
+    init_params[7],
+    init_params[1],
+    t_values_used)
+
+  init_names <- c(param_names[6],
+    param_names[7],
+    param_names[1],
+    t_params_used)
+
+  init_df <- data.frame(param = init_names,
+    value = init_values)
+
+  # Plot parameter distributions
+  ggplot(t_r_c_rates, aes(x = value,
+    fill = param,
+    y = after_stat(scaled))) +
+    geom_density(alpha = .3) +
+    geom_vline(aes(xintercept = value, color = param),
+      data = init_df) +
+    facet_wrap(~param, scales = "free", ncol = 1) +
+    theme_light()
+
+}
+
+#' Plot forecasted COVID-19 case counts
+#'
+#' @description
+#' `plot_forecast` plots the forecasted COVID case counts with 50% and 95% confidence intervals.
+#'
+#' @details
+#' This function plots the observed COVID-19 case count data from
+#' the last 30 days (in black), appending to the end of this data the
+#' forecasted case counts (in blue). It also plots the 50% and 95%
+#' confidence intervals for the forecasted case counts.
+#'
+#' @param forecast_dist An array of forecasted case counts.
+#' @param covid_data A data frame of observed COVID-19 case counts.
+#'
+#' @returns The plot of the forecasted COVID-19 case counts.
+plot_forecast <- function(forecast_dist, covid_data) {
+  # Find 2.5%, 25%, 50%, 75%, and 97.5% quantiles
+  forecast_quantiles <- apply(forecast_dist, 1, quantile, probs = c(0.025, 0.25, 0.5, 0.75, 0.975))
+
+  # Combine observed data with sample median for plotting forecast
+  observed_df <- data.frame(
+    date = covid_data$Date[60:90],
+    counts = covid_data$Daily.Cases[60:90],
+    observed = TRUE,
+    lb_95 = NA,
+    ub_95 = NA,
+    lb_50 = NA,
+    ub_50 = NA
+  )
+  sample_df <- data.frame(
+    date = covid_data$Date[90] + 0:13,
+    counts = forecast_quantiles["50%", ],
+    observed = FALSE,
+    lb_95 = forecast_quantiles["2.5%", ],
+    ub_95 = forecast_quantiles["97.5%", ],
+    lb_50 = forecast_quantiles["25%", ],
+    ub_50 = forecast_quantiles["75%", ]
+  )
+
+  forecast_df <- rbind(observed_df, sample_df)
+
+  # Use color-blind-friendly palette:
+  cbb_light_blue <- "#56B4E9"
+  cbb_palette <- c(cbb_light_blue, "black")
+
+  ggplot(forecast_df,
+    aes(x = date)) +
+    geom_ribbon(aes(ymin = lb_95, ymax = ub_95),
+      fill = cbb_light_blue,
+      alpha = 0.4,
+      na.rm = TRUE) +
+    geom_ribbon(aes(ymin = lb_50, ymax = ub_50),
+      fill = cbb_light_blue,
+      alpha = 0.4,
+      na.rm = TRUE) +
+    geom_point(aes(y = counts,
+      color = observed)) +
+    geom_line(aes(y = counts,
+      color = observed)) +
+    labs(x = "Date", y = "Daily Cases") +
+    scale_colour_manual(values = cbb_palette,
+      labels = c("Forecasted Cases", "Observed Cases")) +
+    scale_y_continuous(n.breaks = 20) +
+    theme_bw()
+}