Merge pull request #50 from mrc-ide/python-interface-poc

Add Python interface
mrc-ide · Feb 10, 2025 · 921562d · 921562d
2 parents b4e9cd9 + 32afe91
commit 921562d
Show file tree

Hide file tree

Showing 82 changed files with 2,583 additions and 259 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -12,3 +12,4 @@ docker
 ^tmp$
 ^README\.Rmd$
 ^CPPLINT\.cfg$
+^leapfrog-py$
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -47,6 +47,9 @@ jobs:
           extra-packages: any::rcmdcheck, any::here
           needs: check
 
+      - name: Extract test data
+        run: ./inst/standalone_model/extract_data
+
       - uses: r-lib/actions/check-r-package@v2
         with:
           error-on: '"error"'

diff --git a/.github/workflows/leapfrog-py-deploy.yaml b/.github/workflows/leapfrog-py-deploy.yaml
@@ -0,0 +1,78 @@
+name: Build and upload to PyPI
+
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - main
+  release:
+    types:
+      - published
+
+defaults:
+  run:
+    working-directory: leapfrog-py
+
+jobs:
+  build_wheels:
+    # Builds the wheels once per runner listed in the matrix and stores them
+    # as a workflow artifact.
+    name: Build wheels on ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        # macos-13 is an intel runner, macos-14 is apple silicon
+        os: [ubuntu-latest, windows-latest, macos-13, macos-14]
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Build wheels
+        uses: pypa/cibuildwheel@v2.19.1
+        with:
+          # The Python package lives in the leapfrog-py subdirectory
+          package-dir: leapfrog-py
+        env:
+          # Issue with compiling pybind11 eigen header as it has an invalid
+          # conversion from size_t to int
+          CIBW_SKIP: "pp38-manylinux_i686 pp39-manylinux_i686 pp310-manylinux_i686"
+
+      - uses: actions/upload-artifact@v4
+        with:
+          # Name starts with `cibw-` so it matches the `cibw-*` download
+          # pattern used by the upload_pypi job
+          name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
+          path: ./wheelhouse/*.whl
+
+  build_sdist:
+    # Builds the source distribution and stores it as a workflow artifact.
+    name: Build source distribution
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      # Runs in leapfrog-py (defaults.run.working-directory), so the sdist is
+      # written to leapfrog-py/dist
+      - name: Build sdist
+        run: pipx run build --sdist
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: cibw-sdist
+          # `uses` steps ignore defaults.run.working-directory, so this path
+          # is resolved from the repository root
+          path: leapfrog-py/dist/*.tar.gz
+          # Fail instead of silently publishing a release without an sdist
+          if-no-files-found: error
+
+  upload_pypi:
+    # Collects the artifacts produced by build_wheels and build_sdist and
+    # publishes them. Only runs for published releases (see `if` below), so
+    # pushes to main and manual dispatches build but never upload.
+    needs: [build_wheels, build_sdist]
+    runs-on: ubuntu-latest
+    environment: pypi
+    # NOTE(review): no API token is passed to the publish step, so this
+    # presumably relies on PyPI trusted publishing via the OIDC id-token -
+    # confirm the `pypi` environment is registered as a trusted publisher.
+    permissions:
+      id-token: write
+    if: github.event_name == 'release' && github.event.action == 'published'
+    # or, alternatively, upload to PyPI on every tag starting with 'v' (remove on: release above to use this)
+    # if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
+
+    steps:
+      - uses: actions/download-artifact@v4
+        with:
+          # unpacks all CIBW artifacts into dist/
+          pattern: cibw-*
+          path: dist
+          merge-multiple: true
+
+      - uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          # To test: repository-url: https://test.pypi.org/legacy/
+          repository-url: https://upload.pypi.org/legacy/
diff --git a/.github/workflows/leapfrog-py-test.yaml b/.github/workflows/leapfrog-py-test.yaml
@@ -0,0 +1,86 @@
+name: leapfrog-py test
+
+on:
+  pull_request:
+    # `*` does not match `/`, so `**` is needed to also run on pull requests
+    # whose base branch has a slash in its name (e.g. `feature/foo`)
+    branches:
+      - "**"
+  push:
+    branches:
+      - main
+
+jobs:
+  run:
+    # Runs the Python tests, lint and an install smoke test for every
+    # OS / Python version pair listed in the matrix below.
+
+    runs-on: ${{ matrix.config.os }}
+
+    name: ${{matrix.config.os }} (${{ matrix.config.py }})
+
+    strategy:
+      # Let the remaining configurations finish when one of them fails
+      fail-fast: false
+      matrix:
+        config:
+          - {os: macos-latest,   py: '3.13'}
+          - {os: ubuntu-latest,  py: '3.10'}
+          - {os: ubuntu-latest,  py: '3.11'}
+          - {os: ubuntu-latest,  py: '3.12'}
+          - {os: ubuntu-latest,  py: '3.13'}
+          - {os: windows-latest, py: '3.13'}
+
+    defaults:
+      run:
+        working-directory: leapfrog-py
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          enable-cache: true
+          cache-dependency-glob: "leapfrog-py/uv.lock"
+
+      - name: "Set up Python"
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.config.py }}
+
+      # Path is relative to leapfrog-py, the default working directory for
+      # `run` steps in this job
+      - name: Extract test data
+        shell: bash
+        run: ../inst/standalone_model/extract_data
+
+      # We need to install R to run the test to compare R and Python output
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          use-public-rspm: true
+          extra-repositories: https://mrc-ide.r-universe.dev
+          working-directory: .
+
+      # `working-directory: .` points the R steps at the repository root
+      # rather than at leapfrog-py
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: docopt, local::.
+          working-directory: .
+
+      - name: Install the project
+        run: uv sync --all-extras --dev
+
+      - name: Run tests & coverage
+        run: uv run pytest --cov --cov-report=xml --cov-config=pyproject.toml
+
+      - name: Lint
+        run: |
+          uvx ruff check .
+          uv run --group check mypy --install-types --non-interactive src tests
+
+      # Smoke test: the package installs with plain pip and can be imported
+      - name: Test install
+        run: |
+          pip install .
+          python -c "import leapfrog_py;"
+
+      - name: Upload to Codecov
+        uses: codecov/codecov-action@v4
+        with:
+          # This can be useful, but the false positive rate is
+          # annoyingly high.
+          fail_ci_if_error: false
+        env:
+          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -7,16 +7,16 @@ Authors@R: c(
            role = c("aut"),
            email = "jeffrey.eaton@imperial.ac.uk",
            comment = c(ORCID = "0000-0001-7728-728X")),
-    person(given = "Magdalene", 
-           family = "Walters", 
+    person(given = "Magdalene",
+           family = "Walters",
            role = c("aut"),
            email = "m.walters@imperial.ac.uk"),
-    person(given = "Robert", 
-           family = "Ashton", 
+    person(given = "Robert",
+           family = "Ashton",
            role = c("aut", "cre"),
            email = "robertashton94@gmail.com"),
-    person(given = "Mantra", 
-           family = "Kusumgar", 
+    person(given = "Mantra",
+           family = "Kusumgar",
            role = c("aut"),
            email = "m.kusumgar@imperial.ac.uk"))
 Description: Leapfrog is a multistate population projection model for estimating population, demographic indicators, and HIV epidemic. The model combines a standard cohort component model of population projection (CCMPP) with a multistate model for HIV infection, disease progression, and treatment. Statistical tools are implemented for joint inference from multiple demographic and epidemiologic data sources.

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
+export(frogger_input_name_mapping)
 export(prepare_leapfrog_demp)
 export(prepare_leapfrog_projp)
 export(run_model)

diff --git a/R/generate_cpp.R b/R/generate_cpp.R
@@ -611,3 +611,34 @@ read_types_csv <- function(path, validation_function, group_by_col) {
 
   group_list_of_lists(parsed_types, group_by_col)
 }
+
+
+#' Get mapping of R names to C++ names for input data
+#'
+#' Reads the input csv row by row and keeps only the rows that have an R
+#' name, i.e. the inputs which have to be passed in from R.
+#'
+#' @param input_csv Path to the input csv, which is the source of truth for
+#'   these names
+#'
+#' @return A list with one entry per mapped input, each holding `r_name`,
+#'   `cpp_name` and `struct` (the C++ struct/namespace it belongs to)
+#' @export
+frogger_input_name_mapping <- function(
+    input_csv = frogger_file("cpp_generation/model_input.csv")) {
+  input_data <- utils::read.csv(input_csv, colClasses = "character")
+  csv_name <- basename(input_csv)
+
+  parse_row <- function(row_num) {
+    ## Offset by one so the reported row matches what a spreadsheet shows,
+    ## where the header takes up the first row
+    parsed <- validate_and_parse_input(as.list(input_data[row_num, ]),
+                                       csv_name, row_num + 1)
+    if (parsed$r_name == "") {
+      ## No R name: the data is created as const by C++, so there is
+      ## nothing to map in
+      return(NULL)
+    }
+    list(
+      r_name = parsed$r_name,
+      cpp_name = parsed$cpp_name,
+      struct = parsed$struct
+    )
+  }
+
+  mappings <- lapply(seq_len(nrow(input_data)), parse_row)
+  Filter(Negate(is.null), mappings)
+}
diff --git a/R/spectrum_inputs.R b/R/spectrum_inputs.R
@@ -1,4 +1,3 @@
-
 read_sx <- function(pjnz, use_ep5=FALSE) {
 
   if(use_ep5) {
@@ -229,7 +228,7 @@ prepare_leapfrog_projp <- function(pjnz, hiv_steps_per_year = 10L, hTS = 3) {
   ## v$who34percelig <- who34percelig
 
   v$art_dropout_recover_cd4 <- if (projp$spectrum_version >= "6.14") {TRUE} else {FALSE}
-  
+
   ## Convert input percent dropout in 12 months to an annual rate (Rob Glaubius email 25 July 2024)
   v$art_dropout_rate <- -log(1.0 - projp$art_dropout/100)
 
@@ -272,3 +271,34 @@ prepare_leapfrog_projp <- function(pjnz, hiv_steps_per_year = 10L, hTS = 3) {
 
   v
 }
+
+# Used for testing: reads a saved test input and returns the pieces the
+# child model tests need, with a few parameters overridden
+setup_childmodel <- function(testinput) {
+  input <- readRDS(testinput)
+  parameters <- input$proj
+
+  parameters$ctx_effect <- 0.33
+  parameters$laf <- 1
+  parameters$paed_art_elig_age <- as.integer(parameters$paed_art_elig_age)
+  parameters$mat_prev_input <- rep(TRUE, 61)
+
+  pmtct_names <- c("Option A", "Option B", "SDNVP", "Dual ARV",
+                   "Option B+: before pregnancy", "Option B+: >4 weeks",
+                   "Option B+: <4 weeks")
+  pmtct_new <- array(0, dim = c(7, 61), dimnames = list(pmtct = pmtct_names))
+
+  ## Column totals of the first slice decide how each column was entered
+  number_totals <- colSums(parameters$pmtct)[, 1]
+  ## pick out which ones were inserted as numbers
+  as_number <- which(number_totals > 0)
+  pmtct_new[, as_number] <- parameters$pmtct[, as_number, 1]
+  ## pick out which ones were inserted as percent
+  as_percent <- which(number_totals == 0)
+  pmtct_new[, as_percent] <- parameters$pmtct[, as_percent, 2]
+  parameters$pmtct <- pmtct_new
+
+  list(
+    dp = input$dp,
+    demp = input$demp,
+    parameters = parameters,
+    pjnz = input$pjnz,
+    timedat.idx = input$timedat.idx,
+    pop1 = input$pop1_outputs,
+    ontrt = input$on_treatment,
+    offtrt = input$off_trt,
+    deaths_noart = input$deaths_noart,
+    deaths_art = input$deaths_art
+  )
+}
diff --git a/R/util.R b/R/util.R
@@ -30,7 +30,7 @@ serialize_r_to_tensor <- function(data, path) {
   data <- paste0(data, collapse = ",")
   lines <- c(type, dims, data)
   writeLines(lines, path)
-  path
+  invisible(path)
 }
 
 #' Deserialize an Eigen::Tensor into an R array
@@ -42,8 +42,12 @@ serialize_r_to_tensor <- function(data, path) {
 #' @keywords internal
 deserialize_tensor_to_r <- function(path) {
   content <- readLines(path)
+  converter <- switch(
+    content[[1]],
+    "int" = as.integer,
+    "double" = as.numeric)
   array(
-    as.numeric(strsplit(content[[3]], ",\\s*")[[1]]),
+    converter(strsplit(content[[3]], ",\\s*")[[1]]),
     as.numeric(strsplit(content[[2]], ",\\s*")[[1]])
   )
 }

diff --git a/README.Rmd b/README.Rmd
@@ -128,6 +128,23 @@ via [Rcpp](http://dirk.eddelbuettel.com/code/rcpp.html) and
 
 ## Development notes
 
+### Testing
+
+There is some pre-prepared test data available to make tests run faster. This is generated and saved by `./scripts/create_test_data.R`.
+
+We also have some separate data written out in a generic format which can be read to test the model directly from C++. This is in `inst/standalone_model/data` in zipped files.
+
+If this is your first time running you will need to unzip the standalone test data
+
+```
+./inst/standalone_model/extract_data
+```
+
+If you want to update the test data, it should be updated in the `./scripts/create_test_data.R` script so that we know how it was created and we can do it again fairly easily. Steps are:
+
+1. Update the script and generate the test data
+1. Update the standalone data, which is built from this, by running `./scripts/update_standalone_data`. You might need to add a new mapping from R to serialized name if you are adding new input data
+1. Unzip this for automated tests `./inst/standalone_model/extract_data`
+
 ### Simulation model
 
 * The model was implemented using _Eigen::Tensor_ containers. These were preferred

diff --git a/README.md b/README.md
@@ -142,6 +142,28 @@ simulation via [Rcpp](http://dirk.eddelbuettel.com/code/rcpp.html) and
 
 ## Development notes
 
+### Testing
+
+There is some pre-prepared test data available to make tests run faster.
+This is generated and saved by `./scripts/create_test_data.R`.
+
+We also have some separate data written out in a generic format which
+can be read to test the model directly from C++. This is in
+`inst/standalone_model/data` in zipped files.
+
+If this is your first time running you will need to unzip the standalone
+test data
+
+    ./inst/standalone_model/extract_data
+
+If you want to update the test data, it should be updated in the
+`./scripts/create_test_data.R` script so that we know how it was created
+and we can do it again fairly easily. Steps are:
+
+1.  Update the script and generate the test data
+2.  Update the standalone data, which is built from this, by running
+    `./scripts/update_standalone_data`. You might need to add a new
+    mapping from R to serialized name if you are adding new input data
+3.  Unzip this for automated tests
+    `./inst/standalone_model/extract_data`
+
 ### Simulation model
 
 - The model was implemented using *Eigen::Tensor* containers. These were

diff --git a/configure b/configure
@@ -1,3 +1,4 @@
 #!/bin/sh
 
-##$R_HOME/bin/Rscript inst/cpp_generation/generate.R
+echo "TODO turn generate back on when generating out state_types needed by Python"
+#$R_HOME/bin/Rscript inst/cpp_generation/generate.R
diff --git a/inst/cpp_generation/model_input.csv b/inst/cpp_generation/model_input.csv
@@ -12,7 +12,7 @@ cd4_prog,NaturalHistory.cd4_progression,real_type,ModelVariant::run_hiv_simulati
 artcd4elig_idx,Art.idx_hm_elig,int,ModelVariant::run_hiv_simulation,,TRUE,1,proj_years,,,
 cd4_initdist,NaturalHistory.cd4_initial_distribution,real_type,ModelVariant::run_hiv_simulation,,FALSE,3,base.hDS,base.hAG,base.NS,
 art_mort,Art.mortality,real_type,ModelVariant::run_hiv_simulation,,FALSE,4,base.hTS,base.hDS,base.hAG,base.NS
-artmx_timerr,Art.mortaility_time_rate_ratio,real_type,ModelVariant::run_hiv_simulation,,FALSE,2,base.hTS,proj_years,,
+artmx_timerr,Art.mortality_time_rate_ratio,real_type,ModelVariant::run_hiv_simulation,,FALSE,2,base.hTS,proj_years,,
 art_dropout_recover_cd4,Art.dropout_recover_cd4,int,ModelVariant::run_hiv_simulation,,FALSE,1,1,,,
 art_dropout_rate,Art.dropout_rate,real_type,ModelVariant::run_hiv_simulation,,FALSE,1,proj_years,,,
 art15plus_num,Art.adults_on_art,real_type,ModelVariant::run_hiv_simulation,,FALSE,2,base.NS,proj_years,,