Skip to content

Commit

Permalink
Merge pull request #50 from mrc-ide/python-interface-poc
Browse files Browse the repository at this point in the history
Add Python interface
  • Loading branch information
r-ash authored Feb 10, 2025
2 parents b4e9cd9 + 32afe91 commit 921562d
Show file tree
Hide file tree
Showing 82 changed files with 2,583 additions and 259 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ docker
^tmp$
^README\.Rmd$
^CPPLINT\.cfg$
^leapfrog-py$
3 changes: 3 additions & 0 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ jobs:
extra-packages: any::rcmdcheck, any::here
needs: check

- name: Extract test data
run: ./inst/standalone_model/extract_data

- uses: r-lib/actions/check-r-package@v2
with:
error-on: '"error"'
Expand Down
78 changes: 78 additions & 0 deletions .github/workflows/leapfrog-py-deploy.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
name: Build and upload to PyPI

on:
  workflow_dispatch:
  push:
    branches:
      - main
  release:
    types:
      - published

# NOTE: this default only applies to `run:` steps. Inputs of `uses:` steps
# (e.g. upload-artifact `path`) are still resolved against the workspace root.
defaults:
  run:
    working-directory: leapfrog-py

jobs:
  build_wheels:
    name: Build wheels on ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        # macos-13 is an intel runner, macos-14 is apple silicon
        os: [ubuntu-latest, windows-latest, macos-13, macos-14]

    steps:
      - uses: actions/checkout@v4

      - name: Build wheels
        uses: pypa/cibuildwheel@v2.19.1
        with:
          package-dir: leapfrog-py
        env:
          # Issue with compiling pybind11 eigen header as it has an invalid
          # conversion from size_t to int
          CIBW_SKIP: "pp38-manylinux_i686 pp39-manylinux_i686 pp310-manylinux_i686"

      - uses: actions/upload-artifact@v4
        with:
          name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
          path: ./wheelhouse/*.whl
          # Fail the job instead of silently publishing without wheels
          if-no-files-found: error

  build_sdist:
    name: Build source distribution
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      # Runs inside leapfrog-py (see defaults above), so the sdist is
      # written to leapfrog-py/dist
      - name: Build sdist
        run: pipx run build --sdist

      - uses: actions/upload-artifact@v4
        with:
          name: cibw-sdist
          # Path is relative to the workspace root, not to the default
          # working-directory used by `run:` steps
          path: leapfrog-py/dist/*.tar.gz
          if-no-files-found: error

  upload_pypi:
    needs: [build_wheels, build_sdist]
    runs-on: ubuntu-latest
    environment: pypi
    permissions:
      id-token: write
    if: github.event_name == 'release' && github.event.action == 'published'
    # or, alternatively, upload to PyPI on every tag starting with 'v' (remove on: release above to use this)
    # if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')

    steps:
      - uses: actions/download-artifact@v4
        with:
          # unpacks all CIBW artifacts into dist/
          pattern: cibw-*
          path: dist
          merge-multiple: true

      - uses: pypa/gh-action-pypi-publish@release/v1
        with:
          # To test: repository-url: https://test.pypi.org/legacy/
          repository-url: https://upload.pypi.org/legacy/
86 changes: 86 additions & 0 deletions .github/workflows/leapfrog-py-test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
name: leapfrog-py test

on:
  pull_request:
    branches:
      - "*"
  push:
    branches:
      - main

jobs:
  run:
    name: ${{matrix.config.os }} (${{ matrix.config.py }})
    runs-on: ${{ matrix.config.os }}

    strategy:
      fail-fast: false
      matrix:
        # Python versions are quoted so that 3.10 is not parsed as 3.1
        config:
          - os: macos-latest
            py: '3.13'
          - os: ubuntu-latest
            py: '3.10'
          - os: ubuntu-latest
            py: '3.11'
          - os: ubuntu-latest
            py: '3.12'
          - os: ubuntu-latest
            py: '3.13'
          - os: windows-latest
            py: '3.13'

    # `run:` steps execute from the Python package directory
    defaults:
      run:
        working-directory: leapfrog-py

    steps:
      - uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v5
        with:
          enable-cache: true
          cache-dependency-glob: "leapfrog-py/uv.lock"

      - name: "Set up Python"
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.config.py }}

      # The extract script lives in the R package, one level above leapfrog-py
      - name: Extract test data
        shell: bash
        run: ../inst/standalone_model/extract_data

      # R is required because one test compares the R output against the
      # Python output
      - uses: r-lib/actions/setup-r@v2
        with:
          use-public-rspm: true
          extra-repositories: https://mrc-ide.r-universe.dev
          working-directory: .

      - uses: r-lib/actions/setup-r-dependencies@v2
        with:
          extra-packages: docopt, local::.
          working-directory: .

      - name: Install the project
        run: uv sync --all-extras --dev

      - name: Run tests & coverage
        run: uv run pytest --cov --cov-report=xml --cov-config=pyproject.toml

      - name: Lint
        run: |
          uvx ruff check .
          uv run --group check mypy --install-types --non-interactive src tests

      - name: Test install
        run: |
          pip install .
          python -c "import leapfrog_py;"

      - name: Upload to Codecov
        uses: codecov/codecov-action@v4
        with:
          # Failing CI on upload errors can be useful, but the false
          # positive rate is annoyingly high.
          fail_ci_if_error: false
        env:
          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
12 changes: 6 additions & 6 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,16 @@ Authors@R: c(
role = c("aut"),
email = "jeffrey.eaton@imperial.ac.uk",
comment = c(ORCID = "0000-0001-7728-728X")),
person(given = "Magdalene",
family = "Walters",
person(given = "Magdalene",
family = "Walters",
role = c("aut"),
email = "m.walters@imperial.ac.uk"),
person(given = "Robert",
family = "Ashton",
person(given = "Robert",
family = "Ashton",
role = c("aut", "cre"),
email = "robertashton94@gmail.com"),
person(given = "Mantra",
family = "Kusumgar",
person(given = "Mantra",
family = "Kusumgar",
role = c("aut"),
email = "m.kusumgar@imperial.ac.uk"))
Description: Leapfrog is a multistate population projection model for estimating population, demographic indicators, and HIV epidemic. The model combines a standard cohort component model of population projection (CCMPP) with a multistate model for HIV infection, disease progression, and treatment. Statistical tools are implemented for joint inference from multiple demographic and epidemiologic data sources.
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

export(frogger_input_name_mapping)
export(prepare_leapfrog_demp)
export(prepare_leapfrog_projp)
export(run_model)
Expand Down
31 changes: 31 additions & 0 deletions R/generate_cpp.R
Original file line number Diff line number Diff line change
Expand Up @@ -611,3 +611,34 @@ read_types_csv <- function(path, validation_function, group_by_col) {

group_list_of_lists(parsed_types, group_by_col)
}


#' Map R input names onto their C++ names
#'
#' Reads the input csv row by row, validates each row and returns the
#' naming information for every row that has an R name. Rows whose
#' `r_name` is empty are left out of the result.
#'
#' @param input_csv Path to input csv which is source of truth for these names
#'
#' @return A list with one entry per retained csv row; each entry is a list
#'   holding `r_name`, `cpp_name`, and the C++ struct/namespace (`struct`)
#'   this belongs to
#' @export
frogger_input_name_mapping <- function(
    input_csv = frogger_file("cpp_generation/model_input.csv")) {
  rows <- utils::read.csv(input_csv, colClasses = "character")
  csv_name <- basename(input_csv)

  map_row <- function(idx) {
    ## Offset by one: when the csv is opened in excel the header line
    ## counts as a row, so this is the number a user would see
    fields <- validate_and_parse_input(as.list(rows[idx, ]),
                                       csv_name, idx + 1)
    if (fields$r_name == "") {
      ## No R name means the data is created as const by C++, so there
      ## is nothing to map in
      return(NULL)
    }
    list(
      r_name = fields$r_name,
      cpp_name = fields$cpp_name,
      struct = fields$struct
    )
  }

  mapping <- lapply(seq_len(nrow(rows)), map_row)
  mapping[!vlapply(mapping, is.null)]
}
34 changes: 32 additions & 2 deletions R/spectrum_inputs.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

read_sx <- function(pjnz, use_ep5=FALSE) {

if(use_ep5) {
Expand Down Expand Up @@ -229,7 +228,7 @@ prepare_leapfrog_projp <- function(pjnz, hiv_steps_per_year = 10L, hTS = 3) {
## v$who34percelig <- who34percelig

v$art_dropout_recover_cd4 <- if (projp$spectrum_version >= "6.14") {TRUE} else {FALSE}

## Convert input percent dropout in 12 months to an annual rate (Rob Glaubius email 25 July 2024)
v$art_dropout_rate <- -log(1.0 - projp$art_dropout/100)

Expand Down Expand Up @@ -272,3 +271,34 @@ prepare_leapfrog_projp <- function(pjnz, hiv_steps_per_year = 10L, hTS = 3) {

v
}

## Test helper: read a saved test input from `testinput` (an RDS file) and
## return the pieces the child model tests use, with a few parameters
## overridden and `pmtct` collapsed from three dimensions to two.
setup_childmodel <- function(testinput) {
  saved <- readRDS(testinput)

  params <- saved$proj
  params$ctx_effect <- 0.33
  params$laf <- 1
  params$paed_art_elig_age <- as.integer(params$paed_art_elig_age)
  params$mat_prev_input <- rep(TRUE, 61)

  ## Column totals of the first slice of pmtct decide, per column, which
  ## slice to read from: slice 1 where the total is positive (values
  ## inserted as numbers), slice 2 where it is zero (inserted as percent)
  number_totals <- colSums(params$pmtct)[, 1]
  as_number <- which(number_totals > 0)
  as_percent <- which(number_totals == 0)

  pmtct_flat <- array(
    0,
    dim = c(7, 61),
    dimnames = list(pmtct = c("Option A", "Option B", "SDNVP", "Dual ARV", "Option B+: before pregnancy", "Option B+: >4 weeks", "Option B+: <4 weeks"))
  )
  pmtct_flat[, as_number] <- params$pmtct[, as_number, 1]
  pmtct_flat[, as_percent] <- params$pmtct[, as_percent, 2]
  params$pmtct <- pmtct_flat

  list(
    dp = saved$dp,
    demp = saved$demp,
    parameters = params,
    pjnz = saved$pjnz,
    timedat.idx = saved$timedat.idx,
    pop1 = saved$pop1_outputs,
    ontrt = saved$on_treatment,
    offtrt = saved$off_trt,
    deaths_noart = saved$deaths_noart,
    deaths_art = saved$deaths_art
  )
}
8 changes: 6 additions & 2 deletions R/util.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ serialize_r_to_tensor <- function(data, path) {
data <- paste0(data, collapse = ",")
lines <- c(type, dims, data)
writeLines(lines, path)
path
invisible(path)
}

#' Deseralize an Eigen::Tensor into an R array
Expand All @@ -42,8 +42,12 @@ serialize_r_to_tensor <- function(data, path) {
#' @keywords internal
deserialize_tensor_to_r <- function(path) {
  ## File layout: line 1 is the element type, line 2 the comma separated
  ## dimensions and line 3 the comma separated data
  content <- readLines(path)
  type <- content[[1]]
  ## Pick the conversion matching the stored type so integer tensors come
  ## back as integer arrays; fail with a clear message for anything else
  ## rather than ending up with a NULL converter
  converter <- switch(
    type,
    "int" = as.integer,
    "double" = as.numeric,
    stop(sprintf("Unsupported tensor type '%s' in file '%s'", type, path)))
  array(
    converter(strsplit(content[[3]], ",\\s*")[[1]]),
    as.numeric(strsplit(content[[2]], ",\\s*")[[1]])
  )
}
Expand Down
17 changes: 17 additions & 0 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,23 @@ via [Rcpp](http://dirk.eddelbuettel.com/code/rcpp.html) and

## Development notes

### Testing

There is some pre-prepared test data available to make tests run faster. This is generated and saved by `./scripts/create_test_data.R`.

We also have some separate data written out in a generic format which can be read to test the model directly from C++. This is in `inst/standalone_model/data` in zipped files.

If this is your first time running you will need to unzip the standalone test data

```
./inst/standalone_model/extract_data
```

If you want to update the test data, it should be updated in the `./scripts/create_test_data.R` script so that we know how it was created and we can do it again fairly easily. The steps are:

1. Update the script and generate the test data
1. Update the standalone data, which is built from this test data, using `./scripts/update_standalone_data`. You might need to add a new mapping from R to serialized name if you are adding new input data
1. Unzip this for automated tests `./inst/standalone_model/extract_data`

### Simulation model

* The model was implemented using _Eigen::Tensor_ containers. These were preferred
Expand Down
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,28 @@ simulation via [Rcpp](http://dirk.eddelbuettel.com/code/rcpp.html) and

## Development notes

### Testing

There is some pre-prepared test data available to make tests run faster.
This is generated and saved by `./scripts/create_test_data.R`.

We also have some separate data written out in a generic format which
can be read to test the model directly from C++. This is in
`inst/standalone_model/data` in zipped files.

If this is your first time running you will need to unzip the standalone
test data

./inst/standalone_model/extract_data

If you want to update the test data, it should be updated in the
`./scripts/create_test_data.R` script so that we know how it was created
and we can do it again fairly easily. The steps are:

1.  Update the script and generate the test data
2.  Update the standalone data, which is built from this test data,
    using `./scripts/update_standalone_data`. You might need to add a
    new mapping from R to serialized name if you are adding new input
    data
3.  Unzip this for automated tests `./inst/standalone_model/extract_data`

### Simulation model

- The model was implemented using *Eigen::Tensor* containers. These were
Expand Down
3 changes: 2 additions & 1 deletion configure
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/bin/sh

##$R_HOME/bin/Rscript inst/cpp_generation/generate.R
echo "TODO turn generate back on when generating out state_types needed by Python"
#$R_HOME/bin/Rscript inst/cpp_generation/generate.R
2 changes: 1 addition & 1 deletion inst/cpp_generation/model_input.csv
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ cd4_prog,NaturalHistory.cd4_progression,real_type,ModelVariant::run_hiv_simulati
artcd4elig_idx,Art.idx_hm_elig,int,ModelVariant::run_hiv_simulation,,TRUE,1,proj_years,,,
cd4_initdist,NaturalHistory.cd4_initial_distribution,real_type,ModelVariant::run_hiv_simulation,,FALSE,3,base.hDS,base.hAG,base.NS,
art_mort,Art.mortality,real_type,ModelVariant::run_hiv_simulation,,FALSE,4,base.hTS,base.hDS,base.hAG,base.NS
artmx_timerr,Art.mortaility_time_rate_ratio,real_type,ModelVariant::run_hiv_simulation,,FALSE,2,base.hTS,proj_years,,
artmx_timerr,Art.mortality_time_rate_ratio,real_type,ModelVariant::run_hiv_simulation,,FALSE,2,base.hTS,proj_years,,
art_dropout_recover_cd4,Art.dropout_recover_cd4,int,ModelVariant::run_hiv_simulation,,FALSE,1,1,,,
art_dropout_rate,Art.dropout_rate,real_type,ModelVariant::run_hiv_simulation,,FALSE,1,proj_years,,,
art15plus_num,Art.adults_on_art,real_type,ModelVariant::run_hiv_simulation,,FALSE,2,base.NS,proj_years,,
Expand Down
Loading

0 comments on commit 921562d

Please sign in to comment.