Skip to content

Commit

Permalink
Merge pull request #13 from oxford-pharmacoepi/issue_12
Browse files Browse the repository at this point in the history
v1.0.2
  • Loading branch information
cecicampanile authored Dec 6, 2024
2 parents 58752e6 + 9a98c4b commit f0d23f7
Show file tree
Hide file tree
Showing 2 changed files with 110 additions and 1 deletion.
106 changes: 106 additions & 0 deletions Benchmark/R/benchmarkCDMConnector.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#' Time a fixed set of queries against a CDM reference
#'
#' Runs four queries on the vocabulary tables (`concept_relationship`,
#' `concept`, `concept_class`) followed by two queries on the clinical tables
#' (`person`, `observation_period`), timing each one with
#' `tictoc::tic()` / `tictoc::toc()`.
#'
#' @param cdm An object of class `cdm_reference`; anything else is rejected by
#'   `checkmate::assertClass()`.
#'
#' @return A tibble with one row per task and the columns `task`,
#'   `time_taken_secs`, `time_taken_mins` (seconds / 60, rounded to 2 decimal
#'   places), `dbms` (the `source_type` attribute of the `cdm_source`
#'   attribute of `cdm`, or `NA` when that attribute is not set) and
#'   `person_n` (row count of the `person` table).
benchmarkCDMConnector <- function(cdm) {
  checkmate::assertClass(cdm, "cdm_reference")

  # Announce `task`, then time the evaluation of `query`. `query` is a lazily
  # evaluated argument: nothing runs until `force()`, so the database work
  # happens strictly between `tic()` and `toc()`.
  time_task <- function(task, query) {
    cli::cli_inform("Getting {task}")
    tictoc::tic()
    force(query)
    elapsed <- tictoc::toc()
    dplyr::tibble(
      task = .env$task,
      time_taken_secs = as.numeric(elapsed$toc - elapsed$tic)
    )
  }

  timings <- list(
    # first set of queries are just with the vocabulary tables
    # these should be similar between databases
    time_task(
      "distinct count of concept relationship table",
      cdm[["concept_relationship"]] |>
        dplyr::distinct() |>
        dplyr::tally() |>
        dplyr::pull("n")
    ),
    time_task(
      "count of different relationship IDs in concept relationship table",
      cdm[["concept_relationship"]] |>
        dplyr::group_by(.data$relationship_id) |>
        dplyr::tally() |>
        dplyr::collect()
    ),
    # NOTE(review): the table created by compute() is not dropped here;
    # presumably it is temporary and goes away with the connection - confirm.
    time_task(
      "join of concept and concept class computed to a temp table",
      cdm[["concept"]] |>
        dplyr::left_join(
          cdm[["concept_class"]],
          by = c("concept_id" = "concept_class_concept_id")
        ) |>
        dplyr::compute()
    ),
    time_task(
      "concept table collected into memory",
      cdm[["concept"]] |>
        dplyr::collect()
    ),
    # second set of queries are with clinical tables
    # these will differ substantially by database
    time_task(
      "join of person and observation period collected into memory",
      cdm[["person"]] |>
        dplyr::inner_join(cdm[["observation_period"]], by = "person_id") |>
        dplyr::collect()
    ),
    time_task(
      "summary of observation period start and end dates by gender concept id",
      cdm[["person"]] |>
        dplyr::inner_join(cdm[["observation_period"]], by = "person_id") |>
        dplyr::group_by(.data$gender_concept_id) |>
        dplyr::summarise(
          max = max(.data$observation_period_end_date, na.rm = TRUE),
          min = min(.data$observation_period_start_date, na.rm = TRUE)
        ) |>
        dplyr::collect()
    )
  )

  # mutate(dbms = NULL) would silently produce no `dbms` column, so fall back
  # to NA to keep the output schema stable when the attribute is missing.
  dbms <- attr(attr(cdm, "cdm_source"), "source_type")
  if (is.null(dbms)) {
    dbms <- NA_character_
  }

  # Not part of the timed tasks: size of the person table, for context.
  person_n <- cdm[["person"]] |>
    dplyr::count() |>
    dplyr::pull("n")

  # combine results
  dplyr::bind_rows(timings) |>
    dplyr::mutate(
      time_taken_mins = round(.data$time_taken_secs / 60, 2),
      dbms = dbms,
      person_n = person_n
    )
}
5 changes: 4 additions & 1 deletion Benchmark/R/functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -271,12 +271,15 @@ incidencePrevalenceBenchmark <- function(cdm, iterations, logger) {
}

cdmConnectorBenchmark <- function(cdm, iterations, logger) {

source("R/benchmarkCDMConnector.R")

res <- list()

for (i in 1:iterations) {
mes <- glue::glue("CDMConnector benchmark interation {i}/{iterations}")
log4r::info(logger = logger, mes)
res <- dplyr::bind_rows(res, CDMConnector::benchmarkCDMConnector(cdm) |>
res <- dplyr::bind_rows(res, benchmarkCDMConnector(cdm) |>
dplyr::mutate(strata_level = as.character(i)))
}

Expand Down

0 comments on commit f0d23f7

Please sign in to comment.