Skip to content

Commit

Permalink
Initial module verification function
Browse files Browse the repository at this point in the history
  • Loading branch information
anthonysena committed Dec 13, 2023
1 parent 00d0c00 commit 9a98c44
Show file tree
Hide file tree
Showing 10 changed files with 277 additions and 18 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ export(getModuleList)
export(retrieveConnectionDetails)
export(storeConnectionDetails)
export(unlockKeyring)
export(verifyModuleInstallation)
import(CohortGenerator)
import(DatabaseConnector)
import(dplyr)
Expand Down
203 changes: 200 additions & 3 deletions R/ModuleInstantiation.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,207 @@ ensureAllModulesInstantiated <- function(analysisSpecifications) {
stop(message)
}

# Verify all modules are properly installed
moduleInstallStatus <- list()
for (i in 1:nrow(modules)) {
status <- verifyModuleInstallation(
module = modules$module[i],
version = modules$version[i]
)
moduleInstallStatus <- append(status, moduleInstallStatus)
}
attr(modules, 'moduleInstallStatus') <- moduleInstallStatus

return(modules)
}


#' Verify a module is properly installed
#'
#' @description
#' In some instances a module may fail to instantiate and install due to problems
#' when calling renv::restore for the module's renv.lock file. This function
#' will allow you to surface inconsistencies between the module renv.lock file
#' and the module's renv project library. This function will check to that a
#' module has been properly installed using internal functions of the `renv`
#' package. If a module is verified to work via this function, the hash of
#' the module's renv.lock file will be written to a text file in the module
#' directory to indicate that it is ready for use. This will allow subsequent
#' calls to work faster since the initial verification process can take some
#' time.It is possible to re-run the verification of a module
#' by using the `forceVerification` parameter.
#'
#' To fix issues with a module, you will need to open the module's .Rproj in
#' RStudio instance and debug the issues when calling renv::restore().
#'
#' @param module The name of the module to verify (i.e. "CohortGeneratorModule")
#'
#' @param version The version of the module to verify (i.e. "0.2.1")
#'
#' @param forceVerification When set to TRUE, the verification process is forced
#' to re-evaluate if the module is properly installed. The default is FALSE
#' since if the module is successfully validated by this function, it will cache
#' the hash value of the module's renv.lock file in the file system so it can
#' by-pass running this check every time.
#'
#' @return
#' A list with the output of the consistency check
#'
#' @export
verifyModuleInstallation <- function(module, version, forceVerification = FALSE) {
# Internal helper function
verifyModuleInstallationReturnValue <- function(moduleFolder, moduleInstalled, issues = NULL) {
returnVal <- list(
moduleFolder = moduleFolder,
moduleInstalled = moduleInstalled,
issues = issues
)
return(returnVal)
}

moduleFolder <- getModuleFolder(module, version)
if (!dir.exists(moduleFolder)) {
warn("Module ", module, ", Version: ", version, " not found at: ", moduleFolder, ". This means the module was never installed.")
return(
verifyModuleInstallationReturnValue(
moduleFolder = moduleFolder,
moduleInstalled = FALSE
)
)
}

message("Verifying module: ", module, ", (", version, ") at ", moduleFolder, "...", appendLF = F)

moduleStatusFileName <- "moduleStatus.txt"
renvLockFileName <- "renv.lock"

# If the lock file doesn't exist, we're not sure if we're dealing with a module.
if (!file.exists(file.path(moduleFolder, renvLockFileName))) {
message("ERROR - renv.lock file missing.")
return(
verifyModuleInstallationReturnValue(
moduleFolder = moduleFolder,
moduleInstalled = FALSE
)
)
}

# Check to see if we've already performed the verification by looking at the
# moduleStatus.txt file to see if the md5 in that file matches the one
# created by hashing the renv.lock file
lockfileContents <- ParallelLogger::loadSettingsFromJson(
fileName = file.path(moduleFolder, renvLockFileName)
)
lockfileHash <- digest::digest(
object = lockfileContents,
algo = "md5"
)
if (!forceVerification && file.exists(file.path(moduleFolder, moduleStatusFileName))) {
lockfileHashFromModuleStatusFile <- SqlRender::readSql(
sourceFile = file.path(moduleFolder, moduleStatusFileName)
)

# If the values match, the module is installed correctly
# return and exit
if (lockfileHashFromModuleStatusFile == lockfileHash) {
message("MODULE READY!")
return(
verifyModuleInstallationReturnValue(
moduleFolder = moduleFolder,
moduleInstalled = TRUE
)
)
}
}


# Now perform the consistency check to verify that the renv::restore()
# process executed successfully. We must do this in the module's context
Strategus:::withModuleRenv(
code = {
# Start by turning off verbose output to hide renv output
verboseOption <- getOption("renv.verbose")
options(renv.verbose = FALSE)
on.exit(options(renv.verbose = verboseOption))

# Get the renv project status and then identify the packages used
# in the project to determine if there were issues when restoring
# the project from the renv.lock file.
projectStatus <- renv::status()

# Get the packages in the project - adapted from
# https://github.com/rstudio/renv/blob/v1.0.3/R/status.R
project <- renv:::renv_project_resolve()
libpaths <- renv:::renv_libpaths_resolve()
dependencies <- renv:::renv_snapshot_dependencies(project, dev = FALSE)
packages <- sort(union(dependencies, "renv"))
paths <- renv:::renv_package_dependencies(packages, libpaths = libpaths, project = project)
packages <- as.character(names(paths))
# remove ignored packages
ignored <- c(
renv:::renv_project_ignored_packages(project),
renv:::renv_packages_base()
)
packages <- setdiff(packages, ignored)
projectStatus$packages <- packages
saveRDS(projectStatus, file="projectStatus.rds")
},
moduleFolder = moduleFolder
)

# The module's project status is written to the
# file system. Now we can get the module status and use the information
# to determine the restoration status
projectStatus <- readRDS(file.path(moduleFolder, "projectStatus.rds"))

library <- names(projectStatus$library$Packages)
lockfile <- names(projectStatus$lockfile$Packages)

packages <- sort(unique(c(library, lockfile, projectStatus$packages)))

packageStatus <- data.frame(
package = packages,
installed = packages %in% library,
recorded = packages %in% lockfile,
used = packages %in% packages
)

# If all of the used & recorded packages are installed, then
# return TRUE for the module installed status. If not, return
# FALSE and set an attribute of the list that contains the issues
# discovered
ok <- packageStatus$installed & (packageStatus$used == packageStatus$recorded)
issues <- packageStatus[!ok, , drop = FALSE]
missing <- !issues$installed
issues$installed <- ifelse(issues$installed, "y", "n")
issues$recorded <- ifelse(issues$recorded, "y", "n")
issues$used <- ifelse(issues$used, "y", if (any(missing)) "?" else "n")
issues <- issues[issues$installed == "n" & issues$recorded == "y" & issues$used == "y", ]

moduleInstalled <- nrow(issues) == 0

if (isTRUE(moduleInstalled)) {
message("MODULE READY!")
# Write the contents of the md5 hash of the module's
# renv.lock file to the file system to note that the
# module's install status was successful and verified
SqlRender::writeSql(
sql = lockfileHash,
targetFile = file.path(moduleFolder, "moduleStatus.txt")
)
} else {
message("MODULE HAS ISSUES!")
}

return(
verifyModuleInstallationReturnValue(
moduleFolder = moduleFolder,
moduleInstalled = moduleInstalled,
issues = issues
)
)
}

getModuleTable <- function(analysisSpecifications, distinct = FALSE) {
modules <- lapply(
analysisSpecifications$moduleSpecifications,
Expand Down Expand Up @@ -121,15 +319,14 @@ getModuleMetaData <- function(moduleFolder) {
}

getModuleFolder <- function(module, version) {
assertModulesFolderSetting(x = Sys.getenv("INSTANTIATED_MODULES_FOLDER"))
moduleFolder <- file.path(Sys.getenv("INSTANTIATED_MODULES_FOLDER"), sprintf("%s_%s", module, version))
invisible(moduleFolder)
}

ensureModuleInstantiated <- function(module, version, remoteRepo, remoteUsername) {
assertModulesFolderSetting(x = Sys.getenv("INSTANTIATED_MODULES_FOLDER"))
instantiatedModulesFolder <- Sys.getenv("INSTANTIATED_MODULES_FOLDER")
if (instantiatedModulesFolder == "") {
stop("The INSTANTIATED_MODULES_FOLDER environment variable has not been set.")
}
if (!dir.exists(instantiatedModulesFolder)) {
dir.create(instantiatedModulesFolder, recursive = TRUE)
}
Expand Down
6 changes: 5 additions & 1 deletion R/ResultModelCreation.R
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,11 @@ runSchemaCreation <- function(analysisSpecifications, keyringSettings, moduleInd
version <- moduleSpecification$version
remoteRepo <- moduleSpecification$remoteRepo
remoteUsername <- moduleSpecification$remoteUsername
moduleFolder <- ensureModuleInstantiated(module, version, remoteRepo, remoteUsername)
moduleInstallation <- verifyModuleInstallation(module, version)
moduleFolder <- moduleInstallation$moduleFolder
if (isFALSE(moduleInstallation$moduleInstalled)) {
stop("Stopping since module is not properly installed!")
}

# Create job context
moduleExecutionSettings <- executionSettings
Expand Down
6 changes: 5 additions & 1 deletion R/ResultsUpload.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,11 @@ runResultsUpload <- function(analysisSpecifications, keyringSettings, moduleInde
version <- moduleSpecification$version
remoteRepo <- moduleSpecification$remoteRepo
remoteUsername <- moduleSpecification$remoteUsername
moduleFolder <- ensureModuleInstantiated(module, version, remoteRepo, remoteUsername)
moduleInstallation <- verifyModuleInstallation(module, version)
moduleFolder <- moduleInstallation$moduleFolder
if (isFALSE(moduleInstallation$moduleInstalled)) {
stop("Stopping since module is not properly installed!")
}

# Create job context
moduleExecutionSettings <- executionSettings
Expand Down
6 changes: 5 additions & 1 deletion R/RunModule.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@ runModule <- function(analysisSpecifications, keyringSettings, moduleIndex, exec
version <- moduleSpecification$version
remoteRepo <- moduleSpecification$remoteRepo
remoteUsername <- moduleSpecification$remoteUsername
moduleFolder <- ensureModuleInstantiated(module, version, remoteRepo, remoteUsername)
moduleInstallation <- verifyModuleInstallation(module, version)
moduleFolder <- moduleInstallation$moduleFolder
if (isFALSE(moduleInstallation$moduleInstalled)) {
stop("Stopping since module is not properly installed!")
}

# Create job context
moduleExecutionSettings <- executionSettings
Expand Down
9 changes: 9 additions & 0 deletions R/Settings.R
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,15 @@ unlockKeyring <- function(keyringName) {
}
}

#' @keywords internal
.checkModuleFolderSetting <- function(x) {
if (length(x) == 0 || x == "") {
return(paste0("INSTANTIATED_MODULES_FOLDER environment variable has not been set. INSTANTIATED_MODULES_FOLDER must be set using Sys.setenv(INSTANTIATED_MODULES_FOLDER = \"/somepath\")"))
} else {
return(TRUE)
}
}

#' Used when serializing connection details to retain NULL values
#'
#' @keywords internal
Expand Down
1 change: 1 addition & 0 deletions R/Strategus.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@ NULL

# Add custom asssertions
assertKeyringPassword <- checkmate::makeAssertionFunction(.checkKeyringPasswordSet)
assertModulesFolderSetting <- checkmate::makeAssertionFunction(.checkModuleFolderSetting)
10 changes: 5 additions & 5 deletions inst/testdata/analysisSpecification.json
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@
"moduleSpecifications": [
{
"module": "CohortGeneratorModule",
"version": "0.2.0",
"version": "0.2.1",
"remoteRepo": "github.com",
"remoteUsername": "ohdsi",
"settings": {
Expand Down Expand Up @@ -292,7 +292,7 @@
},
{
"module": "CohortIncidenceModule",
"version": "0.1.0",
"version": "0.3.0",
"remoteRepo": "github.com",
"remoteUsername": "ohdsi",
"settings": {
Expand Down Expand Up @@ -489,7 +489,7 @@
},
{
"module": "CohortMethodModule",
"version": "0.2.0",
"version": "0.2.1",
"remoteRepo": "github.com",
"remoteUsername": "ohdsi",
"settings": {
Expand Down Expand Up @@ -1341,7 +1341,7 @@
},
{
"module": "SelfControlledCaseSeriesModule",
"version": "0.2.0",
"version": "0.3.2",
"remoteRepo": "github.com",
"remoteUsername": "ohdsi",
"settings": {
Expand Down Expand Up @@ -2360,7 +2360,7 @@
},
{
"module": "PatientLevelPredictionModule",
"version": "0.2.0",
"version": "0.2.1",
"remoteRepo": "github.com",
"remoteUsername": "ohdsi",
"settings": [
Expand Down
39 changes: 39 additions & 0 deletions man/verifyModuleInstallation.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 9a98c44

Please sign in to comment.