diff --git a/R/data.R b/R/data.R index b57e3e5..68259b7 100644 --- a/R/data.R +++ b/R/data.R @@ -2,7 +2,7 @@ #' combination #' #' @format -#' \itemize{ +#' \describe{ #' \item{SDTM}{The SDTM version} #' \item{IG_NAME}{The implementation guide (IG) name being used} #' \item{IG_VERSION}{The implemetation guide (IG) version} @@ -10,3 +10,26 @@ #' be used for current studies)} #' } "valid_versions" + +#' The current SDTM terminology +#' +#' SDTM terminology is used for reserved words and lists of codes that can be +#' used for values in the SDTM standard. The version date from the download is +#' available via \code{attr(sdtm_terminology, "version")}. +#' +#' The descriptions of the columns are drawn from +#' \url{https://evs.nci.nih.gov/ftp1/CDISC/SDTM/SDTM%20CDISC%20ReadMe.doc}, and +#' the column names are modified slightly to work better in R. +#' +#' @format +#' \describe{ +#' \item{code}{Unique numeric code randomly generated by NCI Thesaurus (NCIt) and assigned to individual CDISC controlled terms.} +#' \item{codelist_code}{Unique numeric code randomly generated by NCI Thesaurus (NCIt) and assigned to the SDTM parent codelist names. This code is repeated for each controlled term (aka permissible value) belonging to a codelist. **NOTE - light blue highlighting is used to identify the beginning of a new SDTM codelist and its applicable term set.} +#' \item{codelist_extensible_yes_no}{Defines if controlled terms may be added to the codelist. New terms may be added to existing codelist values when the codelist has "Yes" in Column C as long as the terms are not duplicates or synonyms of existing terms. The expectation is that sponsors will use the published controlled terminology as a standard baseline and codelists defined as "extensible" (or "Yes") may have terms added by the sponsor internally. For codelists with "No" in Column C sponsor organizations may not add to the published lists. 
Suggestions for new terms may be requested through the New Term Request Page.} +#' \item{codelist_name}{Contains the descriptive name of the codelist. As with the Codelist Code, the Codelist Name is repeated for each controlled term belonging to a codelist.} +#' \item{CDISC_submission_value}{IMPORTANT COLUMN: Currently this is the specific value expected for submissions. Each value corresponds to a SDTM Codelist Name as indicated by light blue shading.} +#' \item{CDISC_synonym}{This identifies the applicable synonyms for the CDISC Submission Value in Column E. These terms should not be submitted, but are included for collection and mapping purposes.**NOTE - this is especially important in instances where a Test name or Parameter Test name contains a corresponding Test Code or Parameter Test Code.} +#' \item{CDISC_definition}{This identifies the CDISC definition for a particular term. In many cases an existing NCI definition has been used. The source for a definition is noted in parentheses (e.g. NCI, CDISC glossary).} +#' \item{NCI_preferred_term}{This identifies the NCI preferred name for a term as identified in NCIt. 
**NOTE - This column designates the human readable, fully specified preferred term corresponding to the NCI c-code, and is especially helpful for searching NCIt to get the entire concept with links to all instances of the term.} +#' } +"sdtm_terminology" \ No newline at end of file diff --git a/data-raw/sdtm_terminology.R b/data-raw/sdtm_terminology.R new file mode 100644 index 0000000..eedb64a --- /dev/null +++ b/data-raw/sdtm_terminology.R @@ -0,0 +1,37 @@ +# Download the SDTM Terminology spreadsheet from the NCI EVS site, sanity-check its structure, and save it as the sdtm_terminology dataset + +library(tidyverse) +library(janitor) +library(assertr) + +url <- "https://evs.nci.nih.gov/ftp1/CDISC/SDTM/SDTM%20Terminology.xls" +local_file <- file.path(tempdir(), basename(url)) +download_result <- curl::curl_download(url=url, destfile=local_file, quiet=FALSE) + +d_raw <- rio::import_list(local_file) +names(d_raw) +sheet_to_use <- names(d_raw)[startsWith(names(d_raw), "SDTM Terminology")] +if (length(sheet_to_use) != 1) { + stop("Could not find correct sheet name") +} else { + sdtm_terminology <- + d_raw[[sheet_to_use]] %>% + clean_names() %>% + verify(is.na(codelist_code) | is.na(codelist_extensible_yes_no)) %>% + verify(is.na(codelist_code) | (codelist_code != code)) %>% + verify( + !is.na(codelist_code) | + (is.na(codelist_code) & codelist_name == cdisc_synonym_s) + ) %>% + # Disabled so that codelist_name stays in the saved data: select(-codelist_name) %>% + rename_with( + .fn=gsub, + pattern="^(CDISC|NCI)", + replacement="\\U\\1", + ignore.case=TRUE, + perl=TRUE + ) %>% + rename(CDISC_synonym=CDISC_synonym_s) + attr(sdtm_terminology, "version") <- sheet_to_use + usethis::use_data(sdtm_terminology, compress="xz", overwrite=TRUE, version=3) +} diff --git a/data/sdtm_terminology.rda b/data/sdtm_terminology.rda new file mode 100644 index 0000000..6b84d31 Binary files /dev/null and b/data/sdtm_terminology.rda differ diff --git a/man/sdtm_terminology.Rd b/man/sdtm_terminology.Rd new file mode 100644 index 0000000..abe7f19 --- /dev/null +++ b/man/sdtm_terminology.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not 
edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{sdtm_terminology} +\alias{sdtm_terminology} +\title{The current SDTM terminology} +\format{ +\describe{ + \item{code}{Unique numeric code randomly generated by NCI Thesaurus (NCIt) and assigned to individual CDISC controlled terms.} + \item{codelist_code}{Unique numeric code randomly generated by NCI Thesaurus (NCIt) and assigned to the SDTM parent codelist names. This code is repeated for each controlled term (aka permissible value) belonging to a codelist. **NOTE - light blue highlighting is used to identify the beginning of a new SDTM codelist and its applicable term set.} + \item{codelist_extensible_yes_no}{Defines if controlled terms may be added to the codelist. New terms may be added to existing codelist values when the codelist has "Yes" in Column C as long as the terms are not duplicates or synonyms of existing terms. The expectation is that sponsors will use the published controlled terminology as a standard baseline and codelists defined as "extensible" (or "Yes") may have terms added by the sponsor internally. For codelists with "No" in Column C sponsor organizations may not add to the published lists. Suggestions for new terms may be requested through the New Term Request Page.} + \item{codelist_name}{Contains the descriptive name of the codelist. As with the Codelist Code, the Codelist Name is repeated for each controlled term belonging to a codelist.} + \item{CDISC_submission_value}{IMPORTANT COLUMN: Currently this is the specific value expected for submissions. Each value corresponds to a SDTM Codelist Name as indicated by light blue shading.} + \item{CDISC_synonym}{This identifies the applicable synonyms for the CDISC Submission Value in Column E. 
These terms should not be submitted, but are included for collection and mapping purposes.**NOTE - this is especially important in instances where a Test name or Parameter Test name contains a corresponding Test Code or Parameter Test Code.} + \item{CDISC_definition}{This identifies the CDISC definition for a particular term. In many cases an existing NCI definition has been used. The source for a definition is noted in parentheses (e.g. NCI, CDISC glossary).} + \item{NCI_preferred_term}{This identifies the NCI preferred name for a term as identified in NCIt. **NOTE - This column designates the human readable, fully specified preferred term corresponding to the NCI c-code, and is especially helpful for searching NCIt to get the entire concept with links to all instances of the term.} +} +} +\usage{ +sdtm_terminology +} +\description{ +SDTM terminology is used for reserved words and lists of codes that can be +used for values in the SDTM standard. The version date from the download is +available via \code{attr(sdtm_terminology, "version")}. +} +\details{ +The descriptions of the columns are drawn from +\url{https://evs.nci.nih.gov/ftp1/CDISC/SDTM/SDTM\%20CDISC\%20ReadMe.doc}, and +the column names are modified slightly to work better in R. +} +\keyword{datasets} diff --git a/man/valid_versions.Rd b/man/valid_versions.Rd index 77c7e55..49a8488 100644 --- a/man/valid_versions.Rd +++ b/man/valid_versions.Rd @@ -6,7 +6,7 @@ \title{A data.frame with 4 columns and one row per valid SDTM version/IG/IG version combination} \format{ -\itemize{ +\describe{ \item{SDTM}{The SDTM version} \item{IG_NAME}{The implementation guide (IG) name being used} \item{IG_VERSION}{The implemetation guide (IG) version}