From d112d869f1bd18a88c290e487f651dbeb5695e77 Mon Sep 17 00:00:00 2001 From: Kdreval Date: Fri, 22 Nov 2024 10:22:32 -0800 Subject: [PATCH 1/4] cleanup: drop stringr dependency --- DESCRIPTION | 1 - NAMESPACE | 5 ----- R/GAMBLR.helpers-package.R | 4 ---- R/get_gambl_colours.R | 4 ++-- R/get_template_wildcards.R | 2 +- R/grob_wildcards.R | 4 ++-- 6 files changed, 5 insertions(+), 15 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c69cdad..5af1546 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -17,7 +17,6 @@ Imports: ggthemes, philentropy, readr, - stringr, tibble, tidyr, workflowr diff --git a/NAMESPACE b/NAMESPACE index d9aa630..9433009 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -35,7 +35,6 @@ import(GAMBLR.data) import(dplyr) import(ggplot2) import(readr) -import(stringr) import(tibble) import(tidyr) import(workflowr) @@ -45,10 +44,6 @@ importFrom(philentropy,KL) importFrom(stats,end) importFrom(stats,quantile) importFrom(stats,start) -importFrom(stringr,str_c) -importFrom(stringr,str_extract) -importFrom(stringr,str_remove) -importFrom(stringr,str_remove_all) importFrom(tidyr,unnest_auto) importFrom(utils,head) importFrom(utils,read.csv) diff --git a/R/GAMBLR.helpers-package.R b/R/GAMBLR.helpers-package.R index 0af4d14..2c32605 100644 --- a/R/GAMBLR.helpers-package.R +++ b/R/GAMBLR.helpers-package.R @@ -7,10 +7,6 @@ #' @importFrom stats end #' @importFrom stats quantile #' @importFrom stats start -#' @importFrom stringr str_c -#' @importFrom stringr str_extract -#' @importFrom stringr str_remove -#' @importFrom stringr str_remove_all #' @importFrom tidyr unnest_auto #' @importFrom utils head #' @importFrom utils read.csv diff --git a/R/get_gambl_colours.R b/R/get_gambl_colours.R index dab03b6..c7397ee 100644 --- a/R/get_gambl_colours.R +++ b/R/get_gambl_colours.R @@ -18,7 +18,7 @@ #' #' @return A named vector of colour codes for lymphgen classes and pathology. 
#' -#' @import dplyr stringr tidyr +#' @import dplyr tidyr #' @export #' #' @examples @@ -318,7 +318,7 @@ get_gambl_colours = function(classification = "all", everything = c(everything, all_colours[[this_group]]) } #return matching value from lowercase version of the argument if it exists - lc_class = stringr::str_to_lower(classification) + lc_class = tolower(classification) if(return_available){ return(names(all_colours)) } diff --git a/R/get_template_wildcards.R b/R/get_template_wildcards.R index f50d6fd..cc3b46e 100644 --- a/R/get_template_wildcards.R +++ b/R/get_template_wildcards.R @@ -16,6 +16,6 @@ get_template_wildcards = function(parent_key, }else{ wildcard_string = config::get(paste0(parent_key,"_wildcards"))[template_key] } - wildcards = stringr::str_split(wildcard_string,",") + wildcards = strsplit(wildcard_string,",") return(unlist(wildcards)) } diff --git a/R/grob_wildcards.R b/R/grob_wildcards.R index ab77166..ffa12e1 100644 --- a/R/grob_wildcards.R +++ b/R/grob_wildcards.R @@ -8,7 +8,7 @@ #' #' @export grob_wildcards = function(wildcarded_string){ - wildcards = unlist(stringr::str_extract_all(wildcarded_string,"\\{[^\\{]+\\}")) - wildcards = stringr::str_remove_all(wildcards,"\\{") %>% stringr::str_remove_all(.,"\\}") + wildcards = unlist(regmatches(wildcarded_string, gregexpr("\\{[^\\{]+\\}", wildcarded_string))) + wildcards = gsub("\\{", "", wildcards) %>% gsub("\\}", "", .) 
return(wildcards) } From 2f3ecbc6bc7f89dea46a252e27dc3298a32d50ed Mon Sep 17 00:00:00 2001 From: Kdreval Date: Fri, 22 Nov 2024 10:57:12 -0800 Subject: [PATCH 2/4] cleanup: drop philentropy dependency --- NAMESPACE | 2 +- R/GAMBLR.helpers-package.R | 1 - R/compare_coding_mutation_pattern.R | 12 +++++++--- R/kl_divergence.R | 36 ++++++++++++++++++++++++++++ man/kl_divergence.Rd | 37 +++++++++++++++++++++++++++++ 5 files changed, 83 insertions(+), 5 deletions(-) create mode 100644 R/kl_divergence.R create mode 100644 man/kl_divergence.Rd diff --git a/NAMESPACE b/NAMESPACE index 9433009..2119e85 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -19,6 +19,7 @@ export(get_template_wildcards) export(get_unmatched_normals) export(grob_wildcards) export(handle_metadata) +export(kl_divergence) export(maf_header) export(normalize_expression_data) export(rainfall_conv) @@ -40,7 +41,6 @@ import(tidyr) import(workflowr) importFrom(dplyr,left_join) importFrom(ggthemes,theme_foundation) -importFrom(philentropy,KL) importFrom(stats,end) importFrom(stats,quantile) importFrom(stats,start) diff --git a/R/GAMBLR.helpers-package.R b/R/GAMBLR.helpers-package.R index 2c32605..1dd1b12 100644 --- a/R/GAMBLR.helpers-package.R +++ b/R/GAMBLR.helpers-package.R @@ -3,7 +3,6 @@ ## usethis namespace: start #' @importFrom dplyr left_join -#' @importFrom philentropy KL #' @importFrom stats end #' @importFrom stats quantile #' @importFrom stats start diff --git a/R/compare_coding_mutation_pattern.R b/R/compare_coding_mutation_pattern.R index 281f090..20a7772 100644 --- a/R/compare_coding_mutation_pattern.R +++ b/R/compare_coding_mutation_pattern.R @@ -17,9 +17,9 @@ compare_coding_mutation_pattern = function(maf_df1,maf_df2,gene){ stop("Must provide the Hugo_Symbol of a single gene that is present in both maf files") } missense_positions1 = dplyr::filter(maf_df1,Hugo_Symbol==gene,!Variant_Classification %in% c("Silent","Splice_Site","Splice_Region"),Variant_Type=="SNP") %>% - pull(HGVSp_Short) %>% 
str_remove_all("p.\\w") %>% str_extract("\\d+") %>% as.numeric() + pull(HGVSp_Short) %>% gsub("p\\.\\w", "", .) %>% regmatches(., regexpr("\\d+", .)) %>% as.numeric() missense_positions2 = dplyr::filter(maf_df2,Hugo_Symbol==gene,!Variant_Classification %in% c("Silent","Splice_Site","Splice_Region"),Variant_Type=="SNP") %>% - pull(HGVSp_Short) %>% str_remove_all("p.\\w") %>% str_extract("\\d+") %>% as.numeric() + pull(HGVSp_Short) %>% gsub("p\\.\\w", "", .) %>% regmatches(., regexpr("\\d+", .)) %>% as.numeric() if(length(missense_positions1)==0 | length(missense_positions2)==0 ){ message(paste("no mutations for",gene,"in one or both data sets")) return(list(kl=15)) @@ -35,6 +35,12 @@ compare_coding_mutation_pattern = function(maf_df1,maf_df2,gene){ all_counts = dplyr::select(full_df,-position) %>% t() all_counts[1,]=all_counts[1,]/sum(all_counts[1,]) all_counts[2,]=all_counts[2,]/sum(all_counts[2,]) - kl_out = KL(all_counts) + + # Normalize the rows to turn counts into probabilities + P <- all_counts[1, ] / sum(all_counts[1, ]) + Q <- all_counts[2, ] / sum(all_counts[2, ]) + + kl_out <- kl_divergence(P, Q) + return(list(df=full_df,kl=unname(kl_out))) } diff --git a/R/kl_divergence.R b/R/kl_divergence.R new file mode 100644 index 0000000..2fb5291 --- /dev/null +++ b/R/kl_divergence.R @@ -0,0 +1,36 @@ +#' Calculate Kullback-Leibler Divergence +#' +#' This function computes the Kullback-Leibler (KL) divergence between two +#' probability distributions, with an optional small constant (epsilon) +#' added to avoid zero probabilities, which would otherwise cause division +#' by zero or undefined logarithms. +#' +#' @param P A numeric vector representing the first probability distribution. +#' The sum of "P" should be 1, but the function will normalize it if +#' necessary. +#' @param Q A numeric vector representing the second probability distribution. +#' The sum of "Q" should be 1, but the function will normalize it if +#' necessary. 
+#' @param epsilon A small positive number (default = 1e-7) to be added to each +#' probability in P and Q to avoid zero probabilities. This helps to +#' prevent division by zero or log(0). +#' +#' @return A single numeric value: the KL divergence D(P || Q) in nats (natural logarithm). +#' +#' @examples +#' P <- c(0.1, 0.4, 0.3, 0.2) +#' Q <- c(0.2, 0.3, 0.4, 0.1) +#' +#' kl_divergence(P, Q) +#' +#' @export +kl_divergence <- function(P, Q, epsilon = 1e-7) { + P <- P + epsilon + Q <- Q + epsilon + + P <- P / sum(P) + Q <- Q / sum(Q) + + # KL divergence formula: sum(P * log(P / Q)) + return(sum(P * log(P / Q), na.rm = TRUE)) +} diff --git a/man/kl_divergence.Rd b/man/kl_divergence.Rd new file mode 100644 index 0000000..5652dbe --- /dev/null +++ b/man/kl_divergence.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/kl_divergence.R +\name{kl_divergence} +\alias{kl_divergence} +\title{Calculate Kullback-Leibler Divergence} +\usage{ +kl_divergence(P, Q, epsilon = 1e-07) +} +\arguments{ +\item{P}{A numeric vector representing the first probability distribution. +The sum of "P" should be 1, but the function will normalize it if +necessary.} + +\item{Q}{A numeric vector representing the second probability distribution. +The sum of "Q" should be 1, but the function will normalize it if +necessary.} + +\item{epsilon}{A small positive number (default = 1e-7) to be added to each +probability in P and Q to avoid zero probabilities. This helps to +prevent division by zero or log(0).} +} +\value{ +A single numeric value: the KL divergence D(P || Q) in nats (natural logarithm). +} +\description{ +This function computes the Kullback-Leibler (KL) divergence between two +probability distributions, with an optional small constant (epsilon) +added to avoid zero probabilities, which would otherwise cause division +by zero or undefined logarithms. 
+} +\examples{ +P <- c(0.1, 0.4, 0.3, 0.2) +Q <- c(0.2, 0.3, 0.4, 0.1) + +kl_divergence(P, Q) + +} From 0050e497422ea4574c9b69a96265e90fa98e4440 Mon Sep 17 00:00:00 2001 From: Kdreval Date: Fri, 22 Nov 2024 11:15:14 -0800 Subject: [PATCH 3/4] cleanup: switch to consistent import definition --- DESCRIPTION | 1 - NAMESPACE | 11 ----------- R/GAMBLR.helpers-package.R | 17 ----------------- R/compare_coding_mutation_pattern.R | 1 + R/gene_mutation_tally.R | 1 + R/sanity_check_metadata.R | 2 +- man/GAMBLR.helpers-package.Rd | 15 --------------- 7 files changed, 3 insertions(+), 45 deletions(-) delete mode 100644 R/GAMBLR.helpers-package.R delete mode 100644 man/GAMBLR.helpers-package.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 5af1546..e33c01b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,7 +15,6 @@ Imports: GAMBLR.data, ggplot2, ggthemes, - philentropy, readr, tibble, tidyr, diff --git a/NAMESPACE b/NAMESPACE index 2119e85..ad6b74c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -39,15 +39,4 @@ import(readr) import(tibble) import(tidyr) import(workflowr) -importFrom(dplyr,left_join) importFrom(ggthemes,theme_foundation) -importFrom(stats,end) -importFrom(stats,quantile) -importFrom(stats,start) -importFrom(tidyr,unnest_auto) -importFrom(utils,head) -importFrom(utils,read.csv) -importFrom(utils,read.socket) -importFrom(utils,tail) -importFrom(utils,write.socket) -importFrom(utils,write.table) diff --git a/R/GAMBLR.helpers-package.R b/R/GAMBLR.helpers-package.R deleted file mode 100644 index 1dd1b12..0000000 --- a/R/GAMBLR.helpers-package.R +++ /dev/null @@ -1,17 +0,0 @@ -#' @keywords internal -"_PACKAGE" - -## usethis namespace: start -#' @importFrom dplyr left_join -#' @importFrom stats end -#' @importFrom stats quantile -#' @importFrom stats start -#' @importFrom tidyr unnest_auto -#' @importFrom utils head -#' @importFrom utils read.csv -#' @importFrom utils read.socket -#' @importFrom utils tail -#' @importFrom utils write.socket -#' @importFrom utils write.table 
-## usethis namespace: end -NULL diff --git a/R/compare_coding_mutation_pattern.R b/R/compare_coding_mutation_pattern.R index 20a7772..2ea0d72 100644 --- a/R/compare_coding_mutation_pattern.R +++ b/R/compare_coding_mutation_pattern.R @@ -8,6 +8,7 @@ #' #' @return list #' +#' @import dplyr #' @export compare_coding_mutation_pattern = function(maf_df1,maf_df2,gene){ if(missing(maf_df1) | missing(maf_df2)){ diff --git a/R/gene_mutation_tally.R b/R/gene_mutation_tally.R index 10f0964..32a3aec 100644 --- a/R/gene_mutation_tally.R +++ b/R/gene_mutation_tally.R @@ -17,6 +17,7 @@ #' #' @return data frame #' +#' @import dplyr #' @export gene_mutation_tally = function(maf_df,these_samples_metadata,these_genes,grouping_variable="cohort"){ meta = dplyr::select(these_samples_metadata,sample_id,{{grouping_variable}}) diff --git a/R/sanity_check_metadata.R b/R/sanity_check_metadata.R index 0faa1f4..8a1b461 100644 --- a/R/sanity_check_metadata.R +++ b/R/sanity_check_metadata.R @@ -6,7 +6,7 @@ #' #' @return A table. #' -#' @import tibble readr dplyr +#' @import tibble readr dplyr tidyr #' #' #' @examples diff --git a/man/GAMBLR.helpers-package.Rd b/man/GAMBLR.helpers-package.Rd deleted file mode 100644 index 90c6164..0000000 --- a/man/GAMBLR.helpers-package.Rd +++ /dev/null @@ -1,15 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/GAMBLR.helpers-package.R -\docType{package} -\name{GAMBLR.helpers-package} -\alias{GAMBLR.helpers} -\alias{GAMBLR.helpers-package} -\title{GAMBLR.helpers: Provide helper functions for GAMBLR-based packages} -\description{ -This package is part of the Genomic Analysis of Mature B-cell Lymphomas (GAMBL) project developed by the Morin Lab. 
-} -\author{ -\strong{Maintainer}: Vladimir Souza \email{vsouza@bcgsc.ca} - -} -\keyword{internal} From 2dda8459598618453105a854614187e7874cbb3d Mon Sep 17 00:00:00 2001 From: Kdreval Date: Fri, 22 Nov 2024 12:31:22 -0800 Subject: [PATCH 4/4] cleanup: remove workflowr --- DESCRIPTION | 3 +-- NAMESPACE | 2 -- R/web_initialize_gambl_site.R | 22 ---------------------- man/web_initialize_gambl_site.Rd | 25 ------------------------- 4 files changed, 1 insertion(+), 51 deletions(-) delete mode 100644 R/web_initialize_gambl_site.R delete mode 100644 man/web_initialize_gambl_site.Rd diff --git a/DESCRIPTION b/DESCRIPTION index e33c01b..ac813c6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -17,8 +17,7 @@ Imports: ggthemes, readr, tibble, - tidyr, - workflowr + tidyr Remotes: morinlab/GAMBLR.data Encoding: UTF-8 diff --git a/NAMESPACE b/NAMESPACE index ad6b74c..6b5fe8a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -31,12 +31,10 @@ export(subset_cnstates) export(theme_Morons) export(trim_scale_expression) export(vc_nonSynonymous) -export(web_initialize_gambl_site) import(GAMBLR.data) import(dplyr) import(ggplot2) import(readr) import(tibble) import(tidyr) -import(workflowr) importFrom(ggthemes,theme_foundation) diff --git a/R/web_initialize_gambl_site.R b/R/web_initialize_gambl_site.R deleted file mode 100644 index a781ee4..0000000 --- a/R/web_initialize_gambl_site.R +++ /dev/null @@ -1,22 +0,0 @@ -#' @title Web Initialize GAMBL Site. -#' -#' @description Set up a fresh instance of a website to host on gitlab. -#' -#' @param site_base_name Base name for site. -#' @param base_directory Path to base directory. -#' @param my_name My name. -#' @param my_gitlab_email The email used for gitlab. 
-#' -#' @import workflowr -#' -#' @export -web_initialize_gambl_site = function(site_base_name, - base_directory = "/home/rmorin/", - my_name = "Ryan Morin", - my_gitlab_email = "rdmorin@sfu.ca"){ - - wflow_git_config(user.name = my_name, user.email = my_gitlab_email) - setwd(base_directory) - wflow_start(site_base_name) - wflow_build() -} diff --git a/man/web_initialize_gambl_site.Rd b/man/web_initialize_gambl_site.Rd deleted file mode 100644 index 66a8b9b..0000000 --- a/man/web_initialize_gambl_site.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/web_initialize_gambl_site.R -\name{web_initialize_gambl_site} -\alias{web_initialize_gambl_site} -\title{Web Initialize GAMBL Site.} -\usage{ -web_initialize_gambl_site( - site_base_name, - base_directory = "/home/rmorin/", - my_name = "Ryan Morin", - my_gitlab_email = "rdmorin@sfu.ca" -) -} -\arguments{ -\item{site_base_name}{Base name for site.} - -\item{base_directory}{Path to base directory.} - -\item{my_name}{My name.} - -\item{my_gitlab_email}{The email used for gitlab.} -} -\description{ -Set up a fresh instance of a website to host on gitlab. -}