From 11aa08f5530e862279b9767c8ab357a3ccf0e162 Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Fri, 10 May 2024 16:04:50 -0700
Subject: [PATCH 01/21] transition: move out cool_overlaps

---
 NAMESPACE            |   1 -
 R/cool_overlaps.R    | 183 -------------------------------------------
 man/cool_overlaps.Rd |  92 ----------------------
 3 files changed, 276 deletions(-)
 delete mode 100644 R/cool_overlaps.R
 delete mode 100644 man/cool_overlaps.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 8f7afde..d9aa630 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -10,7 +10,6 @@ export(coding_class)
 export(coding_vc)
 export(colour_aliases)
 export(compare_coding_mutation_pattern)
-export(cool_overlaps)
 export(copy_no_clobber)
 export(create_onco_matrix)
 export(fuzzy_match_mafs)
diff --git a/R/cool_overlaps.R b/R/cool_overlaps.R
deleted file mode 100644
index 0a52c23..0000000
--- a/R/cool_overlaps.R
+++ /dev/null
@@ -1,183 +0,0 @@
-#' @title Cool overlap of data frames.
-#'
-#' @description This function implements overlap of 2 data frames that contain
-#' regions of coordinates similar to what data.table::foverlaps does. Unlike
-#' foverlaps, this function takes as input data frame class objects, and relies
-#' on dplyr solution rather than data.table handling, therefore allowing usage
-#' of data frames with virtually unlimited dimensions without crashing. This
-#' implementation uses same logic of different types of overlaps as the original
-#' foverlaps solution ("any", "start", "end", "within", "equal"). The type "any"
-#' is default and allows for any overlapping solution between 2 regions. The
-#' type "start" only considers regions with exact same start position as
-#' overlap; similarly type "end" considers regions overlapped when the end
-#' positions are exact matches. Type "within" means that regions are overlapped
-#' when one is contained in another and neither start nor end positions match.
-#' Finally, type "equal" only considers overlap when both start and end
-#' positions match for both regions. For any type, the presence of any
-#' additional column not directly specifying regions (for example, Chromosome)
-#' will serve similar to a grouping variable.
-#' The generated output of this function will contain the overlapping regions
-#' and all columns present in the data frame data1, as well as any columns from
-#' the data frame supplied with data2 argument, except for those columns present
-#' in data2 that are used for overlap. When the same columns are present in both
-#' data1 and data2, the output data frame will have ".x" and ".y" suffixes to
-#' indicate which original input data they are coming from.
-#'
-#' @param data1 Data frame with data to overlap. Required parameter. The minimal
-#'      required columns are those supplied with the argument columns1. Will
-#'      dictate the naming of the columns used for overlap in the output.
-#' @param data2 Data frame with data to overlap. Required parameter. The minimal
-#'      required columns are those supplied with the argument columns2.
-#' @param columns1 The list of columns from data frame data1 to be used to find
-#'      overlapping regions.
-#' @param columns2 The list of columns from data frame data2 to be used to find
-#'      overlapping regions.
-#' @param type Character specifying the way to find overlaps. Accepted values
-#'      are "any" (used as default), "start", "end", "within", and "equal".
-#'      Please see function description for more details of different types.
-#'
-#' @return data frame
-#'
-#' @examples
-#' # obtain maf data
-#' maf1 <- get_coding_ssm(
-#'     these_sample_ids = "DOHH-2"
-#' )
-#'
-#' maf2 <- get_coding_ssm(
-#'     these_sample_ids = "SU-DHL-4"
-#' )
-#'
-#' # The same mutations are not expected to be present in different samples
-#' # so this overlap will produce 0 matching rows
-#' overlap <- cool_overlaps(
-#'     maf1,
-#'     maf1,
-#'     type = "equal"
-#' )
-#'
-#' # To demonstrate functionality we can supply the same maf to the data2
-#' overlap <- cool_overlaps(
-#'     maf1,
-#'     maf1 %>% head
-#' )
-#'
-#' # We can also overlap different formats, for example
-#' seg1 <- get_sample_cn_segments(these_sample_ids = "DOHH-2")
-#' overlap <- cool_overlaps(
-#'     data1 = maf1,
-#'     data2 = seg1,
-#'     columns2 = c("chrom", "start", "end")
-#' )
-#'
-#' @import dplyr
-#' @export
-#'
-cool_overlaps <- function(
-    data1,
-    data2,
-    columns1 = c("Chromosome", "Start_Position", "End_Position"),
-    columns2 = c("Chromosome", "Start_Position", "End_Position"),
-    type = "any"
-){
-
-    # Ensure all columns provided for overlap are present in the data frame
-    if(! length(columns1) == length(intersect(columns1, colnames(data1)))){
-        stop(
-            "Not all of the requested columns for overlap in data1 are present."
-        )
-    }
-
-    if(! length(columns2) == length(intersect(columns2, colnames(data2)))){
-        stop(
-            "Not all of the requested columns for overlap in data2 are present."
-        )
-    }
-
-    # What is the name of the column in columns1 that specifies start and end?
-    start1 <- columns1[grepl("start", columns1, ignore.case = TRUE)]
-    end1 <- columns1[grepl("end", columns1, ignore.case = TRUE)]
-
-    # What is the name of the column in columns1 that specifies start and end?
-    start2 <- columns2[grepl("start", columns2, ignore.case = TRUE)]
-    end2 <- columns2[grepl("end", columns2, ignore.case = TRUE)]
-
-    # What are the other columns to be used in overlap?
-    columns1 <- columns1[!columns1 %in% c(start1, end1)]
-    columns2 <- columns2[!columns2 %in% c(start2, end2)]
-
-    # When the same columns are provided they will become .x and .y
-    if(start1 == start2) {
-        start1 <- paste0(start1, ".x")
-        start2 <- paste0(start2, ".y")
-
-    }
-    if(end1 == end2) {
-        end1 <- paste0(end1, ".x")
-        end2 <- paste0(end2, ".y")
-
-    }
-
-
-    # Prepare for overlap
-    overlap <- dplyr::inner_join(
-        data1,
-        data2,
-        by = structure(names = columns1, .Data = columns2),
-        relationship = "many-to-many"
-    )
-
-    # Return matches based on mode
-    if(type == "any"){
-        message(
-            "Running in default mode of any..."
-        )
-        overlap <- overlap %>%
-            dplyr::filter(
-                !!sym(start2) >= !!sym(start1) & !!sym(end2) <= !!sym(end1) |
-                !!sym(start1) >= !!sym(start2) & !!sym(end1) <= !!sym(end2)
-            )
-    } else if (type == "start"){
-        message(
-            "Running in the mode start..."
-        )
-        overlap <- overlap %>%
-            dplyr::filter(
-               !!sym(start1) == !!sym(start2)
-            )
-    } else if (type == "end"){
-        message(
-            "Running in the mode end..."
-        )
-        overlap <- overlap %>%
-            dplyr::filter(
-               !!sym(end1) == !!sym(end2)
-            )
-    } else if (type == "within"){
-        message(
-            "Running in the mode within..."
-        )
-        overlap <- overlap %>%
-            dplyr::filter(
-               (!!sym(start1) >= !!sym(start2)) & (!!sym(end1) <= !!sym(end2)) |
-               (!!sym(start2) >= !!sym(start1)) & (!!sym(end2) <= !!sym(end1))
-            )
-    } else if (type == "equal"){
-        message(
-            "Running in the mode equal..."
-        )
-        overlap <- overlap %>%
-            dplyr::filter(
-               (!!sym(start1) == !!sym(start2)) & (!!sym(end1) == !!sym(end2))
-            )
-    } else {
-        message(
-            "You have requested mode that is not supported."
-        )
-        stop(
-            "Please supply one of any, start, end, within, or equal with type."
-        )
-    }
-
-    return(overlap)
-}
diff --git a/man/cool_overlaps.Rd b/man/cool_overlaps.Rd
deleted file mode 100644
index 0b20674..0000000
--- a/man/cool_overlaps.Rd
+++ /dev/null
@@ -1,92 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/cool_overlaps.R
-\name{cool_overlaps}
-\alias{cool_overlaps}
-\title{Cool overlap of data frames.}
-\usage{
-cool_overlaps(
-  data1,
-  data2,
-  columns1 = c("Chromosome", "Start_Position", "End_Position"),
-  columns2 = c("Chromosome", "Start_Position", "End_Position"),
-  type = "any"
-)
-}
-\arguments{
-\item{data1}{Data frame with data to overlap. Required parameter. The minimal
-required columns are those supplied with the argument columns1. Will
-dictate the naming of the columns used for overlap in the output.}
-
-\item{data2}{Data frame with data to overlap. Required parameter. The minimal
-required columns are those supplied with the argument columns2.}
-
-\item{columns1}{The list of columns from data frame data1 to be used to find
-overlapping regions.}
-
-\item{columns2}{The list of columns from data frame data2 to be used to find
-overlapping regions.}
-
-\item{type}{Character specifying the way to find overlaps. Accepted values
-are "any" (used as default), "start", "end", "within", and "equal".
-Please see function description for more details of different types.}
-}
-\value{
-data frame
-}
-\description{
-This function implements overlap of 2 data frames that contain
-regions of coordinates similar to what data.table::foverlaps does. Unlike
-foverlaps, this function takes as input data frame class objects, and relies
-on dplyr solution rather than data.table handling, therefore allowing usage
-of data frames with virtually unlimited dimensions without crashing. This
-implementation uses same logic of different types of overlaps as the original
-foverlaps solution ("any", "start", "end", "within", "equal"). The type "any"
-is default and allows for any overlapping solution between 2 regions. The
-type "start" only considers regions with exact same start position as
-overlap; similarly type "end" considers regions overlapped when the end
-positions are exact matches. Type "within" means that regions are overlapped
-when one is contained in another and neither start nor end positions match.
-Finally, type "equal" only considers overlap when both start and end
-positions match for both regions. For any type, the presence of any
-additional column not directly specifying regions (for example, Chromosome)
-will serve similar to a grouping variable.
-The generated output of this function will contain the overlapping regions
-and all columns present in the data frame data1, as well as any columns from
-the data frame supplied with data2 argument, except for those columns present
-in data2 that are used for overlap. When the same columns are present in both
-data1 and data2, the output data frame will have ".x" and ".y" suffixes to
-indicate which original input data they are coming from.
-}
-\examples{
-# obtain maf data
-maf1 <- get_coding_ssm(
-    these_sample_ids = "DOHH-2"
-)
-
-maf2 <- get_coding_ssm(
-    these_sample_ids = "SU-DHL-4"
-)
-
-# The same mutations are not expected to be present in different samples
-# so this overlap will produce 0 matching rows
-overlap <- cool_overlaps(
-    maf1,
-    maf1,
-    type = "equal"
-)
-
-# To demonstrate functionality we can supply the same maf to the data2
-overlap <- cool_overlaps(
-    maf1,
-    maf1 \%>\% head
-)
-
-# We can also overlap different formats, for example
-seg1 <- get_sample_cn_segments(these_sample_ids = "DOHH-2")
-overlap <- cool_overlaps(
-    data1 = maf1,
-    data2 = seg1,
-    columns2 = c("chrom", "start", "end")
-)
-
-}

From 5c31a9832a862d6f552e3c92e6164010203e30a9 Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Tue, 2 Jul 2024 15:44:51 -0700
Subject: [PATCH 02/21] bug fix: color for nonsense mutations

---
 R/get_gambl_colours.R | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/R/get_gambl_colours.R b/R/get_gambl_colours.R
index 1e0ced7..dab03b6 100644
--- a/R/get_gambl_colours.R
+++ b/R/get_gambl_colours.R
@@ -34,9 +34,9 @@ get_gambl_colours = function(classification = "all",
 
   all_colours = list()
   everything = c()
-  
+
   blood_cols = c(   Red   ="#c41230",
-                    Blue ="#115284", 
+                    Blue ="#115284",
                     Green =  "#39b54b",
                     Purple  =  "#5c266c",
                     Orange  = "#fe9003",
@@ -119,10 +119,10 @@ get_gambl_colours = function(classification = "all",
                                 "Other" = "#ACADAF",
                                 "COMPOSITE" = "#ACADAF")
 
-  
+
   all_colours[["mutation"]]=
     c(
-      "Nonsense_Mutation"=unname(blood_cols["Red"]),
+      "Nonsense_Mutation"="#D8A7CA",
       "Missense_Mutation"=unname(blood_cols["Green"]),
       "Multi_Hit"=unname(blood_cols["Steel Blue"]),
       "Frame_Shift_Ins" = unname(blood_cols["Magenta"]),
@@ -194,8 +194,8 @@ get_gambl_colours = function(classification = "all",
     "F"="#EF476F",
     "Female"="#EF476F",
     "female"="#EF476F")
-  
-  all_colours[["clinical"]]= 
+
+  all_colours[["clinical"]]=
     c(
       "M"="#118AB2",
       "Male"="#118AB2",

From c0f75f79b27d0fe8757fd1983ee777a11d49fcdf Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Tue, 2 Jul 2024 23:20:57 -0700
Subject: [PATCH 03/21] env: add r channel and re-pin package versions in envs/r.yaml

---
 envs/r.yaml | 171 ++++++++++++++++++++++++++--------------------------
 1 file changed, 86 insertions(+), 85 deletions(-)

diff --git a/envs/r.yaml b/envs/r.yaml
index fdceb73..c6af660 100644
--- a/envs/r.yaml
+++ b/envs/r.yaml
@@ -1,23 +1,23 @@
 name: r
 channels:
+  - r
   - conda-forge
   - defaults
 dependencies:
   - _libgcc_mutex=0.1
   - _openmp_mutex=4.5
-  - _r-mutex=1.0.1
-  - _sysroot_linux-64_curr_repodata_hack=3
-  - binutils_impl_linux-64=2.40
-  - bwidget=1.9.14
+  - _r-mutex=1.0.0
+  - binutils_impl_linux-64=2.38
+  - bwidget=1.9.16
   - bzip2=1.0.8
-  - c-ares=1.27.0
-  - ca-certificates=2024.2.2
+  - c-ares=1.19.1
+  - ca-certificates=2024.3.11
   - cairo=1.18.0
   - curl=8.5.0
-  - expat=2.6.2
+  - expat=2.5.0
   - font-ttf-dejavu-sans-mono=2.37
-  - font-ttf-inconsolata=3.000
-  - font-ttf-source-code-pro=2.038
+  - font-ttf-inconsolata=2.001
+  - font-ttf-source-code-pro=2.030
   - font-ttf-ubuntu=0.83
   - fontconfig=2.14.2
   - fonts-anaconda=1
@@ -25,102 +25,103 @@ dependencies:
   - fonts-conda-forge=1
   - freetype=2.12.1
   - fribidi=1.0.10
-  - gcc_impl_linux-64=13.2.0
-  - gettext=0.21.1
-  - gfortran_impl_linux-64=13.2.0
-  - graphite2=1.3.13
-  - gxx_impl_linux-64=13.2.0
+  - gcc_impl_linux-64=11.2.0
+  - gettext=0.21.0
+  - gfortran_impl_linux-64=11.2.0
+  - graphite2=1.3.14
+  - gxx_impl_linux-64=11.2.0
   - harfbuzz=8.3.0
   - icu=73.2
-  - kernel-headers_linux-64=4.18.0
+  - kernel-headers_linux-64=2.6.32
   - keyutils=1.6.1
-  - krb5=1.21.2
-  - ld_impl_linux-64=2.40
+  - krb5=1.20.1
+  - ld_impl_linux-64=2.38
   - lerc=4.0.0
   - libblas=3.9.0
   - libcurl=8.5.0
   - libdeflate=1.19
-  - libedit=3.1.20191231
+  - libedit=3.1.20230828
   - libev=4.33
-  - libexpat=2.6.2
-  - libffi=3.4.2
-  - libgcc-devel_linux-64=13.2.0
+  - libexpat=2.5.0
+  - libffi=3.4.4
+  - libgcc-devel_linux-64=11.2.0
   - libgcc-ng=13.2.0
-  - libgfortran-ng=13.2.0
-  - libgfortran5=13.2.0
-  - libgit2=1.7.2
-  - libglib=2.80.0
+  - libgfortran-ng=11.2.0
+  - libgfortran5=11.2.0
+  - libgit2=1.6.4
+  - libglib=2.78.4
   - libgomp=13.2.0
   - libiconv=1.17
   - libjpeg-turbo=3.0.0
   - liblapack=3.9.0
-  - libnghttp2=1.58.0
-  - libopenblas=0.3.26
+  - libnghttp2=1.57.0
+  - libopenblas=0.3.21
   - libpng=1.6.43
-  - libsanitizer=13.2.0
-  - libssh2=1.11.0
-  - libstdcxx-devel_linux-64=13.2.0
+  - libssh2=1.10.0
+  - libstdcxx-devel_linux-64=11.2.0
   - libstdcxx-ng=13.2.0
   - libtiff=4.6.0
   - libuuid=2.38.1
   - libwebp-base=1.3.2
   - libxcb=1.15
-  - libxml2=2.12.5
+  - libxml2=2.10.4
   - libzlib=1.2.13
   - lz4-c=1.9.4
-  - make=4.3
-  - ncurses=6.4
-  - openssl=3.2.1
-  - pandoc=3.1.12.2
-  - pango=1.52.1
-  - pcre2=10.43
+  - make=4.2.1
+  - ncurses=6.4.20240210
+  - openssl=3.0.13
+  - pandoc=2.12
+  - pango=1.52.0
+  - pcre2=10.42
   - pixman=0.43.2
-  - pthread-stubs=0.4
+  - pthread-stubs=0.3
   - r-askpass=1.2.0
   - r-assertthat=0.2.1
-  - r-base=4.2.3
+  - r-base=4.3.3
   - r-base64enc=0.1_3
-  - r-biocmanager=1.30.22
-  - r-brew=1.0_10
-  - r-brio=1.1.4
-  - r-bslib=0.6.1
+  - r-brew=1.0_8
+  - r-brio=1.1.3
+  - r-bslib=0.5.1
   - r-cachem=1.0.8
-  - r-callr=3.7.5
-  - r-cli=3.6.2
+  - r-callr=3.7.3
+  - r-cli=3.6.1
   - r-clipr=0.8.0
-  - r-commonmark=1.9.1
-  - r-cpp11=0.4.7
+  - r-commonmark=1.9.0
+  - r-cpp11=0.4.6
   - r-crayon=1.5.2
   - r-credentials=2.0.1
   - r-curl=5.1.0
-  - r-desc=1.4.3
+  - r-desc=1.4.2
   - r-devtools=2.4.5
   - r-diffobj=0.3.5
-  - r-digest=0.6.35
+  - r-digest=0.6.33
   - r-downlit=0.4.3
   - r-ellipsis=0.3.2
-  - r-evaluate=0.23
-  - r-fansi=1.0.6
+  - r-evaluate=0.22
+  - r-fansi=1.0.5
   - r-fastmap=1.1.1
   - r-fontawesome=0.5.2
   - r-fs=1.6.3
-  - r-gert=2.0.1
+  - r-gert=2.0.0
   - r-gh=1.4.0
   - r-gitcreds=0.1.2
-  - r-glue=1.7.0
+  - r-glue=1.6.2
   - r-highr=0.10
-  - r-htmltools=0.5.7
-  - r-htmlwidgets=1.6.4
-  - r-httpuv=1.6.14
+  - r-htmltools=0.5.6.1
+  - r-htmlwidgets=1.6.2
+  - r-httpuv=1.6.11
   - r-httr=1.4.7
-  - r-httr2=1.0.0
+  - r-httr2=0.2.3
   - r-ini=0.3.1
   - r-jquerylib=0.1.4
-  - r-jsonlite=1.8.8
-  - r-knitr=1.45
-  - r-later=1.3.2
-  - r-lifecycle=1.0.4
+  - r-jsonlite=1.8.7
+  - r-knitr=1.44
+  - r-later=1.3.1
+  - r-lattice=0.22_5
+  - r-lifecycle=1.0.3
   - r-magrittr=2.0.3
+  - r-mass=7.3_60
+  - r-matrix=1.6_1.1
   - r-memoise=2.0.1
   - r-mime=0.12
   - r-miniui=0.1.1.1
@@ -129,55 +130,55 @@ dependencies:
   - r-pkgbuild=1.4.2
   - r-pkgconfig=2.0.3
   - r-pkgdown=2.0.7
-  - r-pkgload=1.3.4
+  - r-pkgload=1.3.3
   - r-praise=1.0.0
   - r-prettyunits=1.2.0
-  - r-processx=3.8.3
+  - r-processx=3.8.2
   - r-profvis=0.3.8
   - r-promises=1.2.1
-  - r-ps=1.7.6
+  - r-ps=1.7.5
   - r-purrr=1.0.2
   - r-r6=2.5.1
-  - r-ragg=1.3.0
+  - r-ragg=1.2.6
   - r-rappdirs=0.3.3
   - r-rcmdcheck=1.4.0
-  - r-rcpp=1.0.12
+  - r-rcpp=1.0.11
   - r-rematch2=2.1.2
   - r-remotes=2.4.2.1
-  - r-rlang=1.1.3
+  - r-rlang=1.1.1
   - r-rmarkdown=2.25
-  - r-roxygen2=7.3.1
-  - r-rprojroot=2.0.4
+  - r-roxygen2=7.2.3
+  - r-rprojroot=2.0.3
   - r-rstudioapi=0.15.0
   - r-rversions=2.1.2
-  - r-sass=0.4.8
+  - r-sass=0.4.7
   - r-sessioninfo=1.2.2
-  - r-shiny=1.8.0
+  - r-shiny=1.7.5.1
   - r-sourcetools=0.1.7_1
-  - r-stringi=1.8.3
-  - r-stringr=1.5.1
+  - r-stringi=1.7.12
+  - r-stringr=1.5.0
   - r-sys=3.4.2
   - r-systemfonts=1.0.5
-  - r-testthat=3.2.1
+  - r-testthat=3.2.0
   - r-textshaping=0.3.7
   - r-tibble=3.2.1
-  - r-tinytex=0.49
+  - r-tinytex=0.48
   - r-urlchecker=1.0.1
-  - r-usethis=2.2.3
+  - r-usethis=2.2.2
   - r-utf8=1.2.4
-  - r-vctrs=0.6.5
-  - r-waldo=0.5.2
+  - r-vctrs=0.6.4
+  - r-waldo=0.5.1
   - r-whisker=0.4.1
-  - r-withr=3.0.0
-  - r-xfun=0.42
-  - r-xml2=1.3.6
+  - r-withr=2.5.1
+  - r-xfun=0.40
+  - r-xml2=1.3.5
   - r-xopen=1.0.0
   - r-xtable=1.8_4
-  - r-yaml=2.3.8
-  - r-zip=2.3.1
+  - r-yaml=2.3.7
+  - r-zip=2.3.0
   - readline=8.2
   - sed=4.8
-  - sysroot_linux-64=2.28
+  - sysroot_linux-64=2.12
   - tk=8.6.13
   - tktable=2.10
   - xorg-kbproto=1.0.7
@@ -192,6 +193,6 @@ dependencies:
   - xorg-renderproto=0.11.1
   - xorg-xextproto=7.3.0
   - xorg-xproto=7.0.31
-  - xz=5.2.6
+  - xz=5.4.6
   - zlib=1.2.13
   - zstd=1.5.5

From da3f546b41640659691c0811ad37d3d3116cf91e Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Wed, 3 Jul 2024 08:02:39 -0700
Subject: [PATCH 04/21] ci: run Rscript with --vanilla in build_check workflow

---
 .github/workflows/build_check.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build_check.yaml b/.github/workflows/build_check.yaml
index c6f4395..683e225 100644
--- a/.github/workflows/build_check.yaml
+++ b/.github/workflows/build_check.yaml
@@ -35,11 +35,11 @@ jobs:
 
     - name: Build package
       run:
-        Rscript -e "devtools::install()"
+        Rscript --vanilla -e "devtools::install()"
 
     - name: Check package
       run:
-        Rscript -e "devtools::check(vignettes = FALSE, args = '--no-examples')"
+        Rscript --vanilla -e "devtools::check(vignettes = FALSE, args = '--no-examples')"
 
     - name: Upload check results
       if: failure()

From 7c6d656aca1d4af4ca37bf4e2e490342eade98c4 Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Mon, 8 Jul 2024 18:07:57 -0700
Subject: [PATCH 05/21] bug fix: testing working env on actions

---
 envs/r.yaml | 226 +++++++++++++++++++++++++++++-----------------------
 1 file changed, 127 insertions(+), 99 deletions(-)

diff --git a/envs/r.yaml b/envs/r.yaml
index c6af660..9836904 100644
--- a/envs/r.yaml
+++ b/envs/r.yaml
@@ -1,190 +1,218 @@
 name: r
 channels:
-  - r
   - conda-forge
   - defaults
 dependencies:
   - _libgcc_mutex=0.1
   - _openmp_mutex=4.5
-  - _r-mutex=1.0.0
-  - binutils_impl_linux-64=2.38
-  - bwidget=1.9.16
+  - _r-mutex=1.0.1
+  - binutils_impl_linux-64=2.40
+  - binutils_linux-64=2.40
+  - bwidget=1.9.14
   - bzip2=1.0.8
-  - c-ares=1.19.1
-  - ca-certificates=2024.3.11
-  - cairo=1.18.0
-  - curl=8.5.0
-  - expat=2.5.0
+  - c-ares=1.28.1
+  - ca-certificates=2024.7.4
+  - cairo=1.16.0
+  - cmake=3.30.0
+  - curl=8.8.0
+  - expat=2.6.2
   - font-ttf-dejavu-sans-mono=2.37
-  - font-ttf-inconsolata=2.001
-  - font-ttf-source-code-pro=2.030
+  - font-ttf-inconsolata=3.000
+  - font-ttf-source-code-pro=2.038
   - font-ttf-ubuntu=0.83
   - fontconfig=2.14.2
-  - fonts-anaconda=1
   - fonts-conda-ecosystem=1
   - fonts-conda-forge=1
   - freetype=2.12.1
   - fribidi=1.0.10
-  - gcc_impl_linux-64=11.2.0
-  - gettext=0.21.0
-  - gfortran_impl_linux-64=11.2.0
-  - graphite2=1.3.14
-  - gxx_impl_linux-64=11.2.0
-  - harfbuzz=8.3.0
-  - icu=73.2
+  - gcc_impl_linux-64=13.2.0
+  - gcc_linux-64=13.2.0
+  - gettext=0.22.5
+  - gettext-tools=0.22.5
+  - gfortran_impl_linux-64=13.2.0
+  - graphite2=1.3.13
+  - gsl=2.7
+  - gxx_impl_linux-64=13.2.0
+  - harfbuzz=7.3.0
+  - icu=72.1
   - kernel-headers_linux-64=2.6.32
   - keyutils=1.6.1
-  - krb5=1.20.1
-  - ld_impl_linux-64=2.38
+  - krb5=1.21.3
+  - ld_impl_linux-64=2.40
   - lerc=4.0.0
+  - libasprintf=0.22.5
+  - libasprintf-devel=0.22.5
   - libblas=3.9.0
-  - libcurl=8.5.0
-  - libdeflate=1.19
-  - libedit=3.1.20230828
+  - libcblas=3.9.0
+  - libcurl=8.8.0
+  - libdeflate=1.18
+  - libedit=3.1.20191231
   - libev=4.33
-  - libexpat=2.5.0
-  - libffi=3.4.4
-  - libgcc-devel_linux-64=11.2.0
-  - libgcc-ng=13.2.0
-  - libgfortran-ng=11.2.0
-  - libgfortran5=11.2.0
-  - libgit2=1.6.4
-  - libglib=2.78.4
-  - libgomp=13.2.0
+  - libexpat=2.6.2
+  - libffi=3.4.2
+  - libgcc-devel_linux-64=13.2.0
+  - libgcc-ng=14.1.0
+  - libgettextpo=0.22.5
+  - libgettextpo-devel=0.22.5
+  - libgfortran-ng=14.1.0
+  - libgfortran5=14.1.0
+  - libgit2=1.5.1
+  - libglib=2.78.1
+  - libgomp=14.1.0
   - libiconv=1.17
-  - libjpeg-turbo=3.0.0
+  - libjpeg-turbo=2.1.5.1
   - liblapack=3.9.0
-  - libnghttp2=1.57.0
-  - libopenblas=0.3.21
+  - libnghttp2=1.58.0
+  - libnsl=2.0.1
+  - libopenblas=0.3.27
   - libpng=1.6.43
-  - libssh2=1.10.0
-  - libstdcxx-devel_linux-64=11.2.0
-  - libstdcxx-ng=13.2.0
-  - libtiff=4.6.0
+  - libsanitizer=13.2.0
+  - libsqlite=3.46.0
+  - libssh2=1.11.0
+  - libstdcxx-devel_linux-64=13.2.0
+  - libstdcxx-ng=14.1.0
+  - libtiff=4.5.1
   - libuuid=2.38.1
-  - libwebp-base=1.3.2
+  - libuv=1.48.0
+  - libwebp-base=1.4.0
   - libxcb=1.15
-  - libxml2=2.10.4
-  - libzlib=1.2.13
-  - lz4-c=1.9.4
-  - make=4.2.1
-  - ncurses=6.4.20240210
-  - openssl=3.0.13
-  - pandoc=2.12
-  - pango=1.52.0
-  - pcre2=10.42
+  - libxcrypt=4.4.36
+  - libxml2=2.11.5
+  - libzlib=1.3.1
+  - make=4.3
+  - mysql-common=8.3.0
+  - mysql-connector-c=6.1.11
+  - mysql-libs=8.3.0
+  - mysqlclient=2.2.4
+  - ncurses=6.5
+  - openssl=3.3.1
+  - pandoc=2.19.2
+  - pango=1.50.14
+  - pcre2=10.40
+  - pip=24.0
   - pixman=0.43.2
-  - pthread-stubs=0.3
-  - r-askpass=1.2.0
+  - pthread-stubs=0.4
+  - python=3.12.4
+  - python_abi=3.12
+  - r-askpass=1.1
   - r-assertthat=0.2.1
-  - r-base=4.3.3
+  - r-backports=1.4.1
+  - r-base=4.1.3
   - r-base64enc=0.1_3
+  - r-biocmanager=1.30.21
   - r-brew=1.0_8
   - r-brio=1.1.3
-  - r-bslib=0.5.1
+  - r-bslib=0.5.0
   - r-cachem=1.0.8
   - r-callr=3.7.3
   - r-cli=3.6.1
   - r-clipr=0.8.0
   - r-commonmark=1.9.0
-  - r-cpp11=0.4.6
+  - r-cpp11=0.4.7
   - r-crayon=1.5.2
-  - r-credentials=2.0.1
-  - r-curl=5.1.0
+  - r-credentials=1.3.2
+  - r-curl=4.3.3
+  - r-dbi=1.1.3
   - r-desc=1.4.2
   - r-devtools=2.4.5
   - r-diffobj=0.3.5
-  - r-digest=0.6.33
-  - r-downlit=0.4.3
+  - r-digest=0.6.31
+  - r-downlit=0.4.2
   - r-ellipsis=0.3.2
-  - r-evaluate=0.22
-  - r-fansi=1.0.5
+  - r-evaluate=0.21
+  - r-fansi=1.0.4
   - r-fastmap=1.1.1
-  - r-fontawesome=0.5.2
-  - r-fs=1.6.3
-  - r-gert=2.0.0
+  - r-fontawesome=0.5.1
+  - r-fs=1.6.2
+  - r-gert=1.9.2
   - r-gh=1.4.0
+  - r-git2r=0.31.0
   - r-gitcreds=0.1.2
   - r-glue=1.6.2
   - r-highr=0.10
-  - r-htmltools=0.5.6.1
+  - r-htmltools=0.5.5
   - r-htmlwidgets=1.6.2
   - r-httpuv=1.6.11
-  - r-httr=1.4.7
+  - r-httr=1.4.6
   - r-httr2=0.2.3
   - r-ini=0.3.1
   - r-jquerylib=0.1.4
-  - r-jsonlite=1.8.7
-  - r-knitr=1.44
+  - r-jsonlite=1.8.5
+  - r-knitr=1.43
   - r-later=1.3.1
-  - r-lattice=0.22_5
+  - r-lattice=0.21_8
   - r-lifecycle=1.0.3
   - r-magrittr=2.0.3
-  - r-mass=7.3_60
-  - r-matrix=1.6_1.1
+  - r-mass=7.3_58.3
+  - r-matrix=1.5_4.1
   - r-memoise=2.0.1
   - r-mime=0.12
   - r-miniui=0.1.1.1
-  - r-openssl=2.1.1
+  - r-openssl=2.0.6
   - r-pillar=1.9.0
-  - r-pkgbuild=1.4.2
+  - r-pkgbuild=1.4.0
   - r-pkgconfig=2.0.3
   - r-pkgdown=2.0.7
-  - r-pkgload=1.3.3
+  - r-pkgload=1.3.2
   - r-praise=1.0.0
-  - r-prettyunits=1.2.0
-  - r-processx=3.8.2
+  - r-prettyunits=1.1.1
+  - r-processx=3.8.1
   - r-profvis=0.3.8
-  - r-promises=1.2.1
+  - r-promises=1.2.0.1
   - r-ps=1.7.5
-  - r-purrr=1.0.2
+  - r-purrr=1.0.1
   - r-r6=2.5.1
-  - r-ragg=1.2.6
+  - r-ragg=1.2.5
   - r-rappdirs=0.3.3
   - r-rcmdcheck=1.4.0
-  - r-rcpp=1.0.11
+  - r-rcpp=1.0.10
   - r-rematch2=2.1.2
-  - r-remotes=2.4.2.1
+  - r-remotes=2.4.2
   - r-rlang=1.1.1
-  - r-rmarkdown=2.25
+  - r-rmarkdown=2.22
+  - r-rmysql=0.10.25
   - r-roxygen2=7.2.3
   - r-rprojroot=2.0.3
-  - r-rstudioapi=0.15.0
+  - r-rstudioapi=0.14
   - r-rversions=2.1.2
-  - r-sass=0.4.7
+  - r-sass=0.4.6
   - r-sessioninfo=1.2.2
-  - r-shiny=1.7.5.1
+  - r-shiny=1.7.4
   - r-sourcetools=0.1.7_1
   - r-stringi=1.7.12
   - r-stringr=1.5.0
   - r-sys=3.4.2
-  - r-systemfonts=1.0.5
-  - r-testthat=3.2.0
-  - r-textshaping=0.3.7
+  - r-systemfonts=1.0.4
+  - r-testthat=3.1.8
+  - r-textshaping=0.3.6
   - r-tibble=3.2.1
-  - r-tinytex=0.48
+  - r-tinytex=0.45
   - r-urlchecker=1.0.1
-  - r-usethis=2.2.2
-  - r-utf8=1.2.4
-  - r-vctrs=0.6.4
+  - r-usethis=2.2.0
+  - r-utf8=1.2.3
+  - r-vctrs=0.6.2
   - r-waldo=0.5.1
   - r-whisker=0.4.1
-  - r-withr=2.5.1
-  - r-xfun=0.40
-  - r-xml2=1.3.5
+  - r-withr=2.5.0
+  - r-xfun=0.39
+  - r-xml2=1.3.4
   - r-xopen=1.0.0
   - r-xtable=1.8_4
   - r-yaml=2.3.7
   - r-zip=2.3.0
   - readline=8.2
+  - rhash=1.4.4
   - sed=4.8
+  - setuptools=70.1.1
   - sysroot_linux-64=2.12
   - tk=8.6.13
   - tktable=2.10
+  - tzdata=2024a
+  - wheel=0.43.0
   - xorg-kbproto=1.0.7
   - xorg-libice=1.1.1
   - xorg-libsm=1.2.4
-  - xorg-libx11=1.8.7
+  - xorg-libx11=1.8.9
   - xorg-libxau=1.0.11
   - xorg-libxdmcp=1.1.3
   - xorg-libxext=1.3.4
@@ -193,6 +221,6 @@ dependencies:
   - xorg-renderproto=0.11.1
   - xorg-xextproto=7.3.0
   - xorg-xproto=7.0.31
-  - xz=5.4.6
-  - zlib=1.2.13
-  - zstd=1.5.5
+  - xz=5.2.6
+  - zlib=1.3.1
+  - zstd=1.5.6

From 1740b09886e3c98449246c8141592182a1bf3e55 Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Mon, 8 Jul 2024 18:15:54 -0700
Subject: [PATCH 06/21] bug fix: downgrade python version to 3.10

---
 envs/r.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/envs/r.yaml b/envs/r.yaml
index 9836904..195aee7 100644
--- a/envs/r.yaml
+++ b/envs/r.yaml
@@ -92,8 +92,8 @@ dependencies:
   - pip=24.0
   - pixman=0.43.2
   - pthread-stubs=0.4
-  - python=3.12.4
-  - python_abi=3.12
+  - python=3.10.14
+  - python_abi=3.10
   - r-askpass=1.1
   - r-assertthat=0.2.1
   - r-backports=1.4.1

From 49c73ed1b712b7fb784ba486d359b80e476cb142 Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Mon, 8 Jul 2024 18:18:53 -0700
Subject: [PATCH 07/21] bug fix: downgrade python version to 3.9 and rename env to deps

---
 envs/r.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/envs/r.yaml b/envs/r.yaml
index 195aee7..74e56e8 100644
--- a/envs/r.yaml
+++ b/envs/r.yaml
@@ -1,4 +1,4 @@
-name: r
+name: deps
 channels:
   - conda-forge
   - defaults
@@ -92,8 +92,8 @@ dependencies:
   - pip=24.0
   - pixman=0.43.2
   - pthread-stubs=0.4
-  - python=3.10.14
-  - python_abi=3.10
+  - python=3.9.10
+  - python_abi=3.9
   - r-askpass=1.1
   - r-assertthat=0.2.1
   - r-backports=1.4.1

From faabd32048bd60ee582dcbe2da80deb61efe3cd1 Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Mon, 8 Jul 2024 18:20:26 -0700
Subject: [PATCH 08/21] bug fix: pin python to minor version 3.9 only

---
 envs/r.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/envs/r.yaml b/envs/r.yaml
index 74e56e8..4661ba2 100644
--- a/envs/r.yaml
+++ b/envs/r.yaml
@@ -92,7 +92,7 @@ dependencies:
   - pip=24.0
   - pixman=0.43.2
   - pthread-stubs=0.4
-  - python=3.9.10
+  - python=3.9
   - python_abi=3.9
   - r-askpass=1.1
   - r-assertthat=0.2.1

From 07edf0199bf337ebe7e2ae54b81b48ca7c31d54c Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Mon, 8 Jul 2024 18:28:20 -0700
Subject: [PATCH 09/21] bug fix: drop python

---
 envs/r.yaml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/envs/r.yaml b/envs/r.yaml
index 4661ba2..9eb1874 100644
--- a/envs/r.yaml
+++ b/envs/r.yaml
@@ -92,8 +92,6 @@ dependencies:
   - pip=24.0
   - pixman=0.43.2
   - pthread-stubs=0.4
-  - python=3.9
-  - python_abi=3.9
   - r-askpass=1.1
   - r-assertthat=0.2.1
   - r-backports=1.4.1

From 845052e9c9c65667201ae139185c335dbae4ca21 Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Tue, 9 Jul 2024 10:30:48 -0700
Subject: [PATCH 10/21] cleanup: address relocated function reference

---
 R/calculate_tmb.R    | 4 ++--
 R/fuzzy_match_mafs.R | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/calculate_tmb.R b/R/calculate_tmb.R
index ebda2ed..3f6e10f 100644
--- a/R/calculate_tmb.R
+++ b/R/calculate_tmb.R
@@ -37,7 +37,7 @@
 #'     subset_to_nonSyn = FALSE
 #' )
 #'
-#' @import dplyr
+#' @import dplyr GAMBLR.data
 #' @export
 #'
 calculate_tmb <- function(
@@ -105,7 +105,7 @@ calculate_tmb <- function(
         # Expect bed format but be flexible about column names
         columns <- colnames(regions_bed)[1:3]
 
-        overlap <- GAMBLR.helpers::cool_overlaps(
+        overlap <- GAMBLR.data::cool_overlaps(
             data1 = maf_data,
             data2 = regions_bed,
             columns2 = columns
diff --git a/R/fuzzy_match_mafs.R b/R/fuzzy_match_mafs.R
index f4484a5..0c64e23 100644
--- a/R/fuzzy_match_mafs.R
+++ b/R/fuzzy_match_mafs.R
@@ -25,7 +25,7 @@
 #'
 #' @return data frame
 #'
-#' @import dplyr tidyr tibble
+#' @import dplyr tidyr tibble GAMBLR.data
 #' @export
 #'
 #' @examples
@@ -107,7 +107,7 @@ fuzzy_match_mafs <- function(
         "Start_Position",
         "End_Position"
     )
-    matched <- cool_overlaps(
+    matched <- GAMBLR.data::cool_overlaps(
         data1 = maf1,
         data2 = maf2,
         columns1 = columns_to_overlap,

From d112d869f1bd18a88c290e487f651dbeb5695e77 Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Fri, 22 Nov 2024 10:22:32 -0800
Subject: [PATCH 11/21] cleanup: drop stringr dependency

---
 DESCRIPTION                | 1 -
 NAMESPACE                  | 5 -----
 R/GAMBLR.helpers-package.R | 4 ----
 R/get_gambl_colours.R      | 4 ++--
 R/get_template_wildcards.R | 2 +-
 R/grob_wildcards.R         | 4 ++--
 6 files changed, 5 insertions(+), 15 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index c69cdad..5af1546 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -17,7 +17,6 @@ Imports:
     ggthemes,
     philentropy,
     readr,
-    stringr,
     tibble,
     tidyr,
     workflowr
diff --git a/NAMESPACE b/NAMESPACE
index d9aa630..9433009 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -35,7 +35,6 @@ import(GAMBLR.data)
 import(dplyr)
 import(ggplot2)
 import(readr)
-import(stringr)
 import(tibble)
 import(tidyr)
 import(workflowr)
@@ -45,10 +44,6 @@ importFrom(philentropy,KL)
 importFrom(stats,end)
 importFrom(stats,quantile)
 importFrom(stats,start)
-importFrom(stringr,str_c)
-importFrom(stringr,str_extract)
-importFrom(stringr,str_remove)
-importFrom(stringr,str_remove_all)
 importFrom(tidyr,unnest_auto)
 importFrom(utils,head)
 importFrom(utils,read.csv)
diff --git a/R/GAMBLR.helpers-package.R b/R/GAMBLR.helpers-package.R
index 0af4d14..2c32605 100644
--- a/R/GAMBLR.helpers-package.R
+++ b/R/GAMBLR.helpers-package.R
@@ -7,10 +7,6 @@
 #' @importFrom stats end
 #' @importFrom stats quantile
 #' @importFrom stats start
-#' @importFrom stringr str_c
-#' @importFrom stringr str_extract
-#' @importFrom stringr str_remove
-#' @importFrom stringr str_remove_all
 #' @importFrom tidyr unnest_auto
 #' @importFrom utils head
 #' @importFrom utils read.csv
diff --git a/R/get_gambl_colours.R b/R/get_gambl_colours.R
index dab03b6..c7397ee 100644
--- a/R/get_gambl_colours.R
+++ b/R/get_gambl_colours.R
@@ -18,7 +18,7 @@
 #'
 #' @return A named vector of colour codes for lymphgen classes and pathology.
 #'
-#' @import dplyr stringr tidyr
+#' @import dplyr tidyr
 #' @export
 #'
 #' @examples
@@ -318,7 +318,7 @@ get_gambl_colours = function(classification = "all",
     everything = c(everything, all_colours[[this_group]])
   }
   #return matching value from lowercase version of the argument if it exists
-  lc_class = stringr::str_to_lower(classification)
+  lc_class = tolower(classification)
   if(return_available){
     return(names(all_colours))
   }
diff --git a/R/get_template_wildcards.R b/R/get_template_wildcards.R
index f50d6fd..cc3b46e 100644
--- a/R/get_template_wildcards.R
+++ b/R/get_template_wildcards.R
@@ -16,6 +16,6 @@ get_template_wildcards = function(parent_key,
   }else{
     wildcard_string = config::get(paste0(parent_key,"_wildcards"))[template_key]
   }
-  wildcards = stringr::str_split(wildcard_string,",")
+  wildcards = strsplit(wildcard_string,",")
   return(unlist(wildcards))
 }
diff --git a/R/grob_wildcards.R b/R/grob_wildcards.R
index ab77166..ffa12e1 100644
--- a/R/grob_wildcards.R
+++ b/R/grob_wildcards.R
@@ -8,7 +8,7 @@
 #'
 #' @export
 grob_wildcards = function(wildcarded_string){
-  wildcards = unlist(stringr::str_extract_all(wildcarded_string,"\\{[^\\{]+\\}"))
-  wildcards = stringr::str_remove_all(wildcards,"\\{") %>%  stringr::str_remove_all(.,"\\}")
+  wildcards = unlist(regmatches(wildcarded_string, gregexpr("\\{[^\\{]+\\}", wildcarded_string)))
+  wildcards = gsub("\\{", "", wildcards) %>% gsub("\\}", "", .)
   return(wildcards)
 }

From 2f3ecbc6bc7f89dea46a252e27dc3298a32d50ed Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Fri, 22 Nov 2024 10:57:12 -0800
Subject: [PATCH 12/21] cleanup: drop philentropy dependency

---
 NAMESPACE                           |  2 +-
 R/GAMBLR.helpers-package.R          |  1 -
 R/compare_coding_mutation_pattern.R | 12 +++++++---
 R/kl_divergence.R                   | 36 ++++++++++++++++++++++++++++
 man/kl_divergence.Rd                | 37 +++++++++++++++++++++++++++++
 5 files changed, 83 insertions(+), 5 deletions(-)
 create mode 100644 R/kl_divergence.R
 create mode 100644 man/kl_divergence.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 9433009..2119e85 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -19,6 +19,7 @@ export(get_template_wildcards)
 export(get_unmatched_normals)
 export(grob_wildcards)
 export(handle_metadata)
+export(kl_divergence)
 export(maf_header)
 export(normalize_expression_data)
 export(rainfall_conv)
@@ -40,7 +41,6 @@ import(tidyr)
 import(workflowr)
 importFrom(dplyr,left_join)
 importFrom(ggthemes,theme_foundation)
-importFrom(philentropy,KL)
 importFrom(stats,end)
 importFrom(stats,quantile)
 importFrom(stats,start)
diff --git a/R/GAMBLR.helpers-package.R b/R/GAMBLR.helpers-package.R
index 2c32605..1dd1b12 100644
--- a/R/GAMBLR.helpers-package.R
+++ b/R/GAMBLR.helpers-package.R
@@ -3,7 +3,6 @@
 
 ## usethis namespace: start
 #' @importFrom dplyr left_join
-#' @importFrom philentropy KL
 #' @importFrom stats end
 #' @importFrom stats quantile
 #' @importFrom stats start
diff --git a/R/compare_coding_mutation_pattern.R b/R/compare_coding_mutation_pattern.R
index 281f090..20a7772 100644
--- a/R/compare_coding_mutation_pattern.R
+++ b/R/compare_coding_mutation_pattern.R
@@ -17,9 +17,9 @@ compare_coding_mutation_pattern = function(maf_df1,maf_df2,gene){
     stop("Must provide the Hugo_Symbol of a single gene that is present in both maf files")
   }
   missense_positions1 = dplyr::filter(maf_df1,Hugo_Symbol==gene,!Variant_Classification %in% c("Silent","Splice_Site","Splice_Region"),Variant_Type=="SNP") %>%
-    pull(HGVSp_Short) %>% str_remove_all("p.\\w") %>% str_extract("\\d+") %>% as.numeric()
+    pull(HGVSp_Short) %>% gsub("p\\.\\w", "", .) %>% regmatches(., regexpr("\\d+", .)) %>% as.numeric()
   missense_positions2 = dplyr::filter(maf_df2,Hugo_Symbol==gene,!Variant_Classification %in% c("Silent","Splice_Site","Splice_Region"),Variant_Type=="SNP") %>%
-    pull(HGVSp_Short) %>% str_remove_all("p.\\w") %>% str_extract("\\d+") %>% as.numeric()
+    pull(HGVSp_Short) %>% gsub("p\\.\\w", "", .) %>% regmatches(., regexpr("\\d+", .)) %>% as.numeric()
  if(length(missense_positions1)==0 | length(missense_positions2)==0 ){
    message(paste("no mutations for",gene,"in one or both data sets"))
    return(list(kl=15))
@@ -35,6 +35,12 @@ compare_coding_mutation_pattern = function(maf_df1,maf_df2,gene){
   all_counts = dplyr::select(full_df,-position) %>% t()
   all_counts[1,]=all_counts[1,]/sum(all_counts[1,])
   all_counts[2,]=all_counts[2,]/sum(all_counts[2,])
-  kl_out = KL(all_counts)
+
+  # Normalize the rows to turn counts into probabilities
+  P <- all_counts[1, ] / sum(all_counts[1, ])
+  Q <- all_counts[2, ] / sum(all_counts[2, ])
+
+  kl_out <- kl_divergence(P, Q)
+
   return(list(df=full_df,kl=unname(kl_out)))
 }
diff --git a/R/kl_divergence.R b/R/kl_divergence.R
new file mode 100644
index 0000000..2fb5291
--- /dev/null
+++ b/R/kl_divergence.R
@@ -0,0 +1,36 @@
+#' Calculate Kullback-Leibler Divergence
+#'
+#' This function computes the Kullback-Leibler (KL) divergence between two
+#'      probability distributions, with an optional small constant (epsilon)
+#'      added to avoid zero probabilities, which would otherwise cause division
+#'      by zero or undefined logarithms.
+#'
+#' @param P A numeric vector representing the first probability distribution.
+#'      The sum of "P" should be 1, but the function will normalize it if
+#'      necessary.
+#' @param Q A numeric vector representing the second probability distribution.
+#'      The sum of "Q" should be 1, but the function will normalize it if
+#'      necessary.
+#' @param epsilon A small positive number (default = 1e-7) to be added to each
+#'      probability in P and Q to avoid zero probabilities. This helps to
+#'      prevent division by zero or log(0).
+#'
+#' @return float
+#'
+#' @examples
+#' P <- c(0.1, 0.4, 0.3, 0.2)
+#' Q <- c(0.2, 0.3, 0.4, 0.1)
+#'
+#' kl_divergence(P, Q)
+#'
+#' @export
+kl_divergence <- function(P, Q, epsilon = 1e-7) {
+    # Guard against silent recycling of mismatched distributions
+    stopifnot(length(P) == length(Q))
+    P <- P + epsilon
+    Q <- Q + epsilon
+    P <- P / sum(P)
+    Q <- Q / sum(Q)
+    # KL divergence formula: sum(P * log(P / Q)); natural log, so nats
+    return(sum(P * log(P / Q), na.rm = TRUE))
+}
diff --git a/man/kl_divergence.Rd b/man/kl_divergence.Rd
new file mode 100644
index 0000000..5652dbe
--- /dev/null
+++ b/man/kl_divergence.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/kl_divergence.R
+\name{kl_divergence}
+\alias{kl_divergence}
+\title{Calculate Kullback-Leibler Divergence}
+\usage{
+kl_divergence(P, Q, epsilon = 1e-07)
+}
+\arguments{
+\item{P}{A numeric vector representing the first probability distribution.
+The sum of "P" should be 1, but the function will normalize it if
+necessary.}
+
+\item{Q}{A numeric vector representing the second probability distribution.
+The sum of "Q" should be 1, but the function will normalize it if
+necessary.}
+
+\item{epsilon}{A small positive number (default = 1e-7) to be added to each
+probability in P and Q to avoid zero probabilities. This helps to
+prevent division by zero or log(0).}
+}
+\value{
+float
+}
+\description{
+This function computes the Kullback-Leibler (KL) divergence between two
+probability distributions, with an optional small constant (epsilon)
+added to avoid zero probabilities, which would otherwise cause division
+by zero or undefined logarithms.
+}
+\examples{
+P <- c(0.1, 0.4, 0.3, 0.2)
+Q <- c(0.2, 0.3, 0.4, 0.1)
+
+kl_divergence(P, Q)
+
+}

From 0050e497422ea4574c9b69a96265e90fa98e4440 Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Fri, 22 Nov 2024 11:15:14 -0800
Subject: [PATCH 13/21] cleanup: switch to consistent import definition

---
 DESCRIPTION                         |  1 -
 NAMESPACE                           | 11 -----------
 R/GAMBLR.helpers-package.R          | 17 -----------------
 R/compare_coding_mutation_pattern.R |  1 +
 R/gene_mutation_tally.R             |  1 +
 R/sanity_check_metadata.R           |  2 +-
 man/GAMBLR.helpers-package.Rd       | 15 ---------------
 7 files changed, 3 insertions(+), 45 deletions(-)
 delete mode 100644 R/GAMBLR.helpers-package.R
 delete mode 100644 man/GAMBLR.helpers-package.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 5af1546..e33c01b 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -15,7 +15,6 @@ Imports:
     GAMBLR.data,
     ggplot2,
     ggthemes,
-    philentropy,
     readr,
     tibble,
     tidyr,
diff --git a/NAMESPACE b/NAMESPACE
index 2119e85..ad6b74c 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -39,15 +39,4 @@ import(readr)
 import(tibble)
 import(tidyr)
 import(workflowr)
-importFrom(dplyr,left_join)
 importFrom(ggthemes,theme_foundation)
-importFrom(stats,end)
-importFrom(stats,quantile)
-importFrom(stats,start)
-importFrom(tidyr,unnest_auto)
-importFrom(utils,head)
-importFrom(utils,read.csv)
-importFrom(utils,read.socket)
-importFrom(utils,tail)
-importFrom(utils,write.socket)
-importFrom(utils,write.table)
diff --git a/R/GAMBLR.helpers-package.R b/R/GAMBLR.helpers-package.R
deleted file mode 100644
index 1dd1b12..0000000
--- a/R/GAMBLR.helpers-package.R
+++ /dev/null
@@ -1,17 +0,0 @@
-#' @keywords internal
-"_PACKAGE"
-
-## usethis namespace: start
-#' @importFrom dplyr left_join
-#' @importFrom stats end
-#' @importFrom stats quantile
-#' @importFrom stats start
-#' @importFrom tidyr unnest_auto
-#' @importFrom utils head
-#' @importFrom utils read.csv
-#' @importFrom utils read.socket
-#' @importFrom utils tail
-#' @importFrom utils write.socket
-#' @importFrom utils write.table
-## usethis namespace: end
-NULL
diff --git a/R/compare_coding_mutation_pattern.R b/R/compare_coding_mutation_pattern.R
index 20a7772..2ea0d72 100644
--- a/R/compare_coding_mutation_pattern.R
+++ b/R/compare_coding_mutation_pattern.R
@@ -8,6 +8,7 @@
 #'
 #' @return list
 #'
+#' @import dplyr
 #' @export
 compare_coding_mutation_pattern = function(maf_df1,maf_df2,gene){
   if(missing(maf_df1) | missing(maf_df2)){
diff --git a/R/gene_mutation_tally.R b/R/gene_mutation_tally.R
index 10f0964..32a3aec 100644
--- a/R/gene_mutation_tally.R
+++ b/R/gene_mutation_tally.R
@@ -17,6 +17,7 @@
 #'
 #' @return data frame
 #'
+#' @import dplyr
 #' @export
 gene_mutation_tally = function(maf_df,these_samples_metadata,these_genes,grouping_variable="cohort"){
   meta = dplyr::select(these_samples_metadata,sample_id,{{grouping_variable}})
diff --git a/R/sanity_check_metadata.R b/R/sanity_check_metadata.R
index 0faa1f4..8a1b461 100644
--- a/R/sanity_check_metadata.R
+++ b/R/sanity_check_metadata.R
@@ -6,7 +6,7 @@
 #'
 #' @return A table.
 #'
-#' @import tibble readr dplyr
+#' @import tibble readr dplyr tidyr
 #'
 #'
 #' @examples
diff --git a/man/GAMBLR.helpers-package.Rd b/man/GAMBLR.helpers-package.Rd
deleted file mode 100644
index 90c6164..0000000
--- a/man/GAMBLR.helpers-package.Rd
+++ /dev/null
@@ -1,15 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/GAMBLR.helpers-package.R
-\docType{package}
-\name{GAMBLR.helpers-package}
-\alias{GAMBLR.helpers}
-\alias{GAMBLR.helpers-package}
-\title{GAMBLR.helpers: Provide helper functions for GAMBLR-based packages}
-\description{
-This package is part of the Genomic Analysis of Mature B-cell Lymphomas (GAMBL) project developed by the Morin Lab.
-}
-\author{
-\strong{Maintainer}: Vladimir Souza \email{vsouza@bcgsc.ca}
-
-}
-\keyword{internal}

From 2dda8459598618453105a854614187e7874cbb3d Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Fri, 22 Nov 2024 12:31:22 -0800
Subject: [PATCH 14/21] cleanup: remove workflowr

---
 DESCRIPTION                      |  3 +--
 NAMESPACE                        |  2 --
 R/web_initialize_gambl_site.R    | 22 ----------------------
 man/web_initialize_gambl_site.Rd | 25 -------------------------
 4 files changed, 1 insertion(+), 51 deletions(-)
 delete mode 100644 R/web_initialize_gambl_site.R
 delete mode 100644 man/web_initialize_gambl_site.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index e33c01b..ac813c6 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -17,8 +17,7 @@ Imports:
     ggthemes,
     readr,
     tibble,
-    tidyr,
-    workflowr
+    tidyr
 Remotes:
     morinlab/GAMBLR.data
 Encoding: UTF-8
diff --git a/NAMESPACE b/NAMESPACE
index ad6b74c..6b5fe8a 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -31,12 +31,10 @@ export(subset_cnstates)
 export(theme_Morons)
 export(trim_scale_expression)
 export(vc_nonSynonymous)
-export(web_initialize_gambl_site)
 import(GAMBLR.data)
 import(dplyr)
 import(ggplot2)
 import(readr)
 import(tibble)
 import(tidyr)
-import(workflowr)
 importFrom(ggthemes,theme_foundation)
diff --git a/R/web_initialize_gambl_site.R b/R/web_initialize_gambl_site.R
deleted file mode 100644
index a781ee4..0000000
--- a/R/web_initialize_gambl_site.R
+++ /dev/null
@@ -1,22 +0,0 @@
-#' @title Web Initialize GAMBL Site.
-#'
-#' @description Set up a fresh instance of a website to host on gitlab.
-#'
-#' @param site_base_name Base name for site.
-#' @param base_directory Path to base directory.
-#' @param my_name My name.
-#' @param my_gitlab_email The email used for gitlab.
-#'
-#' @import workflowr
-#'
-#' @export
-web_initialize_gambl_site = function(site_base_name,
-                                     base_directory = "/home/rmorin/",
-                                     my_name = "Ryan Morin",
-                                     my_gitlab_email = "rdmorin@sfu.ca"){
-
-  wflow_git_config(user.name = my_name, user.email = my_gitlab_email)
-  setwd(base_directory)
-  wflow_start(site_base_name)
-  wflow_build()
-}
diff --git a/man/web_initialize_gambl_site.Rd b/man/web_initialize_gambl_site.Rd
deleted file mode 100644
index 66a8b9b..0000000
--- a/man/web_initialize_gambl_site.Rd
+++ /dev/null
@@ -1,25 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/web_initialize_gambl_site.R
-\name{web_initialize_gambl_site}
-\alias{web_initialize_gambl_site}
-\title{Web Initialize GAMBL Site.}
-\usage{
-web_initialize_gambl_site(
-  site_base_name,
-  base_directory = "/home/rmorin/",
-  my_name = "Ryan Morin",
-  my_gitlab_email = "rdmorin@sfu.ca"
-)
-}
-\arguments{
-\item{site_base_name}{Base name for site.}
-
-\item{base_directory}{Path to base directory.}
-
-\item{my_name}{My name.}
-
-\item{my_gitlab_email}{The email used for gitlab.}
-}
-\description{
-Set up a fresh instance of a website to host on gitlab.
-}

From bff1bbabceb36e3a4ab81a8445f81413dbe655e4 Mon Sep 17 00:00:00 2001
From: lkhilton <laura.k.hilton@gmail.com>
Date: Tue, 3 Dec 2024 17:55:44 -0800
Subject: [PATCH 15/21] Assume input matrix is already subset to the correct
 regions

---
 R/create_onco_matrix.R | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/R/create_onco_matrix.R b/R/create_onco_matrix.R
index 6d6b46e..4bedce8 100644
--- a/R/create_onco_matrix.R
+++ b/R/create_onco_matrix.R
@@ -38,10 +38,6 @@ create_onco_matrix = function(
             )
     }
 
-    onco_matrix_coding <- coding_class[
-        !coding_class %in% c("Silent", "Splice_Region", "Targeted_Region")
-    ]
-
 
     onco_matrix <- maf_df %>%
         dplyr::distinct(
@@ -52,10 +48,7 @@ create_onco_matrix = function(
         dplyr::select(
             Tumor_Sample_Barcode, Hugo_Symbol, Variant_Classification
         ) %>%
-        dplyr::filter(
-            Variant_Classification %in% onco_matrix_coding
-        ) %>%
-        dplyr::group_by(
+                dplyr::group_by(
             Hugo_Symbol, Tumor_Sample_Barcode
         ) %>%
         dplyr::mutate(

From 69d95f0666288a626e266c30577e029f1ab9e718 Mon Sep 17 00:00:00 2001
From: lkhilton <laura.k.hilton@gmail.com>
Date: Tue, 3 Dec 2024 17:59:41 -0800
Subject: [PATCH 16/21] Fix redundant colours

---
 R/get_gambl_colours.R | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/R/get_gambl_colours.R b/R/get_gambl_colours.R
index c7397ee..fa47b18 100644
--- a/R/get_gambl_colours.R
+++ b/R/get_gambl_colours.R
@@ -35,20 +35,22 @@ get_gambl_colours = function(classification = "all",
   all_colours = list()
   everything = c()
 
-  blood_cols = c(   Red   ="#c41230",
-                    Blue ="#115284",
-                    Green =  "#39b54b",
-                    Purple  =  "#5c266c",
-                    Orange  = "#fe9003",
-                    Green ="#046852",
+  blood_cols <- c(
+    Red = "#c41230",
+    Blue = "#115284",
+    "Light Green" = "#39b54b",
+    Purple = "#5c266c",
+    Orange = "#fe9003",
+    Green = "#046852",
                     Lavendar = "#8781bd",
-                    "Steel Blue" =  "#455564",
+    "Steel Blue" = "#455564",
                     "Light Blue" = "#2cace3",
                     Magenta = "#e90c8b",
                     LimeGreen = "#a4bb87",
                     Brown = "#5f3a17",
                     Gray = "#bdbdc1",
-                    Yellow = "#f9bd1f" )
+    Yellow = "#f9bd1f"
+  )
 
   all_colours[["seq_type"]] = c("mrna" = "#E41A1C",
                                 "genome" = "#377EB8",

From f973d4ec2fe47138db4809d33c6a300da7f23f35 Mon Sep 17 00:00:00 2001
From: lkhilton <laura.k.hilton@gmail.com>
Date: Tue, 3 Dec 2024 18:00:41 -0800
Subject: [PATCH 17/21] Create colour for 5'UTR

---
 R/get_gambl_colours.R | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/R/get_gambl_colours.R b/R/get_gambl_colours.R
index fa47b18..d0aa628 100644
--- a/R/get_gambl_colours.R
+++ b/R/get_gambl_colours.R
@@ -122,11 +122,11 @@ get_gambl_colours = function(classification = "all",
                                 "COMPOSITE" = "#ACADAF")
 
 
-  all_colours[["mutation"]]=
+  all_colours[["mutation"]] <-
     c(
-      "Nonsense_Mutation"="#D8A7CA",
-      "Missense_Mutation"=unname(blood_cols["Green"]),
-      "Multi_Hit"=unname(blood_cols["Steel Blue"]),
+      "Nonsense_Mutation" = unname(blood_cols["Red"]),
+      "Missense_Mutation" = unname(blood_cols["Light Green"]),
+      "Multi_Hit" = unname(blood_cols["Steel Blue"]),
       "Frame_Shift_Ins" = unname(blood_cols["Magenta"]),
       "Frame_Shift_Del" = unname(blood_cols["Magenta"]),
       "In_Frame_Ins" = unname(blood_cols["Brown"]),
@@ -136,7 +136,9 @@ get_gambl_colours = function(classification = "all",
       "Splice_Site" = unname(blood_cols["Orange"]),
       "Splice_Region" = unname(blood_cols["Orange"]),
       "3'UTR" = unname(blood_cols["Yellow"]),
-      "Silent" = "#A020F0")
+      "5'UTR" = unname(blood_cols["LimeGreen"]),
+      "Silent" = "#D8A7CA"
+    )
 
   all_colours[["rainfall"]] =
     c(

From c71efb2bd3e85a6118a2e3097e8ef33cb2332700 Mon Sep 17 00:00:00 2001
From: lkhilton <laura.k.hilton@gmail.com>
Date: Tue, 3 Dec 2024 18:01:40 -0800
Subject: [PATCH 18/21] Fix redundant colour

---
 R/get_gambl_colours.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/get_gambl_colours.R b/R/get_gambl_colours.R
index d0aa628..b72add0 100644
--- a/R/get_gambl_colours.R
+++ b/R/get_gambl_colours.R
@@ -183,10 +183,10 @@ get_gambl_colours = function(classification = "all",
     "1"="#92C5DE",
     "0"="#4393C3"
   )
-  all_colours[["blood"]] = c(
-    "Red" = "#c41230", "Blue"="#115284","Green" = "#39b54b",
-    "Purple" = "#5c266c", "Orange"="#fe9003","Green" = "#046852",
-    "Lavendar" = "#8781bd", "Steel Blue"= "#455564",
+  all_colours[["blood"]] <- c(
+    "Red" = "#c41230", "Blue" = "#115284", "Light Green" = "#39b54b",
+    "Purple" = "#5c266c", "Orange" = "#fe9003", "Green" = "#046852",
+    "Lavendar" = "#8781bd", "Steel Blue" = "#455564",
     "Light Blue" = "#2cace3", "Magenta" = "#e90c8b", "Mustard" = "#b76d29",
     "LimeGreen" = "#a4bb87", "Brown" = "#5f3a17", "Gray" = "#bdbdc1",
     "Yellow" = "#f9bd1f"

From b8bf2923ac223a1a49a22d9dc922d211259d4ff0 Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Wed, 4 Dec 2024 10:19:48 -0800
Subject: [PATCH 19/21] new feature: make new behaviour conditional

---
 R/create_onco_matrix.R    | 15 +++++++++++++--
 man/create_onco_matrix.Rd |  4 +++-
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/R/create_onco_matrix.R b/R/create_onco_matrix.R
index 4bedce8..bb667fa 100644
--- a/R/create_onco_matrix.R
+++ b/R/create_onco_matrix.R
@@ -9,6 +9,7 @@
 #' @param maf_df Data frame with maf data. Required parameter. The minimal required columns are Tumor_Sample_Barcode, Hugo_Symbol, Variant_Classification, Start_Position, and End_Position.
 #' @param genes List of genes to return in the resulting matrix. When not provided, matrix is generated for each gene present in the input maf data.
 #' @param add_missing When list of genes is provided and some of the specified genes are not mutated in the provided maf data, they will still be added to the matrix with 0% mutation frequency. Default TRUE.
+#' @param subset_to_coding Will conditionally subset to only coding variants. Default is FALSE (no subsetting).
 #'
 #' @return matrix
 #'
@@ -23,7 +24,8 @@
 create_onco_matrix = function(
     maf_df,
     genes,
-    add_missing = TRUE
+    add_missing = TRUE,
+    subset_to_coding = FALSE
 ){
     if(missing(maf_df)){
         stop(
@@ -38,6 +40,15 @@ create_onco_matrix = function(
             )
     }
 
+    if(subset_to_coding){
+        onco_matrix_coding <- coding_class[
+            !coding_class %in% c("Silent", "Splice_Region", "Targeted_Region")
+        ]
+        maf_df <- maf_df %>%
+            dplyr::filter(
+                Variant_Classification %in% onco_matrix_coding
+            )
+    }
 
     onco_matrix <- maf_df %>%
         dplyr::distinct(
@@ -48,7 +59,7 @@ create_onco_matrix = function(
         dplyr::select(
             Tumor_Sample_Barcode, Hugo_Symbol, Variant_Classification
         ) %>%
-                dplyr::group_by(
+        dplyr::group_by(
             Hugo_Symbol, Tumor_Sample_Barcode
         ) %>%
         dplyr::mutate(
diff --git a/man/create_onco_matrix.Rd b/man/create_onco_matrix.Rd
index e062087..5d5f812 100644
--- a/man/create_onco_matrix.Rd
+++ b/man/create_onco_matrix.Rd
@@ -4,7 +4,7 @@
 \alias{create_onco_matrix}
 \title{Create onco matrix from maf data.}
 \usage{
-create_onco_matrix(maf_df, genes, add_missing = TRUE)
+create_onco_matrix(maf_df, genes, add_missing = TRUE, subset_to_coding = FALSE)
 }
 \arguments{
 \item{maf_df}{Data frame with maf data. Required parameter. The minimal required columns are Tumor_Sample_Barcode, Hugo_Symbol, Variant_Classification, Start_Position, and End_Position.}
@@ -12,6 +12,8 @@ create_onco_matrix(maf_df, genes, add_missing = TRUE)
 \item{genes}{List of genes to return in the resulting matrix. When not provided, matrix is generated for each gene present in the input maf data.}
 
 \item{add_missing}{When list of genes is provided and some of the specified genes are not mutated in the provided maf data, they will still be added to the matrix with 0\% mutation frequency. Default TRUE.}
+
+\item{subset_to_coding}{Will conditionally subset to only coding variants. Default is FALSE (no subsetting).}
 }
 \value{
 matrix

From 8a81137034bdabbd213da4c466ca97342d48fe80 Mon Sep 17 00:00:00 2001
From: lkhilton <laura.k.hilton@gmail.com>
Date: Wed, 18 Dec 2024 13:59:38 -0800
Subject: [PATCH 20/21] Add a colour for intron mutations

---
 R/get_gambl_colours.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/get_gambl_colours.R b/R/get_gambl_colours.R
index b72add0..89a09bf 100644
--- a/R/get_gambl_colours.R
+++ b/R/get_gambl_colours.R
@@ -49,7 +49,8 @@ get_gambl_colours = function(classification = "all",
                     LimeGreen = "#a4bb87",
                     Brown = "#5f3a17",
                     Gray = "#bdbdc1",
-    Yellow = "#f9bd1f"
+    Yellow = "#f9bd1f",
+    Mustard = "#b76d29"
   )
 
   all_colours[["seq_type"]] = c("mrna" = "#E41A1C",

From 81721ce90d31748eaa31741ae9cbfd1773cbd3e8 Mon Sep 17 00:00:00 2001
From: lkhilton <laura.k.hilton@gmail.com>
Date: Wed, 18 Dec 2024 13:59:51 -0800
Subject: [PATCH 21/21] Formatting changes (sorry)

---
 R/get_gambl_colours.R | 444 ++++++++++++++++++++++--------------------
 1 file changed, 230 insertions(+), 214 deletions(-)

diff --git a/R/get_gambl_colours.R b/R/get_gambl_colours.R
index 89a09bf..579baf0 100644
--- a/R/get_gambl_colours.R
+++ b/R/get_gambl_colours.R
@@ -22,18 +22,16 @@
 #' @export
 #'
 #' @examples
-#' lymphgen_cols = get_gambl_colours("lymphgen")
+#' lymphgen_cols <- get_gambl_colours("lymphgen")
 #'
-#'
-get_gambl_colours = function(classification = "all",
-                             alpha = 1,
-                             as_list = FALSE,
-                             as_dataframe = FALSE,
-                             return_available = FALSE,
-                             verbose = FALSE){
-
-  all_colours = list()
-  everything = c()
+get_gambl_colours <- function(classification = "all",
+                              alpha = 1,
+                              as_list = FALSE,
+                              as_dataframe = FALSE,
+                              return_available = FALSE,
+                              verbose = FALSE) {
+  all_colours <- list()
+  everything <- c()
 
   blood_cols <- c(
     Red = "#c41230",
@@ -42,85 +40,98 @@ get_gambl_colours = function(classification = "all",
     Purple = "#5c266c",
     Orange = "#fe9003",
     Green = "#046852",
-                    Lavendar = "#8781bd",
+    Lavendar = "#8781bd",
     "Steel Blue" = "#455564",
-                    "Light Blue" = "#2cace3",
-                    Magenta = "#e90c8b",
-                    LimeGreen = "#a4bb87",
-                    Brown = "#5f3a17",
-                    Gray = "#bdbdc1",
+    "Light Blue" = "#2cace3",
+    Magenta = "#e90c8b",
+    LimeGreen = "#a4bb87",
+    Brown = "#5f3a17",
+    Gray = "#bdbdc1",
     Yellow = "#f9bd1f",
     Mustard = "#b76d29"
   )
 
-  all_colours[["seq_type"]] = c("mrna" = "#E41A1C",
-                                "genome" = "#377EB8",
-                                "capture" = "#4DAF4A")
+  all_colours[["seq_type"]] <- c(
+    "mrna" = "#E41A1C",
+    "genome" = "#377EB8",
+    "capture" = "#4DAF4A"
+  )
 
-  all_colours[["type"]] = c("gain" = "#0000FF",
-                            "loss" = "#FF0000")
+  all_colours[["type"]] <- c(
+    "gain" = "#0000FF",
+    "loss" = "#FF0000"
+  )
 
-  all_colours[["hmrn"]] = c("BCL2-MYC" = "#52000F",
-                            "BCL2" = "#721F0F",
-                            "SOCS1/SGK1" = "#D66B1F",
-                            "TET2/SGK1" = "#C41230",
-                            "MYD88" = "#3B5FAC",
-                            "NOTCH2" = "#7F3293",
-                            "NOTCH1" = "#55B55E",
-                            "Other" = "#ACADAF")
+  all_colours[["hmrn"]] <- c(
+    "BCL2-MYC" = "#52000F",
+    "BCL2" = "#721F0F",
+    "SOCS1/SGK1" = "#D66B1F",
+    "TET2/SGK1" = "#C41230",
+    "MYD88" = "#3B5FAC",
+    "NOTCH2" = "#7F3293",
+    "NOTCH1" = "#55B55E",
+    "Other" = "#ACADAF"
+  )
 
-  all_colours[["EBV"]] =  c("EBV-positive" = "#7F055F",
-                            "EBV-negative" = "#E5A4CB",
-                            "POS" = "#7F055F",
-                            "NEG" = "#E5A4CB")
+  all_colours[["EBV"]] <- c(
+    "EBV-positive" = "#7F055F",
+    "EBV-negative" = "#E5A4CB",
+    "POS" = "#7F055F",
+    "NEG" = "#E5A4CB"
+  )
 
-  all_colours[["BL"]] = c("Q53-BL" = "#A6CEE3",
-                          "M53-BL" = "#A6CEE3", #added because genetic subgroup still refers to it this way
-                          "DLBCL-A" = "#721F0F",
-                          "IC-BL" = "#45425A",
-                          "DGG-BL" = "#E90C8B",
-                          "DLBCL-B" = "#FB9A99",
-                          "DLBCL-C" = "#C41230")
+  all_colours[["BL"]] <- c(
+    "Q53-BL" = "#A6CEE3",
+    "M53-BL" = "#A6CEE3", # added because genetic subgroup still refers to it this way
+    "DLBCL-A" = "#721F0F",
+    "IC-BL" = "#45425A",
+    "DGG-BL" = "#E90C8B",
+    "DLBCL-B" = "#FB9A99",
+    "DLBCL-C" = "#C41230"
+  )
 
-  all_colours[["FL"]] = c(dFL = "#99C1B9", cFL = "#D16666", DLBCL = "#479450")
+  all_colours[["FL"]] <- c(dFL = "#99C1B9", cFL = "#D16666", DLBCL = "#479450")
 
-  all_colours[["lymphgenerator"]] = c("MP3"="#5B8565",
-                                      "EGB" = "#98622A",
-                                      "ETB"="#813F3D",
-                                      "aSCI"="#D66B1F",
-                                      "aSEL"="#6A0D18",
-                                      "MCaP"="#5F8CFF",
-                                      "BNZ"="#8870B6",
-                                      "EZB"="#721F0F",
-                                      "ST2"="#C41230",
-                                      "UNCLASS"="#05631E"
+  all_colours[["lymphgenerator"]] <- c(
+    "MP3" = "#5B8565",
+    "EGB" = "#98622A",
+    "ETB" = "#813F3D",
+    "aSCI" = "#D66B1F",
+    "aSEL" = "#6A0D18",
+    "MCaP" = "#5F8CFF",
+    "BNZ" = "#8870B6",
+    "EZB" = "#721F0F",
+    "ST2" = "#C41230",
+    "UNCLASS" = "#05631E"
   )
 
-  all_colours[["chapuy_classifier"]] = c(
+  all_colours[["chapuy_classifier"]] <- c(
     C0 = "#bebebe",
     C1 = "#803D99",
-    C2 ="#00A2D2",
+    C2 = "#00A2D2",
     C3 = "#F39123",
     C4 = "#50BFAD",
     C5 = "#DE292A"
   )
 
-  all_colours[["lacy_classifier"]] = all_colours[["hmrn"]]
+  all_colours[["lacy_classifier"]] <- all_colours[["hmrn"]]
 
-  all_colours[["lymphgen"]] = c("EZB-MYC" = "#52000F",
-                                "EZB" = "#721F0F",
-                                "EZB-COMP" = "#C7371A",
-                                "ST2" = "#C41230",
-                                "ST2-COMP" = "#EC3251",
-                                "MCD" = "#3B5FAC",
-                                "MCD-COMP" = "#6787CB",
-                                "BN2" =  "#7F3293",
-                                "BN2-COMP" = "#A949C1",
-                                "N1" = "#55B55E",
-                                "N1-COMP" = "#7FC787",
-                                "A53" = "#5b6d8a",
-                                "Other" = "#ACADAF",
-                                "COMPOSITE" = "#ACADAF")
+  all_colours[["lymphgen"]] <- c(
+    "EZB-MYC" = "#52000F",
+    "EZB" = "#721F0F",
+    "EZB-COMP" = "#C7371A",
+    "ST2" = "#C41230",
+    "ST2-COMP" = "#EC3251",
+    "MCD" = "#3B5FAC",
+    "MCD-COMP" = "#6787CB",
+    "BN2" = "#7F3293",
+    "BN2-COMP" = "#A949C1",
+    "N1" = "#55B55E",
+    "N1-COMP" = "#7FC787",
+    "A53" = "#5b6d8a",
+    "Other" = "#ACADAF",
+    "COMPOSITE" = "#ACADAF"
+  )
 
 
   all_colours[["mutation"]] <-
@@ -138,10 +149,11 @@ get_gambl_colours = function(classification = "all",
       "Splice_Region" = unname(blood_cols["Orange"]),
       "3'UTR" = unname(blood_cols["Yellow"]),
       "5'UTR" = unname(blood_cols["LimeGreen"]),
+      "Intron" = unname(blood_cols["Mustard"]),
       "Silent" = "#D8A7CA"
     )
 
-  all_colours[["rainfall"]] =
+  all_colours[["rainfall"]] <-
     c(
       "C>A" = "#2196F3FF",
       "C>G" = "#3F51B5FF",
@@ -152,37 +164,38 @@ get_gambl_colours = function(classification = "all",
       "T>G" = "#FF9800FF"
     )
 
-  all_colours[["pos_neg"]]=c(
-    "POS"="#c41230",
-    "NEG"="#E88873",
-    "PARTIAL"="#E88873",
-    "yes"="#c41230",
-    "no"="#E88873",
-    "YES"="#c41230",
-    "NO"="#E88873",
-    "FAIL"="#bdbdc1",
-    "positive"="#c41230",
-    "negative"="#E88873",
-    "fail"="#bdbdc1")
+  all_colours[["pos_neg"]] <- c(
+    "POS" = "#c41230",
+    "NEG" = "#E88873",
+    "PARTIAL" = "#E88873",
+    "yes" = "#c41230",
+    "no" = "#E88873",
+    "YES" = "#c41230",
+    "NO" = "#E88873",
+    "FAIL" = "#bdbdc1",
+    "positive" = "#c41230",
+    "negative" = "#E88873",
+    "fail" = "#bdbdc1"
+  )
 
-  all_colours[["copy_number"]]=c(
-    "nLOH"="#E026D7",
-    "14"="#380015",
-    "15"="#380015",
-    "13"="#380015",
-    "12"="#380015",
-    "11"="#380015",
-    "10"="#380015",
-    "9"="#380015",
-    "8"="#380015",
-    "7"="#380015",
-    "6"="#380015",
-    "5"="#67001F",
-    "4"="#B2182B",
-    "3"="#D6604D",
-    "2"="#ede4c7",
-    "1"="#92C5DE",
-    "0"="#4393C3"
+  all_colours[["copy_number"]] <- c(
+    "nLOH" = "#E026D7",
+    "14" = "#380015",
+    "15" = "#380015",
+    "13" = "#380015",
+    "12" = "#380015",
+    "11" = "#380015",
+    "10" = "#380015",
+    "9" = "#380015",
+    "8" = "#380015",
+    "7" = "#380015",
+    "6" = "#380015",
+    "5" = "#67001F",
+    "4" = "#B2182B",
+    "3" = "#D6604D",
+    "2" = "#ede4c7",
+    "1" = "#92C5DE",
+    "0" = "#4393C3"
   )
   all_colours[["blood"]] <- c(
     "Red" = "#c41230", "Blue" = "#115284", "Light Green" = "#39b54b",
@@ -192,33 +205,34 @@ get_gambl_colours = function(classification = "all",
     "LimeGreen" = "#a4bb87", "Brown" = "#5f3a17", "Gray" = "#bdbdc1",
     "Yellow" = "#f9bd1f"
   )
-  all_colours[["sex"]]=c(
-    "M"="#118AB2",
-    "Male"="#118AB2",
-    "male"="#118AB2",
-    "F"="#EF476F",
-    "Female"="#EF476F",
-    "female"="#EF476F")
+  all_colours[["sex"]] <- c(
+    "M" = "#118AB2",
+    "Male" = "#118AB2",
+    "male" = "#118AB2",
+    "F" = "#EF476F",
+    "Female" = "#EF476F",
+    "female" = "#EF476F"
+  )
 
-  all_colours[["clinical"]]=
+  all_colours[["clinical"]] <-
     c(
-      "M"="#118AB2",
-      "Male"="#118AB2",
-      "F"="#EF476F",
-      "Female"="#EF476F",
-      "EBV-positive"="#7F055F",
-      "EBV-negative"="#E5A4CB",
-      "POS"="#c41230",
-      "NEG"="#E88873",
-      "FAIL"="#bdbdc1",
-      "Alive"="#046852",
-      "alive"="#046852",
-      "dead"="#a4bb87",
-      "Dead"="#a4bb87",
-      "deceased"="#a4bb87",
-      "unknown"="#C3C9E9",
-      "IPI_0"= "#3B9AB2",
-      "IPI_1"= "#78B7C5",
+      "M" = "#118AB2",
+      "Male" = "#118AB2",
+      "F" = "#EF476F",
+      "Female" = "#EF476F",
+      "EBV-positive" = "#7F055F",
+      "EBV-negative" = "#E5A4CB",
+      "POS" = "#c41230",
+      "NEG" = "#E88873",
+      "FAIL" = "#bdbdc1",
+      "Alive" = "#046852",
+      "alive" = "#046852",
+      "dead" = "#a4bb87",
+      "Dead" = "#a4bb87",
+      "deceased" = "#a4bb87",
+      "unknown" = "#C3C9E9",
+      "IPI_0" = "#3B9AB2",
+      "IPI_1" = "#78B7C5",
       "IPI_2" = "#EBCC2A",
       "IPI_3" = "#E1AF00",
       "IPI_4" = "#F21A00",
@@ -226,124 +240,126 @@ get_gambl_colours = function(classification = "all",
       "adult" = "#DCE0E5",
       "Pediatric" = "#677A8E",
       "pediatric" = "#677A8E",
-      "Diagnosis"="#E57A44",
-      "A"="#E57A44",
-      "B"="#721817",
-      "C"="#721817",
-      "D"="#721817",
-      "E"="#721817",
-      "Progression"="#A44A3F",
-      "Relapse"="#721817",
-      "I"="#75F4F4",
-      "FOLL1"="#75F4F4",
-      "II"="#90E0F3",
-      "FOLL2"="#90E0F3",
-      "IIIA"="#B8B3E9",
-      "FOLL3A"="#B8B3E9",
-      "IIIB"="#D999B9",
-      "FOLL3B"="#D999B9",
-      "matched"="#F0B67F",
-      "unmatched"="#D6D1B1",
-      "FF"="#009FFD",
-      "frozen"="#009FFD",
-      "FFPE"="#95B2B8",
-      "ctDNA"="#7E6148",
-      "NA"="white"
+      "Diagnosis" = "#E57A44",
+      "A" = "#E57A44",
+      "B" = "#721817",
+      "C" = "#721817",
+      "D" = "#721817",
+      "E" = "#721817",
+      "Progression" = "#A44A3F",
+      "Relapse" = "#721817",
+      "I" = "#75F4F4",
+      "FOLL1" = "#75F4F4",
+      "II" = "#90E0F3",
+      "FOLL2" = "#90E0F3",
+      "IIIA" = "#B8B3E9",
+      "FOLL3A" = "#B8B3E9",
+      "IIIB" = "#D999B9",
+      "FOLL3B" = "#D999B9",
+      "matched" = "#F0B67F",
+      "unmatched" = "#D6D1B1",
+      "FF" = "#009FFD",
+      "frozen" = "#009FFD",
+      "FFPE" = "#95B2B8",
+      "ctDNA" = "#7E6148",
+      "NA" = "white"
     )
-  all_colours[["pathology"]] = c(
-    "B-ALL"="#C1C64B",
-    "CLL"="#889BE5",
-    "MCL"="#40E0D0",
-    "BL"="#926CAD",
-    "mBL"="#34C7F4",
-    "tFL"="#FF8595",
-    "DLBCL-BL-like"="#34C7F4",
-    "pre-HT"="#754F5B",
-    "PMBL"= "#227C9D",
-    "PMBCL"="#227C9D",
-    "FL"="#EA8368",
-    "no-HT"="#EA8368",
-    "COMFL"="#8BBC98",
-    "COM"="#8BBC98",
-    "post-HT"="#479450",
-    "DLBCL"="#479450",
-    "denovo-DLBCL"="#479450",
-    "HGBL-NOS"="#294936",
-    "HGBL"="#294936",
-    "HGBL-DH/TH"="#7A1616",
+  all_colours[["pathology"]] <- c(
+    "B-ALL" = "#C1C64B",
+    "CLL" = "#889BE5",
+    "MCL" = "#40E0D0",
+    "BL" = "#926CAD",
+    "mBL" = "#34C7F4",
+    "tFL" = "#FF8595",
+    "DLBCL-BL-like" = "#34C7F4",
+    "pre-HT" = "#754F5B",
+    "PMBL" = "#227C9D",
+    "PMBCL" = "#227C9D",
+    "FL" = "#EA8368",
+    "no-HT" = "#EA8368",
+    "COMFL" = "#8BBC98",
+    "COM" = "#8BBC98",
+    "post-HT" = "#479450",
+    "DLBCL" = "#479450",
+    "denovo-DLBCL" = "#479450",
+    "HGBL-NOS" = "#294936",
+    "HGBL" = "#294936",
+    "HGBL-DH/TH" = "#7A1616",
     "PBL" = "#E058C0",
     "Plasmablastic" = "#E058C0",
     "CNS" = "#E2EF60",
     "THRLBCL" = "#A5F2B3",
-    "MM"="#CC9A42",
-    "SCBC"="#8c9c90",
-    "UNSPECIFIED"="#cfba7c",
-    "OTHER"="#cfba7c",
-    "MZL"="#065A7F",
-    "SMZL"="#065A7F",
+    "MM" = "#CC9A42",
+    "SCBC" = "#8c9c90",
+    "UNSPECIFIED" = "#cfba7c",
+    "OTHER" = "#cfba7c",
+    "MZL" = "#065A7F",
+    "SMZL" = "#065A7F",
     "Prolymphocytic" = "#7842f5"
   )
-  all_colours[["coo"]] = c(
+  all_colours[["coo"]] <- c(
     "ABC" = "#05ACEF",
     "UNCLASS" = "#05631E",
     "Unclass" = "#05631E",
     "U" = "#05631E",
     "UNC" = "#05631E",
-    "GCB"= "#F58F20",
-    "DHITsig-"= "#F58F20",
-    "DHITsigNeg"= "#F58F20",
+    "GCB" = "#F58F20",
+    "DHITsig-" = "#F58F20",
+    "DHITsigNeg" = "#F58F20",
     "DHITsig-IND" = "#003049",
     "DHITsig+" = "#D62828",
     "DHITsigPos" = "#D62828",
     "NA" = "#ACADAF"
   )
-  all_colours[["cohort"]] = c("Chapuy"="#8B0000","Chapuy, 2018"="#8B0000",
-                              "Arthur"= "#8845A8","Arthur, 2018"= "#8845A8",
-                              "Schmitz"= "#2C72B2","Schmitz, 2018"= "#2C72B2",
-                              "Reddy" = "#E561C3","Reddy, 2017" = "#E561C3",
-                              "Morin"= "#8DB753", "Morin, 2013"= "#8DB753",
-                              "Kridel"= "#4686B7", "Kridel, 2016"= "#4686B7",
-                              "ICGC"="#E09C3B","ICGC, 2018"="#E09C3B",
-                              "Grande"="#e90c8b", "Grande, 2019"="#e90c8b")
+  all_colours[["cohort"]] <- c(
+    "Chapuy" = "#8B0000", "Chapuy, 2018" = "#8B0000",
+    "Arthur" = "#8845A8", "Arthur, 2018" = "#8845A8",
+    "Schmitz" = "#2C72B2", "Schmitz, 2018" = "#2C72B2",
+    "Reddy" = "#E561C3", "Reddy, 2017" = "#E561C3",
+    "Morin" = "#8DB753", "Morin, 2013" = "#8DB753",
+    "Kridel" = "#4686B7", "Kridel, 2016" = "#4686B7",
+    "ICGC" = "#E09C3B", "ICGC, 2018" = "#E09C3B",
+    "Grande" = "#e90c8b", "Grande, 2019" = "#e90c8b"
+  )
 
-  all_colours[["indels"]] = c("DEL" = "#53B1FC", "INS" = "#FC9C6D")
-  all_colours[["svs"]] = c("DEL" = "#53B1FC", "DUP" = "#FC9C6D")
-  all_colours[["genetic_subgroup"]] = c(all_colours[["lymphgen"]],all_colours[["BL"]],all_colours[["FL"]])
-  #print(all_colours)
-  if(alpha <1){
-    for(colslot in names(all_colours)){
-      raw_cols = all_colours[[colslot]]
-      raw_cols_rgb = col2rgb(raw_cols)
-      alpha_cols = rgb(raw_cols_rgb[1L, ], raw_cols_rgb[2L, ], raw_cols_rgb[3L, ], alpha = alpha * 255L, names = names(raw_cols), maxColorValue = 255L)
-      names(alpha_cols) = names(raw_cols)
-      all_colours[[colslot]] = alpha_cols
+  all_colours[["indels"]] <- c("DEL" = "#53B1FC", "INS" = "#FC9C6D")
+  all_colours[["svs"]] <- c("DEL" = "#53B1FC", "DUP" = "#FC9C6D")
+  all_colours[["genetic_subgroup"]] <- c(all_colours[["lymphgen"]], all_colours[["BL"]], all_colours[["FL"]])
+  # print(all_colours)
+  if (alpha < 1) {
+    for (colslot in names(all_colours)) {
+      raw_cols <- all_colours[[colslot]]
+      raw_cols_rgb <- col2rgb(raw_cols)
+      alpha_cols <- rgb(raw_cols_rgb[1L, ], raw_cols_rgb[2L, ], raw_cols_rgb[3L, ], alpha = alpha * 255L, names = names(raw_cols), maxColorValue = 255L)
+      names(alpha_cols) <- names(raw_cols)
+      all_colours[[colslot]] <- alpha_cols
     }
   }
-  for(this_group in names(all_colours)){
-    everything = c(everything, all_colours[[this_group]])
+  for (this_group in names(all_colours)) {
+    everything <- c(everything, all_colours[[this_group]])
   }
-  #return matching value from lowercase version of the argument if it exists
-  lc_class = tolower(classification)
-  if(return_available){
+  # return matching value from lowercase version of the argument if it exists
+  lc_class <- tolower(classification)
+  if (return_available) {
     return(names(all_colours))
   }
-  if(classification %in% names(all_colours)){
-    if(as_dataframe){
-      some_col=all_colours[[classification]]
-      df_ugly = data.frame(name=names(some_col),colour=unname(some_col))
-      df_tidy = mutate(df_ugly,group=classification)
+  if (classification %in% names(all_colours)) {
+    if (as_dataframe) {
+      some_col <- all_colours[[classification]]
+      df_ugly <- data.frame(name = names(some_col), colour = unname(some_col))
+      df_tidy <- mutate(df_ugly, group = classification)
       return(df_tidy)
     }
     return(all_colours[[classification]])
-  }else if(lc_class %in% names(all_colours)){
+  } else if (lc_class %in% names(all_colours)) {
     return(all_colours[[lc_class]])
-  }else if(as_list){
+  } else if (as_list) {
     return(all_colours)
-  }else if(as_dataframe){
-    df_ugly = data.frame(name = names(unlist(all_colours, use.names = T)), colour = unlist(all_colours, use.names = T))
-    df_tidy = separate(df_ugly,name,into=c("group","name"),sep="\\.")
+  } else if (as_dataframe) {
+    df_ugly <- data.frame(name = names(unlist(all_colours, use.names = T)), colour = unlist(all_colours, use.names = T))
+    df_tidy <- separate(df_ugly, name, into = c("group", "name"), sep = "\\.")
     return(df_tidy)
-  }else{
+  } else {
     return(everything)
   }
 }