From 11aa08f5530e862279b9767c8ab357a3ccf0e162 Mon Sep 17 00:00:00 2001 From: Kdreval Date: Fri, 10 May 2024 16:04:50 -0700 Subject: [PATCH 01/21] transition: move out cool_overlaps --- NAMESPACE | 1 - R/cool_overlaps.R | 183 ------------------------------------------- man/cool_overlaps.Rd | 92 ---------------------- 3 files changed, 276 deletions(-) delete mode 100644 R/cool_overlaps.R delete mode 100644 man/cool_overlaps.Rd diff --git a/NAMESPACE b/NAMESPACE index 8f7afde..d9aa630 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,7 +10,6 @@ export(coding_class) export(coding_vc) export(colour_aliases) export(compare_coding_mutation_pattern) -export(cool_overlaps) export(copy_no_clobber) export(create_onco_matrix) export(fuzzy_match_mafs) diff --git a/R/cool_overlaps.R b/R/cool_overlaps.R deleted file mode 100644 index 0a52c23..0000000 --- a/R/cool_overlaps.R +++ /dev/null @@ -1,183 +0,0 @@ -#' @title Cool overlap of data frames. -#' -#' @description This function implements overlap of 2 data frames that contain -#' regions of coordinates similar to what data.table::foverlaps does. Unlike -#' foverlaps, this function takes as input data frame class objects, and relies -#' on dplyr solution rather than data.table handling, therefore allowing usage -#' of data frames with virtually unlimited dimensions without crashing. This -#' implementation uses same logic of different types of overlaps as the original -#' foverlaps solution ("any", "start", "end", "within", "equal"). The type "any" -#' is default and allows for any overlapping solution between 2 regions. The -#' type "start" only considers regions with exact same start position as -#' overlap; similarly type "end" considers regions overlapped when the end -#' positions are exact matches. Type "within" means that regions are overlapped -#' when one is contained in another and neither start nor end positions match. -#' Finally, type "equal" only considers overlap when both start and end -#' positions match for both regions. For any type, the presence of any -#' additional column not directly specifying regions (for example, Chromosome) -#' will serve similar to a grouping variable. -#' The generated output of this function will contain the overlapping regions -#' and all columns present in the data frame data1, as well as any columns from -#' the data frame supplied with data2 argument, except for those columns present -#' in data2 that are used for overlap. When the same columns are present in both -#' data1 and data2, the output data frame will have ".x" and ".y" suffixes to -#' indicate which original input data they are coming from. -#' -#' @param data1 Data frame with data to overlap. Required parameter. The minimal -#' required columns are those supplied with the argument columns1. Will -#' dictate the naming of the columns used for overlap in the output. -#' @param data2 Data frame with data to overlap. Required parameter. The minimal -#' required columns are those supplied with the argument columns2. -#' @param columns1 The list of columns from data frame data1 to be used to find -#' overlapping regions. -#' @param columns2 The list of columns from data frame data2 to be used to find -#' overlapping regions. -#' @param type Character specifying the way to find overlaps. Accepted values -#' are "any" (used as default), "start", "end", "within", and "equal". -#' Please see function description for more details of different types. -#' -#' @return data frame -#' -#' @examples -#' # obtain maf data -#' maf1 <- get_coding_ssm( -#' these_sample_ids = "DOHH-2" -#' ) -#' -#' maf2 <- get_coding_ssm( -#' these_sample_ids = "SU-DHL-4" -#' ) -#' -#' # The same mutations are not expected to be present in different samples -#' # so this overlap will produce 0 matching rows -#' overlap <- cool_overlaps( -#' maf1, -#' maf1, -#' type = "equal" -#' ) -#' -#' # To demonstrate functionality we can supply the same maf to the data2 -#' overlap <- cool_overlaps( -#' maf1, -#' maf1 %>% head -#' ) -#' -#' # We can also overlap different formats, for example -#' seg1 <- get_sample_cn_segments(these_sample_ids = "DOHH-2") -#' overlap <- cool_overlaps( -#' data1 = maf1, -#' data2 = seg1, -#' columns2 = c("chrom", "start", "end") -#' ) -#' -#' @import dplyr -#' @export -#' -cool_overlaps <- function( - data1, - data2, - columns1 = c("Chromosome", "Start_Position", "End_Position"), - columns2 = c("Chromosome", "Start_Position", "End_Position"), - type = "any" -){ - - # Ensure all columns provided for overlap are present in the data frame - if(! length(columns1) == length(intersect(columns1, colnames(data1)))){ - stop( - "Not all of the requested columns for overlap in data1 are present." - ) - } - - if(! length(columns2) == length(intersect(columns2, colnames(data2)))){ - stop( - "Not all of the requested columns for overlap in data2 are present." - ) - } - - # What is the name of the column in columns1 that specifies start and end? - start1 <- columns1[grepl("start", columns1, ignore.case = TRUE)] - end1 <- columns1[grepl("end", columns1, ignore.case = TRUE)] - - # What is the name of the column in columns1 that specifies start and end? - start2 <- columns2[grepl("start", columns2, ignore.case = TRUE)] - end2 <- columns2[grepl("end", columns2, ignore.case = TRUE)] - - # What are the other columns to be used in overlap? - columns1 <- columns1[!columns1 %in% c(start1, end1)] - columns2 <- columns2[!columns2 %in% c(start2, end2)] - - # When the same columns are provided they will become .x and .y - if(start1 == start2) { - start1 <- paste0(start1, ".x") - start2 <- paste0(start2, ".y") - - } - if(end1 == end2) { - end1 <- paste0(end1, ".x") - end2 <- paste0(end2, ".y") - - } - - - # Prepare for overlap - overlap <- dplyr::inner_join( - data1, - data2, - by = structure(names = columns1, .Data = columns2), - relationship = "many-to-many" - ) - - # Return matches based on mode - if(type == "any"){ - message( - "Running in default mode of any..." - ) - overlap <- overlap %>% - dplyr::filter( - !!sym(start2) >= !!sym(start1) & !!sym(end2) <= !!sym(end1) | - !!sym(start1) >= !!sym(start2) & !!sym(end1) <= !!sym(end2) - ) - } else if (type == "start"){ - message( - "Running in the mode start..." - ) - overlap <- overlap %>% - dplyr::filter( - !!sym(start1) == !!sym(start2) - ) - } else if (type == "end"){ - message( - "Running in the mode end..." - ) - overlap <- overlap %>% - dplyr::filter( - !!sym(end1) == !!sym(end2) - ) - } else if (type == "within"){ - message( - "Running in the mode within..." - ) - overlap <- overlap %>% - dplyr::filter( - (!!sym(start1) >= !!sym(start2)) & (!!sym(end1) <= !!sym(end2)) | - (!!sym(start2) >= !!sym(start1)) & (!!sym(end2) <= !!sym(end1)) - ) - } else if (type == "equal"){ - message( - "Running in the mode equal..." - ) - overlap <- overlap %>% - dplyr::filter( - (!!sym(start1) == !!sym(start2)) & (!!sym(end1) == !!sym(end2)) - ) - } else { - message( - "You have requested mode that is not supported." - ) - stop( - "Please supply one of any, start, end, within, or equal with type." - ) - } - - return(overlap) -} diff --git a/man/cool_overlaps.Rd b/man/cool_overlaps.Rd deleted file mode 100644 index 0b20674..0000000 --- a/man/cool_overlaps.Rd +++ /dev/null @@ -1,92 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cool_overlaps.R -\name{cool_overlaps} -\alias{cool_overlaps} -\title{Cool overlap of data frames.} -\usage{ -cool_overlaps( - data1, - data2, - columns1 = c("Chromosome", "Start_Position", "End_Position"), - columns2 = c("Chromosome", "Start_Position", "End_Position"), - type = "any" -) -} -\arguments{ -\item{data1}{Data frame with data to overlap. Required parameter. The minimal -required columns are those supplied with the argument columns1. Will -dictate the naming of the columns used for overlap in the output.} - -\item{data2}{Data frame with data to overlap. Required parameter. The minimal -required columns are those supplied with the argument columns2.} - -\item{columns1}{The list of columns from data frame data1 to be used to find -overlapping regions.} - -\item{columns2}{The list of columns from data frame data2 to be used to find -overlapping regions.} - -\item{type}{Character specifying the way to find overlaps. Accepted values -are "any" (used as default), "start", "end", "within", and "equal". -Please see function description for more details of different types.} -} -\value{ -data frame -} -\description{ -This function implements overlap of 2 data frames that contain -regions of coordinates similar to what data.table::foverlaps does. Unlike -foverlaps, this function takes as input data frame class objects, and relies -on dplyr solution rather than data.table handling, therefore allowing usage -of data frames with virtually unlimited dimensions without crashing. This -implementation uses same logic of different types of overlaps as the original -foverlaps solution ("any", "start", "end", "within", "equal"). The type "any" -is default and allows for any overlapping solution between 2 regions. The -type "start" only considers regions with exact same start position as -overlap; similarly type "end" considers regions overlapped when the end -positions are exact matches. Type "within" means that regions are overlapped -when one is contained in another and neither start nor end positions match. -Finally, type "equal" only considers overlap when both start and end -positions match for both regions. For any type, the presence of any -additional column not directly specifying regions (for example, Chromosome) -will serve similar to a grouping variable. -The generated output of this function will contain the overlapping regions -and all columns present in the data frame data1, as well as any columns from -the data frame supplied with data2 argument, except for those columns present -in data2 that are used for overlap. When the same columns are present in both -data1 and data2, the output data frame will have ".x" and ".y" suffixes to -indicate which original input data they are coming from. -} -\examples{ -# obtain maf data -maf1 <- get_coding_ssm( - these_sample_ids = "DOHH-2" -) - -maf2 <- get_coding_ssm( - these_sample_ids = "SU-DHL-4" -) - -# The same mutations are not expected to be present in different samples -# so this overlap will produce 0 matching rows -overlap <- cool_overlaps( - maf1, - maf1, - type = "equal" -) - -# To demonstrate functionality we can supply the same maf to the data2 -overlap <- cool_overlaps( - maf1, - maf1 \%>\% head -) - -# We can also overlap different formats, for example -seg1 <- get_sample_cn_segments(these_sample_ids = "DOHH-2") -overlap <- cool_overlaps( - data1 = maf1, - data2 = seg1, - columns2 = c("chrom", "start", "end") -) - -} From 5c31a9832a862d6f552e3c92e6164010203e30a9 Mon Sep 17 00:00:00 2001 From: Kdreval Date: Tue, 2 Jul 2024 15:44:51 -0700 Subject: [PATCH 02/21] bug fix: color for nonsense mutations --- R/get_gambl_colours.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/get_gambl_colours.R b/R/get_gambl_colours.R index 1e0ced7..dab03b6 100644 --- a/R/get_gambl_colours.R +++ b/R/get_gambl_colours.R @@ -34,9 +34,9 @@ get_gambl_colours = function(classification = "all", all_colours = list() everything = c() - + blood_cols = c( Red ="#c41230", - Blue ="#115284", + Blue ="#115284", Green = "#39b54b", Purple = "#5c266c", Orange = "#fe9003", @@ -119,10 +119,10 @@ get_gambl_colours = function(classification = "all", "Other" = "#ACADAF", "COMPOSITE" = "#ACADAF") - + all_colours[["mutation"]]= c( - "Nonsense_Mutation"=unname(blood_cols["Red"]), + "Nonsense_Mutation"="#D8A7CA", "Missense_Mutation"=unname(blood_cols["Green"]), "Multi_Hit"=unname(blood_cols["Steel Blue"]), "Frame_Shift_Ins" = unname(blood_cols["Magenta"]), @@ -194,8 +194,8 @@ get_gambl_colours = function(classification = "all", "F"="#EF476F", "Female"="#EF476F", "female"="#EF476F") - - all_colours[["clinical"]]= + + all_colours[["clinical"]]= c( "M"="#118AB2", "Male"="#118AB2", From c0f75f79b27d0fe8757fd1983ee777a11d49fcdf Mon Sep 17 00:00:00 2001 From: Kdreval Date: Tue, 2 Jul 2024 23:20:57 -0700 Subject: [PATCH 03/21] env --- envs/r.yaml | 171 ++++++++++++++++++++++++++-------------------------- 1 file changed, 86 insertions(+), 85 deletions(-) diff --git a/envs/r.yaml b/envs/r.yaml index fdceb73..c6af660 100644 --- a/envs/r.yaml +++ b/envs/r.yaml @@ -1,23 +1,23 @@ name: r channels: + - r - conda-forge - defaults dependencies: - _libgcc_mutex=0.1 - _openmp_mutex=4.5 - - _r-mutex=1.0.1 - - _sysroot_linux-64_curr_repodata_hack=3 - - binutils_impl_linux-64=2.40 - - bwidget=1.9.14 + - _r-mutex=1.0.0 + - binutils_impl_linux-64=2.38 + - bwidget=1.9.16 - bzip2=1.0.8 - - c-ares=1.27.0 - - ca-certificates=2024.2.2 + - c-ares=1.19.1 + - ca-certificates=2024.3.11 - cairo=1.18.0 - curl=8.5.0 - - expat=2.6.2 + - expat=2.5.0 - font-ttf-dejavu-sans-mono=2.37 - - font-ttf-inconsolata=3.000 - - font-ttf-source-code-pro=2.038 + - font-ttf-inconsolata=2.001 + - font-ttf-source-code-pro=2.030 - font-ttf-ubuntu=0.83 - fontconfig=2.14.2 - fonts-anaconda=1 @@ -25,102 +25,103 @@ dependencies: - fonts-conda-forge=1 - freetype=2.12.1 - fribidi=1.0.10 - - gcc_impl_linux-64=13.2.0 - - gettext=0.21.1 - - gfortran_impl_linux-64=13.2.0 - - graphite2=1.3.13 - - gxx_impl_linux-64=13.2.0 + - gcc_impl_linux-64=11.2.0 + - gettext=0.21.0 + - gfortran_impl_linux-64=11.2.0 + - graphite2=1.3.14 + - gxx_impl_linux-64=11.2.0 - harfbuzz=8.3.0 - icu=73.2 - - kernel-headers_linux-64=4.18.0 + - kernel-headers_linux-64=2.6.32 - keyutils=1.6.1 - - krb5=1.21.2 - - ld_impl_linux-64=2.40 + - krb5=1.20.1 + - ld_impl_linux-64=2.38 - lerc=4.0.0 - libblas=3.9.0 - libcurl=8.5.0 - libdeflate=1.19 - - libedit=3.1.20191231 + - libedit=3.1.20230828 - libev=4.33 - - libexpat=2.6.2 - - libffi=3.4.2 - - libgcc-devel_linux-64=13.2.0 + - libexpat=2.5.0 + - libffi=3.4.4 + - libgcc-devel_linux-64=11.2.0 - libgcc-ng=13.2.0 - - libgfortran-ng=13.2.0 - - libgfortran5=13.2.0 - - libgit2=1.7.2 - - libglib=2.80.0 + - libgfortran-ng=11.2.0 + - libgfortran5=11.2.0 + - libgit2=1.6.4 + - libglib=2.78.4 - libgomp=13.2.0 - libiconv=1.17 - libjpeg-turbo=3.0.0 - liblapack=3.9.0 - - libnghttp2=1.58.0 - - libopenblas=0.3.26 + - libnghttp2=1.57.0 + - libopenblas=0.3.21 - libpng=1.6.43 - - libsanitizer=13.2.0 - - libssh2=1.11.0 - - libstdcxx-devel_linux-64=13.2.0 + - libssh2=1.10.0 + - libstdcxx-devel_linux-64=11.2.0 - libstdcxx-ng=13.2.0 - libtiff=4.6.0 - libuuid=2.38.1 - libwebp-base=1.3.2 - libxcb=1.15 - - libxml2=2.12.5 + - libxml2=2.10.4 - libzlib=1.2.13 - lz4-c=1.9.4 - - make=4.3 - - ncurses=6.4 - - openssl=3.2.1 - - pandoc=3.1.12.2 - - pango=1.52.1 - - pcre2=10.43 + - make=4.2.1 + - ncurses=6.4.20240210 + - openssl=3.0.13 + - pandoc=2.12 + - pango=1.52.0 + - pcre2=10.42 - pixman=0.43.2 - - pthread-stubs=0.4 + - pthread-stubs=0.3 - r-askpass=1.2.0 - r-assertthat=0.2.1 - - r-base=4.2.3 + - r-base=4.3.3 - r-base64enc=0.1_3 - - r-biocmanager=1.30.22 - - r-brew=1.0_10 - - r-brio=1.1.4 - - r-bslib=0.6.1 + - r-brew=1.0_8 + - r-brio=1.1.3 + - r-bslib=0.5.1 - r-cachem=1.0.8 - - r-callr=3.7.5 - - r-cli=3.6.2 + - r-callr=3.7.3 + - r-cli=3.6.1 - r-clipr=0.8.0 - - r-commonmark=1.9.1 - - r-cpp11=0.4.7 + - r-commonmark=1.9.0 + - r-cpp11=0.4.6 - r-crayon=1.5.2 - r-credentials=2.0.1 - r-curl=5.1.0 - - r-desc=1.4.3 + - r-desc=1.4.2 - r-devtools=2.4.5 - r-diffobj=0.3.5 - - r-digest=0.6.35 + - r-digest=0.6.33 - r-downlit=0.4.3 - r-ellipsis=0.3.2 - - r-evaluate=0.23 - - r-fansi=1.0.6 + - r-evaluate=0.22 + - r-fansi=1.0.5 - r-fastmap=1.1.1 - r-fontawesome=0.5.2 - r-fs=1.6.3 - - r-gert=2.0.1 + - r-gert=2.0.0 - r-gh=1.4.0 - r-gitcreds=0.1.2 - - r-glue=1.7.0 + - r-glue=1.6.2 - r-highr=0.10 - - r-htmltools=0.5.7 - - r-htmlwidgets=1.6.4 - - r-httpuv=1.6.14 + - r-htmltools=0.5.6.1 + - r-htmlwidgets=1.6.2 + - r-httpuv=1.6.11 - r-httr=1.4.7 - - r-httr2=1.0.0 + - r-httr2=0.2.3 - r-ini=0.3.1 - r-jquerylib=0.1.4 - - r-jsonlite=1.8.8 - - r-knitr=1.45 - - r-later=1.3.2 - - r-lifecycle=1.0.4 + - r-jsonlite=1.8.7 + - r-knitr=1.44 + - r-later=1.3.1 + - r-lattice=0.22_5 + - r-lifecycle=1.0.3 - r-magrittr=2.0.3 + - r-mass=7.3_60 + - r-matrix=1.6_1.1 - r-memoise=2.0.1 - r-mime=0.12 - r-miniui=0.1.1.1 @@ -129,55 +130,55 @@ dependencies: - r-pkgbuild=1.4.2 - r-pkgconfig=2.0.3 - r-pkgdown=2.0.7 - - r-pkgload=1.3.4 + - r-pkgload=1.3.3 - r-praise=1.0.0 - r-prettyunits=1.2.0 - - r-processx=3.8.3 + - r-processx=3.8.2 - r-profvis=0.3.8 - r-promises=1.2.1 - - r-ps=1.7.6 + - r-ps=1.7.5 - r-purrr=1.0.2 - r-r6=2.5.1 - - r-ragg=1.3.0 + - r-ragg=1.2.6 - r-rappdirs=0.3.3 - r-rcmdcheck=1.4.0 - - r-rcpp=1.0.12 + - r-rcpp=1.0.11 - r-rematch2=2.1.2 - r-remotes=2.4.2.1 - - r-rlang=1.1.3 + - r-rlang=1.1.1 - r-rmarkdown=2.25 - - r-roxygen2=7.3.1 - - r-rprojroot=2.0.4 + - r-roxygen2=7.2.3 + - r-rprojroot=2.0.3 - r-rstudioapi=0.15.0 - r-rversions=2.1.2 - - r-sass=0.4.8 + - r-sass=0.4.7 - r-sessioninfo=1.2.2 - - r-shiny=1.8.0 + - r-shiny=1.7.5.1 - r-sourcetools=0.1.7_1 - - r-stringi=1.8.3 - - r-stringr=1.5.1 + - r-stringi=1.7.12 + - r-stringr=1.5.0 - r-sys=3.4.2 - r-systemfonts=1.0.5 - - r-testthat=3.2.1 + - r-testthat=3.2.0 - r-textshaping=0.3.7 - r-tibble=3.2.1 - - r-tinytex=0.49 + - r-tinytex=0.48 - r-urlchecker=1.0.1 - - r-usethis=2.2.3 + - r-usethis=2.2.2 - r-utf8=1.2.4 - - r-vctrs=0.6.5 - - r-waldo=0.5.2 + - r-vctrs=0.6.4 + - r-waldo=0.5.1 - r-whisker=0.4.1 - - r-withr=3.0.0 - - r-xfun=0.42 - - r-xml2=1.3.6 + - r-withr=2.5.1 + - r-xfun=0.40 + - r-xml2=1.3.5 - r-xopen=1.0.0 - r-xtable=1.8_4 - - r-yaml=2.3.8 - - r-zip=2.3.1 + - r-yaml=2.3.7 + - r-zip=2.3.0 - readline=8.2 - sed=4.8 - - sysroot_linux-64=2.28 + - sysroot_linux-64=2.12 - tk=8.6.13 - tktable=2.10 - xorg-kbproto=1.0.7 @@ -192,6 +193,6 @@ dependencies: - xorg-renderproto=0.11.1 - xorg-xextproto=7.3.0 - xorg-xproto=7.0.31 - - xz=5.2.6 + - xz=5.4.6 - zlib=1.2.13 - zstd=1.5.5 From da3f546b41640659691c0811ad37d3d3116cf91e Mon Sep 17 00:00:00 2001 From: Kdreval Date: Wed, 3 Jul 2024 08:02:39 -0700 Subject: [PATCH 04/21] add vanilla --- .github/workflows/build_check.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_check.yaml b/.github/workflows/build_check.yaml index c6f4395..683e225 100644 --- a/.github/workflows/build_check.yaml +++ b/.github/workflows/build_check.yaml @@ -35,11 +35,11 @@ jobs: - name: Build package run: - Rscript -e "devtools::install()" + Rscript --vanilla -e "devtools::install()" - name: Check package run: - Rscript -e "devtools::check(vignettes = FALSE, args = '--no-examples')" + Rscript --vanilla -e "devtools::check(vignettes = FALSE, args = '--no-examples')" - name: Upload check results if: failure() From 7c6d656aca1d4af4ca37bf4e2e490342eade98c4 Mon Sep 17 00:00:00 2001 From: Kdreval Date: Mon, 8 Jul 2024 18:07:57 -0700 Subject: [PATCH 05/21] bug fix: testing working env on actions --- envs/r.yaml | 226 +++++++++++++++++++++++++++++----------------------- 1 file changed, 127 insertions(+), 99 deletions(-) diff --git a/envs/r.yaml b/envs/r.yaml index c6af660..9836904 100644 --- a/envs/r.yaml +++ b/envs/r.yaml @@ -1,190 +1,218 @@ name: r channels: - - r - conda-forge - defaults dependencies: - _libgcc_mutex=0.1 - _openmp_mutex=4.5 - - _r-mutex=1.0.0 - - binutils_impl_linux-64=2.38 - - bwidget=1.9.16 + - _r-mutex=1.0.1 + - binutils_impl_linux-64=2.40 + - binutils_linux-64=2.40 + - bwidget=1.9.14 - bzip2=1.0.8 - - c-ares=1.19.1 - - ca-certificates=2024.3.11 - - cairo=1.18.0 - - curl=8.5.0 - - expat=2.5.0 + - c-ares=1.28.1 + - ca-certificates=2024.7.4 + - cairo=1.16.0 + - cmake=3.30.0 + - curl=8.8.0 + - expat=2.6.2 - font-ttf-dejavu-sans-mono=2.37 - - font-ttf-inconsolata=2.001 - - font-ttf-source-code-pro=2.030 + - font-ttf-inconsolata=3.000 + - font-ttf-source-code-pro=2.038 - font-ttf-ubuntu=0.83 - fontconfig=2.14.2 - - fonts-anaconda=1 - fonts-conda-ecosystem=1 - fonts-conda-forge=1 - freetype=2.12.1 - fribidi=1.0.10 - - gcc_impl_linux-64=11.2.0 - - gettext=0.21.0 - - gfortran_impl_linux-64=11.2.0 - - graphite2=1.3.14 - - gxx_impl_linux-64=11.2.0 - - harfbuzz=8.3.0 - - icu=73.2 + - gcc_impl_linux-64=13.2.0 + - gcc_linux-64=13.2.0 + - gettext=0.22.5 + - gettext-tools=0.22.5 + - gfortran_impl_linux-64=13.2.0 + - graphite2=1.3.13 + - gsl=2.7 + - gxx_impl_linux-64=13.2.0 + - harfbuzz=7.3.0 + - icu=72.1 - kernel-headers_linux-64=2.6.32 - keyutils=1.6.1 - - krb5=1.20.1 - - ld_impl_linux-64=2.38 + - krb5=1.21.3 + - ld_impl_linux-64=2.40 - lerc=4.0.0 + - libasprintf=0.22.5 + - libasprintf-devel=0.22.5 - libblas=3.9.0 - - libcurl=8.5.0 - - libdeflate=1.19 - - libedit=3.1.20230828 + - libcblas=3.9.0 + - libcurl=8.8.0 + - libdeflate=1.18 + - libedit=3.1.20191231 - libev=4.33 - - libexpat=2.5.0 - - libffi=3.4.4 - - libgcc-devel_linux-64=11.2.0 - - libgcc-ng=13.2.0 - - libgfortran-ng=11.2.0 - - libgfortran5=11.2.0 - - libgit2=1.6.4 - - libglib=2.78.4 - - libgomp=13.2.0 + - libexpat=2.6.2 + - libffi=3.4.2 + - libgcc-devel_linux-64=13.2.0 + - libgcc-ng=14.1.0 + - libgettextpo=0.22.5 + - libgettextpo-devel=0.22.5 + - libgfortran-ng=14.1.0 + - libgfortran5=14.1.0 + - libgit2=1.5.1 + - libglib=2.78.1 + - libgomp=14.1.0 - libiconv=1.17 - - libjpeg-turbo=3.0.0 + - libjpeg-turbo=2.1.5.1 - liblapack=3.9.0 - - libnghttp2=1.57.0 - - libopenblas=0.3.21 + - libnghttp2=1.58.0 + - libnsl=2.0.1 + - libopenblas=0.3.27 - libpng=1.6.43 - - libssh2=1.10.0 - - libstdcxx-devel_linux-64=11.2.0 - - libstdcxx-ng=13.2.0 - - libtiff=4.6.0 + - libsanitizer=13.2.0 + - libsqlite=3.46.0 + - libssh2=1.11.0 + - libstdcxx-devel_linux-64=13.2.0 + - libstdcxx-ng=14.1.0 + - libtiff=4.5.1 - libuuid=2.38.1 - - libwebp-base=1.3.2 + - libuv=1.48.0 + - libwebp-base=1.4.0 - libxcb=1.15 - - libxml2=2.10.4 - - libzlib=1.2.13 - - lz4-c=1.9.4 - - make=4.2.1 - - ncurses=6.4.20240210 - - openssl=3.0.13 - - pandoc=2.12 - - pango=1.52.0 - - pcre2=10.42 + - libxcrypt=4.4.36 + - libxml2=2.11.5 + - libzlib=1.3.1 + - make=4.3 + - mysql-common=8.3.0 + - mysql-connector-c=6.1.11 + - mysql-libs=8.3.0 + - mysqlclient=2.2.4 + - ncurses=6.5 + - openssl=3.3.1 + - pandoc=2.19.2 + - pango=1.50.14 + - pcre2=10.40 + - pip=24.0 - pixman=0.43.2 - - pthread-stubs=0.3 - - r-askpass=1.2.0 + - pthread-stubs=0.4 + - python=3.12.4 + - python_abi=3.12 + - r-askpass=1.1 - r-assertthat=0.2.1 - - r-base=4.3.3 + - r-backports=1.4.1 + - r-base=4.1.3 - r-base64enc=0.1_3 + - r-biocmanager=1.30.21 - r-brew=1.0_8 - r-brio=1.1.3 - - r-bslib=0.5.1 + - r-bslib=0.5.0 - r-cachem=1.0.8 - r-callr=3.7.3 - r-cli=3.6.1 - r-clipr=0.8.0 - r-commonmark=1.9.0 - - r-cpp11=0.4.6 + - r-cpp11=0.4.7 - r-crayon=1.5.2 - - r-credentials=2.0.1 - - r-curl=5.1.0 + - r-credentials=1.3.2 + - r-curl=4.3.3 + - r-dbi=1.1.3 - r-desc=1.4.2 - r-devtools=2.4.5 - r-diffobj=0.3.5 - - r-digest=0.6.33 - - r-downlit=0.4.3 + - r-digest=0.6.31 + - r-downlit=0.4.2 - r-ellipsis=0.3.2 - - r-evaluate=0.22 - - r-fansi=1.0.5 + - r-evaluate=0.21 + - r-fansi=1.0.4 - r-fastmap=1.1.1 - - r-fontawesome=0.5.2 - - r-fs=1.6.3 - - r-gert=2.0.0 + - r-fontawesome=0.5.1 + - r-fs=1.6.2 + - r-gert=1.9.2 - r-gh=1.4.0 + - r-git2r=0.31.0 - r-gitcreds=0.1.2 - r-glue=1.6.2 - r-highr=0.10 - - r-htmltools=0.5.6.1 + - r-htmltools=0.5.5 - r-htmlwidgets=1.6.2 - r-httpuv=1.6.11 - - r-httr=1.4.7 + - r-httr=1.4.6 - r-httr2=0.2.3 - r-ini=0.3.1 - r-jquerylib=0.1.4 - - r-jsonlite=1.8.7 - - r-knitr=1.44 + - r-jsonlite=1.8.5 + - r-knitr=1.43 - r-later=1.3.1 - - r-lattice=0.22_5 + - r-lattice=0.21_8 - r-lifecycle=1.0.3 - r-magrittr=2.0.3 - - r-mass=7.3_60 - - r-matrix=1.6_1.1 + - r-mass=7.3_58.3 + - r-matrix=1.5_4.1 - r-memoise=2.0.1 - r-mime=0.12 - r-miniui=0.1.1.1 - - r-openssl=2.1.1 + - r-openssl=2.0.6 - r-pillar=1.9.0 - - r-pkgbuild=1.4.2 + - r-pkgbuild=1.4.0 - r-pkgconfig=2.0.3 - r-pkgdown=2.0.7 - - r-pkgload=1.3.3 + - r-pkgload=1.3.2 - r-praise=1.0.0 - - r-prettyunits=1.2.0 - - r-processx=3.8.2 + - r-prettyunits=1.1.1 + - r-processx=3.8.1 - r-profvis=0.3.8 - - r-promises=1.2.1 + - r-promises=1.2.0.1 - r-ps=1.7.5 - - r-purrr=1.0.2 + - r-purrr=1.0.1 - r-r6=2.5.1 - - r-ragg=1.2.6 + - r-ragg=1.2.5 - r-rappdirs=0.3.3 - r-rcmdcheck=1.4.0 - - r-rcpp=1.0.11 + - r-rcpp=1.0.10 - r-rematch2=2.1.2 - - r-remotes=2.4.2.1 + - r-remotes=2.4.2 - r-rlang=1.1.1 - - r-rmarkdown=2.25 + - r-rmarkdown=2.22 + - r-rmysql=0.10.25 - r-roxygen2=7.2.3 - r-rprojroot=2.0.3 - - r-rstudioapi=0.15.0 + - r-rstudioapi=0.14 - r-rversions=2.1.2 - - r-sass=0.4.7 + - r-sass=0.4.6 - r-sessioninfo=1.2.2 - - r-shiny=1.7.5.1 + - r-shiny=1.7.4 - r-sourcetools=0.1.7_1 - r-stringi=1.7.12 - r-stringr=1.5.0 - r-sys=3.4.2 - - r-systemfonts=1.0.5 - - r-testthat=3.2.0 - - r-textshaping=0.3.7 + - r-systemfonts=1.0.4 + - r-testthat=3.1.8 + - r-textshaping=0.3.6 - r-tibble=3.2.1 - - r-tinytex=0.48 + - r-tinytex=0.45 - r-urlchecker=1.0.1 - - r-usethis=2.2.2 - - r-utf8=1.2.4 - - r-vctrs=0.6.4 + - r-usethis=2.2.0 + - r-utf8=1.2.3 + - r-vctrs=0.6.2 - r-waldo=0.5.1 - r-whisker=0.4.1 - - r-withr=2.5.1 - - r-xfun=0.40 - - r-xml2=1.3.5 + - r-withr=2.5.0 + - r-xfun=0.39 + - r-xml2=1.3.4 - r-xopen=1.0.0 - r-xtable=1.8_4 - r-yaml=2.3.7 - r-zip=2.3.0 - readline=8.2 + - rhash=1.4.4 - sed=4.8 + - setuptools=70.1.1 - sysroot_linux-64=2.12 - tk=8.6.13 - tktable=2.10 + - tzdata=2024a + - wheel=0.43.0 - xorg-kbproto=1.0.7 - xorg-libice=1.1.1 - xorg-libsm=1.2.4 - - xorg-libx11=1.8.7 + - xorg-libx11=1.8.9 - xorg-libxau=1.0.11 - xorg-libxdmcp=1.1.3 - xorg-libxext=1.3.4 @@ -193,6 +221,6 @@ dependencies: - xorg-renderproto=0.11.1 - xorg-xextproto=7.3.0 - xorg-xproto=7.0.31 - - xz=5.4.6 - - zlib=1.2.13 - - zstd=1.5.5 + - xz=5.2.6 + - zlib=1.3.1 + - zstd=1.5.6 From 1740b09886e3c98449246c8141592182a1bf3e55 Mon Sep 17 00:00:00 2001 From: Kdreval Date: Mon, 8 Jul 2024 18:15:54 -0700 Subject: [PATCH 06/21] bug fix: downgrade python v --- envs/r.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/envs/r.yaml b/envs/r.yaml index 9836904..195aee7 100644 --- a/envs/r.yaml +++ b/envs/r.yaml @@ -92,8 +92,8 @@ dependencies: - pip=24.0 - pixman=0.43.2 - pthread-stubs=0.4 - - python=3.12.4 - - python_abi=3.12 + - python=3.10.14 + - python_abi=3.10 - r-askpass=1.1 - r-assertthat=0.2.1 - r-backports=1.4.1 From 49c73ed1b712b7fb784ba486d359b80e476cb142 Mon Sep 17 00:00:00 2001 From: Kdreval Date: Mon, 8 Jul 2024 18:18:53 -0700 Subject: [PATCH 07/21] bug fix: more python v downgrading --- envs/r.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/envs/r.yaml b/envs/r.yaml index 195aee7..74e56e8 100644 --- a/envs/r.yaml +++ b/envs/r.yaml @@ -1,4 +1,4 @@ -name: r +name: deps channels: - conda-forge - defaults @@ -92,8 +92,8 @@ dependencies: - pip=24.0 - pixman=0.43.2 - pthread-stubs=0.4 - - python=3.10.14 - - python_abi=3.10 + - python=3.9.10 + - python_abi=3.9 - r-askpass=1.1 - r-assertthat=0.2.1 - r-backports=1.4.1 From faabd32048bd60ee582dcbe2da80deb61efe3cd1 Mon Sep 17 00:00:00 2001 From: Kdreval Date: Mon, 8 Jul 2024 18:20:26 -0700 Subject: [PATCH 08/21] bug fix: specify python v differently --- envs/r.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/envs/r.yaml b/envs/r.yaml index 74e56e8..4661ba2 100644 --- a/envs/r.yaml +++ b/envs/r.yaml @@ -92,7 +92,7 @@ dependencies: - pip=24.0 - pixman=0.43.2 - pthread-stubs=0.4 - - python=3.9.10 + - python=3.9 - python_abi=3.9 - r-askpass=1.1 - r-assertthat=0.2.1 From 07edf0199bf337ebe7e2ae54b81b48ca7c31d54c Mon Sep 17 00:00:00 2001 From: Kdreval Date: Mon, 8 Jul 2024 18:28:20 -0700 Subject: [PATCH 09/21] bug fix: drop python --- envs/r.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/envs/r.yaml b/envs/r.yaml index 4661ba2..9eb1874 100644 --- a/envs/r.yaml +++ b/envs/r.yaml @@ -92,8 +92,6 @@ dependencies: - pip=24.0 - pixman=0.43.2 - pthread-stubs=0.4 - - python=3.9 - - python_abi=3.9 - r-askpass=1.1 - r-assertthat=0.2.1 - r-backports=1.4.1 From 845052e9c9c65667201ae139185c335dbae4ca21 Mon Sep 17 00:00:00 2001 From: Kdreval Date: Tue, 9 Jul 2024 10:30:48 -0700 Subject: [PATCH 10/21] cleanup: address relocated function reference --- R/calculate_tmb.R | 4 ++-- R/fuzzy_match_mafs.R | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/calculate_tmb.R b/R/calculate_tmb.R index ebda2ed..3f6e10f 100644 --- a/R/calculate_tmb.R +++ b/R/calculate_tmb.R @@ -37,7 +37,7 @@ #' subset_to_nonSyn = FALSE #' ) #' -#' @import dplyr +#' @import dplyr GAMBLR.data #' @export #' calculate_tmb <- function( @@ -105,7 +105,7 @@ calculate_tmb <- function( # Expect bed format but be flexible about column names columns <- colnames(regions_bed)[1:3] - overlap <- GAMBLR.helpers::cool_overlaps( + overlap <- GAMBLR.data::cool_overlaps( data1 = maf_data, data2 = regions_bed, columns2 = columns diff --git a/R/fuzzy_match_mafs.R b/R/fuzzy_match_mafs.R index f4484a5..0c64e23 100644 --- a/R/fuzzy_match_mafs.R +++ b/R/fuzzy_match_mafs.R @@ -25,7 +25,7 @@ #' #' @return data frame #' -#' @import dplyr tidyr tibble +#' @import dplyr tidyr tibble GAMBLR.data #' @export #' #' @examples @@ -107,7 +107,7 @@ fuzzy_match_mafs <- function( "Start_Position", "End_Position" ) - matched <- cool_overlaps( + matched <- GAMBLR.data::cool_overlaps( data1 = maf1, data2 = maf2, columns1 = columns_to_overlap, From d112d869f1bd18a88c290e487f651dbeb5695e77 Mon Sep 17 00:00:00 2001 From: Kdreval Date: Fri, 22 Nov 2024 10:22:32 -0800 Subject: [PATCH 11/21] cleanup: drop stringr dependency --- DESCRIPTION | 1 - NAMESPACE | 5 ----- R/GAMBLR.helpers-package.R | 4 ---- R/get_gambl_colours.R | 4 ++-- R/get_template_wildcards.R | 2 +- R/grob_wildcards.R | 4 ++-- 6 files changed, 5 insertions(+), 15 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c69cdad..5af1546 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -17,7 +17,6 @@ Imports: ggthemes, philentropy, readr, - stringr, tibble, tidyr, workflowr diff --git a/NAMESPACE b/NAMESPACE index d9aa630..9433009 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -35,7 +35,6 @@ import(GAMBLR.data) import(dplyr) import(ggplot2) import(readr) -import(stringr) import(tibble) import(tidyr) import(workflowr) @@ -45,10 +44,6 @@ importFrom(philentropy,KL) importFrom(stats,end) importFrom(stats,quantile) importFrom(stats,start) -importFrom(stringr,str_c) -importFrom(stringr,str_extract) -importFrom(stringr,str_remove) -importFrom(stringr,str_remove_all) importFrom(tidyr,unnest_auto) importFrom(utils,head) importFrom(utils,read.csv) diff --git a/R/GAMBLR.helpers-package.R b/R/GAMBLR.helpers-package.R index 0af4d14..2c32605 100644 --- a/R/GAMBLR.helpers-package.R +++ b/R/GAMBLR.helpers-package.R @@ -7,10 +7,6 @@ #' @importFrom stats end #' @importFrom stats quantile #' @importFrom stats start -#' @importFrom stringr str_c -#' @importFrom stringr str_extract -#' @importFrom stringr str_remove -#' @importFrom stringr str_remove_all #' @importFrom tidyr unnest_auto #' @importFrom utils head #' @importFrom utils read.csv diff --git a/R/get_gambl_colours.R b/R/get_gambl_colours.R index dab03b6..c7397ee 100644 --- a/R/get_gambl_colours.R +++ b/R/get_gambl_colours.R @@ -18,7 +18,7 @@ #' #' @return A named vector of colour codes for lymphgen classes and pathology. #' -#' @import dplyr stringr tidyr +#' @import dplyr tidyr #' @export #' #' @examples @@ -318,7 +318,7 @@ get_gambl_colours = function(classification = "all", everything = c(everything, all_colours[[this_group]]) } #return matching value from lowercase version of the argument if it exists - lc_class = stringr::str_to_lower(classification) + lc_class = tolower(classification) if(return_available){ return(names(all_colours)) } diff --git a/R/get_template_wildcards.R b/R/get_template_wildcards.R index f50d6fd..cc3b46e 100644 --- a/R/get_template_wildcards.R +++ b/R/get_template_wildcards.R @@ -16,6 +16,6 @@ get_template_wildcards = function(parent_key, }else{ wildcard_string = config::get(paste0(parent_key,"_wildcards"))[template_key] } - wildcards = stringr::str_split(wildcard_string,",") + wildcards = strsplit(wildcard_string,",") return(unlist(wildcards)) } diff --git a/R/grob_wildcards.R b/R/grob_wildcards.R index ab77166..ffa12e1 100644 --- a/R/grob_wildcards.R +++ b/R/grob_wildcards.R @@ -8,7 +8,7 @@ #' #' @export grob_wildcards = function(wildcarded_string){ - wildcards = unlist(stringr::str_extract_all(wildcarded_string,"\\{[^\\{]+\\}")) - wildcards = stringr::str_remove_all(wildcards,"\\{") %>% stringr::str_remove_all(.,"\\}") + wildcards = unlist(regmatches(wildcarded_string, gregexpr("\\{[^\\{]+\\}", wildcarded_string))) + wildcards = gsub("\\{", "", wildcards) %>% gsub("\\}", "", .) return(wildcards) } From 2f3ecbc6bc7f89dea46a252e27dc3298a32d50ed Mon Sep 17 00:00:00 2001 From: Kdreval Date: Fri, 22 Nov 2024 10:57:12 -0800 Subject: [PATCH 12/21] cleanup: drop philentropy dependency --- NAMESPACE | 2 +- R/GAMBLR.helpers-package.R | 1 - R/compare_coding_mutation_pattern.R | 12 +++++++--- R/kl_divergence.R | 36 ++++++++++++++++++++++++++++ man/kl_divergence.Rd | 37 +++++++++++++++++++++++++++++ 5 files changed, 83 insertions(+), 5 deletions(-) create mode 100644 R/kl_divergence.R create mode 100644 man/kl_divergence.Rd diff --git a/NAMESPACE b/NAMESPACE index 9433009..2119e85 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -19,6 +19,7 @@ export(get_template_wildcards) export(get_unmatched_normals) export(grob_wildcards) export(handle_metadata) +export(kl_divergence) export(maf_header) export(normalize_expression_data) export(rainfall_conv) @@ -40,7 +41,6 @@ import(tidyr) import(workflowr) importFrom(dplyr,left_join) importFrom(ggthemes,theme_foundation) -importFrom(philentropy,KL) importFrom(stats,end) importFrom(stats,quantile) importFrom(stats,start) diff --git a/R/GAMBLR.helpers-package.R b/R/GAMBLR.helpers-package.R index 2c32605..1dd1b12 100644 --- a/R/GAMBLR.helpers-package.R +++ b/R/GAMBLR.helpers-package.R @@ -3,7 +3,6 @@ ## usethis namespace: start #' @importFrom dplyr left_join -#' @importFrom philentropy KL #' @importFrom stats end #' @importFrom stats quantile #' @importFrom stats start diff --git a/R/compare_coding_mutation_pattern.R b/R/compare_coding_mutation_pattern.R index 281f090..20a7772 100644 --- a/R/compare_coding_mutation_pattern.R +++ b/R/compare_coding_mutation_pattern.R @@ -17,9 +17,9 @@ compare_coding_mutation_pattern = function(maf_df1,maf_df2,gene){ stop("Must provide the Hugo_Symbol of a single gene that is present in both maf files") } missense_positions1 = dplyr::filter(maf_df1,Hugo_Symbol==gene,!Variant_Classification %in% c("Silent","Splice_Site","Splice_Region"),Variant_Type=="SNP") %>% - pull(HGVSp_Short) %>% str_remove_all("p.\\w") %>% str_extract("\\d+") %>% as.numeric() + pull(HGVSp_Short) %>% gsub("p\\.\\w", "", .) %>% regmatches(., regexpr("\\d+", .)) %>% as.numeric() missense_positions2 = dplyr::filter(maf_df2,Hugo_Symbol==gene,!Variant_Classification %in% c("Silent","Splice_Site","Splice_Region"),Variant_Type=="SNP") %>% - pull(HGVSp_Short) %>% str_remove_all("p.\\w") %>% str_extract("\\d+") %>% as.numeric() + pull(HGVSp_Short) %>% gsub("p\\.\\w", "", .) %>% regmatches(., regexpr("\\d+", .)) %>% as.numeric() if(length(missense_positions1)==0 | length(missense_positions2)==0 ){ message(paste("no mutations for",gene,"in one or both data sets")) return(list(kl=15)) @@ -35,6 +35,12 @@ compare_coding_mutation_pattern = function(maf_df1,maf_df2,gene){ all_counts = dplyr::select(full_df,-position) %>% t() all_counts[1,]=all_counts[1,]/sum(all_counts[1,]) all_counts[2,]=all_counts[2,]/sum(all_counts[2,]) - kl_out = KL(all_counts) + + # Normalize the rows to turn counts into probabilities + P <- all_counts[1, ] / sum(all_counts[1, ]) + Q <- all_counts[2, ] / sum(all_counts[2, ]) + + kl_out <- kl_divergence(P, Q) + return(list(df=full_df,kl=unname(kl_out))) } diff --git a/R/kl_divergence.R b/R/kl_divergence.R new file mode 100644 index 0000000..2fb5291 --- /dev/null +++ b/R/kl_divergence.R @@ -0,0 +1,36 @@ +#' Calculate Kullback-Leibler Divergence +#' +#' This function computes the Kullback-Leibler (KL) divergence between two +#' probability distributions, with an optional small constant (epsilon) +#' added to avoid zero probabilities, which would otherwise cause division +#' by zero or undefined logarithms. +#' +#' @param P A numeric vector representing the first probability distribution. +#' The sum of "P" should be 1, but the function will normalize it if +#' necessary. +#' @param Q A numeric vector representing the second probability distribution. +#' The sum of "Q" should be 1, but the function will normalize it if +#' necessary. +#' @param epsilon A small positive number (default = 1e-7) to be added to each +#' probability in P and Q to avoid zero probabilities. This helps to +#' prevent division by zero or log(0). +#' +#' @return float +#' +#' @examples +#' P <- c(0.1, 0.4, 0.3, 0.2) +#' Q <- c(0.2, 0.3, 0.4, 0.1) +#' +#' kl_divergence(P, Q) +#' +#' @export +kl_divergence <- function(P, Q, epsilon = 1e-7) { + P <- P + epsilon + Q <- Q + epsilon + + P <- P / sum(P) + Q <- Q / sum(Q) + + # KL divergence formula: sum(P * log(P / Q)) + return(sum(P * log(P / Q), na.rm = TRUE)) +} diff --git a/man/kl_divergence.Rd b/man/kl_divergence.Rd new file mode 100644 index 0000000..5652dbe --- /dev/null +++ b/man/kl_divergence.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/kl_divergence.R +\name{kl_divergence} +\alias{kl_divergence} +\title{Calculate Kullback-Leibler Divergence} +\usage{ +kl_divergence(P, Q, epsilon = 1e-07) +} +\arguments{ +\item{P}{A numeric vector representing the first probability distribution. +The sum of "P" should be 1, but the function will normalize it if +necessary.} + +\item{Q}{A numeric vector representing the second probability distribution. +The sum of "Q" should be 1, but the function will normalize it if +necessary.} + +\item{epsilon}{A small positive number (default = 1e-7) to be added to each +probability in P and Q to avoid zero probabilities. This helps to +prevent division by zero or log(0).} +} +\value{ +float +} +\description{ +This function computes the Kullback-Leibler (KL) divergence between two +probability distributions, with an optional small constant (epsilon) +added to avoid zero probabilities, which would otherwise cause division +by zero or undefined logarithms. +} +\examples{ +P <- c(0.1, 0.4, 0.3, 0.2) +Q <- c(0.2, 0.3, 0.4, 0.1) + +kl_divergence(P, Q) + +} From 0050e497422ea4574c9b69a96265e90fa98e4440 Mon Sep 17 00:00:00 2001 From: Kdreval Date: Fri, 22 Nov 2024 11:15:14 -0800 Subject: [PATCH 13/21] clenaup: switch to consistent import definition --- DESCRIPTION | 1 - NAMESPACE | 11 ----------- R/GAMBLR.helpers-package.R | 17 ----------------- R/compare_coding_mutation_pattern.R | 1 + R/gene_mutation_tally.R | 1 + R/sanity_check_metadata.R | 2 +- man/GAMBLR.helpers-package.Rd | 15 --------------- 7 files changed, 3 insertions(+), 45 deletions(-) delete mode 100644 R/GAMBLR.helpers-package.R delete mode 100644 man/GAMBLR.helpers-package.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 5af1546..e33c01b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,7 +15,6 @@ Imports: GAMBLR.data, ggplot2, ggthemes, - philentropy, readr, tibble, tidyr, diff --git a/NAMESPACE b/NAMESPACE index 2119e85..ad6b74c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -39,15 +39,4 @@ import(readr) import(tibble) import(tidyr) import(workflowr) -importFrom(dplyr,left_join) importFrom(ggthemes,theme_foundation) -importFrom(stats,end) -importFrom(stats,quantile) -importFrom(stats,start) -importFrom(tidyr,unnest_auto) -importFrom(utils,head) -importFrom(utils,read.csv) -importFrom(utils,read.socket) -importFrom(utils,tail) -importFrom(utils,write.socket) -importFrom(utils,write.table) diff --git a/R/GAMBLR.helpers-package.R b/R/GAMBLR.helpers-package.R deleted file mode 100644 index 1dd1b12..0000000 --- a/R/GAMBLR.helpers-package.R +++ /dev/null @@ -1,17 +0,0 @@ -#' @keywords internal -"_PACKAGE" - -## usethis namespace: start -#' @importFrom dplyr left_join -#' @importFrom stats end -#' @importFrom stats quantile -#' @importFrom stats start -#' @importFrom tidyr unnest_auto -#' @importFrom utils head -#' @importFrom utils read.csv -#' @importFrom utils read.socket -#' @importFrom utils tail -#' @importFrom utils write.socket -#' @importFrom utils write.table -## usethis namespace: end -NULL diff --git a/R/compare_coding_mutation_pattern.R b/R/compare_coding_mutation_pattern.R index 20a7772..2ea0d72 100644 --- a/R/compare_coding_mutation_pattern.R +++ b/R/compare_coding_mutation_pattern.R @@ -8,6 +8,7 @@ #' #' @return list #' +#' @import dplyr #' @export compare_coding_mutation_pattern = function(maf_df1,maf_df2,gene){ if(missing(maf_df1) | missing(maf_df2)){ diff --git a/R/gene_mutation_tally.R b/R/gene_mutation_tally.R index 10f0964..32a3aec 100644 --- a/R/gene_mutation_tally.R +++ b/R/gene_mutation_tally.R @@ -17,6 +17,7 @@ #' #' @return data frame #' +#' @import dplyr #' @export gene_mutation_tally = function(maf_df,these_samples_metadata,these_genes,grouping_variable="cohort"){ meta = dplyr::select(these_samples_metadata,sample_id,{{grouping_variable}}) diff --git a/R/sanity_check_metadata.R b/R/sanity_check_metadata.R index 0faa1f4..8a1b461 100644 --- a/R/sanity_check_metadata.R +++ b/R/sanity_check_metadata.R @@ -6,7 +6,7 @@ #' #' @return A table. #' -#' @import tibble readr dplyr +#' @import tibble readr dplyr tidyr #' #' #' @examples diff --git a/man/GAMBLR.helpers-package.Rd b/man/GAMBLR.helpers-package.Rd deleted file mode 100644 index 90c6164..0000000 --- a/man/GAMBLR.helpers-package.Rd +++ /dev/null @@ -1,15 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/GAMBLR.helpers-package.R -\docType{package} -\name{GAMBLR.helpers-package} -\alias{GAMBLR.helpers} -\alias{GAMBLR.helpers-package} -\title{GAMBLR.helpers: Provide helper functions for GAMBLR-based packages} -\description{ -This package is part of the Genomic Analysis of Mature B-cell Lymphomas (GAMBL) project developed by the Morin Lab. -} -\author{ -\strong{Maintainer}: Vladimir Souza \email{vsouza@bcgsc.ca} - -} -\keyword{internal} From 2dda8459598618453105a854614187e7874cbb3d Mon Sep 17 00:00:00 2001 From: Kdreval Date: Fri, 22 Nov 2024 12:31:22 -0800 Subject: [PATCH 14/21] cleanup: remove workflowr --- DESCRIPTION | 3 +-- NAMESPACE | 2 -- R/web_initialize_gambl_site.R | 22 ---------------------- man/web_initialize_gambl_site.Rd | 25 ------------------------- 4 files changed, 1 insertion(+), 51 deletions(-) delete mode 100644 R/web_initialize_gambl_site.R delete mode 100644 man/web_initialize_gambl_site.Rd diff --git a/DESCRIPTION b/DESCRIPTION index e33c01b..ac813c6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -17,8 +17,7 @@ Imports: ggthemes, readr, tibble, - tidyr, - workflowr + tidyr Remotes: morinlab/GAMBLR.data Encoding: UTF-8 diff --git a/NAMESPACE b/NAMESPACE index ad6b74c..6b5fe8a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -31,12 +31,10 @@ export(subset_cnstates) export(theme_Morons) export(trim_scale_expression) export(vc_nonSynonymous) -export(web_initialize_gambl_site) import(GAMBLR.data) import(dplyr) import(ggplot2) import(readr) import(tibble) import(tidyr) -import(workflowr) importFrom(ggthemes,theme_foundation) diff --git a/R/web_initialize_gambl_site.R b/R/web_initialize_gambl_site.R deleted file mode 100644 index a781ee4..0000000 --- a/R/web_initialize_gambl_site.R +++ /dev/null @@ -1,22 +0,0 @@ -#' @title Web Initialize GAMBL Site. -#' -#' @description Set up a fresh instance of a website to host on gitlab. -#' -#' @param site_base_name Base name for site. -#' @param base_directory Path to base directory. -#' @param my_name My name. -#' @param my_gitlab_email The email used for gitlab. -#' -#' @import workflowr -#' -#' @export -web_initialize_gambl_site = function(site_base_name, - base_directory = "/home/rmorin/", - my_name = "Ryan Morin", - my_gitlab_email = "rdmorin@sfu.ca"){ - - wflow_git_config(user.name = my_name, user.email = my_gitlab_email) - setwd(base_directory) - wflow_start(site_base_name) - wflow_build() -} diff --git a/man/web_initialize_gambl_site.Rd b/man/web_initialize_gambl_site.Rd deleted file mode 100644 index 66a8b9b..0000000 --- a/man/web_initialize_gambl_site.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/web_initialize_gambl_site.R -\name{web_initialize_gambl_site} -\alias{web_initialize_gambl_site} -\title{Web Initialize GAMBL Site.} -\usage{ -web_initialize_gambl_site( - site_base_name, - base_directory = "/home/rmorin/", - my_name = "Ryan Morin", - my_gitlab_email = "rdmorin@sfu.ca" -) -} -\arguments{ -\item{site_base_name}{Base name for site.} - -\item{base_directory}{Path to base directory.} - -\item{my_name}{My name.} - -\item{my_gitlab_email}{The email used for gitlab.} -} -\description{ -Set up a fresh instance of a website to host on gitlab. -} From bff1bbabceb36e3a4ab81a8445f81413dbe655e4 Mon Sep 17 00:00:00 2001 From: lkhilton Date: Tue, 3 Dec 2024 17:55:44 -0800 Subject: [PATCH 15/21] Assume input matrix is already subset to the correct regions --- R/create_onco_matrix.R | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/R/create_onco_matrix.R b/R/create_onco_matrix.R index 6d6b46e..4bedce8 100644 --- a/R/create_onco_matrix.R +++ b/R/create_onco_matrix.R @@ -38,10 +38,6 @@ create_onco_matrix = function( ) } - onco_matrix_coding <- coding_class[ - !coding_class %in% c("Silent", "Splice_Region", "Targeted_Region") - ] - onco_matrix <- maf_df %>% dplyr::distinct( @@ -52,10 +48,7 @@ create_onco_matrix = function( dplyr::select( Tumor_Sample_Barcode, Hugo_Symbol, Variant_Classification ) %>% - dplyr::filter( - Variant_Classification %in% onco_matrix_coding - ) %>% - dplyr::group_by( + dplyr::group_by( Hugo_Symbol, Tumor_Sample_Barcode ) %>% dplyr::mutate( From 69d95f0666288a626e266c30577e029f1ab9e718 Mon Sep 17 00:00:00 2001 From: lkhilton Date: Tue, 3 Dec 2024 17:59:41 -0800 Subject: [PATCH 16/21] Fix redundant colours --- R/get_gambl_colours.R | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/R/get_gambl_colours.R b/R/get_gambl_colours.R index c7397ee..fa47b18 100644 --- a/R/get_gambl_colours.R +++ b/R/get_gambl_colours.R @@ -35,20 +35,22 @@ get_gambl_colours = function(classification = "all", all_colours = list() everything = c() - blood_cols = c( Red ="#c41230", - Blue ="#115284", - Green = "#39b54b", - Purple = "#5c266c", - Orange = "#fe9003", - Green ="#046852", + blood_cols <- c( + Red = "#c41230", + Blue = "#115284", + "Light Green" = "#39b54b", + Purple = "#5c266c", + Orange = "#fe9003", + Green = "#046852", Lavendar = "#8781bd", - "Steel Blue" = "#455564", + "Steel Blue" = "#455564", "Light Blue" = "#2cace3", Magenta = "#e90c8b", LimeGreen = "#a4bb87", Brown = "#5f3a17", Gray = "#bdbdc1", - Yellow = "#f9bd1f" ) + Yellow = "#f9bd1f" + ) all_colours[["seq_type"]] = c("mrna" = "#E41A1C", "genome" = "#377EB8", From f973d4ec2fe47138db4809d33c6a300da7f23f35 Mon Sep 17 00:00:00 2001 From: lkhilton Date: Tue, 3 Dec 2024 18:00:41 -0800 Subject: [PATCH 17/21] Create colour for 5'UTR --- R/get_gambl_colours.R | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/R/get_gambl_colours.R b/R/get_gambl_colours.R index fa47b18..d0aa628 100644 --- a/R/get_gambl_colours.R +++ b/R/get_gambl_colours.R @@ -122,11 +122,11 @@ get_gambl_colours = function(classification = "all", "COMPOSITE" = "#ACADAF") - all_colours[["mutation"]]= + all_colours[["mutation"]] <- c( - "Nonsense_Mutation"="#D8A7CA", - "Missense_Mutation"=unname(blood_cols["Green"]), - "Multi_Hit"=unname(blood_cols["Steel Blue"]), + "Nonsense_Mutation" = unname(blood_cols["Red"]), + "Missense_Mutation" = unname(blood_cols["Light Green"]), + "Multi_Hit" = unname(blood_cols["Steel Blue"]), "Frame_Shift_Ins" = unname(blood_cols["Magenta"]), "Frame_Shift_Del" = unname(blood_cols["Magenta"]), "In_Frame_Ins" = unname(blood_cols["Brown"]), @@ -136,7 +136,9 @@ get_gambl_colours = function(classification = "all", "Splice_Site" = unname(blood_cols["Orange"]), "Splice_Region" = unname(blood_cols["Orange"]), "3'UTR" = unname(blood_cols["Yellow"]), - "Silent" = "#A020F0") + "5'UTR" = unname(blood_cols["LimeGreen"]), + "Silent" = "#D8A7CA" + ) all_colours[["rainfall"]] = c( From c71efb2bd3e85a6118a2e3097e8ef33cb2332700 Mon Sep 17 00:00:00 2001 From: lkhilton Date: Tue, 3 Dec 2024 18:01:40 -0800 Subject: [PATCH 18/21] Fix redundant colour --- R/get_gambl_colours.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/get_gambl_colours.R b/R/get_gambl_colours.R index d0aa628..b72add0 100644 --- a/R/get_gambl_colours.R +++ b/R/get_gambl_colours.R @@ -183,10 +183,10 @@ get_gambl_colours = function(classification = "all", "1"="#92C5DE", "0"="#4393C3" ) - all_colours[["blood"]] = c( - "Red" = "#c41230", "Blue"="#115284","Green" = "#39b54b", - "Purple" = "#5c266c", "Orange"="#fe9003","Green" = "#046852", - "Lavendar" = "#8781bd", "Steel Blue"= "#455564", + all_colours[["blood"]] <- c( + "Red" = "#c41230", "Blue" = "#115284", "Light Green" = "#39b54b", + "Purple" = "#5c266c", "Orange" = "#fe9003", "Green" = "#046852", + "Lavendar" = "#8781bd", "Steel Blue" = "#455564", "Light Blue" = "#2cace3", "Magenta" = "#e90c8b", "Mustard" = "#b76d29", "LimeGreen" = "#a4bb87", "Brown" = "#5f3a17", "Gray" = "#bdbdc1", "Yellow" = "#f9bd1f" From b8bf2923ac223a1a49a22d9dc922d211259d4ff0 Mon Sep 17 00:00:00 2001 From: Kdreval Date: Wed, 4 Dec 2024 10:19:48 -0800 Subject: [PATCH 19/21] new feature: make new behaviour conditional --- R/create_onco_matrix.R | 15 +++++++++++++-- man/create_onco_matrix.Rd | 4 +++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/R/create_onco_matrix.R b/R/create_onco_matrix.R index 4bedce8..bb667fa 100644 --- a/R/create_onco_matrix.R +++ b/R/create_onco_matrix.R @@ -9,6 +9,7 @@ #' @param maf_df Data frame with maf data. Required parameter. The minimal required columns are Tumor_Sample_Barcode, Hugo_Symbol, Variant_Classification, Start_Position, and End_Position. #' @param genes List of genes to return in the resulting matrix. When not provided, matrix is generated for each gene present in the input maf data. #' @param add_missing When list of genes is provided and some of the specified genes are not mutated in the provided maf data, they will still be added to the matrix with 0% mutation frequency. Default TRUE. +#' @param subset_to_coding Will conditionally subset to only coding variants. Default is FALSE (no subsetting). #' #' @return matrix #' @@ -23,7 +24,8 @@ create_onco_matrix = function( maf_df, genes, - add_missing = TRUE + add_missing = TRUE, + subset_to_coding = FALSE ){ if(missing(maf_df)){ stop( @@ -38,6 +40,15 @@ create_onco_matrix = function( ) } + if(subset_to_coding){ + onco_matrix_coding <- coding_class[ + !coding_class %in% c("Silent", "Splice_Region", "Targeted_Region") + ] + maf_df <- maf_df %>% + dplyr::filter( + Variant_Classification %in% onco_matrix_coding + ) + } onco_matrix <- maf_df %>% dplyr::distinct( @@ -48,7 +59,7 @@ create_onco_matrix = function( dplyr::select( Tumor_Sample_Barcode, Hugo_Symbol, Variant_Classification ) %>% - dplyr::group_by( + dplyr::group_by( Hugo_Symbol, Tumor_Sample_Barcode ) %>% dplyr::mutate( diff --git a/man/create_onco_matrix.Rd b/man/create_onco_matrix.Rd index e062087..5d5f812 100644 --- a/man/create_onco_matrix.Rd +++ b/man/create_onco_matrix.Rd @@ -4,7 +4,7 @@ \alias{create_onco_matrix} \title{Create onco matrix from maf data.} \usage{ -create_onco_matrix(maf_df, genes, add_missing = TRUE) +create_onco_matrix(maf_df, genes, add_missing = TRUE, subset_to_coding = FALSE) } \arguments{ \item{maf_df}{Data frame with maf data. Required parameter. The minimal required columns are Tumor_Sample_Barcode, Hugo_Symbol, Variant_Classification, Start_Position, and End_Position.} @@ -12,6 +12,8 @@ create_onco_matrix(maf_df, genes, add_missing = TRUE) \item{genes}{List of genes to return in the resulting matrix. When not provided, matrix is generated for each gene present in the input maf data.} \item{add_missing}{When list of genes is provided and some of the specified genes are not mutated in the provided maf data, they will still be added to the matrix with 0\% mutation frequency. Default TRUE.} + +\item{subset_to_coding}{Will conditionally subset to only coding variants. Default is FALSE (no subsetting).} } \value{ matrix From 8a81137034bdabbd213da4c466ca97342d48fe80 Mon Sep 17 00:00:00 2001 From: lkhilton Date: Wed, 18 Dec 2024 13:59:38 -0800 Subject: [PATCH 20/21] Add a colour for intron mutations --- R/get_gambl_colours.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/get_gambl_colours.R b/R/get_gambl_colours.R index b72add0..89a09bf 100644 --- a/R/get_gambl_colours.R +++ b/R/get_gambl_colours.R @@ -49,7 +49,8 @@ get_gambl_colours = function(classification = "all", LimeGreen = "#a4bb87", Brown = "#5f3a17", Gray = "#bdbdc1", - Yellow = "#f9bd1f" + Yellow = "#f9bd1f", + Mustard = "#b76d29" ) all_colours[["seq_type"]] = c("mrna" = "#E41A1C", From 81721ce90d31748eaa31741ae9cbfd1773cbd3e8 Mon Sep 17 00:00:00 2001 From: lkhilton Date: Wed, 18 Dec 2024 13:59:51 -0800 Subject: [PATCH 21/21] Formatting changes (sorry) --- R/get_gambl_colours.R | 444 ++++++++++++++++++++++-------------------- 1 file changed, 230 insertions(+), 214 deletions(-) diff --git a/R/get_gambl_colours.R b/R/get_gambl_colours.R index 89a09bf..579baf0 100644 --- a/R/get_gambl_colours.R +++ b/R/get_gambl_colours.R @@ -22,18 +22,16 @@ #' @export #' #' @examples -#' lymphgen_cols = get_gambl_colours("lymphgen") +#' lymphgen_cols <- get_gambl_colours("lymphgen") #' -#' -get_gambl_colours = function(classification = "all", - alpha = 1, - as_list = FALSE, - as_dataframe = FALSE, - return_available = FALSE, - verbose = FALSE){ - - all_colours = list() - everything = c() +get_gambl_colours <- function(classification = "all", + alpha = 1, + as_list = FALSE, + as_dataframe = FALSE, + return_available = FALSE, + verbose = FALSE) { + all_colours <- list() + everything <- c() blood_cols <- c( Red = "#c41230", @@ -42,85 +40,98 @@ get_gambl_colours = function(classification = "all", Purple = "#5c266c", Orange = "#fe9003", Green = "#046852", - Lavendar = "#8781bd", + Lavendar = "#8781bd", "Steel Blue" = "#455564", - "Light Blue" = "#2cace3", - Magenta = "#e90c8b", - LimeGreen = "#a4bb87", - Brown = "#5f3a17", - Gray = "#bdbdc1", + "Light Blue" = "#2cace3", + Magenta = "#e90c8b", + LimeGreen = "#a4bb87", + Brown = "#5f3a17", + Gray = "#bdbdc1", Yellow = "#f9bd1f", Mustard = "#b76d29" ) - all_colours[["seq_type"]] = c("mrna" = "#E41A1C", - "genome" = "#377EB8", - "capture" = "#4DAF4A") + all_colours[["seq_type"]] <- c( + "mrna" = "#E41A1C", + "genome" = "#377EB8", + "capture" = "#4DAF4A" + ) - all_colours[["type"]] = c("gain" = "#0000FF", - "loss" = "#FF0000") + all_colours[["type"]] <- c( + "gain" = "#0000FF", + "loss" = "#FF0000" + ) - all_colours[["hmrn"]] = c("BCL2-MYC" = "#52000F", - "BCL2" = "#721F0F", - "SOCS1/SGK1" = "#D66B1F", - "TET2/SGK1" = "#C41230", - "MYD88" = "#3B5FAC", - "NOTCH2" = "#7F3293", - "NOTCH1" = "#55B55E", - "Other" = "#ACADAF") + all_colours[["hmrn"]] <- c( + "BCL2-MYC" = "#52000F", + "BCL2" = "#721F0F", + "SOCS1/SGK1" = "#D66B1F", + "TET2/SGK1" = "#C41230", + "MYD88" = "#3B5FAC", + "NOTCH2" = "#7F3293", + "NOTCH1" = "#55B55E", + "Other" = "#ACADAF" + ) - all_colours[["EBV"]] = c("EBV-positive" = "#7F055F", - "EBV-negative" = "#E5A4CB", - "POS" = "#7F055F", - "NEG" = "#E5A4CB") + all_colours[["EBV"]] <- c( + "EBV-positive" = "#7F055F", + "EBV-negative" = "#E5A4CB", + "POS" = "#7F055F", + "NEG" = "#E5A4CB" + ) - all_colours[["BL"]] = c("Q53-BL" = "#A6CEE3", - "M53-BL" = "#A6CEE3", #added because genetic subgroup still refers to it this way - "DLBCL-A" = "#721F0F", - "IC-BL" = "#45425A", - "DGG-BL" = "#E90C8B", - "DLBCL-B" = "#FB9A99", - "DLBCL-C" = "#C41230") + all_colours[["BL"]] <- c( + "Q53-BL" = "#A6CEE3", + "M53-BL" = "#A6CEE3", # added because genetic subgroup still refers to it this way + "DLBCL-A" = "#721F0F", + "IC-BL" = "#45425A", + "DGG-BL" = "#E90C8B", + "DLBCL-B" = "#FB9A99", + "DLBCL-C" = "#C41230" + ) - all_colours[["FL"]] = c(dFL = "#99C1B9", cFL = "#D16666", DLBCL = "#479450") + all_colours[["FL"]] <- c(dFL = "#99C1B9", cFL = "#D16666", DLBCL = "#479450") - all_colours[["lymphgenerator"]] = c("MP3"="#5B8565", - "EGB" = "#98622A", - "ETB"="#813F3D", - "aSCI"="#D66B1F", - "aSEL"="#6A0D18", - "MCaP"="#5F8CFF", - "BNZ"="#8870B6", - "EZB"="#721F0F", - "ST2"="#C41230", - "UNCLASS"="#05631E" + all_colours[["lymphgenerator"]] <- c( + "MP3" = "#5B8565", + "EGB" = "#98622A", + "ETB" = "#813F3D", + "aSCI" = "#D66B1F", + "aSEL" = "#6A0D18", + "MCaP" = "#5F8CFF", + "BNZ" = "#8870B6", + "EZB" = "#721F0F", + "ST2" = "#C41230", + "UNCLASS" = "#05631E" ) - all_colours[["chapuy_classifier"]] = c( + all_colours[["chapuy_classifier"]] <- c( C0 = "#bebebe", C1 = "#803D99", - C2 ="#00A2D2", + C2 = "#00A2D2", C3 = "#F39123", C4 = "#50BFAD", C5 = "#DE292A" ) - all_colours[["lacy_classifier"]] = all_colours[["hmrn"]] + all_colours[["lacy_classifier"]] <- all_colours[["hmrn"]] - all_colours[["lymphgen"]] = c("EZB-MYC" = "#52000F", - "EZB" = "#721F0F", - "EZB-COMP" = "#C7371A", - "ST2" = "#C41230", - "ST2-COMP" = "#EC3251", - "MCD" = "#3B5FAC", - "MCD-COMP" = "#6787CB", - "BN2" = "#7F3293", - "BN2-COMP" = "#A949C1", - "N1" = "#55B55E", - "N1-COMP" = "#7FC787", - "A53" = "#5b6d8a", - "Other" = "#ACADAF", - "COMPOSITE" = "#ACADAF") + all_colours[["lymphgen"]] <- c( + "EZB-MYC" = "#52000F", + "EZB" = "#721F0F", + "EZB-COMP" = "#C7371A", + "ST2" = "#C41230", + "ST2-COMP" = "#EC3251", + "MCD" = "#3B5FAC", + "MCD-COMP" = "#6787CB", + "BN2" = "#7F3293", + "BN2-COMP" = "#A949C1", + "N1" = "#55B55E", + "N1-COMP" = "#7FC787", + "A53" = "#5b6d8a", + "Other" = "#ACADAF", + "COMPOSITE" = "#ACADAF" + ) all_colours[["mutation"]] <- @@ -138,10 +149,11 @@ get_gambl_colours = function(classification = "all", "Splice_Region" = unname(blood_cols["Orange"]), "3'UTR" = unname(blood_cols["Yellow"]), "5'UTR" = unname(blood_cols["LimeGreen"]), + "Intron" = unname(blood_cols["Mustard"]), "Silent" = "#D8A7CA" ) - all_colours[["rainfall"]] = + all_colours[["rainfall"]] <- c( "C>A" = "#2196F3FF", "C>G" = "#3F51B5FF", @@ -152,37 +164,38 @@ get_gambl_colours = function(classification = "all", "T>G" = "#FF9800FF" ) - all_colours[["pos_neg"]]=c( - "POS"="#c41230", - "NEG"="#E88873", - "PARTIAL"="#E88873", - "yes"="#c41230", - "no"="#E88873", - "YES"="#c41230", - "NO"="#E88873", - "FAIL"="#bdbdc1", - "positive"="#c41230", - "negative"="#E88873", - "fail"="#bdbdc1") + all_colours[["pos_neg"]] <- c( + "POS" = "#c41230", + "NEG" = "#E88873", + "PARTIAL" = "#E88873", + "yes" = "#c41230", + "no" = "#E88873", + "YES" = "#c41230", + "NO" = "#E88873", + "FAIL" = "#bdbdc1", + "positive" = "#c41230", + "negative" = "#E88873", + "fail" = "#bdbdc1" + ) - all_colours[["copy_number"]]=c( - "nLOH"="#E026D7", - "14"="#380015", - "15"="#380015", - "13"="#380015", - "12"="#380015", - "11"="#380015", - "10"="#380015", - "9"="#380015", - "8"="#380015", - "7"="#380015", - "6"="#380015", - "5"="#67001F", - "4"="#B2182B", - "3"="#D6604D", - "2"="#ede4c7", - "1"="#92C5DE", - "0"="#4393C3" + all_colours[["copy_number"]] <- c( + "nLOH" = "#E026D7", + "14" = "#380015", + "15" = "#380015", + "13" = "#380015", + "12" = "#380015", + "11" = "#380015", + "10" = "#380015", + "9" = "#380015", + "8" = "#380015", + "7" = "#380015", + "6" = "#380015", + "5" = "#67001F", + "4" = "#B2182B", + "3" = "#D6604D", + "2" = "#ede4c7", + "1" = "#92C5DE", + "0" = "#4393C3" ) all_colours[["blood"]] <- c( "Red" = "#c41230", "Blue" = "#115284", "Light Green" = "#39b54b", @@ -192,33 +205,34 @@ get_gambl_colours = function(classification = "all", "LimeGreen" = "#a4bb87", "Brown" = "#5f3a17", "Gray" = "#bdbdc1", "Yellow" = "#f9bd1f" ) - all_colours[["sex"]]=c( - "M"="#118AB2", - "Male"="#118AB2", - "male"="#118AB2", - "F"="#EF476F", - "Female"="#EF476F", - "female"="#EF476F") + all_colours[["sex"]] <- c( + "M" = "#118AB2", + "Male" = "#118AB2", + "male" = "#118AB2", + "F" = "#EF476F", + "Female" = "#EF476F", + "female" = "#EF476F" + ) - all_colours[["clinical"]]= + all_colours[["clinical"]] <- c( - "M"="#118AB2", - "Male"="#118AB2", - "F"="#EF476F", - "Female"="#EF476F", - "EBV-positive"="#7F055F", - "EBV-negative"="#E5A4CB", - "POS"="#c41230", - "NEG"="#E88873", - "FAIL"="#bdbdc1", - "Alive"="#046852", - "alive"="#046852", - "dead"="#a4bb87", - "Dead"="#a4bb87", - "deceased"="#a4bb87", - "unknown"="#C3C9E9", - "IPI_0"= "#3B9AB2", - "IPI_1"= "#78B7C5", + "M" = "#118AB2", + "Male" = "#118AB2", + "F" = "#EF476F", + "Female" = "#EF476F", + "EBV-positive" = "#7F055F", + "EBV-negative" = "#E5A4CB", + "POS" = "#c41230", + "NEG" = "#E88873", + "FAIL" = "#bdbdc1", + "Alive" = "#046852", + "alive" = "#046852", + "dead" = "#a4bb87", + "Dead" = "#a4bb87", + "deceased" = "#a4bb87", + "unknown" = "#C3C9E9", + "IPI_0" = "#3B9AB2", + "IPI_1" = "#78B7C5", "IPI_2" = "#EBCC2A", "IPI_3" = "#E1AF00", "IPI_4" = "#F21A00", @@ -226,124 +240,126 @@ get_gambl_colours = function(classification = "all", "adult" = "#DCE0E5", "Pediatric" = "#677A8E", "pediatric" = "#677A8E", - "Diagnosis"="#E57A44", - "A"="#E57A44", - "B"="#721817", - "C"="#721817", - "D"="#721817", - "E"="#721817", - "Progression"="#A44A3F", - "Relapse"="#721817", - "I"="#75F4F4", - "FOLL1"="#75F4F4", - "II"="#90E0F3", - "FOLL2"="#90E0F3", - "IIIA"="#B8B3E9", - "FOLL3A"="#B8B3E9", - "IIIB"="#D999B9", - "FOLL3B"="#D999B9", - "matched"="#F0B67F", - "unmatched"="#D6D1B1", - "FF"="#009FFD", - "frozen"="#009FFD", - "FFPE"="#95B2B8", - "ctDNA"="#7E6148", - "NA"="white" + "Diagnosis" = "#E57A44", + "A" = "#E57A44", + "B" = "#721817", + "C" = "#721817", + "D" = "#721817", + "E" = "#721817", + "Progression" = "#A44A3F", + "Relapse" = "#721817", + "I" = "#75F4F4", + "FOLL1" = "#75F4F4", + "II" = "#90E0F3", + "FOLL2" = "#90E0F3", + "IIIA" = "#B8B3E9", + "FOLL3A" = "#B8B3E9", + "IIIB" = "#D999B9", + "FOLL3B" = "#D999B9", + "matched" = "#F0B67F", + "unmatched" = "#D6D1B1", + "FF" = "#009FFD", + "frozen" = "#009FFD", + "FFPE" = "#95B2B8", + "ctDNA" = "#7E6148", + "NA" = "white" ) - all_colours[["pathology"]] = c( - "B-ALL"="#C1C64B", - "CLL"="#889BE5", - "MCL"="#40E0D0", - "BL"="#926CAD", - "mBL"="#34C7F4", - "tFL"="#FF8595", - "DLBCL-BL-like"="#34C7F4", - "pre-HT"="#754F5B", - "PMBL"= "#227C9D", - "PMBCL"="#227C9D", - "FL"="#EA8368", - "no-HT"="#EA8368", - "COMFL"="#8BBC98", - "COM"="#8BBC98", - "post-HT"="#479450", - "DLBCL"="#479450", - "denovo-DLBCL"="#479450", - "HGBL-NOS"="#294936", - "HGBL"="#294936", - "HGBL-DH/TH"="#7A1616", + all_colours[["pathology"]] <- c( + "B-ALL" = "#C1C64B", + "CLL" = "#889BE5", + "MCL" = "#40E0D0", + "BL" = "#926CAD", + "mBL" = "#34C7F4", + "tFL" = "#FF8595", + "DLBCL-BL-like" = "#34C7F4", + "pre-HT" = "#754F5B", + "PMBL" = "#227C9D", + "PMBCL" = "#227C9D", + "FL" = "#EA8368", + "no-HT" = "#EA8368", + "COMFL" = "#8BBC98", + "COM" = "#8BBC98", + "post-HT" = "#479450", + "DLBCL" = "#479450", + "denovo-DLBCL" = "#479450", + "HGBL-NOS" = "#294936", + "HGBL" = "#294936", + "HGBL-DH/TH" = "#7A1616", "PBL" = "#E058C0", "Plasmablastic" = "#E058C0", "CNS" = "#E2EF60", "THRLBCL" = "#A5F2B3", - "MM"="#CC9A42", - "SCBC"="#8c9c90", - "UNSPECIFIED"="#cfba7c", - "OTHER"="#cfba7c", - "MZL"="#065A7F", - "SMZL"="#065A7F", + "MM" = "#CC9A42", + "SCBC" = "#8c9c90", + "UNSPECIFIED" = "#cfba7c", + "OTHER" = "#cfba7c", + "MZL" = "#065A7F", + "SMZL" = "#065A7F", "Prolymphocytic" = "#7842f5" ) - all_colours[["coo"]] = c( + all_colours[["coo"]] <- c( "ABC" = "#05ACEF", "UNCLASS" = "#05631E", "Unclass" = "#05631E", "U" = "#05631E", "UNC" = "#05631E", - "GCB"= "#F58F20", - "DHITsig-"= "#F58F20", - "DHITsigNeg"= "#F58F20", + "GCB" = "#F58F20", + "DHITsig-" = "#F58F20", + "DHITsigNeg" = "#F58F20", "DHITsig-IND" = "#003049", "DHITsig+" = "#D62828", "DHITsigPos" = "#D62828", "NA" = "#ACADAF" ) - all_colours[["cohort"]] = c("Chapuy"="#8B0000","Chapuy, 2018"="#8B0000", - "Arthur"= "#8845A8","Arthur, 2018"= "#8845A8", - "Schmitz"= "#2C72B2","Schmitz, 2018"= "#2C72B2", - "Reddy" = "#E561C3","Reddy, 2017" = "#E561C3", - "Morin"= "#8DB753", "Morin, 2013"= "#8DB753", - "Kridel"= "#4686B7", "Kridel, 2016"= "#4686B7", - "ICGC"="#E09C3B","ICGC, 2018"="#E09C3B", - "Grande"="#e90c8b", "Grande, 2019"="#e90c8b") + all_colours[["cohort"]] <- c( + "Chapuy" = "#8B0000", "Chapuy, 2018" = "#8B0000", + "Arthur" = "#8845A8", "Arthur, 2018" = "#8845A8", + "Schmitz" = "#2C72B2", "Schmitz, 2018" = "#2C72B2", + "Reddy" = "#E561C3", "Reddy, 2017" = "#E561C3", + "Morin" = "#8DB753", "Morin, 2013" = "#8DB753", + "Kridel" = "#4686B7", "Kridel, 2016" = "#4686B7", + "ICGC" = "#E09C3B", "ICGC, 2018" = "#E09C3B", + "Grande" = "#e90c8b", "Grande, 2019" = "#e90c8b" + ) - all_colours[["indels"]] = c("DEL" = "#53B1FC", "INS" = "#FC9C6D") - all_colours[["svs"]] = c("DEL" = "#53B1FC", "DUP" = "#FC9C6D") - all_colours[["genetic_subgroup"]] = c(all_colours[["lymphgen"]],all_colours[["BL"]],all_colours[["FL"]]) - #print(all_colours) - if(alpha <1){ - for(colslot in names(all_colours)){ - raw_cols = all_colours[[colslot]] - raw_cols_rgb = col2rgb(raw_cols) - alpha_cols = rgb(raw_cols_rgb[1L, ], raw_cols_rgb[2L, ], raw_cols_rgb[3L, ], alpha = alpha * 255L, names = names(raw_cols), maxColorValue = 255L) - names(alpha_cols) = names(raw_cols) - all_colours[[colslot]] = alpha_cols + all_colours[["indels"]] <- c("DEL" = "#53B1FC", "INS" = "#FC9C6D") + all_colours[["svs"]] <- c("DEL" = "#53B1FC", "DUP" = "#FC9C6D") + all_colours[["genetic_subgroup"]] <- c(all_colours[["lymphgen"]], all_colours[["BL"]], all_colours[["FL"]]) + # print(all_colours) + if (alpha < 1) { + for (colslot in names(all_colours)) { + raw_cols <- all_colours[[colslot]] + raw_cols_rgb <- col2rgb(raw_cols) + alpha_cols <- rgb(raw_cols_rgb[1L, ], raw_cols_rgb[2L, ], raw_cols_rgb[3L, ], alpha = alpha * 255L, names = names(raw_cols), maxColorValue = 255L) + names(alpha_cols) <- names(raw_cols) + all_colours[[colslot]] <- alpha_cols } } - for(this_group in names(all_colours)){ - everything = c(everything, all_colours[[this_group]]) + for (this_group in names(all_colours)) { + everything <- c(everything, all_colours[[this_group]]) } - #return matching value from lowercase version of the argument if it exists - lc_class = tolower(classification) - if(return_available){ + # return matching value from lowercase version of the argument if it exists + lc_class <- tolower(classification) + if (return_available) { return(names(all_colours)) } - if(classification %in% names(all_colours)){ - if(as_dataframe){ - some_col=all_colours[[classification]] - df_ugly = data.frame(name=names(some_col),colour=unname(some_col)) - df_tidy = mutate(df_ugly,group=classification) + if (classification %in% names(all_colours)) { + if (as_dataframe) { + some_col <- all_colours[[classification]] + df_ugly <- data.frame(name = names(some_col), colour = unname(some_col)) + df_tidy <- mutate(df_ugly, group = classification) return(df_tidy) } return(all_colours[[classification]]) - }else if(lc_class %in% names(all_colours)){ + } else if (lc_class %in% names(all_colours)) { return(all_colours[[lc_class]]) - }else if(as_list){ + } else if (as_list) { return(all_colours) - }else if(as_dataframe){ - df_ugly = data.frame(name = names(unlist(all_colours, use.names = T)), colour = unlist(all_colours, use.names = T)) - df_tidy = separate(df_ugly,name,into=c("group","name"),sep="\\.") + } else if (as_dataframe) { + df_ugly <- data.frame(name = names(unlist(all_colours, use.names = T)), colour = unlist(all_colours, use.names = T)) + df_tidy <- separate(df_ugly, name, into = c("group", "name"), sep = "\\.") return(df_tidy) - }else{ + } else { return(everything) } }