Skip to content

Commit

Permalink
merge master
Browse files Browse the repository at this point in the history
rdmorin committed Jan 13, 2025
2 parents a6c6df6 + d829bfb commit ee585ce
Showing 24 changed files with 556 additions and 748 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build_check.yaml
Original file line number Diff line number Diff line change
@@ -35,11 +35,11 @@ jobs:

- name: Build package
run:
Rscript -e "devtools::install()"
Rscript --vanilla -e "devtools::install()"

- name: Check package
run:
Rscript -e "devtools::check(vignettes = FALSE, args = '--no-examples')"
Rscript --vanilla -e "devtools::check(vignettes = FALSE, args = '--no-examples')"

- name: Upload check results
if: failure()
5 changes: 1 addition & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -15,12 +15,9 @@ Imports:
GAMBLR.data,
ggplot2,
ggthemes,
philentropy,
readr,
stringr,
tibble,
tidyr,
workflowr
tidyr
Remotes:
morinlab/GAMBLR.data
Encoding: UTF-8
21 changes: 1 addition & 20 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -10,7 +10,6 @@ export(coding_class)
export(coding_vc)
export(colour_aliases)
export(compare_coding_mutation_pattern)
export(cool_overlaps)
export(copy_no_clobber)
export(create_onco_matrix)
export(decorate_bed)
@@ -21,6 +20,7 @@ export(get_template_wildcards)
export(get_unmatched_normals)
export(grob_wildcards)
export(handle_metadata)
export(kl_divergence)
export(maf_header)
export(normalize_expression_data)
export(rainfall_conv)
@@ -33,29 +33,10 @@ export(subset_cnstates)
export(theme_Morons)
export(trim_scale_expression)
export(vc_nonSynonymous)
export(web_initialize_gambl_site)
import(GAMBLR.data)
import(dplyr)
import(ggplot2)
import(readr)
import(stringr)
import(tibble)
import(tidyr)
import(workflowr)
importFrom(dplyr,left_join)
importFrom(ggthemes,theme_foundation)
importFrom(philentropy,KL)
importFrom(stats,end)
importFrom(stats,quantile)
importFrom(stats,start)
importFrom(stringr,str_c)
importFrom(stringr,str_extract)
importFrom(stringr,str_remove)
importFrom(stringr,str_remove_all)
importFrom(tidyr,unnest_auto)
importFrom(utils,head)
importFrom(utils,read.csv)
importFrom(utils,read.socket)
importFrom(utils,tail)
importFrom(utils,write.socket)
importFrom(utils,write.table)
22 changes: 0 additions & 22 deletions R/GAMBLR.helpers-package.R

This file was deleted.

4 changes: 2 additions & 2 deletions R/calculate_tmb.R
Original file line number Diff line number Diff line change
@@ -37,7 +37,7 @@
#' subset_to_nonSyn = FALSE
#' )
#'
#' @import dplyr
#' @import dplyr GAMBLR.data
#' @export
#'
calculate_tmb <- function(
@@ -105,7 +105,7 @@ calculate_tmb <- function(
# Expect bed format but be flexible about column names
columns <- colnames(regions_bed)[1:3]

overlap <- GAMBLR.helpers::cool_overlaps(
overlap <- GAMBLR.data::cool_overlaps(
data1 = maf_data,
data2 = regions_bed,
columns2 = columns
13 changes: 10 additions & 3 deletions R/compare_coding_mutation_pattern.R
Original file line number Diff line number Diff line change
@@ -8,6 +8,7 @@
#'
#' @return list
#'
#' @import dplyr
#' @export
compare_coding_mutation_pattern = function(maf_df1,maf_df2,gene){
if(missing(maf_df1) | missing(maf_df2)){
@@ -17,9 +18,9 @@ compare_coding_mutation_pattern = function(maf_df1,maf_df2,gene){
stop("Must provide the Hugo_Symbol of a single gene that is present in both maf files")
}
missense_positions1 = dplyr::filter(maf_df1,Hugo_Symbol==gene,!Variant_Classification %in% c("Silent","Splice_Site","Splice_Region"),Variant_Type=="SNP") %>%
pull(HGVSp_Short) %>% str_remove_all("p.\\w") %>% str_extract("\\d+") %>% as.numeric()
pull(HGVSp_Short) %>% gsub("p\\.\\w", "", .) %>% regmatches(., regexpr("\\d+", .)) %>% as.numeric()
missense_positions2 = dplyr::filter(maf_df2,Hugo_Symbol==gene,!Variant_Classification %in% c("Silent","Splice_Site","Splice_Region"),Variant_Type=="SNP") %>%
pull(HGVSp_Short) %>% str_remove_all("p.\\w") %>% str_extract("\\d+") %>% as.numeric()
pull(HGVSp_Short) %>% gsub("p\\.\\w", "", .) %>% regmatches(., regexpr("\\d+", .)) %>% as.numeric()
if(length(missense_positions1)==0 | length(missense_positions2)==0 ){
message(paste("no mutations for",gene,"in one or both data sets"))
return(list(kl=15))
@@ -35,6 +36,12 @@ compare_coding_mutation_pattern = function(maf_df1,maf_df2,gene){
all_counts = dplyr::select(full_df,-position) %>% t()
all_counts[1,]=all_counts[1,]/sum(all_counts[1,])
all_counts[2,]=all_counts[2,]/sum(all_counts[2,])
kl_out = KL(all_counts)

# Re-normalize each row so P and Q each sum to 1 (the rows were already scaled above, so this is a redundant safeguard)
P <- all_counts[1, ] / sum(all_counts[1, ])
Q <- all_counts[2, ] / sum(all_counts[2, ])

kl_out <- kl_divergence(P, Q)

return(list(df=full_df,kl=unname(kl_out)))
}
183 changes: 0 additions & 183 deletions R/cool_overlaps.R

This file was deleted.

20 changes: 12 additions & 8 deletions R/create_onco_matrix.R
Original file line number Diff line number Diff line change
@@ -9,6 +9,7 @@
#' @param maf_df Data frame with maf data. Required parameter. The minimal required columns are Tumor_Sample_Barcode, Hugo_Symbol, Variant_Classification, Start_Position, and End_Position.
#' @param genes List of genes to return in the resulting matrix. When not provided, matrix is generated for each gene present in the input maf data.
#' @param add_missing When list of genes is provided and some of the specified genes are not mutated in the provided maf data, they will still be added to the matrix with 0% mutation frequency. Default TRUE.
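#' @param subset_to_coding Logical. When TRUE, the maf data is first filtered to rows whose Variant_Classification is in coding_class, excluding Silent, Splice_Region, and Targeted_Region. Default is FALSE (no subsetting).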
#'
#' @return matrix
#'
@@ -23,7 +24,8 @@
create_onco_matrix = function(
maf_df,
genes,
add_missing = TRUE
add_missing = TRUE,
subset_to_coding = FALSE
){
if(missing(maf_df)){
stop(
@@ -38,10 +40,15 @@ create_onco_matrix = function(
)
}

onco_matrix_coding <- coding_class[
!coding_class %in% c("Silent", "Splice_Region", "Targeted_Region")
]

if(subset_to_coding){
onco_matrix_coding <- coding_class[
!coding_class %in% c("Silent", "Splice_Region", "Targeted_Region")
]
maf_df <- maf_df %>%
dplyr::filter(
Variant_Classification %in% onco_matrix_coding
)
}

onco_matrix <- maf_df %>%
dplyr::distinct(
@@ -52,9 +59,6 @@ create_onco_matrix = function(
dplyr::select(
Tumor_Sample_Barcode, Hugo_Symbol, Variant_Classification
) %>%
dplyr::filter(
Variant_Classification %in% onco_matrix_coding
) %>%
dplyr::group_by(
Hugo_Symbol, Tumor_Sample_Barcode
) %>%
4 changes: 2 additions & 2 deletions R/fuzzy_match_mafs.R
Original file line number Diff line number Diff line change
@@ -25,7 +25,7 @@
#'
#' @return data frame
#'
#' @import dplyr tidyr tibble
#' @import dplyr tidyr tibble GAMBLR.data
#' @export
#'
#' @examples
@@ -107,7 +107,7 @@ fuzzy_match_mafs <- function(
"Start_Position",
"End_Position"
)
matched <- cool_overlaps(
matched <- GAMBLR.data::cool_overlaps(
data1 = maf1,
data2 = maf2,
columns1 = columns_to_overlap,
Loading

0 comments on commit ee585ce

Please sign in to comment.