morinlab · Kdreval · Jul 9, 2024 · Aug 29, 2023 · Apr 21, 2024 · Apr 21, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -38,3 +38,6 @@ Imports:
     tibble,
     tidyr
 LazyDataCompression: xz
+Suggests:
+    testthat (>= 3.0.0)
+Config/testthat/edition: 3
diff --git a/R/get_ssm_by_regions.R b/R/get_ssm_by_regions.R
@@ -51,6 +51,7 @@ get_ssm_by_regions = function(these_sample_ids = NULL,
                               use_name_column = FALSE,
                               projection = "grch37",
                               verbose = FALSE,
+                              engine="default",
                               ...){
 
   # check provided projection
@@ -108,6 +109,22 @@ get_ssm_by_regions = function(these_sample_ids = NULL,
                 )
             }
         )
+    }else if(engine == "foverlaps"){
+      sample_maf = get_ssm_by_samples(these_samples_metadata=these_samples_metadata)
+      setkey(sample_maf, Chromosome, Start_Position,End_Position)
+      regions_dt = data.frame(all_lymphome_gene_regions) %>% rownames_to_column("Hugo_Symbol") %>% 
+        separate(all_lymphome_gene_regions,c("Chromosome","region"),sep = ":") %>%
+        separate(region,c("Start_Position","End_Position"),"-") %>%
+        mutate(Start_Position=as.numeric(Start_Position),End_Position=as.numeric(End_Position)) %>%
+        as.data.table()
+      setkey(regions_dt,Chromosome,Start_Position,End_Position)
+      maf_regions = foverlaps(sample_maf, regions_dt, type="within", which=TRUE,
+                              by.x=c("Chromosome","Start_Position","End_Position"),
+                              by.y=c("Chromosome","Start_Position","End_Position"))
+
+      match_rows=filter(maf_regions,!is.na(yid)) %>% pull(xid)
+      match_maf = sample_maf[match_rows,]
+      return(match_maf)
     }else{
         region_mafs = lapply(
             regions, function(x){

diff --git a/tests/testthat.R b/tests/testthat.R
@@ -0,0 +1,12 @@
+# This file is part of the standard setup for testthat.
+# It is recommended that you do not modify it.
+#
+# Where should you do additional test configuration?
+# Learn more about the roles of various files in:
+# * https://r-pkgs.org/tests.html
+# * https://testthat.r-lib.org/reference/test_package.html#special-files
+
+library(testthat)
+library(GAMBLR.data)
+# Entry point: pass the GAMBLR.data package name to testthat's test_check().
+test_check("GAMBLR.data")
diff --git a/tests/testthat/test-get_gambl_metadata.R b/tests/testthat/test-get_gambl_metadata.R
@@ -0,0 +1,20 @@
+# Pin the number of metadata rows returned by get_gambl_metadata() for each
+# seq_type_filter value; the genome count is the combined total minus capture.
+test_that("consistent row number", {
+  count_rows = function(seq_types) nrow(get_gambl_metadata(seq_type_filter = seq_types))
+  expect_equal(count_rows(c("genome", "capture")), 4785)
+  expect_equal(count_rows("capture"), 3100)
+  expect_equal(count_rows("genome"), 4785 - 3100)
+})
+
+
+# Sample IDs in the bundled maf/seg/bedpe tables (both builds) must all be in the metadata.
+test_that("all bundled samples in metadata", {
+  known_ids = GAMBLR.data::sample_data$meta$sample_id
+  grch37 = GAMBLR.data::sample_data$grch37
+  hg38 = GAMBLR.data::sample_data$hg38
+  grch37_ids = unique(c(grch37$maf$Tumor_Sample_Barcode, grch37$seg$ID, grch37$bedpe$tumour_sample_id))
+  hg38_ids = unique(c(hg38$maf$Tumor_Sample_Barcode, hg38$seg$ID, hg38$bedpe$tumour_sample_id))
+  expect_true(all(grch37_ids %in% known_ids))
+  expect_true(all(hg38_ids %in% known_ids))
+})
diff --git a/tests/testthat/test-id_ease.R b/tests/testthat/test-id_ease.R
@@ -0,0 +1,22 @@
+test_that("sane subsetting", {
+  meta = GAMBLR.data::sample_data$meta
+  # Ten randomly drawn sample IDs should come back as exactly ten samples.
+  first_draw = pull(slice_sample(meta, n = 10), sample_id)
+  expect_length(id_ease(these_samples_metadata = meta, these_sample_ids = first_draw)$these_samples, 10)
+  # The returned sample IDs must be the requested ones (and no extras).
+  second_draw = pull(slice_sample(meta, n = 10), sample_id)
+  expect_equal(id_ease(these_samples_metadata = meta, these_sample_ids = second_draw)$these_samples, second_draw)
+  # Fifteen drawn IDs plus the extra ID "bonus_feature".
+  # Author's note: this expectation currently fails; open question is why id_ease() warns instead of erroring.
+  padded_ids = c(pull(slice_sample(meta, n = 15), sample_id), "bonus_feature")
+  expect_equal(nrow(id_ease(these_samples_metadata = meta, these_sample_ids = padded_ids)$this_metadata), length(padded_ids))
+})
+
+
+
+test_that("handles nonsense", {
+  # Author's note: this currently fails; open question is why id_ease() warns instead of erroring.
+  requested_ids = c("r2d2", "c3P0", "Luke", "Reddy_3812T")
+  result = id_ease(these_samples_metadata = GAMBLR.data::sample_data$meta, these_sample_ids = requested_ids)
+  expect_length(result$these_samples, 1)
+})