From 0fcd86002c0633509569d312bd47cf28b77f9a66 Mon Sep 17 00:00:00 2001
From: igor <6363505+igordot@users.noreply.github.com>
Date: Mon, 6 May 2024 17:20:47 -0400
Subject: [PATCH] Clean up clustermole_overlaps()

---
 R/overlaps.R                   | 17 +++++++++--------
 tests/testthat/test-overlaps.R |  2 +-
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/R/overlaps.R b/R/overlaps.R
index c4695a8..c793a30 100644
--- a/R/overlaps.R
+++ b/R/overlaps.R
@@ -18,17 +18,16 @@
 #' my_overlaps <- clustermole_overlaps(genes = my_genes, species = "hs")
 #' head(my_overlaps)
 clustermole_overlaps <- function(genes, species) {
-
   # check that the genes vector seems reasonable
   if (!is(genes, "character")) {
-    stop("genes is not a character vector")
+    stop("`genes` is not a character vector")
   }
-  genes <- unique(genes)
+  genes <- unique(sort(genes))
   if (length(genes) < 5) {
-    stop("too few genes")
+    stop("input should be at least 5 genes")
   }
   if (length(genes) > 5000) {
-    stop("too many genes")
+    stop("input should be less than 5,000 genes")
   }
 
   # retrieve markers
@@ -39,11 +38,13 @@ clustermole_overlaps <- function(genes, species) {
     dplyr::select(-dplyr::starts_with("gene")) %>%
     dplyr::distinct()
 
-  # check that input genes overlap species genes
+  # check that input genes overlap marker genes for a given species
   all_genes <- unique(markers_tbl$gene)
+  input_genes <- genes
   genes <- intersect(genes, all_genes)
-  if (length(genes) < 5) {
-    stop("the genes do not appear to correspond to the given species")
+  if (length(genes) < (length(input_genes) * 0.25)) {
+    problematic_genes <- setdiff(input_genes, genes)
+    stop("many input genes are not known (possibly wrong species): ", toString(problematic_genes))
   }
 
   # run the enrichment analysis
diff --git a/tests/testthat/test-overlaps.R b/tests/testthat/test-overlaps.R
index 1d52601..82617ba 100644
--- a/tests/testthat/test-overlaps.R
+++ b/tests/testthat/test-overlaps.R
@@ -9,7 +9,7 @@ gene_names <- sample(gene_names)
 test_that("clustermole_overlaps() wrong input", {
   expect_error(clustermole_overlaps(gene_names[1:3], species = "hs"))
   expect_error(clustermole_overlaps(gene_names[1:10000], species = "hs"))
-  expect_error(clustermole_overlaps(c(gene_names[1:3], "X", "Y", "Z"), species = "hs"))
+  expect_error(clustermole_overlaps(c(gene_names[1:3], "Z1", "Z2", "Z3", "Z4"), species = "hs"))
   expect_error(clustermole_overlaps(as.list(gene_names[1:10]), species = "hs"))
 })
 