Skip to content

Commit

Permalink
Clean up clustermole_overlaps()
Browse files (browse the repository at this point in the history)
  • Loading branch information
igordot committed May 7, 2024
1 parent 048f2ef commit 22099c5
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 9 deletions.
17 changes: 9 additions & 8 deletions R/overlaps.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,16 @@
#' my_overlaps <- clustermole_overlaps(genes = my_genes, species = "hs")
#' head(my_overlaps)
clustermole_overlaps <- function(genes, species) {

# check that the genes vector seems reasonable
if (!is(genes, "character")) {
stop("genes is not a character vector")
stop("`genes` is not a character vector")
}
genes <- unique(genes)
genes <- unique(sort(genes))
if (length(genes) < 5) {
stop("too few genes")
stop("input should be at least 5 genes")
}
if (length(genes) > 5000) {
stop("too many genes")
stop("input should be less than 5,000 genes")
}

# retrieve markers
Expand All @@ -39,11 +38,13 @@ clustermole_overlaps <- function(genes, species) {
dplyr::select(-dplyr::starts_with("gene")) %>%
dplyr::distinct()

# check that input genes overlap species genes
# check that input genes overlap marker genes for a given species
all_genes <- unique(markers_tbl$gene)
input_genes <- genes
genes <- intersect(genes, all_genes)
if (length(genes) < 5) {
stop("the genes do not appear to correspond to the given species")
if (length(genes) < max(3, length(input_genes) * 0.2)) {
problematic_genes <- setdiff(input_genes, genes)
stop("large fraction of input genes are not known (possibly wrong species): ", toString(problematic_genes))
}

# run the enrichment analysis
Expand Down
14 changes: 13 additions & 1 deletion tests/testthat/test-overlaps.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,16 @@ markers_mm_tbl <- clustermole_markers(species = "mm")
# gene list taken from the human markers table, shuffled so that
# positional subsets such as gene_names[1:10] are random
gene_names <- unique(markers_hs_tbl$gene)
gene_names <- sample(gene_names)

# generate strings that look like real genes (three letters and a digit)
# every position is drawn independently with replacement: pasting three
# `sample(LETTERS)` permutations recycles the same 26 letter triples, which
# leaves at most 26 * 9 = 234 distinct names among the 1000 generated
fake_gene_names <- paste0(
  sample(LETTERS, 1000, replace = TRUE),
  sample(LETTERS, 1000, replace = TRUE),
  sample(LETTERS, 1000, replace = TRUE),
  sample(1:9, 1000, replace = TRUE)
)
# drop duplicates and any name that collides with a real gene symbol, so a
# "fake" gene can never count towards the overlap with the marker genes
# (the result may therefore hold slightly fewer than 1000 names)
fake_gene_names <- setdiff(fake_gene_names, gene_names)
fake_gene_names <- sample(fake_gene_names)

# Inputs that clustermole_overlaps() is expected to reject with an error.
test_that("clustermole_overlaps() wrong input", {
# fewer than 5 genes
expect_error(clustermole_overlaps(gene_names[1:3], species = "hs"))
# more than 5000 genes (assumes gene_names holds that many entries -- TODO confirm)
expect_error(clustermole_overlaps(gene_names[1:10000], species = "hs"))
# 3 real genes plus 3 placeholder symbols
# NOTE(review): if "X", "Y", "Z" are unknown, 3 of 6 genes are known, which is
# not below max(3, 20% of input); this looks like the pre-change expectation
# shown by the diff -- confirm it has been removed
expect_error(clustermole_overlaps(c(gene_names[1:3], "X", "Y", "Z"), species = "hs"))
# only made-up names
expect_error(clustermole_overlaps(fake_gene_names[1:100], species = "hs"))
# 2 real genes among 5 made-up names: fewer than 3 known genes
expect_error(clustermole_overlaps(c(gene_names[1:2], fake_gene_names[1:5]), species = "hs"))
# 5 real genes among 25 made-up names: under the 20% known-gene threshold
# when the made-up names are distinct and unknown
expect_error(clustermole_overlaps(c(gene_names[1:5], fake_gene_names[1:25]), species = "hs"))
# a list instead of a character vector
expect_error(clustermole_overlaps(as.list(gene_names[1:10]), species = "hs"))
})

Expand All @@ -19,6 +25,12 @@ test_that("clustermole_overlaps() human input", {
expect_gt(nrow(overlap_tbl), 1)
})

# A query that mixes real genes with made-up names should still return
# an overlaps table.
test_that("clustermole_overlaps() human input with fake genes", {
  mixed_genes <- c(gene_names[1:10], fake_gene_names[1:20])
  res <- clustermole_overlaps(genes = mixed_genes, species = "hs")
  expect_s3_class(res, "tbl_df")
  expect_gt(nrow(res), 1)
})

# shuffled, de-duplicated gene list for the mouse overrepresentation tests
gene_names <- sample(unique(markers_mm_tbl$gene))
Expand Down

0 comments on commit 22099c5

Please sign in to comment.