Merge pull request #160 from bschneidr/main

Fix #159, expand tests accordingly
gergness · Jul 14, 2024 · 5aea29a · 5aea29a
2 parents 1917f75 + 8324249
commit 5aea29a
Show file tree

Hide file tree

Showing 2 changed files with 51 additions and 4 deletions.
diff --git a/R/subset_svy_vars.R b/R/subset_svy_vars.R
@@ -12,7 +12,11 @@ subset_svy_vars.survey.design2 <- function(x, ..., .preserve = FALSE) {
   if (is.calibrated(x) || is.pps(x)){
     ## Set weights to zero: no memory saving possible
     ## Will always be numeric because srvyr's construction
-    x$prob[-row_numbers] <- Inf
+    if (length(row_numbers) == 0) {
+      x$prob <- rep(Inf, length(x$prob))
+    } else {
+      x$prob[-row_numbers] <- Inf
+    }
 
     index <- is.finite(x$prob)
     psu <- !duplicated(x$cluster[index, 1])
@@ -68,9 +72,18 @@ subset_svy_vars.twophase2 <- function(x, ..., .preserve = FALSE) {
 
   ## Set weights to zero:  don't try to save memory
   ## Will always have numeric because of srvyr's structure
-  x$prob[-row_numbers] <- Inf
-  x$phase2$prob[-row_numbers] <- Inf
-  x$dcheck <- lapply(x$dcheck, function(m) {m[-row_numbers, -row_numbers] <- 0; m})
+  if (length(row_numbers) == 0) {
+    x$prob <- rep(Inf, length(x$prob))
+    x$phase2$prob <- rep(Inf, length(x$phase2$prob))
+    x$dcheck <- lapply(x$dcheck, function(m) {
+      m[seq_len(nrow(m)), seq_len(ncol(m))] <- 0
+      m
+    })
+  } else {
+    x$prob[-row_numbers] <- Inf
+    x$phase2$prob[-row_numbers] <- Inf
+    x$dcheck <- lapply(x$dcheck, function(m) {m[-row_numbers, -row_numbers] <- 0; m})
+  }
 
   index <- is.finite(x$prob)
   psu <- !duplicated(x$phase2$cluster[index, 1])

diff --git a/tests/testthat/test_survey_statistics.r b/tests/testthat/test_survey_statistics.r
@@ -729,6 +729,8 @@ test_that("unweighted allows named arguments",  {
 
 test_that(
   "unweighted works with filtered data in calibrated or PPS designs", {
+
+    # First check for calibrated designs
     data(api, package = "survey")
     dclus1 <- as_survey_design(apiclus1, id = dnum, weights = pw, fpc = fpc)
 
@@ -739,6 +741,7 @@ test_that(
                          sample.margins = list(~stype,~sch.wide),
                          population.margins = list(pop.types, pop.schwide))
 
+    # Check when filtering returns at least one row
     out_calib <- raked_design %>%
       filter(sch.wide == "Yes") %>%
       group_by(stype) %>%
@@ -752,11 +755,28 @@ test_that(
     expect_equal(out_calib[['sample_size']],
                  out_noncalib[['sample_size']])
 
+    # Check when filtering returns zero rows
+
+    out_calib <- raked_design %>%
+      filter(sch.wide == "Fake Category") %>%
+      summarize(sample_size = unweighted(n()))
+
+    out_noncalib <- dclus1 %>%
+      filter(sch.wide == "Fake Category") %>%
+      summarize(sample_size = unweighted(n()))
+
+    expect_equal(out_calib[['sample_size']],
+                 expected = 0)
+    expect_equal(out_noncalib[['sample_size']],
+                 expected = 0)
+
+    # Next check for PPS design
     data(election, package = "survey")
 
     non_pps_design <- as_survey_design(election_pps, id = 1)
     pps_design <- as_survey_design(election_pps, id = 1, fpc = p, pps = "brewer")
 
+    # Check correct results when filtering returns at least one row
     out_nonpps <- non_pps_design %>%
       filter(County == "Los Angeles") %>%
       summarize(n_rows = unweighted(n()))
@@ -767,6 +787,20 @@ test_that(
 
     expect_equal(out_pps[['n_rows']],
                  out_nonpps[['n_rows']])
+
+    # Check correct results when filtering returns zero rows
+    out_nonpps <- non_pps_design %>%
+      filter(County == "Fake Category") %>%
+      summarize(n_rows = unweighted(n()))
+
+    out_pps <- pps_design %>%
+      filter(County == "Fake Category") %>%
+      summarize(n_rows = unweighted(n()))
+
+    expect_equal(out_pps[['n_rows']],
+                 expected = 0)
+    expect_equal(out_nonpps[['n_rows']],
+                 expected = 0)
   }
 )