diff --git a/README.Rmd b/README.Rmd index 82b1b01..aebb6bb 100644 --- a/README.Rmd +++ b/README.Rmd @@ -12,16 +12,16 @@ knitr::opts_chunk$set( ) ``` -# Sequential Poisson sampling sps website +# Sequential Poisson sampling sps website [![CRAN status](https://www.r-pkg.org/badges/version/sps)](https://cran.r-project.org/package=sps) [![sps status badge](https://marberts.r-universe.dev/badges/sps)](https://marberts.r-universe.dev) [![Conda Version](https://img.shields.io/conda/vn/conda-forge/r-sps.svg)](https://anaconda.org/conda-forge/r-sps) [![R-CMD-check](https://github.com/marberts/sps/workflows/R-CMD-check/badge.svg)](https://github.com/marberts/sps/actions) -[![codecov](https://codecov.io/gh/marberts/sps/graph/badge.svg?token=5CPGWUF267)](https://codecov.io/gh/marberts/sps) +[![codecov](https://codecov.io/gh/marberts/sps/graph/badge.svg?token=5CPGWUF267)](https://app.codecov.io/gh/marberts/sps) [![DOI](https://zenodo.org/badge/326323827.svg)](https://zenodo.org/doi/10.5281/zenodo.10109857) -[![Mentioned in Awesome Official Statistics ](https://awesome.re/mentioned-badge.svg)](http://www.awesomeofficialstatistics.org) +[![Mentioned in Awesome Official Statistics ](https://awesome.re/mentioned-badge.svg)](https://github.com/SNStatComp/awesome-official-statistics-software) Sequential Poisson sampling is a variation of Poisson sampling for drawing probability-proportional-to-size samples with a given number of units, and is commonly used for price-index surveys. This package gives functions to draw stratified sequential Poisson samples according to the method by Ohlsson (1998), as well as other order sample designs by Rosén (1997), and generate appropriate bootstrap replicate weights according to the generalized bootstrap method by Beaumont and Patak (2012). 
diff --git a/README.md b/README.md index e0f9798..83ba6b3 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ -# Sequential Poisson sampling sps website +# Sequential Poisson sampling sps website @@ -12,10 +12,10 @@ badge](https://marberts.r-universe.dev/badges/sps)](https://marberts.r-universe. [![Conda Version](https://img.shields.io/conda/vn/conda-forge/r-sps.svg)](https://anaconda.org/conda-forge/r-sps) [![R-CMD-check](https://github.com/marberts/sps/workflows/R-CMD-check/badge.svg)](https://github.com/marberts/sps/actions) -[![codecov](https://codecov.io/gh/marberts/sps/graph/badge.svg?token=5CPGWUF267)](https://codecov.io/gh/marberts/sps) +[![codecov](https://codecov.io/gh/marberts/sps/graph/badge.svg?token=5CPGWUF267)](https://app.codecov.io/gh/marberts/sps) [![DOI](https://zenodo.org/badge/326323827.svg)](https://zenodo.org/doi/10.5281/zenodo.10109857) [![Mentioned in Awesome Official -Statistics](https://awesome.re/mentioned-badge.svg)](http://www.awesomeofficialstatistics.org) +Statistics](https://awesome.re/mentioned-badge.svg)](https://github.com/SNStatComp/awesome-official-statistics-software) Sequential Poisson sampling is a variation of Poisson sampling for @@ -61,11 +61,11 @@ revenue <- c(1:10, 100, 150) # Draw a sample of 6 businesses (samp <- sps(revenue, 6)) -#> [1] 2 6 7 10 11 12 +#> [1] 4 8 9 10 11 12 # Design weights and sampling strata are stored with the sample weights(samp) -#> [1] 6.875000 2.291667 1.964286 1.375000 1.000000 1.000000 +#> [1] 3.437500 1.718750 1.527778 1.375000 1.000000 1.000000 levels(samp) #> [1] "TS" "TS" "TS" "TS" "TA" "TA" ``` @@ -85,10 +85,10 @@ stratum <- rep(c("a", "b"), c(9, 3)) # Draw a stratified sample (samp <- sps(revenue, allocation, stratum)) -#> [1] 7 8 9 10 11 12 +#> [1] 5 6 9 10 11 12 weights(samp) -#> [1] 2.142857 1.875000 1.666667 1.000000 1.000000 1.000000 +#> [1] 3.000000 2.500000 1.666667 1.000000 1.000000 1.000000 levels(samp) #> [1] "TS" "TS" "TS" "TA" "TA" "TA" ``` @@ -98,13 +98,13 @@ replicate 
weights with the `sps_repweights()` function. ``` r sps_repweights(weights(samp), 5, tau = 2) -#> [,1] [,2] [,3] [,4] [,5] -#> [1,] 2.214286 2.214286 1.714286 1.142857 3.285714 -#> [2,] 1.812500 2.750000 1.812500 0.875000 0.875000 -#> [3,] 1.500000 1.500000 1.166667 1.500000 2.000000 -#> [4,] 1.000000 1.000000 1.000000 1.000000 1.000000 -#> [5,] 1.000000 1.000000 1.000000 1.000000 1.000000 -#> [6,] 1.000000 1.000000 1.000000 1.000000 1.000000 +#> [,1] [,2] [,3] [,4] [,5] +#> [1,] 3.0000000 1.50 1.5000000 4.500000 1.500000 +#> [2,] 2.2500000 2.25 3.5000000 1.500000 1.500000 +#> [3,] 0.6666667 1.50 0.6666667 2.333333 2.333333 +#> [4,] 1.0000000 1.00 1.0000000 1.000000 1.000000 +#> [5,] 1.0000000 1.00 1.0000000 1.000000 1.000000 +#> [6,] 1.0000000 1.00 1.0000000 1.000000 1.000000 #> attr(,"tau") #> [1] 2 ``` diff --git a/vignettes/sps.Rmd b/vignettes/sps.Rmd index a89b89f..56d7a09 100644 --- a/vignettes/sps.Rmd +++ b/vignettes/sps.Rmd @@ -188,3 +188,31 @@ sampling_distribution <- replicate(1000, { summary(sampling_distribution / sum(sales) - 1) ``` + +More generally, the distribution of inclusion probabilities +is usually close to what is expected if sequential Poisson sampling were exactly proportional to size.^[See Tillé, Y. (2023). Remarks on some misconceptions about unequal probability sampling without replacement. *Computer Science Review*, 47, 100533.] 
+
+```{r tille, fig.width=8, fig.height=5.33}
+# Monte Carlo check for region 1: estimate each unit's inclusion probability
+# from repeated draws and compare it with the value from `inclusion_prob()`.
+# `frame` and `allocation` are not created in this chunk; they are expected
+# to come from earlier chunks of the vignette.
+set.seed(123456)
+n <- 5e3
+frame1 <- subset(frame, region == 1)
+
+# Share of the n simulated samples in which each row of `frame1` is drawn.
+pi_est <- tabulate(
+  replicate(n, sps(frame1$revenue, allocation[1])),
+  nbins = nrow(frame1)
+) / n
+
+# Named `incl_prob` rather than `pi` so the built-in constant is not masked.
+incl_prob <- inclusion_prob(frame1$revenue, allocation[1])
+
+# Standardize by the binomial standard error (named `z_score` rather than
+# `dist`, which would mask `stats::dist()`). A unit with inclusion
+# probability exactly 1 has zero standard error; if it is drawn in every
+# sample the ratio is 0 / 0 = NaN, which `na.rm = TRUE` drops below.
+z_score <- (pi_est - incl_prob) / sqrt(incl_prob * (1 - incl_prob) / n)
+
+# Evaluation points for the standard normal reference curve.
+z_grid <- seq(-4, 4, 0.1)
+
+plot(
+  density(z_score, na.rm = TRUE),
+  ylim = c(0, 0.5), xlim = c(-4, 4),
+  ylab = "", xlab = "",
+  main = "Empirical distribution of inclusion probabilities"
+)
+lines(z_grid, dnorm(z_grid), lty = "dashed")
+legend("topright", c("empirical", "theoretical"), lty = c("solid", "dashed"))
+```
+