Skip to content

Commit

Permalink
Merge pull request #258 from Crunch-io/cbs_tracking_fixes
Browse files Browse the repository at this point in the history
Fixes for tracking report edge cases
  • Loading branch information
1beb authored Mar 29, 2021
2 parents 5151749 + e213f01 commit 60b5242
Show file tree
Hide file tree
Showing 13 changed files with 324 additions and 70 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ Package: crunchtabs
Type: Package
Title: Custom Report Generation for Crunch Datasets
Description: In order to generate custom survey reports, this package provides
functions for computing 'toplines' (one-way frequency summaries) and
'banners' (cross-tabulations) of datasets in the Crunch
functions for computing 'toplines' (one-way frequency summaries),
'banners' (cross-tabulations) and codebooks of datasets in the Crunch
(<https://crunch.io/>) web service. Reports can be written in 'PDF' format
using 'LaTeX' or in Microsoft Excel '.xlsx' files.
Version: 1.4.0
Version: 1.4.1
Authors@R: c(
person("Persephone", "Tsebelis", role="aut"),
person("Kamil", "Sedrowicz", role="aut"),
Expand Down
14 changes: 14 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
## crunchtabs 1.4.1

- Multiple adjustments for tracking report edge cases
- In a tracking report, if an alias appears only once we now specify that by appending the wave label to the question's note.
- In a tracking report, we now have the option to show a variable only once even if it appears in multiple datasets.

Fixes:

- In some cases there are duplicated row names in different positions when we use cbindFill, we did not account for this possibility.
- When creating a tracking report for a multiple response question, it's possible that one or more of the responses is not included in one or more of the waves, we have added a tryCatch to accomodate this possibility.
- themeNew documentation was missing a closing } that silently broke the display of documentation via ?themeNew.
- When converting MR variables we rename subVars using numbers. If a researcher included an alias with the same number they would collide. Now we assign random strings instead of sequential numbers to the subVars.


## crunchtabs 1.4.0

Features:
Expand Down
98 changes: 62 additions & 36 deletions R/asToplineCategoricalArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,36 +28,6 @@ as.ToplineCategoricalArray <- function(
)
}

categoryFill <- function(clist) {
cbindFill <- function(x, y) {
r <- merge(x, y, by = "row.names", all = TRUE, sort = FALSE)
rownames(r) <- r$Row.names
r$Row.names <- NULL
r
}

addPos <- function(x) {
x[,1] <- 1:nrow(x)
x
}

r <- lapply(clist, addPos)
r <- do.call(rbind, r)
r <- data.frame(nm = names(r[,1]), pos = r[,1])
r <- unique(r)
rownames(r) <- r$nm
r$nm <- NULL

m <- Reduce(function(x,y) suppressWarnings(cbindFill(x,y)), clist)
m <- suppressWarnings(merge(m, r, by = "row.names", all = TRUE, sort = FALSE))
m <- m[with(m, order(pos)),]

rownames(m) <- m$Row.names
m$Row.names <- NULL
m$pos <- NULL
as.matrix(m)
}

counts <- obj$crosstabs$Results$`___total___`$counts
second_label <- attr(counts, "dimnames")[[1]]

Expand Down Expand Up @@ -111,7 +81,14 @@ catArrayToCategoricals <- function(questions, question_alias, labels) {
labels <- paste0("Wave ", seq_along(questions))
}

nms <- paste0(question_alias, seq_along(statements))
# Collisions can occur so we randomize names because people version aliases sometimes
# votefactors with sub vars + votefactors2 :/
# randNames <- function(n) {
# a <- do.call(paste0, replicate(5, sample(LETTERS, n, TRUE), FALSE))
# paste0(a, sprintf("%04d", sample(9999, n, TRUE)), sample(LETTERS, n, TRUE))
# }

nms <- paste0(question_alias, "_", seq_along(statements))

# Create list of objects to fill in, one for each sub statement of the
# multiple response group
Expand All @@ -137,11 +114,13 @@ catArrayToCategoricals <- function(questions, question_alias, labels) {

# Pull out our data
for (i in seq_len(nrow(guide))) {
guide$value[i] <- questions[[
guide$label[i]
]]$crosstabs$Results$`___total___`$proportions[
guide$cat[i], guide$statement[i]
]
guide$value[i] <- tryCatch({
questions[[
guide$label[i]
]]$crosstabs$Results$`___total___`$proportions[
guide$cat[i], guide$statement[i]
]
}, error = function(e) NA_real_)
}

# Pre allocate
Expand Down Expand Up @@ -175,3 +154,50 @@ catArrayToCategoricals <- function(questions, question_alias, labels) {
names(l) <- nms
return(l)
}

#' Merge two data.frames by rownames
#'
#' This function is designed to cbind via rownames where
#' the rownames may not match and then fix the result so that
#' it can be further merged to another data.frame.
#' @param x A data.frame
#' @param y A data.frame
cbindFill <- function(x, y) {
r <- merge(x, y, by = "row.names", all = TRUE, sort = FALSE)
rownames(r) <- r$Row.names
r$Row.names <- NULL
r
}

#' Add position
#'
#' A small utility function to add position to a matrix
#' @param x A matrix
addPos <- function(x) {
x[,1] <- 1:nrow(x)
x
}

#' Column Bind Unequal Matrices
#'
#' This function takes a list of matrices and binds them together into
#' a single frame. Accounts for missing or unequal rows, by rowname.
#'
#' @param clist A list of matrices with rownames
categoryFill <- function(clist) {
r <- lapply(clist, addPos)
r <- do.call(rbind, r)
r <- data.frame(nm = names(r[,1]), pos = r[,1])
r <- r[!duplicated(r$nm),]
rownames(r) <- r$nm
r$nm <- NULL

m <- Reduce(function(x,y) suppressWarnings(cbindFill(x,y)), clist)
m <- suppressWarnings(merge(m, r, by = "row.names", all = TRUE, sort = FALSE))
m <- m[with(m, order(pos)),]

rownames(m) <- m$Row.names
m$Row.names <- NULL
m$pos <- NULL
as.matrix(m)
}
4 changes: 0 additions & 4 deletions R/theme.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
#' `themeNew` produces themes for `writeExcel` or `writeLatex`.
#'
#' @section Theme Arguments:
# nolint start
#' \describe{
#' \item{digits}{A numeric. How many digits should the data be rounded to? (In Excel, this is excel styling.) Defaults to 0.}
#' \item{digits_numeric}{A numeric. How many digits should continuous variable data be rounded to? (In Latex, , this is Latex styling.) Defaults to 2.}
Expand Down Expand Up @@ -82,7 +81,6 @@
#' \item{border_color}{In Excel, an optional color. The border color of the relevant cells.}
#' \item{border_left}{In Excel, an optional logical. Should there be a border on the left of the relevant cells? }
#' \item{border_right}{In Excel, an optional logical. Should there be a border on the right of the relevant cells? }
#' \item{border_style}{In Excel, an optional character. The style of the border of the relevant
#' \item{border_style}{In Excel, an optional character. The style of the border of the relevant cells. Valid options are: "dashDot", "dashDotDot", "dashed", "dotted", "double", "hair", "medium", "mediumDashDot", "mediumDashDotDot", "mediumDashed", "none", "slantDashDot", "thick", and "thin".}
#' \item{border_top}{In Excel, an optional logical. Should there be a border on the top of the relevant cells? }
#' \item{decoration}{An optional character vector. Text decorations to be applied to relevant cells. Valid options are: "bold", "italic", "strikeout", "underline", and "underline2".}
Expand Down Expand Up @@ -153,8 +151,6 @@ themeNew <- function(..., default_theme = themeDefaultExcel()) {
}
}

# nolint end

theme <- modifyList(default_theme, dots, keep.null = TRUE)
theme <- theme[union(names(dots), names(default_theme))]

Expand Down
57 changes: 37 additions & 20 deletions R/trackingReports.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ trackingReport <- function(dataset_list, vars, labels = NULL, weight = NULL, sho
rebuilt_results <- list()
class(rebuilt_results) <- c("Toplines", "CrunchTabs")
rebuilt_results$results <- lapply(vars, function(x) NULL)
rebuilt_results$metadata <- tabs[[1]]$metadata
has_meta <- which(!unlist(lapply(lapply(tabs, function(x) x$metadata), is.null)))[1]
rebuilt_results$metadata <- tabs[[has_meta]]$metadata
names(rebuilt_results$results) <- vars
rebuilt_results$banner <- NULL

Expand Down Expand Up @@ -59,26 +60,10 @@ trackingReport <- function(dataset_list, vars, labels = NULL, weight = NULL, sho
}

rebuilt_results$results[[v]]$available_at <- results_available
rebuilt_results <- trackingDeclareAvailability(
rebuilt_results, results_available, var = v, labels
)


# For each alias, we set an attribute that identifies it's availability
# across all the datasets: "all", "partial", and "single"
# - "all" means it is available in every dataset
# - "partial" means it is available in only some datasets
# - "single" means it is available in exactly one dataset

# Because we use subsetting at the list level, "all" and "partial"
# would follow a typical path that labeling was adjusted appropriately
# for presentation in the resulting pdf "single" should act as a simple
# passthrough where no additional formatting or manipulation takes place
# on the result.

# The single case
if (length(results_available) == 1) {
rebuilt_results$results[[v]]$availability <- "single"
} else {
rebuilt_results$results[[v]]$availability <- "general"
}
}

# Now that we have an attribute that identifies availability we can use it as
Expand Down Expand Up @@ -132,7 +117,39 @@ trackingReport <- function(dataset_list, vars, labels = NULL, weight = NULL, sho
rebuilt_results
}

#' Specify question availability in a tracking report
#'
#' For each alias, we set an attribute that identifies it's availability
#' across all the datasets: "general", and "single"
#' - "general" means it is available in only some datasets
#' - "single" means it is available in exactly one dataset
#' Because we use subsetting at the list level, "general" and "single"
#' would follow a typical path that labeling was adjusted appropriately
#' for presentation in the resulting pdf "single" should act as a simple
#' passthrough where no additional formatting or manipulation takes place
#' on the result.
#' @md
#' @param rebuilt_results A list of result objects from crunch
#' @param results_available A vector identifying in which list elements
#' @param var The name of the alias that we are declaring its availability
#' @param labels The wave labels
trackingDeclareAvailability <- function(rebuilt_results, results_available, var, labels) {
if (length(results_available) == 1) {
rebuilt_results$results[[var]]$availability <- "single"
if(rebuilt_results$results[[var]]$notes == "") {
rebuilt_results$results[[var]]$notes <- paste0("Asked in ", labels[results_available])
} else {
rebuilt_results$results[[var]]$notes <- paste0(
rebuilt_results$results[[var]]$notes,
" (Asked in ", labels[results_available], ")")
}
} else {
rebuilt_results$results[[var]]$availability <- "general"
}
return(rebuilt_results)
}


trackingReport_tabs <- function(datasets, vars, weight = NULL) {
lapply(
datasets,
Expand Down
14 changes: 14 additions & 0 deletions man/addPos.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions man/categoryFill.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions man/cbindFill.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/crunchtabs-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 60b5242

Please sign in to comment.