Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
LudvigOlsen committed Nov 11, 2020
2 parents 98b6234 + 279bf0e commit 6b2ab2d
Show file tree
Hide file tree
Showing 34 changed files with 1,687 additions and 216 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ matrix:
before_script: Rscript -e "remotes::install_github('ludvigolsen/xpectr')"
- r: release
name: multiple-devel
before_script: Rscript -e "remotes::install_github(c('ludvigolsen/xpectr', 'r-lib/vctrs', 'tidyverse/dplyr', 'tidyverse/tidyr', 'tidyverse/tibble'), dependencies = TRUE)"
before_script: Rscript -e "remotes::install_github(c('ludvigolsen/xpectr', 'tidyverse/dplyr', 'tidyverse/tidyr', 'tidyverse/tibble'), dependencies = TRUE)"
- r: release
os: osx
name: release osx
Expand Down
2 changes: 0 additions & 2 deletions CRAN-RELEASE

This file was deleted.

3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: rearrr
Title: Rearranging Data
Version: 0.1.0
Version: 0.1.0.9000
Authors@R:
c(person(given = "Ludvig Renbo",
family = "Olsen",
Expand All @@ -23,6 +23,7 @@ Imports:
purrr (>= 0.3.4),
rlang (>= 0.4.7),
stats,
tibble,
utils
Suggests:
covr,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ export(rotate_2d)
export(rotate_3d)
export(shear_2d)
export(shear_3d)
export(shuffle_hierarchy)
export(square)
export(swirl_2d)
export(swirl_3d)
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@

# rearrr 0.2.0

* Adds `shuffle_hierarchy()` for shuffling a multi-column hierarchy of groups. (Rearranger)

* Adds optional recursion to `pair_extremes()`. This adds the `num_pairings` and `balance` arguments. Now returns one additional sorting factor per `num_pairing` level.

# rearrr 0.1.0

Note: Multiple of the new functions also have `*_vec()` versions that take and return vectors. The same can *usually* be achieved with the regular versions, but these wrappers make it easier and less verbose.
Expand Down
15 changes: 12 additions & 3 deletions R/pair_extremes.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@
#' # Shuffle the order of the pairs
#' pair_extremes(df, col = "A", shuffle_pairs = TRUE)
#'
#' # Use recursive pairing
#' pair_extremes(df, col = "A", num_pairings = 2)
#'
#' # Grouped by G
#' df %>%
#' dplyr::select(G, A) %>% # For clarity
Expand All @@ -100,16 +103,18 @@
pair_extremes <- function(data,
col = NULL,
unequal_method = "middle",
# num_pairings = 1, # TODO
num_pairings = 1,
balance = "mean",
shuffle_members = FALSE,
shuffle_pairs = FALSE,
factor_name = ".pair",
factor_name = ifelse(num_pairings == 1, ".pair", ".pairing"),
overwrite = FALSE) {
extreme_pairing_rearranger_(
data = data,
col = col,
unequal_method = unequal_method,
num_pairings = 1,
num_pairings = num_pairings,
balance = balance,
shuffle_members = shuffle_members,
shuffle_pairs = shuffle_pairs,
factor_name = factor_name,
Expand All @@ -121,13 +126,17 @@ pair_extremes <- function(data,
#' @export
pair_extremes_vec <- function(data,
unequal_method = "middle",
num_pairings = 1,
balance = "mean",
shuffle_members = FALSE,
shuffle_pairs = FALSE){
checkmate::assert(checkmate::check_vector(data, strict = TRUE),
checkmate::check_factor(data))
pair_extremes(
data = data,
unequal_method = unequal_method,
num_pairings = num_pairings,
balance = balance,
shuffle_members = shuffle_members,
shuffle_pairs = shuffle_pairs,
factor_name = NULL,
Expand Down
26 changes: 16 additions & 10 deletions R/rearrange_factors.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,25 @@ create_rearrange_factor_pair_extremes_ <- function(size, unequal_method = "middl
if (size == 1) {
return(1)
}

half_size <- floor(size / 2)
idx <- seq_len(half_size)

if (half_size * 2 == size) {
return(c(idx, rev(idx)))
} else {
if (unequal_method == "middle") {
middle <- ceiling((half_size / 2)) + 1
idx <- ifelse(idx >= middle, idx + 1, idx)
return(c(idx, middle, rev(idx)))
} else if (unequal_method == "first") {
return(c(1, c(idx, rev(idx)) + 1))
} else if (unequal_method == "last") {
return(c(c(idx, rev(idx)), max(idx) + 1))
}
}

if (unequal_method == "middle") {
middle <- ceiling(half_size / 2) + 1
idx <- ifelse(idx >= middle, idx + 1, idx)
return(c(idx, middle, rev(idx)))
}

if (unequal_method == "first") {
return(c(1, c(idx, rev(idx)) + 1))
}

if (unequal_method == "last") {
return(c(c(idx, rev(idx)), max(idx) + 1))
}
}
174 changes: 149 additions & 25 deletions R/rearrange_methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -320,10 +320,12 @@ rearrange_by_distance <- function(data,


# TODO Add aggregate_fn for recursive pairings
rearrange_pair_extremes <- function(data, cols,
rearrange_pair_extremes <- function(data,
cols,
overwrite,
unequal_method,
num_pairings,
balance,
shuffle_members,
shuffle_pairs,
factor_name,
Expand All @@ -335,43 +337,165 @@ rearrange_pair_extremes <- function(data, cols,
return(data)
}

# Order data frame
data <- data[order(data[[col]]), , drop = FALSE]
equal_nrows <- nrow(data) %% 2 == 0

# Prepare factor names for output
factor_names <- factor_name
if (!is.null(factor_names) && num_pairings > 1){
factor_names <- paste0(factor_name, "_", seq_len(num_pairings))
}

## First extreme pairing

# Create
local_tmp_rearrange_var <- create_tmp_var(data, ".tmp_rearrange_col")
data[[local_tmp_rearrange_var]] <-
create_rearrange_factor_pair_extremes_(
size = nrow(data), unequal_method = unequal_method
)
# Create num_pairings tmp column names
tmp_rearrange_vars <- purrr::map(.x = seq_len(num_pairings), .f = ~ {
create_tmp_var(data, paste0(".tmp_rearrange_col_", .x))
}) %>% unlist(recursive = TRUE)

# Pair the extreme values and order by the pairs
data <- order_and_pair_extremes_(
data = data,
col = col,
new_col = tmp_rearrange_vars[[1]],
unequal_method = unequal_method
)

# Order data by the pairs
data <- order_by_group(
data = data,
group_col = local_tmp_rearrange_var,
shuffle_members = shuffle_members,
shuffle_pairs = shuffle_pairs
group_col = tmp_rearrange_vars[[1]],
shuffle_members = FALSE, # Done at the end
shuffle_pairs = FALSE # Done at the end
)

# TODO Perform recursive pairing

# if (num_pairings > 1){
#
#
#
# }
if (num_pairings > 1){

# Get environment so we can update `data`
pairing_env <- environment()

# Perform num_pairings-1 pairings
plyr::l_ply(seq_len(num_pairings - 1), function(i) {

# Note that `tmp_rearrange_vars[[i]]` is for the previous level
# and `tmp_rearrange_vars[[i + 1]]` is for the current level

# What to balance ("mean", "spread", "min", or "max")
if (length(balance) > 1) {
current_balance_target <- balance[[i]]
} else {
current_balance_target <- balance
}

# Define function to summarize with
if (current_balance_target == "mean") {
summ_fn <- sum
} else if (current_balance_target == "spread") {
summ_fn <- function(v) {
sum(abs(diff(v)))
}
} else if (current_balance_target == "min") {
summ_fn <- min
} else if (current_balance_target == "max") {
summ_fn <- max
}

# Aggregate values for pairs from previous pairing
tmp_group_scores <- data %>%
dplyr::group_by(!!as.name(tmp_rearrange_vars[[i]])) %>%
dplyr::summarize(group_aggr = summ_fn(!!as.name(col)))

if (!equal_nrows & unequal_method == "first") {

# Reorder with first group always first
# (otherwise doesn't work with negative numbers)
tmp_group_scores_sorted <- tmp_group_scores %>%
dplyr::filter(dplyr::row_number() == 1) %>%
dplyr::bind_rows(
tmp_group_scores %>%
dplyr::filter(dplyr::row_number() != 1) %>%
dplyr::arrange(.data$group_aggr))
} else {
tmp_group_scores_sorted <- tmp_group_scores %>%
dplyr::arrange(.data$group_aggr)
}

# Pair the extreme pairs and order by the new pairs
tmp_rearrange <- order_and_pair_extremes_(
data = tmp_group_scores_sorted,
col = "group_aggr",
new_col = tmp_rearrange_vars[[i + 1]],
unequal_method = unequal_method
) %>%
base_select_(cols = c(tmp_rearrange_vars[[i]], tmp_rearrange_vars[[i + 1]]))

# Add the new pair identifiers to `data`
data <- data %>%
dplyr::left_join(tmp_rearrange, by = tmp_rearrange_vars[[i]])

# Order data by the pairs
data <- order_by_group(
data = data,
group_col = tmp_rearrange_vars[[i + 1]],
shuffle_members = FALSE,
shuffle_pairs = FALSE
)

# TODO Make this work for num_pairings factors
# Update `data` in parent environment
pairing_env[["data"]] <- data

# Add rearrange factor
data <- add_info_col_(
data = base_deselect_(data, cols = local_tmp_rearrange_var),
nm = factor_name,
content = as.factor(data[[local_tmp_rearrange_var]]),
overwrite = overwrite
)
})
}

# Find columns to shuffle
shuffling_group_cols <- rev(tmp_rearrange_vars)
cols_to_shuffle <- c()
if (isTRUE(shuffle_members))
cols_to_shuffle <- shuffling_group_cols
if (isTRUE(shuffle_pairs)) {
shuffling_group_cols <- c(shuffling_group_cols, col)
cols_to_shuffle <- c(cols_to_shuffle, col)
}

# Shuffle members and/or pairs if specified
if (length(cols_to_shuffle) > 0) {
data <- dplyr::ungroup(data) %>%
shuffle_hierarchy(group_cols = shuffling_group_cols,
cols_to_shuffle = cols_to_shuffle,
leaf_has_groups = !shuffle_members)
}

if (!is.null(factor_names)){
# Convert to factors and give correct names
rearrange_factors <- as.list(data[, tmp_rearrange_vars, drop=FALSE])
rearrange_factors <- purrr::map(rearrange_factors, .f = ~{as.factor(.x)})
names(rearrange_factors) <- factor_names

# Remove tmp vars and add factors
data <- base_deselect_(data, cols = tmp_rearrange_vars) %>%
add_dimensions_(new_vectors = rearrange_factors,
overwrite = overwrite)
} else {
# If names are NULL, we just remove the tmp columns
data <- base_deselect_(data, cols = tmp_rearrange_vars)
}

data
}

# Wrapper for running extreme pairing and ordering data by it
order_and_pair_extremes_ <- function(data, col, new_col, unequal_method){

# Order data frame
data <- data[order(data[[col]]), , drop = FALSE]

# Add rearrange factor (of type integer)
data[[new_col]] <-
create_rearrange_factor_pair_extremes_(
size = nrow(data),
unequal_method = unequal_method
)

data
}
Loading

0 comments on commit 6b2ab2d

Please sign in to comment.