Skip to content

Commit

Permalink
Merge pull request #471 from krlmlr/f-469-check-key-null
Browse files Browse the repository at this point in the history
- Columns with missing values are no longer primary keys (#469).
  • Loading branch information
krlmlr committed Jan 7, 2021
1 parent be198c6 commit 250e605
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 10 deletions.
4 changes: 2 additions & 2 deletions R/key-helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ is_unique_key <- function(.data, column) {
.data %>%
select(value = !!col_expr) %>%
safe_count(value) %>%
filter(n != 1) %>%
arrange(value) %>%
filter(n != 1 | is.na(value)) %>%
arrange(if_else(is.na(value), 0, 1), value) %>%
utils::head(MAX_COMMAS + 1) %>%
collect() %>%
{
Expand Down
22 changes: 20 additions & 2 deletions R/primary-keys.R
Original file line number Diff line number Diff line change
Expand Up @@ -318,8 +318,26 @@ check_pk <- function(table, column) {
}

fun <- ~ format(.x, trim = TRUE, justify = "none")
values <- commas(duplicate_values$data[[1]]$value, capped = TRUE, fun = fun)
paste0("has duplicate values: ", values)

values <- duplicate_values$data[[1]]$value
values_na <- is.na(values)

if (any(values_na)) {
missing <- "missing values"
values <- values[!values_na]
} else {
missing <- NULL
}

if (length(values) > 0) {
values_text <- commas(values, capped = TRUE, fun = fun)
duplicate <- paste0("duplicate values: ", values_text)
} else {
duplicate <- NULL
}

problem <- glue_collapse(c(missing, duplicate), sep = "", last = ", and ")
paste0("has ", problem)
}


Expand Down
8 changes: 6 additions & 2 deletions tests/testthat/helper-src.R
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ data_card_5 %<-% tibble::tibble(a = 1:5)
data_card_6 %<-% tibble::tibble(c = 1:4)
data_card_7 %<-% tibble::tibble(c = c(1:5, 5L, 6L))
data_card_8 %<-% tibble::tibble(c = c(1:6))
data_card_9 %<-% tibble::tibble(c = c(1:5, NA))
data_card_10 %<-% tibble::tibble(c = c(1:3, 4:3, NA))

# for check_key() ---------------------------------------------------------

Expand Down Expand Up @@ -287,7 +289,9 @@ dm_test_obj %<-% as_dm(list(
dm_table_1 = data_card_2(),
dm_table_2 = data_card_4(),
dm_table_3 = data_card_7(),
dm_table_4 = data_card_8()
dm_table_4 = data_card_8(),
dm_table_5 = data_card_9(),
dm_table_6 = data_card_10()
))

dm_test_obj_2 %<-% as_dm(list(
Expand All @@ -299,7 +303,7 @@ dm_test_obj_2 %<-% as_dm(list(

# for `dm_nrow()` ---------------------------------------------------------

rows_dm_obj <- 24L
rows_dm_obj <- 36L

# Complicated `dm` --------------------------------------------------------

Expand Down
25 changes: 21 additions & 4 deletions tests/testthat/test-primary-keys.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ test_that("dm_rm_pk() works as intended?", {
expect_dm_error(
dm_test_obj() %>%
dm_add_pk(dm_table_1, a) %>%
dm_rm_pk(dm_table_5),
dm_rm_pk(dm_table_bogus),
class = "table_not_in_dm"
)

Expand Down Expand Up @@ -102,17 +102,34 @@ test_that("dm_enum_pk_candidates() works properly?", {
candidates_table_1 <- tibble(column = c("a", "b"), candidate = c(TRUE, TRUE), why = c("", "")) %>%
rename(columns = column) %>%
mutate(columns = new_keys(columns))
candidates_table_2 <- tibble(column = c("c"), candidate = c(FALSE), why = "has duplicate values: 5") %>%
rename(columns = column) %>%
mutate(columns = new_keys(columns))
expect_identical(
dm_enum_pk_candidates(dm_test_obj(), dm_table_1),
candidates_table_1
)

candidates_table_2 <- tibble(column = c("c"), candidate = c(FALSE), why = "has duplicate values: 5") %>%
rename(columns = column) %>%
mutate(columns = new_keys(columns))
expect_identical(
dm_enum_pk_candidates(dm_test_obj(), dm_table_2),
candidates_table_2
)

candidates_table_3 <- tibble(column = c("c"), candidate = c(FALSE), why = "has missing values") %>%
rename(columns = column) %>%
mutate(columns = new_keys(columns))
expect_identical(
dm_enum_pk_candidates(dm_test_obj(), dm_table_5),
candidates_table_3
)

candidates_table_4 <- tibble(column = c("c"), candidate = c(FALSE), why = "has missing values, and duplicate values: 3") %>%
rename(columns = column) %>%
mutate(columns = new_keys(columns))
expect_identical(
dm_enum_pk_candidates(dm_test_obj(), dm_table_6),
candidates_table_4
)
})

test_that("enum_pk_candidates() works properly", {
Expand Down

0 comments on commit 250e605

Please sign in to comment.