From 649ba287f7f8ff7faa0992fba276ebee1d22354f Mon Sep 17 00:00:00 2001
From: Shantanu Singh <shsingh@broadinstitute.org>
Date: Wed, 13 Sep 2017 12:41:24 -0400
Subject: [PATCH] Issues/98 (#99)

* edits based on lintr checks

* changes based on lintr

* suppressMessages when loading magrittr

* Don't print logs
---
 R/correlation_threshold.R                   |  2 +-
 R/count_na_rows.R                           |  1 -
 R/drop_na_columns.R                         |  8 ++---
 R/drop_na_rows.R                            | 14 ++++----
 R/generalized_log.R                         |  4 +--
 R/normalize.R                               |  7 ++--
 R/replicate_correlation.R                   |  6 ++--
 R/transform.R                               |  3 +-
 R/variable_importance.R                     |  7 ++--
 R/variable_select.R                         |  4 ++-
 R/variance_threshold.R                      |  3 +-
 docs/articles/cytominer-pipeline.html       |  2 +-
 docs/reference/correlation_threshold.html   |  2 +-
 docs/reference/normalize.html               |  2 +-
 docs/reference/variable_select.html         |  6 ++--
 man/correlation_threshold.Rd                |  2 +-
 man/normalize.Rd                            |  2 +-
 man/variable_select.Rd                      |  4 ++-
 tests/testthat/test-aggregate.R             |  7 ++--
 tests/testthat/test-correlation_threshold.R |  5 +--
 tests/testthat/test-count_na_rows.R         |  4 +--
 tests/testthat/test-cytominer.R             | 38 +++++++++++----------
 tests/testthat/test-drop_na_columns.R       |  4 +--
 tests/testthat/test-drop_na_rows.R          | 20 +++++------
 tests/testthat/test-generalized_log.R       | 14 ++++----
 tests/testthat/test-normalize.R             | 24 ++++++-------
 tests/testthat/test-replicate_correlation.R | 22 ++++++------
 tests/testthat/test-variance_threshold.R    |  7 ++--
 28 files changed, 119 insertions(+), 105 deletions(-)

diff --git a/R/correlation_threshold.R b/R/correlation_threshold.R
index c236e37..41392a7 100644
--- a/R/correlation_threshold.R
+++ b/R/correlation_threshold.R
@@ -16,7 +16,7 @@
 #' 
 #' @examples
 #' 
-#' library(magrittr)
+#' suppressMessages(suppressWarnings(library(magrittr)))
 #' sample <- tibble::data_frame(
 #'    x = rnorm(30),
 #'    y = rnorm(30)/1000
diff --git a/R/count_na_rows.R b/R/count_na_rows.R
index 73e820f..240d39f 100644
--- a/R/count_na_rows.R
+++ b/R/count_na_rows.R
@@ -29,5 +29,4 @@ count_na_rows <- function(population, variables) {
     dplyr::summarize_at(variables, dplyr::funs(sum)) %>%
     dplyr::collect() %>%
     data.frame()
-  
 }
diff --git a/R/drop_na_columns.R b/R/drop_na_columns.R
index 93c079b..6f998a9 100644
--- a/R/drop_na_columns.R
+++ b/R/drop_na_columns.R
@@ -25,7 +25,7 @@
 #' @export
 drop_na_columns <- function(population, variables, cutoff = 0.05) {
   cutoff <- rlang::enquo(cutoff)
-  
+
   nrows <-
     population %>%
     dplyr::tally() %>%
@@ -33,9 +33,9 @@ drop_na_columns <- function(population, variables, cutoff = 0.05) {
     magrittr::extract2("n")
 
   count <- rlang::sym("count")
-  
+
   feature <- rlang::sym("feature")
-  
+
   percent <- rlang::sym("percent")
 
   population %>%
@@ -44,6 +44,6 @@ drop_na_columns <- function(population, variables, cutoff = 0.05) {
     dplyr::collect() %>%
     tidyr::gather(!!feature, !!count, !!!variables) %>%
     dplyr::mutate(!!percent := (!!count) / nrows) %>%
-    dplyr::filter((!!percent) > (!!cutoff)) %>%
+    dplyr::filter( (!!percent) > (!!cutoff)) %>%
     magrittr::extract2("feature")
 }
diff --git a/R/drop_na_rows.R b/R/drop_na_rows.R
index d6cb404..d9de4ce 100644
--- a/R/drop_na_rows.R
+++ b/R/drop_na_rows.R
@@ -25,9 +25,9 @@
 drop_na_rows <- function(population, variables) {
 
   key <- rlang::sym("key")
-  
+
   value <- rlang::sym("value")
-  
+
   rowname_temp <- rlang::sym("rowname_temp")
 
   if (is.data.frame(population)) {
@@ -37,13 +37,13 @@ drop_na_rows <- function(population, variables) {
       dplyr::filter(!is.na(!!value)) %>%
       tidyr::spread(!!key, !!value) %>%
       dplyr::select(-!!rowname_temp)
-    
-  } else { 
-    
+
+  } else {
+
     # Coalesce() must have at least 2 arguments.
-    if(length(variables) == 1)
+    if (length(variables) == 1)
       variables <- c(variables, variables)
-    
+
     population %>%
       dplyr::filter_(.dots =
           sprintf("!is.null(coalesce(%s))",
diff --git a/R/generalized_log.R b/R/generalized_log.R
index a24663f..ee8aa78 100644
--- a/R/generalized_log.R
+++ b/R/generalized_log.R
@@ -27,9 +27,9 @@ generalized_log <- function(population, variables, offset = 1) {
     x <- rlang::sym(variable)
 
     population %<>%
-      dplyr::mutate(!!x := log( ((!!x) + ((!!x) ^ 2 + (!!offset) ^ 2) ^ 0.5 ) / 2))
+      dplyr::mutate(!!x :=
+                      log( ( (!!x) + ( (!!x) ^ 2 + (!!offset) ^ 2) ^ 0.5 ) / 2))
   }
 
   population
 }
-
diff --git a/R/normalize.R b/R/normalize.R
index 10ca94d..4877c13 100644
--- a/R/normalize.R
+++ b/R/normalize.R
@@ -17,7 +17,7 @@
 #' @importFrom stats cor mad median sd setNames
 #'
 #' @examples
-#' library(magrittr)
+#' suppressMessages(suppressWarnings(library(magrittr)))
 #' population <- tibble::data_frame(
 #'    Metadata_group = c("control", "control", "control", "control",
 #'                       "experiment", "experiment", "experiment", "experiment"),
@@ -30,7 +30,8 @@
 #' cytominer::normalize(population, variables, strata, sample, operation = "standardize")
 #' 
 #' @export
-normalize <- function(population, variables, strata, sample, operation = "standardize", ...) {
+normalize <- function(population, variables, strata, sample,
+                      operation = "standardize", ...) {
   scale <- function(data, location, dispersion, variables) {
     if (is.data.frame(data)) {
       futile.logger::flog.debug(paste0("\t\tUsing base::scale (data is ",
@@ -59,7 +60,7 @@ normalize <- function(population, variables, strata, sample, operation = "standa
         s <- dispersion[[variable]]
 
         data %<>%
-          dplyr::mutate(!!x := ((!!x) - m) / s )
+          dplyr::mutate(!!x := ( (!!x) - m) / s )
 
       }
 
diff --git a/R/replicate_correlation.R b/R/replicate_correlation.R
index 4f72ed8..a2d5970 100644
--- a/R/replicate_correlation.R
+++ b/R/replicate_correlation.R
@@ -82,8 +82,7 @@ replicate_correlation <-
       strata <- c(strata, replicate_by)
     }
 
-    foreach::foreach(variable = variables, .combine = rbind) %dopar%
-    {
+    foreach::foreach(variable = variables, .combine = rbind) %dopar% {
 
       sample %>%
         split(.[split_by]) %>%
@@ -94,7 +93,8 @@ replicate_correlation <-
                 dplyr::arrange_(.dots = strata) %>%
                 dplyr::select_(.dots = c(strata, variable, replicate_by)) %>%
                 tidyr::spread_(replicate_by, variable) %>%
-                dplyr::select_(~-dplyr::one_of(setdiff(strata, replicate_by))) %>%
+                dplyr::select_(~-dplyr::one_of(setdiff(strata,
+                                                       replicate_by))) %>%
                 stats::cor()
               median(correlation_matrix[upper.tri(correlation_matrix)])
             }) %>%
diff --git a/R/transform.R b/R/transform.R
index 627af0c..a79146d 100644
--- a/R/transform.R
+++ b/R/transform.R
@@ -20,7 +20,8 @@
 #' @importFrom magrittr %>%
 #' @importFrom magrittr %<>%
 #' @export
-transform <- function(population, variables, operation = "generalized_log", ...) {
+transform <- function(population, variables,
+                      operation = "generalized_log", ...) {
   if (operation == "generalized_log") {
     generalized_log(population, variables, ...)
   } else {
diff --git a/R/variable_importance.R b/R/variable_importance.R
index d20a503..717f096 100644
--- a/R/variable_importance.R
+++ b/R/variable_importance.R
@@ -45,17 +45,18 @@
 #' @importFrom magrittr %>%
 #' @importFrom magrittr %<>%
 #' @export
-variable_importance <- function(sample, variables, operation = "replicate_correlation", ...) {
+variable_importance <- function(sample, variables,
+                                operation = "replicate_correlation", ...) {
   if (operation == "replicate_correlation") {
    importance <- replicate_correlation(sample, variables, ...)
-   
+
   } else {
     error <- paste0("undefined operation `", operation, "'")
 
     futile.logger::flog.error(msg = error)
 
     stop(error)
-    
+
   }
 
   importance
diff --git a/R/variable_select.R b/R/variable_select.R
index 4d4be6f..4ed5666 100644
--- a/R/variable_select.R
+++ b/R/variable_select.R
@@ -15,7 +15,7 @@
 #' # In this example, we use `correlation_threshold` as the operation for 
 #' # variable selection.
 #' 
-#' library(magrittr)
+#' suppressMessages(suppressWarnings(library(magrittr)))
 #' population <- tibble::data_frame(
 #'    x = rnorm(100),
 #'    y = rnorm(100)/1000
@@ -35,6 +35,8 @@
 #' 
 #' head(population)
 #' 
+#' futile.logger::flog.threshold(futile.logger::ERROR)
+#' 
 #' variable_select(population, variables, sample, operation) %>% head()
 #'
 #' @importFrom magrittr %>%
diff --git a/R/variance_threshold.R b/R/variance_threshold.R
index 1985134..a22d8c1 100644
--- a/R/variance_threshold.R
+++ b/R/variance_threshold.R
@@ -35,7 +35,8 @@ variance_threshold <- function(variables, sample) {
 
     lunique <- apply(x, 2, function(data) length(unique(data[!is.na(data)])))
 
-    which((ratio > 19 & (100 * lunique / apply(x, 2, length)) <= 10) | (lunique == 1) | apply(x, 2, function(data) all(is.na(data))))
+    which( (ratio > 19 & (100 * lunique / apply(x, 2, length)) <= 10) |
+            (lunique == 1) | apply(x, 2, function(data) all(is.na(data))))
   }
 
   excluded_indexes <-
diff --git a/docs/articles/cytominer-pipeline.html b/docs/articles/cytominer-pipeline.html
index 7e2133e..42f84ed 100644
--- a/docs/articles/cytominer-pipeline.html
+++ b/docs/articles/cytominer-pipeline.html
@@ -75,7 +75,7 @@
       <h1>Introduction to cytominer</h1>
                         <h4 class="author">Allen Goodman and Shantanu Singh</h4>
             
-            <h4 class="date">2017-09-12</h4>
+            <h4 class="date">2017-09-13</h4>
           </div>
 
     
diff --git a/docs/reference/correlation_threshold.html b/docs/reference/correlation_threshold.html
index e56ae9b..63dfea3 100644
--- a/docs/reference/correlation_threshold.html
+++ b/docs/reference/correlation_threshold.html
@@ -136,7 +136,7 @@ <h2 class="hasAnchor" id="details"><a class="anchor" href="#details"></a>Details
 
     <h2 class="hasAnchor" id="examples"><a class="anchor" href="#examples"></a>Examples</h2>
     <pre class="examples"><div class='input'>
-<span class='fu'>library</span>(<span class='no'>magrittr</span>)
+<span class='fu'>suppressMessages</span>(<span class='fu'>suppressWarnings</span>(<span class='fu'>library</span>(<span class='no'>magrittr</span>)))
 <span class='no'>sample</span> <span class='kw'>&lt;-</span> <span class='kw pkg'>tibble</span><span class='kw ns'>::</span><span class='fu'><a href='http://www.rdocumentation.org/packages/tibble/topics/tibble'>data_frame</a></span>(
    <span class='kw'>x</span> <span class='kw'>=</span> <span class='fu'>rnorm</span>(<span class='fl'>30</span>),
    <span class='kw'>y</span> <span class='kw'>=</span> <span class='fu'>rnorm</span>(<span class='fl'>30</span>)/<span class='fl'>1000</span>
diff --git a/docs/reference/normalize.html b/docs/reference/normalize.html
index 6c628f6..8dd1e97 100644
--- a/docs/reference/normalize.html
+++ b/docs/reference/normalize.html
@@ -140,7 +140,7 @@ <h2 class="hasAnchor" id="value"><a class="anchor" href="#value"></a>Value</h2>
     
 
     <h2 class="hasAnchor" id="examples"><a class="anchor" href="#examples"></a>Examples</h2>
-    <pre class="examples"><div class='input'><span class='fu'>library</span>(<span class='no'>magrittr</span>)
+    <pre class="examples"><div class='input'><span class='fu'>suppressMessages</span>(<span class='fu'>suppressWarnings</span>(<span class='fu'>library</span>(<span class='no'>magrittr</span>)))
 <span class='no'>population</span> <span class='kw'>&lt;-</span> <span class='kw pkg'>tibble</span><span class='kw ns'>::</span><span class='fu'><a href='http://www.rdocumentation.org/packages/tibble/topics/tibble'>data_frame</a></span>(
    <span class='kw'>Metadata_group</span> <span class='kw'>=</span> <span class='fu'>c</span>(<span class='st'>"control"</span>, <span class='st'>"control"</span>, <span class='st'>"control"</span>, <span class='st'>"control"</span>,
                       <span class='st'>"experiment"</span>, <span class='st'>"experiment"</span>, <span class='st'>"experiment"</span>, <span class='st'>"experiment"</span>),
diff --git a/docs/reference/variable_select.html b/docs/reference/variable_select.html
index 6976a69..3a8b330 100644
--- a/docs/reference/variable_select.html
+++ b/docs/reference/variable_select.html
@@ -140,7 +140,7 @@ <h2 class="hasAnchor" id="examples"><a class="anchor" href="#examples"></a>Examp
 <span class='co'># In this example, we use `correlation_threshold` as the operation for </span>
 <span class='co'># variable selection.</span>
 
-<span class='fu'>library</span>(<span class='no'>magrittr</span>)
+<span class='fu'>suppressMessages</span>(<span class='fu'>suppressWarnings</span>(<span class='fu'>library</span>(<span class='no'>magrittr</span>)))
 <span class='no'>population</span> <span class='kw'>&lt;-</span> <span class='kw pkg'>tibble</span><span class='kw ns'>::</span><span class='fu'><a href='http://www.rdocumentation.org/packages/tibble/topics/tibble'>data_frame</a></span>(
    <span class='kw'>x</span> <span class='kw'>=</span> <span class='fu'>rnorm</span>(<span class='fl'>100</span>),
    <span class='kw'>y</span> <span class='kw'>=</span> <span class='fu'>rnorm</span>(<span class='fl'>100</span>)/<span class='fl'>1000</span>
@@ -169,8 +169,8 @@ <h2 class="hasAnchor" id="examples"><a class="anchor" href="#examples"></a>Examp
 #&gt; 4 -1.0185754  0.0032410399 -0.8885555
 #&gt; 5 -1.0717912 -0.0004168576 -0.8424833
 #&gt; 6  0.3035286  0.0002982276  0.4582867</div><div class='input'>
-<span class='fu'>variable_select</span>(<span class='no'>population</span>, <span class='no'>variables</span>, <span class='no'>sample</span>, <span class='no'>operation</span>) <span class='kw'>%&gt;%</span> <span class='fu'>head</span>()</div><div class='output co'>#&gt; INFO [2017-09-12 11:16:42] excluded:
-#&gt; INFO [2017-09-12 11:16:42] 	 x</div><div class='output co'>#&gt; # A tibble: 6 x 2
+<span class='kw pkg'>futile.logger</span><span class='kw ns'>::</span><span class='fu'><a href='http://www.rdocumentation.org/packages/futile.logger/topics/flog.threshold'>flog.threshold</a></span>(<span class='kw pkg'>futile.logger</span><span class='kw ns'>::</span><span class='no'><a href='http://www.rdocumentation.org/packages/futile.logger/topics/logger.options'>ERROR</a></span>)</div><div class='output co'>#&gt; NULL</div><div class='input'>
+<span class='fu'>variable_select</span>(<span class='no'>population</span>, <span class='no'>variables</span>, <span class='no'>sample</span>, <span class='no'>operation</span>) <span class='kw'>%&gt;%</span> <span class='fu'>head</span>()</div><div class='output co'>#&gt; # A tibble: 6 x 2
 #&gt;               y          z
 #&gt;           &lt;dbl&gt;      &lt;dbl&gt;
 #&gt; 1  0.0010527115  0.3275278
diff --git a/man/correlation_threshold.Rd b/man/correlation_threshold.Rd
index c44dfa8..bb968d4 100644
--- a/man/correlation_threshold.Rd
+++ b/man/correlation_threshold.Rd
@@ -26,7 +26,7 @@ character vector specifying observation variables to be excluded.
 }
 \examples{
 
-library(magrittr)
+suppressMessages(suppressWarnings(library(magrittr)))
 sample <- tibble::data_frame(
    x = rnorm(30),
    y = rnorm(30)/1000
diff --git a/man/normalize.Rd b/man/normalize.Rd
index ceea78b..e58a59c 100644
--- a/man/normalize.Rd
+++ b/man/normalize.Rd
@@ -27,7 +27,7 @@ normalized data of the same class as \code{population}.
 \code{normalize} normalizes observation variables based on the specified normalization method.
 }
 \examples{
-library(magrittr)
+suppressMessages(suppressWarnings(library(magrittr)))
 population <- tibble::data_frame(
    Metadata_group = c("control", "control", "control", "control",
                       "experiment", "experiment", "experiment", "experiment"),
diff --git a/man/variable_select.Rd b/man/variable_select.Rd
index 9d5d4bf..633121d 100644
--- a/man/variable_select.Rd
+++ b/man/variable_select.Rd
@@ -29,7 +29,7 @@ variable-selected data of the same class as \code{population}.
 # In this example, we use `correlation_threshold` as the operation for 
 # variable selection.
 
-library(magrittr)
+suppressMessages(suppressWarnings(library(magrittr)))
 population <- tibble::data_frame(
    x = rnorm(100),
    y = rnorm(100)/1000
@@ -49,6 +49,8 @@ cor(sample)
 
 head(population)
 
+futile.logger::flog.threshold(futile.logger::ERROR)
+
 variable_select(population, variables, sample, operation) \%>\% head()
 
 }
diff --git a/tests/testthat/test-aggregate.R b/tests/testthat/test-aggregate.R
index a7eb030..14b8054 100644
--- a/tests/testthat/test-aggregate.R
+++ b/tests/testthat/test-aggregate.R
@@ -9,7 +9,7 @@ test_that("`aggregate` aggregates data", {
     )
 
   db <- dplyr::src_sqlite(":memory:", create = T)
-  
+
   data <- dplyr::copy_to(db, data)
 
   expect_equal(
@@ -31,7 +31,7 @@ test_that("`aggregate` aggregates data", {
       dplyr::collect(),
     data %>%
       dplyr::group_by(g) %>%
-      dplyr::summarise_at(.funs =dplyr::funs(median), .vars = c("x", "y"))
+      dplyr::summarise_at(.funs = dplyr::funs(median), .vars = c("x", "y"))
   )
 
   expect_equal(
@@ -42,6 +42,7 @@ test_that("`aggregate` aggregates data", {
       dplyr::collect(),
     data %>%
       dplyr::group_by(g) %>%
-      dplyr::summarise_at(.funs =c(dplyr::funs(mean), dplyr::funs(sd)), .vars = c("x", "y"))
+      dplyr::summarise_at(.funs = c(dplyr::funs(mean), dplyr::funs(sd)),
+                          .vars = c("x", "y"))
   )
 })
diff --git a/tests/testthat/test-correlation_threshold.R b/tests/testthat/test-correlation_threshold.R
index 120bc2a..95396b5 100644
--- a/tests/testthat/test-correlation_threshold.R
+++ b/tests/testthat/test-correlation_threshold.R
@@ -1,6 +1,7 @@
 context("correlation_threshold")
 
-test_that("`correlation_threshold` selects variables that are not highly correlated", {
+test_that(
+  "`correlation_threshold` selects variables that are not highly correlated", {
 
   set.seed(123)
   data <- data.frame(x = rnorm(30))
@@ -8,7 +9,7 @@ test_that("`correlation_threshold` selects variables that are not highly correla
   data$z <- data$x + rnorm(30) / 1000
 
   db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
-  
+
   data <- dplyr::copy_to(db, data)
 
   expect_equal(
diff --git a/tests/testthat/test-count_na_rows.R b/tests/testthat/test-count_na_rows.R
index 81c091f..3c4e5da 100644
--- a/tests/testthat/test-count_na_rows.R
+++ b/tests/testthat/test-count_na_rows.R
@@ -7,9 +7,9 @@ test_that("`count_na_rows` returns the frequency of NAs per variable", {
   data[c(2, 3, 5), "y"] <- NA
 
   db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
-  
+
   data <- dplyr::copy_to(db, data)
-  
+
   expect_equal(
     count_na_rows(population = data,
                     variables = c("x", "y", "z")),
diff --git a/tests/testthat/test-cytominer.R b/tests/testthat/test-cytominer.R
index 32b290c..267c957 100644
--- a/tests/testthat/test-cytominer.R
+++ b/tests/testthat/test-cytominer.R
@@ -1,7 +1,7 @@
 context("cytominer integration test")
 
 test_that("cytominer can process dataset with a normalized schema", {
-  
+
   skip_on_os("windows")
 
   futile.logger::flog.threshold(futile.logger::WARN)
@@ -129,9 +129,9 @@ test_that("cytominer can process dataset with a normalized schema", {
 })
 
 test_that("cytominer can process dataset with a CellProfiler schema", {
-  
+
   skip_on_os("windows")
-  
+
   futile.logger::flog.threshold(futile.logger::WARN)
 
   fixture <-
@@ -159,19 +159,21 @@ test_that("cytominer can process dataset with a CellProfiler schema", {
                       by = c("TableNumber", "ImageNumber", "ObjectNumber"))
 
   object %<>% dplyr::inner_join(image %>%
-                                  dplyr::select(TableNumber,
-                                                ImageNumber,
-                                                image_Metadata_Barcode,
-                                                image_Metadata_Well,
-                                                image_Metadata_isDebris) ,
-                                by = c("TableNumber", "ImageNumber")) 
-  
-  # need to rename individually because of https://github.com/tidyverse/dplyr/issues/2860
-  object %<>% dplyr::rename(g_plate = image_Metadata_Barcode)
-  object %<>% dplyr::rename(g_well = image_Metadata_Well)
-  object %<>% dplyr::rename(g_table = TableNumber)
-  object %<>% dplyr::rename(g_image = ImageNumber)
-  object %<>% dplyr::rename(q_debris = image_Metadata_isDebris)
+                                  dplyr::select("TableNumber",
+                                                "ImageNumber",
+                                                "image_Metadata_Barcode",
+                                                "image_Metadata_Well",
+                                                "image_Metadata_isDebris"),
+                                by = c("TableNumber", "ImageNumber"))
+
+  # need to rename individually because of
+  # https://github.com/tidyverse/dplyr/issues/2860
+  # https://github.com/tidyverse/dplyr/issues/2896
+  object %<>% dplyr::rename(g_plate = "image_Metadata_Barcode")
+  object %<>% dplyr::rename(g_well = "image_Metadata_Well")
+  object %<>% dplyr::rename(g_table = "TableNumber")
+  object %<>% dplyr::rename(g_image = "ImageNumber")
+  object %<>% dplyr::rename(q_debris = "image_Metadata_isDebris")
 
   futile.logger::flog.info("Created table for objects")
 
@@ -214,7 +216,7 @@ test_that("cytominer can process dataset with a CellProfiler schema", {
 
   # Coalesce can't handle the large number of columns so skipping the
   # `na_rows_removed` step
-  na_rows_removed <- debris_removed 
+  na_rows_removed <- debris_removed
 
   # dplyr::collect is forced below for `population` and `sample`
   # not doing this is resulting in "parser stack overflow" likely because
@@ -253,7 +255,7 @@ test_that("cytominer can process dataset with a CellProfiler schema", {
   feature_cols <-
     colnames(cleaned) %>%
     stringr::str_subset("^Nuclei_|^Cells_|^Cytoplasm_")
-  
+
   # tranformation (generalized log by default)
   transformed <-
     transform(
diff --git a/tests/testthat/test-drop_na_columns.R b/tests/testthat/test-drop_na_columns.R
index a50d312..379b15e 100644
--- a/tests/testthat/test-drop_na_columns.R
+++ b/tests/testthat/test-drop_na_columns.R
@@ -5,9 +5,9 @@ test_that("`drop_na_columns` removes columns have only NAs", {
   data <- data.frame(x = rnorm(5), y = NA)
 
   db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
-  
+
   data <- dplyr::copy_to(db, data)
-  
+
   expect_equal(
     drop_na_columns(population = data,
                     variables = c("x", "y")),
diff --git a/tests/testthat/test-drop_na_rows.R b/tests/testthat/test-drop_na_rows.R
index 4945ea9..cfa71a4 100644
--- a/tests/testthat/test-drop_na_rows.R
+++ b/tests/testthat/test-drop_na_rows.R
@@ -7,12 +7,12 @@ test_that("`drop_na_rows` removes rows have only NAs", {
     tibble::rownames_to_column()
 
   db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
-  
+
   data <- dplyr::copy_to(db, data)
-  
+
   data %<>% dplyr::filter(x != 1)
 
-  drop_na_rows.data.frame <- function(population, variables) {
+  drop_na_rows_data_frame <- function(population, variables) {
     population %>%
       tidyr::gather_("key", "value", variables) %>%
       dplyr::filter(!is.na(value)) %>%
@@ -26,7 +26,7 @@ test_that("`drop_na_rows` removes rows have only NAs", {
       dplyr::arrange(rowname),
     data %>%
       dplyr::collect() %>%
-      drop_na_rows.data.frame(variables = c("x", "y")) %>%
+      drop_na_rows_data_frame(variables = c("x", "y")) %>%
       dplyr::arrange(rowname)
   )
 
@@ -37,10 +37,10 @@ test_that("`drop_na_rows` removes rows have only NAs", {
       dplyr::arrange(rowname),
     data %>%
       dplyr::collect() %>%
-      drop_na_rows.data.frame(variables = c("x")) %>%
+      drop_na_rows_data_frame(variables = c("x")) %>%
       dplyr::arrange(rowname)
   )
-  
+
   # repeat tests with data frames instead of sql tables
   expect_equal(
     drop_na_rows(population = data %>% dplyr::collect(),
@@ -48,19 +48,19 @@ test_that("`drop_na_rows` removes rows have only NAs", {
       dplyr::arrange(rowname),
     data %>%
       dplyr::collect() %>%
-      drop_na_rows.data.frame(variables = c("x", "y")) %>%
+      drop_na_rows_data_frame(variables = c("x", "y")) %>%
       dplyr::arrange(rowname)
   )
-  
+
   expect_equal(
     drop_na_rows(population = data %>% dplyr::collect(),
       variables = c("x")) %>%
       dplyr::arrange(rowname),
     data %>%
       dplyr::collect() %>%
-      drop_na_rows.data.frame(variables = c("x")) %>%
+      drop_na_rows_data_frame(variables = c("x")) %>%
       dplyr::arrange(rowname)
   )
-  
+
   DBI::dbDisconnect(db)
 })
diff --git a/tests/testthat/test-generalized_log.R b/tests/testthat/test-generalized_log.R
index 6745f71..a61631b 100644
--- a/tests/testthat/test-generalized_log.R
+++ b/tests/testthat/test-generalized_log.R
@@ -4,12 +4,12 @@ test_that("`generalized_log` generalized_logs data", {
 
   data <- data.frame(x = rnorm(5), y = rnorm(5))
 
-  # The call to dplyr::src_sqlite was not changed to DBI::dbConnect
-  # because it results in an error "no such function: log"
-  # db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
-  # data <- dplyr::copy_to(db, data)
-  data <- dplyr::copy_to(dplyr::src_sqlite(":memory:", create = T),
-                        data)
+  db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
+
+  #https://github.com/tidyverse/dplyr/issues/3093
+  RSQLite::initExtension(db)
+
+  data <- dplyr::copy_to(db, data)
 
   glog <- function(x, c=1) log( (x + ( x ^ 2 + c ^ 2) ^ 0.5 ) / 2 )
 
@@ -24,4 +24,6 @@ test_that("`generalized_log` generalized_logs data", {
                     variables = c("x")) %>% dplyr::collect(),
     within(data %>% dplyr::collect(), x <- glog(x))
   )
+
+  DBI::dbDisconnect(db)
 })
diff --git a/tests/testthat/test-normalize.R b/tests/testthat/test-normalize.R
index fbb731d..e9a0c42 100644
--- a/tests/testthat/test-normalize.R
+++ b/tests/testthat/test-normalize.R
@@ -2,17 +2,17 @@ context("normalize")
 
 test_that("`normalize' normalizes data", {
   set.seed(123)
-  
+
   generate_matrix <- function(cvec, svec) {
     n <- 30
 
     m <- matrix(runif(n * 2), n, 2) %>% scale(.)
 
     m %<>% sweep(., 2, svec, FUN = "*") %>% sweep(., 2, cvec, FUN = "+")
-    
+
     m[1, 1] <- NA
-    
-    cbind(scale(m), m) %>% as.data.frame()    
+
+    cbind(scale(m), m) %>% as.data.frame()
   }
 
   data <-
@@ -26,7 +26,7 @@ test_that("`normalize' normalizes data", {
       generate_matrix(rnorm(2), rnorm(2) ^ 2) %>%
         dplyr::mutate(g1 = "b", g2 = "y")
     )
-  
+
   data %<>% dplyr::mutate(g3 = seq(nrow(data)))
 
   data_normalized <-
@@ -39,12 +39,12 @@ test_that("`normalize' normalizes data", {
     dplyr::select(g1, g2, g3, V3, V4) %>%
     dplyr::rename(x = V3, y = V4)
 
-  # The call to dplyr::src_sqlite was not changed to DBI::dbConnect
-  # because it results in an error "no such function: STDEV"
-  # db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
-  # data <- dplyr::copy_to(db, data)
-  data <- dplyr::copy_to(dplyr::src_sqlite(":memory:", create = T),
-                        data)
+  db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
+
+  # https://github.com/tidyverse/dplyr/issues/3093
+  RSQLite::initExtension(db)
+
+  data <- dplyr::copy_to(db, data)
 
   expect_lt(
     mean(abs(
@@ -86,6 +86,6 @@ test_that("`normalize' normalizes data", {
     ),
     .Machine$double.eps * 1000000
   )
-  
 
+  DBI::dbDisconnect(db)
 })
diff --git a/tests/testthat/test-replicate_correlation.R b/tests/testthat/test-replicate_correlation.R
index 4b163ae..0c6f738 100644
--- a/tests/testthat/test-replicate_correlation.R
+++ b/tests/testthat/test-replicate_correlation.R
@@ -1,7 +1,7 @@
 context("replicate_correlation")
 
-test_that("`replicate_correlation` measure correlation between replicates in each feature", {
-
+test_that(paste0("`replicate_correlation` measures correlation",
+                 "between replicates in each feature"), {
   set.seed(123)
 
   x1 <- rnorm(10)
@@ -13,7 +13,7 @@ test_that("`replicate_correlation` measure correlation between replicates in eac
 
   correlations <-
     tibble::data_frame(
-      variable = c('x', 'y', 'z'),
+      variable = c("x", "y", "z"),
       median = c(
         cor(x1, x2, method = "pearson"),
         cor(y1, y2, method = "pearson"),
@@ -22,7 +22,7 @@ test_that("`replicate_correlation` measure correlation between replicates in eac
 
   correlations_batched <-
     tibble::data_frame(
-      variable = c('x', 'y', 'z'),
+      variable = c("x", "y", "z"),
       b1 = c(
         cor(x1[1:5], x2[1:5], method = "pearson"),
         cor(y1[1:5], y2[1:5], method = "pearson"),
@@ -41,11 +41,11 @@ test_that("`replicate_correlation` measure correlation between replicates in eac
     dplyr::ungroup() %>%
     dplyr::select(-b1, -b2)
 
-  batch <- rep(rep(1:2, each=5), 2)
+  batch <- rep(rep(1:2, each = 5), 2)
 
   cpd <- rep(1:10, 2)
 
-  replicate_id <- rep(1:2, each=10)
+  replicate_id <- rep(1:2, each = 10)
 
   data <- data.frame(x = c(x1, x2),
                      y = c(y1, y2),
@@ -60,8 +60,8 @@ test_that("`replicate_correlation` measure correlation between replicates in eac
                           strata = c("cpd"),
                           replicates = 2,
                           cores = 2) %>%
-      dplyr::select_(.dots = c('variable', 'median')) %>%
-      dplyr::arrange_(.dots = c('variable')) %>%
+      dplyr::select_(.dots = c("variable", "median")) %>%
+      dplyr::arrange_(.dots = c("variable")) %>%
       as.data.frame(),
     correlations %>%
       as.data.frame(),
@@ -75,8 +75,8 @@ test_that("`replicate_correlation` measure correlation between replicates in eac
                           replicates = 2,
                           replicate_by = "replicate_id",
                           cores = 2) %>%
-      dplyr::select_(.dots = c('variable', 'median')) %>%
-      dplyr::arrange_(.dots = c('variable')) %>%
+      dplyr::select_(.dots = c("variable", "median")) %>%
+      dplyr::arrange_(.dots = c("variable")) %>%
       as.data.frame(),
     correlations %>%
       as.data.frame(),
@@ -91,7 +91,7 @@ test_that("`replicate_correlation` measure correlation between replicates in eac
                           split_by = "batch",
                           replicate_by = "replicate_id",
                           cores = 2) %>%
-      dplyr::arrange_(.dots = c('variable')) %>%
+      dplyr::arrange_(.dots = c("variable")) %>%
       as.data.frame(),
     correlations_batched %>%
       as.data.frame(),
diff --git a/tests/testthat/test-variance_threshold.R b/tests/testthat/test-variance_threshold.R
index 658a7ce..ca73dd0 100644
--- a/tests/testthat/test-variance_threshold.R
+++ b/tests/testthat/test-variance_threshold.R
@@ -1,13 +1,14 @@
 context("variance_threshold")
 
-test_that("`variance_threshold` selects variables that have non-trivial variance", {
+test_that(
+  "`variance_threshold` selects variables that have non-trivial variance", {
 
   data <- data.frame(x = rnorm(30), y = 1)
 
   db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
-  
+
   data <- dplyr::copy_to(db, data)
-  
+
   expect_equal(
     variance_threshold(variables = c("x", "y"),
                        sample = data %>% dplyr::collect()),