Skip to content

Commit

Permalink
Fix
Browse files Browse the repository at this point in the history
  • Loading branch information
martinstuder committed Oct 28, 2024
1 parent 68e177b commit 37233f6
Show file tree
Hide file tree
Showing 7 changed files with 78 additions and 67 deletions.
61 changes: 30 additions & 31 deletions .github/workflows/ci-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,36 +30,36 @@ jobs:
java: 17
spark: "3.5"
r-version: release
# - os-name: ubuntu
# os-version: latest
# java: 8
# spark: "2.4"
# r-version: devel
# - os-name: ubuntu
# os-version: "22.04"
# java: 11
# spark: "3.0"
# r-version: oldrel
# - os-name: macos
# os-version: latest
# java: 8
# spark: "3.2"
# r-version: release
# - os-name: macos
# os-version: latest
# java: 17
# spark: "3.4"
# r-version: devel
# - os-name: windows
# os-version: latest
# java: 8
# spark: "3.1"
# r-version: oldrel
# - os-name: windows
# os-version: "2022"
# java: 17
# spark: "3.3"
# r-version: release
- os-name: ubuntu
os-version: latest
java: 8
spark: "2.4"
r-version: devel
- os-name: ubuntu
os-version: "22.04"
java: 11
spark: "3.0"
r-version: oldrel
- os-name: macos
os-version: latest
java: 8
spark: "3.2"
r-version: release
- os-name: macos
os-version: latest
java: 17
spark: "3.4"
r-version: devel
- os-name: windows
os-version: latest
java: 8
spark: "3.1"
r-version: oldrel
- os-name: windows
os-version: "2022"
java: 17
spark: "3.3"
r-version: release
env:
SPARK_VERSION: ${{ matrix.config.spark }}
BIGQUERY_PROJECT_ID: mirai-sbb
Expand Down Expand Up @@ -150,7 +150,6 @@ jobs:
shell: bash
run: |
echo "$ADC" > $BIGQUERY_APPLICATION_CREDENTIALS
chmod a+r $BIGQUERY_APPLICATION_CREDENTIALS
- name: Run R CMD check
uses: r-lib/actions/check-r-package@v2
Expand Down
25 changes: 15 additions & 10 deletions R/spark_read_bigquery.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
#' @param sc \code{\link[sparklyr]{spark_connection}} provided by sparklyr.
#' @param name The name to assign to the newly generated table (see also
#' \code{\link[sparklyr]{spark_read_source}}).
#' @param projectId Google Cloud Platform project ID of BigQuery dataset.
#' @param billingProjectId Google Cloud Platform project ID for billing purposes.
#' Defaults to \code{\link{default_project_id}}.
#' @param projectId Google Cloud Platform project ID of BigQuery dataset to query from.
#' Defaults to \code{\link{default_project_id}}.
#' @param datasetId Google BigQuery dataset ID (may contain letters, numbers and underscores).
#' Either both of \code{datasetId} and \code{tableId} or \code{sqlQuery} must be specified.
Expand Down Expand Up @@ -38,19 +40,19 @@
#' Spark DataFrame.
#' @references
#' \url{https://github.com/GoogleCloudDataproc/spark-bigquery-connector}
#'
#'
#' \url{https://cloud.google.com/bigquery/docs/datasets}
#'
#'
#' \url{https://cloud.google.com/bigquery/docs/tables}
#'
#'
#' \url{https://cloud.google.com/bigquery/docs/reference/standard-sql/}
#'
#'
#' \url{https://cloud.google.com/bigquery/pricing}
#'
#'
#' \url{https://cloud.google.com/bigquery/docs/dataset-locations}
#'
#'
#' \url{https://cloud.google.com/docs/authentication/}
#'
#'
#' \url{https://cloud.google.com/bigquery/docs/authentication/}
#' @family Spark serialization routines
#' @seealso \code{\link[sparklyr]{spark_read_source}}, \code{\link{spark_write_bigquery}},
Expand Down Expand Up @@ -81,6 +83,7 @@
#' @export
spark_read_bigquery <- function(sc,
name,
billingProjectId = default_project_id(),
projectId = default_project_id(),
datasetId = NULL,
tableId = NULL,
Expand All @@ -91,9 +94,11 @@ spark_read_bigquery <- function(sc,
additionalParameters = NULL,
memory = FALSE,
...) {
parameters <- c(list(), additionalParameters)
parameters <- c(list(parentProject = billingProjectId),
additionalParameters)

if (!is.null(serviceAccountKeyFile)) {
parameters[["credentialsFile"]] = gsub("\\\\", "/", serviceAccountKeyFile)
parameters[["credentialsFile"]] <- normalizePath(serviceAccountKeyFile, winslash = "/")
}

if (!is.null(datasetId) && !is.null(tableId)) {
Expand Down
22 changes: 14 additions & 8 deletions R/spark_write_bigquery.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
#' Data is written directly to BigQuery using the
#' \href{https://cloud.google.com/bigquery/docs/write-api}{BigQuery Storage Write API}.
#' @param data Spark DataFrame to write to Google BigQuery.
#' @param projectId Google Cloud Platform project ID of BigQuery dataset.
#' @param billingProjectId Google Cloud Platform project ID for billing purposes.
#' Defaults to \code{\link{default_project_id}}.
#' @param projectId Google Cloud Platform project ID of BigQuery dataset to write to.
#' Defaults to \code{default_project_id()}.
#' @param datasetId Google BigQuery dataset ID (may contain letters, numbers and underscores).
#' @param tableId Google BigQuery table ID (may contain letters, numbers and underscores).
Expand Down Expand Up @@ -33,7 +35,7 @@
#' \url{https://cloud.google.com/docs/authentication/}
#'
#' \url{https://cloud.google.com/bigquery/docs/authentication/}
#'
#'
#' \url{https://cloud.google.com/bigquery/docs/write-api}
#'
#' @family Spark serialization routines
Expand Down Expand Up @@ -62,21 +64,25 @@
#' @importFrom sparklyr spark_write_source
#' @export
spark_write_bigquery <- function(data,
billingProjectId = default_project_id(),
projectId = default_project_id(),
datasetId,
tableId,
serviceAccountKeyFile = default_service_account_key_file(),
additionalParameters = NULL,
mode = "error",
...) {
parameters <- c(list(
table = sprintf("%s.%s.%s", projectId, datasetId, tableId),
writeMethod = "direct"
),
additionalParameters)
parameters <- c(
list(
parentProject = billingProjectId,
table = sprintf("%s.%s.%s", projectId, datasetId, tableId),
writeMethod = "direct"
),
additionalParameters
)

if (!is.null(serviceAccountKeyFile)) {
parameters[["credentialsFile"]] = gsub("\\\\", "/", serviceAccountKeyFile)
parameters[["credentialsFile"]] <- normalizePath(serviceAccountKeyFile, winslash = "/")
}

spark_write_source(data,
Expand Down
6 changes: 5 additions & 1 deletion man/spark_read_bigquery.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion man/spark_write_bigquery.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 2 additions & 8 deletions tests/testthat/test-read.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@ test_that("reading BigQuery tables works", {
name = "shakespeare",
projectId = "bigquery-public-data",
datasetId = "samples",
tableId = "shakespeare",
additionalParameters = list(parentProject = default_project_id())
tableId = "shakespeare"
)

expect_equal(shakespeare %>% sparklyr::sdf_nrow(), 164656)
Expand All @@ -20,12 +19,7 @@ test_that("executing SQL queries works", {

sc <- sparklyr::spark_connect(master = "local", version = getOption("spark.version"))

shakespeare <- spark_read_bigquery(
sc,
name = "shakespeare",
sqlQuery = "SELECT * FROM bigquery-public-data.samples.shakespeare",
additionalParameters = list(parentProject = default_project_id())
)
shakespeare <- spark_read_bigquery(sc, name = "shakespeare", sqlQuery = "SELECT * FROM bigquery-public-data.samples.shakespeare")

expect_equal(shakespeare %>% sparklyr::sdf_nrow(), 164656)
})
15 changes: 7 additions & 8 deletions tests/testthat/test-write.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,14 @@ test_that("writing BigQuery tables using direct method works", {
mode = "overwrite"
)

mtcars2 <- spark_read_bigquery(
sc,
name = "shakespeare",
datasetId = "test",
tableId = "mtcars"
) %>% sparklyr::collect()
mtcars2 <- spark_read_bigquery(sc,
name = "shakespeare",
datasetId = "test",
tableId = "mtcars") %>% sparklyr::collect()

expect_equal(
mtcars %>% dplyr::arrange_at(names(mtcars)),
mtcars %>% dplyr::arrange_at(names(mtcars)),
as.data.frame(mtcars2) %>% dplyr::arrange_at(names(mtcars)),
ignore_attr = "row.names")
ignore_attr = "row.names"
)
})

0 comments on commit 37233f6

Please sign in to comment.