Skip to content

Commit

Permalink
Refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
martinstuder committed Oct 21, 2024
1 parent e18e4cd commit 74e5967
Show file tree
Hide file tree
Showing 11 changed files with 74 additions and 94 deletions.
58 changes: 29 additions & 29 deletions .github/workflows/ci-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,31 +26,31 @@ jobs:
matrix:
config:
# Spark 3.5 seems currently unavailable so we use 3.4 for now
# - os-name: ubuntu
# os-version: latest
# java: 17
# spark: "3.4"
# r-version: release
# - os-name: ubuntu
# os-version: latest
# java: 8
# spark: "2.4"
# r-version: devel
# - os-name: ubuntu
# os-version: "22.04"
# java: 11
# spark: "3.0"
# r-version: oldrel
# - os-name: macos
# os-version: latest
# java: 8
# spark: "3.2"
# r-version: release
# - os-name: macos
# os-version: latest
# java: 17
# spark: "3.4"
# r-version: devel
- os-name: ubuntu
os-version: latest
java: 17
spark: "3.4"
r-version: release
- os-name: ubuntu
os-version: latest
java: 8
spark: "2.4"
r-version: devel
- os-name: ubuntu
os-version: "22.04"
java: 11
spark: "3.0"
r-version: oldrel
- os-name: macos
os-version: latest
java: 8
spark: "3.2"
r-version: release
- os-name: macos
os-version: latest
java: 17
spark: "3.4"
r-version: devel
- os-name: windows
os-version: latest
java: 8
Expand All @@ -63,9 +63,9 @@ jobs:
r-version: release
env:
SPARK_VERSION: ${{ matrix.config.spark }}
BILLING_PROJECT_ID: ${{ secrets.GCLOUD_BILLING_PROJECT_ID }}
MATERIALIZATION_DATASET: test
GOOGLE_APPLICATION_CREDENTIALS: ${{ github.workspace }}/adc.json
BIGQUERY_PROJECT_ID: ${{ secrets.GCLOUD_BILLING_PROJECT_ID }}
BIGQUERY_MATERIALIZATION_DATASET: test
BIGQUERY_APPLICATION_CREDENTIALS: ${{ github.workspace }}/adc.json
R_DEFAULT_INTERNET_TIMEOUT: 1800

steps:
Expand Down Expand Up @@ -148,7 +148,7 @@ jobs:
ADC: ${{ secrets.GCLOUD_APPLICATION_CREDENTIALS }}
shell: bash
run: |
echo $ADC > $GOOGLE_APPLICATION_CREDENTIALS
echo $ADC > ${BIGQUERY_APPLICATION_CREDENTIALS}
- name: Run R CMD check
uses: r-lib/actions/check-r-package@v2
Expand Down
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# Generated by roxygen2: do not edit by hand

export(bigquery_defaults)
export(default_billing_project_id)
export(default_materialization_dataset)
export(default_materialization_project)
export(default_project_id)
export(default_service_account_key_file)
export(spark_read_bigquery)
export(spark_write_bigquery)
Expand Down
19 changes: 9 additions & 10 deletions R/defaults.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#' @title Google BigQuery Default Settings
#' @description Sets default values for several Google BigQuery related settings.
#' @param billingProjectId Default Google Cloud Platform (GCP) project ID for billing purposes.
#' This is the project on whose behalf to perform BigQuery operations.
#' @param projectId Default Google Cloud Platform (GCP) project ID to use.
#' @param materializationProject Project to use for materializing SQL queries. See also
#' \code{materializationDataset}. Defaults to the default project (\code{projectId}).
#' @param materializationDataset Dataset (in materialization project) which is used for
Expand Down Expand Up @@ -32,7 +31,7 @@
#'
#' \code{\link{spark_write_bigquery}}
#'
#' \code{\link{default_billing_project_id}}
#' \code{\link{default_project_id}}
#'
#' \code{\link{default_materialization_project}}
#'
Expand All @@ -41,8 +40,8 @@
#' \code{\link{default_service_account_key_file}}
#' @keywords database connection
#' @export
bigquery_defaults <- function(billingProjectId,
materializationProject = billingProjectId,
bigquery_defaults <- function(projectId,
materializationProject = projectId,
materializationDataset = NULL,
serviceAccountKeyFile = NULL) {
if (is.null(serviceAccountKeyFile)) {
Expand All @@ -53,19 +52,19 @@ bigquery_defaults <- function(billingProjectId,
}

options(
"sparkbq.default.billingProjectId" = billingProjectId,
"sparkbq.default.projectId" = projectId,
"sparkbq.default.materializationProject" = materializationProject,
"sparkbq.default.materializationDataset" = materializationDataset,
"sparkbq.default.serviceAccountKeyFile" = serviceAccountKeyFile
)
}

#' @title Default Google BigQuery Billing Project ID
#' @description Returns the default Google BigQuery billing project ID.
#' @title Default Google BigQuery Project ID
#' @description Returns the default Google BigQuery project ID.
#' @seealso \code{\link{bigquery_defaults}}
#' @export
default_billing_project_id <- function() {
getOption("sparkbq.default.billingProjectId")
default_project_id <- function() {
getOption("sparkbq.default.projectId")
}

#' @title Default Google BigQuery Materialization Project
Expand Down
12 changes: 4 additions & 8 deletions R/spark_read_bigquery.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,8 @@
#' @param sc \code{\link[sparklyr]{spark_connection}} provided by sparklyr.
#' @param name The name to assign to the newly generated table (see also
#' \code{\link[sparklyr]{spark_read_source}}).
#' @param billingProjectId Google Cloud Platform project ID for billing purposes.
#' This is the project on whose behalf to perform BigQuery operations.
#' Defaults to \code{\link{default_billing_project_id}}.
#' @param projectId Google Cloud Platform project ID of BigQuery dataset.
#' Defaults to \code{billingProjectId}.
#' Defaults to \code{\link{default_project_id}}.
#' @param datasetId Google BigQuery dataset ID (may contain letters, numbers and underscores).
#' Either both of \code{datasetId} and \code{tableId} or \code{sqlQuery} must be specified.
#' @param tableId Google BigQuery table ID (may contain letters, numbers and underscores).
Expand All @@ -17,7 +14,7 @@
#' (SQL-2011). Legacy SQL is not supported. Tables are specified as
#' \code{<project_id>.<dataset_id>.<table_id>}.
#' @param materializationProject Project to use for materializing SQL queries. See also
#' \code{materializationDataset}. Defaults to billing project
#' \code{materializationDataset}. Defaults to
#' \code{\link{default_materialization_project}}.
#' @param materializationDataset Dataset (in materialization project) which is used for
#' materializing SQL queries (see \code{sqlQuery}). The GCP user
Expand Down Expand Up @@ -66,7 +63,7 @@
#' sc <- spark_connect(master = "local", config = config)
#'
#' bigquery_defaults(
#' billingProjectId = "<your_billing_project_id>",
#' projectId = "<your_project_id>",
#' serviceAccountKeyFile = "<your_service_account_key_file>")
#'
#' # Reading the public shakespeare data table
Expand All @@ -84,8 +81,7 @@
#' @export
spark_read_bigquery <- function(sc,
name,
billingProjectId = default_billing_project_id(),
projectId = billingProjectId,
projectId = default_project_id(),
datasetId = NULL,
tableId = NULL,
sqlQuery = NULL,
Expand Down
10 changes: 3 additions & 7 deletions R/spark_write_bigquery.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,8 @@
#' Data is written directly to BigQuery using the
#' \href{https://cloud.google.com/bigquery/docs/write-api}{BigQuery Storage Write API}.
#' @param data Spark DataFrame to write to Google BigQuery.
#' @param billingProjectId Google Cloud Platform project ID for billing purposes.
#' This is the project on whose behalf to perform BigQuery operations.
#' Defaults to \code{default_billing_project_id()}.
#' @param projectId Google Cloud Platform project ID of BigQuery dataset.
#' Defaults to \code{billingProjectId}.
#' Defaults to \code{default_project_id()}.
#' @param datasetId Google BigQuery dataset ID (may contain letters, numbers and underscores).
#' @param tableId Google BigQuery table ID (may contain letters, numbers and underscores).
#' @param serviceAccountKeyFile Google Cloud service account key file to use for authentication
Expand Down Expand Up @@ -50,7 +47,7 @@
#' sc <- spark_connect(master = "local", config = config)
#'
#' bigquery_defaults(
#' billingProjectId = "<your_billing_project_id>",
#' projectId = "<your_project_id>",
#' serviceAccountKeyFile = "<your_service_account_key_file>")
#'
#' # Copy mtcars to Spark
Expand All @@ -65,8 +62,7 @@
#' @importFrom sparklyr spark_write_source
#' @export
spark_write_bigquery <- function(data,
billingProjectId = default_billing_project_id(),
projectId = billingProjectId,
projectId = default_project_id(),
datasetId,
tableId,
serviceAccountKeyFile = default_service_account_key_file(),
Expand Down
9 changes: 4 additions & 5 deletions man/bigquery_defaults.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 0 additions & 14 deletions man/default_billing_project_id.Rd

This file was deleted.

14 changes: 14 additions & 0 deletions man/default_project_id.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 4 additions & 9 deletions man/spark_read_bigquery.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 3 additions & 8 deletions man/spark_write_bigquery.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions tests/testthat/setup.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Setting some BigQuery defaults for use in tests
bigquery_defaults(
billingProjectId = Sys.getenv("BILLING_PROJECT_ID"),
materializationDataset = Sys.getenv("MATERIALIZATION_DATASET"),
serviceAccountKeyFile = Sys.getenv("GOOGLE_APPLICATION_CREDENTIALS")
projectId = Sys.getenv("BIGQUERY_PROJECT_ID"),
materializationDataset = Sys.getenv("BIGQUERY_MATERIALIZATION_DATASET"),
serviceAccountKeyFile = Sys.getenv("BIGQUERY_APPLICATION_CREDENTIALS")
)

options(spark.version = Sys.getenv("SPARK_VERSION", "3.5"))

0 comments on commit 74e5967

Please sign in to comment.