Refactoring

miraisolutions · Oct 21, 2024 · 74e5967 · 74e5967
1 parent e18e4cd
commit 74e5967
Show file tree

Hide file tree

Showing 11 changed files with 74 additions and 94 deletions.
diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml
@@ -26,31 +26,31 @@ jobs:
       matrix:
         config:
           # Spark 3.5 seems currently unavailable so we use 3.4 for now
-          # - os-name: ubuntu
-          #   os-version: latest
-          #   java: 17
-          #   spark: "3.4"
-          #   r-version: release
-          # - os-name: ubuntu
-          #   os-version: latest
-          #   java: 8
-          #   spark: "2.4"
-          #   r-version: devel
-          # - os-name: ubuntu
-          #   os-version: "22.04"
-          #   java: 11
-          #   spark: "3.0"
-          #   r-version: oldrel
-          # - os-name: macos
-          #   os-version: latest
-          #   java: 8
-          #   spark: "3.2"
-          #   r-version: release
-          # - os-name: macos
-          #   os-version: latest
-          #   java: 17
-          #   spark: "3.4"
-          #   r-version: devel
+          - os-name: ubuntu
+            os-version: latest
+            java: 17
+            spark: "3.4"
+            r-version: release
+          - os-name: ubuntu
+            os-version: latest
+            java: 8
+            spark: "2.4"
+            r-version: devel
+          - os-name: ubuntu
+            os-version: "22.04"
+            java: 11
+            spark: "3.0"
+            r-version: oldrel
+          - os-name: macos
+            os-version: latest
+            java: 8
+            spark: "3.2"
+            r-version: release
+          - os-name: macos
+            os-version: latest
+            java: 17
+            spark: "3.4"
+            r-version: devel
           - os-name: windows
             os-version: latest
             java: 8
@@ -63,9 +63,9 @@ jobs:
             r-version: release
     env:
       SPARK_VERSION: ${{ matrix.config.spark }}
-      BILLING_PROJECT_ID: ${{ secrets.GCLOUD_BILLING_PROJECT_ID }}
-      MATERIALIZATION_DATASET: test
-      GOOGLE_APPLICATION_CREDENTIALS: ${{ github.workspace }}/adc.json
+      BIGQUERY_PROJECT_ID: ${{ secrets.GCLOUD_BILLING_PROJECT_ID }}
+      BIGQUERY_MATERIALIZATION_DATASET: test
+      BIGQUERY_APPLICATION_CREDENTIALS: ${{ github.workspace }}/adc.json
       R_DEFAULT_INTERNET_TIMEOUT: 1800
 
     steps:
@@ -148,7 +148,7 @@ jobs:
           ADC: ${{ secrets.GCLOUD_APPLICATION_CREDENTIALS }}
         shell: bash
         run: |
-          echo $ADC > $GOOGLE_APPLICATION_CREDENTIALS
+          echo $ADC > ${BIGQUERY_APPLICATION_CREDENTIALS}
       
       - name: Run R CMD check
         uses: r-lib/actions/check-r-package@v2

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,9 +1,9 @@
 # Generated by roxygen2: do not edit by hand
 
 export(bigquery_defaults)
-export(default_billing_project_id)
 export(default_materialization_dataset)
 export(default_materialization_project)
+export(default_project_id)
 export(default_service_account_key_file)
 export(spark_read_bigquery)
 export(spark_write_bigquery)

diff --git a/R/defaults.R b/R/defaults.R
@@ -1,7 +1,6 @@
 #' @title Google BigQuery Default Settings
 #' @description Sets default values for several Google BigQuery related settings.
-#' @param billingProjectId Default Google Cloud Platform (GCP) project ID for billing purposes.
-#' This is the project on whose behalf to perform BigQuery operations.
+#' @param projectId Default Google Cloud Platform (GCP) project ID to use.
 #' @param materializationProject Project to use for materializing SQL queries. See also
 #' \code{materializationDataset}. Defaults to the default project (\code{projectId}).
 #' @param materializationDataset Dataset (in materialization project) which is used for
@@ -32,7 +31,7 @@
 #' 
 #' \code{\link{spark_write_bigquery}}
 #' 
-#' \code{\link{default_billing_project_id}}
+#' \code{\link{default_project_id}}
 #' 
 #' \code{\link{default_materialization_project}}
 #' 
@@ -41,8 +40,8 @@
 #' \code{\link{default_service_account_key_file}}
 #' @keywords database connection
 #' @export
-bigquery_defaults <- function(billingProjectId,
-                              materializationProject = billingProjectId,
+bigquery_defaults <- function(projectId,
+                              materializationProject = projectId,
                               materializationDataset = NULL,
                               serviceAccountKeyFile = NULL) {
   if (is.null(serviceAccountKeyFile)) {
@@ -53,19 +52,19 @@ bigquery_defaults <- function(billingProjectId,
   }
 
   options(
-    "sparkbq.default.billingProjectId" = billingProjectId,
+    "sparkbq.default.projectId" = projectId,
     "sparkbq.default.materializationProject" = materializationProject,
     "sparkbq.default.materializationDataset" = materializationDataset,
     "sparkbq.default.serviceAccountKeyFile" = serviceAccountKeyFile
   )
 }
 
-#' @title Default Google BigQuery Billing Project ID
-#' @description Returns the default Google BigQuery billing project ID.
+#' @title Default Google BigQuery Project ID
+#' @description Returns the default Google BigQuery project ID.
 #' @seealso \code{\link{bigquery_defaults}}
 #' @export
-default_billing_project_id <- function() {
-  getOption("sparkbq.default.billingProjectId")
+default_project_id <- function() {
+  getOption("sparkbq.default.projectId")
 }
 
 #' @title Default Google BigQuery Materialization Project

diff --git a/R/spark_read_bigquery.R b/R/spark_read_bigquery.R
@@ -3,11 +3,8 @@
 #' @param sc \code{\link[sparklyr]{spark_connection}} provided by sparklyr.
 #' @param name The name to assign to the newly generated table (see also
 #' \code{\link[sparklyr]{spark_read_source}}).
-#' @param billingProjectId Google Cloud Platform project ID for billing purposes.
-#' This is the project on whose behalf to perform BigQuery operations.
-#' Defaults to \code{\link{default_billing_project_id}}.
 #' @param projectId Google Cloud Platform project ID of BigQuery dataset.
-#' Defaults to \code{billingProjectId}.
+#' Defaults to \code{\link{default_project_id}}.
 #' @param datasetId Google BigQuery dataset ID (may contain letters, numbers and underscores).
 #' Either both of \code{datasetId} and \code{tableId} or \code{sqlQuery} must be specified.
 #' @param tableId Google BigQuery table ID (may contain letters, numbers and underscores).
@@ -17,7 +14,7 @@
 #' (SQL-2011). Legacy SQL is not supported. Tables are specified as
 #' \code{<project_id>.<dataset_id>.<table_id>}.
 #' @param materializationProject Project to use for materializing SQL queries. See also
-#' \code{materializationDataset}. Defaults to billing project
+#' \code{materializationDataset}. Defaults to the project returned by
 #' \code{\link{default_materialization_project}}.
 #' @param materializationDataset Dataset (in materialization project) which is used for
 #' materializing SQL queries (see \code{sqlQuery}). The GCP user
@@ -66,7 +63,7 @@
 #' sc <- spark_connect(master = "local", config = config)
 #'
 #' bigquery_defaults(
-#'   billingProjectId = "<your_billing_project_id>",
+#'   projectId = "<your_project_id>",
 #'   serviceAccountKeyFile = "<your_service_account_key_file>")
 #'
 #' # Reading the public shakespeare data table
@@ -84,8 +81,7 @@
 #' @export
 spark_read_bigquery <- function(sc,
                                 name,
-                                billingProjectId = default_billing_project_id(),
-                                projectId = billingProjectId,
+                                projectId = default_project_id(),
                                 datasetId = NULL,
                                 tableId = NULL,
                                 sqlQuery = NULL,

diff --git a/R/spark_write_bigquery.R b/R/spark_write_bigquery.R
@@ -4,11 +4,8 @@
 #' Data is written directly to BigQuery using the
 #' \href{https://cloud.google.com/bigquery/docs/write-api}{BigQuery Storage Write API}.
 #' @param data Spark DataFrame to write to Google BigQuery.
-#' @param billingProjectId Google Cloud Platform project ID for billing purposes.
-#' This is the project on whose behalf to perform BigQuery operations.
-#' Defaults to \code{default_billing_project_id()}.
 #' @param projectId Google Cloud Platform project ID of BigQuery dataset.
-#' Defaults to \code{billingProjectId}.
+#' Defaults to \code{default_project_id()}.
 #' @param datasetId Google BigQuery dataset ID (may contain letters, numbers and underscores).
 #' @param tableId Google BigQuery table ID (may contain letters, numbers and underscores).
 #' @param serviceAccountKeyFile Google Cloud service account key file to use for authentication
@@ -50,7 +47,7 @@
 #' sc <- spark_connect(master = "local", config = config)
 #'
 #' bigquery_defaults(
-#'   billingProjectId = "<your_billing_project_id>",
+#'   projectId = "<your_project_id>",
 #'   serviceAccountKeyFile = "<your_service_account_key_file>")
 #'
 #' # Copy mtcars to Spark
@@ -65,8 +62,7 @@
 #' @importFrom sparklyr spark_write_source
 #' @export
 spark_write_bigquery <- function(data,
-                                 billingProjectId = default_billing_project_id(),
-                                 projectId = billingProjectId,
+                                 projectId = default_project_id(),
                                  datasetId,
                                  tableId,
                                  serviceAccountKeyFile = default_service_account_key_file(),

diff --git a/man/bigquery_defaults.Rd b/man/bigquery_defaults.Rd
diff --git a/man/default_billing_project_id.Rd b/man/default_billing_project_id.Rd
diff --git a/man/default_project_id.Rd b/man/default_project_id.Rd
diff --git a/man/spark_read_bigquery.Rd b/man/spark_read_bigquery.Rd
diff --git a/man/spark_write_bigquery.Rd b/man/spark_write_bigquery.Rd
diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R
@@ -1,8 +1,8 @@
 # Setting some BigQuery defaults for use in tests
 bigquery_defaults(
-  billingProjectId = Sys.getenv("BILLING_PROJECT_ID"),
-  materializationDataset = Sys.getenv("MATERIALIZATION_DATASET"),
-  serviceAccountKeyFile = Sys.getenv("GOOGLE_APPLICATION_CREDENTIALS")
+  projectId = Sys.getenv("BIGQUERY_PROJECT_ID"),
+  materializationDataset = Sys.getenv("BIGQUERY_MATERIALIZATION_DATASET"),
+  serviceAccountKeyFile = Sys.getenv("BIGQUERY_APPLICATION_CREDENTIALS")
 )
 
 options(spark.version = Sys.getenv("SPARK_VERSION", "3.5"))