From 617371ff8f8518de24bb769e06a0d2f1c07d6905 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Mont=C3=B3n?= Date: Fri, 23 Sep 2022 23:11:25 +0200 Subject: [PATCH] BigQuerySchemas API (#249) * Added BigQuerySchemas as a public API to return only Schemas from BigQuery, without creating tables in a real environment. Changed JavaConverters to accept Seqs * Seq in Java converters for Scala 2.13+ * BigQuerySchemas with `schema(value: A)` being A any Product type. * CHANGELOG.md and version * removing unused code. Test renamed to Spec --- CHANGELOG.md | 8 +-- .../bigquery/BigQueryDefinitions.scala | 2 + .../bigquery/BigQuerySchemas.scala | 32 +++++++++ .../bigdatatypes/bigquery/BigQueryTable.scala | 4 ++ .../bigquery/JavaConverters.scala | 2 +- .../bigquery/JavaConverters.scala | 2 +- .../bigquery/BigQuerySchemasSpec.scala | 71 +++++++++++++++++++ build.sbt | 2 +- 8 files changed, 116 insertions(+), 7 deletions(-) create mode 100644 bigquery/src/main/scala/org/datatools/bigdatatypes/bigquery/BigQuerySchemas.scala create mode 100644 bigquery/src/test/scala/org/datatools/bigdatatypes/bigquery/BigQuerySchemasSpec.scala diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a25b00c..2d5cacff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,8 @@ -### Big Data Types v1.2.1 -Some dependencies (Circe - ScalaTest) have updated Scala to 3.1.X -- Dependencies updated to newer versions -- Scala version changed to 3.1.X +### Big Data Types v1.3.0 +- Added BigQuerySchemas as an interface to create Schemas without creating tables +- Updated Scala to Scala 3.1.X - This version is no longer compatible with Scala 3.0.X + - The Scala version has been upgraded due to some dependencies (Circe - ScalaTest) that have updated Scala to 3.1.X ### Big Data Types v1.2.0 - New module for Circe (JSON) diff --git a/bigquery/src/main/scala/org/datatools/bigdatatypes/bigquery/BigQueryDefinitions.scala b/bigquery/src/main/scala/org/datatools/bigdatatypes/bigquery/BigQueryDefinitions.scala index 
9c222f17..d34da39f 100644 --- a/bigquery/src/main/scala/org/datatools/bigdatatypes/bigquery/BigQueryDefinitions.scala +++ b/bigquery/src/main/scala/org/datatools/bigdatatypes/bigquery/BigQueryDefinitions.scala @@ -2,6 +2,8 @@ package org.datatools.bigdatatypes.bigquery import com.google.cloud.bigquery.{Schema, StandardTableDefinition, TimePartitioning} import org.datatools.bigdatatypes.bigquery.JavaConverters.toJava +import org.datatools.bigdatatypes.conversions.SqlInstanceConversion +import org.datatools.bigdatatypes.formats.Formats.implicitDefaultFormats private[bigquery] object BigQueryDefinitions { diff --git a/bigquery/src/main/scala/org/datatools/bigdatatypes/bigquery/BigQuerySchemas.scala b/bigquery/src/main/scala/org/datatools/bigdatatypes/bigquery/BigQuerySchemas.scala new file mode 100644 index 00000000..90c2f686 --- /dev/null +++ b/bigquery/src/main/scala/org/datatools/bigdatatypes/bigquery/BigQuerySchemas.scala @@ -0,0 +1,32 @@ +package org.datatools.bigdatatypes.bigquery + +import com.google.cloud.bigquery.Schema +import org.datatools.bigdatatypes.bigquery.JavaConverters.toJava + +/** + * Public API for generating BigQuery Schemas. + * Any type with an implicit [[SqlTypeToBigQuery]] instance can be converted into a BigQuery [[Schema]]. + * If multiple types are given, the resulting schema is the concatenation of their fields, in type-parameter order. 
+ */ +object BigQuerySchemas { + + /** + * Returns the BigQuery [[Schema]] for the given type; the overloads that follow accept two to five types + * @tparam A is any type with an implicit [[SqlTypeToBigQuery]] instance (likewise B, C, D and E in the other overloads) + * @return [[Schema]] ready to be used in BigQuery + */ + def schema[A: SqlTypeToBigQuery]: Schema = BigQueryDefinitions.generateSchema[A] + def schema[A: SqlTypeToBigQuery, B: SqlTypeToBigQuery]: Schema = BigQueryDefinitions.generateSchema[A, B] + def schema[A: SqlTypeToBigQuery, B: SqlTypeToBigQuery, C: SqlTypeToBigQuery]: Schema = BigQueryDefinitions.generateSchema[A, B, C] + def schema[A: SqlTypeToBigQuery, B: SqlTypeToBigQuery, C: SqlTypeToBigQuery, D: SqlTypeToBigQuery]: Schema = BigQueryDefinitions.generateSchema[A, B, C, D] + def schema[A: SqlTypeToBigQuery, B: SqlTypeToBigQuery, C: SqlTypeToBigQuery, D: SqlTypeToBigQuery, E: SqlTypeToBigQuery]: Schema = BigQueryDefinitions.generateSchema[A, B, C, D, E] + + /** + * Given an instance of a Product, derives the BigQuery [[Schema]] from its static type A + * @param value an instance of A, used only to select the type; its field values are never read + * @tparam A is any Product type with an implicit [[SqlTypeToBigQuery]] instance + * @return [[Schema]] built from the fields that the [[SqlTypeToBigQuery]] instance reports for A + */ + def schema[A <: Product](value: A)(implicit a: SqlTypeToBigQuery[A]): Schema = + Schema.of(toJava(SqlTypeToBigQuery[A].bigQueryFields)) +} diff --git a/bigquery/src/main/scala/org/datatools/bigdatatypes/bigquery/BigQueryTable.scala b/bigquery/src/main/scala/org/datatools/bigdatatypes/bigquery/BigQueryTable.scala index ac02acdb..2ffd7b16 100644 --- a/bigquery/src/main/scala/org/datatools/bigdatatypes/bigquery/BigQueryTable.scala +++ b/bigquery/src/main/scala/org/datatools/bigdatatypes/bigquery/BigQueryTable.scala @@ -14,6 +14,10 @@ import org.datatools.bigdatatypes.bigquery.BigQueryDefinitions.{generateSchema, import scala.util.{Failure, Try} +/** + * Methods in this object create real tables in a BigQuery environment. 
+ * If only the schema of the table is desired, please use [[BigQuerySchemas]] + */ object BigQueryTable { lazy val service: BigQuery = BigQueryOptions.getDefaultInstance.getService diff --git a/bigquery/src/main/scala_2.13+/org/datatools/bigdatatypes/bigquery/JavaConverters.scala b/bigquery/src/main/scala_2.13+/org/datatools/bigdatatypes/bigquery/JavaConverters.scala index 332fc1d4..68e04f9b 100644 --- a/bigquery/src/main/scala_2.13+/org/datatools/bigdatatypes/bigquery/JavaConverters.scala +++ b/bigquery/src/main/scala_2.13+/org/datatools/bigdatatypes/bigquery/JavaConverters.scala @@ -5,6 +5,6 @@ import scala.jdk.CollectionConverters.{IterableHasAsJava, IterableHasAsScala} object JavaConverters { - def toJava[A](value: List[A]): lang.Iterable[A] = value.asJava + def toJava[A](value: Seq[A]): lang.Iterable[A] = value.asJava def toScala[A](value: lang.Iterable[A]): List[A] = value.asScala.toList } diff --git a/bigquery/src/main/scala_2.13-/org/datatools/bigdatatypes/bigquery/JavaConverters.scala b/bigquery/src/main/scala_2.13-/org/datatools/bigdatatypes/bigquery/JavaConverters.scala index 02f6f930..6ff08690 100644 --- a/bigquery/src/main/scala_2.13-/org/datatools/bigdatatypes/bigquery/JavaConverters.scala +++ b/bigquery/src/main/scala_2.13-/org/datatools/bigdatatypes/bigquery/JavaConverters.scala @@ -5,6 +5,6 @@ import scala.collection.JavaConverters.{asJavaIterableConverter, iterableAsScala object JavaConverters { - def toJava[A](value: List[A]): lang.Iterable[A] = value.asJava + def toJava[A](value: Seq[A]): lang.Iterable[A] = value.asJava def toScala[A](value: lang.Iterable[A]): List[A] = value.asScala.toList } diff --git a/bigquery/src/test/scala/org/datatools/bigdatatypes/bigquery/BigQuerySchemasSpec.scala b/bigquery/src/test/scala/org/datatools/bigdatatypes/bigquery/BigQuerySchemasSpec.scala new file mode 100644 index 00000000..197a9386 --- /dev/null +++ b/bigquery/src/test/scala/org/datatools/bigdatatypes/bigquery/BigQuerySchemasSpec.scala @@ -0,0 +1,71 @@ 
+package org.datatools.bigdatatypes.bigquery + +import com.google.cloud.bigquery.Field.Mode +import com.google.cloud.bigquery.{Field, Schema, StandardSQLTypeName} +import org.datatools.bigdatatypes.TestTypes.ListOfStruct +import org.datatools.bigdatatypes.bigquery.JavaConverters.toJava +import org.datatools.bigdatatypes.{BigQueryTestTypes, UnitSpec} +import org.datatools.bigdatatypes.formats.Formats.implicitDefaultFormats +import org.datatools.bigdatatypes.conversions.SqlTypeConversion.* + +class BigQuerySchemasSpec extends UnitSpec { + + val elements1: Seq[Field] = List( + Field.newBuilder("a", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build() + ) + val elements2: Seq[Field] = List( + Field.newBuilder("a", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build(), + Field.newBuilder("b", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build() + ) + val elements3: Seq[Field] = List( + Field.newBuilder("a", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build(), + Field.newBuilder("b", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build(), + Field.newBuilder("c", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build() + ) + val elements4: Seq[Field] = List( + Field.newBuilder("a", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build(), + Field.newBuilder("b", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build(), + Field.newBuilder("c", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build(), + Field.newBuilder("d", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build() + ) + val elements5: Seq[Field] = List( + Field.newBuilder("a", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build(), + Field.newBuilder("b", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build(), + Field.newBuilder("c", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build(), + Field.newBuilder("d", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build(), + Field.newBuilder("e", StandardSQLTypeName.INT64).setMode(Mode.REQUIRED).build() + ) + case class 
Simple1(a: Int) + case class Simple2(b: Int) + case class Simple3(c: Int) + case class Simple4(d: Int) + case class Simple5(e: Int) + + behavior of "BigQuerySchemas" + + "Case class with Struct List" should "be converted into BQ Schema" in { + val fields: Schema = BigQuerySchemas.schema[ListOfStruct] + fields shouldBe Schema.of(toJava(BigQueryTestTypes.basicNestedWithList)) + } + + "2 classes" should "be converted into a BQ Schema" in { + BigQuerySchemas.schema[Simple1, Simple2] shouldBe Schema.of(toJava(elements2)) + } + + "3 classes" should "be converted into a BQ Schema" in { + BigQuerySchemas.schema[Simple1, Simple2, Simple3] shouldBe Schema.of(toJava(elements3)) + } + + "4 classes" should "be converted into a BQ Schema" in { + BigQuerySchemas.schema[Simple1, Simple2, Simple3, Simple4] shouldBe Schema.of(toJava(elements4)) + } + + "5 classes" should "be converted into a BQ Schema" in { + BigQuerySchemas.schema[Simple1, Simple2, Simple3, Simple4, Simple5] shouldBe Schema.of(toJava(elements5)) + } + + "An instance" should "be converted into a BQ Schema" in { + val s = Simple1(1) + BigQuerySchemas.schema[Simple1](s) shouldBe Schema.of(toJava(elements1)) + } +} diff --git a/build.sbt b/build.sbt index 12ad843d..189dc930 100644 --- a/build.sbt +++ b/build.sbt @@ -1,5 +1,5 @@ //used to build Sonatype releases -lazy val versionNumber = "1.2.1" +lazy val versionNumber = "1.3.0" lazy val projectName = "big-data-types" version := versionNumber name := projectName