From 1de86e60a47f424f9004234e8e47ed1a59b1e5ae Mon Sep 17 00:00:00 2001 From: Becket Qin Date: Wed, 24 Apr 2024 10:38:29 -0700 Subject: [PATCH] Add withFunction() method to StatementOnlySqlTransform --- .../beam/gradle/BeamModulePlugin.groovy | 2 +- gradle.properties | 4 +- ...tementOnlyFlinkSqlTransformTranslator.java | 3 ++ .../sql/StatementOnlySqlTransform.java | 43 +++++++++++++++++++ .../transform/sql/FlinkSqlTestUtils.java | 4 +- .../sql/StatementOnlySqlTransformTest.java | 19 ++++++++ .../sql/TestingInMemCatalogFactory.java | 6 ++- .../tables/OrdersWithConvertedBuyerNames | 11 +++++ 8 files changed, 86 insertions(+), 6 deletions(-) create mode 100644 runners/flink/1.15/src/test/resources/tables/OrdersWithConvertedBuyerNames diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index ba95ffb74089..3e1f4ff6e036 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -398,7 +398,7 @@ class BeamModulePlugin implements Plugin { // Automatically use the official release version if we are performing a release // otherwise append '-SNAPSHOT' - project.version = '2.45.22' + project.version = '2.45.23' if (isLinkedin(project)) { project.ext.mavenGroupId = 'com.linkedin.beam' } diff --git a/gradle.properties b/gradle.properties index 46bb2c87c3f6..0126fa707469 100644 --- a/gradle.properties +++ b/gradle.properties @@ -30,8 +30,8 @@ signing.gnupg.useLegacyGpg=true # buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy. # To build a custom Beam version make sure you change it in both places, see # https://github.com/apache/beam/issues/21302. 
-version=2.45.22 -sdk_version=2.45.22 +version=2.45.23 +sdk_version=2.45.23 javaVersion=1.8 diff --git a/runners/flink/1.15/src/main/java/org/apache/beam/runners/flink/transform/sql/StatementOnlyFlinkSqlTransformTranslator.java b/runners/flink/1.15/src/main/java/org/apache/beam/runners/flink/transform/sql/StatementOnlyFlinkSqlTransformTranslator.java index 4a36dea4d109..42e1a8d93bbf 100644 --- a/runners/flink/1.15/src/main/java/org/apache/beam/runners/flink/transform/sql/StatementOnlyFlinkSqlTransformTranslator.java +++ b/runners/flink/1.15/src/main/java/org/apache/beam/runners/flink/transform/sql/StatementOnlyFlinkSqlTransformTranslator.java @@ -48,6 +48,9 @@ public void translateNode(PTransform transform, FlinkStreamingTra StreamTableEnvironment tEnv = StreamTableEnvironment.create(context.getExecutionEnvironment()); sqlTransform.getCatalogs().forEach(tEnv::registerCatalog); + sqlTransform.getFunctionClasses().forEach(tEnv::createTemporarySystemFunction); + sqlTransform.getFunctionInstances().forEach(tEnv::createTemporarySystemFunction); + StringJoiner combinedStatements = new StringJoiner("\n\n"); StreamStatementSet ss = tEnv.createStatementSet(); for (String statement : sqlTransform.getStatements()) { diff --git a/runners/flink/1.15/src/main/java/org/apache/beam/runners/flink/transform/sql/StatementOnlySqlTransform.java b/runners/flink/1.15/src/main/java/org/apache/beam/runners/flink/transform/sql/StatementOnlySqlTransform.java index 6fbcf7b39ed2..0fcd9c415125 100644 --- a/runners/flink/1.15/src/main/java/org/apache/beam/runners/flink/transform/sql/StatementOnlySqlTransform.java +++ b/runners/flink/1.15/src/main/java/org/apache/beam/runners/flink/transform/sql/StatementOnlySqlTransform.java @@ -28,6 +28,7 @@ import org.apache.beam.sdk.values.PBegin; import org.apache.beam.sdk.values.PDone; import org.apache.flink.table.catalog.Catalog; +import org.apache.flink.table.functions.UserDefinedFunction; import org.apache.flink.util.Preconditions; import 
org.checkerframework.checker.nullness.qual.Nullable; import org.slf4j.Logger; @@ -41,10 +42,14 @@ public class StatementOnlySqlTransform extends PTransform { private final List statements; private final Map catalogs; + private final Map functionInstances; + private final Map> functionClasses; StatementOnlySqlTransform() { this.statements = new ArrayList<>(); this.catalogs = new HashMap<>(); + this.functionInstances = new HashMap<>(); + this.functionClasses = new HashMap<>(); } @Override @@ -87,6 +92,36 @@ public StatementOnlySqlTransform withCatalog(String name, SerializableCatalog ca return this; } + /** + * Register a temporary user defined function for this SQL transform. The function will be + * registered as a System Function which means it will temporarily override other functions + * with the same name, if such function exists. + * + * @param name the name of the function. + * @param functionClass the class of the user defined function. + * @return this {@link StatementOnlySqlTransform} itself. + */ + public StatementOnlySqlTransform withFunction( + String name, Class functionClass) { + functionClasses.put(name, functionClass); + return this; + } + + /** + * Register a temporary user defined function for this SQL transform. The function will be + * registered as a System Function which means it will temporarily override other functions + * with the same name, if such function exists. + * + * @param name the name of the function. + * @param functionInstance the user defined function instance. + * @return this {@link StatementOnlySqlTransform} itself. 
+ */ + public StatementOnlySqlTransform withFunction( + String name, UserDefinedFunction functionInstance) { + functionInstances.put(name, functionInstance); + return this; + } + // --------------------- package private getters ----------------- List getStatements() { return Collections.unmodifiableList(statements); @@ -96,6 +131,14 @@ Map getCatalogs() { return Collections.unmodifiableMap(catalogs); } + Map<String, UserDefinedFunction> getFunctionInstances() { + return Collections.unmodifiableMap(functionInstances); // read-only view, as in getCatalogs() + } + + Map<String, Class<? extends UserDefinedFunction>> getFunctionClasses() { + return Collections.unmodifiableMap(functionClasses); // read-only view, as in getCatalogs() + } + // --------------------- private helpers ------------------------ private static String cleanUp(String s) { return s.trim().endsWith(";") ? s : s + ";"; diff --git a/runners/flink/1.15/src/test/java/org/apache/beam/runners/flink/transform/sql/FlinkSqlTestUtils.java b/runners/flink/1.15/src/test/java/org/apache/beam/runners/flink/transform/sql/FlinkSqlTestUtils.java index 9c60fc0903ec..a606a63f882e 100644 --- a/runners/flink/1.15/src/test/java/org/apache/beam/runners/flink/transform/sql/FlinkSqlTestUtils.java +++ b/runners/flink/1.15/src/test/java/org/apache/beam/runners/flink/transform/sql/FlinkSqlTestUtils.java @@ -167,13 +167,13 @@ public static CatalogTable getOrdersCatalogTable() { return new ResolvedCatalogTable(origin, resolvedSchema); } - public static CatalogTable getOrdersVerifyCatalogTable() { + public static CatalogTable getOrdersVerifyCatalogTable(String verificationFile) { // Create schema ResolvedSchema resolvedSchema = getOrdersSchema(); Map connectorOptions = new HashMap<>(); connectorOptions.put(FactoryUtil.CONNECTOR.key(), VerifyingTableSinkFactory.IDENTIFIER); - connectorOptions.put(VerifyingTableSinkFactory.EXPECTED_RESULT_FILE_PATH_OPTION.key(), getFilePath("Orders")); + connectorOptions.put(VerifyingTableSinkFactory.EXPECTED_RESULT_FILE_PATH_OPTION.key(), getFilePath(verificationFile)); connectorOptions.put(VerifyingTableSinkFactory.HAS_HEADER_OPTION.key(), "true"); final CatalogTable origin = diff --git
a/runners/flink/1.15/src/test/java/org/apache/beam/runners/flink/transform/sql/StatementOnlySqlTransformTest.java b/runners/flink/1.15/src/test/java/org/apache/beam/runners/flink/transform/sql/StatementOnlySqlTransformTest.java index b9f64e2ee451..682d64fca7e6 100644 --- a/runners/flink/1.15/src/test/java/org/apache/beam/runners/flink/transform/sql/StatementOnlySqlTransformTest.java +++ b/runners/flink/1.15/src/test/java/org/apache/beam/runners/flink/transform/sql/StatementOnlySqlTransformTest.java @@ -90,6 +90,25 @@ public void testInsertOverwrite() { pipeline.run(getPipelineOptions()); } + @Test + public void testWithFunction() { + SerializableCatalog catalog = TestingInMemCatalogFactory.getCatalog("TestCatalog"); + Pipeline pipeline = Pipeline.create(); + + StatementOnlySqlTransform transform = SqlTransform.ofStatements(); + transform + .withCatalog("MyCatalog", catalog) + .withFunction("udfViaClass", FlinkSqlTestUtils.ToUpperCaseAndReplaceString.class) + .withFunction("udfViaInstance", new FlinkSqlTestUtils.ToUpperCaseAndReplaceString()) + .addStatement("INSERT INTO MyCatalog.TestDatabase.OrdersVerifyWithModifiedBuyerNames " + + "SELECT orderNumber, product, amount, price, udfViaClass(buyer), orderTime FROM MyCatalog.TestDatabase.Orders") + .addStatement("INSERT INTO MyCatalog.TestDatabase.OrdersVerifyWithModifiedBuyerNames " + + "SELECT orderNumber, product, amount, price, udfViaInstance(buyer), orderTime FROM MyCatalog.TestDatabase.Orders"); + + pipeline.apply(transform); + pipeline.run(getPipelineOptions()); + } + // ---------------- private void testBasics(boolean isStreaming) { SerializableCatalog catalog = TestingInMemCatalogFactory.getCatalog("TestCatalog"); diff --git a/runners/flink/1.15/src/test/java/org/apache/beam/runners/flink/transform/sql/TestingInMemCatalogFactory.java b/runners/flink/1.15/src/test/java/org/apache/beam/runners/flink/transform/sql/TestingInMemCatalogFactory.java index 3d28b67677a0..66b519e61104 100644 --- 
a/runners/flink/1.15/src/test/java/org/apache/beam/runners/flink/transform/sql/TestingInMemCatalogFactory.java +++ b/runners/flink/1.15/src/test/java/org/apache/beam/runners/flink/transform/sql/TestingInMemCatalogFactory.java @@ -41,7 +41,11 @@ public static TestingInMemCatalog getCatalog(String name) { true); catalog.createTable( new ObjectPath("TestDatabase", "OrdersVerify"), - getOrdersVerifyCatalogTable(), + getOrdersVerifyCatalogTable("Orders"), + true); + catalog.createTable( + new ObjectPath("TestDatabase", "OrdersVerifyWithModifiedBuyerNames"), + getOrdersVerifyCatalogTable("OrdersWithConvertedBuyerNames"), true); } catch (TableAlreadyExistException | DatabaseNotExistException e) { throw new RuntimeException(e); diff --git a/runners/flink/1.15/src/test/resources/tables/OrdersWithConvertedBuyerNames b/runners/flink/1.15/src/test/resources/tables/OrdersWithConvertedBuyerNames new file mode 100644 index 000000000000..00a105bd8a63 --- /dev/null +++ b/runners/flink/1.15/src/test/resources/tables/OrdersWithConvertedBuyerNames @@ -0,0 +1,11 @@ +#orderNumber,product,amount,price,buyer,orderTime +1,Apple,1,10.0,ALIC3,2023-03-24 16:23:00 +2,Orange,2,100.0,B0B,2022-04-24 16:12:00 +3,Mango,3,1000.0,CHARLI3,2021-04-26 15:00:00 +4,Pear,1,12.0,D0NNA,2023-04-26 23:00:00 +5,Strawberry,6,13.0,3LL3N,2023-04-24 15:00:00 +6,Orange,1,120.0,ALIC3,2022-04-25 07:00:00 +7,Blueberry,7,900.0,CHARLI3,2023-04-23 22:00:00 +8,Mango,3,1000.0,D0NNA,2023-04-22 11:00:00 +9,Mango,5,950.0,D0NNA,2020-02-14 17:00:00 +10,Orange,6,90.0,B0B,2023-01-24 18:00:00 \ No newline at end of file