From 3cc492100fdd4f193914e958d1669207e990390a Mon Sep 17 00:00:00 2001
From: liangchg
Date: Thu, 27 Feb 2025 15:33:09 +0800
Subject: [PATCH 1/2] Add Paimon format support

---
 .../services/org.apache.hadoop.fs.FileSystem      |   0
 java/PaimonCDCSink/README.md                      | 165 +++++++++
 java/PaimonCDCSink/pom.xml                        | 340 ++++++++++++++++++
 .../services/msf/PaimonCDCSinkJob.java            | 107 ++++++
 .../flink-application-properties-dev.json         |  39 ++
 .../src/main/resources/hive-site.xml              | 273 ++++++++++++++
 .../src/main/resources/log4j2.properties          |   7 +
 7 files changed, 931 insertions(+)
 create mode 100644 java/IcebergDatastreamSink/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem
 create mode 100644 java/PaimonCDCSink/README.md
 create mode 100644 java/PaimonCDCSink/pom.xml
 create mode 100644 java/PaimonCDCSink/src/main/java/com/amazonaws/services/msf/PaimonCDCSinkJob.java
 create mode 100644 java/PaimonCDCSink/src/main/resources/flink-application-properties-dev.json
 create mode 100644 java/PaimonCDCSink/src/main/resources/hive-site.xml
 create mode 100644 java/PaimonCDCSink/src/main/resources/log4j2.properties

diff --git a/java/IcebergDatastreamSink/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem b/java/IcebergDatastreamSink/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem
new file mode 100644
index 0000000..e69de29
diff --git a/java/PaimonCDCSink/README.md b/java/PaimonCDCSink/README.md
new file mode 100644
index 0000000..49418b9
--- /dev/null
+++ b/java/PaimonCDCSink/README.md
@@ -0,0 +1,165 @@
## Flink Apache Paimon Sink using DataStream API

* Flink version: 1.20
* Flink API: DataStream API
* Language: Java (11)
* Apache Paimon: 1.0.1
* Flink connectors: Flink CDC (MySQL / PostgreSQL / MongoDB / Kafka)

This example demonstrates how to use the Apache Paimon CDC ingestion components (MySQL / PostgreSQL / MongoDB / Kafka) to sink
data to Amazon S3 in the Apache Paimon table format. The Apache Paimon Hive catalog can work with the Glue Data Catalog.

The project can run both on Amazon Managed Service for Apache Flink and locally for development.

### Prerequisites
* A database source (MySQL, PostgreSQL, or MongoDB) with binlog enabled, or a Kafka / Amazon MSK source streaming data in a CDC format
  supported by Apache Paimon (Canal CDC, Debezium CDC, Maxwell CDC, OGG CDC, JSON, aws-dms-json).
* If you want to use the Apache Paimon Hive catalog with the Glue Data Catalog, install the aws-glue-datacatalog-hive3-client
  jar into your local Maven repository (refer to this [github repo](https://github.com/awslabs/aws-glue-data-catalog-client-for-apache-hive-metastore) for build instructions, or
  take the jar from an EMR cluster and install it locally), then copy your EMR cluster's `hive-site.xml` file into the project and repackage the project.
* An S3 bucket to write the Paimon table to.


#### IAM Permissions

The application must have IAM permissions to:
* Show and alter Glue Data Catalog databases, and show and create Glue Data Catalog tables.
  See [Glue Data Catalog permissions](https://docs.aws.amazon.com/athena/latest/ug/fine-grained-access-to-glue-resources.html).
* Read from and write to the S3 bucket.


### Runtime configuration

When running on Amazon Managed Service for Apache Flink, the runtime configuration is read from *Runtime Properties*.

When running locally, the configuration is read from the [`resources/flink-application-properties-dev.json`](resources/flink-application-properties-dev.json) file located in the resources folder.
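As a quick illustration of how runtime properties reach Paimon (a sketch only; the topic name and broker address are placeholders), a property group such as:

```json
{
  "PropertyGroupId": "KafkaConf",
  "PropertyMap": {
    "kafka_conf@_topic": "orders_cdc",
    "kafka_conf@_properties.bootstrap.servers": "broker-1:9092"
  }
}
```

is converted, together with `"action": "kafka_sync_database"` in the `ActionConf` group, into arguments equivalent to `kafka_sync_database --kafka_conf topic=orders_cdc --kafka_conf properties.bootstrap.servers=broker-1:9092` before being handed to the Paimon CDC ingestion action.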
This example parses runtime parameters according to the following rules and passes the parsed parameters to the Apache Paimon actions.

- The Paimon CDC ingestion action name is read from the key named `action` in the `ActionConf` parameter group.
- Global or common parameters can also be placed in the `ActionConf` parameter group. The parameter names should follow the documentation of the specific ingestion [action](https://paimon.apache.org/docs/1.0/cdc-ingestion/overview/).
- For parameters such as `table_conf` and `catalog_conf` that are set as `Key=Value` pairs, the name of the parameter group can be customized, for example `TableConf` or `CatalogConf`.
  Parameter names within the group must follow the format `<parameter group name>@_<parameter key>`,
  such as `table_conf@_bucket`, and the parameter value is the corresponding value.


Runtime parameters (sample):

| Group ID | Key | Description |
|---------------|--------------------------------------------|----------------------------------------------------------------------------------------|
| `ActionConf` | `action` | Name of the Apache Paimon CDC ingestion action, e.g. `kafka_sync_database`, `mysql_sync_database`. |
| `ActionConf` | `database` | Target Paimon database name. |
| `ActionConf` | `primary_keys` | (Optional) The primary keys for the Paimon table. |
| `KafkaConf` | `kafka_conf@_properties.bootstrap.servers` | Bootstrap servers of the Kafka cluster. |
| `KafkaConf` | `kafka_conf@_properties.auto.offset.reset` | Initial offset of the Kafka consumer. |
| `KafkaConf` | `kafka_conf@_properties.group.id` | Consumer group ID. |
| `CatalogConf` | `catalog_conf@_metastore.client.class` | Paimon Hive catalog metastore client class name. |
| `CatalogConf` | `...` | ... |
| `TableConf` | `table_conf@_bucket` | Number of buckets for the Paimon table. |
| `TableConf` | `...` | ... |

All parameters are case-sensitive.

### Samples
**Create an MSF application**

First, compile and package the application using Maven, then copy the packaged jar file to your S3 bucket.

```shell
mvn clean package -P KafkaCDC
```

Second, prepare an input JSON file to create the MSF application; you can add any required information (such as VPC, subnets, and security groups) to this file.

**Notice:** Your service execution role must have the appropriate permissions, such as access to the S3 bucket, and Glue access if you want to use the Glue Data Catalog as the Paimon Hive catalog. An illustrative policy is sketched below, followed by the sample `create-application` input file.
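The following is a minimal, illustrative IAM policy for the service execution role. It is a sketch only: the bucket name is a placeholder, the Glue resource is left open, and your environment may need additional permissions (for example VPC, CloudWatch Logs, or MSK access):

```json
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Sid": "PaimonWarehouseS3Access",
      "Effect": "Allow",
      "Action": ["s3:GetObject", "s3:PutObject", "s3:DeleteObject", "s3:ListBucket"],
      "Resource": ["arn:aws:s3:::your-paimon-bucket", "arn:aws:s3:::your-paimon-bucket/*"]
    },
    {
      "Sid": "GlueCatalogAccess",
      "Effect": "Allow",
      "Action": [
        "glue:GetDatabase", "glue:GetDatabases", "glue:CreateDatabase", "glue:UpdateDatabase",
        "glue:GetTable", "glue:GetTables", "glue:CreateTable", "glue:UpdateTable"
      ],
      "Resource": "*"
    }
  ]
}
```

The sample `create-application` input file (referenced later as `create-kafkacdc-paimon.json`) looks like this: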
+```json +{ + "ApplicationName": "kafka-cdc-paimon", + "ApplicationDescription": "Sink CDC from Kafka as Apache Paimon table", + "RuntimeEnvironment": "FLINK-1_20", + "ServiceExecutionRole": "Your service role arn", + "ApplicationConfiguration": { + "ApplicationCodeConfiguration": { + "CodeContent": { + "S3ContentLocation": { + "BucketARN": "Your bucket arn", + "FileKey": "Your jar file s3 key" + } + }, + "CodeContentType": "ZIPFILE" + }, + "EnvironmentProperties": { + "PropertyGroups": [ + { + "PropertyGroupId": "ActionConf", + "PropertyMap": { + "action": "kafka_sync_database", + "database": "Your Paimon Database", + "warehouse": "Your paimon warehouse path" + } + }, + { + "PropertyGroupId": "KafkaConf", + "PropertyMap": { + "kafka_conf@_properties.bootstrap.servers": "MSK bootstrap servers", + "kafka_conf@_properties.auto.offset.reset": "earliest", + "kafka_conf@_properties.group.id": "group id", + "kafka_conf@_topic": "Your cdc topic", + "kafka_conf@_value.format": "debezium-json" + } + }, + { + "PropertyGroupId": "CatalogConf", + "PropertyMap": { + "catalog_conf@_hadoop.fs.s3.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem", + "catalog_conf@_hadoop.fs.s3.buffer.dir": "/var/tmp" + } + }, + { + "PropertyGroupId": "TableConf", + "PropertyMap": { + "table_conf@_bucket": "4", + "table_conf@_metadata.iceberg.storage": "hive-catalog", + "table_conf@_metadata.iceberg.manifest-legacy-version": "true", + "table_conf@_metadata.iceberg.hive-client-class": "com.amazonaws.glue.catalog.metastore.AWSCatalogMetastoreClient", + "table_conf@_fs.s3.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem", + "table_conf@_fs.s3.buffer.dir": "/var/tmp", + "table_conf@_sink.parallelism": "4" + } + } + ] + } + }, + "FlinkApplicationConfiguration": { + "ParallelismConfiguration": { + "AutoScalingEnabled": true, + "Parallelism": 4, + "ParallelismPerKPU": 1 + } + }, + "CloudWatchLoggingOptions": [ + { + "LogStreamARN": "arn:aws:logs:us-west-2:YourAccountId:log-group:/aws/kinesis-analytics/kafka-cdc-paimon:log-stream:kinesis-analytics-log-stream" + } + ] +} +``` + +Last, create an MSF application using AWS CLI. + +```shell +aws kinesisanalyticsv2 create-application \ +--cli-input-json file://create-kafkacdc-paimon.json +``` + +### Running in IntelliJ + +You can run this example directly in IntelliJ, without any local Flink cluster or local Flink installation. + +See [Running examples locally](../running-examples-locally.md) for details. + +### Generating data + +You can use [Kinesis Data Generator](https://github.com/awslabs/amazon-kinesis-data-generator), +also available in a [hosted version](https://awslabs.github.io/amazon-kinesis-data-generator/web/producer.html), +to generate random data to Kinesis Data Stream and test the application. 
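Note that this example consumes CDC records from a Kafka / Amazon MSK topic rather than from a Kinesis stream, so whichever tool you use must ultimately publish messages in the configured CDC format (for example `debezium-json`) to that topic. For reference, a minimal Debezium-style JSON change record is sketched below; the table, columns, and values are hypothetical:

```json
{
  "before": null,
  "after": {
    "ID": 1001,
    "customer_name": "Jane Doe",
    "order_total": 42.5
  },
  "source": {
    "db": "orders_db",
    "table": "orders"
  },
  "op": "c",
  "ts_ms": 1719830400000
}
```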
\ No newline at end of file diff --git a/java/PaimonCDCSink/pom.xml b/java/PaimonCDCSink/pom.xml new file mode 100644 index 0000000..e4a4fdd --- /dev/null +++ b/java/PaimonCDCSink/pom.xml @@ -0,0 +1,340 @@ + + + 4.0.0 + + com.amazonaws + amazon-msf-examples + 1.0 + + + paimon-cdc-sink + + + UTF-8 + ${project.basedir}/target + ${project.name}-${project.version} + 11 + ${target.java.version} + ${target.java.version} + 1.20.0 + 5.0.0-1.20 + 1.2.0 + 2.23.1 + 2.16.2 + 1.0.1 + 3.4.0-1.20 + 3.3.0 + 8.4.0 + 3.4.0 + 2.30.16 + + + + + + com.amazonaws + aws-java-sdk-bom + + 1.12.676 + pom + import + + + + + + + + com.amazonaws + aws-kinesisanalytics-runtime + ${kda.runtime.version} + provided + + + + + + org.apache.flink + flink-streaming-java + ${flink.version} + provided + + + org.apache.flink + flink-runtime-web + ${flink.version} + provided + + + + org.apache.flink + flink-table-runtime + ${flink.version} + provided + + + org.apache.flink + flink-table-common + ${flink.version} + provided + + + + org.apache.flink + flink-s3-fs-hadoop + ${flink.version} + + + + + org.apache.logging.log4j + log4j-api + ${log4j.version} + provided + + + org.apache.logging.log4j + log4j-slf4j-impl + ${log4j.version} + provided + + + org.apache.logging.log4j + log4j-core + ${log4j.version} + provided + + + + org.apache.paimon + paimon-flink-action + ${paimon.version} + + + + org.apache.paimon + paimon-flink-cdc + ${paimon.version} + + + + org.apache.paimon + paimon-flink-1.20 + ${paimon.version} + + + org.apache.paimon + paimon-hive-connector-3.1 + ${paimon.version} + + + + org.apache.thrift + libthrift + 0.21.0 + + + org.apache.thrift + libfb303 + 0.9.3 + + + org.apache.hive + hive-exec + 3.1.3 + + + org.apache.hadoop + hadoop-client + ${hadoop.version} + + + org.apache.avro + avro + + + + org.slf4j + slf4j-reload4j + + + + + + com.amazonaws.glue + aws-glue-datacatalog-hive3-client + 4.2.0 + + + + software.amazon.awssdk + glue + ${aws.sdkv2.version} + compile + + + software.amazon.awssdk + aws-core + ${aws.sdkv2.version} + compile + + + software.amazon.awssdk + sts + ${aws.sdkv2.version} + compile + + + software.amazon.awssdk + utils + ${aws.sdkv2.version} + + + + + + + KafkaCDC + + + org.apache.flink + flink-sql-connector-kafka + ${flink.kafka.sql.version} + + + + kafka + + + + + MySQLCDC + + + org.apache.flink + flink-connector-mysql-cdc + ${flink.cdc.version} + + + com.mysql + mysql-connector-j + ${mysql.driver.version} + + + + mysql + + + + PostgresCDC + + + org.apache.flink + flink-connector-postgres-cdc + ${flink.cdc.version} + + + + postgre + + + + MongoDBCDC + + + org.apache.flink + flink-connector-mongodb-cdc + ${flink.cdc.version} + + + + mongo + + + + + + + ${buildDirectory} + ${cdc.source}-${jar.finalName} + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.8.1 + + ${target.java.version} + ${target.java.version} + ${target.java.version} + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 3.2.1 + + + + package + + shade + + + + + org.apache.flink:force-shading + com.google.code.findbugs:jsr305 + org.slf4j:* + log4j:* + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + META-INF/versions/17/**/*.class + META-INF/versions/19/**/*.class + META-INF/versions/15/**/*.class + + + + + + + org.apache.kafka.connect + org.apache.flink.cdc.connectors.shaded.org.apache.kafka.connect + + + org.apache.kafka + org.apache.flink.kafka.shaded.org.apache.kafka + + + + + + + com.amazonaws.services.msf.PaimonCDCSinkJob + + + + + + + + + + \ No newline at end of file diff --git 
a/java/PaimonCDCSink/src/main/java/com/amazonaws/services/msf/PaimonCDCSinkJob.java b/java/PaimonCDCSink/src/main/java/com/amazonaws/services/msf/PaimonCDCSinkJob.java
new file mode 100644
index 0000000..c6ff043
--- /dev/null
+++ b/java/PaimonCDCSink/src/main/java/com/amazonaws/services/msf/PaimonCDCSinkJob.java
@@ -0,0 +1,107 @@
package com.amazonaws.services.msf;

import com.amazonaws.services.kinesisanalytics.runtime.KinesisAnalyticsRuntime;
import org.apache.flink.streaming.api.environment.LocalStreamEnvironment;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.paimon.flink.action.Action;
import org.apache.paimon.flink.action.ActionBase;
import org.apache.paimon.flink.action.ActionFactory;

import java.io.IOException;
import java.util.*;

public class PaimonCDCSinkJob {

    private static final Logger LOGGER = LogManager.getLogger(PaimonCDCSinkJob.class);
    private static final String LOCAL_APPLICATION_PROPERTIES_RESOURCE = "flink-application-properties-dev.json";
    private static final String SEP_KEY = "@_";
    private static final String ACTION_CONF_GROUP = "ActionConf";
    private static final String ACTION_KEY = "action";
    private static final String PARAM_KEY_PREFIX = "--";

    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        Map<String, Properties> confMap = loadApplicationProperties(env);
        String[] actionArgs = configToActionParameters(confMap);
        if (actionArgs.length < 1) {
            LOGGER.error("No action specified");
            System.exit(1);
        }

        LOGGER.info("actionArgs: {}", Arrays.toString(actionArgs));

        Optional<Action> actionOpt = ActionFactory.createAction(actionArgs);

        if (actionOpt.isPresent()) {
            Action action = actionOpt.get();
            if (action instanceof ActionBase) {
                LOGGER.info("ActionBase: {}", action.getClass().getName());
                ((ActionBase) action).withStreamExecutionEnvironment(env).run();
            } else {
                action.run();
            }
        } else {
            LOGGER.error("No Paimon Flink action service found");
            System.exit(1);
        }
    }

    private static Map<String, Properties> loadApplicationProperties(StreamExecutionEnvironment env) throws IOException {
        if (env instanceof LocalStreamEnvironment) {
            LOGGER.debug("Loading application properties from '{}'", LOCAL_APPLICATION_PROPERTIES_RESOURCE);
            return KinesisAnalyticsRuntime.getApplicationProperties(
                    PaimonCDCSinkJob.class.getClassLoader()
                            .getResource(LOCAL_APPLICATION_PROPERTIES_RESOURCE).getPath());
        } else {
            LOGGER.debug("Loading application properties from Amazon Managed Service for Apache Flink");
            return KinesisAnalyticsRuntime.getApplicationProperties();
        }
    }

    // Converts the MSF property groups into the CLI-style argument list expected by the Paimon actions.
    private static String[] configToActionParameters(Map<String, Properties> confMap) {

        Properties actionProp = confMap.get(ACTION_CONF_GROUP);
        if (actionProp == null) {
            LOGGER.error("ActionConf not found in application properties");
            System.exit(1);
        }

        String action = actionProp.getProperty(ACTION_KEY);
        if (action == null || action.isEmpty()) {
            LOGGER.error("Action not found in application properties");
            System.exit(1);
        }

        actionProp.remove(ACTION_KEY);

        List<String> params = new ArrayList<>();
        params.add(action);

        // Every "group@_key" property becomes "--group key=value"; keys without the separator become "--key value".
        for (Map.Entry<String, Properties> confEntry : confMap.entrySet()) {
            confEntry.getValue().forEach(
                    (k, v) -> {
                        String ks = k.toString();
                        int idx = ks.indexOf(SEP_KEY);
                        String paramKey;
                        String paramVal;
                        if (idx != -1) {
                            paramKey = String.format("%s%s", PARAM_KEY_PREFIX,
ks.substring(0, idx)); + paramVal = String.format("%s=%s", ks.substring(idx + SEP_KEY.length()), v); + + } else { + paramKey = String.format("%s%s", PARAM_KEY_PREFIX , ks); + paramVal = v.toString(); + } + params.add(paramKey); + params.add(paramVal); + } + ); + } + + return params.toArray(new String[0]); + } + +} diff --git a/java/PaimonCDCSink/src/main/resources/flink-application-properties-dev.json b/java/PaimonCDCSink/src/main/resources/flink-application-properties-dev.json new file mode 100644 index 0000000..cb0fa4d --- /dev/null +++ b/java/PaimonCDCSink/src/main/resources/flink-application-properties-dev.json @@ -0,0 +1,39 @@ +[ + { + "PropertyGroupId": "ActionConf", + "PropertyMap": { + "action": "kafka_sync_database", + "warehouse": "s3://bucket/data/prefix", + "database": "paimon_flink", + "primary_keys": "ID", + "table_prefix": "ods_" + } + }, + { + "PropertyGroupId": "KafkaConf", + "PropertyMap": { + "kafka_conf@_properties.bootstrap.servers": "b-2.mycluster.bzvtby.c8.kafka.us-west-2.amazonaws.com:9092,b-1.mycluster.bzvtby.c8.kafka.us-west-2.amazonaws.com:9092", + "kafka_conf@_topic": "kafka_topic", + "kafka_conf@_properties.group.id": 1234546, + "kafka_conf@_properties.auto.offset.reset": "earliest" + } + }, + { + "PropertyGroupId": "CatalogConf", + "PropertyMap": { + "catalog_conf@_metastore": "hive", + "catalog_conf@_hive-conf-dir": "/etc/hive/conf.dist", + "catalog_conf@_lock.enabled": "false", + "catalog_conf@_metastore.client.class": "com.amazonaws.glue.catalog.metastore.AWSCatalogMetastoreClient", + "catalog_conf@_warehouse": "s3://bucket/data/prefix" + } + }, + { + "PropertyGroupId": "TableConf", + "PropertyMap": { + "table_conf@_bucket": "4", + "table_conf@_changelog-producer": "input", + "table_conf@_sink.parallelism": "4" + } + } +] \ No newline at end of file diff --git a/java/PaimonCDCSink/src/main/resources/hive-site.xml b/java/PaimonCDCSink/src/main/resources/hive-site.xml new file mode 100644 index 0000000..95bb612 --- /dev/null +++ b/java/PaimonCDCSink/src/main/resources/hive-site.xml @@ -0,0 +1,273 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hbase.master + + http://wiki.apache.org/hadoop/Hive/HBaseIntegration + + + + hive.zookeeper.quorum + ip-xx-xx-xx-xx.us-west-2.compute.internal:2181 + + + + hive.llap.zk.sm.connectionString + ip-xx-xx-xx-xx.us-west-2.compute.internal:2181 + + + + hbase.zookeeper.quorum + ip-xx-xx-xx-xx.us-west-2.compute.internal + http://wiki.apache.org/hadoop/Hive/HBaseIntegration + + + + hive.execution.engine + tez + + + + fs.defaultFS + hdfs://ip-xx-xx-xx-xx.us-west-2.compute.internal:8020 + + + + + hive.metastore.uris + thrift://ip-xx-xx-xx-xx.us-west-2.compute.internal:9083 + JDBC connect string for a JDBC metastore + + + + javax.jdo.option.ConnectionURL + jdbc:mysql://ip-xx-xx-xx-xx.us-west-2.compute.internal:3306/hive?createDatabaseIfNotExist=true + username to use against metastore database + + + + javax.jdo.option.ConnectionDriverName + org.mariadb.jdbc.Driver + username to use against metastore database + + + + javax.jdo.option.ConnectionUserName + hive + username to use against metastore database + + + + javax.jdo.option.ConnectionPassword + kWs5sQ8HnZaEC2kj + password to use against metastore database + + + + hive.server2.allow.user.substitution + true + + + + hive.server2.enable.doAs + true + + + + hive.server2.thrift.port + 10000 + + + + hive.server2.thrift.http.port + 10001 + + + + + + hive.optimize.ppd.input.formats + com.amazonaws.emr.s3select.hive.S3SelectableTextInputFormat + + + + s3select.filter + 
false + + + + hive.server2.in.place.progress + false + + + + hive.llap.zk.registry.user + hadoop + + + + hive.security.metastore.authorization.manager + org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider + + + + hive.log.explain.output + false + + + + datanucleus.fixedDatastore + true + + + + mapred.reduce.tasks + -1 + + + + mapred.max.split.size + 256000000 + + + + hive.mapjoin.hybridgrace.hashtable + false + + + + hive.merge.nway.joins + false + + + + hive.metastore.connect.retries + 15 + + + + hive.optimize.joinreducededuplication + false + + + + hive.optimize.sort.dynamic.partition.threshold + 1 + + + + hive.server2.materializedviews.registry.impl + DUMMY + + + + hive.tez.auto.reducer.parallelism + true + + + + hive.vectorized.execution.mapjoin.minmax.enabled + true + + + + hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled + true + + + + hive.optimize.dynamic.partition.hashjoin + true + + + + hive.compactor.initiator.on + true + + + + hive.blobstore.use.output-committer + true + + + + hive.llap.daemon.service.hosts + @llap0 + + + + hive.llap.execution.mode + only + + + + hive.optimize.metadataonly + true + + + + hive.tez.bucket.pruning + true + + + + hive.exec.mode.local.auto + true + + + + hive.exec.mode.local.auto.inputbytes.max + 50000000 + + + + hive.query.reexecution.stats.persist.scope + hiveserver + + + + hive.metastore.client.factory.class + com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory + + + + hive.auto.convert.join.noconditionaltask.size + 1073741824 + + + + hive.compactor.worker.threads + 1 + + + + + + + diff --git a/java/PaimonCDCSink/src/main/resources/log4j2.properties b/java/PaimonCDCSink/src/main/resources/log4j2.properties new file mode 100644 index 0000000..3546643 --- /dev/null +++ b/java/PaimonCDCSink/src/main/resources/log4j2.properties @@ -0,0 +1,7 @@ +rootLogger.level = INFO +rootLogger.appenderRef.console.ref = ConsoleAppender + +appender.console.name = ConsoleAppender +appender.console.type = CONSOLE +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n From 5231067e792ef6e5ed071388231b34f156b99a3a Mon Sep 17 00:00:00 2001 From: liangchg Date: Thu, 27 Feb 2025 15:33:09 +0800 Subject: [PATCH 2/2] Add Paimon format support --- .../services/org.apache.hadoop.fs.FileSystem | 0 java/PaimonCDCSink/README.md | 165 +++++++++ java/PaimonCDCSink/pom.xml | 340 ++++++++++++++++++ .../services/msf/PaimonCDCSinkJob.java | 107 ++++++ .../flink-application-properties-dev.json | 39 ++ .../src/main/resources/hive-site.xml | 273 ++++++++++++++ .../src/main/resources/log4j2.properties | 7 + java/pom.xml | 1 + 8 files changed, 932 insertions(+) create mode 100644 java/IcebergDatastreamSink/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem create mode 100644 java/PaimonCDCSink/README.md create mode 100644 java/PaimonCDCSink/pom.xml create mode 100644 java/PaimonCDCSink/src/main/java/com/amazonaws/services/msf/PaimonCDCSinkJob.java create mode 100644 java/PaimonCDCSink/src/main/resources/flink-application-properties-dev.json create mode 100644 java/PaimonCDCSink/src/main/resources/hive-site.xml create mode 100644 java/PaimonCDCSink/src/main/resources/log4j2.properties diff --git a/java/IcebergDatastreamSink/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem b/java/IcebergDatastreamSink/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem new file mode 100644 index 0000000..e69de29 diff --git 
a/java/PaimonCDCSink/README.md b/java/PaimonCDCSink/README.md new file mode 100644 index 0000000..49418b9 --- /dev/null +++ b/java/PaimonCDCSink/README.md @@ -0,0 +1,165 @@ +## Flink Apache Paimon Sink using DataStream API + +* Flink version: 1.20 +* Flink API: DataStream API +* Language: Java (11) +* Apache Paimon: 1.0.1 +* Flink connectors: Flink CDC-MySQL / PostgreSQL / MongoDB / Kafka + +This example demonstrates how to use Apache Paimon CDC ingestion components(MySQL / PostgreSQL / MongoDB / Kafka) to sink +data to Amazon S3 with Apache Paimon table format. The Apache Paimon Hive Catalog can work with Glue Data Catalog. + +The project can run both on Amazon Managed Service for Apache Flink, and locally for development. + +### Prerequisites +* A database source(MySQL, PostgreSQL, MongoDB) with binlog enabled or Kakfa / Amazon MSK source with Apache Paimon + supported CDC format(Canal CDC, Debezium CDC, Maxwell CDC, OGG CDC, JSON, aws-dms-json ) data streamed in it. +* If you want to use Apache Paimon Hive catalog with Glue Data Catalog, please install aws-glue-datacatalog-hive3-client + jar file into your local maven repo(please refer this [github repo](https://github.com/awslabs/aws-glue-data-catalog-client-for-apache-hive-metastore) to install or + you can find this jar file in EMR Cluster and install it into your local maven repo) and copy your EMR cluster's `hive-site.xml` file into the project and repackage the project. +* An S3 bucket to write the Paimon table. + + +#### IAM Permissions + +The application must have IAM permissions to: +* Show and alter Glue Data Catalog databases, show and create Glue Data Catalog tables. + See [Glue Data Catalog permissions](https://docs.aws.amazon.com/athena/latest/ug/fine-grained-access-to-glue-resources.html). +* Read and Write from the S3 bucket. + + +### Runtime configuration + +When running on Amazon Managed Service for Apache Flink the runtime configuration is read from *Runtime Properties*. + +When running locally, the configuration is read from the [`resources/flink-application-properties-dev.json`](resources/flink-application-properties-dev.json) file located in the resources folder. + +This example parses runtime parameters according to the following rules and passes the parsed parameters to Apache Paimon Actions. + +- The Paimon CDC ingestion action name is parsed from the key named action in the 'ActionConf' parameter group. +- Some global or common parameters can be placed in the 'ActionConf' parameter group. The parameter names should refer to the specific ingestion [action name](https://paimon.apache.org/docs/1.0/cdc-ingestion/overview/). +- For parameters like 'table_conf' and 'catalog_conf' that are set in the format of Key=Value, the name of the parameter group can be customized, such as “TableConf” or “CatalogConf”. +For specific parameter names within the parameter group, they should follow the format “parameter group name@_parameter Key”, +such as “table_conf@_bucket”, and the parameter value should be the corresponding Value. + + +Runtime parameters(Sample): + +| Group ID | Key | Description | +|---------------|--------------------------------------------|----------------------------------------------------------------------------------------| +| `ActionConf` | `action` | Name of Apache Paimon CDC ingestion, `kafka_sync_database`, `mysql_sync_database` etc. | +| `ActionConf` | `database` | Target Paimon database name. 
| +| `ActionConf` | `primary_keys` | (Optional) The primary keys for Paimon table | +| `KafkaConf` | `kafka_conf@_properties.bootstrap.servers` | Bootstrap servers of the Kafka Cluster. | +| `KafkaConf` | `kafka_conf@_properties.auto.offset.reset` | Offset of the Kafka Consumer | +| `KafkaConf` | `kafka_conf@_properties.group.id` | Consumer group Id | +| `CatalogConf` | `catalog_conf@_metastore.client.class` | Paimon Hive Catalog metastore client class name | +| `CatalogConf` | `...` | ... | +| `TableConf` | `table_conf@_bucket` | Bucket of Paimon table | +| `TableConf` | `...` | ... | + +All parameters are case-sensitive. + +### Samples +**Create an MSF application** + +First, compile and package the application using Maven, then copy the packaged jar file to your s3. + +```shell +mvn clean package -P KafkaCDC +``` + +Second, prepare an input json file to create a MSF application, you can add required information(like VPC, Subnets,Security.etc.) into this json file. + +**Notice:** Your service execution role should have appropriate permissions, like s3 bucket access and glue access if you want to use Glue Data Catalog as Paimon Hive Catalog. +```json +{ + "ApplicationName": "kafka-cdc-paimon", + "ApplicationDescription": "Sink CDC from Kafka as Apache Paimon table", + "RuntimeEnvironment": "FLINK-1_20", + "ServiceExecutionRole": "Your service role arn", + "ApplicationConfiguration": { + "ApplicationCodeConfiguration": { + "CodeContent": { + "S3ContentLocation": { + "BucketARN": "Your bucket arn", + "FileKey": "Your jar file s3 key" + } + }, + "CodeContentType": "ZIPFILE" + }, + "EnvironmentProperties": { + "PropertyGroups": [ + { + "PropertyGroupId": "ActionConf", + "PropertyMap": { + "action": "kafka_sync_database", + "database": "Your Paimon Database", + "warehouse": "Your paimon warehouse path" + } + }, + { + "PropertyGroupId": "KafkaConf", + "PropertyMap": { + "kafka_conf@_properties.bootstrap.servers": "MSK bootstrap servers", + "kafka_conf@_properties.auto.offset.reset": "earliest", + "kafka_conf@_properties.group.id": "group id", + "kafka_conf@_topic": "Your cdc topic", + "kafka_conf@_value.format": "debezium-json" + } + }, + { + "PropertyGroupId": "CatalogConf", + "PropertyMap": { + "catalog_conf@_hadoop.fs.s3.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem", + "catalog_conf@_hadoop.fs.s3.buffer.dir": "/var/tmp" + } + }, + { + "PropertyGroupId": "TableConf", + "PropertyMap": { + "table_conf@_bucket": "4", + "table_conf@_metadata.iceberg.storage": "hive-catalog", + "table_conf@_metadata.iceberg.manifest-legacy-version": "true", + "table_conf@_metadata.iceberg.hive-client-class": "com.amazonaws.glue.catalog.metastore.AWSCatalogMetastoreClient", + "table_conf@_fs.s3.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem", + "table_conf@_fs.s3.buffer.dir": "/var/tmp", + "table_conf@_sink.parallelism": "4" + } + } + ] + } + }, + "FlinkApplicationConfiguration": { + "ParallelismConfiguration": { + "AutoScalingEnabled": true, + "Parallelism": 4, + "ParallelismPerKPU": 1 + } + }, + "CloudWatchLoggingOptions": [ + { + "LogStreamARN": "arn:aws:logs:us-west-2:YourAccountId:log-group:/aws/kinesis-analytics/kafka-cdc-paimon:log-stream:kinesis-analytics-log-stream" + } + ] +} +``` + +Last, create an MSF application using AWS CLI. + +```shell +aws kinesisanalyticsv2 create-application \ +--cli-input-json file://create-kafkacdc-paimon.json +``` + +### Running in IntelliJ + +You can run this example directly in IntelliJ, without any local Flink cluster or local Flink installation. 
+ +See [Running examples locally](../running-examples-locally.md) for details. + +### Generating data + +You can use [Kinesis Data Generator](https://github.com/awslabs/amazon-kinesis-data-generator), +also available in a [hosted version](https://awslabs.github.io/amazon-kinesis-data-generator/web/producer.html), +to generate random data to Kinesis Data Stream and test the application. \ No newline at end of file diff --git a/java/PaimonCDCSink/pom.xml b/java/PaimonCDCSink/pom.xml new file mode 100644 index 0000000..e4a4fdd --- /dev/null +++ b/java/PaimonCDCSink/pom.xml @@ -0,0 +1,340 @@ + + + 4.0.0 + + com.amazonaws + amazon-msf-examples + 1.0 + + + paimon-cdc-sink + + + UTF-8 + ${project.basedir}/target + ${project.name}-${project.version} + 11 + ${target.java.version} + ${target.java.version} + 1.20.0 + 5.0.0-1.20 + 1.2.0 + 2.23.1 + 2.16.2 + 1.0.1 + 3.4.0-1.20 + 3.3.0 + 8.4.0 + 3.4.0 + 2.30.16 + + + + + + com.amazonaws + aws-java-sdk-bom + + 1.12.676 + pom + import + + + + + + + + com.amazonaws + aws-kinesisanalytics-runtime + ${kda.runtime.version} + provided + + + + + + org.apache.flink + flink-streaming-java + ${flink.version} + provided + + + org.apache.flink + flink-runtime-web + ${flink.version} + provided + + + + org.apache.flink + flink-table-runtime + ${flink.version} + provided + + + org.apache.flink + flink-table-common + ${flink.version} + provided + + + + org.apache.flink + flink-s3-fs-hadoop + ${flink.version} + + + + + org.apache.logging.log4j + log4j-api + ${log4j.version} + provided + + + org.apache.logging.log4j + log4j-slf4j-impl + ${log4j.version} + provided + + + org.apache.logging.log4j + log4j-core + ${log4j.version} + provided + + + + org.apache.paimon + paimon-flink-action + ${paimon.version} + + + + org.apache.paimon + paimon-flink-cdc + ${paimon.version} + + + + org.apache.paimon + paimon-flink-1.20 + ${paimon.version} + + + org.apache.paimon + paimon-hive-connector-3.1 + ${paimon.version} + + + + org.apache.thrift + libthrift + 0.21.0 + + + org.apache.thrift + libfb303 + 0.9.3 + + + org.apache.hive + hive-exec + 3.1.3 + + + org.apache.hadoop + hadoop-client + ${hadoop.version} + + + org.apache.avro + avro + + + + org.slf4j + slf4j-reload4j + + + + + + com.amazonaws.glue + aws-glue-datacatalog-hive3-client + 4.2.0 + + + + software.amazon.awssdk + glue + ${aws.sdkv2.version} + compile + + + software.amazon.awssdk + aws-core + ${aws.sdkv2.version} + compile + + + software.amazon.awssdk + sts + ${aws.sdkv2.version} + compile + + + software.amazon.awssdk + utils + ${aws.sdkv2.version} + + + + + + + KafkaCDC + + + org.apache.flink + flink-sql-connector-kafka + ${flink.kafka.sql.version} + + + + kafka + + + + + MySQLCDC + + + org.apache.flink + flink-connector-mysql-cdc + ${flink.cdc.version} + + + com.mysql + mysql-connector-j + ${mysql.driver.version} + + + + mysql + + + + PostgresCDC + + + org.apache.flink + flink-connector-postgres-cdc + ${flink.cdc.version} + + + + postgre + + + + MongoDBCDC + + + org.apache.flink + flink-connector-mongodb-cdc + ${flink.cdc.version} + + + + mongo + + + + + + + ${buildDirectory} + ${cdc.source}-${jar.finalName} + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.8.1 + + ${target.java.version} + ${target.java.version} + ${target.java.version} + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 3.2.1 + + + + package + + shade + + + + + org.apache.flink:force-shading + com.google.code.findbugs:jsr305 + org.slf4j:* + log4j:* + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + 
META-INF/versions/17/**/*.class + META-INF/versions/19/**/*.class + META-INF/versions/15/**/*.class + + + + + + + org.apache.kafka.connect + org.apache.flink.cdc.connectors.shaded.org.apache.kafka.connect + + + org.apache.kafka + org.apache.flink.kafka.shaded.org.apache.kafka + + + + + + + com.amazonaws.services.msf.PaimonCDCSinkJob + + + + + + + + + + \ No newline at end of file diff --git a/java/PaimonCDCSink/src/main/java/com/amazonaws/services/msf/PaimonCDCSinkJob.java b/java/PaimonCDCSink/src/main/java/com/amazonaws/services/msf/PaimonCDCSinkJob.java new file mode 100644 index 0000000..c6ff043 --- /dev/null +++ b/java/PaimonCDCSink/src/main/java/com/amazonaws/services/msf/PaimonCDCSinkJob.java @@ -0,0 +1,107 @@ +package com.amazonaws.services.msf; + +import com.amazonaws.services.kinesisanalytics.runtime.KinesisAnalyticsRuntime; +import org.apache.flink.streaming.api.environment.LocalStreamEnvironment; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.paimon.flink.action.Action; +import org.apache.paimon.flink.action.ActionBase; +import org.apache.paimon.flink.action.ActionFactory; + +import java.io.IOException; +import java.util.*; + +public class PaimonCDCSinkJob { + + private static final Logger LOGGER = LogManager.getLogger(PaimonCDCSinkJob.class); + private static final String LOCAL_APPLICATION_PROPERTIES_RESOURCE = "flink-application-properties-dev.json"; + private static final String SEP_KEY = "@_"; + private static final String ACTION_CONF_GROUP = "ActionConf"; + private static final String ACTION_KEY = "action"; + private static final String PARAM_KEY_PREFIX = "--"; + + public static void main(String[] args) throws Exception{ + + StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); + + Map confMap = loadApplicationProperties(env); + String[] actionArgs = configToActionParameters(confMap); + if (actionArgs.length < 1) { + LOGGER.error("No action specified"); + System.exit(1); + } + + LOGGER.info("actionArgs: {}", Arrays.toString(actionArgs)); + + Optional actionOpt = ActionFactory.createAction(actionArgs); + + if (actionOpt.isPresent()) { + Action action = actionOpt.get(); + if (action instanceof ActionBase) { + LOGGER.info("ActionBase: {}", action.getClass().getName()); + ((ActionBase) action).withStreamExecutionEnvironment(env).run(); + } else { + action.run(); + } + } else { + LOGGER.info("No paimon flink action service found"); + System.exit(1); + } + } + + private static Map loadApplicationProperties(StreamExecutionEnvironment env) throws IOException { + if (env instanceof LocalStreamEnvironment) { + LOGGER.debug("Loading application properties from '{}'", LOCAL_APPLICATION_PROPERTIES_RESOURCE); + return KinesisAnalyticsRuntime.getApplicationProperties( + PaimonCDCSinkJob.class.getClassLoader() + .getResource(LOCAL_APPLICATION_PROPERTIES_RESOURCE).getPath()); + } else { + LOGGER.debug("Loading application properties from Amazon Managed Service for Apache Flink"); + return KinesisAnalyticsRuntime.getApplicationProperties(); + } + } + + private static String[] configToActionParameters(Map confMap) { + + Properties actionProp = confMap.get(ACTION_CONF_GROUP); + if (actionProp == null) { + LOGGER.error("ActionConf not found in application properties"); + System.exit(1); + } + + String action = actionProp.getProperty(ACTION_KEY); + if (action == null || action.isEmpty()) { + LOGGER.error("Action not found in 
application properties"); + } + + actionProp.remove(ACTION_KEY); + + List params = new ArrayList<>(); + params.add(action); + + for (Map.Entry confEntry : confMap.entrySet()) { + confEntry.getValue().forEach( + (k, v) -> { + String ks = k.toString(); + int idx = ks.indexOf(SEP_KEY); + String paramKey; + String paramVal; + if (idx != -1) { + paramKey = String.format("%s%s", PARAM_KEY_PREFIX , ks.substring(0, idx)); + paramVal = String.format("%s=%s", ks.substring(idx + SEP_KEY.length()), v); + + } else { + paramKey = String.format("%s%s", PARAM_KEY_PREFIX , ks); + paramVal = v.toString(); + } + params.add(paramKey); + params.add(paramVal); + } + ); + } + + return params.toArray(new String[0]); + } + +} diff --git a/java/PaimonCDCSink/src/main/resources/flink-application-properties-dev.json b/java/PaimonCDCSink/src/main/resources/flink-application-properties-dev.json new file mode 100644 index 0000000..cb0fa4d --- /dev/null +++ b/java/PaimonCDCSink/src/main/resources/flink-application-properties-dev.json @@ -0,0 +1,39 @@ +[ + { + "PropertyGroupId": "ActionConf", + "PropertyMap": { + "action": "kafka_sync_database", + "warehouse": "s3://bucket/data/prefix", + "database": "paimon_flink", + "primary_keys": "ID", + "table_prefix": "ods_" + } + }, + { + "PropertyGroupId": "KafkaConf", + "PropertyMap": { + "kafka_conf@_properties.bootstrap.servers": "b-2.mycluster.bzvtby.c8.kafka.us-west-2.amazonaws.com:9092,b-1.mycluster.bzvtby.c8.kafka.us-west-2.amazonaws.com:9092", + "kafka_conf@_topic": "kafka_topic", + "kafka_conf@_properties.group.id": 1234546, + "kafka_conf@_properties.auto.offset.reset": "earliest" + } + }, + { + "PropertyGroupId": "CatalogConf", + "PropertyMap": { + "catalog_conf@_metastore": "hive", + "catalog_conf@_hive-conf-dir": "/etc/hive/conf.dist", + "catalog_conf@_lock.enabled": "false", + "catalog_conf@_metastore.client.class": "com.amazonaws.glue.catalog.metastore.AWSCatalogMetastoreClient", + "catalog_conf@_warehouse": "s3://bucket/data/prefix" + } + }, + { + "PropertyGroupId": "TableConf", + "PropertyMap": { + "table_conf@_bucket": "4", + "table_conf@_changelog-producer": "input", + "table_conf@_sink.parallelism": "4" + } + } +] \ No newline at end of file diff --git a/java/PaimonCDCSink/src/main/resources/hive-site.xml b/java/PaimonCDCSink/src/main/resources/hive-site.xml new file mode 100644 index 0000000..95bb612 --- /dev/null +++ b/java/PaimonCDCSink/src/main/resources/hive-site.xml @@ -0,0 +1,273 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + hbase.master + + http://wiki.apache.org/hadoop/Hive/HBaseIntegration + + + + hive.zookeeper.quorum + ip-xx-xx-xx-xx.us-west-2.compute.internal:2181 + + + + hive.llap.zk.sm.connectionString + ip-xx-xx-xx-xx.us-west-2.compute.internal:2181 + + + + hbase.zookeeper.quorum + ip-xx-xx-xx-xx.us-west-2.compute.internal + http://wiki.apache.org/hadoop/Hive/HBaseIntegration + + + + hive.execution.engine + tez + + + + fs.defaultFS + hdfs://ip-xx-xx-xx-xx.us-west-2.compute.internal:8020 + + + + + hive.metastore.uris + thrift://ip-xx-xx-xx-xx.us-west-2.compute.internal:9083 + JDBC connect string for a JDBC metastore + + + + javax.jdo.option.ConnectionURL + jdbc:mysql://ip-xx-xx-xx-xx.us-west-2.compute.internal:3306/hive?createDatabaseIfNotExist=true + username to use against metastore database + + + + javax.jdo.option.ConnectionDriverName + org.mariadb.jdbc.Driver + username to use against metastore database + + + + javax.jdo.option.ConnectionUserName + hive + username to use against metastore database + + + + 
javax.jdo.option.ConnectionPassword + kWs5sQ8HnZaEC2kj + password to use against metastore database + + + + hive.server2.allow.user.substitution + true + + + + hive.server2.enable.doAs + true + + + + hive.server2.thrift.port + 10000 + + + + hive.server2.thrift.http.port + 10001 + + + + + + hive.optimize.ppd.input.formats + com.amazonaws.emr.s3select.hive.S3SelectableTextInputFormat + + + + s3select.filter + false + + + + hive.server2.in.place.progress + false + + + + hive.llap.zk.registry.user + hadoop + + + + hive.security.metastore.authorization.manager + org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider + + + + hive.log.explain.output + false + + + + datanucleus.fixedDatastore + true + + + + mapred.reduce.tasks + -1 + + + + mapred.max.split.size + 256000000 + + + + hive.mapjoin.hybridgrace.hashtable + false + + + + hive.merge.nway.joins + false + + + + hive.metastore.connect.retries + 15 + + + + hive.optimize.joinreducededuplication + false + + + + hive.optimize.sort.dynamic.partition.threshold + 1 + + + + hive.server2.materializedviews.registry.impl + DUMMY + + + + hive.tez.auto.reducer.parallelism + true + + + + hive.vectorized.execution.mapjoin.minmax.enabled + true + + + + hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled + true + + + + hive.optimize.dynamic.partition.hashjoin + true + + + + hive.compactor.initiator.on + true + + + + hive.blobstore.use.output-committer + true + + + + hive.llap.daemon.service.hosts + @llap0 + + + + hive.llap.execution.mode + only + + + + hive.optimize.metadataonly + true + + + + hive.tez.bucket.pruning + true + + + + hive.exec.mode.local.auto + true + + + + hive.exec.mode.local.auto.inputbytes.max + 50000000 + + + + hive.query.reexecution.stats.persist.scope + hiveserver + + + + hive.metastore.client.factory.class + com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory + + + + hive.auto.convert.join.noconditionaltask.size + 1073741824 + + + + hive.compactor.worker.threads + 1 + + + + + + + diff --git a/java/PaimonCDCSink/src/main/resources/log4j2.properties b/java/PaimonCDCSink/src/main/resources/log4j2.properties new file mode 100644 index 0000000..3546643 --- /dev/null +++ b/java/PaimonCDCSink/src/main/resources/log4j2.properties @@ -0,0 +1,7 @@ +rootLogger.level = INFO +rootLogger.appenderRef.console.ref = ConsoleAppender + +appender.console.name = ConsoleAppender +appender.console.type = CONSOLE +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n diff --git a/java/pom.xml b/java/pom.xml index 29e281b..91229c2 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -35,5 +35,6 @@ Serialization/CustomTypeInfo SideOutputs PrometheusSink + PaimonCDCSink \ No newline at end of file