Skip to content

Commit

Permalink
Create new coral-dbt dbt package with trino_to_spark POC (#359)
Browse files Browse the repository at this point in the history
* Create new coral-dbt dbt package with trino_to_spark POC

* Add gradle setup and linter
  • Loading branch information
AliceYeh12 authored Mar 3, 2023
1 parent 1030924 commit 37f827e
Show file tree
Hide file tree
Showing 9 changed files with 129 additions and 0 deletions.
6 changes: 6 additions & 0 deletions coral-dbt/.sqlfluff
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[sqlfluff]
templater = jinja
sql_file_exts = .sql

[sqlfluff:templater:jinja]
apply_dbt_builtins = true
1 change: 1 addition & 0 deletions coral-dbt/.sqlfluffignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
src/main/resources/macros/coral_macros/**/materializations
34 changes: 34 additions & 0 deletions coral-dbt/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Using coral-dbt Materialization Modes

Note: This project is currently in development and is not intended to be a production ready package. Currently, only Spark adapter is supported.

## Setup
1. The materialization mode in this package requires a minor modification to the dbt-core source. Clone and setup a local dbt-core source and create/activate a Python virtual environment.
2. Make the following modifications to `core/dbt/context/base.py` in your local dbt-core source:
```
import requests
def get_requests_module_context() -> Dict[str, Any]:
context_exports = ["get", "post"]
return {name: getattr(requests, name) for name in context_exports}
def get_context_modules() -> Dict[str, Dict[str, Any]]:
return {
"pytz": get_pytz_module_context(),
"datetime": get_datetime_module_context(),
"re": get_re_module_context(),
"itertools": get_itertools_module_context(),
"requests": get_requests_module_context(),
}
```
3. Add this package to your dbt project by creating or modifying `packages.yml` and run `dbt deps` to install the package.

```
packages:
- git: "https://github.com/linkedin/coral.git"
revision: master
subdirectory: coral-dbt/src/main/resources
```

4. Follow the instructions in the main project README to start up Coral Service.
8 changes: 8 additions & 0 deletions coral-dbt/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
apply plugin: "com.diffplug.spotless"

spotless {
sql {
target 'src/main/resources/macros/coral_macros/**/**/*.sql'
licenseHeaderFile(rootProject.file("gradle/license/LICENSE_HEADER"), "\\{% ")
}
}
6 changes: 6 additions & 0 deletions coral-dbt/src/main/resources/dbt_project.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
name: 'coral_dbt'

config-version: 2
version: '0.1.0'

macro-paths: ["macros"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/**
* Copyright 2023 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
{% macro trino_to_spark(sql) %}
{{ return(adapter.dispatch('trino_to_spark', macro_namespace = 'coral_dbt')(sql)) }}
{% endmacro %}


{% macro default__trino_to_spark(sql) -%}
{{ exceptions.raise_compiler_error("macro trino_to_spark not implemented for adapters other than Spark") }}
{%- endmacro %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/**
* Copyright 2023 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
{% materialization trino_table, adapter = 'spark' %}

{%- set identifier = model['alias'] -%}
{%- set grant_config = config.get('grants') -%}

{%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%}
{%- set target_relation = api.Relation.create(identifier=identifier,
schema=schema,
database=database,
type='table') -%}

{{ run_hooks(pre_hooks) }}

-- setup: if the target relation already exists, drop it
-- in case if the existing and future table is delta, we want to do a
-- create or replace table instead of dropping, so we don't have the table unavailable
{% if old_relation and not (old_relation.is_delta and config.get('file_format', validator=validation.any[basestring]) == 'delta') -%}
{{ adapter.drop_relation(old_relation) }}
{%- endif %}

-- build model
{% call statement('main') -%}
{%- set modified_sql = coral_dbt.trino_to_spark(sql) -%}
{{ create_table_as(False, target_relation, modified_sql) }}
{%- endcall %}

{% set should_revoke = should_revoke(old_relation, full_refresh_mode=True) %}
{% do apply_grants(target_relation, grant_config, should_revoke) %}

{% do persist_docs(target_relation, model) %}

{{ run_hooks(post_hooks) }}

{{ return({'relations': [target_relation]})}}

{% endmaterialization %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/**
* Copyright 2023 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
{% macro spark__trino_to_spark(sql) -%}

{% set requests = modules.requests %}
{% set url = 'http://localhost:8080/api/translations/translate' %}
{% set request_data = {
"fromLanguage": "trino",
"toLanguage": "spark",
"query": sql
} %}
{% set response = requests.post(url, json=request_data) %}
{% set spark_sql = response.text.split(':')[2] %}
{{ return(spark_sql) }}

{%- endmacro %}
1 change: 1 addition & 0 deletions settings.gradle
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
def modules = [
'coral-common',
'coral-dbt',
'coral-hive',
'coral-pig',
'coral-spark',
Expand Down

0 comments on commit 37f827e

Please sign in to comment.