From d44e80eb24513d680863049ac18a5e5b8978f6c6 Mon Sep 17 00:00:00 2001 From: Mike Alfare <13974384+mikealfare@users.noreply.github.com> Date: Wed, 11 Oct 2023 18:13:09 -0400 Subject: [PATCH] ADAP-865: Parameterize where clause, add option to supply list of relations (#758) * parameterize where clause, add option to supply list of relations * parameterize where clause, add option to supply list of relations * revert whitespace fix * revert whitespace fix * revert whitespace fix * fix missing macro keyword * point to the dev branch on core, revert before pushing to main * add new macro get_catalog_relations, update get_catalog to share common logic with get_catalog_relations * fixed reference in get_catalog_relations, added original dict version of relations temporarily for testing * remove dict version of relations based get_catalog, point to List[BaseRelation] version * point dev reqs back to main on core * fix typo in schemas argument * add feature flag to turn on relation filtering for get_catalog * update changelog to point to the PR instead of a broken url * Support changes to dbt-core capability system --------- Co-authored-by: Peter Allen Webb --- .../unreleased/Features-20230829-152412.yaml | 6 + dbt/adapters/snowflake/impl.py | 5 + dbt/include/snowflake/macros/catalog.sql | 185 +++++++++++------- 3 files changed, 129 insertions(+), 67 deletions(-) create mode 100644 .changes/unreleased/Features-20230829-152412.yaml diff --git a/.changes/unreleased/Features-20230829-152412.yaml b/.changes/unreleased/Features-20230829-152412.yaml new file mode 100644 index 000000000..364f755f3 --- /dev/null +++ b/.changes/unreleased/Features-20230829-152412.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Support limiting get_catalog by object name +time: 2023-08-29T15:24:12.649104-04:00 +custom: + Author: mikealfare + Issue: "758" diff --git a/dbt/adapters/snowflake/impl.py b/dbt/adapters/snowflake/impl.py index 671906242..3f8124a9a 100644 --- a/dbt/adapters/snowflake/impl.py +++ b/dbt/adapters/snowflake/impl.py @@ -5,6 +5,7 @@ from dbt.adapters.base.impl import AdapterConfig, ConstraintSupport # type: ignore from dbt.adapters.base.meta import available +from dbt.adapters.capability import CapabilityDict, CapabilitySupport, Support, Capability from dbt.adapters.sql import SQLAdapter # type: ignore from dbt.adapters.sql.impl import ( LIST_SCHEMAS_MACRO_NAME, @@ -49,6 +50,10 @@ class SnowflakeAdapter(SQLAdapter): ConstraintType.foreign_key: ConstraintSupport.NOT_ENFORCED, } + _capabilities = CapabilityDict( + {Capability.SchemaMetadataByRelations: CapabilitySupport(support=Support.Full)} + ) + @classmethod def date_function(cls): return "CURRENT_TIMESTAMP()" diff --git a/dbt/include/snowflake/macros/catalog.sql b/dbt/include/snowflake/macros/catalog.sql index cf96df25d..eaa7582e9 100644 --- a/dbt/include/snowflake/macros/catalog.sql +++ b/dbt/include/snowflake/macros/catalog.sql @@ -1,72 +1,123 @@ {% macro snowflake__get_catalog(information_schema, schemas) -%} - {% set query %} - with tables as ( - - select - table_catalog as "table_database", - table_schema as "table_schema", - table_name as "table_name", - table_type as "table_type", - comment as "table_comment", - - -- note: this is the _role_ that owns the table - table_owner as "table_owner", - - 'Clustering Key' as "stats:clustering_key:label", - clustering_key as "stats:clustering_key:value", - 'The key used to cluster this table' as "stats:clustering_key:description", - (clustering_key is not null) as "stats:clustering_key:include", - - 'Row Count' as "stats:row_count:label", - row_count as "stats:row_count:value", - 'An approximate count of rows in this table' as "stats:row_count:description", - (row_count is not null) as "stats:row_count:include", - - 'Approximate Size' as "stats:bytes:label", - bytes as "stats:bytes:value", - 'Approximate size of the table as reported by Snowflake' as "stats:bytes:description", - (bytes is not null) as "stats:bytes:include", - - 'Last Modified' as "stats:last_modified:label", - to_varchar(convert_timezone('UTC', last_altered), 'yyyy-mm-dd HH24:MI'||'UTC') as "stats:last_modified:value", - 'The timestamp for last update/change' as "stats:last_modified:description", - (last_altered is not null and table_type='BASE TABLE') as "stats:last_modified:include" - - from {{ information_schema }}.tables - where ( - {%- for schema in schemas -%} - upper("table_schema") = upper('{{ schema }}'){%- if not loop.last %} or {% endif -%} - {%- endfor -%} - ) - - ), - - columns as ( - - select - table_catalog as "table_database", - table_schema as "table_schema", - table_name as "table_name", - - column_name as "column_name", - ordinal_position as "column_index", - data_type as "column_type", - comment as "column_comment" - - from {{ information_schema }}.columns - where ( - {%- for schema in schemas -%} - upper("table_schema") = upper('{{ schema }}'){%- if not loop.last %} or {% endif -%} - {%- endfor -%} - ) - ) - - select * - from tables - join columns using ("table_database", "table_schema", "table_name") - order by "column_index" + + {% set query %} + with tables as ( + {{ snowflake__get_catalog_tables_sql(information_schema) }} + {{ snowflake__get_catalog_schemas_where_clause_sql(schemas) }} + ), + columns as ( + {{ snowflake__get_catalog_columns_sql(information_schema) }} + {{ snowflake__get_catalog_schemas_where_clause_sql(schemas) }} + ) + {{ snowflake__get_catalog_results_sql() }} + {%- endset -%} + + {{ return(run_query(query)) }} + +{%- endmacro %} + + +{% macro snowflake__get_catalog_relations(information_schema, relations) -%} + + {% set query %} + with tables as ( + {{ snowflake__get_catalog_tables_sql(information_schema) }} + {{ snowflake__get_catalog_relations_where_clause_sql(relations) }} + ), + columns as ( + {{ snowflake__get_catalog_columns_sql(information_schema) }} + {{ snowflake__get_catalog_relations_where_clause_sql(relations) }} + ) + {{ snowflake__get_catalog_results_sql() }} {%- endset -%} - {{ return(run_query(query)) }} + {{ return(run_query(query)) }} + +{%- endmacro %} + + +{% macro snowflake__get_catalog_tables_sql(information_schema) -%} + select + table_catalog as "table_database", + table_schema as "table_schema", + table_name as "table_name", + table_type as "table_type", + comment as "table_comment", + + -- note: this is the _role_ that owns the table + table_owner as "table_owner", + + 'Clustering Key' as "stats:clustering_key:label", + clustering_key as "stats:clustering_key:value", + 'The key used to cluster this table' as "stats:clustering_key:description", + (clustering_key is not null) as "stats:clustering_key:include", + + 'Row Count' as "stats:row_count:label", + row_count as "stats:row_count:value", + 'An approximate count of rows in this table' as "stats:row_count:description", + (row_count is not null) as "stats:row_count:include", + + 'Approximate Size' as "stats:bytes:label", + bytes as "stats:bytes:value", + 'Approximate size of the table as reported by Snowflake' as "stats:bytes:description", + (bytes is not null) as "stats:bytes:include", + + 'Last Modified' as "stats:last_modified:label", + to_varchar(convert_timezone('UTC', last_altered), 'yyyy-mm-dd HH24:MI'||'UTC') as "stats:last_modified:value", + 'The timestamp for last update/change' as "stats:last_modified:description", + (last_altered is not null and table_type='BASE TABLE') as "stats:last_modified:include" + from {{ information_schema }}.tables +{%- endmacro %} + + +{% macro snowflake__get_catalog_columns_sql(information_schema) -%} + select + table_catalog as "table_database", + table_schema as "table_schema", + table_name as "table_name", + + column_name as "column_name", + ordinal_position as "column_index", + data_type as "column_type", + comment as "column_comment" + from {{ information_schema }}.columns +{%- endmacro %} + + +{% macro snowflake__get_catalog_results_sql() -%} + select * + from tables + join columns using ("table_database", "table_schema", "table_name") + order by "column_index" +{%- endmacro %} + + +{% macro snowflake__get_catalog_schemas_where_clause_sql(schemas) -%} + where ({%- for schema in schemas -%} + upper("table_schema") = upper('{{ schema }}'){%- if not loop.last %} or {% endif -%} + {%- endfor -%}) +{%- endmacro %} + + +{% macro snowflake__get_catalog_relations_where_clause_sql(relations) -%} + where ( + {%- for relation in relations -%} + {% if relation.schema and relation.identifier %} + ( + upper("table_schema") = upper('{{ relation.schema }}') + and upper("table_name") = upper('{{ relation.identifier }}') + ) + {% elif relation.schema %} + ( + upper("table_schema") = upper('{{ relation.schema }}') + ) + {% else %} + {% do exceptions.raise_compiler_error( + '`get_catalog_relations` requires a list of relations, each with a schema' + ) %} + {% endif %} + {%- if not loop.last %} or {% endif -%} + {%- endfor -%} + ) {%- endmacro %}