diff --git a/.changes/unreleased/Features-20240709-194316.yaml b/.changes/unreleased/Features-20240709-194316.yaml new file mode 100644 index 000000000..a867387e3 --- /dev/null +++ b/.changes/unreleased/Features-20240709-194316.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Improve run times for large projects by reusing connections by default +time: 2024-07-09T19:43:16.489649-04:00 +custom: + Author: mikealfare amardatar + Issue: "1082" diff --git a/dbt/adapters/snowflake/connections.py b/dbt/adapters/snowflake/connections.py index a9e83cd2e..6b325ab9c 100644 --- a/dbt/adapters/snowflake/connections.py +++ b/dbt/adapters/snowflake/connections.py @@ -113,6 +113,7 @@ class SnowflakeCredentials(Credentials): retry_on_database_errors: bool = False retry_all: bool = False insecure_mode: Optional[bool] = False + # this needs to default to `None` so that we can tell if the user set it; see `__post_init__()` reuse_connections: Optional[bool] = None def __post_init__(self): @@ -143,6 +144,11 @@ def __post_init__(self): self.account = self.account.replace("_", "-") + # only default `reuse_connections` to `True` if the user has not turned on `client_session_keep_alive` + # having both of these set to `True` could lead to hanging open connections, so it should be opt-in behavior + if self.client_session_keep_alive is False and self.reuse_connections is None: + self.reuse_connections = True + @property def type(self): return "snowflake" diff --git a/tests/performance/README.md b/tests/performance/README.md new file mode 100644 index 000000000..02130c5c6 --- /dev/null +++ b/tests/performance/README.md @@ -0,0 +1,6 @@ +# Performance testing + +These tests are not meant to run on a regular basis; instead, they are tools for measuring performance impacts of changes as needed. +We often get requests for reducing processing times, researching why a particular component is taking longer to run than expected, etc. +In the past we have performed one-off analyses to address these requests and documented the results in the relevant PR (when a change is made). +It is more useful to document those analyses in the form of performance tests so that we can easily rerun the analysis at a later date. diff --git a/tests/performance/test_auth_methods.py b/tests/performance/test_auth_methods.py new file mode 100644 index 000000000..ad0b424ab --- /dev/null +++ b/tests/performance/test_auth_methods.py @@ -0,0 +1,132 @@ +""" +Results: + +| method | project_size | reuse_connections | unsafe_skip_rsa_key_validation | duration | +|---------------|--------------|-------------------|--------------------------------|----------| +| User Password | 1,000 | False | - | 234.09s | +| User Password | 1,000 | True | - | 78.34s | +| Key Pair | 1,000 | False | False | 271.47s | +| Key Pair | 1,000 | False | True | 275.73s | +| Key Pair | 1,000 | True | False | 63.69s | +| Key Pair | 1,000 | True | True | 73.45s | + +Notes: +- run locally on MacOS, single threaded +- `unsafe_skip_rsa_key_validation` only applies to the Key Pair auth method +- `unsafe_skip_rsa_key_validation=True` was tested by updating the relevant `cryptography` calls directly as it is not a user configuration +- since the models are all views, time differences should be viewed as absolute differences, e.g.: + - this: (271.47s - 63.69s) / 1,000 models = 208ms improvement + - NOT this: 1 - (63.69s / 271.47s) = 76.7% improvement +""" + +from datetime import datetime +import os + +from dbt.tests.util import run_dbt +import pytest + + +SEED = """ +id,value +1,a +2,b +3,c +""".strip() + + +MODEL = """ +select * from {{ ref("my_seed") }} +""" + + +class Scenario: + """ + Runs a full load test. The test can be configured to run an arbitrary number of models. + + To use this test, configure the test by setting `project_size` and/or `expected_duration`. + """ + + auth_method: str + project_size: int = 1 + reuse_connections: bool = False + + @pytest.fixture(scope="class") + def seeds(self): + return {"my_seed.csv": SEED} + + @pytest.fixture(scope="class") + def models(self): + return {f"my_model_{i}.sql": MODEL for i in range(self.project_size)} + + @pytest.fixture(scope="class", autouse=True) + def setup(self, project): + run_dbt(["seed"]) + + start = datetime.now() + yield + end = datetime.now() + + duration = (end - start).total_seconds() + print(f"Run took: {duration} seconds") + + @pytest.fixture(scope="class") + def dbt_profile_target(self, auth_params): + yield { + "type": "snowflake", + "threads": 4, + "account": os.getenv("SNOWFLAKE_TEST_ACCOUNT"), + "database": os.getenv("SNOWFLAKE_TEST_DATABASE"), + "warehouse": os.getenv("SNOWFLAKE_TEST_WAREHOUSE"), + "user": os.getenv("SNOWFLAKE_TEST_USER"), + "reuse_connections": self.reuse_connections, + **auth_params, + } + + @pytest.fixture(scope="class") + def auth_params(self): + + if self.auth_method == "user_password": + yield {"password": os.getenv("SNOWFLAKE_TEST_PASSWORD")} + + elif self.auth_method == "key_pair": + """ + This connection method uses key pair auth. + Follow the instructions here to setup key pair authentication for your test user: + https://docs.snowflake.com/en/user-guide/key-pair-auth + """ + yield { + "private_key": os.getenv("SNOWFLAKE_TEST_PRIVATE_KEY"), + "private_key_passphrase": os.getenv("SNOWFLAKE_TEST_PRIVATE_KEY_PASSPHRASE"), + } + + else: + raise ValueError( + f"`auth_method` must be one of `user_password` or `key_pair`, received: {self.auth_method}" + ) + + def test_scenario(self, project): + run_dbt(["run"]) + + +class TestUserPasswordAuth(Scenario): + auth_method = "user_password" + project_size = 1_000 + reuse_connections = False + + +class TestUserPasswordAuthReuseConnections(Scenario): + auth_method = "user_password" + project_size = 1_000 + reuse_connections = True + + +class TestKeyPairAuth(Scenario): + auth_method = "key_pair" + project_size = 1_000 + reuse_connections = False + + +class TestKeyPairAuthReuseConnections(Scenario): + auth_method = "key_pair" + project_size = 1_000 + reuse_connections = True diff --git a/tests/unit/test_snowflake_adapter.py b/tests/unit/test_snowflake_adapter.py index f6a768da8..32e73eb45 100644 --- a/tests/unit/test_snowflake_adapter.py +++ b/tests/unit/test_snowflake_adapter.py @@ -290,13 +290,19 @@ def test_client_session_keep_alive_false_by_default(self): application="dbt", insecure_mode=False, session_parameters={}, - reuse_connections=None, + reuse_connections=True, ), ] ) def test_client_session_keep_alive_true(self): - self.config.credentials = self.config.credentials.replace(client_session_keep_alive=True) + self.config.credentials = self.config.credentials.replace( + client_session_keep_alive=True, + # this gets defaulted via `__post_init__` when `client_session_keep_alive` comes in as `False` + # then when `replace` is called, `__post_init__` cannot set it back to `None` since it cannot + # tell the difference between set by user and set by `__post_init__` + reuse_connections=None, + ) self.adapter = SnowflakeAdapter(self.config, get_context("spawn")) conn = self.adapter.connections.set_connection_name(name="new_connection_with_new_config") @@ -339,7 +345,7 @@ def test_client_has_query_tag(self): role=None, schema="public", user="test_user", - reuse_connections=None, + reuse_connections=True, warehouse="test_warehouse", private_key=None, application="dbt", @@ -379,7 +385,7 @@ def test_user_pass_authentication(self): application="dbt", insecure_mode=False, session_parameters={}, - reuse_connections=None, + reuse_connections=True, ) ] ) @@ -413,7 +419,7 @@ def test_authenticator_user_pass_authentication(self): client_store_temporary_credential=True, insecure_mode=False, session_parameters={}, - reuse_connections=None, + reuse_connections=True, ) ] ) @@ -443,7 +449,7 @@ def test_authenticator_externalbrowser_authentication(self): client_store_temporary_credential=True, insecure_mode=False, session_parameters={}, - reuse_connections=None, + reuse_connections=True, ) ] ) @@ -477,7 +483,7 @@ def test_authenticator_oauth_authentication(self): client_store_temporary_credential=True, insecure_mode=False, session_parameters={}, - reuse_connections=None, + reuse_connections=True, ) ] ) @@ -511,7 +517,7 @@ def test_authenticator_private_key_authentication(self, mock_get_private_key): application="dbt", insecure_mode=False, session_parameters={}, - reuse_connections=None, + reuse_connections=True, ) ] ) @@ -545,7 +551,7 @@ def test_authenticator_private_key_authentication_no_passphrase(self, mock_get_p application="dbt", insecure_mode=False, session_parameters={}, - reuse_connections=None, + reuse_connections=True, ) ] ) @@ -577,7 +583,7 @@ def test_authenticator_jwt_authentication(self): client_store_temporary_credential=True, insecure_mode=False, session_parameters={}, - reuse_connections=None, + reuse_connections=True, ) ] ) @@ -607,7 +613,7 @@ def test_query_tag(self): application="dbt", insecure_mode=False, session_parameters={"QUERY_TAG": "test_query_tag"}, - reuse_connections=None, + reuse_connections=True, ) ] ) @@ -670,7 +676,7 @@ def test_authenticator_private_key_string_authentication(self, mock_get_private_ application="dbt", insecure_mode=False, session_parameters={}, - reuse_connections=None, + reuse_connections=True, ) ] ) @@ -706,7 +712,7 @@ def test_authenticator_private_key_string_authentication_no_passphrase( application="dbt", insecure_mode=False, session_parameters={}, - reuse_connections=None, + reuse_connections=True, ) ] )