From 0f264becd39988c106ed1529e600fd31298ddb46 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Tue, 9 Jul 2024 19:40:53 -0400 Subject: [PATCH 01/10] add performance test for auth methods --- tests/performance/README.md | 6 ++ tests/performance/test_auth_methods.py | 128 +++++++++++++++++++++++++ 2 files changed, 134 insertions(+) create mode 100644 tests/performance/README.md create mode 100644 tests/performance/test_auth_methods.py diff --git a/tests/performance/README.md b/tests/performance/README.md new file mode 100644 index 000000000..02130c5c6 --- /dev/null +++ b/tests/performance/README.md @@ -0,0 +1,6 @@ +# Performance testing + +These tests are not meant to run on a regular basis; instead, they are tools for measuring performance impacts of changes as needed. +We often get requests for reducing processing times, researching why a particular component is taking longer to run than expected, etc. +In the past we have performed one-off analyses to address these requests and documented the results in the relevant PR (when a change is made). +It is more useful to document those analyses in the form of performance tests so that we can easily rerun the analysis at a later date. diff --git a/tests/performance/test_auth_methods.py b/tests/performance/test_auth_methods.py new file mode 100644 index 000000000..a92081ac1 --- /dev/null +++ b/tests/performance/test_auth_methods.py @@ -0,0 +1,128 @@ +""" +Results: + +| method | project_size | reuse_connections | unsafe_skip_rsa_key_validation | duration | +|---------------|--------------|-------------------|--------------------------------|----------| +| User Password | 1,000 | False | - | 234.09s | +| User Password | 1,000 | True | - | 78.34s | +| Key Pair | 1,000 | False | False | 331.75s | +| Key Pair | 1,000 | False | True | 278.65s | +| Key Pair | 1,000 | True | False | 75.42s | +| Key Pair | 1,000 | True | True | 73.14s | + +Notes: +- `unsafe_skip_rsa_key_validation` only applies to the Key Pair auth method +- `unsafe_skip_rsa_key_validation=True` was tested by updating the relevant `cryptography` calls directly as it is not a user configuration +""" + +from datetime import datetime +import os + +from dbt.tests.util import run_dbt +import pytest + + +SEED = """ +id,value +1,a +2,b +3,c +""".strip() + + +MODEL = """ +select * from {{ ref("my_seed") }} +""" + + +class Scenario: + """ + Runs a full load test. The test can be configured to run an arbitrary number of models. + + To use this test, configure the test by setting `project_size` and/or `expected_duration`. + """ + + auth_method: str + project_size: int = 1 + reuse_connections: bool = False + + @pytest.fixture(scope="class") + def seeds(self): + return {"my_seed.csv": SEED} + + @pytest.fixture(scope="class") + def models(self): + return {f"my_model_{i}.sql": MODEL for i in range(self.project_size)} + + @pytest.fixture(scope="class", autouse=True) + def setup(self, project): + run_dbt(["seed"]) + + start = datetime.now() + yield + end = datetime.now() + + duration = (end - start).total_seconds() + print(f"Run took: {duration} seconds") + + @pytest.fixture(scope="class") + def dbt_profile_target(self, auth_params): + yield { + "type": "snowflake", + "threads": 4, + "account": os.getenv("SNOWFLAKE_TEST_ACCOUNT"), + "database": os.getenv("SNOWFLAKE_TEST_DATABASE"), + "warehouse": os.getenv("SNOWFLAKE_TEST_WAREHOUSE"), + "user": os.getenv("SNOWFLAKE_TEST_USER"), + "reuse_connections": self.reuse_connections, + **auth_params, + } + + @pytest.fixture(scope="class") + def auth_params(self): + + if self.auth_method == "user_password": + yield {"password": os.getenv("SNOWFLAKE_TEST_PASSWORD")} + + elif self.auth_method == "key_pair": + """ + This connection method uses key pair auth. + Follow the instructions here to setup key pair authentication for your test user: + https://docs.snowflake.com/en/user-guide/key-pair-auth + """ + yield { + "private_key": os.getenv("SNOWFLAKE_PRIVATE_KEY"), + "private_key_passphrase": os.getenv("SNOWFLAKE_PRIVATE_KEY_PASSPHRASE"), + } + + else: + raise ValueError( + f"`auth_method` must be one of `user_password` or `key_pair`, received: {self.auth_method}" + ) + + def test_scenario(self, project): + run_dbt(["run"]) + + +class TestUserPasswordAuth(Scenario): + auth_method = "user_password" + project_size = 1_000 + reuse_connections = False + + +class TestUserPasswordAuthReuseConnections(Scenario): + auth_method = "user_password" + project_size = 1_000 + reuse_connections = True + + +class TestKeyPairAuth(Scenario): + auth_method = "key_pair" + project_size = 1_000 + reuse_connections = False + + +class TestKeyPairAuthReuseConnections(Scenario): + auth_method = "key_pair" + project_size = 1_000 + reuse_connections = True From 5044440ba0b44982e907cc612e735c128d0a7f78 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Tue, 9 Jul 2024 19:41:13 -0400 Subject: [PATCH 02/10] update default setting for reuse_connections to True --- dbt/adapters/snowflake/connections.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/adapters/snowflake/connections.py b/dbt/adapters/snowflake/connections.py index 6e9a5aaba..175b5c2f6 100644 --- a/dbt/adapters/snowflake/connections.py +++ b/dbt/adapters/snowflake/connections.py @@ -94,7 +94,7 @@ class SnowflakeCredentials(Credentials): retry_on_database_errors: bool = False retry_all: bool = False insecure_mode: Optional[bool] = False - reuse_connections: Optional[bool] = None + reuse_connections: Optional[bool] = True def __post_init__(self): if self.authenticator != "oauth" and (self.oauth_client_secret or self.oauth_client_id): From 208c97050c875c685893567b30eb58a0c66cd47c Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Tue, 9 Jul 2024 19:43:28 -0400 Subject: [PATCH 03/10] changelog --- .changes/unreleased/Features-20240709-194316.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changes/unreleased/Features-20240709-194316.yaml diff --git a/.changes/unreleased/Features-20240709-194316.yaml b/.changes/unreleased/Features-20240709-194316.yaml new file mode 100644 index 000000000..a867387e3 --- /dev/null +++ b/.changes/unreleased/Features-20240709-194316.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Improve run times for large projects by reusing connections by default +time: 2024-07-09T19:43:16.489649-04:00 +custom: + Author: mikealfare amardatar + Issue: "1082" From 72594869447b66c7a9a8405979967d9e82fd4ef7 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Tue, 9 Jul 2024 19:56:14 -0400 Subject: [PATCH 04/10] update unit tests to reflect the new default value for reuse_connections --- tests/unit/test_snowflake_adapter.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/unit/test_snowflake_adapter.py b/tests/unit/test_snowflake_adapter.py index f6a768da8..2801b3b50 100644 --- a/tests/unit/test_snowflake_adapter.py +++ b/tests/unit/test_snowflake_adapter.py @@ -290,7 +290,7 @@ def test_client_session_keep_alive_false_by_default(self): application="dbt", insecure_mode=False, session_parameters={}, - reuse_connections=None, + reuse_connections=True, ), ] ) @@ -317,7 +317,7 @@ def test_client_session_keep_alive_true(self): application="dbt", insecure_mode=False, session_parameters={}, - reuse_connections=None, + reuse_connections=True, ) ] ) @@ -339,7 +339,7 @@ def test_client_has_query_tag(self): role=None, schema="public", user="test_user", - reuse_connections=None, + reuse_connections=True, warehouse="test_warehouse", private_key=None, application="dbt", @@ -379,7 +379,7 @@ def test_user_pass_authentication(self): application="dbt", insecure_mode=False, session_parameters={}, - reuse_connections=None, + reuse_connections=True, ) ] ) @@ -413,7 +413,7 @@ def test_authenticator_user_pass_authentication(self): client_store_temporary_credential=True, insecure_mode=False, session_parameters={}, - reuse_connections=None, + reuse_connections=True, ) ] ) @@ -443,7 +443,7 @@ def test_authenticator_externalbrowser_authentication(self): client_store_temporary_credential=True, insecure_mode=False, session_parameters={}, - reuse_connections=None, + reuse_connections=True, ) ] ) @@ -477,7 +477,7 @@ def test_authenticator_oauth_authentication(self): client_store_temporary_credential=True, insecure_mode=False, session_parameters={}, - reuse_connections=None, + reuse_connections=True, ) ] ) @@ -511,7 +511,7 @@ def test_authenticator_private_key_authentication(self, mock_get_private_key): application="dbt", insecure_mode=False, session_parameters={}, - reuse_connections=None, + reuse_connections=True, ) ] ) @@ -545,7 +545,7 @@ def test_authenticator_private_key_authentication_no_passphrase(self, mock_get_p application="dbt", insecure_mode=False, session_parameters={}, - reuse_connections=None, + reuse_connections=True, ) ] ) @@ -577,7 +577,7 @@ def test_authenticator_jwt_authentication(self): client_store_temporary_credential=True, insecure_mode=False, session_parameters={}, - reuse_connections=None, + reuse_connections=True, ) ] ) @@ -607,7 +607,7 @@ def test_query_tag(self): application="dbt", insecure_mode=False, session_parameters={"QUERY_TAG": "test_query_tag"}, - reuse_connections=None, + reuse_connections=True, ) ] ) @@ -670,7 +670,7 @@ def test_authenticator_private_key_string_authentication(self, mock_get_private_ application="dbt", insecure_mode=False, session_parameters={}, - reuse_connections=None, + reuse_connections=True, ) ] ) @@ -706,7 +706,7 @@ def test_authenticator_private_key_string_authentication_no_passphrase( application="dbt", insecure_mode=False, session_parameters={}, - reuse_connections=None, + reuse_connections=True, ) ] ) From 20c0ca9a7ba1aa31a044deb6f71b4d4c093bb88c Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Thu, 11 Jul 2024 11:21:39 -0400 Subject: [PATCH 05/10] update the full default on reuse_connections to a conditional default based on client_session_keep_alive --- dbt/adapters/snowflake/connections.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/dbt/adapters/snowflake/connections.py b/dbt/adapters/snowflake/connections.py index 175b5c2f6..b482b8f4c 100644 --- a/dbt/adapters/snowflake/connections.py +++ b/dbt/adapters/snowflake/connections.py @@ -94,7 +94,8 @@ class SnowflakeCredentials(Credentials): retry_on_database_errors: bool = False retry_all: bool = False insecure_mode: Optional[bool] = False - reuse_connections: Optional[bool] = True + # this needs to default to `None` so that we can tell if the user set it; see `__post_init__()` + reuse_connections: Optional[bool] = None def __post_init__(self): if self.authenticator != "oauth" and (self.oauth_client_secret or self.oauth_client_id): @@ -124,6 +125,11 @@ def __post_init__(self): self.account = self.account.replace("_", "-") + # only default `reuse_connections` to `True` if the user has not turned on `client_session_keep_alive` + # having both of these set to `True` could lead to hanging open connections, so it should be opt-in behavior + if self.client_session_keep_alive is False and self.reuse_connections is None: + self.reuse_connections = True + @property def type(self): return "snowflake" From 535096276f514f68e6cd50d136f32cab652712eb Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Fri, 12 Jul 2024 10:50:00 -0400 Subject: [PATCH 06/10] fix unit test expected result --- tests/unit/test_snowflake_adapter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_snowflake_adapter.py b/tests/unit/test_snowflake_adapter.py index 2801b3b50..59600356e 100644 --- a/tests/unit/test_snowflake_adapter.py +++ b/tests/unit/test_snowflake_adapter.py @@ -317,7 +317,7 @@ def test_client_session_keep_alive_true(self): application="dbt", insecure_mode=False, session_parameters={}, - reuse_connections=True, + reuse_connections=None, ) ] ) From 66ddd963a15e6df42362e0465471799d8681e826 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Fri, 12 Jul 2024 11:22:59 -0400 Subject: [PATCH 07/10] update unit test to account for __post_init__ --- dbt/adapters/snowflake/connections.py | 3 +++ tests/performance/test_auth_methods.py | 8 ++++---- tests/unit/test_snowflake_adapter.py | 8 +++++++- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/dbt/adapters/snowflake/connections.py b/dbt/adapters/snowflake/connections.py index 6b325ab9c..e98e6ac42 100644 --- a/dbt/adapters/snowflake/connections.py +++ b/dbt/adapters/snowflake/connections.py @@ -146,7 +146,10 @@ def __post_init__(self): # only default `reuse_connections` to `True` if the user has not turned on `client_session_keep_alive` # having both of these set to `True` could lead to hanging open connections, so it should be opt-in behavior + print(f"client_session_keep_alive = {self.client_session_keep_alive}") + print(f"reuse_connections = {self.reuse_connections}") if self.client_session_keep_alive is False and self.reuse_connections is None: + print("Updating reuse_connections") self.reuse_connections = True @property diff --git a/tests/performance/test_auth_methods.py b/tests/performance/test_auth_methods.py index a92081ac1..8f9d1c442 100644 --- a/tests/performance/test_auth_methods.py +++ b/tests/performance/test_auth_methods.py @@ -5,9 +5,9 @@ |---------------|--------------|-------------------|--------------------------------|----------| | User Password | 1,000 | False | - | 234.09s | | User Password | 1,000 | True | - | 78.34s | -| Key Pair | 1,000 | False | False | 331.75s | +| Key Pair | 1,000 | False | False | 271.47s | x | Key Pair | 1,000 | False | True | 278.65s | -| Key Pair | 1,000 | True | False | 75.42s | +| Key Pair | 1,000 | True | False | 63.69s | x | Key Pair | 1,000 | True | True | 73.14s | Notes: @@ -91,8 +91,8 @@ def auth_params(self): https://docs.snowflake.com/en/user-guide/key-pair-auth """ yield { - "private_key": os.getenv("SNOWFLAKE_PRIVATE_KEY"), - "private_key_passphrase": os.getenv("SNOWFLAKE_PRIVATE_KEY_PASSPHRASE"), + "private_key": os.getenv("SNOWFLAKE_TEST_PRIVATE_KEY"), + "private_key_passphrase": os.getenv("SNOWFLAKE_TEST_PRIVATE_KEY_PASSPHRASE"), } else: diff --git a/tests/unit/test_snowflake_adapter.py b/tests/unit/test_snowflake_adapter.py index 59600356e..32e73eb45 100644 --- a/tests/unit/test_snowflake_adapter.py +++ b/tests/unit/test_snowflake_adapter.py @@ -296,7 +296,13 @@ def test_client_session_keep_alive_false_by_default(self): ) def test_client_session_keep_alive_true(self): - self.config.credentials = self.config.credentials.replace(client_session_keep_alive=True) + self.config.credentials = self.config.credentials.replace( + client_session_keep_alive=True, + # this gets defaulted via `__post_init__` when `client_session_keep_alive` comes in as `False` + # then when `replace` is called, `__post_init__` cannot set it back to `None` since it cannot + # tell the difference between set by user and set by `__post_init__` + reuse_connections=None, + ) self.adapter = SnowflakeAdapter(self.config, get_context("spawn")) conn = self.adapter.connections.set_connection_name(name="new_connection_with_new_config") From c228a846a544da69ef4e460e92b133df61b6c1b5 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Fri, 12 Jul 2024 11:38:28 -0400 Subject: [PATCH 08/10] rerun auth method analysis --- tests/performance/test_auth_methods.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/performance/test_auth_methods.py b/tests/performance/test_auth_methods.py index 8f9d1c442..ad0b424ab 100644 --- a/tests/performance/test_auth_methods.py +++ b/tests/performance/test_auth_methods.py @@ -5,14 +5,18 @@ |---------------|--------------|-------------------|--------------------------------|----------| | User Password | 1,000 | False | - | 234.09s | | User Password | 1,000 | True | - | 78.34s | -| Key Pair | 1,000 | False | False | 271.47s | x -| Key Pair | 1,000 | False | True | 278.65s | -| Key Pair | 1,000 | True | False | 63.69s | x -| Key Pair | 1,000 | True | True | 73.14s | +| Key Pair | 1,000 | False | False | 271.47s | +| Key Pair | 1,000 | False | True | 275.73s | +| Key Pair | 1,000 | True | False | 63.69s | +| Key Pair | 1,000 | True | True | 73.45s | Notes: +- run locally on MacOS, single threaded - `unsafe_skip_rsa_key_validation` only applies to the Key Pair auth method - `unsafe_skip_rsa_key_validation=True` was tested by updating the relevant `cryptography` calls directly as it is not a user configuration +- since the models are all views, time differences should be viewed as absolute differences, e.g.: + - this: (271.47s - 63.69s) / 1,000 models = 208ms improvement + - NOT this: 1 - (63.69s / 271.47s) = 76.7% improvement """ from datetime import datetime From f41c7257a683f345a26b244ab4c2ff4b29d2056a Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Fri, 12 Jul 2024 11:39:15 -0400 Subject: [PATCH 09/10] remove print statements --- dbt/adapters/snowflake/connections.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/dbt/adapters/snowflake/connections.py b/dbt/adapters/snowflake/connections.py index e98e6ac42..c6d1f9567 100644 --- a/dbt/adapters/snowflake/connections.py +++ b/dbt/adapters/snowflake/connections.py @@ -146,8 +146,6 @@ def __post_init__(self): # only default `reuse_connections` to `True` if the user has not turned on `client_session_keep_alive` # having both of these set to `True` could lead to hanging open connections, so it should be opt-in behavior - print(f"client_session_keep_alive = {self.client_session_keep_alive}") - print(f"reuse_connections = {self.reuse_connections}") if self.client_session_keep_alive is False and self.reuse_connections is None: print("Updating reuse_connections") self.reuse_connections = True From f875c803b67298ee28f09e64ef1ca22c9d018f67 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Fri, 12 Jul 2024 14:45:50 -0400 Subject: [PATCH 10/10] remove print statement --- dbt/adapters/snowflake/connections.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dbt/adapters/snowflake/connections.py b/dbt/adapters/snowflake/connections.py index c6d1f9567..6b325ab9c 100644 --- a/dbt/adapters/snowflake/connections.py +++ b/dbt/adapters/snowflake/connections.py @@ -147,7 +147,6 @@ def __post_init__(self): # only default `reuse_connections` to `True` if the user has not turned on `client_session_keep_alive` # having both of these set to `True` could lead to hanging open connections, so it should be opt-in behavior if self.client_session_keep_alive is False and self.reuse_connections is None: - print("Updating reuse_connections") self.reuse_connections = True @property