From e5b087ad7434748f955e8afee3830a756ac83b71 Mon Sep 17 00:00:00 2001 From: Richard Berendsen Date: Tue, 19 Mar 2024 16:35:58 +0100 Subject: [PATCH 1/3] Redact multiline secrets as well on stdout YAML dump --- schemachange/cli.py | 41 ++++++++++++++++--- tests/test_SecretManager.py | 7 ++++ tests/test_extract_config_secrets.py | 9 ++++ tests/test_redact_config_vars.py | 61 ++++++++++++++++++++++++++++ 4 files changed, 113 insertions(+), 5 deletions(-) create mode 100644 tests/test_redact_config_vars.py diff --git a/schemachange/cli.py b/schemachange/cli.py index 31745f43..660c6003 100644 --- a/schemachange/cli.py +++ b/schemachange/cli.py @@ -1,4 +1,5 @@ import argparse +import copy import hashlib import json import os @@ -74,6 +75,7 @@ _log_auth_type ="Proceeding with %s authentication" _log_pk_enc ="No private key passphrase provided. Assuming the key is not encrypted." _log_okta_ep ="Okta Endpoint: %s" +_err_non_str_secret ="Found a secret that is not of type str." #endregion Global Variables @@ -799,7 +801,10 @@ def inner_extract_dictionary_secrets(dictionary: Dict[str, Any], child_of_secret else : extracted_secrets = extracted_secrets | inner_extract_dictionary_secrets(value, child_of_secrets) elif child_of_secrets or "SECRET" in key.upper(): + if not isinstance(value, str): + raise ValueError(_err_non_str_secret) extracted_secrets.add(value.strip()) + return extracted_secrets extracted = set() @@ -809,6 +814,33 @@ def inner_extract_dictionary_secrets(dictionary: Dict[str, Any], child_of_secret extracted = inner_extract_dictionary_secrets(config["vars"]) return extracted + +def redact_config_vars(config_vars: Dict[str, Any]) -> Dict[str, Any]: + # Our goal is simply to print the config vars to stdout, but: + # - If we serialize to YAML and then redact, we won't redact multiline secrets, + # because YAML dump would add leading whitespace to them, changing them. + # - If we serialize to JSON and then redact, we may end up redacting floats, integers, etc, + # and we could not deserialise anymore in order to finally serialize to redacted YAML. + + # Therefore, we have this function here, which sole purpose is to redact the config_vars + # dict leaf nodes (only those of type str). This dict can then be serialized to YAML + # or any other format by the caller. + + # Using an inner function, cause I don't want to expose this generic signature to + # the caller, given the limited scope of what we are trying to accomplish + def inner_redact_config_vars(ob: Any) -> Any: + if isinstance(ob, dict): + return {key: inner_redact_config_vars(value) for key, value in ob.items()} + elif isinstance(ob, list): + return [inner_redact_config_vars(value) for value in ob] + elif isinstance(ob, str): + return SecretManager.global_redact(ob) + # Just to be sure, making a deep copy, cause ob can be a reference type + return copy.deepcopy(ob) + + return inner_redact_config_vars(config_vars) + + def main(argv=sys.argv): parser = argparse.ArgumentParser(prog = 'schemachange', description = 'Apply schema changes to a Snowflake account. Full readme at https://github.com/Snowflake-Labs/schemachange', formatter_class = argparse.RawTextHelpFormatter) subcommands = parser.add_subparsers(dest='subcommand') @@ -883,11 +915,10 @@ def main(argv=sys.argv): print("Using variables: {}") else: print("Using variables:") - print(textwrap.indent( \ - SecretManager.global_redact(yaml.dump( \ - config['vars'], \ - sort_keys=False, \ - default_flow_style=False)), prefix = " ")) + print(textwrap.indent(yaml.dump( \ + redact_config_vars(config['vars']), \ + sort_keys=False, \ + default_flow_style=False), prefix = " ")) # Finally, execute the command if args.subcommand == 'render': diff --git a/tests/test_SecretManager.py b/tests/test_SecretManager.py index 2ed70acb..4a91355e 100644 --- a/tests/test_SecretManager.py +++ b/tests/test_SecretManager.py @@ -22,6 +22,13 @@ def test_SecretManager_given_secrets_when_redact_then_return_redacted_value(): assert result == "Hello *****!" +def test_SecretManager_given_multiline_secrets_when_redact_then_return_redacted_value(): + sm = SecretManager() + sm.add("Hello\nworld") + result = sm.redact("Hello\nworld!") + assert result == "***********!" + + def test_SecretManager_given_secrets_when_clear_then_should_hold_zero_secrets(): sm = SecretManager() sm.add("world") diff --git a/tests/test_extract_config_secrets.py b/tests/test_extract_config_secrets.py index 91560746..0a0b8b4f 100644 --- a/tests/test_extract_config_secrets.py +++ b/tests/test_extract_config_secrets.py @@ -52,3 +52,12 @@ def test_extract_config_secrets_given__vars_with_same_secret_twice_then_only_ext assert len(results) == 1 assert "SECRET_VALUE" in results + + +def test_extract_config_secrets_given__vars_with_multiline_secret_then_preserve_newlines(): + config = {"vars": {"secrets" : {"database_name": "SECRET\nVALUE"} } } + + results = extract_config_secrets(config) + + assert len(results) == 1 + assert "SECRET\nVALUE" in results diff --git a/tests/test_redact_config_vars.py b/tests/test_redact_config_vars.py new file mode 100644 index 00000000..b26414c8 --- /dev/null +++ b/tests/test_redact_config_vars.py @@ -0,0 +1,61 @@ +import pytest +from schemachange.cli import redact_config_vars, SecretManager + + +def test_redact_config_vars_given_secret_val_should_redact_it(): + config_vars = {'secret' : 'secret_val'} + sm = SecretManager() + SecretManager.set_global_manager(sm) + sm.add('secret_val') + results = redact_config_vars(config_vars) + redacted = {'secret' : '**********'} + assert results == redacted + + +def test_redact_config_vars_given_number_like_secret_should_redact_it(): + config_vars = {'secret' : '123'} + sm = SecretManager() + SecretManager.set_global_manager(sm) + sm.add('123') + results = redact_config_vars(config_vars) + redacted = {'secret' : '***'} + assert results == redacted + + +def test_redact_config_vars_given_number_like_secret_should_redact_it_but_not_an_actual_number(): + config_vars = {'secret' : '123', 'public': 123} + sm = SecretManager() + SecretManager.set_global_manager(sm) + sm.add('123') + results = redact_config_vars(config_vars) + redacted = {'secret' : '***', 'public': 123} + assert results == redacted + + +def test_redact_config_vars_given_secret_should_redact_it_in_any_string_it_finds_it(): + config_vars = {'secrets': {'password' : 'hi'}, 'jdbc': 'jdbc:mysql://127.0.0.1;user=john;password=hi'} + sm = SecretManager() + SecretManager.set_global_manager(sm) + sm.add('hi') + results = redact_config_vars(config_vars) + redacted = {'secrets': {'password' : '**'}, 'jdbc': 'jdbc:mysql://127.0.0.1;user=john;password=**'} + assert results == redacted + + +def test_redact_config_vars_given_secret_should_redact_it_in_any_string_it_finds_it_even_in_lists(): + # At the moment, it's not documented that we could specify lists as values for variables, but, + # nothing is preventing people from trying things. Just in case, redact_config_vars is + # able to also redact strings inside lists. That's why this test is here. + # Finally note that tuples are not scanned, just making the point that this redact function cannot + # do everything; YAML deserialises lists as Python lists, not tuples, so we have no use case for tuples. + config_vars = {'secrets': {'password' : 'hi'}, 'jdbc': 'jdbc:mysql://127.0.0.1', + 'accounts': [{'user': 'john', 'password': '**'}], 'greetings': ['hi', 'hello'], 'greetings_tuple': ('hi',)} + sm = SecretManager() + SecretManager.set_global_manager(sm) + sm.add('hi') + results = redact_config_vars(config_vars) + redacted = {'secrets': {'password' : '**'}, 'jdbc': 'jdbc:mysql://127.0.0.1', + 'accounts': [{'user': 'john', 'password': '**'}], 'greetings': ['**', 'hello'], 'greetings_tuple': ('hi',)} + assert results == redacted + + From 705f3eb42baff7e450a6d6b0562690687a062500 Mon Sep 17 00:00:00 2001 From: Richard Berendsen Date: Wed, 20 Mar 2024 09:39:19 +0100 Subject: [PATCH 2/3] Preserve newlines in redacted secrets --- schemachange/cli.py | 2 +- tests/test_SecretManager.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/schemachange/cli.py b/schemachange/cli.py index 660c6003..2146350b 100644 --- a/schemachange/cli.py +++ b/schemachange/cli.py @@ -185,7 +185,7 @@ def redact(self, context: str) -> str: redacted = context if redacted: for secret in self.__secrets: - redacted = redacted.replace(secret, "*" * len(secret)) + redacted = redacted.replace(secret, '\n'.join(["*" * len(l) for l in secret.split('\n')])) return redacted diff --git a/tests/test_SecretManager.py b/tests/test_SecretManager.py index 4a91355e..7736516e 100644 --- a/tests/test_SecretManager.py +++ b/tests/test_SecretManager.py @@ -26,7 +26,7 @@ def test_SecretManager_given_multiline_secrets_when_redact_then_return_redacted_ sm = SecretManager() sm.add("Hello\nworld") result = sm.redact("Hello\nworld!") - assert result == "***********!" + assert result == "*****\n*****!" def test_SecretManager_given_secrets_when_clear_then_should_hold_zero_secrets(): From 37a4ac3d8e8ef540f4c4883eb99b1d58f81f11ab Mon Sep 17 00:00:00 2001 From: Richard Berendsen Date: Wed, 20 Mar 2024 10:30:09 +0100 Subject: [PATCH 3/3] small change to test case --- tests/test_redact_config_vars.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_redact_config_vars.py b/tests/test_redact_config_vars.py index b26414c8..02bdeacd 100644 --- a/tests/test_redact_config_vars.py +++ b/tests/test_redact_config_vars.py @@ -49,7 +49,7 @@ def test_redact_config_vars_given_secret_should_redact_it_in_any_string_it_finds # Finally note that tuples are not scanned, just making the point that this redact function cannot # do everything; YAML deserialises lists as Python lists, not tuples, so we have no use case for tuples. config_vars = {'secrets': {'password' : 'hi'}, 'jdbc': 'jdbc:mysql://127.0.0.1', - 'accounts': [{'user': 'john', 'password': '**'}], 'greetings': ['hi', 'hello'], 'greetings_tuple': ('hi',)} + 'accounts': [{'user': 'john', 'password': 'hi'}], 'greetings': ['hi', 'hello'], 'greetings_tuple': ('hi',)} sm = SecretManager() SecretManager.set_global_manager(sm) sm.add('hi')