From af86ecd3d1f4f9b2f05759717981063a8a716bf4 Mon Sep 17 00:00:00 2001 From: Trish Gillett-Kawamoto Date: Tue, 10 Sep 2024 09:25:40 -0600 Subject: [PATCH] feat: Enable running with multiple app keys provided via config (#302) Adds a way to input the keys for multiple github apps, which people might want to do in order to increase capacity and protect against rate limits. As a bonus, this also gives users the option to provide their app key via config rather than having it inferred, and the option to use environment variable names of their choice. --- README.md | 11 +++++--- meltano.yml | 2 ++ tap_github/authenticator.py | 38 ++++++++++++++++++-------- tap_github/tap.py | 18 ++++++++++++ tap_github/tests/test_authenticator.py | 37 +++++++++++++++++++++++++ 5 files changed, 91 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index f315cb04..325434b9 100644 --- a/README.md +++ b/README.md @@ -33,10 +33,13 @@ This tap accepts the following configuration options: 4. `user_usernames`: A list of github usernames 5. `user_ids`: A list of github user ids [int] - Highly recommended: - - `auth_token` - GitHub token to authenticate with. - - `additional_auth_tokens` - List of GitHub tokens to authenticate with. Streams will loop through them when hitting rate limits.. - - alternatively, you can input authentication tokens with any environment variables starting with GITHUB_TOKEN. - - or authenticate as a GitHub app setting a private key in GITHUB_APP_PRIVATE_KEY. Formatted as follows: `:app_id:;;-----BEGIN RSA PRIVATE KEY-----\n_YOUR_P_KEY_\n-----END RSA PRIVATE KEY-----`. You can generate it from the `Private keys` section on https://github.com/organizations/:organization_name/settings/apps/:app_name. Read more about GitHub App quotas [here](https://docs.github.com/en/enterprise-server@3.3/developers/apps/building-github-apps/rate-limits-for-github-apps#server-to-server-requests). + - Personal access tokens (PATs) for authentication can be provided in 3 ways: + - `auth_token` - Takes a single token. + - `additional_auth_tokens` - Takes a list of tokens. Can be used together with `auth_token` or as the sole source of PATs. + - Any environment variables beginning with `GITHUB_TOKEN` will be assumed to be PATs. These tokens will be used in addition to `auth_token` (if provided), but will not be used if `additional_auth_tokens` is provided. + - GitHub App keys are another option for authentication, and can be used in combination with PATs if desired. App IDs and keys should be assembled into the format `:app_id:;;-----BEGIN RSA PRIVATE KEY-----\n_YOUR_P_KEY_\n-----END RSA PRIVATE KEY-----` where the key can be generated from the `Private keys` section on https://github.com/organizations/:organization_name/settings/apps/:app_name. Read more about GitHub App quotas [here](https://docs.github.com/en/enterprise-server@3.3/developers/apps/building-github-apps/rate-limits-for-github-apps#server-to-server-requests). Formatted app keys can be provided in 2 ways: + - `auth_app_keys` - List of GitHub App keys in the prescribed format. + - If `auth_app_keys` is not provided but there is an environment variable with the name `GITHUB_APP_PRIVATE_KEY`, it will be assumed to be an App key in the prescribed format. - Optional: - `user_agent` - `start_date` diff --git a/meltano.yml b/meltano.yml index 105386ca..8742c8a1 100644 --- a/meltano.yml +++ b/meltano.yml @@ -19,6 +19,8 @@ plugins: kind: password - name: additional_auth_tokens kind: array + - name: auth_app_keys + kind: array - name: rate_limit_buffer kind: integer - name: expiry_time_buffer diff --git a/tap_github/authenticator.py b/tap_github/authenticator.py index 7485add6..f9343438 100644 --- a/tap_github/authenticator.py +++ b/tap_github/authenticator.py @@ -270,37 +270,53 @@ def prepare_tokens(self) -> list[TokenManager]: ) personal_tokens = personal_tokens.union(env_tokens) - token_managers: list[TokenManager] = [] + personal_token_managers: list[TokenManager] = [] for token in personal_tokens: token_manager = PersonalTokenManager( token, rate_limit_buffer=rate_limit_buffer, logger=self.logger ) if token_manager.is_valid_token(): - token_managers.append(token_manager) + personal_token_managers.append(token_manager) else: logging.warn("A token was dismissed.") - # Parse App level private key and generate a token - if "GITHUB_APP_PRIVATE_KEY" in env_dict: - # To simplify settings, we use a single env-key formatted as follows: - # "{app_id};;{-----BEGIN RSA PRIVATE KEY-----\n_YOUR_PRIVATE_KEY_\n-----END RSA PRIVATE KEY-----}" # noqa: E501 - env_key = env_dict["GITHUB_APP_PRIVATE_KEY"] + # Parse App level private keys and generate tokens + # To simplify settings, we use a single env-key formatted as follows: + # "{app_id};;{-----BEGIN RSA PRIVATE KEY-----\n_YOUR_PRIVATE_KEY_\n-----END RSA PRIVATE KEY-----}" # noqa: E501 + + app_keys: set[str] = set() + if "auth_app_keys" in self._config: + app_keys = app_keys.union(self._config["auth_app_keys"]) + self.logger.info( + f"Provided {len(app_keys)} app keys via config for authentication." + ) + elif "GITHUB_APP_PRIVATE_KEY" in env_dict: + app_keys.add(env_dict["GITHUB_APP_PRIVATE_KEY"]) + self.logger.info( + "Found 1 app key via environment variable for authentication." + ) + + app_token_managers: list[TokenManager] = [] + for app_key in app_keys: try: app_token_manager = AppTokenManager( - env_key, + app_key, rate_limit_buffer=rate_limit_buffer, expiry_time_buffer=expiry_time_buffer, logger=self.logger, ) if app_token_manager.is_valid_token(): - token_managers.append(app_token_manager) + app_token_managers.append(app_token_manager) except ValueError as e: self.logger.warn( f"An error was thrown while preparing an app token: {e}" ) - self.logger.info(f"Tap will run with {len(token_managers)} auth tokens") - return token_managers + self.logger.info( + f"Tap will run with {len(personal_token_managers)} personal auth tokens " + f"and {len(app_token_managers)} app keys." + ) + return personal_token_managers + app_token_managers def __init__(self, stream: RESTStream) -> None: """Init authenticator. diff --git a/tap_github/tap.py b/tap_github/tap.py index 604a7206..3ab8e40e 100644 --- a/tap_github/tap.py +++ b/tap_github/tap.py @@ -47,11 +47,29 @@ def logger(cls) -> logging.Logger: th.ArrayType(th.StringType), description="List of GitHub tokens to authenticate with. Streams will loop through them when hitting rate limits.", # noqa: E501 ), + th.Property( + "auth_app_keys", + th.ArrayType(th.StringType), + description=( + "List of GitHub App credentials to authenticate with. Each credential " + "can be constructed by combining an App ID and App private key into " + "the format `:app_id:;;-----BEGIN RSA PRIVATE KEY-----\n_YOUR_P_KEY_\n-----END RSA PRIVATE KEY-----`." # noqa: E501 + ), + ), th.Property( "rate_limit_buffer", th.IntegerType, description="Add a buffer to avoid consuming all query points for the token at hand. Defaults to 1000.", # noqa: E501 ), + th.Property( + "expiry_time_buffer", + th.IntegerType, + description=( + "When authenticating as a GitHub App, this buffer controls how many " + "minutes before expiry the GitHub app tokens will be refreshed. " + "Defaults to 10 minutes.", + ), + ), th.Property( "searches", th.ArrayType( diff --git a/tap_github/tests/test_authenticator.py b/tap_github/tests/test_authenticator.py index e5c84618..042e483d 100644 --- a/tap_github/tests/test_authenticator.py +++ b/tap_github/tests/test_authenticator.py @@ -356,6 +356,43 @@ def test_env_personal_tokens_only(self, mock_stream): assert len(token_managers) == 2 assert sorted({tm.token for tm in token_managers}) == ["gt1", "gt2"] + def test_config_app_keys(self, mock_stream): + def generate_token_mock(app_id, private_key, installation_id): + return (f"installationtokenfor{app_id}", MagicMock()) + + with patch.object(TokenManager, "is_valid_token", return_value=True), patch( + "tap_github.authenticator.generate_app_access_token", + side_effect=generate_token_mock, + ): + stream = mock_stream + stream.config.update( + { + "auth_token": "gt5", + "additional_auth_tokens": ["gt7", "gt8", "gt9"], + "auth_app_keys": [ + "123;;gak1;;13", + "456;;gak1;;46", + "789;;gak1;;79", + ], + } + ) + auth = GitHubTokenAuthenticator(stream=stream) + token_managers = auth.prepare_tokens() + + assert len(token_managers) == 7 + + app_token_managers = { + tm for tm in token_managers if isinstance(tm, AppTokenManager) + } + assert len(app_token_managers) == 3 + + app_tokens = {tm.token for tm in app_token_managers} + assert app_tokens == { + "installationtokenfor123", + "installationtokenfor456", + "installationtokenfor789", + } + def test_env_app_key_only(self, mock_stream): with patch.object( GitHubTokenAuthenticator,