From 74179b29f0abc25e91a2507f7c1801da70ab354f Mon Sep 17 00:00:00 2001 From: Alexander VanTol Date: Wed, 18 Dec 2019 09:29:34 -0600 Subject: [PATCH 01/13] feat(cleversafe): allow specifying s3-compliant url for a bucket in cfg for signed url creation --- fence/blueprints/data/indexd.py | 19 +++++++++++++++---- fence/config-default.yaml | 2 ++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/fence/blueprints/data/indexd.py b/fence/blueprints/data/indexd.py index d7895b775..b9c3b2973 100644 --- a/fence/blueprints/data/indexd.py +++ b/fence/blueprints/data/indexd.py @@ -594,13 +594,24 @@ def get_signed_url( aws_creds = get_value( config, "AWS_CREDENTIALS", InternalError("credentials not configured") ) - - http_url = "https://{}.s3.amazonaws.com/{}".format( - self.parsed_url.netloc, self.parsed_url.path.strip("/") + s3_buckets = get_value( + config, "S3_BUCKETS", InternalError("buckets not configured") ) + bucket = self.bucket_name() + + url_for_s3 = config["S3_BUCKETS"].get("endpoint_url") + if url_for_s3: + http_url = url_for_s3.strip("/") + "/{}".format( + self.parsed_url.path.strip("/") + ) + else: + http_url = "https://{}.s3.amazonaws.com/{}".format( + self.parsed_url.netloc, self.parsed_url.path.strip("/") + ) + credential = S3IndexedFileLocation.get_credential_to_access_bucket( - self.bucket_name(), aws_creds, expires_in + bucket, aws_creds, expires_in ) # if it's public and we don't need to force the signed url, just return the raw diff --git a/fence/config-default.yaml b/fence/config-default.yaml index 9984a14d2..d2dc734a2 100644 --- a/fence/config-default.yaml +++ b/fence/config-default.yaml @@ -483,6 +483,8 @@ AWS_CREDENTIALS: S3_BUCKETS: bucket1: cred: 'CRED1' + # optionally you can manually specify an s3-compliant endpoint for this bucket + endpoint_url: 'https://cleversafe.example.com/' bucket2: cred: 'CRED2' region: 'us-east-1' #optional but if specified avoids a call to GetBucketLocation which you may lack the AWS ACLs for. From 40c336ab3db5fcc3665c2a3ec6d42d363b8a0675 Mon Sep 17 00:00:00 2001 From: Edward Malinowski Date: Wed, 18 Dec 2019 09:50:29 -0600 Subject: [PATCH 02/13] rebuild --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6cecd77e0..8e11f2f68 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Fence +# Fence [![Build Status](https://travis-ci.org/uc-cdis/fence.svg?branch=master)](https://travis-ci.org/uc-cdis/fence) From 14acf995b0e48b56bd11dac8b186e61f0f3c7493 Mon Sep 17 00:00:00 2001 From: Edward Malinowski Date: Wed, 18 Dec 2019 11:47:21 -0600 Subject: [PATCH 03/13] fix(endpoint-url): fix getting the endpoint url --- fence/blueprints/data/indexd.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fence/blueprints/data/indexd.py b/fence/blueprints/data/indexd.py index b9c3b2973..bcd5e389d 100644 --- a/fence/blueprints/data/indexd.py +++ b/fence/blueprints/data/indexd.py @@ -599,8 +599,9 @@ def get_signed_url( ) bucket = self.bucket_name() + current_bucket = s3_buckets.get(self.bucket_name()) - url_for_s3 = config["S3_BUCKETS"].get("endpoint_url") + url_for_s3 = current_bucket["endpoint_url"] if url_for_s3: http_url = url_for_s3.strip("/") + "/{}".format( self.parsed_url.path.strip("/") From a0c4a39a87f667d218bfbf43c9ac48610562dbf9 Mon Sep 17 00:00:00 2001 From: Edward Malinowski Date: Wed, 18 Dec 2019 13:00:12 -0600 Subject: [PATCH 04/13] fix(endpoint-url): fix getting the endpoint url --- fence/blueprints/data/indexd.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fence/blueprints/data/indexd.py b/fence/blueprints/data/indexd.py index bcd5e389d..0193b21db 100644 --- a/fence/blueprints/data/indexd.py +++ b/fence/blueprints/data/indexd.py @@ -603,8 +603,8 @@ def get_signed_url( url_for_s3 = current_bucket["endpoint_url"] if url_for_s3: - http_url = url_for_s3.strip("/") + "/{}".format( - self.parsed_url.path.strip("/") + http_url = url_for_s3.strip("/") + "/{}/{}".format( + self.parsed_url.netloc, self.parsed_url.path.strip("/") ) else: http_url = "https://{}.s3.amazonaws.com/{}".format( From 2671e14180ca402d9cca9029dd09047cfe0b0386 Mon Sep 17 00:00:00 2001 From: Edward Malinowski Date: Mon, 6 Jan 2020 09:29:06 -0600 Subject: [PATCH 05/13] fix(endpoint-url): fix getting the endpoint url --- fence/blueprints/data/indexd.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fence/blueprints/data/indexd.py b/fence/blueprints/data/indexd.py index 0193b21db..da0201fd4 100644 --- a/fence/blueprints/data/indexd.py +++ b/fence/blueprints/data/indexd.py @@ -601,9 +601,8 @@ def get_signed_url( bucket = self.bucket_name() current_bucket = s3_buckets.get(self.bucket_name()) - url_for_s3 = current_bucket["endpoint_url"] - if url_for_s3: - http_url = url_for_s3.strip("/") + "/{}/{}".format( + if current_bucket["endpoint_url"]: + http_url = current_bucket["endpoint_url"].strip("/") + "/{}/{}".format( self.parsed_url.netloc, self.parsed_url.path.strip("/") ) else: From 5b8f159ebff82ad2efa461d11442af6a72f45468 Mon Sep 17 00:00:00 2001 From: Edward Malinowski Date: Mon, 6 Jan 2020 15:12:48 -0600 Subject: [PATCH 06/13] fix(endpoint-url): fix getting the endpoint url --- fence/blueprints/data/indexd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fence/blueprints/data/indexd.py b/fence/blueprints/data/indexd.py index da0201fd4..fac390e5a 100644 --- a/fence/blueprints/data/indexd.py +++ b/fence/blueprints/data/indexd.py @@ -601,7 +601,7 @@ def get_signed_url( bucket = self.bucket_name() current_bucket = s3_buckets.get(self.bucket_name()) - if current_bucket["endpoint_url"]: + if "endpoint_url" in current_bucket and current_bucket["endpoint_url"]: http_url = current_bucket["endpoint_url"].strip("/") + "/{}/{}".format( self.parsed_url.netloc, self.parsed_url.path.strip("/") ) From a72bf6e1ff6c7ca0d744d15a0ad2bfae8ac83ced Mon Sep 17 00:00:00 2001 From: Edward Malinowski Date: Mon, 6 Jan 2020 15:16:24 -0600 Subject: [PATCH 07/13] fix(endpoint-url): fix getting the endpoint url --- fence/blueprints/data/indexd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fence/blueprints/data/indexd.py b/fence/blueprints/data/indexd.py index fac390e5a..1652a34c7 100644 --- a/fence/blueprints/data/indexd.py +++ b/fence/blueprints/data/indexd.py @@ -601,7 +601,7 @@ def get_signed_url( bucket = self.bucket_name() current_bucket = s3_buckets.get(self.bucket_name()) - if "endpoint_url" in current_bucket and current_bucket["endpoint_url"]: + if current_bucket and current_bucket.get("endpoint_url"): http_url = current_bucket["endpoint_url"].strip("/") + "/{}/{}".format( self.parsed_url.netloc, self.parsed_url.path.strip("/") ) From 554a2381428923e50ed33c94d123061a885bab8e Mon Sep 17 00:00:00 2001 From: Alexander VanTol Date: Thu, 5 Mar 2020 13:44:31 -0600 Subject: [PATCH 08/13] chore(tests): configure tests to have a bucket with a different s3 endpoint to ensure it doesn't break existing signed url logic/unit tests --- tests/conftest.py | 4 ++-- tests/test-fence-config.yaml | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index b2f3bcefd..a02a2d6b1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -461,8 +461,8 @@ def indexd_client(app, request): "baseid": "", "rev": "", "size": 10, - "file_name": "file1", - "urls": ["s3://bucket1/key"], + "file_name": "file2", + "urls": ["s3://bucket2/key"], "hashes": {}, "acl": ["phs000178", "phs000218"], "form": "", diff --git a/tests/test-fence-config.yaml b/tests/test-fence-config.yaml index c572f39b4..8e03b9716 100644 --- a/tests/test-fence-config.yaml +++ b/tests/test-fence-config.yaml @@ -360,6 +360,7 @@ S3_BUCKETS: cred: 'CRED1' bucket2: cred: 'CRED2' + endpoint_url: 'https://cleversafe.example.com/' bucket3: cred: 'CRED1' bucket4: From 246359adb60a03321eb600b82f40bfe23c3c2edf Mon Sep 17 00:00:00 2001 From: Alexander VanTol Date: Tue, 10 Mar 2020 12:31:18 -0500 Subject: [PATCH 09/13] Update fence/blueprints/data/indexd.py Co-Authored-By: Pauline Ribeyre --- fence/blueprints/data/indexd.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fence/blueprints/data/indexd.py b/fence/blueprints/data/indexd.py index 1652a34c7..a62121521 100644 --- a/fence/blueprints/data/indexd.py +++ b/fence/blueprints/data/indexd.py @@ -598,8 +598,8 @@ def get_signed_url( config, "S3_BUCKETS", InternalError("buckets not configured") ) - bucket = self.bucket_name() - current_bucket = s3_buckets.get(self.bucket_name()) + bucket_name = self.bucket_name() + bucket = s3_buckets.get(bucket_name) if current_bucket and current_bucket.get("endpoint_url"): http_url = current_bucket["endpoint_url"].strip("/") + "/{}/{}".format( From eacd20a9fc596812ba2b3ebcfaedd4a30e6d15dc Mon Sep 17 00:00:00 2001 From: Alexander VanTol Date: Tue, 10 Mar 2020 12:36:06 -0500 Subject: [PATCH 10/13] fix(indexd): use bucket name instead of bucket config for function --- fence/blueprints/data/indexd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fence/blueprints/data/indexd.py b/fence/blueprints/data/indexd.py index 9e7f24f5a..f58e587e9 100644 --- a/fence/blueprints/data/indexd.py +++ b/fence/blueprints/data/indexd.py @@ -622,7 +622,7 @@ def get_signed_url( ) credential = S3IndexedFileLocation.get_credential_to_access_bucket( - bucket, aws_creds, expires_in + bucket_name, aws_creds, expires_in ) # if it's public and we don't need to force the signed url, just return the raw From 10624322fc847fb43f955c064b19cc5014ac9bd7 Mon Sep 17 00:00:00 2001 From: Alexander VanTol Date: Tue, 10 Mar 2020 13:25:18 -0500 Subject: [PATCH 11/13] fix(index): fix var name --- fence/blueprints/data/indexd.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fence/blueprints/data/indexd.py b/fence/blueprints/data/indexd.py index f58e587e9..bb9e108a0 100644 --- a/fence/blueprints/data/indexd.py +++ b/fence/blueprints/data/indexd.py @@ -612,8 +612,8 @@ def get_signed_url( bucket_name = self.bucket_name() bucket = s3_buckets.get(bucket_name) - if current_bucket and current_bucket.get("endpoint_url"): - http_url = current_bucket["endpoint_url"].strip("/") + "/{}/{}".format( + if bucket and bucket.get("endpoint_url"): + http_url = bucket["endpoint_url"].strip("/") + "/{}/{}".format( self.parsed_url.netloc, self.parsed_url.path.strip("/") ) else: From bbbbd054cb887a62f9d92b98609c537f95e075c3 Mon Sep 17 00:00:00 2001 From: Alexander VanTol Date: Tue, 10 Mar 2020 16:04:22 -0500 Subject: [PATCH 12/13] feat(cfg): ensure we don't make unecessary call to get region when endpoint for s3 is specified --- fence/__init__.py | 36 ++++++++++++++++++--------------- fence/blueprints/data/indexd.py | 14 ++++++++----- 2 files changed, 29 insertions(+), 21 deletions(-) diff --git a/fence/__init__.py b/fence/__init__.py index c234034d4..12e348d87 100644 --- a/fence/__init__.py +++ b/fence/__init__.py @@ -171,8 +171,8 @@ def public_keys(): def _check_s3_buckets(app): """ - Function to ensure that all s3_buckets have a valid credential. - Additionally, if there is no region it will produce a warning then trys to fetch and cache the region. + Function to ensure that all s3_buckets have a valid credential. + Additionally, if there is no region it will produce a warning then trys to fetch and cache the region. """ buckets = config.get("S3_BUCKETS") or {} aws_creds = config.get("AWS_CREDENTIALS") or {} @@ -190,21 +190,25 @@ def _check_s3_buckets(app): cred, bucket_name ) ) - if not region: - logger.warning( - "WARNING: no region for S3_BUCKET: {}. Providing the region will reduce" - " response time and avoid a call to GetBucketLocation which you make lack the AWS ACLs for.".format( - bucket_name + + # only require region when we're not specifying an + # s3-compatible endpoint URL (ex: no need for region when using cleversafe) + if not bucket_details.get("endpoint_url"): + if not region: + logger.warning( + "WARNING: no region for S3_BUCKET: {}. Providing the region will reduce" + " response time and avoid a call to GetBucketLocation which you make lack the AWS ACLs for.".format( + bucket_name + ) ) - ) - credential = S3IndexedFileLocation.get_credential_to_access_bucket( - bucket_name, - aws_creds, - config.get("MAX_PRESIGNED_URL_TTL", 3600), - app.boto, - ) - region = app.boto.get_bucket_region(bucket_name, credential) - config["S3_BUCKETS"][bucket_name]["region"] = region + credential = S3IndexedFileLocation.get_credential_to_access_bucket( + bucket_name, + aws_creds, + config.get("MAX_PRESIGNED_URL_TTL", 3600), + app.boto, + ) + region = app.boto.get_bucket_region(bucket_name, credential) + config["S3_BUCKETS"][bucket_name]["region"] = region def app_config( diff --git a/fence/blueprints/data/indexd.py b/fence/blueprints/data/indexd.py index bb9e108a0..fec8e913d 100644 --- a/fence/blueprints/data/indexd.py +++ b/fence/blueprints/data/indexd.py @@ -638,11 +638,15 @@ def get_signed_url( if aws_access_key_id == "*" or (public_data and not force_signed_url): return http_url - region = self.get_bucket_region() - if not region: - region = flask.current_app.boto.get_bucket_region( - self.parsed_url.netloc, credential - ) + # only attempt to get the region when we're not specifying an + # s3-compatible endpoint URL (ex: no need for region when using cleversafe) + region = None + if not current_bucket.get("endpoint_url"): + region = self.get_bucket_region() + if not region: + region = flask.current_app.boto.get_bucket_region( + self.parsed_url.netloc, credential + ) user_info = _get_user_info() From 7d78f17f2ceba94bd3dfb58d641688f3c6b15d2d Mon Sep 17 00:00:00 2001 From: Pauline Ribeyre Date: Tue, 10 Mar 2020 17:36:53 -0500 Subject: [PATCH 13/13] Update fence/blueprints/data/indexd.py --- fence/blueprints/data/indexd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fence/blueprints/data/indexd.py b/fence/blueprints/data/indexd.py index fec8e913d..52e867573 100644 --- a/fence/blueprints/data/indexd.py +++ b/fence/blueprints/data/indexd.py @@ -641,7 +641,7 @@ def get_signed_url( # only attempt to get the region when we're not specifying an # s3-compatible endpoint URL (ex: no need for region when using cleversafe) region = None - if not current_bucket.get("endpoint_url"): + if not bucket.get("endpoint_url"): region = self.get_bucket_region() if not region: region = flask.current_app.boto.get_bucket_region(