STS and S3ToGCS (#364)
* Just the STS / S3ToGCS Part

* Mock the AWS credentials provider

* Unused import

* How cool are linters, yeah

* PR fixes

* make line short
Fryyyyy authored Aug 6, 2021
1 parent 71b33ea commit c7b7bba
Showing 14 changed files with 404 additions and 43 deletions.
11 changes: 10 additions & 1 deletion libcloudforensics/errors.py
@@ -73,5 +73,14 @@ class InstanceStateChangeError(LCFError):
class ServiceAccountRemovalError(LCFError):
"""Error when an issue with removing a service account is encountered."""


class InstanceProfileCreationError(LCFError):
"""Error when there is an issue creating an instance profile"""
"""Error when there is an issue creating an instance profile."""


class TransferCreationError(LCFError):
"""Error when an issue with creating a new transfer job is encountered."""


class TransferExecutionError(LCFError):
"""Error when an issue with running a transfer job is encountered."""
4 changes: 2 additions & 2 deletions libcloudforensics/providers/aws/forensics.py
@@ -21,7 +21,7 @@
from libcloudforensics.providers.aws.internal.common import UBUNTU_1804_FILTER
from libcloudforensics.providers.aws.internal import account
from libcloudforensics.providers.aws.internal import iam
from libcloudforensics.providers.aws.internal import s3
from libcloudforensics.providers.utils.storage_utils import SplitStoragePath
from libcloudforensics.scripts import utils
from libcloudforensics import logging_utils
from libcloudforensics import errors
@@ -341,7 +341,7 @@ def CopyEBSSnapshotToS3(
# Correct destination if necessary
if not s3_destination.startswith('s3://'):
s3_destination = 's3://' + s3_destination
path_components = s3.SplitStoragePath(s3_destination)
path_components = SplitStoragePath(s3_destination)
bucket = path_components[0]
object_path = path_components[1]

22 changes: 4 additions & 18 deletions libcloudforensics/providers/aws/internal/s3.py
@@ -15,12 +15,13 @@
"""Bucket functionality."""

import os
from typing import TYPE_CHECKING, List, Dict, Optional, Any, Tuple
from typing import TYPE_CHECKING, List, Dict, Optional, Any

from libcloudforensics import errors
from libcloudforensics import logging_utils
from libcloudforensics.providers.aws.internal import common
from libcloudforensics.providers.gcp.internal import storage as gcp_storage
from libcloudforensics.providers.utils.storage_utils import SplitStoragePath


logging_utils.SetUpLogger(__name__)
@@ -160,7 +161,7 @@ def Put(
if not s3_path.endswith('/'):
s3_path = s3_path + '/'
try:
(bucket, path) = gcp_storage.SplitStoragePath(s3_path)
(bucket, path) = SplitStoragePath(s3_path)
client.upload_file(
filepath,
bucket,
@@ -214,7 +215,7 @@ def GCSToS3(self,
.format(object_md.get('size', 'Error')))
localcopy = gcs.GetObject(gcs_path)
try:
self.CreateBucket(gcp_storage.SplitStoragePath(s3_path)[0])
self.CreateBucket(SplitStoragePath(s3_path)[0])
except errors.ResourceCreationError as exception:
if 'already exists' in exception.message:
logger.info('Target bucket already exists. Reusing.')
@@ -294,18 +295,3 @@ def RmBucket(
logger.info('Deleting bucket {0:s}'.format(bucket))
s3_client = self.aws_account.ClientApi(common.S3_SERVICE)
s3_client.delete_bucket(Bucket=bucket)

def SplitStoragePath(path: str) -> Tuple[str, str]:
"""Split a path to bucket name and object URI.
Args:
path (str): File path to a resource in S3.
Ex: s3://bucket/folder/obj
Returns:
Tuple[str, str]: Bucket name. Object URI.
"""

_, _, full_path = path.partition('//')
bucket, _, object_uri = full_path.partition('/')
return bucket, object_uri
2 changes: 1 addition & 1 deletion libcloudforensics/providers/gcp/internal/common.py
@@ -161,7 +161,7 @@ def CreateService(service_name: str,
except DefaultCredentialsError as exception:
raise errors.CredentialsConfigurationError(
'Could not get application default credentials. Have you run $ gcloud '
'auth application-default login?: {0!s}'.format_map(exception),
'auth application-default login?: {0!s}'.format(exception),
__name__) from exception

service_built = False
19 changes: 2 additions & 17 deletions libcloudforensics/providers/gcp/internal/storage.py
@@ -19,7 +19,7 @@
import os
import shutil
import tempfile
from typing import TYPE_CHECKING, List, Dict, Any, Optional, Tuple
from typing import TYPE_CHECKING, List, Dict, Any, Optional

import googleapiclient.http
from googleapiclient.errors import HttpError
@@ -30,6 +30,7 @@
# pylint: disable=line-too-long
from libcloudforensics.providers.gcp.internal import monitoring as gcp_monitoring
# pylint: enable=line-too-long
from libcloudforensics.providers.utils.storage_utils import SplitStoragePath

logging_utils.SetUpLogger(__name__)
logger = logging_utils.GetLogger(__name__)
@@ -38,22 +39,6 @@
import googleapiclient # pylint: disable=ungrouped-imports


def SplitStoragePath(path: str) -> Tuple[str, str]:
"""Split a path to bucket name and object URI.
Args:
path (str): File path to a resource in GCS.
Ex: gs://bucket/folder/obj
Returns:
Tuple[str, str]: Bucket name. Object URI.
"""

_, _, full_path = path.partition('//')
bucket, _, object_uri = full_path.partition('/')
return bucket, object_uri


class GoogleCloudStorage:
"""Class to call Google Cloud Storage APIs.
173 changes: 173 additions & 0 deletions libcloudforensics/providers/gcp/internal/storagetransfer.py
@@ -0,0 +1,173 @@
# -*- coding: utf-8 -*-
# Copyright 2021 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Google Cloud Storage Transfer functionalities."""


from typing import TYPE_CHECKING, Dict, Any, Optional
import datetime
import time

from libcloudforensics import errors
from libcloudforensics import logging_utils
from libcloudforensics.providers.aws.internal import account
from libcloudforensics.providers.gcp.internal import common
from libcloudforensics.providers.utils.storage_utils import SplitStoragePath

logging_utils.SetUpLogger(__name__)
logger = logging_utils.GetLogger(__name__)

if TYPE_CHECKING:
import googleapiclient


class GoogleCloudStorageTransfer:
"""Class to call Google Cloud Storage Transfer APIs.
Attributes:
gcst_api_client: Client to interact with GCST APIs.
project_id: Google Cloud project ID.
"""
CLOUD_STORAGE_TRANSFER_API_VERSION = 'v1'

def __init__(self, project_id: Optional[str] = None) -> None:
"""Initialize the GoogleCloudStorageTransfer object.
Args:
project_id (str): Optional. Google Cloud project ID.
"""

self.gcst_api_client = None
self.project_id = project_id

def GcstApi(self) -> 'googleapiclient.discovery.Resource':
"""Get a Google Cloud Storage Transfer service object.
Returns:
googleapiclient.discovery.Resource: A Google Cloud Storage Transfer
service object.
"""

if self.gcst_api_client:
return self.gcst_api_client
self.gcst_api_client = common.CreateService(
'storagetransfer', self.CLOUD_STORAGE_TRANSFER_API_VERSION)
return self.gcst_api_client

def S3ToGCS(self, s3_path: str, zone: str, gcs_path: str) -> Dict[str, Any]:
"""Copy an S3 object to a GCS bucket.
Args:
s3_path (str): File path to the S3 resource.
Ex: s3://test/bucket/obj
zone (str): The AWS zone in which resources are located.
Available zones are listed at:
https://cloud.google.com/storage-transfer/docs/create-manage-transfer-program#s3-to-cloud # pylint: disable=line-too-long
gcs_path (str): File path to the target GCS bucket.
Ex: gs://bucket/folder
Returns:
Dict: An API operation object for a Google Cloud Storage Transfer operation.
https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferOperations/list # pylint: disable=line-too-long
Raises:
TransferCreationError: If the transfer couldn't be created.
TransferExecutionError: If the transfer couldn't be run.
"""
aws_creds = account.AWSAccount(zone).session.get_credentials()
if (aws_creds is None or aws_creds.access_key is None or
aws_creds.access_key.startswith('ASIA')):
raise errors.TransferCreationError(
'Could not create transfer. No long term AWS credentials available',
__name__)
s3_bucket, s3_path = SplitStoragePath(s3_path)
gcs_bucket, gcs_path = SplitStoragePath(gcs_path)
if not gcs_path.endswith('/'):
gcs_path = gcs_path + '/'
# Don't specify a path if we're writing to the bucket root.
if gcs_path == '/':
gcs_path = ''
today = datetime.datetime.now()
transfer_job_body = {
'projectId': self.project_id,
'description': 'created_by_cfu',
'transferSpec': {
'objectConditions': {
'includePrefixes': [s3_path]
},
'awsS3DataSource': {
'bucketName': s3_bucket,
'awsAccessKey': {
'accessKeyId': aws_creds.access_key,
'secretAccessKey': aws_creds.secret_key
}
},
'gcsDataSink': {
'bucketName': gcs_bucket, 'path': gcs_path
}
},
'schedule': {
'scheduleStartDate': {
'year': today.year, 'month': today.month, 'day': today.day
},
'scheduleEndDate': {
'year': today.year, 'month': today.month, 'day': today.day
},
'endTimeOfDay': {}
},
'status': 'ENABLED'
}
logger.info('Creating transfer job')
gcst_jobs = self.GcstApi().transferJobs()
create_request = gcst_jobs.create(body=transfer_job_body)
transfer_job = create_request.execute()
logger.info('Job created: {0:s}'.format(str(transfer_job)))
job_name = transfer_job.get('name', None)
if job_name is None:
raise errors.TransferCreationError(
'Could not create transfer. Job output: {0:s}'.format(
str(transfer_job)),
__name__)
logger.info('Job created: {0:s}'.format(job_name))
gcst_transfers = self.GcstApi().transferOperations()
filter_string = ('{{"projectId": "{0:s}", "jobNames": ["{1:s}"]}}').format(
self.project_id, job_name)
status = {}
while 'operations' not in status:
time.sleep(5)
status = gcst_transfers.list(
name='transferOperations', filter=filter_string).execute()
logger.info('Waiting for transfer to start...')
logger.info('Job status: {0:s}'.format(str(status)))
while not status['operations'][0].get('done'):
time.sleep(5)
status = gcst_transfers.list(
name='transferOperations', filter=filter_string).execute()
logger.info('Waiting to finish...')
logger.info(status)
error = status['operations'][0].get('error', None)
if error:
raise errors.TransferExecutionError(
'Could not execute transfer. Job output: {0:s}'.format(str(status)),
__name__)
counters = status['operations'][0].get('metadata', {}).get('counters', {})
logger.info(
'Transferred {0:s}/{1:s} files ({2:s}/{3:s} bytes).'.format(
counters.get('objectsFoundFromSource', '0'),
counters.get('objectsCopiedToSink', '0'),
counters.get('bytesFoundFromSource', '0'),
counters.get('bytesCopiedToSink', '0')))
logger.info(
'Skipped {0:s} files ({1:s} bytes).'.format(
counters.get('objectsFromSourceSkippedBySync', '0'),
counters.get('bytesFromSourceSkippedBySync', '0')))
return status
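
As a rough usage sketch of the new class (project ID, AWS zone, and bucket paths below are placeholders, not values taken from this change):

from libcloudforensics.providers.gcp.internal import storagetransfer

# Hypothetical invocation; S3ToGCS blocks until the transfer operation
# completes and returns the final transferOperations.list response.
gcst = storagetransfer.GoogleCloudStorageTransfer(project_id='my-project')
status = gcst.S3ToGCS(
    s3_path='s3://source-bucket/evidence/disk.img',
    zone='us-east-2b',
    gcs_path='gs://destination-bucket/imports')

# The copy counters logged by S3ToGCS are also available to the caller.
counters = status['operations'][0].get('metadata', {}).get('counters', {})
print(counters.get('objectsCopiedToSink', '0'))

Note that the method requires long-term AWS credentials (access keys not starting with 'ASIA'), since the transfer job embeds them in awsAccessKey.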
Empty file.
34 changes: 34 additions & 0 deletions libcloudforensics/providers/utils/storage_utils.py
@@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-
# Copyright 2021 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Cross-provider functionalities."""


from typing import Tuple


def SplitStoragePath(path: str) -> Tuple[str, str]:
"""Split a path to bucket name and object URI.
Args:
path (str): File path to a resource in GCS.
Ex: gs://bucket/folder/obj
Returns:
Tuple[str, str]: Bucket name. Object URI.
"""

_, _, full_path = path.partition('//')
bucket, _, object_uri = full_path.partition('/')
return bucket, object_uri
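
With the helper in a shared module, both the AWS and GCP code paths split storage URLs the same way; a quick illustrative call (paths are made up):

from libcloudforensics.providers.utils.storage_utils import SplitStoragePath

# Splits on '//' and then on the first '/', so it works for both schemes.
bucket, object_uri = SplitStoragePath('gs://my-bucket/folder/object.bin')
# -> ('my-bucket', 'folder/object.bin')
bucket, object_uri = SplitStoragePath('s3://my-bucket/folder/object.bin')
# -> ('my-bucket', 'folder/object.bin')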
4 changes: 2 additions & 2 deletions tests/providers/aws/e2e.py
@@ -23,8 +23,8 @@
from libcloudforensics.providers.aws.internal.common import EC2_SERVICE
from libcloudforensics.providers.aws.internal.common import S3_SERVICE
from libcloudforensics.providers.aws.internal import account
from libcloudforensics.providers.aws.internal import s3
from libcloudforensics.providers.aws import forensics
from libcloudforensics.providers.utils.storage_utils import SplitStoragePath
from libcloudforensics import logging_utils
from tests.scripts import utils

@@ -250,7 +250,7 @@ def testCopyEBSSnapshotToS3(self):

if not self.s3_destination.startswith('s3://'):
self.s3_destination = 's3://' + self.s3_destination
path_components = s3.SplitStoragePath(self.s3_destination)
path_components = SplitStoragePath(self.s3_destination)
bucket = path_components[0]
object_path = path_components[1]
