Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OpenConceptLab/ocl_issues#1729 | Azure Blob Storage Class for exports and other uploads #643

Merged
merged 3 commits into from
Jan 25, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 133 additions & 0 deletions core/services/storages/cloud/azure.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import base64

from azure.storage.blob import BlobServiceClient, ContentSettings, BlobPrefix
from django.conf import settings
from django.core.files.base import ContentFile
from pydash import get

from core.services.storages.cloud.core import CloudStorageServiceInterface


class BlobStorage(CloudStorageServiceInterface):
def __init__(self):
super().__init__()
self.account_name = settings.AZURE_STORAGE_ACCOUNT_NAME
self.container_name = settings.AZURE_STORAGE_CONTAINER_NAME
self.connection_string = settings.AZURE_STORAGE_CONNECTION_STRING
self.client = self.__get_container_client()

def public_url_for(self, file_path):
return f"https://{self.account_name}.blob.core.windows.net/{self.container_name}/{file_path}"

def url_for(self, file_path):
return self.public_url_for(file_path)

def exists(self, key):
try:
self.__get_blob_client(key).get_blob_properties()
return True
except: # pylint: disable=bare-except
return False

def has_path(self, prefix='/', delimiter='/'):
try:
blobs = self._fetch_blobs(prefix, delimiter)
return any(blob.name.startswith(prefix) for blob in blobs if not isinstance(blob, BlobPrefix))
except: # pylint: disable=bare-except
return False

def get_last_key_from_path(self, prefix='/', delimiter=''):
try:
if delimiter and not prefix.endswith(delimiter):
prefix = prefix + delimiter
blobs = self._fetch_blobs(prefix, delimiter)
blob_names = [[blob.name, blob.last_modified] for blob in blobs if not isinstance(blob, BlobPrefix)]
return sorted(
blob_names, key=lambda x: x[1], reverse=True)[0][0] if len(blob_names) > 1 else blob_names[0][0]
except: # pylint: disable=bare-except
return None

def delete_objects(self, path):
count_deleted = 0
try:
for blob in self._fetch_blobs(path, ''):
if not isinstance(blob, BlobPrefix):
self._remove(blob.name)
count_deleted += 1
return count_deleted
except: # pylint: disable=bare-except
return count_deleted

def remove(self, key):
try:
return self._remove(key)
except: # pylint: disable=bare-except
pass

return None

def upload_file(
self, key, file_path=None, headers=None, binary=False, metadata=None
): # pylint: disable=too-many-arguments
read_directive = 'rb' if binary else 'r'
file_path = file_path if file_path else key
return self._upload(key, open(file_path, read_directive).read(), headers or metadata)
snyaggarwal marked this conversation as resolved.
Show resolved Hide resolved

def upload_base64( # pylint: disable=too-many-arguments,inconsistent-return-statements
self, doc_base64, file_name, append_extension=True, public_read=False, headers=None
):
_format = None
_doc_string = None
try:
_format, _doc_string = doc_base64.split(';base64,')
except: # pylint: disable=bare-except # pragma: no cover
pass

if not _format or not _doc_string: # pragma: no cover
return

if append_extension:
file_name_with_ext = file_name + "." + _format.split('/')[-1]
else:
if file_name and file_name.split('.')[-1].lower() not in [
'pdf', 'jpg', 'jpeg', 'bmp', 'gif', 'png'
]:
file_name += '.jpg'
file_name_with_ext = file_name

self._upload(file_name_with_ext, ContentFile(base64.b64decode(_doc_string)), headers)

return file_name_with_ext

def _upload(self, blob_name, file_content, metadata=None):
try:
content_settings = ContentSettings(content_type='application/octet-stream')
content_type = get(metadata, 'content-type') or get(metadata, 'ContentType')
if content_type and 'application/' in content_type:
content_settings.content_encoding = content_type.split('application/')[1]

blob_client = self.__get_blob_client(blob_name)
blob_client.upload_blob(data=file_content, content_settings=content_settings, overwrite=True)

return blob_client.url
except: # pylint: disable=bare-except
return None

def _fetch_blobs(self, prefix, delimiter):
if delimiter and prefix.endswith(delimiter):
prefix = prefix[:-1]
return self.client.walk_blobs(name_starts_with=prefix, delimiter=delimiter)

def _remove(self, blob_name):
return self.__get_blob_client(blob_name).delete_blob()

def __get_blob_client(self, blob_name):
return self.client.get_blob_client(blob=blob_name)

def __get_container_client(self):
try:
return BlobServiceClient.from_connection_string(
conn_str=self.connection_string
).get_container_client(self.container_name)
except: # pylint: disable=bare-except
return None
207 changes: 206 additions & 1 deletion core/services/storages/cloud/tests.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
import base64
from unittest.mock import Mock, patch, mock_open
import os
from datetime import timedelta
from unittest.mock import Mock, patch, mock_open, ANY

import boto3
from azure.storage.blob import BlobPrefix
from botocore.exceptions import ClientError
from django.core.files.base import ContentFile
from django.test import TestCase
from django.utils import timezone
from mock.mock import call
from moto import mock_s3

from core.services.storages.cloud.aws import S3
from core.services.storages.cloud.azure import BlobStorage


class S3Test(TestCase):
Expand Down Expand Up @@ -183,3 +189,202 @@ def test_public_url_for(self):
S3().public_url_for('some/path').replace('https://', 'http://'),
'http://oclapi2-dev.s3.amazonaws.com/some/path'
)


class BlobStorageTest(TestCase):
@patch('core.services.storages.cloud.azure.BlobServiceClient')
def test_client(self, blob_service_client):
container_mock = Mock(get_container_client=Mock(return_value='container-client'))
blob_service_client.from_connection_string = Mock(return_value=container_mock)
blob_storage = BlobStorage()
self.assertEqual(blob_storage.client, 'container-client')
blob_service_client.from_connection_string.assert_called_once_with(conn_str='conn-str')
container_mock.get_container_client.assert_called_once_with('ocl-test-exports')

@patch('core.services.storages.cloud.azure.BlobServiceClient')
def test_upload_file(self, blob_service_client):
client_mock = Mock(upload_blob=Mock(return_value='success'), url='https://some-url')
container_client_mock = Mock(get_blob_client=Mock(return_value=client_mock))
container_mock = Mock(get_container_client=Mock(return_value=container_client_mock))
blob_service_client.from_connection_string = Mock(return_value=container_mock)
blob_storage = BlobStorage()

file_path = os.path.join(os.path.dirname(__file__), '../../../', 'samples/sample_ocldev.json')

result = blob_storage.upload_file(
'foo/bar/foo.json', file_path, {'content-type': 'application/zip'}, True
)

self.assertEqual(result, 'https://some-url')
container_client_mock.get_blob_client.assert_called_once_with(blob='foo/bar/foo.json')
client_mock.upload_blob.assert_called_once_with(
data=open(file_path, 'rb').read(), content_settings=ANY, overwrite=True)
self.assertEqual(
dict(client_mock.upload_blob.call_args[1]['content_settings']),
{
'content_type': 'application/octet-stream',
'content_encoding': 'zip',
'content_language': None,
'content_md5': None,
'content_disposition': None,
'cache_control': None
}
)

@patch('core.services.storages.cloud.azure.BlobServiceClient')
def test_upload_base64(self, blob_service_client):
client_mock = Mock(upload_blob=Mock(return_value='success'), url='https://some-url')
container_client_mock = Mock(get_blob_client=Mock(return_value=client_mock))
container_mock = Mock(get_container_client=Mock(return_value=container_client_mock))
blob_service_client.from_connection_string = Mock(return_value=container_mock)
blob_storage = BlobStorage()

file_content = base64.b64encode(b'file-content')

uploaded_file_name_with_ext = blob_storage.upload_base64(
doc_base64='extension/ext;base64,' + file_content.decode(),
file_name='some-file-name',
)

self.assertEqual(uploaded_file_name_with_ext, 'some-file-name.ext')
container_client_mock.get_blob_client.assert_called_once_with(blob='some-file-name.ext')
client_mock.upload_blob.assert_called_once_with(data=ANY, content_settings=ANY, overwrite=True)
self.assertEqual(
dict(client_mock.upload_blob.call_args[1]['content_settings']),
{
'content_type': 'application/octet-stream',
'content_encoding': None,
'content_language': None,
'content_md5': None,
'content_disposition': None,
'cache_control': None
}
)

@patch('core.services.storages.cloud.azure.BlobServiceClient', Mock())
def test_public_url_for(self):
blob_storage = BlobStorage()

self.assertEqual(
blob_storage.public_url_for('some/path/file.json'),
'https://ocltestaccount.blob.core.windows.net/ocl-test-exports/some/path/file.json'
)

self.assertEqual(
blob_storage.public_url_for('file.zip'),
'https://ocltestaccount.blob.core.windows.net/ocl-test-exports/file.zip'
)

@patch('core.services.storages.cloud.azure.BlobServiceClient', Mock())
def test_url_for(self):
blob_storage = BlobStorage()

self.assertEqual(
blob_storage.url_for('some/path/file.json'),
'https://ocltestaccount.blob.core.windows.net/ocl-test-exports/some/path/file.json'
)

self.assertEqual(
blob_storage.url_for('file.zip'),
'https://ocltestaccount.blob.core.windows.net/ocl-test-exports/file.zip'
)

@patch('core.services.storages.cloud.azure.BlobServiceClient')
def test_exists(self, blob_service_client):
client_mock = Mock()
client_mock.get_blob_properties.side_effect = [{'name': 'blah', 'last_modified': 'blah'}, Exception()]
container_client_mock = Mock(get_blob_client=Mock(return_value=client_mock))
container_mock = Mock(get_container_client=Mock(return_value=container_client_mock))
blob_service_client.from_connection_string = Mock(return_value=container_mock)

blob_storage = BlobStorage()

self.assertTrue(blob_storage.exists('some/path/file.zip'))
self.assertFalse(blob_storage.exists('foo.json'))
self.assertEqual(container_client_mock.get_blob_client.call_count, 2)
self.assertEqual(
container_client_mock.get_blob_client.mock_calls[0],
call(blob='some/path/file.zip')
)
self.assertEqual(
container_client_mock.get_blob_client.mock_calls[1],
call(blob='foo.json')
)
self.assertEqual(client_mock.get_blob_properties.call_count, 2)

@patch('core.services.storages.cloud.azure.BlobServiceClient')
def test_has_path(self, blob_service_client):
blob1 = Mock()
blob1.name = 'foo/bar/foobar.json'
blob2 = Mock()
blob2.name = 'foobar.json'
blobs_mock = [blob1, blob2, BlobPrefix(name='bar/foobar.json')]
container_client_mock = Mock(walk_blobs=Mock(return_value=blobs_mock))
container_mock = Mock(get_container_client=Mock(return_value=container_client_mock))
blob_service_client.from_connection_string = Mock(return_value=container_mock)

blob_storage = BlobStorage()

self.assertTrue(blob_storage.has_path('foo/bar/'))
self.assertTrue(blob_storage.has_path('foo/bar'))
self.assertFalse(blob_storage.has_path('bar/'))

self.assertEqual(container_client_mock.walk_blobs.call_count, 3)
self.assertEqual(
container_client_mock.walk_blobs.mock_calls[0], call(name_starts_with='foo/bar', delimiter='/'))
self.assertEqual(
container_client_mock.walk_blobs.mock_calls[1], call(name_starts_with='foo/bar', delimiter='/'))
self.assertEqual(
container_client_mock.walk_blobs.mock_calls[2], call(name_starts_with='bar', delimiter='/'))

@patch('core.services.storages.cloud.azure.BlobServiceClient')
def test_get_last_key_from_path(self, blob_service_client):
now = timezone.now()
blob1 = Mock(last_modified=now)
blob1.name = 'foo/bar/foobar.json'
blob2 = Mock(last_modified=now - timedelta(days=1))
blob2.name = 'foo/bar/foobar1.json'
blobs_mock = [blob1, blob2, BlobPrefix(name='foo/bar/foobar.json')]
container_client_mock = Mock(walk_blobs=Mock(return_value=blobs_mock))
container_mock = Mock(get_container_client=Mock(return_value=container_client_mock))
blob_service_client.from_connection_string = Mock(return_value=container_mock)

self.assertEqual(BlobStorage().get_last_key_from_path('foo/bar/'), 'foo/bar/foobar.json')
container_client_mock.walk_blobs.assert_called_once_with(name_starts_with='foo/bar/', delimiter='')

@patch('core.services.storages.cloud.azure.BlobServiceClient')
def test_delete_objects(self, blob_service_client):
blob1 = Mock()
blob1.name = 'foo/bar/foobar.json'
blob2 = Mock()
blob2.name = 'foo/bar/foobar1.json'
blobs_mock = [blob1, blob2, BlobPrefix(name='foo/bar/foobar2.json')]
client_mock = Mock(delete_blob=Mock())
container_client_mock = Mock(
walk_blobs=Mock(return_value=blobs_mock), get_blob_client=Mock(return_value=client_mock))
container_mock = Mock(get_container_client=Mock(return_value=container_client_mock))
blob_service_client.from_connection_string = Mock(return_value=container_mock)

self.assertEqual(BlobStorage().delete_objects('foo/bar/'), 2)
container_client_mock.walk_blobs.assert_called_once_with(name_starts_with='foo/bar/', delimiter='')
self.assertEqual(container_client_mock.get_blob_client.call_count, 2)
self.assertEqual(container_client_mock.get_blob_client.mock_calls[0], call(blob='foo/bar/foobar.json'))
self.assertEqual(container_client_mock.get_blob_client.mock_calls[1], call(blob='foo/bar/foobar1.json'))
self.assertEqual(client_mock.delete_blob.call_count, 2)

@patch('core.services.storages.cloud.azure.BlobServiceClient')
def test_remove(self, blob_service_client):
blob1 = Mock()
blob1.name = 'foo/bar/foobar.json'
blob2 = Mock()
blob2.name = 'foo/bar/foobar1.json'
client_mock = Mock(delete_blob=Mock())
container_client_mock = Mock(
get_blob_client=Mock(return_value=client_mock))
container_mock = Mock(get_container_client=Mock(return_value=container_client_mock))
blob_service_client.from_connection_string = Mock(return_value=container_mock)

BlobStorage().remove('foo/bar/foobar.json')

container_client_mock.get_blob_client.assert_called_once_with(blob='foo/bar/foobar.json')
client_mock.delete_blob.assert_called_once()
14 changes: 11 additions & 3 deletions core/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,10 +314,21 @@
AUTH_USER_MODEL = 'users.UserProfile'
TEST_RUNNER = 'core.common.tests.CustomTestRunner'
DEFAULT_LOCALE = os.environ.get('DEFAULT_LOCALE', 'en')

# AWS storage settings
AWS_ACCESS_KEY_ID = os.environ.get('AWS_ACCESS_KEY_ID', '')
AWS_SECRET_ACCESS_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY', '')
AWS_STORAGE_BUCKET_NAME = os.environ.get('AWS_STORAGE_BUCKET_NAME', 'oclapi2-dev')
AWS_REGION_NAME = os.environ.get('AWS_REGION_NAME', 'us-east-2')

# Azure storage settings
AZURE_STORAGE_ACCOUNT_NAME = os.environ.get('AZURE_STORAGE_ACCOUNT_NAME', 'ocltestaccount')
AZURE_STORAGE_CONTAINER_NAME = os.environ.get('AZURE_STORAGE_CONTAINER_NAME', 'ocl-test-exports')
AZURE_STORAGE_CONNECTION_STRING = os.environ.get('AZURE_STORAGE_CONNECTION_STRING', 'conn-str')

# Repo Export Upload/download
EXPORT_SERVICE = os.environ.get('EXPORT_SERVICE', 'core.services.storages.cloud.aws.S3')

DISABLE_VALIDATION = os.environ.get('DISABLE_VALIDATION', False)
API_SUPERUSER_PASSWORD = os.environ.get('API_SUPERUSER_PASSWORD', 'Root123') # password for ocladmin superuser
API_SUPERUSER_TOKEN = os.environ.get('API_SUPERUSER_TOKEN', '891b4b17feab99f3ff7e5b5d04ccc5da7aa96da6')
Expand Down Expand Up @@ -522,9 +533,6 @@
ERRBIT_URL = os.environ.get('ERRBIT_URL', 'http://errbit:8080')
ERRBIT_KEY = os.environ.get('ERRBIT_KEY', 'errbit-key')

# Repo Export Upload/download
EXPORT_SERVICE = os.environ.get('EXPORT_SERVICE', 'core.services.storages.cloud.aws.S3')

# Locales Repository URI
# can either be /orgs/OCL/sources/Locales/ (old-style, ISO-639-2)
# or /orgs/ISO/sources/iso639-1/ (ISO-639-1, OCL's new default)
Expand Down
3 changes: 3 additions & 0 deletions docker-compose.prod.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ services:
- AWS_REGION_NAME
- ERRBIT_URL
- ERRBIT_KEY
- AZURE_STORAGE_ACCOUNT_NAME
- AZURE_STORAGE_CONTAINER_NAME
- AZURE_STORAGE_CONNECTION_STRING
healthcheck:
test: "curl --silent --fail http://localhost:8000/version/ || exit 1"
celery:
Expand Down
Loading
Loading