diff --git a/annotations/forms.py b/annotations/forms.py index fc0a1b9d..cce6ee1b 100644 --- a/annotations/forms.py +++ b/annotations/forms.py @@ -169,34 +169,6 @@ def label_from_instance(self, obj): """ return obj.uri - def to_python(self, value): - if value in self.empty_values: - return None - try: - key = 'uri' - py_value = self.queryset.get(**{key: value}) - except self.queryset.model.DoesNotExist: - import goat - goat.GOAT = settings.GOAT - goat.GOAT_APP_TOKEN = settings.GOAT_APP_TOKEN - concept = goat.Concept.retrieve(identifier=value) - - data = dict( - uri=value, - label=concept.data['name'], - description=concept.data['description'], - ) - ctype_data = concept.data['concept_type']# - if ctype_data: - data.update({'typed': Type.objects.get_or_create(uri=ctype_data['identifier'])[0]}) - - py_value = Concept.objects.create(**data) - - return py_value - except (ValueError, TypeError): - raise ValidationError(self.error_messages['invalid_choice'], code='invalid_choice') - return py_value - class TemplateChoiceField(forms.ChoiceField): def label_from_instance(self, obj): diff --git a/annotations/templates/annotations/repository_ioerror.html b/annotations/templates/annotations/repository_ioerror.html index 9d81edf1..947ab73c 100644 --- a/annotations/templates/annotations/repository_ioerror.html +++ b/annotations/templates/annotations/repository_ioerror.html @@ -9,6 +9,7 @@ There was a problem communicating with the remote repository that contains this content. Please go back, and try again. If this problem persists, please contact an administrator. +
Error: {{ error }}
diff --git a/annotations/views/repository_views.py b/annotations/views/repository_views.py index 12772d72..d36bc96d 100644 --- a/annotations/views/repository_views.py +++ b/annotations/views/repository_views.py @@ -29,6 +29,8 @@ from external_accounts.decorators import citesphere_authenticated from annotations.utils import get_pagination_metadata +import traceback + def _get_params(request): # The request may include parameters that should be passed along to the # repository -- at this point, this is just for pagination. @@ -79,7 +81,14 @@ def repository_collections(request, repository_id): manager = RepositoryManager(user=request.user, repository=repository) project_id = request.GET.get('project_id') - collections = manager.groups() # Fetch collections + try: + collections = manager.groups() # Fetch collections + except CitesphereAPIError as e: + print(traceback.format_exc()) + return render(request, 'annotations/repository_ioerror.html', {'error': str(e)}, status=500) + except Exception as e: + print(traceback.format_exc()) + return render(request, 'annotations/repository_ioerror.html', {'error': 'An unexpected error occurred'}, status=500) context = { 'collections': collections, @@ -102,12 +111,16 @@ def repository_collection(request, repository_id, group_id): page = int(request.GET.get('page', 1)) try: - response_data = manager.collections(groupId=group_id) + response_data = manager.collections(group_id=group_id) group_info = response_data.get('group') collections = response_data.get('collections', []) - group_texts = manager.group_items(groupId=group_id, page=page) - except IOError: - return render(request, 'annotations/repository_ioerror.html', {}, status=500) + group_texts = manager.group_items(group_id=group_id, page=page) + except CitesphereAPIError as e: + print(traceback.format_exc()) + return render(request, 'annotations/repository_ioerror.html', {'error': str(e)}, status=500) + except Exception as e: + print(traceback.format_exc()) + return render(request, 'annotations/repository_ioerror.html', {'error': 'An unexpected error occurred'}, status=500) project_id = request.GET.get('project_id') @@ -135,7 +148,6 @@ def repository_collection(request, repository_id, group_id): return render(request, 'annotations/repository_collection.html', context) - @citesphere_authenticated def repository_browse(request, repository_id): params = _get_params(request) @@ -145,8 +157,12 @@ def repository_browse(request, repository_id): project_id = request.GET.get('project_id') try: resources = manager.list(**params) - except IOError: - return render(request, 'annotations/repository_ioerror.html', {}, status=500) + except CitesphereAPIError as e: + print(traceback.format_exc()) + return render(request, 'annotations/repository_ioerror.html', {'error': str(e)}, status=500) + except Exception as e: + print(traceback.format_exc()) + return render(request, 'annotations/repository_ioerror.html', {'error': 'An unexpected error occurred'}, status=500) base_url = reverse('repository_browse', args=(repository_id,)) base_params = {} @@ -160,7 +176,6 @@ def repository_browse(request, repository_id): 'manager': manager, 'title': 'Browse repository %s' % repository.name, 'project_id': project_id, - 'manager': manager, 'resources': resources['resources'], } previous_page, next_page = _get_pagination(resources, base_url, base_params) @@ -172,7 +187,6 @@ def repository_browse(request, repository_id): return render(request, 'annotations/repository_browse.html', context) - @citesphere_authenticated def repository_search(request, repository_id): repository = get_object_or_404(Repository, pk=repository_id) @@ -270,8 +284,12 @@ def repository_collection_texts(request, repository_id, group_id, group_collecti try: texts = manager.collection_items(group_id, group_collection_id, page=page) - except Exception as e: + except CitesphereAPIError as e: + print(traceback.format_exc()) return render(request, 'annotations/repository_ioerror.html', {'error': str(e)}, status=500) + except Exception as e: + print(traceback.format_exc()) + return render(request, 'annotations/repository_ioerror.html', {'error': 'An unexpected error occurred'}, status=500) # retrieve items per page from settings and calculate pagination metadata from util function items_per_page = settings.PAGINATION_PAGE_SIZE @@ -308,8 +326,12 @@ def repository_text_import(request, repository_id, group_id, text_key, project_i try: result = manager.item(group_id, text_key) - except IOError: - return render(request, 'annotations/repository_ioerror.html', {}, status=500) + except CitesphereAPIError as e: + print(traceback.format_exc()) + return render(request, 'annotations/repository_ioerror.html', {'error': str(e)}, status=500) + except Exception as e: + print(traceback.format_exc()) + return render(request, 'annotations/repository_ioerror.html', {'error': 'An unexpected error occurred'}, status=500) item_details = result.get('item', {}).get('details', {}) giles_text = result.get('item', {}).get('text', []) @@ -352,8 +374,12 @@ def repository_text_content(request, repository_id, text_id, content_id): try: content = manager.content(id=int(content_id)) resource = manager.resource(id=int(text_id)) - except IOError: - return render(request, 'annotations/repository_ioerror.html', {}, status=500) + except CitesphereAPIError as e: + print(traceback.format_exc()) + return render(request, 'annotations/repository_ioerror.html', {'error': str(e)}, status=500) + except Exception as e: + print(traceback.format_exc()) + return render(request, 'annotations/repository_ioerror.html', {'error': 'An unexpected error occurred'}, status=500) content_type = content.get('content_type', None) from annotations import annotators @@ -370,8 +396,10 @@ def repository_text_content(request, repository_id, text_id, content_id): if part_of_id: try: master = manager.resource(id=int(part_of_id)) - except IOError: - return render(request, 'annotations/repository_ioerror.html', {}, status=500) + except CitesphereAPIError as e: + return render(request, 'annotations/repository_ioerror.html', {'error': str(e)}, status=500) + except Exception as e: + return render(request, 'annotations/repository_ioerror.html', {'error': 'An unexpected error occurred'}, status=500) master_resource, _ = Text.objects.get_or_create(uri=master['uri'], defaults={ 'title': master.get('title'), diff --git a/giles/__init__.py b/giles/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/giles/functions.py b/giles/functions.py deleted file mode 100644 index 3ce8cb0f..00000000 --- a/giles/functions.py +++ /dev/null @@ -1,122 +0,0 @@ -from django.conf import settings -from django.core.files import File - -from annotations.models import * -from annotations.exceptions import * -from giles.models import * - -import requests, os -from collections import defaultdict - -_fix_url = lambda url: url.replace('http://', 'https://') if url is not None else None - - - -def handle_status_exception(func): - def wrapper(user, *args, **kwargs): - response = func(user, *args, **kwargs) - if response.status_code == 401: # Auth token expired. - try: - user.giles_token.delete() - except AssertionError: - pass - - get_user_auth_token(user, **kwargs) - user.refresh_from_db() - # TODO: we could put some Exception handling here. - return func(user, *args, **kwargs) - elif response.status_code != requests.codes.ok and response.status_code != 202: - message = 'Status %i, content: %s' % (response.status_code, response.content) - logger.error(message) - raise StatusException(response) - return response - return wrapper - - -def api_request(func): - def wrapper(user, *args, **kwargs): - response = func(user, *args, **kwargs) - return response.status_code, response.json() - return wrapper - - -def _create_auth_header(user, **kwargs): - provider = kwargs.get('provider', settings.GILES_DEFAULT_PROVIDER) - # token = user.social_auth.get(provider=provider).extra_data['access_token'] - token = get_user_auth_token(user) - return {'Authorization': 'token %s' % token} - - -def get_user_auth_token(user, **kwargs): - """ - Get the current auth token for a :class:`.User`\. - - If the user has no auth token, retrieve one and store it. - - Supports dependency injection. - - Parameters - ---------- - user : :class:`django.contrib.auth.User` - kwargs : kwargs - - Returns - ------- - str - Giles authorization token for ``user``. - """ - fresh = kwargs.get('fresh', False) - try: - if user.giles_token and not fresh: - return user.giles_token.token - except AttributeError: # RelatedObjectDoesNotExist. - pass # Will proceed to retrieve token. - - try: - status_code, data = get_auth_token(user, **kwargs) - try: - user.giles_token.delete() - except: - pass - - user.giles_token = GilesToken.objects.create(for_user=user, token=data["token"]) - user.save() - return user.giles_token.token - except Exception as E: - print((str(E))) - print((status_code, data)) - template = "Failed to retrieve access token for user {u}" - msg = template.format(u=user.username) - if kwargs.get('raise_exception', False): - raise E - logger.error(msg) - logger.error(str(E)) - - -# @handle_status_exception -@api_request -def get_auth_token(user, **kwargs): - """ - Obtain and store a short-lived authorization token from Giles. - - See https://diging.atlassian.net/wiki/display/GIL/REST+Authentication. - """ - giles = kwargs.get('giles', settings.GILES) - post = kwargs.get('post', settings.POST) - provider = kwargs.get('provider', settings.GILES_DEFAULT_PROVIDER) - app_token = kwargs.get('app_token', settings.GILES_APP_TOKEN) - - path = '/'.join([giles, 'rest', 'token']) - provider_token = user.social_auth.get(provider=provider)\ - .extra_data.get('access_token') - - return post(path, data={'providerToken': provider_token}, - headers={'Authorization': 'token %s' % app_token}) - - - - -def format_giles_url(url, user, dw=300): - """ - """ - return url + '&accessToken=' + get_user_auth_token(user) + '&dw=%i' % 300 diff --git a/giles/migrations/0001_initial.py b/giles/migrations/0001_initial.py deleted file mode 100644 index 6f2b7179..00000000 --- a/giles/migrations/0001_initial.py +++ /dev/null @@ -1,24 +0,0 @@ -# -*- coding: utf-8 -*- - - -from django.db import models, migrations -from django.conf import settings - - -class Migration(migrations.Migration): - - dependencies = [ - migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ] - - operations = [ - migrations.CreateModel( - name='GilesToken', - fields=[ - ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), - ('created', models.DateTimeField(auto_now_add=True)), - ('token', models.CharField(max_length=255)), - ('for_user', models.OneToOneField(related_name='giles_token', to=settings.AUTH_USER_MODEL, on_delete=models.CASCADE)), - ], - ), - ] diff --git a/giles/migrations/__init__.py b/giles/migrations/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/giles/models.py b/giles/models.py deleted file mode 100644 index d8e871f0..00000000 --- a/giles/models.py +++ /dev/null @@ -1,14 +0,0 @@ -from django.db import models, IntegrityError -from annotations.models import VogonUser as User - - -class GilesToken(models.Model): - """ - A short-lived auth token for sending content to Giles on behalf of a user. - - See https://diging.atlassian.net/wiki/display/GIL/REST+Authentication. - """ - - for_user = models.OneToOneField(User, related_name='giles_token', on_delete=models.CASCADE) - created = models.DateTimeField(auto_now_add=True) - token = models.CharField(max_length=255) diff --git a/repository/managers.py b/repository/managers.py index d94c5515..9cf5933f 100644 --- a/repository/managers.py +++ b/repository/managers.py @@ -1,51 +1,105 @@ -from django.conf import settings -from repository.restable import RESTManager -from repository import auth from external_accounts.utils import get_giles_document_details +from repository import auth +from requests.exceptions import RequestException import requests -class RepositoryManager(RESTManager): - def __init__(self, **kwargs): - self.user = kwargs.get('user') - self.repository = kwargs.get('repository') - - if self.user and self.repository: - kwargs.update({'headers': auth.citesphere_auth(self.user, self.repository)}) - - super(RepositoryManager, self).__init__(**kwargs) +class CitesphereAPIError(Exception): + """Base exception class for Citesphere API errors""" + def __init__(self, message, error_code=None, details=None): + self.message = message + self.error_code = error_code + self.details = details + super().__init__(self.message) + +class CitesphereAPIv1: + def __init__(self, user, repository): + self.user = user + self.repository = repository + self.base_url = f"{repository.endpoint}/api/v1" + + def _get_headers(self): + """Generate headers required for API requests.""" + try: + return auth.citesphere_auth(self.user, self.repository) + except Exception as e: + raise CitesphereAPIError(message="Authentication failed, please try again.", error_code="AUTH_ERROR", details=str(e)) + + def _make_request(self, endpoint, params=None): + """Helper function to handle GET requests with optional parameters.""" + url = f"{self.base_url}{endpoint}" + try: + response = requests.get(url, headers=self._get_headers(), params=params) + response.raise_for_status() + return response.json() + except RequestException as e: + raise CitesphereAPIError(message="API request failed", error_code="REQUEST_ERROR", details=str(e)) + except ValueError as e: + raise CitesphereAPIError(message="Invalid JSON response", error_code="RESPONSE_ERROR", details=str(e)) + + def get_groups(self, params=None): + """Fetch all groups with optional parameters.""" + return self._make_request("/groups/", params=params) + + def get_group_items(self, group_id, params=None): + """Make a request to fetch group items.""" + return self._make_request(f"/groups/{group_id}/items/", params=params) + + def get_group_collections(self, group_id, params=None): + """Fetch all collections within a group with optional parameters.""" + return self._make_request(f"/groups/{group_id}/collections/", params=params) + + def get_collection_items(self, group_id, collection_id, params=None): + """Fetch items in a specific collection with optional parameters.""" + return self._make_request(f"/groups/{group_id}/collections/{collection_id}/items/", params=params) + + def get_item_details(self, group_id, item_id, params=None): + """Fetch detailed information of an item with optional parameters.""" + return self._make_request(f"/groups/{group_id}/items/{item_id}/", params=params) + + +class RepositoryManager: + def __init__(self, user, repository): + """Initialize the manager with the user and repository.""" + self.api = CitesphereAPIv1(user, repository) + self.user = user + self.repository = repository def get_raw(self, target, **params): - headers = {} - if self.user and self.repository: - headers = auth.citesphere_auth(self.user, self.repository) - return requests.get(target, headers=headers, params=params).content + """Fetch raw data from any API target.""" + try: + response = requests.get(target, headers=self.api._get_headers(), params=params) + response.raise_for_status() + return response.content + except RequestException as e: + raise CitesphereAPIError(message="Failed to fetch data", error_code="RAW_DATA_ERROR", details=str(e)) def groups(self): - """Fetch Groups from the repository's endpoint""" - headers = auth.citesphere_auth(self.user, self.repository) - url = f"{self.repository.endpoint}/api/v1/groups/" - response = requests.get(url, headers=headers) - - if response.status_code == 200: - return response.json() # Return the groups data - else: - response.raise_for_status() + """Fetch all groups from the repository.""" + return self.api.get_groups() - def group_items(self, groupId, page=1): + def group_items(self, group_id, page=1): """ Fetch items from a specific group for a specific page. - """ - headers = auth.citesphere_auth(self.user, self.repository) - base_url = f"{self.repository.endpoint}/api/v1/groups/{groupId}/items/" - params = { - 'page': page, - } + Args: + group_id: The ID of the group in the repository. + page: The page number to retrieve. + + Returns: + A dictionary containing: + - "group": Details about the group. + - "items": A list of items in the group for the specified page. + - "total_items": The total number of items in the group. + + Raises: + CitesphereAPIError + """ + if not isinstance(page, int) or page < 1: + raise CitesphereAPIError(message="Invalid page number", error_code="INVALID_PAGE", details="Page must be a positive integer") - group_response = requests.get(base_url, headers=headers, params=params) - group_response.raise_for_status() + # Make the API call using CitesphereAPIv1 + response_data = self.api.get_group_items(group_id, params={'page': page}) - response_data = group_response.json() group_data = response_data.get('group', {}) items = response_data.get('items', []) total_items = group_data.get('numItems', 0) @@ -55,25 +109,18 @@ def group_items(self, groupId, page=1): "items": items, "total_items": total_items } - - def collections(self, groupId): - """Fetch collections from the repository's endpoint""" - headers = auth.citesphere_auth(self.user, self.repository) - url = f"{self.repository.endpoint}/api/v1/groups/{groupId}/collections/" - response = requests.get(url, headers=headers) - - if response.status_code == 200: - return response.json() # Return the Collections data - else: - response.raise_for_status() - def collection_items(self, groupId, collectionId, page=1): + def collections(self, group_id): + """Fetch all collections within a specific group.""" + return self.api.get_group_collections(group_id) + + def collection_items(self, group_id, collection_id, page=1): """ Fetch items from a specific collection in a group for a specific page. Args: - groupId: The ID of the group in the repository. - collectionId: The ID of the collection within the group. + group_id: The ID of the group in the repository. + collection_id: The ID of the collection within the group. page: The page number to retrieve. Returns: @@ -81,89 +128,116 @@ def collection_items(self, groupId, collectionId, page=1): - "group": Details about the group. - "items": A list of items in the specified collection for the given page. - "total_items": The total number of items in the collection. + + Raises: + CitesphereAPIError """ - headers = auth.citesphere_auth(self.user, self.repository) + if not isinstance(page, int) or page < 1: + raise CitesphereAPIError(message="Invalid page number", error_code="INVALID_PAGE", details="Page must be a positive integer") + + try: + collections_data = self.api.get_group_collections(group_id).get('collections', []) + # TODO: Once there is a collection information endpoint,this will need to be updated + total_items = next((c.get('numberOfItems', 0) for c in collections_data if c.get('key') == collection_id), 0) + # Fetch paginated items for the collection + items = self.api.get_collection_items(group_id, collection_id, params={'page': page}).get('items', []) + + return { + "group": collections_data, + "items": items, + "total_items": total_items + } - items_url = f"{self.repository.endpoint}/api/v1/groups/{groupId}/collections/{collectionId}/items/" - collections_url = f"{self.repository.endpoint}/api/v1/groups/{groupId}/collections/" + # TODO: Once there is a collection information endpoint, this will no longer be needed, this will be an Exception error + except StopIteration: + raise CitesphereAPIError(message="Collection not found", error_code="COLLECTION_NOT_FOUND", details=f"Collection {collection_id} not found in group {group_id}") - # Fetch collection details to get total items - collections_response = requests.get(collections_url, headers=headers) - collections_response.raise_for_status() - - # Extract group and total items for the collection - collections_data = collections_response.json().get('collections', []) - group_info = collections_response.json().get('group', {}) - - # TODO: Once there is a collection information endpoint,this will need to be updated - total_items = next((c.get('numberOfItems', 0) for c in collections_data if c.get('key') == collectionId), 0) + def item(self, group_id, item_id): + """ + Fetch individual item details from the repository and extract Giles document text. - # Get items for the specific page - response = requests.get(items_url, headers=headers, params={'page': page}) - response.raise_for_status() - items = response.json().get('items', []) + Args: + group_id: The group ID from which the item is fetched. + item_id: The item ID to fetch. - return { - "group": group_info, - "items": items, - "total_items": total_items + Returns: + A dictionary containing item details and Giles document text. + + Raises: + CitesphereAPIError + """ + # Fetch item details using CitesphereAPIv1 + item_data = self.api.get_item_details(group_id, item_id) + + if not item_data or 'item' not in item_data: + raise CitesphereAPIError(message="Invalid item data", error_code="INVALID_ITEM_DATA", details="Response missing item data") + + # Extract core item details + item = item_data.get('item', {}) + item_details = { + 'key': item.get('key'), + 'title': item.get('title'), + 'authors': item.get('authors', []), + 'itemType': item.get('itemType'), + 'addedOn': item.get('dateAdded', 'Unknown date'), + 'url': item.get('url') } + # Extract Giles uploads and their text if available + giles_uploads = item.get('gilesUploads', []) + item_data['item']['text'] = self._fetch_giles_text(giles_uploads) + item_data['item']['details'] = item_details - def item(self, groupId, itemId): - """ - Fetch individual item from repository's endpoint and get Giles document details for documents of type 'text/plain' + return item_data + def _fetch_giles_text(self, giles_uploads): + """ + Extract text from Giles uploads. + Args: - groupId: The group ID in the repository - itemId: The item ID in the repository - + giles_uploads: List of Giles upload objects + Returns: - A dictionary containing item details from repository, and Giles document details with extracted text + str: Extracted text content or error message + + Raises: + CitesphereAPIError """ - headers = auth.citesphere_auth(self.user, self.repository) - url = f"{self.repository.endpoint}/api/v1/groups/{groupId}/items/{itemId}/" - response = requests.get(url, headers=headers) - - if response.status_code == 200: - item_data = response.json() - - item_details = { - 'key': item_data.get('item', {}).get('key'), - 'title': item_data.get('item', {}).get('title'), - 'authors': item_data.get('item', {}).get('authors', []), - 'itemType': item_data.get('item', {}).get('itemType'), - 'addedOn': item_data.get('item', {}).get('dateAdded', 'Unknown date'), - 'url': item_data.get('item', {}).get('url') - } - - # Extract Giles upload details if available - giles_uploads = item_data.get('item', {}).get('gilesUploads', []) - - if giles_uploads: - giles_details = [] - extracted_text = giles_uploads[0].get('extractedText', {}) - - if extracted_text and extracted_text.get('content-type') == 'text/plain': - extracted_text_data = get_giles_document_details(self.user, extracted_text.get('id')) - item_data['item']['text'] = extracted_text_data - elif giles_uploads[0].get('pages'): - pages = giles_uploads[0].get('pages') - text = "" - for page in pages: - if page.get('text') and page.get('text').get('content-type') == 'text/plain': - data = get_giles_document_details(self.user, page.get('text').get('id')) - text += data - item_data['item']['text'] = text - else: - item_data['item']['text'] = "No valid text/plain content found." - else: - print("No Giles uploads available") - item_data['item']['text'] = "No Giles uploads available." - - item_data['item']['details'] = item_details - - return item_data - - else: - response.raise_for_status() + if not giles_uploads: + return "No Giles uploads available." + + try: + upload = giles_uploads[0] + text_content = "" + + # Extract plain text from Giles extracted text if available + extracted_text = upload.get('extractedText', {}) + if extracted_text and extracted_text.get('content-type') == 'text/plain': + text_content = get_giles_document_details(self.user, extracted_text['id']) + if text_content is None: + raise CitesphereAPIError(message="Failed to fetch document text from Giles, please try again later.", error_code="GILES_TEXT_ERROR", details="Failed to fetch document text from Giles") + + # Extract plain text from upload file if available + elif upload.get('uploadedFile').get('content-type') == 'text/plain' and upload.get('uploadedFile').get('id'): + text_content = get_giles_document_details(self.user, upload.get('uploadedFile')['id']) + if text_content is None: + raise CitesphereAPIError(message="Failed to fetch document text from Giles, please try again later.", error_code="GILES_UPLOAD_PLAIN_TEXT_ERROR", details=f"Failed to fetch text from plain text file {upload.get('uploadedFile')['id']}") + + # Fallback to extracting text from pages + elif 'pages' in upload: + for page in upload['pages']: + text_data = page.get('text') + if text_data and text_data.get('content-type') == 'text/plain': + page_text = get_giles_document_details(self.user, text_data['id']) + if page_text is not None: + text_content += page_text + else: + raise CitesphereAPIError(message="Failed to fetch document text from Giles, please try again later.", error_code="GILES_PAGE_ERROR", details=f"Failed to fetch text for page {page.get('number', 'unknown')}") + + return text_content or "No valid text/plain content found." + + except Exception as e: + # If the exception is already a CitesphereAPIError, re-raise it directly to preserve the original error details. + if isinstance(e, CitesphereAPIError): + raise + raise CitesphereAPIError(message="Giles text extraction has failed", error_code="GILES_EXTRACTION_ERROR", details=str(e)) diff --git a/repository/restable/__init__.py b/repository/restable/__init__.py deleted file mode 100644 index 8aba8a9a..00000000 --- a/repository/restable/__init__.py +++ /dev/null @@ -1,108 +0,0 @@ -from .util import * -from ..auth import * -from django.shortcuts import get_object_or_404 -from django.conf import settings -import requests - -from ..models import Repository - -class RESTManager(object): - """ - Simplified RESTManager for handling Citesphere groups, collections, and items. - """ - - def __init__(self, user=None, repository=None, headers=None): - """ - Initialize the RESTManager with user authentication and base URL for Citesphere API. - - Parameters - ---------- - user : User object - The user for which authentication is handled. - repository : Repository object - The repository for which the RESTManager is handling requests. - base_url : str - The base URL for the Citesphere API. - headers : dict - Additional headers to be sent with the request. - """ - self.user = user - self.repository = repository - self.base_url = repository.endpoint - self.headers = headers or {} - - def _get_headers(self): - if self.user and self.repository: - auth_headers = citesphere_auth(self.user, self.repository) - if auth_headers: - self.headers.update(auth_headers) - else: - # Handle authentication failure appropriately - raise Exception("Authentication required. Please authenticate with Citesphere.") - return self.headers - - def get(self, endpoint, params=None): - """ - Generic method for performing GET requests. - - Parameters - ---------- - endpoint : str - The endpoint to hit (appended to the base URL). - params : dict - Optional query parameters. - - Returns - ------- - JSON response or raises an HTTPError if the request fails. - """ - url = f"{self.base_url}/{endpoint}" - response = requests.get(url, headers=self._get_headers(), params=params) - - if response.status_code == 200: - return response.json() # Parse JSON if successful - else: - response.raise_for_status() - - def groups(self): - """ - Fetch groups from the Citesphere API. - - Returns - ------- - JSON response containing the groups. - """ - return self.get('v1/api/groups') - - def collections(self, group_id): - """ - Fetch collections for a specific group from the Citesphere API. - - Parameters - ---------- - group_id : int - The ID of the group for which collections are to be fetched. - - Returns - ------- - JSON response containing the collections. - """ - return self.get(f'v1/api/groups/{group_id}/collections') - - def items(self, group_id, collection_id): - """ - Fetch items for a specific collection within a group. - - Parameters - ---------- - group_id : int - The ID of the group. - collection_id : int - The ID of the collection. - - Returns - ------- - JSON response containing the items. - """ - return self.get(f'v1/api/groups/{group_id}/collections/{collection_id}/items') - diff --git a/repository/restable/util.py b/repository/restable/util.py deleted file mode 100644 index 8953061f..00000000 --- a/repository/restable/util.py +++ /dev/null @@ -1,485 +0,0 @@ -""" -Helper functions for parsing REST responses. -""" - -import re, requests, json, jsonpickle -import lxml.etree as ET -from pprint import pprint - - -class ResultList(list): - def __init__(self, *args, **kwargs): - super(ResultList, self).__init__(*args) - self.previous_page = kwargs.get('previous_page') - self.next_page = kwargs.get('next_page') - - -class JSONData(dict): - def __init__(self, obj={}): - for key, value in obj.items(): - if type(value) is list: - value = JSONArray(value) - elif type(value) is dict: - value = JSONData(value) - self[key] = value - - def get(self, key, *args, **kwargs): - return super(JSONData, self).get(key) - - -class JSONArray(list): - """ - Adds ``get`` support to a list. - """ - def __init__(self, obj=[]): - for item in obj: - if type(item) is dict: - item = JSONData(item) - self.append(item) - - def get(self, key, *args, **kwargs): - """ - Return the value of ``key`` in the first object in list. - """ - return self[0].get(key) if len(self) > 0 else None - - def get_list(self, key=None, *args, **kwargs): - """ - Return the value of ``key`` in each object in list. - """ - if key: - return [obj.get(key) for obj in self if key in obj] - return [obj for obj in self] - - -def is_multiple(tag): - """ - Detect the multi-value flag (``*``) in a path part (``tag``). - - Parameters - ---------- - tag : str - - Returns - ------- - tuple - tag name (str), multiple (bool) - """ - if not tag: - return None, None - if tag == '*': - return None, '*' - return re.match(r'([^\*]+)(\*)?', tag).groups() - - -def get_recursive_pathfinder(nsmap={}, method='find', mult_method='findall'): - """ - Generate a recursive function that follows the path in ``tags``, starting - at ``elem``. - """ - - def _get(elem, tags): - """ - Parameters - ---------- - elem : :class:`lxml.etree.Element` - tags : list - """ - if not tags: # Bottomed out; recursion stops. - return elem - - this_tag, multiple = is_multiple(tags.pop()) - base = _get(elem, tags) - - if not base: - return [] if multiple else None - - if type(base) is list: - _apply = lambda b, t, meth: [getattr(c, meth)(t, nsmap) for c in b] - else: - _apply = lambda b, t, meth: getattr(b, meth)(t, nsmap) - - - if multiple: - return _apply(base, this_tag, mult_method) - return _apply(base, this_tag, method) - return _get - - -def _to_unicode(e): - if isinstance(e, unicode): - return e - return e.decode('utf-8') - - -_etree_attribute_getter = lambda e, attr: _to_unicode(getattr(e, 'attrib', {}).get(attr, u'').strip())#.encode('utf-8') -_etree_cdata_getter = lambda e: _to_unicode(getattr(getattr(e, 'text', u''), 'strip', lambda: u'')())#.encode('utf-8') -_json_content_getter = lambda e: e - - -def content_picker_factory(env, content_getter=_etree_cdata_getter, attrib_getter=_etree_attribute_getter): - """ - Generates a function that retrives the CDATA content or attribute value of - an element. - - Parameters - ---------- - env : dict - - Returns - ------- - function - """ - attribute, sep = env.get('attribute', False), env.get('sep', None) - _separator = lambda value: [v.strip() for v in value.split(sep)] if sep else value - if attribute: - return lambda elem: _separator(attrib_getter(elem, attribute[1:-1])) - return lambda elem: _separator(content_getter(elem)) - - -def passthrough_picker_factory(env, *args, **kwargs): - """ - Generates a function that simply returns a passed - :class:`lxml.etree.Element`\. - - Parameters - ---------- - env : dict - - Returns - ------- - function - """ - return lambda e: e - - -def decompose_path(path_string): - """ - Split a path string into its constituent parts. - - Parameters - ---------- - path_string : str - - Returns - ------- - path : list - attribute : str or None - """ - if '|' in path_string: - try: - path_string, sep = path_string.split('|') - except ValueError: - raise ValueError("Malformed path: only one separator reference" - " (|) allowed.") - else: - sep = None - - path, attribute = re.match(r'([^\[]+)(\[.+\])?', path_string).groups() - if '[' in path and not attribute: - raise ValueError("Malformed path: attribute references must come at" - " the very end of the path.") - - path = path.split('/') - return path, attribute, sep - - -def _parse_path(path_string, nsmap={}, picker_factory={}, - content_getter=_etree_cdata_getter, - attrib_getter=_etree_attribute_getter, - get_method='find', mult_method='findall'): - """ - Generate a function that will retrieve data of interest from an arbitrary - object. This combines common logic from public parser functions. - - Parameters - ---------- - path_string : str - See docs for how this should be written. TODO: write the docs. - nsmap: dict - picker_factory : function - get_method : str - list_method : str - - Returns - ------- - function - """ - path, attribute, sep = decompose_path(path_string) - _get = get_recursive_pathfinder(nsmap=nsmap, method=get_method, - mult_method=mult_method) - _picker = picker_factory(locals(), content_getter=content_getter) - - def _apply(obj): # No empty values. - value = _picker(obj) - if value and (not type(value) is list or value[0]): - return value - - def _call(elem): - base = _get(elem, path) - if type(base) is list: - return [_apply(child) for child in base] - return _apply(base) - return _call - - -def parse_json_path(path_string, nsmap={}, picker_factory=content_picker_factory): - """ - Generate a function that will retrieve data of interest from a - :class:`.JSONData` object. - - Parameters - ---------- - path_string : str - See docs for how this should be written. TODO: write the docs. - nsmap: dict - Not used. - picker_factory : function - - - Returns - ------- - function - """ - return _parse_path(path_string, nsmap, picker_factory, _json_content_getter, - _json_content_getter, 'get', 'get_list') - - -def parse_xml_path(path_string, nsmap={}, picker_factory=content_picker_factory): - """ - Generate a function that will retrieve data of interest from an - :class:`lxml.etree.Element`\. - - Parameters - ---------- - path_string : str - See docs for how this should be written. TODO: write the docs. - nsmap: dict - See the ``lxml.etree`` docs. - picker_factory : function - - - Returns - ------- - function - """ - return _parse_path(path_string, nsmap, picker_factory) - - -def generate_request(config, glob={}): - """ - Generate a function that performs an HTTP request based on the configuration - in ``config``. - - Parameters - ---------- - config : dict - glob : dict - - Returns - ------- - function - Expects keyword arguments defined in the configuration. If provided, - ``headers`` will be pulled out and passed as headers in the request. - """ - try: - path_partial = config['path'] - except KeyError: - raise ValueError("Malformed configuration: no path specified.") - - method = config.get("method", "GET") # GET by default. - - # Maps accept -> send parameter names. - parameters = {param['accept']: param['send'] - for param in config.get("parameters", [])} - required = {param['accept'] for param in config.get("parameters", []) - if param.get('required', False)} - defaults = {param['accept']: param['default'] - for param in config.get("parameters", []) - if 'default' in param} - - format_keys = re.findall(r'\{([^\}]+)\}', path_partial) - fmt = {k: v for k, v in glob.items() if k in format_keys} - - def _get_path(extra={}): - fmt.update(extra) - return path_partial.format(**fmt) - - def _call(**params): - """ - Perform the configured request. - - Parameters - ---------- - params : kwargs - - Returns - ------- - - """ - headers = params.pop('headers', {}) - for param in required: - if param not in params: - raise TypeError('expected parameter %s' % param) - - # Relabel accepts -> send parameter names. - params = {parameters.get(k): v for k, v in params.items() - if k in parameters} - - extra = {key: params.pop(key, defaults.pop(key, '')) - for key in format_keys - if key not in fmt} # Don't overwrite. - - if method == 'GET': - request_method = requests.get - payload = {'params': params, 'headers': headers} - elif method == 'POST': - request_method = requests.post - payload = {'data': params, 'headers': headers} - - target = _get_path(extra) - try: - response = request_method(target, **payload) - except Exception as E: - print('request to %s failed with %s' % (target, str(payload))) - raise E - if response.status_code >= 400: - print('request to %s failed' % response.url) - raise IOError(response.content) - return response.content - return _call - - -def generate_simple_request(path, method): - def _call(**params): - """ - Perform the configured request. - - Parameters - ---------- - params : kwargs - - Returns - ------- - - """ - headers = params.pop('headers', {}) - - if method == 'GET': - request_method = requests.get - payload = {'params': params, 'headers': headers} - elif method == 'POST': - request_method = requests.post - payload = {'data': params, 'headers': headers} - response = request_method(path, **params) - if response.status_code >= 400: - raise IOError(response.content) - return response.content - return _call - - -def parse_result(config, data, path_parser=parse_xml_path, glob={}, nsmap={}): - """ - Extract data from an :class:`lxml.etree.Element` using a configuration - schema. - - Parameters - ---------- - config : dict - data : :class:`lxml.etree.Element` - path_parser : function - glob : dict - nsmap : dict - - Returns - ------- - list - """ - base_path = config.get('path', None) - _, multiple = is_multiple(base_path) - if base_path: - _parser = path_parser(base_path, nsmap=nsmap, - picker_factory=passthrough_picker_factory) - base_elems = _parser(data) - else: - base_elems = [data] - - data = ResultList() - - # Pagination. - pagination = config.get('pagination') - if pagination: - if "next" in pagination: - data.next_page = generate_simple_request(path_parser(pagination.get("next").get('path'), nsmap)(data), 'GET') - if "previous" in pagination: - data.previous_page = generate_simple_request(path_parser(pagination.get("previous").get('path'), nsmap)(data), 'GET') - - base_elems = [base_elems] if not type(base_elems) is list else base_elems - for base_elem in base_elems: - # Serialized raw data is preserved. - parsed_data = {'raw': jsonpickle.dumps(base_elem)} - - # Each parameter is parsed separately. - for parameter in config.get('parameters'): - name = parameter.get('name') - ctype = parameter.get('type') - - value = path_parser(parameter.get('path'), nsmap)(base_elem) - if ctype == 'object': - value = parse_result(parameter.get('config'), value, - path_parser=path_parser, glob=glob, - nsmap=nsmap) - - # Templated parameters use response data and globals to generate - # values (e.g. URI from ID). - template = parameter.get('template') - if template: - # Isolate only the globals needed to render the template. - format_keys = re.findall(r'\{([^\}]+)\}', template) - fmt = {k: v for k, v in glob.items() if k in format_keys} - if name in format_keys: # Probably this is always true... - fmt[name] = value - value = template.format(**fmt) - parsed_data[name] = value - data.append(parsed_data) - - if not multiple: - assert len(data) == 1 - return data[0] - return data - - -# This isn't particularly special at the moment, but makes it easier to swap -# out parsers later, or add additional logic. -def parse_raw_xml(raw): - """ - Parse raw XML response content. - - Parameters - ---------- - raw : unicode - - Returns - ------- - :class:`lxml.etree.Element` - """ - # if type(raw) is str: - # raw = raw.decode('utf-8') - return ET.fromstring(raw) - - -def parse_raw_json(raw): - """ - Parse raw JSON response content. - - Parameters - ---------- - raw : unicode - - Returns - ------- - :class:`lxml.etree.Element` - """ - if type(raw) is str: - raw = raw.decode('utf-8') - return JSONData(json.loads(raw)) diff --git a/vogon/settings.py b/vogon/settings.py index a1fdc7a6..5f06a12c 100644 --- a/vogon/settings.py +++ b/vogon/settings.py @@ -11,12 +11,8 @@ """ # Build paths inside the project like this: os.path.join(BASE_DIR, ...) -import os, sys, requests +import os from urllib.parse import urlparse -import socket -import dj_database_url -# import djcelery -from datetime import timedelta from dotenv import load_dotenv load_dotenv() @@ -50,12 +46,10 @@ 'allauth.socialaccount', 'django_inlinecss', 'concepts', - 'giles', 'annotations', 'external_accounts', 'rest_framework', 'corsheaders', - 'djcelery', 'repository', 'oauth2_provider', ) @@ -105,11 +99,6 @@ WSGI_APPLICATION = 'vogon.wsgi.application' -# Database -# https://docs.djangoproject.com/en/1.8/ref/settings/#databases - -# DATABASES = {'default': dj_database_url.config()} -# DATABASES['default']['ENGINE'] = 'django.db.backends.postgresql_psycopg2' DATABASES = { 'default': { @@ -123,14 +112,12 @@ } -# print DATABASES - AUTHENTICATION_BACKENDS = ( 'django.contrib.auth.backends.ModelBackend', # default 'allauth.account.auth_backends.AuthenticationBackend', #Allauth ) -ACCOUNT_AUTHENTICATED_LOGIN_REDIRECTS =True +ACCOUNT_AUTHENTICATED_LOGIN_REDIRECTS = True ANONYMOUS_USER_ID = -1 # Allauth Email Settings @@ -168,9 +155,6 @@ USE_X_FORWARDED_HOST = True SECURE_PROXY_SSL_HEADER = ('HTTP_X_FORWARDED_PROTO', 'https') -# Allow all host headers -ALLOWED_HOSTS = ['*'] - # Static asset configuration BASE_PATH = os.environ.get('BASE_PATH', '/') BASE_DIR = os.path.dirname(os.path.abspath(__file__)) @@ -180,16 +164,12 @@ STATICFILES_DIRS = (os.path.join(BASE_DIR, 'static'), ) -JARS_KEY = '050814a54ac5c81b990140c3c43278031d391676' AUTH_USER_MODEL = 'annotations.VogonUser' es = urlparse(os.environ.get('SEARCHBOX_URL') or 'http://127.0.0.1:9200/') port = es.port or 80 -# AWS Access Key and Secret Key credentials -AWS_ACCESS_KEY = os.environ.get('AWS_ACCESS_KEY', None) -AWS_SECRET_KEY = os.environ.get('AWS_SECRET_KEY', None) -S3_BUCKET = 'vogonweb-test' +# User profile default image DEFAULT_USER_IMAGE = 'https://s3-us-west-2.amazonaws.com/vogonweb-test/defaultprofile.png' TEMPORAL_PREDICATES = { @@ -216,8 +196,8 @@ } } -CONCEPTPOWER_USERID = os.environ.get('CONCEPTPOWER_USERID', None) -CONCEPTPOWER_PASSWORD = os.environ.get('CONCEPTPOWER_PASSWORD', None) +CONCEPTPOWER_USERID = os.environ.get('CONCEPTPOWER_USERID') +CONCEPTPOWER_PASSWORD = os.environ.get('CONCEPTPOWER_PASSWORD') CONCEPTPOWER_ENDPOINT = os.environ.get('CONCEPTPOWER_ENDPOINT') CONCEPTPOWER_NAMESPACE = os.environ.get('CONCEPTPOWER_NAMESPACE') @@ -229,25 +209,10 @@ BASE_URI_NAMESPACE = u'http://www.vogonweb.net' -# Celery config. - -# djcelery.setup_loader() -# CELERYBEAT_SCHEDULE = { -# 'accession_ready_relationsets': { -# 'task': 'annotations.tasks.accession_ready_relationsets', -# 'schedule': timedelta(minutes=10, seconds=0), -# }, -# } - -CELERY_TIMEZONE = 'UTC' - GOOGLE_ANALYTICS_ID = os.environ.get('GOOGLE_ANALYTICS_ID', None) VERSION = '0.4' -GOAT = os.environ.get('GOAT', 'http://127.0.0.1:8000') -GOAT_APP_TOKEN = os.environ.get('GOAT_APP_TOKEN') - LOGLEVEL = os.environ.get('LOGLEVEL', 'DEBUG') @@ -270,16 +235,9 @@ 'viaf:geographic': GEOGRAPHIC_CONCEPT_TYPE, # E53 Place } -SUBMIT_WAIT_TIME = {'days': 3, 'hours': 0, 'minutes': 0} - # Giles Credentials GILES_ENDPOINT = os.environ.get('GILES_ENDPOINT') -IMAGE_AFFIXES = ['png', 'jpg', 'jpeg', 'tiff', 'tif'] -GET = requests.get -POST = requests.post -GILES_APP_TOKEN = os.environ.get('GILES_APP_TOKEN', 'nope') -GILES_DEFAULT_PROVIDER = os.environ.get('GILES_DEFAULT_PROVIDER', 'github') -MAX_GILES_UPLOADS = 20 + CONCEPT_URI_PREFIXES = [ 'http://www.digitalhps.org/',