From 8eb305ee6cd983f2d21775743bc7477d48cf653e Mon Sep 17 00:00:00 2001 From: Girik1105 Date: Thu, 17 Oct 2024 16:18:37 -0700 Subject: [PATCH 01/11] [VOGRE-9] created V1 class --- external_accounts/citesphere_api_v1.py | 38 ++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 external_accounts/citesphere_api_v1.py diff --git a/external_accounts/citesphere_api_v1.py b/external_accounts/citesphere_api_v1.py new file mode 100644 index 00000000..dc967e98 --- /dev/null +++ b/external_accounts/citesphere_api_v1.py @@ -0,0 +1,38 @@ +import requests +from repository import auth + + +class CitesphereAPIv1: + def __init__(self, user, repository): + self.user = user + self.repository = repository + self.base_url = f"{repository.endpoint}/api/v1" + + def _get_headers(self): + """Generate headers required for API requests.""" + return auth.citesphere_auth(self.user, self.repository) + + def _make_request(self, endpoint, params=None): + """Helper function to handle GET requests.""" + url = f"{self.base_url}{endpoint}" + response = requests.get(url, headers=self._get_headers(), params=params) + if response.status_code == 200: + return response.json() + else: + response.raise_for_status() + + def get_groups(self): + """Fetch all groups.""" + return self._make_request("/groups/").get('groups', []) + + def get_group_collections(self, group_id): + """Fetch all collections within a group.""" + return self._make_request(f"/groups/{group_id}/collections/").get('collections', []) + + def get_collection_items(self, group_id, collection_id): + """Fetch items in a specific collection.""" + return self._make_request(f"/groups/{group_id}/collections/{collection_id}/items/").get('items', []) + + def get_item_details(self, group_id, item_id): + """Fetch detailed information of an item.""" + return self._make_request(f"/groups/{group_id}/items/{item_id}/") From 44d56407579ef11594177bcc7341e6518ec911ce Mon Sep 17 00:00:00 2001 From: Girik1105 Date: Fri, 18 Oct 2024 13:10:30 -0700 Subject: [PATCH 02/11] [VOGRE-9] synced CitesphereAPIv1 class with repository manager --- annotations/views/repository_views.py | 2 +- external_accounts/citesphere_api_v1.py | 8 +- repository/managers.py | 145 ++++++++++--------------- 3 files changed, 62 insertions(+), 93 deletions(-) diff --git a/annotations/views/repository_views.py b/annotations/views/repository_views.py index eab54888..4eb3d542 100644 --- a/annotations/views/repository_views.py +++ b/annotations/views/repository_views.py @@ -96,7 +96,7 @@ def repository_collection(request, repository_id, group_id): manager = RepositoryManager(user=request.user, repository=repository) try: - response_data = manager.collections(groupId=group_id) + response_data = manager.collections(group_id=group_id) group_info = response_data.get('group') collections = response_data.get('collections', []) except IOError: diff --git a/external_accounts/citesphere_api_v1.py b/external_accounts/citesphere_api_v1.py index dc967e98..0f882864 100644 --- a/external_accounts/citesphere_api_v1.py +++ b/external_accounts/citesphere_api_v1.py @@ -23,16 +23,16 @@ def _make_request(self, endpoint, params=None): def get_groups(self): """Fetch all groups.""" - return self._make_request("/groups/").get('groups', []) + return self._make_request("/groups/") def get_group_collections(self, group_id): """Fetch all collections within a group.""" - return self._make_request(f"/groups/{group_id}/collections/").get('collections', []) + return self._make_request(f"/groups/{group_id}/collections/") def get_collection_items(self, group_id, collection_id): """Fetch items in a specific collection.""" - return self._make_request(f"/groups/{group_id}/collections/{collection_id}/items/").get('items', []) + return self._make_request(f"/groups/{group_id}/collections/{collection_id}/items/") def get_item_details(self, group_id, item_id): """Fetch detailed information of an item.""" - return self._make_request(f"/groups/{group_id}/items/{item_id}/") + return self._make_request(f"/groups/{group_id}/items/{item_id}/") \ No newline at end of file diff --git a/repository/managers.py b/repository/managers.py index e27e662e..2be09ffb 100644 --- a/repository/managers.py +++ b/repository/managers.py @@ -1,111 +1,80 @@ -from repository.restable import RESTManager -from repository import auth +from external_accounts.citesphere_api_v1 import CitesphereAPIv1 from external_accounts.utils import get_giles_document_details import requests -class RepositoryManager(RESTManager): - def __init__(self, **kwargs): - self.user = kwargs.get('user') - self.repository = kwargs.get('repository') - - if self.user and self.repository: - kwargs.update({'headers': auth.citesphere_auth(self.user, self.repository)}) - - super(RepositoryManager, self).__init__(**kwargs) +class RepositoryManager: + def __init__(self, user, repository): + """Initialize the manager with the user and repository.""" + self.api = CitesphereAPIv1(user, repository) + self.user = user def get_raw(self, target, **params): - headers = {} - if self.user and self.repository: - headers = auth.citesphere_auth(self.user, self.repository) - return requests.get(target, headers=headers, params=params).content + """Fetch raw data from any API target.""" + response = requests.get(target, headers=self.api._get_headers(), params=params) + response.raise_for_status() + return response.content def groups(self): - """Fetch Groups from the repository's endpoint""" - headers = auth.citesphere_auth(self.user, self.repository) - url = f"{self.repository.endpoint}/api/v1/groups/" - response = requests.get(url, headers=headers) - - if response.status_code == 200: - return response.json() # Return the groups data - else: - response.raise_for_status() + """Fetch all groups from the repository.""" + return self.api.get_groups() - def collections(self, groupId): - """Fetch collections from the repository's endpoint""" - headers = auth.citesphere_auth(self.user, self.repository) - url = f"{self.repository.endpoint}/api/v1/groups/{groupId}/collections/" - response = requests.get(url, headers=headers) - - if response.status_code == 200: - return response.json() # Return the Collections data - else: - response.raise_for_status() - - def collection_items(self, groupId, collectionId): - """Fetch collection items from the repository's endpoint""" - headers = auth.citesphere_auth(self.user, self.repository) - url = f"{self.repository.endpoint}/api/v1/groups/{groupId}/collections/{collectionId}/items/" - response = requests.get(url, headers=headers) - - if response.status_code == 200: - return response.json() - else: - response.raise_for_status() + def collections(self, group_id): + """Fetch all collections within a specific group.""" + return self.api.get_group_collections(group_id) - def item(self, groupId, itemId): + def collection_items(self, group_id, collection_id): + """Fetch items from a specific collection.""" + return self.api.get_collection_items(group_id, collection_id) + + def item(self, group_id, item_id): """ - Fetch individual item from repository's endpoint and get Giles document details for documents of type 'text/plain' + Fetch individual item details from the repository and extract Giles document text. Args: - groupId: The group ID in the repository - itemId: The item ID in the repository + group_id: The group ID from which the item is fetched. + item_id: The item ID to fetch. Returns: - A dictionary containing item details from repository, and Giles document details with extracted text + A dictionary containing item details and Giles document text. """ - headers = auth.citesphere_auth(self.user, self.repository) - url = f"{self.repository.endpoint}/api/v1/groups/{groupId}/items/{itemId}/" - response = requests.get(url, headers=headers) + # Fetch item details using CitesphereAPIv1 + item_data = self.api.get_item_details(group_id, item_id) - if response.status_code == 200: - item_data = response.json() + # Extract core item details + item_details = { + 'key': item_data.get('item', {}).get('key'), + 'title': item_data.get('item', {}).get('title'), + 'authors': item_data.get('item', {}).get('authors', []), + 'itemType': item_data.get('item', {}).get('itemType'), + 'addedOn': item_data.get('item', {}).get('dateAdded', 'Unknown date'), + 'url': item_data.get('item', {}).get('url') + } - item_details = { - 'key': item_data.get('item', {}).get('key'), - 'title': item_data.get('item', {}).get('title'), - 'authors': item_data.get('item', {}).get('authors', []), - 'itemType': item_data.get('item', {}).get('itemType'), - 'addedOn': item_data.get('item', {}).get('dateAdded', 'Unknown date'), - 'url': item_data.get('item', {}).get('url') - } + # Extract Giles uploads and their text if available + giles_uploads = item_data.get('item', {}).get('gilesUploads', []) + item_data['item']['text'] = self._fetch_giles_text(giles_uploads) + item_data['item']['details'] = item_details - # Extract Giles upload details if available - giles_uploads = item_data.get('item', {}).get('gilesUploads', []) + return item_data - if giles_uploads: - giles_details = [] - extracted_text = giles_uploads[0].get('extractedText', {}) + def _fetch_giles_text(self, giles_uploads): + """Extract text from Giles uploads.""" + if not giles_uploads: + return "No Giles uploads available." - if extracted_text and extracted_text.get('content-type') == 'text/plain': - extracted_text_data = get_giles_document_details(self.user, extracted_text.get('id')) - item_data['item']['text'] = extracted_text_data - elif giles_uploads[0].get('pages'): - pages = giles_uploads[0].get('pages') - text = "" - for page in pages: - if page.get('text') and page.get('text').get('content-type') == 'text/plain': - data = get_giles_document_details(self.user, page.get('text').get('id')) - text += data - item_data['item']['text'] = text - else: - item_data['item']['text'] = "No valid text/plain content found." - else: - print("No Giles uploads available") - item_data['item']['text'] = "No Giles uploads available." + upload = giles_uploads[0] + text_content = "" - item_data['item']['details'] = item_details + # Extract plain text if available + extracted_text = upload.get('extractedText', {}) + if extracted_text and extracted_text.get('content-type') == 'text/plain': + text_content = get_giles_document_details(self.user, extracted_text['id']) - return item_data + # Fallback to extracting text from pages + elif 'pages' in upload: + for page in upload['pages']: + text_data = page.get('text') + if text_data and text_data.get('content-type') == 'text/plain': + text_content += get_giles_document_details(self.user, text_data['id']) - else: - response.raise_for_status() + return text_content or "No valid text/plain content found." \ No newline at end of file From 716c8e40ac4a93742726274b6717e635c9904457 Mon Sep 17 00:00:00 2001 From: Girik1105 Date: Fri, 18 Oct 2024 15:02:07 -0700 Subject: [PATCH 03/11] [VOGRE-9] Removed obselete code and fixed error handling in views to ensure consistency --- annotations/views/repository_views.py | 6 +- repository/restable/__init__.py | 108 ------ repository/restable/util.py | 485 -------------------------- 3 files changed, 4 insertions(+), 595 deletions(-) delete mode 100644 repository/restable/__init__.py delete mode 100644 repository/restable/util.py diff --git a/annotations/views/repository_views.py b/annotations/views/repository_views.py index 4eb3d542..368bd572 100644 --- a/annotations/views/repository_views.py +++ b/annotations/views/repository_views.py @@ -74,7 +74,10 @@ def repository_collections(request, repository_id): manager = RepositoryManager(user=request.user, repository=repository) project_id = request.GET.get('project_id') - collections = manager.groups() # Fetch collections + try: + collections = manager.groups() # Fetch collections + except IOError: + return render(request, 'annotations/repository_ioerror.html', {}, status=500) context = { 'collections': collections, @@ -148,7 +151,6 @@ def repository_browse(request, repository_id): 'manager': manager, 'title': 'Browse repository %s' % repository.name, 'project_id': project_id, - 'manager': manager, 'resources': resources['resources'], } previous_page, next_page = _get_pagination(resources, base_url, base_params) diff --git a/repository/restable/__init__.py b/repository/restable/__init__.py deleted file mode 100644 index 8aba8a9a..00000000 --- a/repository/restable/__init__.py +++ /dev/null @@ -1,108 +0,0 @@ -from .util import * -from ..auth import * -from django.shortcuts import get_object_or_404 -from django.conf import settings -import requests - -from ..models import Repository - -class RESTManager(object): - """ - Simplified RESTManager for handling Citesphere groups, collections, and items. - """ - - def __init__(self, user=None, repository=None, headers=None): - """ - Initialize the RESTManager with user authentication and base URL for Citesphere API. - - Parameters - ---------- - user : User object - The user for which authentication is handled. - repository : Repository object - The repository for which the RESTManager is handling requests. - base_url : str - The base URL for the Citesphere API. - headers : dict - Additional headers to be sent with the request. - """ - self.user = user - self.repository = repository - self.base_url = repository.endpoint - self.headers = headers or {} - - def _get_headers(self): - if self.user and self.repository: - auth_headers = citesphere_auth(self.user, self.repository) - if auth_headers: - self.headers.update(auth_headers) - else: - # Handle authentication failure appropriately - raise Exception("Authentication required. Please authenticate with Citesphere.") - return self.headers - - def get(self, endpoint, params=None): - """ - Generic method for performing GET requests. - - Parameters - ---------- - endpoint : str - The endpoint to hit (appended to the base URL). - params : dict - Optional query parameters. - - Returns - ------- - JSON response or raises an HTTPError if the request fails. - """ - url = f"{self.base_url}/{endpoint}" - response = requests.get(url, headers=self._get_headers(), params=params) - - if response.status_code == 200: - return response.json() # Parse JSON if successful - else: - response.raise_for_status() - - def groups(self): - """ - Fetch groups from the Citesphere API. - - Returns - ------- - JSON response containing the groups. - """ - return self.get('v1/api/groups') - - def collections(self, group_id): - """ - Fetch collections for a specific group from the Citesphere API. - - Parameters - ---------- - group_id : int - The ID of the group for which collections are to be fetched. - - Returns - ------- - JSON response containing the collections. - """ - return self.get(f'v1/api/groups/{group_id}/collections') - - def items(self, group_id, collection_id): - """ - Fetch items for a specific collection within a group. - - Parameters - ---------- - group_id : int - The ID of the group. - collection_id : int - The ID of the collection. - - Returns - ------- - JSON response containing the items. - """ - return self.get(f'v1/api/groups/{group_id}/collections/{collection_id}/items') - diff --git a/repository/restable/util.py b/repository/restable/util.py deleted file mode 100644 index 8953061f..00000000 --- a/repository/restable/util.py +++ /dev/null @@ -1,485 +0,0 @@ -""" -Helper functions for parsing REST responses. -""" - -import re, requests, json, jsonpickle -import lxml.etree as ET -from pprint import pprint - - -class ResultList(list): - def __init__(self, *args, **kwargs): - super(ResultList, self).__init__(*args) - self.previous_page = kwargs.get('previous_page') - self.next_page = kwargs.get('next_page') - - -class JSONData(dict): - def __init__(self, obj={}): - for key, value in obj.items(): - if type(value) is list: - value = JSONArray(value) - elif type(value) is dict: - value = JSONData(value) - self[key] = value - - def get(self, key, *args, **kwargs): - return super(JSONData, self).get(key) - - -class JSONArray(list): - """ - Adds ``get`` support to a list. - """ - def __init__(self, obj=[]): - for item in obj: - if type(item) is dict: - item = JSONData(item) - self.append(item) - - def get(self, key, *args, **kwargs): - """ - Return the value of ``key`` in the first object in list. - """ - return self[0].get(key) if len(self) > 0 else None - - def get_list(self, key=None, *args, **kwargs): - """ - Return the value of ``key`` in each object in list. - """ - if key: - return [obj.get(key) for obj in self if key in obj] - return [obj for obj in self] - - -def is_multiple(tag): - """ - Detect the multi-value flag (``*``) in a path part (``tag``). - - Parameters - ---------- - tag : str - - Returns - ------- - tuple - tag name (str), multiple (bool) - """ - if not tag: - return None, None - if tag == '*': - return None, '*' - return re.match(r'([^\*]+)(\*)?', tag).groups() - - -def get_recursive_pathfinder(nsmap={}, method='find', mult_method='findall'): - """ - Generate a recursive function that follows the path in ``tags``, starting - at ``elem``. - """ - - def _get(elem, tags): - """ - Parameters - ---------- - elem : :class:`lxml.etree.Element` - tags : list - """ - if not tags: # Bottomed out; recursion stops. - return elem - - this_tag, multiple = is_multiple(tags.pop()) - base = _get(elem, tags) - - if not base: - return [] if multiple else None - - if type(base) is list: - _apply = lambda b, t, meth: [getattr(c, meth)(t, nsmap) for c in b] - else: - _apply = lambda b, t, meth: getattr(b, meth)(t, nsmap) - - - if multiple: - return _apply(base, this_tag, mult_method) - return _apply(base, this_tag, method) - return _get - - -def _to_unicode(e): - if isinstance(e, unicode): - return e - return e.decode('utf-8') - - -_etree_attribute_getter = lambda e, attr: _to_unicode(getattr(e, 'attrib', {}).get(attr, u'').strip())#.encode('utf-8') -_etree_cdata_getter = lambda e: _to_unicode(getattr(getattr(e, 'text', u''), 'strip', lambda: u'')())#.encode('utf-8') -_json_content_getter = lambda e: e - - -def content_picker_factory(env, content_getter=_etree_cdata_getter, attrib_getter=_etree_attribute_getter): - """ - Generates a function that retrives the CDATA content or attribute value of - an element. - - Parameters - ---------- - env : dict - - Returns - ------- - function - """ - attribute, sep = env.get('attribute', False), env.get('sep', None) - _separator = lambda value: [v.strip() for v in value.split(sep)] if sep else value - if attribute: - return lambda elem: _separator(attrib_getter(elem, attribute[1:-1])) - return lambda elem: _separator(content_getter(elem)) - - -def passthrough_picker_factory(env, *args, **kwargs): - """ - Generates a function that simply returns a passed - :class:`lxml.etree.Element`\. - - Parameters - ---------- - env : dict - - Returns - ------- - function - """ - return lambda e: e - - -def decompose_path(path_string): - """ - Split a path string into its constituent parts. - - Parameters - ---------- - path_string : str - - Returns - ------- - path : list - attribute : str or None - """ - if '|' in path_string: - try: - path_string, sep = path_string.split('|') - except ValueError: - raise ValueError("Malformed path: only one separator reference" - " (|) allowed.") - else: - sep = None - - path, attribute = re.match(r'([^\[]+)(\[.+\])?', path_string).groups() - if '[' in path and not attribute: - raise ValueError("Malformed path: attribute references must come at" - " the very end of the path.") - - path = path.split('/') - return path, attribute, sep - - -def _parse_path(path_string, nsmap={}, picker_factory={}, - content_getter=_etree_cdata_getter, - attrib_getter=_etree_attribute_getter, - get_method='find', mult_method='findall'): - """ - Generate a function that will retrieve data of interest from an arbitrary - object. This combines common logic from public parser functions. - - Parameters - ---------- - path_string : str - See docs for how this should be written. TODO: write the docs. - nsmap: dict - picker_factory : function - get_method : str - list_method : str - - Returns - ------- - function - """ - path, attribute, sep = decompose_path(path_string) - _get = get_recursive_pathfinder(nsmap=nsmap, method=get_method, - mult_method=mult_method) - _picker = picker_factory(locals(), content_getter=content_getter) - - def _apply(obj): # No empty values. - value = _picker(obj) - if value and (not type(value) is list or value[0]): - return value - - def _call(elem): - base = _get(elem, path) - if type(base) is list: - return [_apply(child) for child in base] - return _apply(base) - return _call - - -def parse_json_path(path_string, nsmap={}, picker_factory=content_picker_factory): - """ - Generate a function that will retrieve data of interest from a - :class:`.JSONData` object. - - Parameters - ---------- - path_string : str - See docs for how this should be written. TODO: write the docs. - nsmap: dict - Not used. - picker_factory : function - - - Returns - ------- - function - """ - return _parse_path(path_string, nsmap, picker_factory, _json_content_getter, - _json_content_getter, 'get', 'get_list') - - -def parse_xml_path(path_string, nsmap={}, picker_factory=content_picker_factory): - """ - Generate a function that will retrieve data of interest from an - :class:`lxml.etree.Element`\. - - Parameters - ---------- - path_string : str - See docs for how this should be written. TODO: write the docs. - nsmap: dict - See the ``lxml.etree`` docs. - picker_factory : function - - - Returns - ------- - function - """ - return _parse_path(path_string, nsmap, picker_factory) - - -def generate_request(config, glob={}): - """ - Generate a function that performs an HTTP request based on the configuration - in ``config``. - - Parameters - ---------- - config : dict - glob : dict - - Returns - ------- - function - Expects keyword arguments defined in the configuration. If provided, - ``headers`` will be pulled out and passed as headers in the request. - """ - try: - path_partial = config['path'] - except KeyError: - raise ValueError("Malformed configuration: no path specified.") - - method = config.get("method", "GET") # GET by default. - - # Maps accept -> send parameter names. - parameters = {param['accept']: param['send'] - for param in config.get("parameters", [])} - required = {param['accept'] for param in config.get("parameters", []) - if param.get('required', False)} - defaults = {param['accept']: param['default'] - for param in config.get("parameters", []) - if 'default' in param} - - format_keys = re.findall(r'\{([^\}]+)\}', path_partial) - fmt = {k: v for k, v in glob.items() if k in format_keys} - - def _get_path(extra={}): - fmt.update(extra) - return path_partial.format(**fmt) - - def _call(**params): - """ - Perform the configured request. - - Parameters - ---------- - params : kwargs - - Returns - ------- - - """ - headers = params.pop('headers', {}) - for param in required: - if param not in params: - raise TypeError('expected parameter %s' % param) - - # Relabel accepts -> send parameter names. - params = {parameters.get(k): v for k, v in params.items() - if k in parameters} - - extra = {key: params.pop(key, defaults.pop(key, '')) - for key in format_keys - if key not in fmt} # Don't overwrite. - - if method == 'GET': - request_method = requests.get - payload = {'params': params, 'headers': headers} - elif method == 'POST': - request_method = requests.post - payload = {'data': params, 'headers': headers} - - target = _get_path(extra) - try: - response = request_method(target, **payload) - except Exception as E: - print('request to %s failed with %s' % (target, str(payload))) - raise E - if response.status_code >= 400: - print('request to %s failed' % response.url) - raise IOError(response.content) - return response.content - return _call - - -def generate_simple_request(path, method): - def _call(**params): - """ - Perform the configured request. - - Parameters - ---------- - params : kwargs - - Returns - ------- - - """ - headers = params.pop('headers', {}) - - if method == 'GET': - request_method = requests.get - payload = {'params': params, 'headers': headers} - elif method == 'POST': - request_method = requests.post - payload = {'data': params, 'headers': headers} - response = request_method(path, **params) - if response.status_code >= 400: - raise IOError(response.content) - return response.content - return _call - - -def parse_result(config, data, path_parser=parse_xml_path, glob={}, nsmap={}): - """ - Extract data from an :class:`lxml.etree.Element` using a configuration - schema. - - Parameters - ---------- - config : dict - data : :class:`lxml.etree.Element` - path_parser : function - glob : dict - nsmap : dict - - Returns - ------- - list - """ - base_path = config.get('path', None) - _, multiple = is_multiple(base_path) - if base_path: - _parser = path_parser(base_path, nsmap=nsmap, - picker_factory=passthrough_picker_factory) - base_elems = _parser(data) - else: - base_elems = [data] - - data = ResultList() - - # Pagination. - pagination = config.get('pagination') - if pagination: - if "next" in pagination: - data.next_page = generate_simple_request(path_parser(pagination.get("next").get('path'), nsmap)(data), 'GET') - if "previous" in pagination: - data.previous_page = generate_simple_request(path_parser(pagination.get("previous").get('path'), nsmap)(data), 'GET') - - base_elems = [base_elems] if not type(base_elems) is list else base_elems - for base_elem in base_elems: - # Serialized raw data is preserved. - parsed_data = {'raw': jsonpickle.dumps(base_elem)} - - # Each parameter is parsed separately. - for parameter in config.get('parameters'): - name = parameter.get('name') - ctype = parameter.get('type') - - value = path_parser(parameter.get('path'), nsmap)(base_elem) - if ctype == 'object': - value = parse_result(parameter.get('config'), value, - path_parser=path_parser, glob=glob, - nsmap=nsmap) - - # Templated parameters use response data and globals to generate - # values (e.g. URI from ID). - template = parameter.get('template') - if template: - # Isolate only the globals needed to render the template. - format_keys = re.findall(r'\{([^\}]+)\}', template) - fmt = {k: v for k, v in glob.items() if k in format_keys} - if name in format_keys: # Probably this is always true... - fmt[name] = value - value = template.format(**fmt) - parsed_data[name] = value - data.append(parsed_data) - - if not multiple: - assert len(data) == 1 - return data[0] - return data - - -# This isn't particularly special at the moment, but makes it easier to swap -# out parsers later, or add additional logic. -def parse_raw_xml(raw): - """ - Parse raw XML response content. - - Parameters - ---------- - raw : unicode - - Returns - ------- - :class:`lxml.etree.Element` - """ - # if type(raw) is str: - # raw = raw.decode('utf-8') - return ET.fromstring(raw) - - -def parse_raw_json(raw): - """ - Parse raw JSON response content. - - Parameters - ---------- - raw : unicode - - Returns - ------- - :class:`lxml.etree.Element` - """ - if type(raw) is str: - raw = raw.decode('utf-8') - return JSONData(json.loads(raw)) From 18682a77ddeaac9264120a3d3784618a7a52607f Mon Sep 17 00:00:00 2001 From: Girik1105 Date: Thu, 14 Nov 2024 11:16:54 -0700 Subject: [PATCH 04/11] [VOGRE-9] fixed group_id in repository_views, added class in managers, removed extra file --- annotations/views/repository_views.py | 2 +- external_accounts/citesphere_api_v1.py | 35 ------------------------- repository/managers.py | 36 +++++++++++++++++++++++++- 3 files changed, 36 insertions(+), 37 deletions(-) delete mode 100644 external_accounts/citesphere_api_v1.py diff --git a/annotations/views/repository_views.py b/annotations/views/repository_views.py index 8cbf4b02..8c34921b 100644 --- a/annotations/views/repository_views.py +++ b/annotations/views/repository_views.py @@ -106,7 +106,7 @@ def repository_collection(request, repository_id, group_id): response_data = manager.collections(group_id=group_id) group_info = response_data.get('group') collections = response_data.get('collections', []) - group_texts = manager.group_items(groupId=group_id, page=page) + group_texts = manager.group_items(group_id=group_id, page=page) except IOError: return render(request, 'annotations/repository_ioerror.html', {}, status=500) diff --git a/external_accounts/citesphere_api_v1.py b/external_accounts/citesphere_api_v1.py deleted file mode 100644 index 39fb7005..00000000 --- a/external_accounts/citesphere_api_v1.py +++ /dev/null @@ -1,35 +0,0 @@ -import requests -from repository import auth - -class CitesphereAPIv1: - def __init__(self, user, repository): - self.user = user - self.repository = repository - self.base_url = f"{repository.endpoint}/api/v1" - - def _get_headers(self): - """Generate headers required for API requests.""" - return auth.citesphere_auth(self.user, self.repository) - - def _make_request(self, endpoint, params=None): - """Helper function to handle GET requests with optional parameters.""" - url = f"{self.base_url}{endpoint}" - response = requests.get(url, headers=self._get_headers(), params=params) - response.raise_for_status() - return response.json() - - def get_groups(self, params=None): - """Fetch all groups with optional parameters.""" - return self._make_request("/groups/", params=params) - - def get_group_collections(self, group_id, params=None): - """Fetch all collections within a group with optional parameters.""" - return self._make_request(f"/groups/{group_id}/collections/", params=params) - - def get_collection_items(self, group_id, collection_id, params=None): - """Fetch items in a specific collection with optional parameters.""" - return self._make_request(f"/groups/{group_id}/collections/{collection_id}/items/", params=params) - - def get_item_details(self, group_id, item_id, params=None): - """Fetch detailed information of an item with optional parameters.""" - return self._make_request(f"/groups/{group_id}/items/{item_id}/", params=params) diff --git a/repository/managers.py b/repository/managers.py index ee7723d5..2e4cb2a4 100644 --- a/repository/managers.py +++ b/repository/managers.py @@ -1,7 +1,41 @@ -from external_accounts.citesphere_api_v1 import CitesphereAPIv1 from external_accounts.utils import get_giles_document_details +from repository import auth import requests +class CitesphereAPIv1: + def __init__(self, user, repository): + self.user = user + self.repository = repository + self.base_url = f"{repository.endpoint}/api/v1" + + def _get_headers(self): + """Generate headers required for API requests.""" + return auth.citesphere_auth(self.user, self.repository) + + def _make_request(self, endpoint, params=None): + """Helper function to handle GET requests with optional parameters.""" + url = f"{self.base_url}{endpoint}" + response = requests.get(url, headers=self._get_headers(), params=params) + response.raise_for_status() + return response.json() + + def get_groups(self, params=None): + """Fetch all groups with optional parameters.""" + return self._make_request("/groups/", params=params) + + def get_group_collections(self, group_id, params=None): + """Fetch all collections within a group with optional parameters.""" + return self._make_request(f"/groups/{group_id}/collections/", params=params) + + def get_collection_items(self, group_id, collection_id, params=None): + """Fetch items in a specific collection with optional parameters.""" + return self._make_request(f"/groups/{group_id}/collections/{collection_id}/items/", params=params) + + def get_item_details(self, group_id, item_id, params=None): + """Fetch detailed information of an item with optional parameters.""" + return self._make_request(f"/groups/{group_id}/items/{item_id}/", params=params) + + class RepositoryManager: def __init__(self, user, repository): """Initialize the manager with the user and repository.""" From 5c129b3287e99fc4f95df6ace7df1ff9267914ae Mon Sep 17 00:00:00 2001 From: Girik1105 Date: Thu, 14 Nov 2024 12:34:24 -0700 Subject: [PATCH 05/11] [VOGRE-9] Improved error handling and reporting --- .../annotations/repository_ioerror.html | 1 + annotations/views/repository_views.py | 45 ++-- repository/managers.py | 198 ++++++++++++++---- 3 files changed, 184 insertions(+), 60 deletions(-) diff --git a/annotations/templates/annotations/repository_ioerror.html b/annotations/templates/annotations/repository_ioerror.html index 9d81edf1..947ab73c 100644 --- a/annotations/templates/annotations/repository_ioerror.html +++ b/annotations/templates/annotations/repository_ioerror.html @@ -9,6 +9,7 @@ There was a problem communicating with the remote repository that contains this content. Please go back, and try again. If this problem persists, please contact an administrator. +

Error: {{ error }}

diff --git a/annotations/views/repository_views.py b/annotations/views/repository_views.py index 8c34921b..45d9784a 100644 --- a/annotations/views/repository_views.py +++ b/annotations/views/repository_views.py @@ -79,8 +79,10 @@ def repository_collections(request, repository_id): try: collections = manager.groups() # Fetch collections - except IOError: - return render(request, 'annotations/repository_ioerror.html', {}, status=500) + except CitesphereAPIError as e: + return render(request, 'annotations/repository_ioerror.html', {'error': str(e)}, status=500) + except Exception as e: + return render(request, 'annotations/repository_ioerror.html', {'error': 'An unexpected error occurred'}, status=500) context = { 'collections': collections, @@ -106,9 +108,12 @@ def repository_collection(request, repository_id, group_id): response_data = manager.collections(group_id=group_id) group_info = response_data.get('group') collections = response_data.get('collections', []) - group_texts = manager.group_items(group_id=group_id, page=page) - except IOError: - return render(request, 'annotations/repository_ioerror.html', {}, status=500) + group_texts = manager.group_items(group_id=group_id, page=page) + except CitesphereAPIError as e: + return render(request, 'annotations/repository_ioerror.html', {'error': str(e)}, status=500) + except Exception as e: + print(e) + return render(request, 'annotations/repository_ioerror.html', {'error': 'An unexpected error occurred'}, status=500) project_id = request.GET.get('project_id') @@ -136,7 +141,6 @@ def repository_collection(request, repository_id, group_id): return render(request, 'annotations/repository_collection.html', context) - @citesphere_authenticated def repository_browse(request, repository_id): params = _get_params(request) @@ -146,8 +150,10 @@ def repository_browse(request, repository_id): project_id = request.GET.get('project_id') try: resources = manager.list(**params) - except IOError: - return render(request, 'annotations/repository_ioerror.html', {}, status=500) + except CitesphereAPIError as e: + return render(request, 'annotations/repository_ioerror.html', {'error': str(e)}, status=500) + except Exception as e: + return render(request, 'annotations/repository_ioerror.html', {'error': 'An unexpected error occurred'}, status=500) base_url = reverse('repository_browse', args=(repository_id,)) base_params = {} @@ -172,7 +178,6 @@ def repository_browse(request, repository_id): return render(request, 'annotations/repository_browse.html', context) - @citesphere_authenticated def repository_search(request, repository_id): repository = get_object_or_404(Repository, pk=repository_id) @@ -249,8 +254,10 @@ def repository_collection_texts(request, repository_id, group_id, group_collecti try: texts = manager.collection_items(group_id, group_collection_id, page=page) - except Exception as e: + except CitesphereAPIError as e: return render(request, 'annotations/repository_ioerror.html', {'error': str(e)}, status=500) + except Exception as e: + return render(request, 'annotations/repository_ioerror.html', {'error': 'An unexpected error occurred'}, status=500) # retrieve items per page from settings and calculate pagination metadata from util function items_per_page = settings.PAGINATION_PAGE_SIZE @@ -280,8 +287,10 @@ def repository_text_import(request, repository_id, group_id, text_key): try: result = manager.item(group_id, text_key) - except IOError: - return render(request, 'annotations/repository_ioerror.html', {}, status=500) + except CitesphereAPIError as e: + return render(request, 'annotations/repository_ioerror.html', {'error': str(e)}, status=500) + except Exception as e: + return render(request, 'annotations/repository_ioerror.html', {'error': 'An unexpected error occurred'}, status=500) # Extracting item details and Giles details from the result item_details = result.get('item', {}).get('details', {}) @@ -330,8 +339,10 @@ def repository_text_content(request, repository_id, text_id, content_id): try: content = manager.content(id=int(content_id)) resource = manager.resource(id=int(text_id)) - except IOError: - return render(request, 'annotations/repository_ioerror.html', {}, status=500) + except CitesphereAPIError as e: + return render(request, 'annotations/repository_ioerror.html', {'error': str(e)}, status=500) + except Exception as e: + return render(request, 'annotations/repository_ioerror.html', {'error': 'An unexpected error occurred'}, status=500) content_type = content.get('content_type', None) from annotations import annotators @@ -348,8 +359,10 @@ def repository_text_content(request, repository_id, text_id, content_id): if part_of_id: try: master = manager.resource(id=int(part_of_id)) - except IOError: - return render(request, 'annotations/repository_ioerror.html', {}, status=500) + except CitesphereAPIError as e: + return render(request, 'annotations/repository_ioerror.html', {'error': str(e)}, status=500) + except Exception as e: + return render(request, 'annotations/repository_ioerror.html', {'error': 'An unexpected error occurred'}, status=500) master_resource, _ = Text.objects.get_or_create(uri=master['uri'], defaults={ 'title': master.get('title'), diff --git a/repository/managers.py b/repository/managers.py index 2e4cb2a4..d5f1377d 100644 --- a/repository/managers.py +++ b/repository/managers.py @@ -1,7 +1,16 @@ from external_accounts.utils import get_giles_document_details from repository import auth +from requests.exceptions import RequestException import requests +class CitesphereAPIError(Exception): + """Base exception class for Citesphere API errors""" + def __init__(self, message, error_code=None, details=None): + self.message = message + self.error_code = error_code + self.details = details + super().__init__(self.message) + class CitesphereAPIv1: def __init__(self, user, repository): self.user = user @@ -10,14 +19,34 @@ def __init__(self, user, repository): def _get_headers(self): """Generate headers required for API requests.""" - return auth.citesphere_auth(self.user, self.repository) + try: + return auth.citesphere_auth(self.user, self.repository) + except Exception as e: + raise CitesphereAPIError( + message="Authentication failed, please try again.", + error_code="AUTH_ERROR", + details=str(e) + ) def _make_request(self, endpoint, params=None): - """Helper function to handle GET requests with optional parameters.""" + """Helper function to handle GET requests with optional parameters.""" url = f"{self.base_url}{endpoint}" - response = requests.get(url, headers=self._get_headers(), params=params) - response.raise_for_status() - return response.json() + try: + response = requests.get(url, headers=self._get_headers(), params=params) + response.raise_for_status() + return response.json() + except RequestException as e: + raise CitesphereAPIError( + message="API request failed", + error_code="REQUEST_ERROR", + details=str(e) + ) + except ValueError as e: + raise CitesphereAPIError( + message="Invalid JSON response", + error_code="RESPONSE_ERROR", + details=str(e) + ) def get_groups(self, params=None): """Fetch all groups with optional parameters.""" @@ -45,9 +74,16 @@ def __init__(self, user, repository): def get_raw(self, target, **params): """Fetch raw data from any API target.""" - response = requests.get(target, headers=self.api._get_headers(), params=params) - response.raise_for_status() - return response.content + try: + response = requests.get(target, headers=self.api._get_headers(), params=params) + response.raise_for_status() + return response.content + except RequestException as e: + raise CitesphereAPIError( + message="Failed to fetch data", + error_code="RAW_DATA_ERROR", + details=str(e) + ) def groups(self): """Fetch all groups from the repository.""" @@ -66,7 +102,17 @@ def group_items(self, group_id, page=1): - "group": Details about the group. - "items": A list of items in the group for the specified page. - "total_items": The total number of items in the group. + + Raises: + CitesphereAPIError """ + if not isinstance(page, int) or page < 1: + raise CitesphereAPIError( + message="Invalid page number", + error_code="INVALID_PAGE", + details="Page must be a positive integer" + ) + response_data = self.api._make_request(f"/groups/{group_id}/items/", params={'page': page}) group_data = response_data.get('group', {}) items = response_data.get('items', []) @@ -96,19 +142,37 @@ def collection_items(self, group_id, collection_id, page=1): - "group": Details about the group. - "items": A list of items in the specified collection for the given page. - "total_items": The total number of items in the collection. + + Raises: + CitesphereAPIError """ - # Fetch collection details to get total items count - collections_data = self.api.get_group_collections(group_id).get('collections', []) - total_items = next((c.get('numberOfItems', 0) for c in collections_data if c.get('key') == collection_id), 0) - - # Fetch paginated items for the collection - items = self.api.get_collection_items(group_id, collection_id, params={'page': page}).get('items', []) - - return { - "group": collections_data, - "items": items, - "total_items": total_items - } + if not isinstance(page, int) or page < 1: + raise CitesphereAPIError( + message="Invalid page number", + error_code="INVALID_PAGE", + details="Page must be a positive integer" + ) + + try: + collections_data = self.api.get_group_collections(group_id).get('collections', []) + # TODO: Once there is a collection information endpoint,this will need to be updated + total_items = next((c.get('numberOfItems', 0) for c in collections_data if c.get('key') == collection_id), 0) + # Fetch paginated items for the collection + items = self.api.get_collection_items(group_id, collection_id, params={'page': page}).get('items', []) + + return { + "group": collections_data, + "items": items, + "total_items": total_items + } + + # TODO: Once there is a collection information endpoint, this will no longer be needed, this will be an Exception error + except StopIteration: + raise CitesphereAPIError( + message="Collection not found", + error_code="COLLECTION_NOT_FOUND", + details=f"Collection {collection_id} not found in group {group_id}" + ) def item(self, group_id, item_id): """ @@ -120,45 +184,91 @@ def item(self, group_id, item_id): Returns: A dictionary containing item details and Giles document text. + + Raises: + CitesphereAPIError """ # Fetch item details using CitesphereAPIv1 item_data = self.api.get_item_details(group_id, item_id) + + if not item_data or 'item' not in item_data: + raise CitesphereAPIError( + message="Invalid item data", + error_code="INVALID_ITEM_DATA", + details="Response missing item data" + ) # Extract core item details + item = item_data.get('item', {}) item_details = { - 'key': item_data.get('item', {}).get('key'), - 'title': item_data.get('item', {}).get('title'), - 'authors': item_data.get('item', {}).get('authors', []), - 'itemType': item_data.get('item', {}).get('itemType'), - 'addedOn': item_data.get('item', {}).get('dateAdded', 'Unknown date'), - 'url': item_data.get('item', {}).get('url') + 'key': item.get('key'), + 'title': item.get('title'), + 'authors': item.get('authors', []), + 'itemType': item.get('itemType'), + 'addedOn': item.get('dateAdded', 'Unknown date'), + 'url': item.get('url') } # Extract Giles uploads and their text if available - giles_uploads = item_data.get('item', {}).get('gilesUploads', []) + giles_uploads = item.get('gilesUploads', []) item_data['item']['text'] = self._fetch_giles_text(giles_uploads) item_data['item']['details'] = item_details return item_data def _fetch_giles_text(self, giles_uploads): - """Extract text from Giles uploads.""" + """ + Extract text from Giles uploads. + + Args: + giles_uploads: List of Giles upload objects + + Returns: + str: Extracted text content or error message + + Raises: + CitesphereAPIError + """ if not giles_uploads: return "No Giles uploads available." - upload = giles_uploads[0] - text_content = "" - - # Extract plain text if available - extracted_text = upload.get('extractedText', {}) - if extracted_text and extracted_text.get('content-type') == 'text/plain': - text_content = get_giles_document_details(self.user, extracted_text['id']) - - # Fallback to extracting text from pages - elif 'pages' in upload: - for page in upload['pages']: - text_data = page.get('text') - if text_data and text_data.get('content-type') == 'text/plain': - text_content += get_giles_document_details(self.user, text_data['id']) - - return text_content or "No valid text/plain content found." + try: + upload = giles_uploads[0] + text_content = "" + + # Extract plain text if available + extracted_text = upload.get('extractedText', {}) + if extracted_text and extracted_text.get('content-type') == 'text/plain': + text_content = get_giles_document_details(self.user, extracted_text['id']) + if text_content is None: + raise CitesphereAPIError( + message="Failed to fetch document text from Giles, please try again later.", + error_code="GILES_TEXT_ERROR", + details="Failed to fetch document text from Giles" + ) + + # Fallback to extracting text from pages + elif 'pages' in upload: + for page in upload['pages']: + text_data = page.get('text') + if text_data and text_data.get('content-type') == 'text/plain': + page_text = get_giles_document_details(self.user, text_data['id']) + if page_text is not None: + text_content += page_text + else: + raise CitesphereAPIError( + message="Page text fetch failed", + error_code="GILES_PAGE_ERROR", + details=f"Failed to fetch text for page {page.get('number', 'unknown')}" + ) + + return text_content or "No valid text/plain content found." + + except Exception as e: + if isinstance(e, CitesphereAPIError): + raise + raise CitesphereAPIError( + message="Giles text extraction failed", + error_code="GILES_EXTRACTION_ERROR", + details=str(e) + ) From 5f9f715d3236195b50df8cbfbed95689e135ad5c Mon Sep 17 00:00:00 2001 From: Girik1105 Date: Thu, 14 Nov 2024 12:38:38 -0700 Subject: [PATCH 06/11] [VOGRE-9] made code prettier --- repository/managers.py | 68 ++++++++---------------------------------- 1 file changed, 12 insertions(+), 56 deletions(-) diff --git a/repository/managers.py b/repository/managers.py index d5f1377d..54499a69 100644 --- a/repository/managers.py +++ b/repository/managers.py @@ -7,7 +7,7 @@ class CitesphereAPIError(Exception): """Base exception class for Citesphere API errors""" def __init__(self, message, error_code=None, details=None): self.message = message - self.error_code = error_code + self.error_code = error_code self.details = details super().__init__(self.message) @@ -22,11 +22,7 @@ def _get_headers(self): try: return auth.citesphere_auth(self.user, self.repository) except Exception as e: - raise CitesphereAPIError( - message="Authentication failed, please try again.", - error_code="AUTH_ERROR", - details=str(e) - ) + raise CitesphereAPIError(message="Authentication failed, please try again.", error_code="AUTH_ERROR", details=str(e)) def _make_request(self, endpoint, params=None): """Helper function to handle GET requests with optional parameters.""" @@ -36,17 +32,9 @@ def _make_request(self, endpoint, params=None): response.raise_for_status() return response.json() except RequestException as e: - raise CitesphereAPIError( - message="API request failed", - error_code="REQUEST_ERROR", - details=str(e) - ) + raise CitesphereAPIError(message="API request failed", error_code="REQUEST_ERROR", details=str(e)) except ValueError as e: - raise CitesphereAPIError( - message="Invalid JSON response", - error_code="RESPONSE_ERROR", - details=str(e) - ) + raise CitesphereAPIError(message="Invalid JSON response", error_code="RESPONSE_ERROR", details=str(e)) def get_groups(self, params=None): """Fetch all groups with optional parameters.""" @@ -79,11 +67,7 @@ def get_raw(self, target, **params): response.raise_for_status() return response.content except RequestException as e: - raise CitesphereAPIError( - message="Failed to fetch data", - error_code="RAW_DATA_ERROR", - details=str(e) - ) + raise CitesphereAPIError(message="Failed to fetch data", error_code="RAW_DATA_ERROR", details=str(e)) def groups(self): """Fetch all groups from the repository.""" @@ -107,11 +91,7 @@ def group_items(self, group_id, page=1): CitesphereAPIError """ if not isinstance(page, int) or page < 1: - raise CitesphereAPIError( - message="Invalid page number", - error_code="INVALID_PAGE", - details="Page must be a positive integer" - ) + raise CitesphereAPIError(message="Invalid page number", error_code="INVALID_PAGE", details="Page must be a positive integer") response_data = self.api._make_request(f"/groups/{group_id}/items/", params={'page': page}) group_data = response_data.get('group', {}) @@ -147,11 +127,7 @@ def collection_items(self, group_id, collection_id, page=1): CitesphereAPIError """ if not isinstance(page, int) or page < 1: - raise CitesphereAPIError( - message="Invalid page number", - error_code="INVALID_PAGE", - details="Page must be a positive integer" - ) + raise CitesphereAPIError(message="Invalid page number", error_code="INVALID_PAGE", details="Page must be a positive integer") try: collections_data = self.api.get_group_collections(group_id).get('collections', []) @@ -168,11 +144,7 @@ def collection_items(self, group_id, collection_id, page=1): # TODO: Once there is a collection information endpoint, this will no longer be needed, this will be an Exception error except StopIteration: - raise CitesphereAPIError( - message="Collection not found", - error_code="COLLECTION_NOT_FOUND", - details=f"Collection {collection_id} not found in group {group_id}" - ) + raise CitesphereAPIError(message="Collection not found", error_code="COLLECTION_NOT_FOUND", details=f"Collection {collection_id} not found in group {group_id}") def item(self, group_id, item_id): """ @@ -192,11 +164,7 @@ def item(self, group_id, item_id): item_data = self.api.get_item_details(group_id, item_id) if not item_data or 'item' not in item_data: - raise CitesphereAPIError( - message="Invalid item data", - error_code="INVALID_ITEM_DATA", - details="Response missing item data" - ) + raise CitesphereAPIError(message="Invalid item data", error_code="INVALID_ITEM_DATA", details="Response missing item data") # Extract core item details item = item_data.get('item', {}) @@ -241,11 +209,7 @@ def _fetch_giles_text(self, giles_uploads): if extracted_text and extracted_text.get('content-type') == 'text/plain': text_content = get_giles_document_details(self.user, extracted_text['id']) if text_content is None: - raise CitesphereAPIError( - message="Failed to fetch document text from Giles, please try again later.", - error_code="GILES_TEXT_ERROR", - details="Failed to fetch document text from Giles" - ) + raise CitesphereAPIError(message="Failed to fetch document text from Giles, please try again later.", error_code="GILES_TEXT_ERROR", details="Failed to fetch document text from Giles") # Fallback to extracting text from pages elif 'pages' in upload: @@ -256,19 +220,11 @@ def _fetch_giles_text(self, giles_uploads): if page_text is not None: text_content += page_text else: - raise CitesphereAPIError( - message="Page text fetch failed", - error_code="GILES_PAGE_ERROR", - details=f"Failed to fetch text for page {page.get('number', 'unknown')}" - ) + raise CitesphereAPIError(message="Page text fetch failed", error_code="GILES_PAGE_ERROR", details=f"Failed to fetch text for page {page.get('number', 'unknown')}") return text_content or "No valid text/plain content found." except Exception as e: if isinstance(e, CitesphereAPIError): raise - raise CitesphereAPIError( - message="Giles text extraction failed", - error_code="GILES_EXTRACTION_ERROR", - details=str(e) - ) + raise CitesphereAPIError(message="Giles text extraction failed", error_code="GILES_EXTRACTION_ERROR", details=str(e)) From 2198afacb91af079f018ced9aa5fa4b4cf130129 Mon Sep 17 00:00:00 2001 From: Girik1105 Date: Thu, 14 Nov 2024 12:49:06 -0700 Subject: [PATCH 07/11] [VOGRE-9] Better error messages and comments --- repository/managers.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/repository/managers.py b/repository/managers.py index 54499a69..e8837cbf 100644 --- a/repository/managers.py +++ b/repository/managers.py @@ -220,11 +220,12 @@ def _fetch_giles_text(self, giles_uploads): if page_text is not None: text_content += page_text else: - raise CitesphereAPIError(message="Page text fetch failed", error_code="GILES_PAGE_ERROR", details=f"Failed to fetch text for page {page.get('number', 'unknown')}") + raise CitesphereAPIError(message="Failed to fetch document text from Giles, please try again later.", error_code="GILES_PAGE_ERROR", details=f"Failed to fetch text for page {page.get('number', 'unknown')}") return text_content or "No valid text/plain content found." except Exception as e: + # If the exception is already a CitesphereAPIError, re-raise it directly to preserve the original error details. if isinstance(e, CitesphereAPIError): raise - raise CitesphereAPIError(message="Giles text extraction failed", error_code="GILES_EXTRACTION_ERROR", details=str(e)) + raise CitesphereAPIError(message="Giles text extraction has failed", error_code="GILES_EXTRACTION_ERROR", details=str(e)) From db69daf8a7686a378259038f66cd8ded0aca4325 Mon Sep 17 00:00:00 2001 From: Girik1105 Date: Thu, 5 Dec 2024 11:31:52 -0700 Subject: [PATCH 08/11] [VOGRE-9] Added traceback --- annotations/views/repository_views.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/annotations/views/repository_views.py b/annotations/views/repository_views.py index 45d9784a..1c1563bd 100644 --- a/annotations/views/repository_views.py +++ b/annotations/views/repository_views.py @@ -27,6 +27,8 @@ from external_accounts.decorators import citesphere_authenticated from annotations.utils import get_pagination_metadata +import traceback + def _get_params(request): # The request may include parameters that should be passed along to the # repository -- at this point, this is just for pagination. @@ -80,8 +82,10 @@ def repository_collections(request, repository_id): try: collections = manager.groups() # Fetch collections except CitesphereAPIError as e: + print(traceback.format_exc()) return render(request, 'annotations/repository_ioerror.html', {'error': str(e)}, status=500) except Exception as e: + print(traceback.format_exc()) return render(request, 'annotations/repository_ioerror.html', {'error': 'An unexpected error occurred'}, status=500) context = { @@ -110,9 +114,10 @@ def repository_collection(request, repository_id, group_id): collections = response_data.get('collections', []) group_texts = manager.group_items(group_id=group_id, page=page) except CitesphereAPIError as e: + print(traceback.format_exc()) return render(request, 'annotations/repository_ioerror.html', {'error': str(e)}, status=500) except Exception as e: - print(e) + print(traceback.format_exc()) return render(request, 'annotations/repository_ioerror.html', {'error': 'An unexpected error occurred'}, status=500) project_id = request.GET.get('project_id') @@ -151,8 +156,10 @@ def repository_browse(request, repository_id): try: resources = manager.list(**params) except CitesphereAPIError as e: + print(traceback.format_exc()) return render(request, 'annotations/repository_ioerror.html', {'error': str(e)}, status=500) except Exception as e: + print(traceback.format_exc()) return render(request, 'annotations/repository_ioerror.html', {'error': 'An unexpected error occurred'}, status=500) base_url = reverse('repository_browse', args=(repository_id,)) @@ -255,8 +262,10 @@ def repository_collection_texts(request, repository_id, group_id, group_collecti try: texts = manager.collection_items(group_id, group_collection_id, page=page) except CitesphereAPIError as e: + print(traceback.format_exc()) return render(request, 'annotations/repository_ioerror.html', {'error': str(e)}, status=500) except Exception as e: + print(traceback.format_exc()) return render(request, 'annotations/repository_ioerror.html', {'error': 'An unexpected error occurred'}, status=500) # retrieve items per page from settings and calculate pagination metadata from util function @@ -288,8 +297,10 @@ def repository_text_import(request, repository_id, group_id, text_key): try: result = manager.item(group_id, text_key) except CitesphereAPIError as e: + print(traceback.format_exc()) return render(request, 'annotations/repository_ioerror.html', {'error': str(e)}, status=500) except Exception as e: + print(traceback.format_exc()) return render(request, 'annotations/repository_ioerror.html', {'error': 'An unexpected error occurred'}, status=500) # Extracting item details and Giles details from the result @@ -340,8 +351,10 @@ def repository_text_content(request, repository_id, text_id, content_id): content = manager.content(id=int(content_id)) resource = manager.resource(id=int(text_id)) except CitesphereAPIError as e: + print(traceback.format_exc()) return render(request, 'annotations/repository_ioerror.html', {'error': str(e)}, status=500) except Exception as e: + print(traceback.format_exc()) return render(request, 'annotations/repository_ioerror.html', {'error': 'An unexpected error occurred'}, status=500) content_type = content.get('content_type', None) From 98a3b7a228985f403eb6e838c7e376a0eb9c6548 Mon Sep 17 00:00:00 2001 From: Girik1105 Date: Thu, 5 Dec 2024 11:59:43 -0700 Subject: [PATCH 09/11] [VOGRE-9] Added group items in Citesphere API and updated manager --- repository/managers.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/repository/managers.py b/repository/managers.py index e8837cbf..870bc0a8 100644 --- a/repository/managers.py +++ b/repository/managers.py @@ -39,6 +39,10 @@ def _make_request(self, endpoint, params=None): def get_groups(self, params=None): """Fetch all groups with optional parameters.""" return self._make_request("/groups/", params=params) + + def get_group_items(self, group_id, params=None): + """Make a request to fetch group items.""" + return self._make_request(f"/groups/{group_id}/items/", params=params) def get_group_collections(self, group_id, params=None): """Fetch all collections within a group with optional parameters.""" @@ -93,7 +97,9 @@ def group_items(self, group_id, page=1): if not isinstance(page, int) or page < 1: raise CitesphereAPIError(message="Invalid page number", error_code="INVALID_PAGE", details="Page must be a positive integer") - response_data = self.api._make_request(f"/groups/{group_id}/items/", params={'page': page}) + # Make the API call using CitesphereAPIv1 + response_data = self.api.get_group_items(group_id, params={'page': page}) + group_data = response_data.get('group', {}) items = response_data.get('items', []) total_items = group_data.get('numItems', 0) From 2ce4c811c0cccabba5940a3ab6cfd94ce5114dac Mon Sep 17 00:00:00 2001 From: Julian Ophals <113470908+jophals@users.noreply.github.com> Date: Thu, 19 Dec 2024 11:46:32 -0700 Subject: [PATCH 10/11] story/VOGRE-44 (#34) * [story/VOGRE-44] added branching point when extracting Giles uploads to upload plain text when extracted text is empty * [story/VOGRE-44] fixed string interpolated error message * [story/VOGRE-44] addressed PR comments --- repository/managers.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/repository/managers.py b/repository/managers.py index 870bc0a8..9cf5933f 100644 --- a/repository/managers.py +++ b/repository/managers.py @@ -210,12 +210,18 @@ def _fetch_giles_text(self, giles_uploads): upload = giles_uploads[0] text_content = "" - # Extract plain text if available + # Extract plain text from Giles extracted text if available extracted_text = upload.get('extractedText', {}) if extracted_text and extracted_text.get('content-type') == 'text/plain': text_content = get_giles_document_details(self.user, extracted_text['id']) if text_content is None: raise CitesphereAPIError(message="Failed to fetch document text from Giles, please try again later.", error_code="GILES_TEXT_ERROR", details="Failed to fetch document text from Giles") + + # Extract plain text from upload file if available + elif upload.get('uploadedFile').get('content-type') == 'text/plain' and upload.get('uploadedFile').get('id'): + text_content = get_giles_document_details(self.user, upload.get('uploadedFile')['id']) + if text_content is None: + raise CitesphereAPIError(message="Failed to fetch document text from Giles, please try again later.", error_code="GILES_UPLOAD_PLAIN_TEXT_ERROR", details=f"Failed to fetch text from plain text file {upload.get('uploadedFile')['id']}") # Fallback to extracting text from pages elif 'pages' in upload: From 8ef519e381e4d0ecbc1f7fd25bf1767869942114 Mon Sep 17 00:00:00 2001 From: Girik Manchanda <74312030+Girik1105@users.noreply.github.com> Date: Thu, 19 Dec 2024 12:50:56 -0700 Subject: [PATCH 11/11] task/VOGRE-30 (#29) * [VOGRE-30] Removed empty imports * [VOGRE-3] Removed extra database comments * [VOGRE-30] Removed JARS secret (depreciated), removed aws credentials * [VOGRE-30] Removed Conceptpower user id and password as now vogon uses its json public access api * [VOGRE-30] Removed celery settings (depreciated) * [VOGRE-30] Removed goat and unwanted goat code, removed submit wait time celery variable * [VOGRE-3] Removed extra import, removed giles extra settings and obselete code * [VOGRE-30] Removed obselete variables * [VOGRE-30] Re added important conceptpower variables * [VOGRE-30] Removed Giles app since giles uses citesphere in external_accounts uti;s * [VOGRE-30] Removed duplicate allowed headers variable --- annotations/forms.py | 28 ------- giles/__init__.py | 0 giles/functions.py | 122 ------------------------------- giles/migrations/0001_initial.py | 24 ------ giles/migrations/__init__.py | 0 giles/models.py | 14 ---- vogon/settings.py | 54 ++------------ 7 files changed, 6 insertions(+), 236 deletions(-) delete mode 100644 giles/__init__.py delete mode 100644 giles/functions.py delete mode 100644 giles/migrations/0001_initial.py delete mode 100644 giles/migrations/__init__.py delete mode 100644 giles/models.py diff --git a/annotations/forms.py b/annotations/forms.py index fc0a1b9d..cce6ee1b 100644 --- a/annotations/forms.py +++ b/annotations/forms.py @@ -169,34 +169,6 @@ def label_from_instance(self, obj): """ return obj.uri - def to_python(self, value): - if value in self.empty_values: - return None - try: - key = 'uri' - py_value = self.queryset.get(**{key: value}) - except self.queryset.model.DoesNotExist: - import goat - goat.GOAT = settings.GOAT - goat.GOAT_APP_TOKEN = settings.GOAT_APP_TOKEN - concept = goat.Concept.retrieve(identifier=value) - - data = dict( - uri=value, - label=concept.data['name'], - description=concept.data['description'], - ) - ctype_data = concept.data['concept_type']# - if ctype_data: - data.update({'typed': Type.objects.get_or_create(uri=ctype_data['identifier'])[0]}) - - py_value = Concept.objects.create(**data) - - return py_value - except (ValueError, TypeError): - raise ValidationError(self.error_messages['invalid_choice'], code='invalid_choice') - return py_value - class TemplateChoiceField(forms.ChoiceField): def label_from_instance(self, obj): diff --git a/giles/__init__.py b/giles/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/giles/functions.py b/giles/functions.py deleted file mode 100644 index 3ce8cb0f..00000000 --- a/giles/functions.py +++ /dev/null @@ -1,122 +0,0 @@ -from django.conf import settings -from django.core.files import File - -from annotations.models import * -from annotations.exceptions import * -from giles.models import * - -import requests, os -from collections import defaultdict - -_fix_url = lambda url: url.replace('http://', 'https://') if url is not None else None - - - -def handle_status_exception(func): - def wrapper(user, *args, **kwargs): - response = func(user, *args, **kwargs) - if response.status_code == 401: # Auth token expired. - try: - user.giles_token.delete() - except AssertionError: - pass - - get_user_auth_token(user, **kwargs) - user.refresh_from_db() - # TODO: we could put some Exception handling here. - return func(user, *args, **kwargs) - elif response.status_code != requests.codes.ok and response.status_code != 202: - message = 'Status %i, content: %s' % (response.status_code, response.content) - logger.error(message) - raise StatusException(response) - return response - return wrapper - - -def api_request(func): - def wrapper(user, *args, **kwargs): - response = func(user, *args, **kwargs) - return response.status_code, response.json() - return wrapper - - -def _create_auth_header(user, **kwargs): - provider = kwargs.get('provider', settings.GILES_DEFAULT_PROVIDER) - # token = user.social_auth.get(provider=provider).extra_data['access_token'] - token = get_user_auth_token(user) - return {'Authorization': 'token %s' % token} - - -def get_user_auth_token(user, **kwargs): - """ - Get the current auth token for a :class:`.User`\. - - If the user has no auth token, retrieve one and store it. - - Supports dependency injection. - - Parameters - ---------- - user : :class:`django.contrib.auth.User` - kwargs : kwargs - - Returns - ------- - str - Giles authorization token for ``user``. - """ - fresh = kwargs.get('fresh', False) - try: - if user.giles_token and not fresh: - return user.giles_token.token - except AttributeError: # RelatedObjectDoesNotExist. - pass # Will proceed to retrieve token. - - try: - status_code, data = get_auth_token(user, **kwargs) - try: - user.giles_token.delete() - except: - pass - - user.giles_token = GilesToken.objects.create(for_user=user, token=data["token"]) - user.save() - return user.giles_token.token - except Exception as E: - print((str(E))) - print((status_code, data)) - template = "Failed to retrieve access token for user {u}" - msg = template.format(u=user.username) - if kwargs.get('raise_exception', False): - raise E - logger.error(msg) - logger.error(str(E)) - - -# @handle_status_exception -@api_request -def get_auth_token(user, **kwargs): - """ - Obtain and store a short-lived authorization token from Giles. - - See https://diging.atlassian.net/wiki/display/GIL/REST+Authentication. - """ - giles = kwargs.get('giles', settings.GILES) - post = kwargs.get('post', settings.POST) - provider = kwargs.get('provider', settings.GILES_DEFAULT_PROVIDER) - app_token = kwargs.get('app_token', settings.GILES_APP_TOKEN) - - path = '/'.join([giles, 'rest', 'token']) - provider_token = user.social_auth.get(provider=provider)\ - .extra_data.get('access_token') - - return post(path, data={'providerToken': provider_token}, - headers={'Authorization': 'token %s' % app_token}) - - - - -def format_giles_url(url, user, dw=300): - """ - """ - return url + '&accessToken=' + get_user_auth_token(user) + '&dw=%i' % 300 diff --git a/giles/migrations/0001_initial.py b/giles/migrations/0001_initial.py deleted file mode 100644 index 6f2b7179..00000000 --- a/giles/migrations/0001_initial.py +++ /dev/null @@ -1,24 +0,0 @@ -# -*- coding: utf-8 -*- - - -from django.db import models, migrations -from django.conf import settings - - -class Migration(migrations.Migration): - - dependencies = [ - migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ] - - operations = [ - migrations.CreateModel( - name='GilesToken', - fields=[ - ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), - ('created', models.DateTimeField(auto_now_add=True)), - ('token', models.CharField(max_length=255)), - ('for_user', models.OneToOneField(related_name='giles_token', to=settings.AUTH_USER_MODEL, on_delete=models.CASCADE)), - ], - ), - ] diff --git a/giles/migrations/__init__.py b/giles/migrations/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/giles/models.py b/giles/models.py deleted file mode 100644 index d8e871f0..00000000 --- a/giles/models.py +++ /dev/null @@ -1,14 +0,0 @@ -from django.db import models, IntegrityError -from annotations.models import VogonUser as User - - -class GilesToken(models.Model): - """ - A short-lived auth token for sending content to Giles on behalf of a user. - - See https://diging.atlassian.net/wiki/display/GIL/REST+Authentication. - """ - - for_user = models.OneToOneField(User, related_name='giles_token', on_delete=models.CASCADE) - created = models.DateTimeField(auto_now_add=True) - token = models.CharField(max_length=255) diff --git a/vogon/settings.py b/vogon/settings.py index a1fdc7a6..5f06a12c 100644 --- a/vogon/settings.py +++ b/vogon/settings.py @@ -11,12 +11,8 @@ """ # Build paths inside the project like this: os.path.join(BASE_DIR, ...) -import os, sys, requests +import os from urllib.parse import urlparse -import socket -import dj_database_url -# import djcelery -from datetime import timedelta from dotenv import load_dotenv load_dotenv() @@ -50,12 +46,10 @@ 'allauth.socialaccount', 'django_inlinecss', 'concepts', - 'giles', 'annotations', 'external_accounts', 'rest_framework', 'corsheaders', - 'djcelery', 'repository', 'oauth2_provider', ) @@ -105,11 +99,6 @@ WSGI_APPLICATION = 'vogon.wsgi.application' -# Database -# https://docs.djangoproject.com/en/1.8/ref/settings/#databases - -# DATABASES = {'default': dj_database_url.config()} -# DATABASES['default']['ENGINE'] = 'django.db.backends.postgresql_psycopg2' DATABASES = { 'default': { @@ -123,14 +112,12 @@ } -# print DATABASES - AUTHENTICATION_BACKENDS = ( 'django.contrib.auth.backends.ModelBackend', # default 'allauth.account.auth_backends.AuthenticationBackend', #Allauth ) -ACCOUNT_AUTHENTICATED_LOGIN_REDIRECTS =True +ACCOUNT_AUTHENTICATED_LOGIN_REDIRECTS = True ANONYMOUS_USER_ID = -1 # Allauth Email Settings @@ -168,9 +155,6 @@ USE_X_FORWARDED_HOST = True SECURE_PROXY_SSL_HEADER = ('HTTP_X_FORWARDED_PROTO', 'https') -# Allow all host headers -ALLOWED_HOSTS = ['*'] - # Static asset configuration BASE_PATH = os.environ.get('BASE_PATH', '/') BASE_DIR = os.path.dirname(os.path.abspath(__file__)) @@ -180,16 +164,12 @@ STATICFILES_DIRS = (os.path.join(BASE_DIR, 'static'), ) -JARS_KEY = '050814a54ac5c81b990140c3c43278031d391676' AUTH_USER_MODEL = 'annotations.VogonUser' es = urlparse(os.environ.get('SEARCHBOX_URL') or 'http://127.0.0.1:9200/') port = es.port or 80 -# AWS Access Key and Secret Key credentials -AWS_ACCESS_KEY = os.environ.get('AWS_ACCESS_KEY', None) -AWS_SECRET_KEY = os.environ.get('AWS_SECRET_KEY', None) -S3_BUCKET = 'vogonweb-test' +# User profile default image DEFAULT_USER_IMAGE = 'https://s3-us-west-2.amazonaws.com/vogonweb-test/defaultprofile.png' TEMPORAL_PREDICATES = { @@ -216,8 +196,8 @@ } } -CONCEPTPOWER_USERID = os.environ.get('CONCEPTPOWER_USERID', None) -CONCEPTPOWER_PASSWORD = os.environ.get('CONCEPTPOWER_PASSWORD', None) +CONCEPTPOWER_USERID = os.environ.get('CONCEPTPOWER_USERID') +CONCEPTPOWER_PASSWORD = os.environ.get('CONCEPTPOWER_PASSWORD') CONCEPTPOWER_ENDPOINT = os.environ.get('CONCEPTPOWER_ENDPOINT') CONCEPTPOWER_NAMESPACE = os.environ.get('CONCEPTPOWER_NAMESPACE') @@ -229,25 +209,10 @@ BASE_URI_NAMESPACE = u'http://www.vogonweb.net' -# Celery config. - -# djcelery.setup_loader() -# CELERYBEAT_SCHEDULE = { -# 'accession_ready_relationsets': { -# 'task': 'annotations.tasks.accession_ready_relationsets', -# 'schedule': timedelta(minutes=10, seconds=0), -# }, -# } - -CELERY_TIMEZONE = 'UTC' - GOOGLE_ANALYTICS_ID = os.environ.get('GOOGLE_ANALYTICS_ID', None) VERSION = '0.4' -GOAT = os.environ.get('GOAT', 'http://127.0.0.1:8000') -GOAT_APP_TOKEN = os.environ.get('GOAT_APP_TOKEN') - LOGLEVEL = os.environ.get('LOGLEVEL', 'DEBUG') @@ -270,16 +235,9 @@ 'viaf:geographic': GEOGRAPHIC_CONCEPT_TYPE, # E53 Place } -SUBMIT_WAIT_TIME = {'days': 3, 'hours': 0, 'minutes': 0} - # Giles Credentials GILES_ENDPOINT = os.environ.get('GILES_ENDPOINT') -IMAGE_AFFIXES = ['png', 'jpg', 'jpeg', 'tiff', 'tif'] -GET = requests.get -POST = requests.post -GILES_APP_TOKEN = os.environ.get('GILES_APP_TOKEN', 'nope') -GILES_DEFAULT_PROVIDER = os.environ.get('GILES_DEFAULT_PROVIDER', 'github') -MAX_GILES_UPLOADS = 20 + CONCEPT_URI_PREFIXES = [ 'http://www.digitalhps.org/',