Skip to content

Commit

Permalink
import: fix SUDOC authors facet
Browse files Browse the repository at this point in the history
* Adds `_text` for SUDOC contribution with $ref.

Co-Authored-by: Peter Weber <peter.weber@rero.ch>
  • Loading branch information
rerowep committed Dec 19, 2023
1 parent 948cff4 commit 7957a25
Show file tree
Hide file tree
Showing 9 changed files with 88 additions and 74 deletions.
15 changes: 9 additions & 6 deletions rero_ils/dojson/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2025,12 +2025,15 @@ def get_gnd_de_101(de_588):
f'&operation=searchRetrieve&query=identifier%3D{de_588}'
'&recordSchema=oai_dc'
)
response = requests_retry_session().get(url)
if response.status_code == requests.codes.ok:
result = xmltodict.parse(response.text)
with contextlib.suppress(Exception):
return result['searchRetrieveResponse']['records']['record'][
'recordData']['dc']['dc:identifier']['#text']
try:
response = requests_retry_session().get(url)
if response.status_code == requests.codes.ok:
result = xmltodict.parse(response.text)
with contextlib.suppress(Exception):
return result['searchRetrieveResponse']['records']['record'][
'recordData']['dc']['dc:identifier']['#text']
except Exception as err:
current_app.logger.warning(f'get_gnd_de_101 de_588: {de_588} | {err}')


def build_identifier(data):
Expand Down
87 changes: 45 additions & 42 deletions rero_ils/modules/documents/dojson/contrib/marc21tojson/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -600,48 +600,51 @@ def do_contribution(data, marc21, key, value):
return None

agent = {}
if ref := get_mef_link(
bibid=marc21.bib_id,
reroid=marc21.rero_id,
entity_type=EntityType.PERSON,
ids=utils.force_list(value.get('0')),
key=key
):
agent['$ref'] = ref

# we do not have a $ref
if not agent.get('$ref') and value.get('a'):
agent = build_agent(marc21=marc21, key=key, value=value)[0]

if value.get('4'):
roles = set()
for role in utils.force_list(value.get('4')):
role = role.split('/')[-1].lower()
if len(role) != 3:
error_print('WARNING CONTRIBUTION ROLE LENGTH:',
marc21.bib_id, marc21.rero_id, role)
if role == 'sce':
error_print('WARNING CONTRIBUTION ROLE SCE:',
marc21.bib_id, marc21.rero_id,
'sce --> aus')
role = 'aus'
if role not in _CONTRIBUTION_ROLE:
error_print('WARNING CONTRIBUTION ROLE DEFINITION:',
marc21.bib_id, marc21.rero_id, role)
role = 'ctb'
roles.add(role)
elif key[:3] == '100':
roles = ['cre']
elif key[:3] == '711':
roles = ['aut']
else:
roles = ['ctb']
if agent:
return {
'entity': agent,
'role': list(roles)
}
return None
if value.get('a'):
agent_data = build_agent(marc21=marc21, key=key, value=value)[0]

if ref := get_mef_link(
bibid=marc21.bib_id,
reroid=marc21.rero_id,
entity_type=EntityType.PERSON,
ids=utils.force_list(value.get('0')),
key=key
):
agent = {
'$ref': ref,
'_text': agent_data['authorized_access_point']
}
else:
agent = agent_data

if value.get('4'):
roles = set()
for role in utils.force_list(value.get('4')):
role = role.split('/')[-1].lower()
if len(role) != 3:
error_print('WARNING CONTRIBUTION ROLE LENGTH:',
marc21.bib_id, marc21.rero_id, role)
if role == 'sce':
error_print('WARNING CONTRIBUTION ROLE SCE:',
marc21.bib_id, marc21.rero_id,
'sce --> aus')
role = 'aus'
if role not in _CONTRIBUTION_ROLE:
error_print('WARNING CONTRIBUTION ROLE DEFINITION:',
marc21.bib_id, marc21.rero_id, role)
role = 'ctb'
roles.add(role)
elif key[:3] == '100':
roles = ['cre']
elif key[:3] == '711':
roles = ['aut']
else:
roles = ['ctb']
if agent:
return {
'entity': agent,
'role': list(roles)
}


def do_specific_document_relation(data, marc21, key, value):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -663,7 +663,10 @@ def unimarc_to_contribution(self, key, value):
key=key
)):
return {
'entity': {'$ref': ref},
'entity': {
'$ref': ref,
'_text': create_authorized_access_point(agent)
},
'role': roles
}
else:
Expand Down
11 changes: 0 additions & 11 deletions rero_ils/modules/documents/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,17 +70,6 @@ def filter_document_type_buckets(buckets):
]


def clean_text(data):
    """Return a copy of data with every ``_text`` key removed.

    Lists and dicts are walked recursively and rebuilt; any other value is
    returned as is. The input structure is never modified: ``_text`` keys
    are filtered out while the copy is built instead of being deleted from
    the caller's dicts.

    :param data: arbitrarily nested structure of lists, dicts and scalars.
    :returns: new structure of the same shape without any ``_text`` key.
    """
    if isinstance(data, list):
        return [clean_text(val) for val in data]
    if isinstance(data, dict):
        # Skip `_text` during the rebuild rather than `del`-ing it, so the
        # caller's (possibly shared) dicts stay untouched.
        return {
            key: clean_text(val)
            for key, val in data.items()
            if key != '_text'
        }
    return data


def display_alternate_graphic_first(language):
"""Display alternate graphic first.
Expand Down
20 changes: 12 additions & 8 deletions rero_ils/modules/imports/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@
from requests.exceptions import Timeout
from six import BytesIO

from ..documents.dojson.contrib.marc21tojson import marc21_dnb, marc21_kul, \
marc21_loc, marc21_slsp, marc21_ugent
from ..documents.dojson.contrib.unimarctojson import unimarc
from rero_ils.modules.documents.dojson.contrib.marc21tojson import \
marc21_dnb, marc21_kul, marc21_loc, marc21_slsp, marc21_ugent
from rero_ils.modules.documents.dojson.contrib.unimarctojson import unimarc


class Import(object):
Expand Down Expand Up @@ -178,10 +178,14 @@ def calculate_aggregations(self, record, id):
date = provision_activity.get('startDate')
self.calculate_aggregations_add('year', date, id)

contribution = record.get('contribution', [])
for agent in contribution:
name = agent.get('entity', {}).get('authorized_access_point')
self.calculate_aggregations_add('author', name, id)
for agent in record.get('contribution', []):
if authorized_access_point := agent.get(
'entity', {}).get('authorized_access_point'):
name = authorized_access_point
elif text := agent.get('entity', {}).get('_text'):
name = text
if name:
self.calculate_aggregations_add('author', name, id)

languages = record.get('language', [])
for language in languages:
Expand Down Expand Up @@ -469,7 +473,7 @@ def _split_stream(stream):
self.status_code = 200
except Timeout as error:
current_app.logger.warning(f'{self.name}: {error}')
abort(503, description='Timeout')
# abort(503, description='Timeout')
except Exception as error:
current_app.logger.error(
f'{type(error).__name__} {self.name}: {error}')
Expand Down
2 changes: 1 addition & 1 deletion rero_ils/modules/imports/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def __init__(self, **kwargs):
)

def get(self, **kwargs):
"""Implement the GET /test."""
"""Implement the GET."""
no_cache = True if flask_request.args.get('no_cache') else False
query = flask_request.args.get('q')
try:
Expand Down
6 changes: 3 additions & 3 deletions tests/api/documents/test_documents_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@
import mock
from flask import url_for
from invenio_accounts.testutils import login_user_via_session
from utils import VerifyRecordPermissionPatch, flush_index, get_json, \
mock_response, postdata
from utils import VerifyRecordPermissionPatch, clean_text, flush_index, \
get_json, mock_response, postdata

from rero_ils.modules.commons.identifiers import IdentifierType
from rero_ils.modules.documents.api import DocumentsSearch
from rero_ils.modules.documents.utils import clean_text, get_remote_cover
from rero_ils.modules.documents.utils import get_remote_cover
from rero_ils.modules.documents.views import can_request, \
record_library_pickup_locations
from rero_ils.modules.operation_logs.api import OperationLogsSearch
Expand Down
5 changes: 3 additions & 2 deletions tests/api/test_external_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@
import mock
from flask import url_for
from invenio_accounts.testutils import login_user_via_session
from utils import VerifyRecordPermissionPatch, get_json, mock_response, \
to_relative_url
from utils import VerifyRecordPermissionPatch, clean_text, get_json, \
mock_response, to_relative_url

from rero_ils.modules.documents.api import Document
from rero_ils.modules.imports.api import LoCImport
Expand Down Expand Up @@ -426,6 +426,7 @@ def test_documents_import_dnb_isbn(mock_get, client, dnb_isbn_123,
data.update({
"$schema": "https://bib.rero.ch/schemas/documents/document-v0.0.1.json"
})
data = clean_text(data)
assert Document.create(data)
marc21_link = res_j.get('hits').get('hits')[0].get('links').get('marc21')

Expand Down
11 changes: 11 additions & 0 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,3 +456,14 @@ def patch_expiration_date(data):
data['patron']['expiration_date'] = \
(datetime.now() + timedelta(days=365)).strftime('%Y-%m-%d')
return data


def clean_text(data):
    """Return a copy of data with every ``_text`` key removed.

    Lists and dicts are walked recursively and rebuilt; any other value is
    returned as is. The input structure is never modified: ``_text`` keys
    are filtered out while the copy is built instead of being deleted from
    the caller's dicts, so fixtures passed in can be reused afterwards.

    :param data: arbitrarily nested structure of lists, dicts and scalars.
    :returns: new structure of the same shape without any ``_text`` key.
    """
    if isinstance(data, list):
        return [clean_text(val) for val in data]
    if isinstance(data, dict):
        # Skip `_text` during the rebuild rather than `del`-ing it, so the
        # caller's (possibly shared) dicts stay untouched.
        return {
            key: clean_text(val)
            for key, val in data.items()
            if key != '_text'
        }
    return data

0 comments on commit 7957a25

Please sign in to comment.