From 49c1e380344201d208955dfa592eb3f8b56e0f10 Mon Sep 17 00:00:00 2001 From: Andy Wallace Date: Tue, 19 Nov 2024 15:47:19 -0800 Subject: [PATCH] fix: Use "Parent ARK" to get parent collection name(s) --- feed_ursus/feed_ursus.py | 12 ++++++------ feed_ursus/mapper/dlp.py | 8 ++++++-- tests/test_feed_ursus.py | 9 ++++----- tests/test_integration.py | 5 ++++- 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/feed_ursus/feed_ursus.py b/feed_ursus/feed_ursus.py index 2244c61..ace2478 100755 --- a/feed_ursus/feed_ursus.py +++ b/feed_ursus/feed_ursus.py @@ -63,7 +63,7 @@ def load_csv( config = { "collection_names": { - row["Item ARK"]: row["Title"] + row["Item ARK"].replace("ark:/", "").replace("/", "-")[::-1]: row["Title"] for row in csv_data.values() if row.get("Object Type") == "Collection" }, @@ -226,10 +226,11 @@ def map_record( or thumbnail_from_manifest(record) ) - # COLLECTION NAME - if "Parent ARK" in row and row["Parent ARK"] in config["collection_names"]: - dlcs_collection_name = config["collection_names"][row["Parent ARK"]] - record["dlcs_collection_name_tesim"] = [dlcs_collection_name] + # COLLECTIONS + record["member_of_collections_ssim"] = [ + config["collection_names"][id] + for id in record.get("member_of_collection_ids_ssim", []) + ] # FIELDS record["uniform_title_sim"] = record.get("uniform_title_tesim") @@ -270,7 +271,6 @@ def map_record( record["location_sim"] = record.get("location_tesim") record["named_subject_sim"] = record.get("named_subject_tesim") record["human_readable_resource_type_sim"] = record.get("resource_type_tesim") - record["member_of_collections_ssim"] = record.get("dlcs_collection_name_tesim") record["combined_subject_ssim"] = [ *record.get("named_subject_tesim", []), diff --git a/feed_ursus/mapper/dlp.py b/feed_ursus/mapper/dlp.py index 05237fa..d820b99 100644 --- a/feed_ursus/mapper/dlp.py +++ b/feed_ursus/mapper/dlp.py @@ -236,7 +236,7 @@ def visibility(row: typing.Mapping[str, str]) -> typing.Optional[str]: "dimensions_tesim": "Format.dimensions", "director_sim": ["Director", "Name.director"], "director_tesim": ["Director", "Name.director"], - "dlcs_collection_name_tesim": "Relation.isPartOf", + # "dlcs_collection_name_tesim": "", # feed_ursus.py gets from "Parent ARK" "edition_ssm": "Edition", "editor_tesim": ["Editor", "Name.editor"], "electronic_locator_ss": ["External item record", "View Record"], @@ -314,7 +314,11 @@ def visibility(row: typing.Mapping[str, str]) -> typing.Optional[str]: "masthead_parameters_ssi": "Masthead", "medium_tesim": "Format.medium", "medium_sim": "Format.medium", - "member_of_collection_ids_ssim": "Parent ARK", + "member_of_collection_ids_ssim": lambda x: [ + ark.replace("ark:/", "").replace("/", "-")[::-1] + for ark in x.get("Parent ARK", "").split("|~|") + if ark # Skip empty values like '' + ], "musician_sim": ["Musician", "Name.musician"], "musician_tesim": ["Musician", "Name.musician"], "named_subject_tesim": [ diff --git a/tests/test_feed_ursus.py b/tests/test_feed_ursus.py index d8e2c51..5fe5c87 100644 --- a/tests/test_feed_ursus.py +++ b/tests/test_feed_ursus.py @@ -75,7 +75,7 @@ def test_get_bare_field_name(): class TestMapRecord: """function map_record""" - CONFIG = {"collection_names": {"ark:/123/collection": "Test Collection KGSL"}} + CONFIG = {"collection_names": {"noitcelloc-321": "Test Collection KGSL"}} solr_client = Solr("http://localhost:8983/solr/californica", always_commit=True) def test_maps_record(self, monkeypatch): @@ -101,7 +101,7 @@ def test_maps_record(self, monkeypatch): "human_readable_resource_type_sim": None, "id": "ark:/123/abc", "location_sim": None, - "member_of_collections_ssim": None, + "member_of_collections_ssim": [], "named_subject_sim": None, "place_of_origin_sim": None, "script_sim": None, @@ -139,10 +139,10 @@ def test_maps_record(self, monkeypatch): } def test_sets_id(self): - """sets 'id' equal to 'Item ARK'/'ark_ssi'""" + """sets 'id' to reversed ark""" result = feed_ursus.map_record( { - "Item ARK": "ark:/123/abc", + "Item ARK": "cba-321", "IIIF Manifest URL": "https://iiif.library.ucla.edu/ark%3A%2F123%2Fabc/manifest", }, self.solr_client, @@ -207,7 +207,6 @@ def test_sets_collection(self): self.solr_client, config=self.CONFIG, ) - assert result["dlcs_collection_name_tesim"] == ["Test Collection KGSL"] assert result["member_of_collections_ssim"] == ["Test Collection KGSL"] @pytest.mark.parametrize( diff --git a/tests/test_integration.py b/tests/test_integration.py index 4fe1d9c..fc44c8b 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -4,7 +4,6 @@ import importlib import os -import time from click.testing import CliRunner from pysolr import Solr # type: ignore @@ -44,3 +43,7 @@ def test_feed_ursus(): work_record = solr.search("id:82765200zz-89112", defType="lucene").docs[0] assert work_record["title_tesim"] == ["Nin, Joaquin. 1914 [photograph]"] + assert work_record["member_of_collections_ssim"] == [ + "Nin (Anais) Papers, circa 1910-1977" + ] + assert work_record["member_of_collection_ids_ssim"] == ["xp6xn100zz-89112"]