Skip to content

Commit

Permalink
fix: Use "Parent ARK" to get parent collection name(s)
Browse files: browse the repository at this point in the history
  • Loading branch information
sourcefilter committed Nov 20, 2024
1 parent 43ecb72 commit 49c1e38
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 14 deletions.
12 changes: 6 additions & 6 deletions feed_ursus/feed_ursus.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def load_csv(

config = {
"collection_names": {
row["Item ARK"]: row["Title"]
row["Item ARK"].replace("ark:/", "").replace("/", "-")[::-1]: row["Title"]
for row in csv_data.values()
if row.get("Object Type") == "Collection"
},
Expand Down Expand Up @@ -226,10 +226,11 @@ def map_record(
or thumbnail_from_manifest(record)
)

# COLLECTION NAME
if "Parent ARK" in row and row["Parent ARK"] in config["collection_names"]:
dlcs_collection_name = config["collection_names"][row["Parent ARK"]]
record["dlcs_collection_name_tesim"] = [dlcs_collection_name]
# COLLECTIONS
record["member_of_collections_ssim"] = [
config["collection_names"][id]
for id in record.get("member_of_collection_ids_ssim", [])
]

# FIELDS
record["uniform_title_sim"] = record.get("uniform_title_tesim")
Expand Down Expand Up @@ -270,7 +271,6 @@ def map_record(
record["location_sim"] = record.get("location_tesim")
record["named_subject_sim"] = record.get("named_subject_tesim")
record["human_readable_resource_type_sim"] = record.get("resource_type_tesim")
record["member_of_collections_ssim"] = record.get("dlcs_collection_name_tesim")

record["combined_subject_ssim"] = [
*record.get("named_subject_tesim", []),
Expand Down
8 changes: 6 additions & 2 deletions feed_ursus/mapper/dlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def visibility(row: typing.Mapping[str, str]) -> typing.Optional[str]:
"dimensions_tesim": "Format.dimensions",
"director_sim": ["Director", "Name.director"],
"director_tesim": ["Director", "Name.director"],
"dlcs_collection_name_tesim": "Relation.isPartOf",
# "dlcs_collection_name_tesim" is intentionally not mapped here; feed_ursus.py gets the collection name from "Parent ARK" instead.
"edition_ssm": "Edition",
"editor_tesim": ["Editor", "Name.editor"],
"electronic_locator_ss": ["External item record", "View Record"],
Expand Down Expand Up @@ -314,7 +314,11 @@ def visibility(row: typing.Mapping[str, str]) -> typing.Optional[str]:
"masthead_parameters_ssi": "Masthead",
"medium_tesim": "Format.medium",
"medium_sim": "Format.medium",
"member_of_collection_ids_ssim": "Parent ARK",
"member_of_collection_ids_ssim": lambda x: [
ark.replace("ark:/", "").replace("/", "-")[::-1]
for ark in x.get("Parent ARK", "").split("|~|")
if ark # Skip empty values like ''
],
"musician_sim": ["Musician", "Name.musician"],
"musician_tesim": ["Musician", "Name.musician"],
"named_subject_tesim": [
Expand Down
9 changes: 4 additions & 5 deletions tests/test_feed_ursus.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def test_get_bare_field_name():
class TestMapRecord:
"""function map_record"""

CONFIG = {"collection_names": {"ark:/123/collection": "Test Collection KGSL"}}
CONFIG = {"collection_names": {"noitcelloc-321": "Test Collection KGSL"}}
solr_client = Solr("http://localhost:8983/solr/californica", always_commit=True)

def test_maps_record(self, monkeypatch):
Expand All @@ -101,7 +101,7 @@ def test_maps_record(self, monkeypatch):
"human_readable_resource_type_sim": None,
"id": "ark:/123/abc",
"location_sim": None,
"member_of_collections_ssim": None,
"member_of_collections_ssim": [],
"named_subject_sim": None,
"place_of_origin_sim": None,
"script_sim": None,
Expand Down Expand Up @@ -139,10 +139,10 @@ def test_maps_record(self, monkeypatch):
}

def test_sets_id(self):
"""sets 'id' equal to 'Item ARK'/'ark_ssi'"""
"""sets 'id' to reversed ark"""
result = feed_ursus.map_record(
{
"Item ARK": "ark:/123/abc",
"Item ARK": "cba-321",
"IIIF Manifest URL": "https://iiif.library.ucla.edu/ark%3A%2F123%2Fabc/manifest",
},
self.solr_client,
Expand Down Expand Up @@ -207,7 +207,6 @@ def test_sets_collection(self):
self.solr_client,
config=self.CONFIG,
)
assert result["dlcs_collection_name_tesim"] == ["Test Collection KGSL"]
assert result["member_of_collections_ssim"] == ["Test Collection KGSL"]

@pytest.mark.parametrize(
Expand Down
5 changes: 4 additions & 1 deletion tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import importlib
import os
import time

from click.testing import CliRunner
from pysolr import Solr # type: ignore
Expand Down Expand Up @@ -44,3 +43,7 @@ def test_feed_ursus():

work_record = solr.search("id:82765200zz-89112", defType="lucene").docs[0]
assert work_record["title_tesim"] == ["Nin, Joaquin. 1914 [photograph]"]
assert work_record["member_of_collections_ssim"] == [
"Nin (Anais) Papers, circa 1910-1977"
]
assert work_record["member_of_collection_ids_ssim"] == ["xp6xn100zz-89112"]

0 comments on commit 49c1e38

Please sign in to comment.