Skip to content

Commit

Permalink
New: Indexing work nodes
Browse files Browse the repository at this point in the history
  • Loading branch information
ahankinson committed Nov 27, 2024
1 parent 3480411 commit 747c196
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 0 deletions.
1 change: 1 addition & 0 deletions indexer/index_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def _get_sources(cfg: dict) -> Generator[dict, None, None]:
(SELECT GROUP_CONCAT(DISTINCT CONCAT_WS('|:|', stos.relator_code, sours.marc_source) SEPARATOR '|~|') FROM {dbname}.sources_to_sources AS stos LEFT JOIN {dbname}.sources AS sours ON stos.source_b_id = sours.id WHERE marc_tag = '787' AND source_a_id = child.id) AS related_sources,
(SELECT GROUP_CONCAT(DISTINCT do.digital_object_id SEPARATOR ',') FROM {dbname}.digital_object_links AS do WHERE do.object_link_type = 'Source' AND do.object_link_id = child.id) AS digital_objects,
(SELECT GROUP_CONCAT(DISTINCT sw.work_id SEPARATOR '\n') FROM {dbname}.sources_to_works AS sw WHERE sw.source_id = child.id) AS work_ids,
(SELECT GROUP_CONCAT(DISTINCT CONCAT_WS('|:|', wn.id, wn.marc_source) SEPARATOR '|~|') FROM {dbname}.sources_to_work_nodes AS swn LEFT JOIN {dbname}.work_nodes AS wn ON swn.work_node_id = wn.id WHERE swn.source_id = child.id) AS work_nodes,
GROUP_CONCAT(DISTINCT h.marc_source SEPARATOR '\n') AS holdings_marc,
GROUP_CONCAT(DISTINCT hp.marc_source SEPARATOR '\n') as parent_holdings_marc,
GROUP_CONCAT(DISTINCT h.lib_siglum SEPARATOR '\n') AS holdings_org,
Expand Down
76 changes: 76 additions & 0 deletions indexer/records/source.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,21 @@ def create_source_index_documents(record: dict, cfg: dict) -> list:

has_digitization: bool = _get_has_digitization(all_marc_records)

all_work_node_things = []
all_work_node_ids = []
if w := record.get("work_nodes"):
work_nodes: list[dict] = _get_work_nodes(w, source_id)
for wn in work_nodes:
all_work_node_ids.append(wn["external_id"])

all_work_node_things.extend(work_nodes)

work_nodes_json = (
orjson.dumps(all_work_node_things).decode("utf-8")
if all_work_node_things
else None
)

# add some core fields to the source. These are fields that may not be easily
# derived directly from the MARC record, or that include data from the database.
source_core: dict = {
Expand Down Expand Up @@ -308,6 +323,8 @@ def create_source_index_documents(record: dict, cfg: dict) -> list:
"related_sources_json": related_sources_json,
"works_catalogue_json": works_catalogue_json,
"related_institution_sigla_sm": related_institution_sigla,
"work_nodes_json": work_nodes_json,
"work_node_ids": all_work_node_ids,
# purposefully left empty so we can fill this up later.
"external_records_jsonm": [],
"created": record["created"].strftime("%Y-%m-%dT%H:%M:%SZ"),
Expand Down Expand Up @@ -633,3 +650,62 @@ def _get_holding_people_ids(records: list[pymarc.Record]) -> set[str]:
ids.update(p_ids)

return ids


def _get_work_nodes(wns: str, source_id: str) -> Optional[list[dict]]:
all_records = wns.split("|~|")
work_nodes: list[(str, dict)] = []

for record in all_records:
wnid, wn_marc_source = record.split("|:|")
wn_marc_record: Optional[pymarc.Record] = (
create_marc(wn_marc_source) if wn_marc_source else None
)
if not wn_marc_record:
log.error("Could not load Work Node MARC record: %s", wnid)
continue

work_node_id: str = f"work_node_{wnid}"
link_field: pymarc.Field = wn_marc_record.get("024")
if not link_field:
log.error("Work Node without an 024. Skipping: %s", work_node_id)
continue

if link_field and "2" in link_field and "a" in link_field:
ident: str = f"{link_field['2'].lower()}:{link_field['a']}"
else:
log.error(
"Work Node with 024 but without $2 or $a. Skipping: %s", work_node_id
)
continue

creator: pymarc.Field = wn_marc_record.get("100")
composer_name: Optional[str] = None
composer_id: Optional[str] = None
work_title: Optional[str] = None
if creator and "a" in creator:
name: str = creator["a"].strip()
dates: str = f" ({d})" if (d := creator.get("d")) else ""

composer_name = f"{name}{dates}"
composer_id = f"person_{creator["0"]}"

work_title_subf: str = creator["t"]
partial_title_subf: str = creator.get("p", "")

work_title = f"{work_title_subf}{partial_title_subf}"

d: dict = {
"id": work_node_id,
"type": "work_node",
"external_id": ident,
"composer_name": composer_name,
"composer_id": composer_id,
"work_title": work_title,
"this_id": source_id,
"this_type": "source",
}

work_nodes.append({k: v for k, v in d.items() if v})

return work_nodes

0 comments on commit 747c196

Please sign in to comment.