Skip to content

Commit

Permalink
feat: allow multiple filenames as arguments
Browse files Browse the repository at this point in the history
  • Loading branch information
sourcefilter committed Nov 19, 2024
1 parent 3ec6ed4 commit 43ecb72
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 9 deletions.
18 changes: 13 additions & 5 deletions feed_ursus/feed_ursus.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@


@click.command()
@click.argument("filename")
@click.argument("filenames", nargs=-1, type=click.Path(exists=True, dir_okay=False))
@click.option(
"--solr_url",
default=None,
Expand All @@ -39,19 +39,27 @@
default="dlp",
help="'sinai' or 'dlp'. Determines the metadata field mapping",
)
def load_csv(filename: str, solr_url: typing.Optional[str], mapping: str):
def load_csv(
filenames: typing.List[click.Path], solr_url: typing.Optional[str], mapping: str
):
"""Load data from a csv.
Args:
filename: A CSV file.
filenames: One or more CSV files.
solr_url: API endpoint for a solr instance.
"""

global mapper
mapper = import_module(f"feed_ursus.mapper.{mapping}")
solr_client = Solr(solr_url, always_commit=True) if solr_url else Solr("")

csv_data = {row["Item ARK"]: row for row in csv.DictReader(open(filename))}
csv_data = {
row["Item ARK"]: row
for filename in rich.progress.track(
filenames, description=f"loading {len(filenames)} files..."
)
for row in csv.DictReader(open(filename, encoding="utf-8"))
}

config = {
"collection_names": {
Expand All @@ -65,7 +73,7 @@ def load_csv(filename: str, solr_url: typing.Optional[str], mapping: str):

mapped_records = []
for row in rich.progress.track(
csv_data.values(), description=f"Importing {filename}..."
csv_data.values(), description=f"Importing {len(csv_data)} records..."
):
if row.get("Object Type") not in ("ChildWork", "Page"):
mapped_records.append(map_record(row, solr_client, config=config))
Expand Down
2 changes: 2 additions & 0 deletions tests/csv/anais_work_simple.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Project Name,Item ARK,Parent ARK,Object Type,File Name,Item Sequence,puplicate,Delete in Title,Type.genre,Type.typeOfResource,viewingHint,Text direction,Visibility,IIIF Range,Language,Name.subject,Name.creator,Title,Format.extent,Alt ID.local,Date.creation,Description.note,Format.dimensions,Opac url,Finding Aid URL,Bucketeer State,IIIF Access URL,IIIF Manifest URL
Nin (Anais) Papers. Collection 2066,ark:/21198/zz00256728,ark:/21198/zz001nx6px,Work,ninan/image/21198-zz00256728_1659676_master.tif,,,,,still image,,,ucla,,,"Nin, Joaquín, 1879-1949",,"Nin, Joaquin. 1914 [photograph]",1 b & w photograph,,,,,,,succeeded,https://iiif.library.ucla.edu/iiif/2/ark%3A%2F21198%2Fzz00256728,https://iiif.library.ucla.edu/ark%3A%2F21198%2Fzz00256728/manifest
21 changes: 20 additions & 1 deletion tests/test_feed_ursus.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,32 @@

import importlib

import click.testing
import pytest # type: ignore
from pysolr import Solr # type: ignore

from feed_ursus import feed_ursus
from . import fixtures # pylint: disable=wrong-import-order

feed_ursus.mapper = importlib.import_module("feed_ursus.mapper.sinai")
feed_ursus.mapper = importlib.import_module("feed_ursus.mapper.dlp")


class TestLoadCsv:
    """Exercise the load_csv click command through click's CliRunner."""

    def test_file_exists(self):
        """Exits with status 0 when invoked with the anais_collection CSV."""
        cli_args = ["tests/csv/anais_collection.csv"]
        outcome = click.testing.CliRunner().invoke(feed_ursus.load_csv, cli_args)
        assert outcome.exit_code == 0

    def test_file_does_not_exist(self):
        """Exits with status 2 when the named file does not exist."""
        cli_args = ["tests/fixtures/nonexistent.csv"]
        outcome = click.testing.CliRunner().invoke(feed_ursus.load_csv, cli_args)
        # The argument is declared with click.Path(exists=True), so a missing
        # path is rejected while parsing arguments; the test expects exit 2.
        assert outcome.exit_code == 2


class TestMapFieldValue:
Expand Down
18 changes: 15 additions & 3 deletions tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,26 @@ def test_feed_ursus():
"""Integration test for feed_ursus."""
solr = Solr(SOLR_URL)
solr.delete(id="xp6xn100zz-89112", commit=True)
solr.delete(id="82765200zz-89112", commit=True)

runner = CliRunner()
result = runner.invoke(
feed_ursus.load_csv, ["--solr_url", SOLR_URL, "tests/csv/anais_collection.csv"]
feed_ursus.load_csv,
[
"--solr_url",
SOLR_URL,
"tests/csv/anais_collection.csv",
"tests/csv/anais_work_simple.csv",
],
)
assert result.exit_code == 0

doc_in_solr = solr.search("id:xp6xn100zz-89112", defType="lucene")
collection_record = solr.search("id:xp6xn100zz-89112", defType="lucene")
# Doesn't run against a fresh solr index, so there's no guarantee the result comes from this run of the feed_ursus command.
# But at least we can see that pysolr works and talks to solr in this environment.
assert doc_in_solr.docs[0]["title_tesim"] == ["Nin (Anais) Papers, circa 1910-1977"]
assert collection_record.docs[0]["title_tesim"] == [
"Nin (Anais) Papers, circa 1910-1977"
]

work_record = solr.search("id:82765200zz-89112", defType="lucene").docs[0]
assert work_record["title_tesim"] == ["Nin, Joaquin. 1914 [photograph]"]

0 comments on commit 43ecb72

Please sign in to comment.