script to create new items from CSV, ref #34

cca · Sep 11, 2023 · b99084e · b99084e
1 parent 4795053
commit b99084e
Show file tree

Hide file tree

Showing 2 changed files with 140 additions and 8 deletions.
diff --git a/utilities/metadata-csv/create.js b/utilities/metadata-csv/create.js
@@ -0,0 +1,107 @@
+// create metadata-only records from a CSV
+// usage:
+//      node create --csv input.csv --collection $UUID [--draft] [--dryrun]
+import fs from 'node:fs'
+import https from 'node:https'
+import { pathToFileURL } from 'node:url'
+
+import { default as fetch } from 'node-fetch'
+import rc from 'rc'
+import { DOMParser, XMLSerializer } from '@xmldom/xmldom'
+import { default as CSVReader } from 'csv-reader'
+
+import { insertNewElement } from './modify.js'
+
+let options = rc('metadata-csv', { 'root': 'https://vault.cca.edu/api' })
+
+const headers = {
+    'X-Authorization': 'access_token=' + options.token,
+    'Accept': 'application/json',
+    'Content-Type': 'application/json',
+}
+const httpsAgent = new https.Agent({
+    maxSockets: 5
+})
+
+function makeXMLDoc(values) {
+    let doc = new DOMParser().parseFromString('<xml/>')
+    for (let i = 0; i < values.length; i++) {
+        let value = values[i].trim()
+        if (value !== "") insertNewElement(doc, columns[i], value)
+    }
+    return doc
+}
+
+function createItem(values) {
+    // https://vault.cca.edu/apidocs.do#operations-Items-newItem
+    const doc = makeXMLDoc(values)
+    const item = {
+        metadata: new XMLSerializer().serializeToString(doc),
+        collection: {
+            uuid: options.collection
+        }
+    }
+
+    if (options.dryrun) {
+        return console.log(`Item that would've been created:\n`, item)
+    }
+
+    fetch(`${options.root}/item/?draft=${!!options.draft}`, {
+        agent: httpsAgent,
+        method: 'POST',
+        headers: headers,
+        body: JSON.stringify(item),
+    }).then(r => {
+        // EQUELLA responds with an empty body on success, new item URL is in location header
+        if (r.ok) return console.log(`Successfully created item: ${r.headers.get('location')}`)
+        // if we're unsuccessful we might have error JSON
+        return r.json()
+    }).then(data => {
+        if (data && data.error) throw (data)
+    }).catch(e => {
+        console.error('Error editing item', item)
+        console.error('EQUELLA API Response:')
+        console.error(e)
+    })
+}
+
+let columns = null
+const main = () => {
+    fs.createReadStream(options.csv, 'utf8').pipe(new CSVReader({ trim: true }))
+        .on('data', (row) => {
+            if (!columns) {
+                columns = row
+                columns.every(c => {
+                    if (c.indexOf('/xml/') !== 0) {
+                        console.error(`Error: column "${c}" does not begin with "/xml/". All column headers in the CSV must be fully-specified XPath expressions.`)
+                        process.exit(1)
+                    }
+                })
+            } else {
+                createItem(row)
+            }
+        })
+}
+
+if (import.meta.url.replace(/\.js$/, '') === pathToFileURL(process.argv[1]).href.replace(/\.js$/, '')) {
+    // usage info
+    if (options.h || options.help) {
+        console.log('Usage:\n\tnode create --csv input.csv [--draft] [--dryrun]\n')
+        console.log('Create metadata-only records based on a CSV of metadata. The CSV must have a header row of XPaths (e.g. "/xml/mods/abstract"). You must use full paths that start with "/xml/".')
+        console.log('\nOptions:')
+        console.log('\t--csv:'.padEnd(12) + ' spreadsheet of items to create')
+        console.log('\t--collection'.padEnd(12) + ' UUID of the collection for the items')
+        console.log('\t--draft:'.padEnd(12) + ' create items in draft state')
+        console.log('\t--dryrun:'.padEnd(12) + ' do not modify records, but print XML documents')
+        process.exit(0)
+    }
+
+    const uuid_regex = /^[0-9a-fA-F]{8}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{12}$/
+    if (!options.collection || !options.collection.match(uuid_regex)) {
+        console.error('Error: must supply a collection UUID with the --collection flag.')
+        process.exit(1)
+    }
+
+    if (options.dryrun) console.log('Dry run: no records will be modified.')
+    main()
+}
diff --git a/utilities/metadata-csv/readme.md b/utilities/metadata-csv/readme.md
@@ -25,11 +25,8 @@ To edit items, the CSV passed to the modify script can't use the labels. It need
 
 * How to handle multiple metadata nodes? E.g. multiple /mods/name/namePart
   * Currently, we can map columns one-by-one like `"/mods/name[1]/namePart": "Creator 1"`
-* Allow a special `DELETE` value (or similar) which lets us _remove_ fields with the modify script
-  * Right now, we could use empty CSV values to set XML nodes to empty string as a workaround
-* ~~Support adding new values~~
-* Bring the two scripts into alignment
-  * index.js's JSON map applies an `/xml` prefix on its own while the modify CSV expects the user to supply it
+* Bring the scripts into alignment
+  * index.js's JSON map applies an `/xml` prefix on its own while modify & create expect the user to supply it
   * the CSV expects UUID & version columns but the exported metadata uses a single URL column instead
 
 ## Downloading Metadata to a CSV
@@ -40,7 +37,8 @@ Download all items matching a search into a CSV with selected metadata nodes inc
 >  # example of downloading from a specific collection
 > node index --collections=5e6a957b-80d4-4dee-9081-7186586fbbe5 --metadataMap map.json > coll.csv
 > # getting Hamaguchi items from within Libraries collection
-> node index --metadataMap hamaguchi-map.json --where="/xml/mods/relatedItem/title = 'Hamaguchi Study Print Collection'" > hamaguchi.csv
+> node index --metadataMap hamaguchi-map.json \
+> --where="/xml/mods/relatedItem/title = 'Hamaguchi Study Print Collection'" > hamaguchi.csv
 ```
 
 Any of the parameters we can pass to the openEQUELLA Search API route are accepted on the command line: https://vault.cca.edu/apidocs.do#!/search/searchItems_get_0
@@ -51,11 +49,15 @@ Secondly, write a JSON map of XML paths to CSV column headers. Examples for the
 
 Use the CSV of modifications described in the **Setup** section with the `modify` script. The script has usage information `node modify -h`:
 
-```sh
+```
 Usage:
  node modify --csv input.csv [--debug] [--dryrun]
 
-Modify records based on a CSV of metadata. The CSV must have a header row, the first column must be the item UUID, and the second column must be the item version. The rest are treaded as metadata columns where the header is the XPath of the field to be modified (e.g. "/xml/mods/abstract"). It is recommended to use full paths that start with "/xml/mods".
+Modify records based on a CSV of metadata. The CSV must have a header row, the
+first column must be the item UUID, and the second column must be the item
+version. The rest are treated as metadata columns where the header is the XPath
+of the field to be modified (e.g. "/xml/mods/abstract"). It is recommended to
+use full paths that start with "/xml/mods".
 
 Options:
  --csv:     metadata changes spreadsheet
@@ -64,3 +66,26 @@ Options:
 ```
 
 Only records where a change in one of the metadata fields is detected are modified. Items are modified in-place (as opposed to creating a new version with the changes). The script cannot add or remove attachments, though we can modify metadata fields that reference attachments (e.g. `/xml/mods/part`), but this would rarely make sense to do.
+
+## Creating New Items From a CSV
+
+We can create new items from a CSV like the one described in the **Setup** section, with XPath column headers but no "uuid" nor "version" columns. These are _metadata-only_ items for now. We could develop the ability to add attachments, but it would add substantial complexity.
+
+```
+Usage:
+ node create --csv input.csv --collection $UUID [--draft] [--dryrun]
+
+Create metadata-only records based on a CSV of metadata. The CSV must have a
+header row of XPaths (e.g. "/xml/mods/abstract"). You must use full paths that
+start with "/xml/".
+
+Options:
+ --csv:     spreadsheet of items to create
+ --collection UUID of the collection for the items
+ --draft:   create items in draft state
+ --dryrun:  do not modify records, but print XML documents
+```
+
+For testing (creates draft items in a test collection) try:
+
+`node create --csv data/create.csv --collection e1722640-f782-4a53-b20d-cda384f1aa22 --draft`