// Provenance: reconstructed from patch a522c8f1ef459e05fa57f33795ee6cf7b3a9f746
// (phette23, Fri, 28 Jul 2023 17:01:24 -0700)
// Subject: start on script to make metadata changes from CSV, ref #34
// Target file: utilities/metadata-csv/modify.js
// The same patch adds "csv-reader": "^1.0.12" to the package.json dependencies
// (resolved to 1.0.12 in pnpm-lock.yaml); this script imports it below.
//
// usage:
//      node modify --csv input.csv [--debug]
// input.csv must have a header row. The first column must be the item UUID
// and the second column must be the item version. Every other column is
// treated as a metadata path whose value should be changed.
import fs from 'node:fs'
import { default as fetch, Headers } from 'node-fetch'
import rc from 'rc'
import { DOMParser as xmldom } from '@xmldom/xmldom'
import xpath from 'xpath'
import { default as CSVReader } from 'csv-reader'

const options = rc('metadata-csv', {})

// Fail early with a usage hint instead of letting fs.createReadStream throw
// an opaque TypeError when no CSV path was given.
if (!options.csv) {
    console.error('Error: no CSV file specified. Usage: node modify --csv input.csv [--debug]')
    process.exit(1)
}

// Without a token the header below would literally read "access_token=undefined".
// We only warn (rather than exit) so the previous behavior of attempting the
// requests is preserved.
if (!options.token) {
    console.error('Warning: no "token" option is set, API requests will likely be rejected.')
}

const headers = new Headers({
    'X-Authorization': `access_token=${options.token}`,
    'Accept': 'application/json',
})

/**
 * Log a message to stderr, but only when the "debug" option is truthy.
 * @param {*} msg - value to print
 */
function debug(msg) {
    if (options.debug) console.error(msg)
}

/**
 * Combine the CSV header row with one row of values into a single object.
 * The first two column names are lowercased so they can always be read back
 * as `uuid` and `version`; all other column names are used verbatim as keys.
 * Exits the process with status 1 if no header row has been seen yet.
 * @param {string[]} columns - header row of the CSV
 * @param {string[]} row - one data row of the CSV
 * @returns {Object} hash of column name -> cell value
 */
function makeChangesHash(columns, row) {
    if (columns.length === 0) {
        console.error('Error: could not find header row. Are you sure the first two columns are "uuid,version" (case insensitive)?')
        process.exit(1)
    }
    const changes = {}
    columns.forEach((col, idx) => {
        // lowercase the uuid and version columns for consistent access later
        const key = (idx === 0 || idx === 1) ? col.toLowerCase() : col
        changes[key] = row[idx]
    })
    return changes
}

/**
 * Work-in-progress: apply the requested changes to an item's metadata XML.
 * Currently this only parses the metadata and prints the metadata paths that
 * the CSV row wants to change.
 * @param {Object} item - item as returned by the API; `item.metadata` is parsed as XML
 * @param {Object} changes - hash produced by makeChangesHash()
 */
function modifyItem(item, changes) {
    // Pass the MIME type explicitly; it is the documented second argument of
    // parseFromString and newer xmldom releases reportedly require it.
    // `xml` is not used yet, it is the document the TODO below will operate on.
    let xml = new xmldom().parseFromString(item.metadata, 'text/xml')
    // Everything except the uuid & version columns is a metadata path. Filter
    // by name instead of position: Object.keys() lists integer-like keys
    // first, so dropping "the first two keys" is not reliable.
    const paths = Object.keys(changes).filter(key => key !== 'uuid' && key !== 'version')
    // @TODO how to find and modify XML nodes from the paths?
    // can xpath help here?
    console.log(paths)
    // start with for path of paths look up & print path value vs. changed value
}

/**
 * Fetch one item from the API and hand it to modifyItem(). Failures (network
 * errors, invalid JSON, API error payloads) are logged to stderr and do not
 * reject the returned promise.
 * @param {Object} item - changes hash; `item.uuid` and `item.version` identify the item
 * @returns {Promise<void>} settles once the item has been processed or the error logged
 */
function getItem(item) {
    // encode the CSV-provided values so they cannot alter the URL structure
    const url = `https://vault.cca.edu/api/item/${encodeURIComponent(item.uuid)}/${encodeURIComponent(item.version)}`
    return fetch(url, { headers: headers })
        .then(r => r.json())
        .then(data => {
            // how EQUELLA does API errors
            if (data.error) {
                // throw a real Error (for a stack trace) but keep the API
                // payload attached so it can still be printed in full below
                const err = new Error(`EQUELLA API error: ${data.error}`)
                err.response = data
                throw err
            }
            modifyItem(data, item)
        }).catch(e => {
            console.error(`Error fetching item ${url}`)
            console.error('EQUELLA API Response:')
            console.error(e.response || e)
        })
}

let columns = []
const inputStream = fs.createReadStream(options.csv, 'utf8')

// A missing or unreadable file would otherwise surface as an unhandled
// 'error' event with a raw stack trace.
inputStream.on('error', (e) => {
    console.error(`Error: could not read CSV file "${options.csv}"`)
    console.error(e.message)
    process.exit(1)
})

inputStream.pipe(new CSVReader({ trim: true, skipEmptyLines: true }))
    .on('data', (row) => {
        // Blank or one-column rows have no row[1]; normalize to strings so the
        // header check cannot throw on undefined.
        const first = String(row[0] ?? '').toLowerCase()
        const second = String(row[1] ?? '').toLowerCase()

        if (first === 'uuid' && second === 'version') {
            columns = row
            debug(`CSV columns are ${columns.join(', ')}`)
            return
        }

        const changes = makeChangesHash(columns, row)
        // do not request /api/item/undefined/... for incomplete rows
        if (!changes.uuid || !changes.version) {
            console.error(`Skipping row without uuid and version: ${row.join(',')}`)
            return
        }
        getItem(changes)
    })
    .on('end', () => {
        debug('Finished reading rows from CSV')
    })