From c65a638fa956c7a43208643b96ce1f7a4542b334 Mon Sep 17 00:00:00 2001 From: Richard Towers Date: Sat, 9 Nov 2024 13:29:00 +0000 Subject: [PATCH 1/4] Add micromark-extension-abbr This is basically the same as the version in https://github.com/richardTowers/remark-abbr, except I've converted the tests from node:test to jest to be consistent with the rest of zmarkdown. This only exports a syntax extension - there's no HTML extension (because micromark doesn't currently support the hooks we'd need to implement one - see https://github.com/orgs/micromark/discussions/181 for more detail). It's only intended to be used in remark-abbr. --- packages/micromark-extension-abbr/.npmignore | 3 + packages/micromark-extension-abbr/README.md | 38 +++++ .../__tests__/plugin.test.js | 137 ++++++++++++++++ .../micromark-extension-abbr/lib/syntax.js | 154 ++++++++++++++++++ .../micromark-extension-abbr/package.json | 47 ++++++ 5 files changed, 379 insertions(+) create mode 100644 packages/micromark-extension-abbr/.npmignore create mode 100644 packages/micromark-extension-abbr/README.md create mode 100644 packages/micromark-extension-abbr/__tests__/plugin.test.js create mode 100644 packages/micromark-extension-abbr/lib/syntax.js create mode 100644 packages/micromark-extension-abbr/package.json diff --git a/packages/micromark-extension-abbr/.npmignore b/packages/micromark-extension-abbr/.npmignore new file mode 100644 index 00000000..858c80b4 --- /dev/null +++ b/packages/micromark-extension-abbr/.npmignore @@ -0,0 +1,3 @@ +__tests__/ +specs/ +.npmignore diff --git a/packages/micromark-extension-abbr/README.md b/packages/micromark-extension-abbr/README.md new file mode 100644 index 00000000..890a0498 --- /dev/null +++ b/packages/micromark-extension-abbr/README.md @@ -0,0 +1,38 @@ +# `micromark-extension-abbr` + +**[micromark][]** extension that parses custom Markdown syntax to handle +abbreviations. 
+ +This package provides the low-level modules for integrating with the micromark +tokenizer. + +## Install + +[npm][]: + +```sh +npm install micromark-extension-abbr +``` + +## API + +### `abbr` + +Support custom syntax to handle abbreviations. + +The export of `abbr` is an extension for the micromark parser (can be passed +in `extensions`). + +## License + +[MIT][license] © [Zeste de Savoir][zds] + + + +[license]: LICENCE + +[micromark]: https://github.com/micromark/micromark + +[npm]: https://docs.npmjs.com/cli/install + +[zds]: https://zestedesavoir.com diff --git a/packages/micromark-extension-abbr/__tests__/plugin.test.js b/packages/micromark-extension-abbr/__tests__/plugin.test.js new file mode 100644 index 00000000..d54865fd --- /dev/null +++ b/packages/micromark-extension-abbr/__tests__/plugin.test.js @@ -0,0 +1,137 @@ +import {preprocess, parse, postprocess} from 'micromark' +import {abbr, abbrTypes} from '../lib/syntax' + +describe('micromark-extension-abbr', () => { + it('parses definitions', () => { + const input = `*[HTML]: Hyper Text Markup Language` + const events = postprocess( + parse({extensions: [abbr]}) + .document() + .write(preprocess()(input, null, true)), + ) + const eventTypes = events.map((event) => [event[0], event[1].type]) + expect(eventTypes).toEqual( + // prettier-ignore + [ + [ 'enter', 'content' ], + [ 'enter', 'abbrDefinition' ], + [ 'enter', 'abbrDefinitionLabel' ], + [ 'enter', 'abbrDefinitionMarker' ], + [ 'exit', 'abbrDefinitionMarker' ], + [ 'enter', 'abbrDefinitionString' ], + [ 'enter', 'data' ], + [ 'exit', 'data' ], + [ 'exit', 'abbrDefinitionString' ], + [ 'enter', 'abbrDefinitionMarker' ], + [ 'exit', 'abbrDefinitionMarker' ], + [ 'exit', 'abbrDefinitionLabel' ], + [ 'enter', 'abbrDefinitionMarker' ], + [ 'exit', 'abbrDefinitionMarker' ], + [ 'enter', 'lineSuffix' ], + [ 'exit', 'lineSuffix' ], + [ 'enter', 'abbrDefinitionValueString' ], + [ 'enter', 'data' ], + [ 'exit', 'data' ], + [ 'exit', 
'abbrDefinitionValueString' ], + [ 'exit', 'abbrDefinition' ], + [ 'exit', 'content' ], + ], + ) + }) + + it('parses definitions without whitespace', () => { + const input = `*[HTML]:Hyper Text Markup Language` + const events = postprocess( + parse({extensions: [abbr]}) + .document() + .write(preprocess()(input, null, true)), + ) + const eventTypes = events.map((event) => [event[0], event[1].type]) + expect(eventTypes).toEqual( + // prettier-ignore + [ + [ 'enter', 'content' ], + [ 'enter', 'abbrDefinition' ], + [ 'enter', 'abbrDefinitionLabel' ], + [ 'enter', 'abbrDefinitionMarker' ], + [ 'exit', 'abbrDefinitionMarker' ], + [ 'enter', 'abbrDefinitionString' ], + [ 'enter', 'data' ], + [ 'exit', 'data' ], + [ 'exit', 'abbrDefinitionString' ], + [ 'enter', 'abbrDefinitionMarker' ], + [ 'exit', 'abbrDefinitionMarker' ], + [ 'exit', 'abbrDefinitionLabel' ], + [ 'enter', 'abbrDefinitionMarker' ], + [ 'exit', 'abbrDefinitionMarker' ], + [ 'enter', 'abbrDefinitionValueString' ], + [ 'enter', 'data' ], + [ 'exit', 'data' ], + [ 'exit', 'abbrDefinitionValueString' ], + [ 'exit', 'abbrDefinition' ], + [ 'exit', 'content' ], + ], + ) + }) + + it('does not parse definitions with empty labels', () => { + const input = `*[]: Empty` + const events = postprocess( + parse({extensions: [abbr]}) + .document() + .write(preprocess()(input, null, true)), + ) + const abbrDefinitions = events.filter( + (event) => event[1].type === abbrTypes.abbrDefinition, + ) + expect(abbrDefinitions).toEqual([]) + }) + + it( + 'does not parse definitions with parens instead of square brackets', + () => { + const input = `*(HTML): Hyper Text Markup Language` + const events = postprocess( + parse({extensions: [abbr]}) + .document() + .write(preprocess()(input, null, true)), + ) + const abbrDefinitions = events.filter( + (event) => event[1].type === abbrTypes.abbrDefinition, + ) + expect(abbrDefinitions).toEqual([]) + }, + ) + + it('does not parse definitions without colons', () => { + const input = 
`*[HTML]; Hyper Text Markup Language` + const events = postprocess( + parse({extensions: [abbr]}) + .document() + .write(preprocess()(input, null, true)), + ) + const abbrDefinitions = events.filter( + (event) => event[1].type === abbrTypes.abbrDefinition, + ) + expect(abbrDefinitions).toEqual([]) + }) + + it('parses definitions with labels containing spaces and punctuation', () => { + const input = `*[MV(VSL) (E&W)]: Motor Vehicles (Variation of Speed Limits) (England & Wales) Regulations` + const events = postprocess( + parse({extensions: [abbr]}) + .document() + .write(preprocess()(input, null, true)), + ) + const abbrDefinitionString = events.find( + (event) => event[1].type === abbrTypes.abbrDefinitionString, + ) + if (abbrDefinitionString === undefined) { + throw new Error('could not find an abbrDefinitionString') + } else { + const [_, token, context] = abbrDefinitionString + expect(context.sliceSerialize(token)).toEqual('MV(VSL) (E&W)') + } + }, + ) +}) diff --git a/packages/micromark-extension-abbr/lib/syntax.js b/packages/micromark-extension-abbr/lib/syntax.js new file mode 100644 index 00000000..1c9b6cb5 --- /dev/null +++ b/packages/micromark-extension-abbr/lib/syntax.js @@ -0,0 +1,154 @@ +/** + * @import { + * ConstructRecord, + * Extension, + * State, + * Tokenizer, + * } from 'micromark-util-types' + */ +import { codes, types } from 'micromark-util-symbol' +import { factoryWhitespace } from 'micromark-factory-whitespace' +import { factoryLabel } from 'micromark-factory-label' +import { + markdownLineEnding, + markdownLineEndingOrSpace +} from 'micromark-util-character' + +/** + * @type {{ + * abbrDefinition: 'abbrDefinition', + * abbrDefinitionLabel: 'abbrDefinitionLabel', + * abbrDefinitionMarker: 'abbrDefinitionMarker', + * abbrDefinitionString: 'abbrDefinitionString', + * abbrDefinitionValueString: 'abbrDefinitionValueString', + * }} + */ +export const abbrTypes = { + abbrDefinition: 'abbrDefinition', + abbrDefinitionLabel: 'abbrDefinitionLabel', + 
abbrDefinitionMarker: 'abbrDefinitionMarker', + abbrDefinitionString: 'abbrDefinitionString', + abbrDefinitionValueString: 'abbrDefinitionValueString' +} + +/** + * @type {Tokenizer} + */ +function abbrDefinitionTokenize (effects, ok, nok) { + const self = this + + return start + + /** + * @type {State} + * + * *[HTML]: Hyper Text Markup Language + * ^ + */ + function start (code) { + effects.enter(abbrTypes.abbrDefinition) + effects.consume(code) + return abbrKeyDefinition + } + + /** + * @type {State} + * + * *[HTML]: Hyper Text Markup Language + * ^ + */ + function abbrKeyDefinition (code) { + if (code === codes.leftSquareBracket) { + return factoryLabel.call( + self, + effects, + abbrKeyValueSeparator, + nok, + // @ts-ignore + abbrTypes.abbrDefinitionLabel, + abbrTypes.abbrDefinitionMarker, + abbrTypes.abbrDefinitionString + )(code) + } + + return nok(code) + } + + /** + * @type {State} + * + * *[HTML]: Hyper Text Markup Language + * ^ + */ + function abbrKeyValueSeparator (code) { + if (code === codes.colon) { + effects.enter(abbrTypes.abbrDefinitionMarker) + effects.consume(code) + effects.exit(abbrTypes.abbrDefinitionMarker) + return abbrKeyValueSeparatorAfter + } + + return nok(code) + } + + /** + * @type {State} + * + * *[HTML]: Hyper Text Markup Language + * ^ + */ + function abbrKeyValueSeparatorAfter (code) { + // Note: whitespace is optional. + const isSpace = markdownLineEndingOrSpace(code) + return isSpace + ? 
factoryWhitespace(effects, abbrValueStart)(code) + : abbrValueStart(code) + } + + /** + * @type {State} + * + * *[HTML]: Hyper Text Markup Language + * ^ + */ + function abbrValueStart (code) { + effects.enter(abbrTypes.abbrDefinitionValueString) + effects.enter(types.chunkString, { contentType: 'string' }) + return abbrValue(code) + } + + /** + * @type {State} + * + * *[HTML]: Hyper Text Markup Language + * ^^^^^^^^^^^^^^^^^^^^^^^^^^ + */ + function abbrValue (code) { + if (markdownLineEnding(code) || code === codes.eof) { + effects.exit(types.chunkString) + effects.exit(abbrTypes.abbrDefinitionValueString) + effects.exit(abbrTypes.abbrDefinition) + return ok(code) + } + + effects.consume(code) + return abbrValue + } +} + +/** + * @type {ConstructRecord} + */ +const contentInitial = { + [codes.asterisk]: { + name: 'abbrDefinition', + tokenize: abbrDefinitionTokenize + } +} + +/** + * @type {Extension} + */ +export const abbr = { + contentInitial +} diff --git a/packages/micromark-extension-abbr/package.json b/packages/micromark-extension-abbr/package.json new file mode 100644 index 00000000..76abb05d --- /dev/null +++ b/packages/micromark-extension-abbr/package.json @@ -0,0 +1,47 @@ +{ + "name": "micromark-extension-abbr", + "version": "0.0.0", + "description": "Add Markdown syntax to handle abbreviations (syntax only)", + "type": "module", + "keywords": [ + "micromark", + "abbr", + "abbreviation", + "plugin", + "extension" + ], + "author": "Stalone ", + "homepage": "https://github.com/zestedesavoir/zmarkdown/tree/master/packages/micromark-extension-abbr", + "license": "MIT", + "main": "lib/syntax.js", + "module": "lib/syntax.js", + "directories": { + "lib": "lib", + "test": "__tests__" + }, + "files": [ + "lib" + ], + "repository": { + "type": "git", + "url": "git+https://github.com/zestedesavoir/zmarkdown.git#master" + }, + "scripts": { + "pretest": "eslint .", + "test": "cross-env NODE_OPTIONS=--experimental-vm-modules jest", + "coverage": "cross-env 
NODE_OPTIONS=--experimental-vm-modules jest --coverage" + }, + "bugs": { + "url": "https://github.com/zestedesavoir/zmarkdown/issues" + }, + "dependencies": { + "micromark-factory-label": "^2.0.0", + "micromark-factory-whitespace": "^2.0.0", + "micromark-util-character": "^2.1.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "devDependencies": { + "micromark": "^4.0.0" + } +} From 02b4aaa74c52acf8a9570b9f49da62e154994503 Mon Sep 17 00:00:00 2001 From: Richard Towers Date: Sat, 9 Nov 2024 14:40:40 +0000 Subject: [PATCH 2/4] Implement remark-abbr and update tests This implementation is pretty much taken verbatim from https://github.com/richardTowers/remark-abbr. I didn't make much of an attempt to merge it with the existing code. The existing test suite is retained, and I haven't moved any of the tests over from richardTowers/remark-abbr yet. A couple of the snapshots needed to be updated, but in my view what the code does now is more correct than what was happening before, so I think they're okay. I haven't implemented the expandFirst functionality just yet. 
--- .../__tests__/__snapshots__/index.js.snap | 41 ++- packages/remark-abbr/__tests__/index.js | 66 ++-- packages/remark-abbr/lib/index.js | 315 +++++++++++------- packages/remark-abbr/package.json | 4 +- 4 files changed, 262 insertions(+), 164 deletions(-) diff --git a/packages/remark-abbr/__tests__/__snapshots__/index.js.snap b/packages/remark-abbr/__tests__/__snapshots__/index.js.snap index 39766982..7783e162 100644 --- a/packages/remark-abbr/__tests__/__snapshots__/index.js.snap +++ b/packages/remark-abbr/__tests__/__snapshots__/index.js.snap @@ -1,30 +1,29 @@ // Jest Snapshot v1, https://goo.gl/fbAQLP exports[`compiles to markdown 1`] = ` -"_abbr_ HTML +"*abbr* HTML > HTML inside quote *[abbr]: abbreviation -*[HTML]: HyperText Markup Language" + +*[noabbr]: explanation that does not match + +*[HTML]: HyperText Markup Language +" `; exports[`compiles to markdown 2`] = ` -"_abbr_ HTML +"*abbr* HTML > HTML inside quote *[abbr]: abbreviation -*[HTML]: HyperText Markup Language" -`; -exports[`compiles to markdown 3`] = ` -"_abbr_ HTML - -> HTML inside quote +*[noabbr]: explanation that does not match -*[abbr]: abbreviation -*[HTML]: HyperText Markup Language" +*[HTML]: HyperText Markup Language +" `; exports[`empty object does not break with references in their own paragraphs 1`] = `"

Here is a test featuring abc and def

"`; @@ -46,10 +45,16 @@ exports[`empty object passes the retro test 2`] = ` The HTML specification is maintained by the W3C. -*[ABBR]: Abbreviation *[REF]: Reference + +*[ABBR]: This gets overridden by the next one. + +*[ABBR]: Abbreviation + *[HTML]: Hyper Text Markup Language -*[W3C]: World Wide Web Consortium" + +*[W3C]: World Wide Web Consortium +" `; exports[`empty object passes the second regression test 1`] = ` @@ -120,10 +125,16 @@ exports[`no-config passes the retro test 2`] = ` The HTML specification is maintained by the W3C. -*[ABBR]: Abbreviation *[REF]: Reference + +*[ABBR]: This gets overridden by the next one. + +*[ABBR]: Abbreviation + *[HTML]: Hyper Text Markup Language -*[W3C]: World Wide Web Consortium" + +*[W3C]: World Wide Web Consortium +" `; exports[`no-config passes the second regression test 1`] = ` diff --git a/packages/remark-abbr/__tests__/index.js b/packages/remark-abbr/__tests__/index.js index c32252ec..1195058d 100644 --- a/packages/remark-abbr/__tests__/index.js +++ b/packages/remark-abbr/__tests__/index.js @@ -7,28 +7,40 @@ import remarkStringify from 'remark-stringify' import remarkAbbr from '../lib/index' -const render = (text, config) => unified() - .use(reParse) - .use(remarkAbbr, config) - .use(remark2rehype) - .use(stringify) - .processSync(text) - -const renderToMarkdown = (text, config) => unified() - .use(reParse) - .use(remarkStringify) - .use(remarkAbbr, config) - .processSync(text) +const render = (text, config) => { + const result = unified() + .use(reParse) + .use(remarkAbbr, config) + .use(remark2rehype, { + handlers: { + abbrDefinition: () => undefined, + } + }) + .use(stringify) + .processSync(text) + return String(result) +} + +const renderToMarkdown = (text, config) => { + const result = unified() + .use(reParse) + .use(remarkAbbr, config) + .use(remarkStringify) + .processSync(text) + + return String(result) +} const configToTest = { 'no-config': undefined, 'empty object': {}, - expandFirst: {expandFirst: true}, 
+ // TODO - add support for expandFirst + // expandFirst: {expandFirst: true}, } for (const [configName, config] of Object.entries(configToTest)) { it(`${configName} renders references`, () => { - const {contents} = render(dedent` + const contents = render(dedent` This is an abbreviation: REF. ref and REFERENCE should be ignored. @@ -45,7 +57,7 @@ for (const [configName, config] of Object.entries(configToTest)) { it(`${configName} passes the first regression test`, () => { - const {contents} = render(dedent` + const contents = render(dedent` The HTML specification is maintained by the W3C:\ [link](https://w3c.github.io/html/), this line had an abbr before link. @@ -59,7 +71,7 @@ for (const [configName, config] of Object.entries(configToTest)) { }) it(`${configName} passes the second regression test`, () => { - const {contents} = render(dedent` + const contents = render(dedent` The HTML specification is maintained by the W3C:\ [link](https://w3c.github.io/html/), this line had an abbr before **link** HTML. @@ -85,15 +97,15 @@ for (const [configName, config] of Object.entries(configToTest)) { *[W3C]: World Wide Web Consortium ` - const {contents: html} = render(input) + const html = render(input) expect(html).toMatchSnapshot() - const {contents: markdown} = renderToMarkdown(input) + const markdown = renderToMarkdown(input) expect(markdown).toMatchSnapshot() }) it(`${configName} no reference`, () => { - const {contents} = render(dedent` + const contents = render(dedent` No reference! 
`, config) @@ -110,17 +122,17 @@ for (const [configName, config] of Object.entries(configToTest)) { *[noabbr]: explanation that does not match *[HTML]: HyperText Markup Language ` - const {contents} = renderToMarkdown(md) + const contents = renderToMarkdown(md) expect(contents).toMatchSnapshot() - const contents1 = renderToMarkdown(md).contents - const contents2 = renderToMarkdown(contents1).contents + const contents1 = renderToMarkdown(md) + const contents2 = renderToMarkdown(contents1) expect(contents1).toBe(contents2) }) it(`${configName} handles abbreviations ending with a period`, () => { - const {contents} = render(dedent` + const contents = render(dedent` A.B.C. and C-D%F. foo *[A.B.C.]: ref1 @@ -132,7 +144,7 @@ for (const [configName, config] of Object.entries(configToTest)) { }) it(`${configName} does not parse words starting with abbr`, () => { - const {contents} = render(dedent` + const contents = render(dedent` ABC ABC ABC *[AB]: ref1 @@ -142,7 +154,7 @@ for (const [configName, config] of Object.entries(configToTest)) { }) it(`${configName} does not parse words ending with abbr`, () => { - const {contents} = render(dedent` + const contents = render(dedent` ABC ABC ABC *[BC]: ref1 @@ -152,7 +164,7 @@ for (const [configName, config] of Object.entries(configToTest)) { }) it(`${configName} does not parse words containing abbr`, () => { - const {contents} = render(dedent` + const contents = render(dedent` ABC ABC ABC *[B]: ref1 @@ -162,7 +174,7 @@ for (const [configName, config] of Object.entries(configToTest)) { }) it(`${configName} does not break with references in their own paragraphs`, () => { - const {contents} = render(dedent` + const contents = render(dedent` Here is a test featuring abc and def *[abc]: A B C diff --git a/packages/remark-abbr/lib/index.js b/packages/remark-abbr/lib/index.js index d496cf95..d12ef1ab 100644 --- a/packages/remark-abbr/lib/index.js +++ b/packages/remark-abbr/lib/index.js @@ -1,152 +1,227 @@ -import { visit } from 
'unist-util-visit' +import { SKIP, CONTINUE, visit } from 'unist-util-visit' +import { abbr, abbrTypes } from 'micromark-extension-abbr' -export default function plugin (options) { - const opts = options || {} - const expandFirst = opts.expandFirst - - function locator (value, fromIndex) { - return value.indexOf('*[', fromIndex) +function splitTextByAbbr (textNode, abbrDefinitions) { + const uniqueAbbreviationMap = new Map() + for (const abbreviation of abbrDefinitions) { + uniqueAbbreviationMap.set(abbreviation.identifier, abbreviation) } - function inlineTokenizer (eat, value, silent) { - const regex = /[*]\[([^\]]*)\]:\s*(.+)\n*/ - const keep = regex.exec(value) - - /* istanbul ignore if - never used (yet) */ - if (silent) return silent - if (!keep || keep.index !== 0) return + const uniqueAbbreviations = [...uniqueAbbreviationMap.values()] + + const matches = uniqueAbbreviations + .map( + (abbr) => + /** @type {const} */ ([abbr, textNode.value.indexOf(abbr.identifier)]) + ) + .filter(([_abbr, index]) => index >= 0) + .map(([abbr, index]) => { + const start = index + const end = index + abbr.identifier.length - 1 + return { + abbr, + start, + end, + prevChar: textNode.value[start - 1], + nextChar: textNode.value[end + 1] + } + }) + .filter((match) => + // We don't want to match "HTML" inside strings like "HHHHTMLLLLLL", so check that the + // surrounding characters are either undefined (i.e. 
start of string / end of string) + // or non-word characters + [match.prevChar, match.nextChar].every( + (c) => c === undefined || /^\W$/.test(c) + ) + ) + .sort((l, r) => l.start - r.start) + + if (matches.length === 0) { + return [textNode] + } - const [matched, abbr, reference] = keep + const nodes = [] + let currentIndex = 0 + for (const match of matches) { + if (match.start > currentIndex) { + nodes.push({ + ...textNode, + value: textNode.value.slice(currentIndex, match.start), + position: textNode.position && { + start: updatePoint(textNode.position.start, currentIndex), + end: updatePoint(textNode.position.start, match.start) + } + }) + } - return eat(matched)({ + const abbrPosition = textNode.position && { + start: updatePoint(textNode.position.start, match.start), + end: updatePoint(textNode.position.start, match.end + 1) + } + const abbr = { type: 'abbr', - abbr, - reference, - children: [ - { type: 'text', value: abbr } - ], + value: match.abbr.value, + identifier: match.abbr.identifier, data: { hName: 'abbr', hProperties: { - title: reference - } - } - }) - } + title: match.abbr.value + }, + hChildren: [{ type: 'text', value: match.abbr.identifier }] + }, + position: abbrPosition + } + nodes.push(abbr) - function transformer (tree) { - const abbrs = {} - const emptyParagraphsToRemove = new Map() + // Move the position forwards + currentIndex = match.end + 1 + } - visit(tree, 'paragraph', find(abbrs, emptyParagraphsToRemove)) - emptyParagraphsToRemove.forEach((indices, key) => { - indices.reverse() - indices.forEach((index) => { - key.children.splice(index, 1) - }) + // If the final abbreviation wasn't at the very end of the value, + // add one final text node with the remainder of the value + if (currentIndex < textNode.value.length) { + nodes.push({ + ...textNode, + value: textNode.value.slice(currentIndex), + position: textNode.position && { + start: updatePoint(textNode.position.start, currentIndex), + end: updatePoint(textNode.position.end, 0) + } 
}) - - visit(tree, replace(abbrs)) } - function find (abbrs, emptyParagraphsToRemove) { - return function one (node, index, parent) { - for (let i = 0; i < node.children.length; i++) { - const child = node.children[i] - if (child.type !== 'abbr') continue - // Store abbr node for later use - abbrs[child.abbr] = child - node.children.splice(i, 1) - i -= 1 - } - // Keep track of empty paragraphs to remove - if (node.children.length === 0) { - const indices = emptyParagraphsToRemove.get(parent) || [] - indices.push(index) - emptyParagraphsToRemove.set(parent, indices) - } + return nodes + + function updatePoint (point, increment) { + return { + line: point.line, + column: point.column + increment, + offset: + point.offset === undefined + ? undefined + : point.offset + increment } } +} - function replace (abbrs) { - function escapeRegExp (str) { - return str.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, '\\$&') // eslint-disable-line no-useless-escape - } +/** + * Create an extension for `mdast-util-from-markdown` to enable abbreviations + * in markdown. 
+ */ +export function abbrFromMarkdown () { + return { + enter: { + abbrDefinition: enterAbbrDefinition, + abbrDefinitionLabel: enterAbbrDefinitionLabel, + abbrDefinitionValueString: enterAbbrDefinitionValueString + }, + exit: { + abbrDefinition: exitAbbrDefinition, + abbrDefinitionLabel: exitAbbrDefinitionLabel, + abbrDefinitionValueString: exitAbbrDefinitionValueString + }, + transforms: [ + (tree) => { + const abbrDefinitions = tree.children.filter( + (x) => x.type === abbrTypes.abbrDefinition + ) + if (abbrDefinitions.length === 0) { + return tree + } - const pattern = Object.keys(abbrs).map(escapeRegExp).join('|') - const regex = new RegExp(`(\\b|\\W)(${pattern})(\\b|\\W)`) - const expanded = {} - - function one (node, index, parent) { - if (Object.keys(abbrs).length === 0) return - if (!node.children) return - - // If a text node is present in child nodes, check if an abbreviation is present - for (let c = 0; c < node.children.length; c++) { - const child = node.children[c] - if (node.type === 'abbr' || child.type !== 'text') continue - if (!regex.test(child.value)) continue - - // Transform node - const newTexts = child.value.split(regex) - - // Remove old text node - node.children.splice(c, 1) - - // Replace abbreviations - for (let i = 0; i < newTexts.length; i++) { - const content = newTexts[i] - if (Object.prototype.hasOwnProperty.call(abbrs, content)) { - const abbr = abbrs[content] - if (expandFirst && !expanded[content]) { - node.children.splice(c + i, 0, { - type: 'text', - value: `${abbr.reference} (${abbr.abbr})` - }) - expanded[content] = true - } else { - node.children.splice(c + i, 0, abbr) - } - } else { - node.children.splice(c + i, 0, { - type: 'text', - value: content - }) + visit(tree, null, (node, index, parent) => { + if (index === undefined || parent === undefined) { + return CONTINUE } - } + + if (node.type === 'text') { + const newNodes = splitTextByAbbr(node, abbrDefinitions) + parent.children.splice(index, 1, ...newNodes) + return 
SKIP + } + + return CONTINUE + }) } - } - return one + ] } - inlineTokenizer.locator = locator + function enterAbbrDefinition (token) { + this.enter( + { + type: abbrTypes.abbrDefinition, + value: '', + identifier: '' + }, + token + ) + } - const Parser = this.Parser + function enterAbbrDefinitionLabel () { + this.buffer() + } - // Inject inlineTokenizer - const inlineTokenizers = Parser.prototype.inlineTokenizers - const inlineMethods = Parser.prototype.inlineMethods - inlineTokenizers.abbr = inlineTokenizer - inlineMethods.splice(0, 0, 'abbr') + function exitAbbrDefinitionLabel () { + const label = this.resume() + const node = this.stack[this.stack.length - 1] + node.identifier = label + } - const Compiler = this.Compiler - if (Compiler) { - const visitors = Compiler.prototype.visitors - if (!visitors) return + function enterAbbrDefinitionValueString () { + this.buffer() + } - const abbrMap = {} - visitors.abbr = (node) => { - if (!abbrMap[node.abbr]) { - abbrMap[node.abbr] = `*[${node.abbr}]: ${node.reference}` - } - return `${node.abbr}` + function exitAbbrDefinitionValueString () { + const node = this.stack.find( + (node) => node.type === abbrTypes.abbrDefinition + ) + if (node !== undefined) { + node.value = this.resume() } + } + + function exitAbbrDefinition (token) { + this.exit(token) + } +} - const originalRootCompiler = visitors.root - visitors.root = function (node) { - return `${originalRootCompiler.apply(this, arguments)}\n${Object.values(abbrMap).join('\n')}` +/** + * Create an extension for `mdast-util-to-markdown` to enable abbreviations + * in markdown. 
+ */ +export function abbrToMarkdown () { + return { + handlers: { + abbr: handleAbbr, + abbrDefinition: handleAbbrDefinition } } - return transformer + + function handleAbbr (node, _, state, info) { + return state.safe(node.identifier, info) + } + + function handleAbbrDefinition (node, _, state, info) { + return state.safe(`*[${node.identifier}]: ${node.value}`, info) + } } +export default function plugin (options) { + // TODO - add support for expand first, or document that it's being removed + // const opts = options || {} + // const expandFirst = opts.expandFirst + + const self = this + const data = self.data() + + const micromarkExtensions = + data.micromarkExtensions || (data.micromarkExtensions = []) + const fromMarkdownExtensions = + data.fromMarkdownExtensions || (data.fromMarkdownExtensions = []) + const toMarkdownExtensions = + data.toMarkdownExtensions || (data.toMarkdownExtensions = []) + + micromarkExtensions.push(abbr) + fromMarkdownExtensions.push(abbrFromMarkdown()) + toMarkdownExtensions.push(abbrToMarkdown()) +} diff --git a/packages/remark-abbr/package.json b/packages/remark-abbr/package.json index 062fe5c7..a581b84e 100644 --- a/packages/remark-abbr/package.json +++ b/packages/remark-abbr/package.json @@ -15,8 +15,8 @@ ], "scripts": { "pretest": "eslint .", - "test": "jest", - "coverage": "jest --coverage" + "test": "cross-env NODE_OPTIONS=--experimental-vm-modules jest", + "coverage": "cross-env NODE_OPTIONS=--experimental-vm-modules jest --coverage" }, "main": "lib/index.js", "module": "lib/index.js", From 3000ad2166ff293a5906f3e638768a2d6c3fa131 Mon Sep 17 00:00:00 2001 From: Richard Towers Date: Sat, 9 Nov 2024 16:02:55 +0000 Subject: [PATCH 3/4] Add support for expandFirst --- .../__tests__/__snapshots__/index.js.snap | 23 +++++++++++-- packages/remark-abbr/__tests__/index.js | 3 +- packages/remark-abbr/lib/index.js | 34 +++++++++++++++---- 3 files changed, 49 insertions(+), 11 deletions(-) diff --git 
a/packages/remark-abbr/__tests__/__snapshots__/index.js.snap b/packages/remark-abbr/__tests__/__snapshots__/index.js.snap index 7783e162..fdf40030 100644 --- a/packages/remark-abbr/__tests__/__snapshots__/index.js.snap +++ b/packages/remark-abbr/__tests__/__snapshots__/index.js.snap @@ -26,6 +26,19 @@ exports[`compiles to markdown 2`] = ` " `; +exports[`compiles to markdown 3`] = ` +"*abbr* HTML + +> HTML inside quote + +*[abbr]: abbreviation + +*[noabbr]: explanation that does not match + +*[HTML]: HyperText Markup Language +" +`; + exports[`empty object does not break with references in their own paragraphs 1`] = `"

Here is a test featuring abc and def

"`; exports[`empty object no reference 1`] = `"

No reference!

"`; @@ -88,10 +101,16 @@ exports[`expandFirst passes the retro test 2`] = ` The HTML specification is maintained by the W3C. -*[ABBR]: Abbreviation *[REF]: Reference + +*[ABBR]: This gets overridden by the next one. + +*[ABBR]: Abbreviation + *[HTML]: Hyper Text Markup Language -*[W3C]: World Wide Web Consortium" + +*[W3C]: World Wide Web Consortium +" `; exports[`expandFirst passes the second regression test 1`] = ` diff --git a/packages/remark-abbr/__tests__/index.js b/packages/remark-abbr/__tests__/index.js index 1195058d..2d0254ee 100644 --- a/packages/remark-abbr/__tests__/index.js +++ b/packages/remark-abbr/__tests__/index.js @@ -34,8 +34,7 @@ const renderToMarkdown = (text, config) => { const configToTest = { 'no-config': undefined, 'empty object': {}, - // TODO - add support for expandFirst - // expandFirst: {expandFirst: true}, + expandFirst: {expandFirst: true}, } for (const [configName, config] of Object.entries(configToTest)) { diff --git a/packages/remark-abbr/lib/index.js b/packages/remark-abbr/lib/index.js index d12ef1ab..a5b12e1e 100644 --- a/packages/remark-abbr/lib/index.js +++ b/packages/remark-abbr/lib/index.js @@ -1,7 +1,8 @@ import { SKIP, CONTINUE, visit } from 'unist-util-visit' import { abbr, abbrTypes } from 'micromark-extension-abbr' -function splitTextByAbbr (textNode, abbrDefinitions) { +function splitTextByAbbr (textNode, abbrDefinitions, seenAbbreviations, opts) { + const expandFirst = opts.expandFirst || false const uniqueAbbreviationMap = new Map() for (const abbreviation of abbrDefinitions) { uniqueAbbreviationMap.set(abbreviation.identifier, abbreviation) @@ -35,6 +36,11 @@ function splitTextByAbbr (textNode, abbrDefinitions) { ) ) .sort((l, r) => l.start - r.start) + .map(match => { + const firstOfItsKind = !seenAbbreviations.has(match.abbr.identifier) + seenAbbreviations.add(match.abbr.identifier) + return { ...match, firstOfItsKind } + }) if (matches.length === 0) { return [textNode] @@ -53,6 +59,14 @@ function splitTextByAbbr 
(textNode, abbrDefinitions) { } }) } + const shouldExpand = expandFirst && match.firstOfItsKind + if (shouldExpand) { + // Add a text node for the expanded definition, up to the opening paren + nodes.push({ + ...textNode, + value: match.abbr.value + ' (' + }) + } const abbrPosition = textNode.position && { start: updatePoint(textNode.position.start, match.start), @@ -73,6 +87,13 @@ function splitTextByAbbr (textNode, abbrDefinitions) { } nodes.push(abbr) + if (shouldExpand) { + // Add a closing paren text node + nodes.push({ + type: 'text', + value: ')' + }) + } // Move the position forwards currentIndex = match.end + 1 } @@ -108,7 +129,7 @@ function splitTextByAbbr (textNode, abbrDefinitions) { * Create an extension for `mdast-util-from-markdown` to enable abbreviations * in markdown. */ -export function abbrFromMarkdown () { +export function abbrFromMarkdown (opts) { return { enter: { abbrDefinition: enterAbbrDefinition, @@ -129,13 +150,14 @@ export function abbrFromMarkdown () { return tree } + const seenAbbreviations = new Set() visit(tree, null, (node, index, parent) => { if (index === undefined || parent === undefined) { return CONTINUE } if (node.type === 'text') { - const newNodes = splitTextByAbbr(node, abbrDefinitions) + const newNodes = splitTextByAbbr(node, abbrDefinitions, seenAbbreviations, opts) parent.children.splice(index, 1, ...newNodes) return SKIP } @@ -207,9 +229,7 @@ export function abbrToMarkdown () { } export default function plugin (options) { - // TODO - add support for expand first, or document that it's being removed - // const opts = options || {} - // const expandFirst = opts.expandFirst + const opts = options || {} const self = this const data = self.data() @@ -222,6 +242,6 @@ export default function plugin (options) { data.toMarkdownExtensions || (data.toMarkdownExtensions = []) micromarkExtensions.push(abbr) - fromMarkdownExtensions.push(abbrFromMarkdown()) + fromMarkdownExtensions.push(abbrFromMarkdown(opts)) 
toMarkdownExtensions.push(abbrToMarkdown()) } From 272201f5e6713af2776d78d3983b67d8d615f9cc Mon Sep 17 00:00:00 2001 From: Richard Towers Date: Sat, 9 Nov 2024 16:12:58 +0000 Subject: [PATCH 4/4] Run npm i --- package-lock.json | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/package-lock.json b/package-lock.json index 9b4a6b13..60d9221f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8486,7 +8486,6 @@ "version": "2.0.3", "resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz", "integrity": "sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==", - "dev": true, "engines": { "node": ">=6" } @@ -8522,7 +8521,6 @@ "version": "1.1.0", "resolved": "https://registry.npmjs.org/devlop/-/devlop-1.1.0.tgz", "integrity": "sha512-RWmIqhcFf1lRYBvNmr7qTNuyCt/7/ns2jbpp1+PalgE/rDQcBT0fioSMUpJ93irlUhC5hrg4cYqe6U+0ImW0rA==", - "dev": true, "dependencies": { "dequal": "^2.0.0" }, @@ -16666,6 +16664,10 @@ "micromark-util-types": "^2.0.0" } }, + "node_modules/micromark-extension-abbr": { + "resolved": "packages/micromark-extension-abbr", + "link": true + }, "node_modules/micromark-extension-gfm": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/micromark-extension-gfm/-/micromark-extension-gfm-3.0.0.tgz", @@ -16824,7 +16826,6 @@ "version": "2.0.0", "resolved": "https://registry.npmjs.org/micromark-factory-label/-/micromark-factory-label-2.0.0.tgz", "integrity": "sha512-RR3i96ohZGde//4WSe/dJsxOX6vxIg9TimLAS3i4EhBAFx8Sm5SmqVfR8E87DPSR31nEAjZfbt91OMZWcNgdZw==", - "dev": true, "funding": [ { "type": "GitHub Sponsors", @@ -16846,7 +16847,6 @@ "version": "2.0.0", "resolved": "https://registry.npmjs.org/micromark-factory-space/-/micromark-factory-space-2.0.0.tgz", "integrity": "sha512-TKr+LIDX2pkBJXFLzpyPyljzYK3MtmllMUMODTQJIUfDGncESaqB90db9IAUcz4AZAJFdd8U9zOp9ty1458rxg==", - "dev": true, "funding": [ { "type": "GitHub Sponsors", @@ -16888,7 +16888,6 @@ "version": "2.0.0", 
"resolved": "https://registry.npmjs.org/micromark-factory-whitespace/-/micromark-factory-whitespace-2.0.0.tgz", "integrity": "sha512-28kbwaBjc5yAI1XadbdPYHX/eDnqaUFVikLwrO7FDnKG7lpgxnvk/XGRhX/PN0mOZ+dBSZ+LgunHS+6tYQAzhA==", - "dev": true, "funding": [ { "type": "GitHub Sponsors", @@ -25367,6 +25366,20 @@ "unist-util-visit": "^2.0.3" } }, + "packages/micromark-extension-abbr": { + "version": "0.0.0", + "license": "MIT", + "dependencies": { + "micromark-factory-label": "^2.0.0", + "micromark-factory-whitespace": "^2.0.0", + "micromark-util-character": "^2.1.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "devDependencies": { + "micromark": "^4.0.0" + } + }, "packages/micromark-extension-iframes": { "version": "0.0.0", "license": "MIT",