From c65a638fa956c7a43208643b96ce1f7a4542b334 Mon Sep 17 00:00:00 2001
From: Richard Towers <richard.towers@digital.cabinet-office.gov.uk>
Date: Sat, 9 Nov 2024 13:29:00 +0000
Subject: [PATCH 1/4] Add micromark-extension-abbr

This is basically the same as the version in
https://github.com/richardTowers/remark-abbr, except I've converted the
tests from node:test to jest to be consistent with the rest of
zmarkdown.

This only exports a syntax extension - there's no HTML extension
(because micromark doesn't currently support the hooks we'd need to
implement one - see https://github.com/orgs/micromark/discussions/181
for more detail). It's only intended to be used in remark-abbr.
---
 packages/micromark-extension-abbr/.npmignore  |   3 +
 packages/micromark-extension-abbr/README.md   |  38 +++++
 .../__tests__/plugin.test.js                  | 137 ++++++++++++++++
 .../micromark-extension-abbr/lib/syntax.js    | 154 ++++++++++++++++++
 .../micromark-extension-abbr/package.json     |  47 ++++++
 5 files changed, 379 insertions(+)
 create mode 100644 packages/micromark-extension-abbr/.npmignore
 create mode 100644 packages/micromark-extension-abbr/README.md
 create mode 100644 packages/micromark-extension-abbr/__tests__/plugin.test.js
 create mode 100644 packages/micromark-extension-abbr/lib/syntax.js
 create mode 100644 packages/micromark-extension-abbr/package.json

diff --git a/packages/micromark-extension-abbr/.npmignore b/packages/micromark-extension-abbr/.npmignore
new file mode 100644
index 00000000..858c80b4
--- /dev/null
+++ b/packages/micromark-extension-abbr/.npmignore
@@ -0,0 +1,3 @@
+__tests__/
+specs/
+.npmignore
diff --git a/packages/micromark-extension-abbr/README.md b/packages/micromark-extension-abbr/README.md
new file mode 100644
index 00000000..890a0498
--- /dev/null
+++ b/packages/micromark-extension-abbr/README.md
@@ -0,0 +1,38 @@
+# `micromark-extension-abbr`
+
+**[micromark][]** extension that parses custom Markdown syntax to handle
+abbreviations.
+
+This package provides the low-level syntax extension for the micromark
+tokenizer; it has no HTML extension and is intended for use by `remark-abbr`.
+
+## Install
+
+[npm][]:
+
+```sh
+npm install micromark-extension-abbr
+```
+
+## API
+
+### `abbr`
+
+Support custom syntax to handle abbreviations.
+
+The `abbr` export is a syntax extension for the micromark parser (pass it in
+`extensions`); the token type names it emits are exported as `abbrTypes`.
+
+## License
+
+[MIT][license] © [Zeste de Savoir][zds]
+
+<!-- Definitions -->
+
+[license]: LICENCE
+
+[micromark]: https://github.com/micromark/micromark
+
+[npm]: https://docs.npmjs.com/cli/install
+
+[zds]: https://zestedesavoir.com
diff --git a/packages/micromark-extension-abbr/__tests__/plugin.test.js b/packages/micromark-extension-abbr/__tests__/plugin.test.js
new file mode 100644
index 00000000..d54865fd
--- /dev/null
+++ b/packages/micromark-extension-abbr/__tests__/plugin.test.js
@@ -0,0 +1,137 @@
+import {preprocess, parse, postprocess} from 'micromark'
+import {abbr, abbrTypes} from '../lib/syntax'
+
+describe('micromark-extension-abbr', () => {
+  it('parses definitions', () => {
+    const input = `*[HTML]: Hyper Text Markup Language`
+    const events = postprocess(
+      parse({extensions: [abbr]})
+        .document()
+        .write(preprocess()(input, null, true)),
+    )
+    const eventTypes = events.map((event) => [event[0], event[1].type])
+    expect(eventTypes).toEqual(
+      // prettier-ignore
+      [
+        [ 'enter', 'content' ],
+          [ 'enter', 'abbrDefinition' ],
+            [ 'enter', 'abbrDefinitionLabel' ],
+              [ 'enter', 'abbrDefinitionMarker' ],
+              [ 'exit', 'abbrDefinitionMarker' ],
+              [ 'enter', 'abbrDefinitionString' ],
+                [ 'enter', 'data' ],
+                [ 'exit', 'data' ],
+              [ 'exit', 'abbrDefinitionString' ],
+              [ 'enter', 'abbrDefinitionMarker' ],
+              [ 'exit', 'abbrDefinitionMarker' ],
+            [ 'exit', 'abbrDefinitionLabel' ],
+            [ 'enter', 'abbrDefinitionMarker' ],
+            [ 'exit', 'abbrDefinitionMarker' ],
+            [ 'enter', 'lineSuffix' ],
+            [ 'exit', 'lineSuffix' ],
+            [ 'enter', 'abbrDefinitionValueString' ],
+              [ 'enter', 'data' ],
+              [ 'exit', 'data' ],
+            [ 'exit', 'abbrDefinitionValueString' ],
+          [ 'exit', 'abbrDefinition' ],
+        [ 'exit', 'content' ],
+    ],
+    )
+  })
+
+  it('parses definitions without whitespace', () => {
+    const input = `*[HTML]:Hyper Text Markup Language`
+    const events = postprocess(
+      parse({extensions: [abbr]})
+        .document()
+        .write(preprocess()(input, null, true)),
+    )
+    const eventTypes = events.map((event) => [event[0], event[1].type])
+    expect(eventTypes).toEqual(
+      // prettier-ignore
+      [
+        [ 'enter', 'content' ],
+          [ 'enter', 'abbrDefinition' ],
+            [ 'enter', 'abbrDefinitionLabel' ],
+              [ 'enter', 'abbrDefinitionMarker' ],
+              [ 'exit', 'abbrDefinitionMarker' ],
+              [ 'enter', 'abbrDefinitionString' ],
+                [ 'enter', 'data' ],
+                [ 'exit', 'data' ],
+              [ 'exit', 'abbrDefinitionString' ],
+              [ 'enter', 'abbrDefinitionMarker' ],
+              [ 'exit', 'abbrDefinitionMarker' ],
+            [ 'exit', 'abbrDefinitionLabel' ],
+            [ 'enter', 'abbrDefinitionMarker' ],
+            [ 'exit', 'abbrDefinitionMarker' ],
+            [ 'enter', 'abbrDefinitionValueString' ],
+              [ 'enter', 'data' ],
+              [ 'exit', 'data' ],
+            [ 'exit', 'abbrDefinitionValueString' ],
+          [ 'exit', 'abbrDefinition' ],
+        [ 'exit', 'content' ],
+    ],
+    )
+  })
+
+  it('does not parse definitions with empty labels', () => {
+    const input = `*[]: Empty`
+    const events = postprocess(
+      parse({extensions: [abbr]})
+        .document()
+        .write(preprocess()(input, null, true)),
+    )
+    const abbrDefinitions = events.filter(
+      (event) => event[1].type === abbrTypes.abbrDefinition,
+    )
+    expect(abbrDefinitions).toEqual([])
+  })
+
+  it(
+    'does not parse definitions with parens instead of square brackets',
+    () => {
+      const input = `*(HTML): Hyper Text Markup Language`
+      const events = postprocess(
+        parse({extensions: [abbr]})
+          .document()
+          .write(preprocess()(input, null, true)),
+      )
+      const abbrDefinitions = events.filter(
+        (event) => event[1].type === abbrTypes.abbrDefinition,
+      )
+      expect(abbrDefinitions).toEqual([])
+    },
+  )
+
+  it('does not parse definitions without colons', () => {
+    const input = `*[HTML]; Hyper Text Markup Language`
+    const events = postprocess(
+      parse({extensions: [abbr]})
+        .document()
+        .write(preprocess()(input, null, true)),
+    )
+    const abbrDefinitions = events.filter(
+      (event) => event[1].type === abbrTypes.abbrDefinition,
+    )
+    expect(abbrDefinitions).toEqual([])
+  })
+
+  it('parses definitions with labels containing spaces and punctuation', () => {
+      const input = `*[MV(VSL) (E&W)]: Motor Vehicles (Variation of Speed Limits) (England & Wales) Regulations`
+      const events = postprocess(
+        parse({extensions: [abbr]})
+          .document()
+          .write(preprocess()(input, null, true)),
+      )
+      const abbrDefinitionString = events.find(
+        (event) => event[1].type === abbrTypes.abbrDefinitionString,
+      )
+      if (abbrDefinitionString === undefined) {
+        throw new Error('could not find an abbrDefinitionString')
+      } else {
+        const [_, token, context] = abbrDefinitionString
+        expect(context.sliceSerialize(token)).toEqual('MV(VSL) (E&W)')
+      }
+    },
+  )
+})
diff --git a/packages/micromark-extension-abbr/lib/syntax.js b/packages/micromark-extension-abbr/lib/syntax.js
new file mode 100644
index 00000000..1c9b6cb5
--- /dev/null
+++ b/packages/micromark-extension-abbr/lib/syntax.js
@@ -0,0 +1,154 @@
+/**
+ * @import {
+ *   ConstructRecord,
+ *   Extension,
+ *   State,
+ *   Tokenizer,
+ * } from 'micromark-util-types'
+ */
+import { codes, types } from 'micromark-util-symbol'
+import { factoryWhitespace } from 'micromark-factory-whitespace'
+import { factoryLabel } from 'micromark-factory-label'
+import {
+  markdownLineEnding,
+  markdownLineEndingOrSpace
+} from 'micromark-util-character'
+
+/**
+ * @type {{
+ * abbrDefinition: 'abbrDefinition',
+ * abbrDefinitionLabel: 'abbrDefinitionLabel',
+ * abbrDefinitionMarker: 'abbrDefinitionMarker',
+ * abbrDefinitionString: 'abbrDefinitionString',
+ * abbrDefinitionValueString: 'abbrDefinitionValueString',
+ * }}
+ */
+export const abbrTypes = {
+  abbrDefinition: 'abbrDefinition',
+  abbrDefinitionLabel: 'abbrDefinitionLabel',
+  abbrDefinitionMarker: 'abbrDefinitionMarker',
+  abbrDefinitionString: 'abbrDefinitionString',
+  abbrDefinitionValueString: 'abbrDefinitionValueString'
+}
+
+/**
+ * @type {Tokenizer}
+ */
+function abbrDefinitionTokenize (effects, ok, nok) {
+  const self = this
+
+  return start
+
+  /**
+   * @type {State}
+   *
+   * *[HTML]: Hyper Text Markup Language
+   * ^
+   */
+  function start (code) {
+    effects.enter(abbrTypes.abbrDefinition)
+    effects.consume(code)
+    return abbrKeyDefinition
+  }
+
+  /**
+   * @type {State}
+   *
+   * *[HTML]: Hyper Text Markup Language
+   *  ^
+   */
+  function abbrKeyDefinition (code) {
+    if (code === codes.leftSquareBracket) {
+      return factoryLabel.call(
+        self,
+        effects,
+        abbrKeyValueSeparator,
+        nok,
+        // @ts-ignore
+        abbrTypes.abbrDefinitionLabel,
+        abbrTypes.abbrDefinitionMarker,
+        abbrTypes.abbrDefinitionString
+      )(code)
+    }
+
+    return nok(code)
+  }
+
+  /**
+   * @type {State}
+   *
+   * *[HTML]: Hyper Text Markup Language
+   *        ^
+   */
+  function abbrKeyValueSeparator (code) {
+    if (code === codes.colon) {
+      effects.enter(abbrTypes.abbrDefinitionMarker)
+      effects.consume(code)
+      effects.exit(abbrTypes.abbrDefinitionMarker)
+      return abbrKeyValueSeparatorAfter
+    }
+
+    return nok(code)
+  }
+
+  /**
+   * @type {State}
+   *
+   * *[HTML]: Hyper Text Markup Language
+   *         ^
+   */
+  function abbrKeyValueSeparatorAfter (code) {
+    // Whitespace after the colon is optional; skip over it when present.
+    const isSpace = markdownLineEndingOrSpace(code)
+    return isSpace
+      ? factoryWhitespace(effects, abbrValueStart)(code)
+      : abbrValueStart(code)
+  }
+
+  /**
+   * @type {State}
+   *
+   * *[HTML]: Hyper Text Markup Language
+   *          ^
+   */
+  function abbrValueStart (code) {
+    effects.enter(abbrTypes.abbrDefinitionValueString)
+    effects.enter(types.chunkString, { contentType: 'string' })
+    return abbrValue(code)
+  }
+
+  /**
+   * @type {State}
+   *
+   * *[HTML]: Hyper Text Markup Language
+   *          ^^^^^^^^^^^^^^^^^^^^^^^^^^
+   */
+  function abbrValue (code) {
+    if (markdownLineEnding(code) || code === codes.eof) {
+      effects.exit(types.chunkString)
+      effects.exit(abbrTypes.abbrDefinitionValueString)
+      effects.exit(abbrTypes.abbrDefinition)
+      return ok(code)
+    }
+
+    effects.consume(code)
+    return abbrValue
+  }
+}
+
+/**
+ * @type {ConstructRecord}
+ */
+const contentInitial = {
+  [codes.asterisk]: {
+    name: 'abbrDefinition',
+    tokenize: abbrDefinitionTokenize
+  }
+}
+
+/**
+ * @type {Extension}
+ */
+export const abbr = {
+  contentInitial
+}
diff --git a/packages/micromark-extension-abbr/package.json b/packages/micromark-extension-abbr/package.json
new file mode 100644
index 00000000..76abb05d
--- /dev/null
+++ b/packages/micromark-extension-abbr/package.json
@@ -0,0 +1,47 @@
+{
+  "name": "micromark-extension-abbr",
+  "version": "0.0.0",
+  "description": "Add Markdown syntax to handle abbreviations (syntax only)",
+  "type": "module",
+  "keywords": [
+    "micromark",
+    "abbr",
+    "abbreviation",
+    "plugin",
+    "extension"
+  ],
+  "author": "Stalone <stalone+zmd@boxph.one>",
+  "homepage": "https://github.com/zestedesavoir/zmarkdown/tree/master/packages/micromark-extension-abbr",
+  "license": "MIT",
+  "main": "lib/syntax.js",
+  "module": "lib/syntax.js",
+  "directories": {
+    "lib": "lib",
+    "test": "__tests__"
+  },
+  "files": [
+    "lib"
+  ],
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/zestedesavoir/zmarkdown.git#master"
+  },
+  "scripts": {
+    "pretest": "eslint .",
+    "test": "cross-env NODE_OPTIONS=--experimental-vm-modules jest",
+    "coverage": "cross-env NODE_OPTIONS=--experimental-vm-modules jest --coverage"
+  },
+  "bugs": {
+    "url": "https://github.com/zestedesavoir/zmarkdown/issues"
+  },
+  "dependencies": {
+    "micromark-factory-label": "^2.0.0",
+    "micromark-factory-whitespace": "^2.0.0",
+    "micromark-util-character": "^2.1.0",
+    "micromark-util-symbol": "^2.0.0",
+    "micromark-util-types": "^2.0.0"
+  },
+  "devDependencies": {
+    "micromark": "^4.0.0"
+  }
+}

From 02b4aaa74c52acf8a9570b9f49da62e154994503 Mon Sep 17 00:00:00 2001
From: Richard Towers <richard.towers@digital.cabinet-office.gov.uk>
Date: Sat, 9 Nov 2024 14:40:40 +0000
Subject: [PATCH 2/4] Implement remark-abbr and update tests

This implementation is pretty much taken verbatim from
https://github.com/richardTowers/remark-abbr. I didn't make much of an
attempt to merge it with the existing code.

The existing test suite is retained, and I haven't moved any of the
tests over from richardTowers/remark-abbr yet.

A couple of the snapshots needed to be updated, but in my view what the
code does now is more correct than what was happening before, so I think
they're okay.

I haven't implemented the expandFirst functionality just yet.
---
 .../__tests__/__snapshots__/index.js.snap     |  41 ++-
 packages/remark-abbr/__tests__/index.js       |  66 ++--
 packages/remark-abbr/lib/index.js             | 315 +++++++++++-------
 packages/remark-abbr/package.json             |   4 +-
 4 files changed, 262 insertions(+), 164 deletions(-)

diff --git a/packages/remark-abbr/__tests__/__snapshots__/index.js.snap b/packages/remark-abbr/__tests__/__snapshots__/index.js.snap
index 39766982..7783e162 100644
--- a/packages/remark-abbr/__tests__/__snapshots__/index.js.snap
+++ b/packages/remark-abbr/__tests__/__snapshots__/index.js.snap
@@ -1,30 +1,29 @@
 // Jest Snapshot v1, https://goo.gl/fbAQLP
 
 exports[`compiles to markdown 1`] = `
-"_abbr_ HTML
+"*abbr* HTML
 
 > HTML inside quote
 
 *[abbr]: abbreviation
-*[HTML]: HyperText Markup Language"
+
+*[noabbr]: explanation that does not match
+
+*[HTML]: HyperText Markup Language
+"
 `;
 
 exports[`compiles to markdown 2`] = `
-"_abbr_ HTML
+"*abbr* HTML
 
 > HTML inside quote
 
 *[abbr]: abbreviation
-*[HTML]: HyperText Markup Language"
-`;
 
-exports[`compiles to markdown 3`] = `
-"_abbr_ HTML
-
-> HTML inside quote
+*[noabbr]: explanation that does not match
 
-*[abbr]: abbreviation
-*[HTML]: HyperText Markup Language"
+*[HTML]: HyperText Markup Language
+"
 `;
 
 exports[`empty object does not break with references in their own paragraphs 1`] = `"<p>Here is a test featuring <abbr title="A B C">abc</abbr> and <abbr title="D E F">def</abbr></p>"`;
@@ -46,10 +45,16 @@ exports[`empty object passes the retro test 2`] = `
 
 The HTML specification is maintained by the W3C.
 
-*[ABBR]: Abbreviation
 *[REF]: Reference
+
+*[ABBR]: This gets overridden by the next one.
+
+*[ABBR]: Abbreviation
+
 *[HTML]: Hyper Text Markup Language
-*[W3C]: World Wide Web Consortium"
+
+*[W3C]: World Wide Web Consortium
+"
 `;
 
 exports[`empty object passes the second regression test 1`] = `
@@ -120,10 +125,16 @@ exports[`no-config passes the retro test 2`] = `
 
 The HTML specification is maintained by the W3C.
 
-*[ABBR]: Abbreviation
 *[REF]: Reference
+
+*[ABBR]: This gets overridden by the next one.
+
+*[ABBR]: Abbreviation
+
 *[HTML]: Hyper Text Markup Language
-*[W3C]: World Wide Web Consortium"
+
+*[W3C]: World Wide Web Consortium
+"
 `;
 
 exports[`no-config passes the second regression test 1`] = `
diff --git a/packages/remark-abbr/__tests__/index.js b/packages/remark-abbr/__tests__/index.js
index c32252ec..1195058d 100644
--- a/packages/remark-abbr/__tests__/index.js
+++ b/packages/remark-abbr/__tests__/index.js
@@ -7,28 +7,40 @@ import remarkStringify from 'remark-stringify'
 
 import remarkAbbr from '../lib/index'
 
-const render = (text, config) => unified()
-  .use(reParse)
-  .use(remarkAbbr, config)
-  .use(remark2rehype)
-  .use(stringify)
-  .processSync(text)
-
-const renderToMarkdown = (text, config) => unified()
-  .use(reParse)
-  .use(remarkStringify)
-  .use(remarkAbbr, config)
-  .processSync(text)
+const render = (text, config) => {
+  const result = unified()
+    .use(reParse)
+    .use(remarkAbbr, config)
+    .use(remark2rehype, {
+      handlers: {
+        abbrDefinition: () => undefined,
+      }
+    })
+    .use(stringify)
+    .processSync(text)
+  return String(result)
+}
+
+const renderToMarkdown = (text, config) => {
+  const result = unified()
+    .use(reParse)
+    .use(remarkAbbr, config)
+    .use(remarkStringify)
+    .processSync(text)
+
+  return String(result)
+}
 
 const configToTest = {
   'no-config': undefined,
   'empty object': {},
-  expandFirst: {expandFirst: true},
+  // TODO - add support for expandFirst
+  // expandFirst: {expandFirst: true},
 }
 
 for (const [configName, config] of Object.entries(configToTest)) {
   it(`${configName} renders references`, () => {
-    const {contents} = render(dedent`
+    const contents = render(dedent`
       This is an abbreviation: REF.
       ref and REFERENCE should be ignored.
 
@@ -45,7 +57,7 @@ for (const [configName, config] of Object.entries(configToTest)) {
 
 
   it(`${configName} passes the first regression test`, () => {
-    const {contents} = render(dedent`
+    const contents = render(dedent`
       The HTML specification is maintained by the W3C:\
       [link](https://w3c.github.io/html/), this line had an abbr before link.
 
@@ -59,7 +71,7 @@ for (const [configName, config] of Object.entries(configToTest)) {
   })
 
   it(`${configName} passes the second regression test`, () => {
-    const {contents} = render(dedent`
+    const contents = render(dedent`
       The HTML specification is maintained by the W3C:\
       [link](https://w3c.github.io/html/), this line had an abbr before **link** HTML.
 
@@ -85,15 +97,15 @@ for (const [configName, config] of Object.entries(configToTest)) {
       *[W3C]:  World Wide Web Consortium
     `
 
-    const {contents: html} = render(input)
+    const html = render(input)
     expect(html).toMatchSnapshot()
 
-    const {contents: markdown} = renderToMarkdown(input)
+    const markdown = renderToMarkdown(input)
     expect(markdown).toMatchSnapshot()
   })
 
   it(`${configName} no reference`, () => {
-    const {contents} = render(dedent`
+    const contents = render(dedent`
       No reference!
     `, config)
 
@@ -110,17 +122,17 @@ for (const [configName, config] of Object.entries(configToTest)) {
       *[noabbr]: explanation that does not match
       *[HTML]: HyperText Markup Language
     `
-    const {contents} = renderToMarkdown(md)
+    const contents = renderToMarkdown(md)
     expect(contents).toMatchSnapshot()
 
-    const contents1 = renderToMarkdown(md).contents
-    const contents2 = renderToMarkdown(contents1).contents
+    const contents1 = renderToMarkdown(md)
+    const contents2 = renderToMarkdown(contents1)
 
     expect(contents1).toBe(contents2)
   })
 
   it(`${configName} handles abbreviations ending with a period`, () => {
-    const {contents} = render(dedent`
+    const contents = render(dedent`
       A.B.C. and C-D%F. foo
 
       *[A.B.C.]: ref1
@@ -132,7 +144,7 @@ for (const [configName, config] of Object.entries(configToTest)) {
   })
 
   it(`${configName} does not parse words starting with abbr`, () => {
-    const {contents} = render(dedent`
+    const contents = render(dedent`
       ABC ABC ABC
 
       *[AB]: ref1
@@ -142,7 +154,7 @@ for (const [configName, config] of Object.entries(configToTest)) {
   })
 
   it(`${configName} does not parse words ending with abbr`, () => {
-    const {contents} = render(dedent`
+    const contents = render(dedent`
       ABC ABC ABC
 
       *[BC]: ref1
@@ -152,7 +164,7 @@ for (const [configName, config] of Object.entries(configToTest)) {
   })
 
   it(`${configName} does not parse words containing abbr`, () => {
-    const {contents} = render(dedent`
+    const contents = render(dedent`
       ABC ABC ABC
 
       *[B]: ref1
@@ -162,7 +174,7 @@ for (const [configName, config] of Object.entries(configToTest)) {
   })
 
   it(`${configName} does not break with references in their own paragraphs`, () => {
-    const {contents} = render(dedent`
+    const contents = render(dedent`
       Here is a test featuring abc and def
 
       *[abc]: A B C
diff --git a/packages/remark-abbr/lib/index.js b/packages/remark-abbr/lib/index.js
index d496cf95..d12ef1ab 100644
--- a/packages/remark-abbr/lib/index.js
+++ b/packages/remark-abbr/lib/index.js
@@ -1,152 +1,227 @@
-import { visit } from 'unist-util-visit'
+import { SKIP, CONTINUE, visit } from 'unist-util-visit'
+import { abbr, abbrTypes } from 'micromark-extension-abbr'
 
-export default function plugin (options) {
-  const opts = options || {}
-  const expandFirst = opts.expandFirst
-
-  function locator (value, fromIndex) {
-    return value.indexOf('*[', fromIndex)
+function splitTextByAbbr (textNode, abbrDefinitions) {
+  const uniqueAbbreviationMap = new Map()
+  for (const abbreviation of abbrDefinitions) {
+    uniqueAbbreviationMap.set(abbreviation.identifier, abbreviation)
   }
 
-  function inlineTokenizer (eat, value, silent) {
-    const regex = /[*]\[([^\]]*)\]:\s*(.+)\n*/
-    const keep = regex.exec(value)
-
-    /* istanbul ignore if - never used (yet) */
-    if (silent) return silent
-    if (!keep || keep.index !== 0) return
+  const uniqueAbbreviations = [...uniqueAbbreviationMap.values()]
+
+  const matches = uniqueAbbreviations
+    .map(
+      (abbr) =>
+        /** @type {const} */ ([abbr, textNode.value.indexOf(abbr.identifier)])
+    )
+    .filter(([_abbr, index]) => index >= 0)
+    .map(([abbr, index]) => {
+      const start = index
+      const end = index + abbr.identifier.length - 1
+      return {
+        abbr,
+        start,
+        end,
+        prevChar: textNode.value[start - 1],
+        nextChar: textNode.value[end + 1]
+      }
+    })
+    .filter((match) =>
+      // We don't want to match "HTML" inside strings like "HHHHTMLLLLLL", so check that the
+      // surrounding characters are either undefined (i.e. start of string / end of string)
+      // or non-word characters
+      [match.prevChar, match.nextChar].every(
+        (c) => c === undefined || /^\W$/.test(c)
+      )
+    )
+    .sort((l, r) => l.start - r.start)
+
+  if (matches.length === 0) {
+    return [textNode]
+  }
 
-    const [matched, abbr, reference] = keep
+  const nodes = []
+  let currentIndex = 0
+  for (const match of matches) {
+    if (match.start > currentIndex) {
+      nodes.push({
+        ...textNode,
+        value: textNode.value.slice(currentIndex, match.start),
+        position: textNode.position && {
+          start: updatePoint(textNode.position.start, currentIndex),
+          end: updatePoint(textNode.position.start, match.start)
+        }
+      })
+    }
 
-    return eat(matched)({
+    const abbrPosition = textNode.position && {
+      start: updatePoint(textNode.position.start, match.start),
+      end: updatePoint(textNode.position.start, match.end + 1)
+    }
+    const abbr = {
       type: 'abbr',
-      abbr,
-      reference,
-      children: [
-        { type: 'text', value: abbr }
-      ],
+      value: match.abbr.value,
+      identifier: match.abbr.identifier,
       data: {
         hName: 'abbr',
         hProperties: {
-          title: reference
-        }
-      }
-    })
-  }
+          title: match.abbr.value
+        },
+        hChildren: [{ type: 'text', value: match.abbr.identifier }]
+      },
+      position: abbrPosition
+    }
+    nodes.push(abbr)
 
-  function transformer (tree) {
-    const abbrs = {}
-    const emptyParagraphsToRemove = new Map()
+    // Move the position forwards
+    currentIndex = match.end + 1
+  }
 
-    visit(tree, 'paragraph', find(abbrs, emptyParagraphsToRemove))
-    emptyParagraphsToRemove.forEach((indices, key) => {
-      indices.reverse()
-      indices.forEach((index) => {
-        key.children.splice(index, 1)
-      })
+  // If the final abbreviation wasn't at the very end of the value,
+  // add one final text node with the remainder of the value
+  if (currentIndex < textNode.value.length) {
+    nodes.push({
+      ...textNode,
+      value: textNode.value.slice(currentIndex),
+      position: textNode.position && {
+        start: updatePoint(textNode.position.start, currentIndex),
+        end: updatePoint(textNode.position.end, 0)
+      }
     })
-
-    visit(tree, replace(abbrs))
   }
 
-  function find (abbrs, emptyParagraphsToRemove) {
-    return function one (node, index, parent) {
-      for (let i = 0; i < node.children.length; i++) {
-        const child = node.children[i]
-        if (child.type !== 'abbr') continue
-        // Store abbr node for later use
-        abbrs[child.abbr] = child
-        node.children.splice(i, 1)
-        i -= 1
-      }
-      // Keep track of empty paragraphs to remove
-      if (node.children.length === 0) {
-        const indices = emptyParagraphsToRemove.get(parent) || []
-        indices.push(index)
-        emptyParagraphsToRemove.set(parent, indices)
-      }
+  return nodes
+
+  function updatePoint (point, increment) {
+    return {
+      line: point.line,
+      column: point.column + increment,
+      offset:
+        point.offset === undefined
+          ? undefined
+          : point.offset + increment
     }
   }
+}
 
-  function replace (abbrs) {
-    function escapeRegExp (str) {
-      return str.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, '\\$&') // eslint-disable-line no-useless-escape
-    }
+/**
+ * Create an extension for `mdast-util-from-markdown` to enable abbreviations
+ * in markdown.
+ */
+export function abbrFromMarkdown () {
+  return {
+    enter: {
+      abbrDefinition: enterAbbrDefinition,
+      abbrDefinitionLabel: enterAbbrDefinitionLabel,
+      abbrDefinitionValueString: enterAbbrDefinitionValueString
+    },
+    exit: {
+      abbrDefinition: exitAbbrDefinition,
+      abbrDefinitionLabel: exitAbbrDefinitionLabel,
+      abbrDefinitionValueString: exitAbbrDefinitionValueString
+    },
+    transforms: [
+      (tree) => {
+        const abbrDefinitions = tree.children.filter(
+          (x) => x.type === abbrTypes.abbrDefinition
+        )
+        if (abbrDefinitions.length === 0) {
+          return tree
+        }
 
-    const pattern = Object.keys(abbrs).map(escapeRegExp).join('|')
-    const regex = new RegExp(`(\\b|\\W)(${pattern})(\\b|\\W)`)
-    const expanded = {}
-
-    function one (node, index, parent) {
-      if (Object.keys(abbrs).length === 0) return
-      if (!node.children) return
-
-      // If a text node is present in child nodes, check if an abbreviation is present
-      for (let c = 0; c < node.children.length; c++) {
-        const child = node.children[c]
-        if (node.type === 'abbr' || child.type !== 'text') continue
-        if (!regex.test(child.value)) continue
-
-        // Transform node
-        const newTexts = child.value.split(regex)
-
-        // Remove old text node
-        node.children.splice(c, 1)
-
-        // Replace abbreviations
-        for (let i = 0; i < newTexts.length; i++) {
-          const content = newTexts[i]
-          if (Object.prototype.hasOwnProperty.call(abbrs, content)) {
-            const abbr = abbrs[content]
-            if (expandFirst && !expanded[content]) {
-              node.children.splice(c + i, 0, {
-                type: 'text',
-                value: `${abbr.reference} (${abbr.abbr})`
-              })
-              expanded[content] = true
-            } else {
-              node.children.splice(c + i, 0, abbr)
-            }
-          } else {
-            node.children.splice(c + i, 0, {
-              type: 'text',
-              value: content
-            })
+        visit(tree, null, (node, index, parent) => {
+          if (index === undefined || parent === undefined) {
+            return CONTINUE
           }
-        }
+
+          if (node.type === 'text') {
+            const newNodes = splitTextByAbbr(node, abbrDefinitions)
+            parent.children.splice(index, 1, ...newNodes)
+            return SKIP
+          }
+
+          return CONTINUE
+        })
       }
-    }
-    return one
+    ]
   }
 
-  inlineTokenizer.locator = locator
+  function enterAbbrDefinition (token) {
+    this.enter(
+      {
+        type: abbrTypes.abbrDefinition,
+        value: '',
+        identifier: ''
+      },
+      token
+    )
+  }
 
-  const Parser = this.Parser
+  function enterAbbrDefinitionLabel () {
+    this.buffer()
+  }
 
-  // Inject inlineTokenizer
-  const inlineTokenizers = Parser.prototype.inlineTokenizers
-  const inlineMethods = Parser.prototype.inlineMethods
-  inlineTokenizers.abbr = inlineTokenizer
-  inlineMethods.splice(0, 0, 'abbr')
+  function exitAbbrDefinitionLabel () {
+    const label = this.resume()
+    const node = this.stack[this.stack.length - 1]
+    node.identifier = label
+  }
 
-  const Compiler = this.Compiler
-  if (Compiler) {
-    const visitors = Compiler.prototype.visitors
-    if (!visitors) return
+  function enterAbbrDefinitionValueString () {
+    this.buffer()
+  }
 
-    const abbrMap = {}
-    visitors.abbr = (node) => {
-      if (!abbrMap[node.abbr]) {
-        abbrMap[node.abbr] = `*[${node.abbr}]: ${node.reference}`
-      }
-      return `${node.abbr}`
+  function exitAbbrDefinitionValueString () {
+    const node = this.stack.find(
+      (node) => node.type === abbrTypes.abbrDefinition
+    )
+    if (node !== undefined) {
+      node.value = this.resume()
     }
+  }
+
+  function exitAbbrDefinition (token) {
+    this.exit(token)
+  }
+}
 
-    const originalRootCompiler = visitors.root
-    visitors.root = function (node) {
-      return `${originalRootCompiler.apply(this, arguments)}\n${Object.values(abbrMap).join('\n')}`
+/**
+ * Create an extension for `mdast-util-to-markdown` to enable abbreviations
+ * in markdown.
+ */
+export function abbrToMarkdown () {
+  return {
+    handlers: {
+      abbr: handleAbbr,
+      abbrDefinition: handleAbbrDefinition
     }
   }
-  return transformer
+
+  function handleAbbr (node, _, state, info) {
+    return state.safe(node.identifier, info)
+  }
+
+  function handleAbbrDefinition (node, _, state, info) {
+    return state.safe(`*[${node.identifier}]: ${node.value}`, info)
+  }
 }
 
+export default function plugin (options) {
+  // TODO - add support for expand first, or document that it's being removed
+  // const opts = options || {}
+  // const expandFirst = opts.expandFirst
+
+  const self = this
+  const data = self.data()
+
+  const micromarkExtensions =
+    data.micromarkExtensions || (data.micromarkExtensions = [])
+  const fromMarkdownExtensions =
+    data.fromMarkdownExtensions || (data.fromMarkdownExtensions = [])
+  const toMarkdownExtensions =
+    data.toMarkdownExtensions || (data.toMarkdownExtensions = [])
+
+  micromarkExtensions.push(abbr)
+  fromMarkdownExtensions.push(abbrFromMarkdown())
+  toMarkdownExtensions.push(abbrToMarkdown())
+}
diff --git a/packages/remark-abbr/package.json b/packages/remark-abbr/package.json
index 062fe5c7..a581b84e 100644
--- a/packages/remark-abbr/package.json
+++ b/packages/remark-abbr/package.json
@@ -15,8 +15,8 @@
   ],
   "scripts": {
     "pretest": "eslint .",
-    "test": "jest",
-    "coverage": "jest --coverage"
+    "test": "cross-env NODE_OPTIONS=--experimental-vm-modules jest",
+    "coverage": "cross-env NODE_OPTIONS=--experimental-vm-modules jest --coverage"
   },
   "main": "lib/index.js",
   "module": "lib/index.js",

From 3000ad2166ff293a5906f3e638768a2d6c3fa131 Mon Sep 17 00:00:00 2001
From: Richard Towers <richard.towers@digital.cabinet-office.gov.uk>
Date: Sat, 9 Nov 2024 16:02:55 +0000
Subject: [PATCH 3/4] Add support for expandFirst

---
 .../__tests__/__snapshots__/index.js.snap     | 23 +++++++++++--
 packages/remark-abbr/__tests__/index.js       |  3 +-
 packages/remark-abbr/lib/index.js             | 34 +++++++++++++++----
 3 files changed, 49 insertions(+), 11 deletions(-)

diff --git a/packages/remark-abbr/__tests__/__snapshots__/index.js.snap b/packages/remark-abbr/__tests__/__snapshots__/index.js.snap
index 7783e162..fdf40030 100644
--- a/packages/remark-abbr/__tests__/__snapshots__/index.js.snap
+++ b/packages/remark-abbr/__tests__/__snapshots__/index.js.snap
@@ -26,6 +26,19 @@ exports[`compiles to markdown 2`] = `
 "
 `;
 
+exports[`compiles to markdown 3`] = `
+"*abbr* HTML
+
+> HTML inside quote
+
+*[abbr]: abbreviation
+
+*[noabbr]: explanation that does not match
+
+*[HTML]: HyperText Markup Language
+"
+`;
+
 exports[`empty object does not break with references in their own paragraphs 1`] = `"<p>Here is a test featuring <abbr title="A B C">abc</abbr> and <abbr title="D E F">def</abbr></p>"`;
 
 exports[`empty object no reference 1`] = `"<p>No reference!</p>"`;
@@ -88,10 +101,16 @@ exports[`expandFirst passes the retro test 2`] = `
 
 The HTML specification is maintained by the W3C.
 
-*[ABBR]: Abbreviation
 *[REF]: Reference
+
+*[ABBR]: This gets overridden by the next one.
+
+*[ABBR]: Abbreviation
+
 *[HTML]: Hyper Text Markup Language
-*[W3C]: World Wide Web Consortium"
+
+*[W3C]: World Wide Web Consortium
+"
 `;
 
 exports[`expandFirst passes the second regression test 1`] = `
diff --git a/packages/remark-abbr/__tests__/index.js b/packages/remark-abbr/__tests__/index.js
index 1195058d..2d0254ee 100644
--- a/packages/remark-abbr/__tests__/index.js
+++ b/packages/remark-abbr/__tests__/index.js
@@ -34,8 +34,7 @@ const renderToMarkdown = (text, config) => {
 const configToTest = {
   'no-config': undefined,
   'empty object': {},
-  // TODO - add support for expandFirst
-  // expandFirst: {expandFirst: true},
+  expandFirst: {expandFirst: true},
 }
 
 for (const [configName, config] of Object.entries(configToTest)) {
diff --git a/packages/remark-abbr/lib/index.js b/packages/remark-abbr/lib/index.js
index d12ef1ab..a5b12e1e 100644
--- a/packages/remark-abbr/lib/index.js
+++ b/packages/remark-abbr/lib/index.js
@@ -1,7 +1,8 @@
 import { SKIP, CONTINUE, visit } from 'unist-util-visit'
 import { abbr, abbrTypes } from 'micromark-extension-abbr'
 
-function splitTextByAbbr (textNode, abbrDefinitions) {
+function splitTextByAbbr (textNode, abbrDefinitions, seenAbbreviations, opts) {
+  const expandFirst = (opts && opts.expandFirst) || false // opts is undefined when abbrFromMarkdown() is called without arguments
   const uniqueAbbreviationMap = new Map()
   for (const abbreviation of abbrDefinitions) {
     uniqueAbbreviationMap.set(abbreviation.identifier, abbreviation)
@@ -35,6 +36,11 @@ function splitTextByAbbr (textNode, abbrDefinitions) {
       )
     )
     .sort((l, r) => l.start - r.start)
+    .map(match => {
+      const firstOfItsKind = !seenAbbreviations.has(match.abbr.identifier)
+      seenAbbreviations.add(match.abbr.identifier)
+      return { ...match, firstOfItsKind }
+    })
 
   if (matches.length === 0) {
     return [textNode]
@@ -53,6 +59,14 @@ function splitTextByAbbr (textNode, abbrDefinitions) {
         }
       })
     }
+    const shouldExpand = expandFirst && match.firstOfItsKind
+    if (shouldExpand) {
+      // Add a text node for the expanded definition, up to the opening paren
+      nodes.push({
+        ...textNode,
+        value: match.abbr.value + ' ('
+      })
+    }
 
     const abbrPosition = textNode.position && {
       start: updatePoint(textNode.position.start, match.start),
@@ -73,6 +87,13 @@ function splitTextByAbbr (textNode, abbrDefinitions) {
     }
     nodes.push(abbr)
 
+    if (shouldExpand) {
+      // Add a closing paren text node
+      nodes.push({
+        type: 'text',
+        value: ')'
+      })
+    }
     // Move the position forwards
     currentIndex = match.end + 1
   }
@@ -108,7 +129,7 @@ function splitTextByAbbr (textNode, abbrDefinitions) {
  * Create an extension for `mdast-util-from-markdown` to enable abbreviations
  * in markdown.
  */
-export function abbrFromMarkdown () {
+export function abbrFromMarkdown (opts) {
   return {
     enter: {
       abbrDefinition: enterAbbrDefinition,
@@ -129,13 +150,14 @@ export function abbrFromMarkdown () {
           return tree
         }
 
+        const seenAbbreviations = new Set()
         visit(tree, null, (node, index, parent) => {
           if (index === undefined || parent === undefined) {
             return CONTINUE
           }
 
           if (node.type === 'text') {
-            const newNodes = splitTextByAbbr(node, abbrDefinitions)
+            const newNodes = splitTextByAbbr(node, abbrDefinitions, seenAbbreviations, opts)
             parent.children.splice(index, 1, ...newNodes)
             return SKIP
           }
@@ -207,9 +229,7 @@ export function abbrToMarkdown () {
 }
 
 export default function plugin (options) {
-  // TODO - add support for expand first, or document that it's being removed
-  // const opts = options || {}
-  // const expandFirst = opts.expandFirst
+  const opts = options || {}
 
   const self = this
   const data = self.data()
@@ -222,6 +242,6 @@ export default function plugin (options) {
     data.toMarkdownExtensions || (data.toMarkdownExtensions = [])
 
   micromarkExtensions.push(abbr)
-  fromMarkdownExtensions.push(abbrFromMarkdown())
+  fromMarkdownExtensions.push(abbrFromMarkdown(opts))
   toMarkdownExtensions.push(abbrToMarkdown())
 }

From 272201f5e6713af2776d78d3983b67d8d615f9cc Mon Sep 17 00:00:00 2001
From: Richard Towers <richard.towers@digital.cabinet-office.gov.uk>
Date: Sat, 9 Nov 2024 16:12:58 +0000
Subject: [PATCH 4/4] Run npm i

---
 package-lock.json | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/package-lock.json b/package-lock.json
index 9b4a6b13..60d9221f 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -8486,7 +8486,6 @@
       "version": "2.0.3",
       "resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz",
       "integrity": "sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==",
-      "dev": true,
       "engines": {
         "node": ">=6"
       }
@@ -8522,7 +8521,6 @@
       "version": "1.1.0",
       "resolved": "https://registry.npmjs.org/devlop/-/devlop-1.1.0.tgz",
       "integrity": "sha512-RWmIqhcFf1lRYBvNmr7qTNuyCt/7/ns2jbpp1+PalgE/rDQcBT0fioSMUpJ93irlUhC5hrg4cYqe6U+0ImW0rA==",
-      "dev": true,
       "dependencies": {
         "dequal": "^2.0.0"
       },
@@ -16666,6 +16664,10 @@
         "micromark-util-types": "^2.0.0"
       }
     },
+    "node_modules/micromark-extension-abbr": {
+      "resolved": "packages/micromark-extension-abbr",
+      "link": true
+    },
     "node_modules/micromark-extension-gfm": {
       "version": "3.0.0",
       "resolved": "https://registry.npmjs.org/micromark-extension-gfm/-/micromark-extension-gfm-3.0.0.tgz",
@@ -16824,7 +16826,6 @@
       "version": "2.0.0",
       "resolved": "https://registry.npmjs.org/micromark-factory-label/-/micromark-factory-label-2.0.0.tgz",
       "integrity": "sha512-RR3i96ohZGde//4WSe/dJsxOX6vxIg9TimLAS3i4EhBAFx8Sm5SmqVfR8E87DPSR31nEAjZfbt91OMZWcNgdZw==",
-      "dev": true,
       "funding": [
         {
           "type": "GitHub Sponsors",
@@ -16846,7 +16847,6 @@
       "version": "2.0.0",
       "resolved": "https://registry.npmjs.org/micromark-factory-space/-/micromark-factory-space-2.0.0.tgz",
       "integrity": "sha512-TKr+LIDX2pkBJXFLzpyPyljzYK3MtmllMUMODTQJIUfDGncESaqB90db9IAUcz4AZAJFdd8U9zOp9ty1458rxg==",
-      "dev": true,
       "funding": [
         {
           "type": "GitHub Sponsors",
@@ -16888,7 +16888,6 @@
       "version": "2.0.0",
       "resolved": "https://registry.npmjs.org/micromark-factory-whitespace/-/micromark-factory-whitespace-2.0.0.tgz",
       "integrity": "sha512-28kbwaBjc5yAI1XadbdPYHX/eDnqaUFVikLwrO7FDnKG7lpgxnvk/XGRhX/PN0mOZ+dBSZ+LgunHS+6tYQAzhA==",
-      "dev": true,
       "funding": [
         {
           "type": "GitHub Sponsors",
@@ -25367,6 +25366,20 @@
         "unist-util-visit": "^2.0.3"
       }
     },
+    "packages/micromark-extension-abbr": {
+      "version": "0.0.0",
+      "license": "MIT",
+      "dependencies": {
+        "micromark-factory-label": "^2.0.0",
+        "micromark-factory-whitespace": "^2.0.0",
+        "micromark-util-character": "^2.1.0",
+        "micromark-util-symbol": "^2.0.0",
+        "micromark-util-types": "^2.0.0"
+      },
+      "devDependencies": {
+        "micromark": "^4.0.0"
+      }
+    },
     "packages/micromark-extension-iframes": {
       "version": "0.0.0",
       "license": "MIT",