From cb1cb610ac08ba78d9f8735402929722d7253088 Mon Sep 17 00:00:00 2001 From: dklimpel <5740567+dklimpel@users.noreply.github.com> Date: Sun, 2 Jun 2024 15:14:02 +0200 Subject: [PATCH 1/2] feat: anchor link checks support HTML tags like `` --- index.js | 27 ++++++++++++++++++++++-- test/hash-links.md | 35 ++++++++++++++++++++++++++++++-- test/markdown-link-check.test.js | 9 +++++++- 3 files changed, 66 insertions(+), 5 deletions(-) diff --git a/index.js b/index.js index cc4c9ae..98c76ab 100644 --- a/index.js +++ b/index.js @@ -43,9 +43,32 @@ function performSpecialReplacements(str, opts) { return str; } +function removeCodeBlocks(markdown) { + return markdown.replace(/^```[\S\s]+?^```$/gm, ''); +} + +function extractHtmlSections(markdown) { + markdown = + // remove code blocks + removeCodeBlocks(markdown) + // remove HTML comments + .replace(//gm, '') + // remove single line code (if not escaped with "\") + .replace(/(?[^\s]+).*?id="(?[^"]*?)".*?>/gmi; + const regexAName = /[^"]*?)".*?>/gmi; + + const sections = [] + .concat(Array.from(markdown.matchAll(regexAllId), (match) => match.groups.id)) + .concat(Array.from(markdown.matchAll(regexAName), (match) => match.groups.name)); + + return sections +} + function extractSections(markdown) { // First remove code blocks. - markdown = markdown.replace(/^```[\S\s]+?^```$/mg, ''); + markdown = removeCodeBlocks(markdown); const sectionTitles = markdown.match(/^#+ .*$/gm) || []; @@ -85,7 +108,7 @@ module.exports = function markdownLinkCheck(markdown, opts, callback) { } const links = markdownLinkExtractor(markdown); - const sections = extractSections(markdown); + const sections = extractSections(markdown).concat(extractHtmlSections(markdown)); const linksCollection = _.uniq(links); const bar = (opts.showProgressBar) ? new ProgressBar('Checking... [:bar] :percent', { diff --git a/test/hash-links.md b/test/hash-links.md index 4db835b..e9e53de 100644 --- a/test/hash-links.md +++ b/test/hash-links.md @@ -1,11 +1,28 @@ # Foo - + + + + +
+
+
+
+ + + This is a test. +HTML anchor in code `` should be ignored. + + +Ignore escaped backticks \`\`. Link should work. + ## Bar The title is [Foo](#foo). @@ -18,7 +35,21 @@ The second section is [Bar](#bar). There is no section named [Potato](#potato). -There is an anchor named [Tomato](#tomato). +There is an anchor named with `id` [Tomato](#tomato_id). + +There is an anchor named with `name` [Tomato](#tomato_name). + +There is an anchor in code [Tomato in code](#tomato_code). + +There is an anchor in escaped code [Tomato in escaped backticks](#tomato_escaped_backticks). + +There is an anchor in HTML comment [Tomato in comment](#tomato_comment). + +There is an anchor in single div [Onion](#onion). + +There is an anchor in outer div [Onion outer](#onion_outer). + +There is an anchor in inner div [Onion inner](#onion_inner). ## Header with special char ✨ diff --git a/test/markdown-link-check.test.js b/test/markdown-link-check.test.js index a46cfc5..eb77212 100644 --- a/test/markdown-link-check.test.js +++ b/test/markdown-link-check.test.js @@ -377,7 +377,14 @@ describe('markdown-link-check', function () { { link: '#foo', statusCode: 200, err: null, status: 'alive' }, { link: '#bar', statusCode: 200, err: null, status: 'alive' }, { link: '#potato', statusCode: 404, err: null, status: 'dead' }, - { link: '#tomato', statusCode: 404, err: null, status: 'dead' }, + { link: '#tomato_id', statusCode: 200, err: null, status: 'alive' }, + { link: '#tomato_name', statusCode: 200, err: null, status: 'alive' }, + { link: '#tomato_code', statusCode: 404, err: null, status: 'dead' }, + { link: '#tomato_escaped_backticks', statusCode: 200, err: null, status: 'alive' }, + { link: '#tomato_comment', statusCode: 404, err: null, status: 'dead' }, + { link: '#onion', statusCode: 200, err: null, status: 'alive' }, + { link: '#onion_outer', statusCode: 200, err: null, status: 'alive' }, + { link: '#onion_inner', statusCode: 200, err: null, status: 'alive' }, { link: '#header-with-special-char-', statusCode: 404, err: null, status: 'dead' }, ]); done(); From 181d704e94bde2ad43c4d9c90bbe5a6383536c8e Mon Sep 17 00:00:00 2001 From: dklimpel <5740567+dklimpel@users.noreply.github.com> Date: Mon, 3 Jun 2024 16:16:22 +0200 Subject: [PATCH 2/2] add support and tests for single quotes --- index.js | 4 ++-- test/hash-links.md | 8 +++++++- test/markdown-link-check.test.js | 2 ++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/index.js b/index.js index 98c76ab..aad6ea3 100644 --- a/index.js +++ b/index.js @@ -56,8 +56,8 @@ function extractHtmlSections(markdown) { // remove single line code (if not escaped with "\") .replace(/(?[^\s]+).*?id="(?[^"]*?)".*?>/gmi; - const regexAName = /[^"]*?)".*?>/gmi; + const regexAllId = /<(?[^\s]+).*?id=["'](?[^"']*?)["'].*?>/gmi; + const regexAName = /[^"']*?)["'].*?>/gmi; const sections = [] .concat(Array.from(markdown.matchAll(regexAllId), (match) => match.groups.id)) diff --git a/test/hash-links.md b/test/hash-links.md index e9e53de..33e3d70 100644 --- a/test/hash-links.md +++ b/test/hash-links.md @@ -2,9 +2,11 @@ - + + +
@@ -39,6 +41,10 @@ There is an anchor named with `id` [Tomato](#tomato_id). There is an anchor named with `name` [Tomato](#tomato_name). +There is an anchor named with `id` [Tomato in single quote](#tomato_id_single_quote). + +There is an anchor named with `name` [Tomato in single quote](#tomato_name_single_quote). + There is an anchor in code [Tomato in code](#tomato_code). There is an anchor in escaped code [Tomato in escaped backticks](#tomato_escaped_backticks). diff --git a/test/markdown-link-check.test.js b/test/markdown-link-check.test.js index eb77212..2d043f5 100644 --- a/test/markdown-link-check.test.js +++ b/test/markdown-link-check.test.js @@ -379,6 +379,8 @@ describe('markdown-link-check', function () { { link: '#potato', statusCode: 404, err: null, status: 'dead' }, { link: '#tomato_id', statusCode: 200, err: null, status: 'alive' }, { link: '#tomato_name', statusCode: 200, err: null, status: 'alive' }, + { link: '#tomato_id_single_quote', statusCode: 200, err: null, status: 'alive' }, + { link: '#tomato_name_single_quote', statusCode: 200, err: null, status: 'alive' }, { link: '#tomato_code', statusCode: 404, err: null, status: 'dead' }, { link: '#tomato_escaped_backticks', statusCode: 200, err: null, status: 'alive' }, { link: '#tomato_comment', statusCode: 404, err: null, status: 'dead' },