From dd2a38ac80988ddecab4b1ec6fb6f76b99d55757 Mon Sep 17 00:00:00 2001 From: Hamir Mahal Date: Sun, 22 Dec 2024 11:41:58 -0800 Subject: [PATCH 01/40] refactor: remove raw hashes that are unnecessary --- harper-core/src/language_detection.rs | 4 ++-- harper-core/src/lexing/email_address.rs | 28 ++++++++++++------------- harper-core/src/lexing/hostname.rs | 28 ++++++++++++------------- harper-core/src/parsers/markdown.rs | 12 +++++------ 4 files changed, 36 insertions(+), 36 deletions(-) diff --git a/harper-core/src/language_detection.rs b/harper-core/src/language_detection.rs index 2c837850..fff66045 100644 --- a/harper-core/src/language_detection.rs +++ b/harper-core/src/language_detection.rs @@ -106,7 +106,7 @@ mod tests { #[test] fn detects_python_fib() { assert_not_english( - r#" + r" def fibIter(n): if n < 2: return n @@ -115,7 +115,7 @@ def fibIter(n): for _ in range(2, n): fibPrev, fib = fib, fib + fibPrev return fib - "#, + ", ); } diff --git a/harper-core/src/lexing/email_address.rs b/harper-core/src/lexing/email_address.rs index c034f337..df2eb20a 100644 --- a/harper-core/src/lexing/email_address.rs +++ b/harper-core/src/lexing/email_address.rs @@ -109,23 +109,23 @@ mod tests { fn example_local_parts() -> impl Iterator> { [ - r#"simple"#, - r#"very.common"#, - r#"x"#, - r#"long.email-address-with-hyphens"#, - r#"user.name+tag+sorting"#, - r#"name/surname"#, - r#"admin"#, - r#"example"#, + r"simple", + r"very.common", + r"x", + r"long.email-address-with-hyphens", + r"user.name+tag+sorting", + r"name/surname", + r"admin", + r"example", r#"" ""#, r#""john..doe""#, - r#"mailhost!username"#, + r"mailhost!username", r#""very.(),:;<>[]\".VERY.\"very@\\ \"very\".unusual""#, - r#"user%example.com"#, - r#"user-"#, - r#"postmaster"#, - r#"postmaster"#, - r#"_test"#, + r"user%example.com", + r"user-", + r"postmaster", + r"postmaster", + r"_test", ] .into_iter() .map(|s| s.chars().collect()) diff --git a/harper-core/src/lexing/hostname.rs 
b/harper-core/src/lexing/hostname.rs index 7897373b..6a130726 100644 --- a/harper-core/src/lexing/hostname.rs +++ b/harper-core/src/lexing/hostname.rs @@ -25,20 +25,20 @@ pub mod tests { pub fn example_domain_parts() -> impl Iterator> { [ - r#"example.com"#, - r#"example.com"#, - r#"example.com"#, - r#"and.subdomains.example.com"#, - r#"example.com"#, - r#"example.com"#, - r#"example"#, - r#"s.example"#, - r#"example.org"#, - r#"example.org"#, - r#"example.org"#, - r#"strange.example.com"#, - r#"example.org"#, - r#"example.org"#, + r"example.com", + r"example.com", + r"example.com", + r"and.subdomains.example.com", + r"example.com", + r"example.com", + r"example", + r"s.example", + r"example.org", + r"example.org", + r"example.org", + r"strange.example.com", + r"example.org", + r"example.org", ] .into_iter() .map(|s| s.chars().collect()) diff --git a/harper-core/src/parsers/markdown.rs b/harper-core/src/parsers/markdown.rs index 89ed098b..cc5331fd 100644 --- a/harper-core/src/parsers/markdown.rs +++ b/harper-core/src/parsers/markdown.rs @@ -264,7 +264,7 @@ mod tests { #[test] fn survives_emojis() { - let source = r#"🤷."#; + let source = r"🤷."; Markdown.parse_str(source); } @@ -284,7 +284,7 @@ mod tests { #[test] fn math_becomes_unlintable() { - let source = r#"$\Katex$ $\text{is}$ $\text{great}$."#; + let source = r"$\Katex$ $\text{is}$ $\text{great}$."; let tokens = Markdown.parse_str(source); assert_eq!( @@ -302,7 +302,7 @@ mod tests { #[test] fn hidden_wikilink_text() { - let source = r#"[[this is hidden|this is not]]"#; + let source = r"[[this is hidden|this is not]]"; let tokens = Markdown.parse_str(source); @@ -322,7 +322,7 @@ mod tests { #[test] fn improper_wikilink_text() { - let source = r#"this is shown|this is also shown]]"#; + let source = r"this is shown|this is also shown]]"; let tokens = Markdown.parse_str(source); @@ -354,7 +354,7 @@ mod tests { #[test] fn normal_wikilink() { - let source = r#"[[Wikilink]]"#; + let source = r"[[Wikilink]]"; let 
tokens = Markdown.parse_str(source); let token_kinds = tokens.iter().map(|t| t.kind).collect::>(); @@ -365,7 +365,7 @@ mod tests { #[test] fn html_is_unlintable() { - let source = r#"The range of inputs from to ctrl-z"#; + let source = r"The range of inputs from to ctrl-z"; let tokens = Markdown.parse_str(source); assert_eq!(tokens.iter_unlintables().count(), 1); } From b9bb988935d2fd1e6663d2853bf27975acdf795f Mon Sep 17 00:00:00 2001 From: Hamir Mahal Date: Thu, 26 Dec 2024 12:52:53 -0800 Subject: [PATCH 02/40] prevent unnecessary `#`s --- justfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/justfile b/justfile index fd96fb88..9b34dacf 100644 --- a/justfile +++ b/justfile @@ -123,7 +123,7 @@ check-rust: set -eo pipefail cargo fmt -- --check - cargo clippy -- -Dwarnings -D clippy::dbg_macro + cargo clippy -- -Dwarnings -D clippy::dbg_macro -D clippy::needless_raw_string_hashes # Perform format and type checking. check: From 487484d2d7db7c5b99e86c6dbd8c52258a79a4c1 Mon Sep 17 00:00:00 2001 From: Grant Lemons Date: Sun, 29 Dec 2024 15:22:12 -0600 Subject: [PATCH 03/40] feat(spans-visual): mark unlintable spans as red --- harper-cli/src/main.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/harper-cli/src/main.rs b/harper-cli/src/main.rs index ef57427b..79293ad2 100644 --- a/harper-cli/src/main.rs +++ b/harper-cli/src/main.rs @@ -8,7 +8,7 @@ use clap::Parser; use harper_comments::CommentParser; use harper_core::linting::{LintGroup, LintGroupConfig, Linter}; use harper_core::parsers::Markdown; -use harper_core::{remove_overlaps, Dictionary, Document, FstDictionary}; +use harper_core::{remove_overlaps, Dictionary, Document, FstDictionary, TokenKind}; #[derive(Debug, Parser)] enum Args { @@ -94,6 +94,7 @@ fn main() -> anyhow::Result<()> { let primary_color = Color::Blue; let secondary_color = Color::Magenta; + let unlintable_color = Color::Red; let filename = file .file_name() .map(|s| s.to_string_lossy().into()) @@ 
-106,7 +107,11 @@ fn main() -> anyhow::Result<()> { report_builder = report_builder.with_label( Label::new((&filename, token.span.into())) .with_message(format!("[{}, {})", token.span.start, token.span.end)) - .with_color(color), + .with_color(if matches!(token.kind, TokenKind::Unlintable) { + unlintable_color + } else { + color + }), ); // Alternate colors so spans are clear From 153c17a90a59536dec5f8f7a1b179e05655341c0 Mon Sep 17 00:00:00 2001 From: Grant Lemons Date: Mon, 30 Dec 2024 15:26:48 -0600 Subject: [PATCH 04/40] feat(spans-visual): spans visual defaults to hiding newline spans --- harper-cli/src/main.rs | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/harper-cli/src/main.rs b/harper-cli/src/main.rs index 79293ad2..21655852 100644 --- a/harper-cli/src/main.rs +++ b/harper-cli/src/main.rs @@ -30,6 +30,9 @@ enum Args { Spans { /// The file you wish to display the spans. file: PathBuf, + /// Include newlines in the output + #[arg(short, long)] + include_newlines: bool, }, /// Emit decompressed, line-separated list of words in Harper's dictionary. 
Words, @@ -89,7 +92,10 @@ fn main() -> anyhow::Result<()> { Ok(()) } - Args::Spans { file } => { + Args::Spans { + file, + include_newlines, + } => { let (doc, source) = load_file(&file)?; let primary_color = Color::Blue; @@ -103,7 +109,11 @@ fn main() -> anyhow::Result<()> { let mut report_builder = Report::build(ReportKind::Custom("Spans", primary_color), &filename, 0); let mut color = primary_color; - for token in doc.tokens() { + + for token in doc.tokens().filter(|t| { + include_newlines + || !matches!(t.kind, TokenKind::Newline(_) | TokenKind::ParagraphBreak) + }) { report_builder = report_builder.with_label( Label::new((&filename, token.span.into())) .with_message(format!("[{}, {})", token.span.start, token.span.end)) From bac07a8a9db0a229599740b2f90f79e53ccb8ce0 Mon Sep 17 00:00:00 2001 From: AminWhat <88392440+aminwhat@users.noreply.github.com> Date: Thu, 2 Jan 2025 12:36:02 +0330 Subject: [PATCH 05/40] Update +page.md please add the sample, it took me a while to find the sample code --- .../docs/harperjs/introduction/+page.md | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/packages/web/src/routes/docs/harperjs/introduction/+page.md b/packages/web/src/routes/docs/harperjs/introduction/+page.md index 1ec08294..c128f95d 100644 --- a/packages/web/src/routes/docs/harperjs/introduction/+page.md +++ b/packages/web/src/routes/docs/harperjs/introduction/+page.md @@ -21,3 +21,36 @@ On the inside, it uses a copy of Harper's core algorithm compiled to [WebAssembl It can be imported [natively in a browser](./CDN) or through [npm](https://www.npmjs.com/package/harper.js). @install-pkg(harper.js) + + +## Sample Usage: +```js +let harper = require('harper.js'); + +async function main() { + // We cannot use `WorkerLinter` on Node.js since it relies on web-specific APIs. 
+ let linter = new harper.LocalLinter(); + + let lints = await linter.lint('This is a example of how to use `harper.js`.'); + + console.log('Here are the results of linting the above text:'); + + for (let lint of lints) { + console.log(' - ', lint.span().start, ':', lint.span().end, lint.message()); + + if (lint.suggestion_count() != 0) { + console.log('Suggestions:'); + + for (let sug of lint.suggestions()) { + console.log( + '\t - ', + sug.kind() == 1 ? 'Remove' : 'Replace with', + sug.get_replacement_text() + ); + } + } + } +} + +main(); +``` From 5c23c5b9ba515a907e2302d8d619ed50518fd8e8 Mon Sep 17 00:00:00 2001 From: aminwhat Date: Thu, 2 Jan 2025 12:47:06 +0330 Subject: [PATCH 06/40] fix spacing --- .../docs/harperjs/introduction/+page.md | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/packages/web/src/routes/docs/harperjs/introduction/+page.md b/packages/web/src/routes/docs/harperjs/introduction/+page.md index c128f95d..447c14fe 100644 --- a/packages/web/src/routes/docs/harperjs/introduction/+page.md +++ b/packages/web/src/routes/docs/harperjs/introduction/+page.md @@ -25,31 +25,31 @@ It can be imported [natively in a browser](./CDN) or through [npm](https://www.n ## Sample Usage: ```js -let harper = require('harper.js'); +let harper = require("harper.js"); async function main() { - // We cannot use `WorkerLinter` on Node.js since it relies on web-specific APIs. - let linter = new harper.LocalLinter(); + // We cannot use `WorkerLinter` on Node.js since it relies on web-specific APIs. 
+ let linter = new harper.LocalLinter(); - let lints = await linter.lint('This is a example of how to use `harper.js`.'); + let lints = await linter.lint("This is a example of how to use `harper.js`."); - console.log('Here are the results of linting the above text:'); + console.log("Here are the results of linting the above text:"); - for (let lint of lints) { - console.log(' - ', lint.span().start, ':', lint.span().end, lint.message()); + for (let lint of lints) { + console.log(" - ", lint.span().start, ":", lint.span().end, lint.message()); - if (lint.suggestion_count() != 0) { - console.log('Suggestions:'); + if (lint.suggestion_count() != 0) { + console.log("Suggestions:"); - for (let sug of lint.suggestions()) { - console.log( - '\t - ', - sug.kind() == 1 ? 'Remove' : 'Replace with', - sug.get_replacement_text() - ); - } - } - } + for (let sug of lint.suggestions()) { + console.log( + "\t - ", + sug.kind() == 1 ? "Remove" : "Replace with", + sug.get_replacement_text(), + ); + } + } + } } main(); From 8a8182210b9679e465da0a6f5b1677a739712d02 Mon Sep 17 00:00:00 2001 From: aminwhat Date: Thu, 2 Jan 2025 12:50:23 +0330 Subject: [PATCH 07/40] fix spacing --- .../docs/harperjs/introduction/+page.md | 37 ++++++++++--------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/packages/web/src/routes/docs/harperjs/introduction/+page.md b/packages/web/src/routes/docs/harperjs/introduction/+page.md index c128f95d..32c74252 100644 --- a/packages/web/src/routes/docs/harperjs/introduction/+page.md +++ b/packages/web/src/routes/docs/harperjs/introduction/+page.md @@ -25,31 +25,32 @@ It can be imported [natively in a browser](./CDN) or through [npm](https://www.n ## Sample Usage: ```js -let harper = require('harper.js'); + +let harper = require("harper.js"); async function main() { - // We cannot use `WorkerLinter` on Node.js since it relies on web-specific APIs. 
- let linter = new harper.LocalLinter(); + // We cannot use `WorkerLinter` on Node.js since it relies on web-specific APIs. + let linter = new harper.LocalLinter(); - let lints = await linter.lint('This is a example of how to use `harper.js`.'); + let lints = await linter.lint("This is a example of how to use `harper.js`."); - console.log('Here are the results of linting the above text:'); + console.log("Here are the results of linting the above text:"); - for (let lint of lints) { - console.log(' - ', lint.span().start, ':', lint.span().end, lint.message()); + for (let lint of lints) { + console.log(" - ", lint.span().start, ":", lint.span().end, lint.message()); - if (lint.suggestion_count() != 0) { - console.log('Suggestions:'); + if (lint.suggestion_count() != 0) { + console.log("Suggestions:"); - for (let sug of lint.suggestions()) { - console.log( - '\t - ', - sug.kind() == 1 ? 'Remove' : 'Replace with', - sug.get_replacement_text() - ); - } - } - } + for (let sug of lint.suggestions()) { + console.log( + "\t - ", + sug.kind() == 1 ? "Remove" : "Replace with", + sug.get_replacement_text(), + ); + } + } + } } main(); From 69bd31749ffd1c8b38513a327530141aad154987 Mon Sep 17 00:00:00 2001 From: AminWhat <88392440+aminwhat@users.noreply.github.com> Date: Thu, 2 Jan 2025 13:02:08 +0330 Subject: [PATCH 08/40] Update +page.md add .setup() --- packages/web/src/routes/docs/harperjs/introduction/+page.md | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/web/src/routes/docs/harperjs/introduction/+page.md b/packages/web/src/routes/docs/harperjs/introduction/+page.md index 32c74252..e363dd38 100644 --- a/packages/web/src/routes/docs/harperjs/introduction/+page.md +++ b/packages/web/src/routes/docs/harperjs/introduction/+page.md @@ -31,6 +31,7 @@ let harper = require("harper.js"); async function main() { // We cannot use `WorkerLinter` on Node.js since it relies on web-specific APIs. 
let linter = new harper.LocalLinter(); + linter.setup(); let lints = await linter.lint("This is a example of how to use `harper.js`."); From b56af6b9dc5745dce9511d0547e8e3399f6fbf50 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Thu, 2 Jan 2025 08:04:40 -0700 Subject: [PATCH 09/40] fix(core): #341 by adding check to make sure we don't include `*-only` --- harper-core/src/linting/terminating_conjunctions.rs | 10 ++++++++++ harper-core/src/patterns/sequence_pattern.rs | 13 +++++++++++++ harper-core/src/token.rs | 4 ++++ 3 files changed, 27 insertions(+) diff --git a/harper-core/src/linting/terminating_conjunctions.rs b/harper-core/src/linting/terminating_conjunctions.rs index 9b0bb5c2..cc2af7d6 100644 --- a/harper-core/src/linting/terminating_conjunctions.rs +++ b/harper-core/src/linting/terminating_conjunctions.rs @@ -11,6 +11,7 @@ impl Default for TerminatingConjunctions { Self { pattern: Box::new(ConsumesRemainingPattern::new(Box::new( SequencePattern::default() + .then_anything_but_hyphen() .then_any_word_in(Lrc::new( [ "although", @@ -92,4 +93,13 @@ mod tests { fn no_false_positive() { assert_lint_count("Cookies and milk.", TerminatingConjunctions::default(), 0) } + + #[test] + fn issue_341() { + assert_lint_count( + "The structure has a couple of fields marked read-only, like A and B", + TerminatingConjunctions::default(), + 0, + ); + } } diff --git a/harper-core/src/patterns/sequence_pattern.rs b/harper-core/src/patterns/sequence_pattern.rs index 239f36db..afb9eeb0 100644 --- a/harper-core/src/patterns/sequence_pattern.rs +++ b/harper-core/src/patterns/sequence_pattern.rs @@ -27,6 +27,18 @@ macro_rules! 
gen_then_from_is { tok.kind.[< is_$quality >]() })) } + + pub fn [< then_anything_but_$quality >] (mut self) -> Self{ + self.token_patterns.push(Box::new(|tok: &Token, _source: &[char]| { + if tok.kind.[< is_$quality >](){ + false + }else{ + true + } + })); + + self + } } }; } @@ -43,6 +55,7 @@ impl SequencePattern { gen_then_from_is!(case_separator); gen_then_from_is!(adverb); gen_then_from_is!(adjective); + gen_then_from_is!(hyphen); pub fn then_exact_word(mut self, word: &'static str) -> Self { self.token_patterns diff --git a/harper-core/src/token.rs b/harper-core/src/token.rs index 2105c840..5d08ebb7 100644 --- a/harper-core/src/token.rs +++ b/harper-core/src/token.rs @@ -130,6 +130,10 @@ impl TokenKind { matches!(self, TokenKind::Punctuation(Punctuation::Ellipsis)) } + pub fn is_hyphen(&self) -> bool { + matches!(self, TokenKind::Punctuation(Punctuation::Hyphen)) + } + pub fn is_adjective(&self) -> bool { matches!( self, From d804e468de2188228d9ee0a04b912aaf883ac99a Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Thu, 2 Jan 2025 08:21:03 -0700 Subject: [PATCH 10/40] fix(web): ran `just format` to appease Prettier --- .../docs/harperjs/introduction/+page.md | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/packages/web/src/routes/docs/harperjs/introduction/+page.md b/packages/web/src/routes/docs/harperjs/introduction/+page.md index 277711f9..49e503f5 100644 --- a/packages/web/src/routes/docs/harperjs/introduction/+page.md +++ b/packages/web/src/routes/docs/harperjs/introduction/+page.md @@ -22,35 +22,35 @@ It can be imported [natively in a browser](./CDN) or through [npm](https://www.n @install-pkg(harper.js) - ## Sample Usage: + ```js -let harper = require("harper.js"); +let harper = require('harper.js'); async function main() { - // We cannot use `WorkerLinter` on Node.js since it relies on web-specific APIs. 
- let linter = new harper.LocalLinter(); - linter.setup(); + // We cannot use `WorkerLinter` on Node.js since it relies on web-specific APIs. + let linter = new harper.LocalLinter(); + linter.setup(); - let lints = await linter.lint("This is a example of how to use `harper.js`."); + let lints = await linter.lint('This is a example of how to use `harper.js`.'); - console.log("Here are the results of linting the above text:"); + console.log('Here are the results of linting the above text:'); - for (let lint of lints) { - console.log(" - ", lint.span().start, ":", lint.span().end, lint.message()); + for (let lint of lints) { + console.log(' - ', lint.span().start, ':', lint.span().end, lint.message()); - if (lint.suggestion_count() != 0) { - console.log("Suggestions:"); + if (lint.suggestion_count() != 0) { + console.log('Suggestions:'); - for (let sug of lint.suggestions()) { - console.log( - "\t - ", - sug.kind() == 1 ? "Remove" : "Replace with", - sug.get_replacement_text(), - ); - } - } - } + for (let sug of lint.suggestions()) { + console.log( + '\t - ', + sug.kind() == 1 ? 
'Remove' : 'Replace with', + sug.get_replacement_text() + ); + } + } + } } main(); From 2075d2c77760e3dea7aaa28671a149def9c6f01d Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Thu, 2 Jan 2025 08:34:39 -0700 Subject: [PATCH 11/40] feat(web): moved the Node.js example to its own page --- .../commonjs-simple/{index.cjs => index.js} | 0 .../examples/commonjs-simple/package.json | 2 +- .../docs/harperjs/introduction/+page.md | 36 +------------------ .../src/routes/docs/harperjs/node/+page.md | 15 ++++++++ packages/web/vite.config.ts | 4 +++ 5 files changed, 21 insertions(+), 36 deletions(-) rename packages/harper.js/examples/commonjs-simple/{index.cjs => index.js} (100%) create mode 100644 packages/web/src/routes/docs/harperjs/node/+page.md diff --git a/packages/harper.js/examples/commonjs-simple/index.cjs b/packages/harper.js/examples/commonjs-simple/index.js similarity index 100% rename from packages/harper.js/examples/commonjs-simple/index.cjs rename to packages/harper.js/examples/commonjs-simple/index.js diff --git a/packages/harper.js/examples/commonjs-simple/package.json b/packages/harper.js/examples/commonjs-simple/package.json index 74bd5808..5c5d138a 100644 --- a/packages/harper.js/examples/commonjs-simple/package.json +++ b/packages/harper.js/examples/commonjs-simple/package.json @@ -3,7 +3,7 @@ "version": "0.0.1", "private": true, "scripts": { - "start": "node index.cjs" + "start": "node index.js" }, "dependencies": { "harper.js": "^0.13.0" diff --git a/packages/web/src/routes/docs/harperjs/introduction/+page.md b/packages/web/src/routes/docs/harperjs/introduction/+page.md index 49e503f5..f2cc9b9e 100644 --- a/packages/web/src/routes/docs/harperjs/introduction/+page.md +++ b/packages/web/src/routes/docs/harperjs/introduction/+page.md @@ -18,40 +18,6 @@ Today, it serves as the foundation for our [Obsidian plugin](/docs/integrations/ `harper.js` is an ECMAScript module designed to be easy to import into any project. 
On the inside, it uses a copy of Harper's core algorithm compiled to [WebAssembly](https://webassembly.org/). -It can be imported [natively in a browser](./CDN) or through [npm](https://www.npmjs.com/package/harper.js). +It can be imported [natively in a browser](./CDN) or through [npm](https://www.npmjs.com/package/harper.js) and [consumed in Node.js](./node). @install-pkg(harper.js) - -## Sample Usage: - -```js -let harper = require('harper.js'); - -async function main() { - // We cannot use `WorkerLinter` on Node.js since it relies on web-specific APIs. - let linter = new harper.LocalLinter(); - linter.setup(); - - let lints = await linter.lint('This is a example of how to use `harper.js`.'); - - console.log('Here are the results of linting the above text:'); - - for (let lint of lints) { - console.log(' - ', lint.span().start, ':', lint.span().end, lint.message()); - - if (lint.suggestion_count() != 0) { - console.log('Suggestions:'); - - for (let sug of lint.suggestions()) { - console.log( - '\t - ', - sug.kind() == 1 ? 'Remove' : 'Replace with', - sug.get_replacement_text() - ); - } - } - } -} - -main(); -``` diff --git a/packages/web/src/routes/docs/harperjs/node/+page.md b/packages/web/src/routes/docs/harperjs/node/+page.md new file mode 100644 index 00000000..ef9ca2de --- /dev/null +++ b/packages/web/src/routes/docs/harperjs/node/+page.md @@ -0,0 +1,15 @@ +--- +title: Using Harper in Node.js +--- + +Harper.js can run in Node.js. +There is just one consideration: as described in [more detailed here](./linting), we cannot use the `WorkerLinter`. +That means we must use the `LocalLinter`. + +Additionally, since `harper.js` is an ECMAScript module, it must be imported in a relatively recent version of Node.js. 
+ +## Example Code + +The example below can be found in [the Harper monorepo.](https://github.com/Automattic/harper/tree/master/packages/harper.js/examples/commonjs-simple) + +@code(../../../../../../harper.js/examples/commonjs-simple/index.js) diff --git a/packages/web/vite.config.ts b/packages/web/vite.config.ts index 53c76f39..8f5adb17 100644 --- a/packages/web/vite.config.ts +++ b/packages/web/vite.config.ts @@ -80,6 +80,10 @@ export default defineConfig({ title: 'Linting', to: '/docs/harperjs/linting' }, + { + title: 'Node.js', + to: '/docs/harperjs/node' + }, { title: 'CDN', to: '/docs/harperjs/CDN' From 613d0a2eae2b9af0f658e965751ad081e9df659d Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Thu, 2 Jan 2025 08:38:43 -0700 Subject: [PATCH 12/40] fix: satisfy `eslint` and update `harper.js` version --- packages/harper.js/examples/commonjs-simple/index.js | 2 +- packages/harper.js/examples/commonjs-simple/package.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/harper.js/examples/commonjs-simple/index.js b/packages/harper.js/examples/commonjs-simple/index.js index 2a27a5ef..69bba9e6 100644 --- a/packages/harper.js/examples/commonjs-simple/index.js +++ b/packages/harper.js/examples/commonjs-simple/index.js @@ -1,4 +1,4 @@ -let harper = require('harper.js'); +import * as harper from 'harper.js'; async function main() { // We cannot use `WorkerLinter` on Node.js since it relies on web-specific APIs. 
diff --git a/packages/harper.js/examples/commonjs-simple/package.json b/packages/harper.js/examples/commonjs-simple/package.json index 5c5d138a..ad222da0 100644 --- a/packages/harper.js/examples/commonjs-simple/package.json +++ b/packages/harper.js/examples/commonjs-simple/package.json @@ -6,6 +6,6 @@ "start": "node index.js" }, "dependencies": { - "harper.js": "^0.13.0" + "harper.js": "^0.14.0" } } From 5540f2ca7a346f941c65276c127c7caba2712892 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Thu, 2 Jan 2025 09:54:28 -0700 Subject: [PATCH 13/40] docs: wrote page about contributing to the curated dictionary --- .../examples/commonjs-simple/yarn.lock | 14 ++--- .../docs/contributors/architecture/+page.md | 4 +- .../docs/contributors/dictionary/+page.md | 57 +++++++++++++++++++ .../docs/contributors/environment/+page.md | 2 +- .../integrations/language-server/+page.md | 3 +- packages/web/vite.config.ts | 4 ++ 6 files changed, 70 insertions(+), 14 deletions(-) create mode 100644 packages/web/src/routes/docs/contributors/dictionary/+page.md diff --git a/packages/harper.js/examples/commonjs-simple/yarn.lock b/packages/harper.js/examples/commonjs-simple/yarn.lock index 9ebc976e..f54ca7e9 100644 --- a/packages/harper.js/examples/commonjs-simple/yarn.lock +++ b/packages/harper.js/examples/commonjs-simple/yarn.lock @@ -2,13 +2,7 @@ # yarn lockfile v1 -harper.js@^0.13.0: - version "0.13.0" - resolved "https://registry.yarnpkg.com/harper.js/-/harper.js-0.13.0.tgz#f9e945c842f4e07e1b7042e3eef00a8040b2e0dd" - integrity sha512-XjiQG7kpooKCU2xp46mvNra9PUq5n3z5kXOaWvKjOBRyWIllvuEwvDv40cEUODhWD2ABVTEMea8odXRORfyruw== - dependencies: - wasm "link:../../../../../../.cache/yarn/v6/npm-harper-js-0.13.0-f9e945c842f4e07e1b7042e3eef00a8040b2e0dd-integrity/harper-wasm/pkg" - -"wasm@link:../../../../../../.cache/yarn/v6/npm-harper-js-0.13.0-f9e945c842f4e07e1b7042e3eef00a8040b2e0dd-integrity/harper-wasm/pkg": - version "0.0.0" - uid "" +harper.js@^0.14.0: + version "0.14.0" + resolved 
"https://registry.yarnpkg.com/harper.js/-/harper.js-0.14.0.tgz#16d511070d4474dca501d4de1c76f7849ec0accb" + integrity sha512-WRzop2PmFK2AZiqNMHHWDFIqFmS7DeoNVhyibyu7T6wuSe24E1Tunolz8ZzMPfcaKuBB2SY+yGbyTOyj+/QD8g== diff --git a/packages/web/src/routes/docs/contributors/architecture/+page.md b/packages/web/src/routes/docs/contributors/architecture/+page.md index 2a891c6f..f8a00915 100644 --- a/packages/web/src/routes/docs/contributors/architecture/+page.md +++ b/packages/web/src/routes/docs/contributors/architecture/+page.md @@ -20,7 +20,7 @@ In this vein, Harper serves the role of a [Linter]( +``` + +If this command doesn't look familiar, [read our setup documentation for contributors](./environment). diff --git a/packages/web/src/routes/docs/contributors/environment/+page.md b/packages/web/src/routes/docs/contributors/environment/+page.md index d94e4d75..3d0ee5e0 100644 --- a/packages/web/src/routes/docs/contributors/environment/+page.md +++ b/packages/web/src/routes/docs/contributors/environment/+page.md @@ -13,7 +13,7 @@ To use the tooling required to build and debug Harper, you'll need to the follow - [`wasm-pack`](https://rustwasm.github.io/wasm-pack/installer/) - `zip` -We develop a series of tools, accessible via `just`, to build and debug Harper's algorithm (otherwise known as `harper-core`) and its various integrations. +We develop a set of tools, accessible via `just`, to build and debug Harper's algorithm (otherwise known as `harper-core`) and its various integrations. To get see all the tools in your toolbox run: ```bash diff --git a/packages/web/src/routes/docs/integrations/language-server/+page.md b/packages/web/src/routes/docs/integrations/language-server/+page.md index 82e5d283..7c394492 100644 --- a/packages/web/src/routes/docs/integrations/language-server/+page.md +++ b/packages/web/src/routes/docs/integrations/language-server/+page.md @@ -64,5 +64,4 @@ The static dictionary is built into the binary and is (as of now) immutable. 
It contains almost all words you could possibly encounter. I _do_ take pull requests or issues for adding words to the static dictionary. -It is composed of two files: `harper-core/dictionary.dict` and `harper-core/affixes.json` -If you just want to add a proper noun, try running `just addnoun `. +[Read the documentation on the matter before you do.](../contributors/dictionary) diff --git a/packages/web/vite.config.ts b/packages/web/vite.config.ts index 8f5adb17..3c0f49ce 100644 --- a/packages/web/vite.config.ts +++ b/packages/web/vite.config.ts @@ -108,6 +108,10 @@ export default defineConfig({ { title: 'Architecture', to: '/docs/contributors/architecture' + }, + { + title: 'Dictionary', + to: '/docs/contributors/dictionary' } ] }, From 9f6737bab69bf825fcf64202e8bd384a24ca3c75 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Thu, 2 Jan 2025 10:12:55 -0700 Subject: [PATCH 14/40] feat(core): improve docs for declaratively generated linters --- harper-core/src/linting/proper_noun_capitalization_linters.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/harper-core/src/linting/proper_noun_capitalization_linters.rs b/harper-core/src/linting/proper_noun_capitalization_linters.rs index fefea849..4c10a59b 100644 --- a/harper-core/src/linting/proper_noun_capitalization_linters.rs +++ b/harper-core/src/linting/proper_noun_capitalization_linters.rs @@ -12,6 +12,7 @@ macro_rules! 
create_linter_for { create_linter_for!($name, $pattern, $message, $message); }; ($name:ident, $pattern:expr, $message:literal, $description:literal) => { + #[doc = $description] pub struct $name { pattern: Box, dict: Arc, From 009312c335574ba69b7fe80c6011bb9d60783207 Mon Sep 17 00:00:00 2001 From: Clay Dugo Date: Thu, 2 Jan 2025 12:51:59 -0500 Subject: [PATCH 15/40] Add project names --- harper-core/dictionary.dict | 2 ++ 1 file changed, 2 insertions(+) diff --git a/harper-core/dictionary.dict b/harper-core/dictionary.dict index 91c24181..e976966b 100644 --- a/harper-core/dictionary.dict +++ b/harper-core/dictionary.dict @@ -49762,3 +49762,5 @@ glymph/SM Snapchat/SM middleware/SM OpenSearch/SM +OpenGL/SM +WebGPU/SM From 0c7b540cbe9c462cbd87dfb31110f3668038a92d Mon Sep 17 00:00:00 2001 From: Clay Dugo Date: Thu, 2 Jan 2025 12:54:08 -0500 Subject: [PATCH 16/40] Add digital image processing terminology --- harper-core/dictionary.dict | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/harper-core/dictionary.dict b/harper-core/dictionary.dict index e976966b..3de844a3 100644 --- a/harper-core/dictionary.dict +++ b/harper-core/dictionary.dict @@ -49764,3 +49764,11 @@ middleware/SM OpenSearch/SM OpenGL/SM WebGPU/SM +backfill/SM +downsample/SM +downsampling/SM +subclassing/SM +thresholding/SM +uncheck/SM +unsample/SM +unsampling/SM From 90a91be46436de494e288aadf1aecbf761d26e5a Mon Sep 17 00:00:00 2001 From: Clay Dugo Date: Thu, 2 Jan 2025 12:54:36 -0500 Subject: [PATCH 17/40] Add organoid --- harper-core/dictionary.dict | 1 + 1 file changed, 1 insertion(+) diff --git a/harper-core/dictionary.dict b/harper-core/dictionary.dict index 3de844a3..5ed23fc8 100644 --- a/harper-core/dictionary.dict +++ b/harper-core/dictionary.dict @@ -49772,3 +49772,4 @@ thresholding/SM uncheck/SM unsample/SM unsampling/SM +organoid/SM From ec936cb5749f36e1e90e1c35939124adf78b77ca Mon Sep 17 00:00:00 2001 From: Clay Dugo Date: Thu, 2 Jan 2025 12:54:57 -0500 Subject: [PATCH 18/40] Add 
centric --- harper-core/dictionary.dict | 1 + 1 file changed, 1 insertion(+) diff --git a/harper-core/dictionary.dict b/harper-core/dictionary.dict index 5ed23fc8..61a3ffed 100644 --- a/harper-core/dictionary.dict +++ b/harper-core/dictionary.dict @@ -49773,3 +49773,4 @@ uncheck/SM unsample/SM unsampling/SM organoid/SM +centric/SM From f4ba4d04c75fa078bd935ea200ee6baf0f675142 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Thu, 2 Jan 2025 11:10:24 -0700 Subject: [PATCH 19/40] feat(core): wrote linter to look for separated closed compound words --- harper-core/src/linting/compound_words.rs | 147 ++++++++++++++++++ harper-core/src/linting/lint_group.rs | 7 +- harper-core/src/linting/matcher.rs | 81 ---------- harper-core/src/linting/mod.rs | 3 +- .../proper_noun_capitalization_linters.rs | 12 ++ 5 files changed, 166 insertions(+), 84 deletions(-) create mode 100644 harper-core/src/linting/compound_words.rs diff --git a/harper-core/src/linting/compound_words.rs b/harper-core/src/linting/compound_words.rs new file mode 100644 index 00000000..47d92533 --- /dev/null +++ b/harper-core/src/linting/compound_words.rs @@ -0,0 +1,147 @@ +use std::sync::Arc; + +use itertools::Itertools; + +use crate::{CharString, Dictionary, Document, FstDictionary, Span}; + +use super::{Lint, LintKind, Linter, Suggestion}; + +pub struct CompoundWords { + dict: Arc, +} + +impl CompoundWords { + pub fn new() -> Self { + Self { + dict: FstDictionary::curated(), + } + } +} + +impl Default for CompoundWords { + fn default() -> Self { + Self::new() + } +} + +impl Linter for CompoundWords { + fn lint(&mut self, document: &Document) -> Vec { + let mut lints = Vec::new(); + + let mut merged_word = CharString::new(); + + for (a, w, b) in document.tokens().tuple_windows() { + if !a.kind.is_word() || !w.kind.is_whitespace() || !b.kind.is_word() { + continue; + } + + let a_chars = document.get_span_content(a.span); + let b_chars = document.get_span_content(b.span); + + // Not super helpful in this case, 
so we skip it + if matches!(a_chars, ['a']) { + continue; + } + + merged_word.clear(); + merged_word.extend_from_slice(a_chars); + merged_word.extend_from_slice(b_chars); + + if self.dict.contains_word(&merged_word) { + lints.push(Lint { + span: Span::new(a.span.start, b.span.end), + lint_kind: LintKind::Spelling, + suggestions: vec![Suggestion::ReplaceWith(merged_word.to_vec())], + message: "These two words are often combined to form a closed compound word." + .to_owned(), + priority: 63, + }); + } + } + + lints + } + + fn description(&self) -> &str { + "Accidentally inserting a space inside a word is common. This rule looks for valid words that are split by whitespace." + } +} + +#[cfg(test)] +mod tests { + use crate::linting::tests::assert_lint_count; + + use super::CompoundWords; + + #[test] + fn scarecrow() { + assert_lint_count( + "I saw a scare crow in the field today.", + CompoundWords::default(), + 1, + ); + } + + #[test] + fn clean() { + assert_lint_count( + "When referring to the political party, make sure to treat them as a proper noun.", + CompoundWords::default(), + 0, + ); + } + + #[test] + fn bookshelf() { + assert_lint_count( + "I have a big book shelf in my room.", + CompoundWords::default(), + 1, + ); + } + + #[test] + fn sunscreen() { + assert_lint_count( + "Don't forget to apply your sunscreen before going out.", + CompoundWords::default(), + 0, + ); + } + + #[test] + fn makeup() { + assert_lint_count( + "She spent a lot of time doing her make up this morning.", + CompoundWords::default(), + 1, + ); + } + + #[test] + fn birthday() { + assert_lint_count( + "We're having a big party to celebrate the couple's birthday today.", + CompoundWords::default(), + 0, + ); + } + + #[test] + fn hometown() { + assert_lint_count( + "My home town is a beautiful place with many historical land marks.", + CompoundWords::default(), + 2, + ); + } + + #[test] + fn assertions() { + assert_lint_count( + "Make sure to compile with debug ass ertions disabled.", + 
CompoundWords::default(), + 1, + ); + } +} diff --git a/harper-core/src/linting/lint_group.rs b/harper-core/src/linting/lint_group.rs index fd4489a6..b49dfec9 100644 --- a/harper-core/src/linting/lint_group.rs +++ b/harper-core/src/linting/lint_group.rs @@ -5,6 +5,7 @@ use super::an_a::AnA; use super::avoid_curses::AvoidCurses; use super::boring_words::BoringWords; use super::capitalize_personal_pronouns::CapitalizePersonalPronouns; +use super::compound_words::CompoundWords; use super::correct_number_suffix::CorrectNumberSuffix; use super::dot_initialisms::DotInitialisms; use super::ellipsis_length::EllipsisLength; @@ -15,7 +16,7 @@ use super::multiple_sequential_pronouns::MultipleSequentialPronouns; use super::number_suffix_capitalization::NumberSuffixCapitalization; use super::proper_noun_capitalization_linters::{ AmazonNames, Americas, AppleNames, AzureNames, ChineseCommunistParty, GoogleNames, Holidays, - MetaNames, MicrosoftNames, UnitedOrganizations, + Koreas, MetaNames, MicrosoftNames, UnitedOrganizations, }; use super::repeated_words::RepeatedWords; use super::sentence_capitalization::SentenceCapitalization; @@ -162,6 +163,7 @@ create_lint_group_config!( ThatWhich => true, CapitalizePersonalPronouns => true, Americas => true, + Koreas => true, ChineseCommunistParty => true, UnitedOrganizations => true, Holidays => true, @@ -170,7 +172,8 @@ create_lint_group_config!( MetaNames => true, MicrosoftNames => true, AppleNames => true, - AzureNames => true + AzureNames => true, + CompoundWords => true ); impl Default for LintGroup { diff --git a/harper-core/src/linting/matcher.rs b/harper-core/src/linting/matcher.rs index 3349bedb..cefd4c50 100644 --- a/harper-core/src/linting/matcher.rs +++ b/harper-core/src/linting/matcher.rs @@ -106,7 +106,6 @@ impl Matcher { "wellbeing" => "well-being", "hashtable" => "hash table", "hashmap" => "hash map", - "CCP" => "Chinese Communist Party", "dep" => "dependency", "deps" => "dependencies", "off","the","cuff" => 
"off-the-cuff", @@ -118,7 +117,6 @@ impl Matcher { "todo" => "to-do", "To-Do" => "To-do", "performing","this" => "perform this", - "united nations" => "United Nations", "mins" => "minutes", "min" => "minute", "min" => "minimum", @@ -139,10 +137,6 @@ impl Matcher { "There","fore" => "Therefore", "fatal","outcome" => "death", "geiger","counter" => "Geiger counter", - "veterans","day" => "Veterans Day", - "presidents","day" => "Presidents' Day", - "president's","day" => "Presidents' Day", - "valentines","day" => "Valentine's Day", "world","war","2" => "World War II", "World","war","ii" => "World War II", "world","War","ii" => "World War II", @@ -185,86 +179,11 @@ impl Matcher { "the","hing" => "the thing", "The","hing" => "The thing", "need","helps" => "need help", - "all","though" => "although", - "All","though" => "although", - "al","though" => "although", - "Al","though" => "although", "an","this" => "and this", "break","up" => "break-up", "case", "sensitive" => "case-sensitive", - "bare", "foot" => "barefoot", - "air", "port" => "airport", - "any", "body" => "anybody", - "every", "body" => "everybody", - "no", "body" => "nobody", - "some", "body" => "somebody", - "any", "one" => "anyone", - "every", "one" => "everyone", - "some", "one" => "someone", - "any", "thing" => "anything", - "every", "thing" => "everything", - "no", "thing" => "nothing", - "some", "thing" => "something", - "any", "where" => "anywhere", - "every", "where" => "everywhere", - "no", "where" => "nowhere", - "some", "where" => "somewhere", - "baby", "sit" => "babysit", - "back", "ground" => "background", - "bare", "foot" => "barefoot", - "base", "ball" => "baseball", - "basket", "ball" => "basketball", - "foot", "ball" => "football", - "bath", "room" => "bathroom", - "bed", "room" => "bedroom", - "black", "berry" => "blackberry", - "blue", "berry" => "blueberry", - "break", "fast" => "breakfast", - "can", "not" => "cannot", - "check", "out" => "checkout", - "cow", "boy" => "cowboy", - "day", 
"light" => "daylight", - "desk", "top" => "desktop", - "finger", "print" => "fingerprint", - "fire", "fly" => "firefly", - "fore", "ver" => "forever", - "gentle", "man" => "gentleman", - "grand", "mother" => "grandmother", - "grand", "father" => "grandfather", - "grand", "daughter" => "granddaughter", - "grape", "fruit" => "grapefruit", - "grass", "hopper" => "grasshopper", - "head", "quarters" => "headquarters", - "hand", "shake" => "handshake", - "in", "side" => "inside", - "key", "board" => "keyboard", - "lip", "stick" => "lipstick", - "mail", "box" => "mailbox", - "never", "theless" => "nevertheless", - "none", "theless" => "nonetheless", - "note", "book" => "notebook", - "ou", "tside" => "outside", - "pay", "day" => "payday", - "rail", "road" => "railroad", - "rain", "bow" => "rainbow", - "rain", "coat" => "raincoat", - "skate", "board" => "skateboard", - "smart", "phone" => "smartphone", - "snow", "ball" => "snowball", - "some", "times" => "sometimes", - "sun", "flower" => "sunflower", - "tooth", "brush" => "toothbrush", - "turn", "table" => "turntable", - "under", "cover" => "undercover", - "up", "stream" => "upstream", - "water", "fall" => "waterfall", - "water", "melon" => "watermelon", - "wee", "kend" => "weekend", - "with", "in" => "within", - "with", "out" => "without", "Tree", "sitter" => "Tree-sitter", "all", "of", "the" => "all the", - "an", "other" => "another", "not", "longer" => "no longer", "to", "towards" => "towards", "though", "process" => "thought process", diff --git a/harper-core/src/linting/mod.rs b/harper-core/src/linting/mod.rs index f06e548b..58217074 100644 --- a/harper-core/src/linting/mod.rs +++ b/harper-core/src/linting/mod.rs @@ -2,6 +2,7 @@ mod an_a; mod avoid_curses; mod boring_words; mod capitalize_personal_pronouns; +mod compound_words; mod correct_number_suffix; mod dot_initialisms; mod ellipsis_length; @@ -42,7 +43,7 @@ pub use number_suffix_capitalization::NumberSuffixCapitalization; pub use pattern_linter::PatternLinter; 
pub use proper_noun_capitalization_linters::{ AmazonNames, Americas, AppleNames, AzureNames, ChineseCommunistParty, GoogleNames, Holidays, - MetaNames, MicrosoftNames, UnitedOrganizations, + Koreas, MetaNames, MicrosoftNames, UnitedOrganizations, }; pub use repeated_words::RepeatedWords; pub use sentence_capitalization::SentenceCapitalization; diff --git a/harper-core/src/linting/proper_noun_capitalization_linters.rs b/harper-core/src/linting/proper_noun_capitalization_linters.rs index 4c10a59b..5b4c896c 100644 --- a/harper-core/src/linting/proper_noun_capitalization_linters.rs +++ b/harper-core/src/linting/proper_noun_capitalization_linters.rs @@ -71,6 +71,18 @@ create_linter_for!( "When referring to the continents, make sure to treat them as a proper noun." ); +create_linter_for!( + Koreas, + SequencePattern::default() + .then(Box::new(EitherPattern::new(vec![ + Box::new(SequencePattern::default().then_any_capitalization_of("South")), + Box::new(SequencePattern::default().then_any_capitalization_of("North")) + ]))) + .then_whitespace() + .then_any_capitalization_of("Korea"), + "When referring to the nations, make sure to treat them as a proper noun." 
+); + create_linter_for!( ChineseCommunistParty, SequencePattern::default() From 83eec1b04de2bc2b5481296c93b3e2e0f482900f Mon Sep 17 00:00:00 2001 From: Clay Dugo Date: Thu, 2 Jan 2025 13:28:45 -0500 Subject: [PATCH 20/40] Manage present participle with correct affix --- harper-core/dictionary.dict | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/harper-core/dictionary.dict b/harper-core/dictionary.dict index 61a3ffed..58ad4040 100644 --- a/harper-core/dictionary.dict +++ b/harper-core/dictionary.dict @@ -44053,7 +44053,7 @@ subbed/4 subbing/41 subbranch/1MS subcategory/1SM -subclass/14 +subclass/14G subcommittee/1SM subcompact/15SM subconscious/51PMY @@ -45628,7 +45628,7 @@ threesome/1SM threnody/1SM thresh/4MDRSZG thresher/1M -threshold/1SM +threshold/1SMG threw/4 thrice/ thrift/14SM @@ -49765,12 +49765,8 @@ OpenSearch/SM OpenGL/SM WebGPU/SM backfill/SM -downsample/SM -downsampling/SM -subclassing/SM -thresholding/SM +downsample/SMG uncheck/SM -unsample/SM -unsampling/SM +unsample/SMG organoid/SM centric/SM From d215d7e5d53a9f5515137a3f34c74ec1fe388a72 Mon Sep 17 00:00:00 2001 From: Clay Dugo Date: Thu, 2 Jan 2025 13:35:22 -0500 Subject: [PATCH 21/40] upsample* --- harper-core/dictionary.dict | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harper-core/dictionary.dict b/harper-core/dictionary.dict index 58ad4040..e3f7ba70 100644 --- a/harper-core/dictionary.dict +++ b/harper-core/dictionary.dict @@ -49767,6 +49767,6 @@ WebGPU/SM backfill/SM downsample/SMG uncheck/SM -unsample/SMG +upsample/SMG organoid/SM centric/SM From f6f578408ac19302463cefcdb4cae48f00888916 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Thu, 2 Jan 2025 12:07:48 -0700 Subject: [PATCH 22/40] docs: use #343 as an example of a good PR for dictionary edits --- packages/web/src/routes/docs/contributors/dictionary/+page.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packages/web/src/routes/docs/contributors/dictionary/+page.md 
b/packages/web/src/routes/docs/contributors/dictionary/+page.md index 8e4d18b2..3de8d652 100644 --- a/packages/web/src/routes/docs/contributors/dictionary/+page.md +++ b/packages/web/src/routes/docs/contributors/dictionary/+page.md @@ -6,6 +6,8 @@ The curated dictionary is the English dictionary Harper uses as reference intern It is common, especially with technical language, to come across words that are not in this dictionary. If this happens to you, please open a PR to get it in. +PR [#343](https://github.com/Automattic/harper/pull/343) is a great example of what is described here. + There are two files you need to worry about. [`harper-core/dictionary.dict`](https://github.com/Automattic/harper/blob/master/harper-core/dictionary.dict) and [`harper-core/affixes.json`](https://github.com/Automattic/harper/blob/master/harper-core/affixes.json). The first is a list of words, tagged with modifiers defined in the second. From 5fe070e7c183068d749b8ce85b634fd8727d757e Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Thu, 2 Jan 2025 15:21:21 -0700 Subject: [PATCH 23/40] feat(core): added simple linter for the conjugations of "to be" for #256 --- harper-core/affixes.json | 15 ++- harper-core/dictionary.dict | 7 +- harper-core/src/linting/lint.rs | 2 + harper-core/src/linting/lint_group.rs | 4 +- harper-core/src/linting/mod.rs | 2 + harper-core/src/linting/plural_conjugate.rs | 94 +++++++++++++++++++ harper-core/src/spell/full_dictionary.rs | 8 ++ harper-core/src/token.rs | 16 ++++ .../tests/test_sources/obsidian_links.md | 2 +- 9 files changed, 143 insertions(+), 7 deletions(-) create mode 100644 harper-core/src/linting/plural_conjugate.rs diff --git a/harper-core/affixes.json b/harper-core/affixes.json index 57dda979..2c72dce7 100644 --- a/harper-core/affixes.json +++ b/harper-core/affixes.json @@ -483,7 +483,9 @@ "replacements": [], "adds_metadata": {}, "gifts_metadata": { - "noun": {} + "noun": { + "is_plural": false + } } }, "2": { @@ -557,6 +559,17 @@ } } }, + 
"9": { + "suffix": true, + "cross_product": true, + "replacements": [], + "adds_metadata": {}, + "gifts_metadata": { + "noun": { + "is_plural": true + } + } + }, "~": { "suffix": true, "cross_product": true, diff --git a/harper-core/dictionary.dict b/harper-core/dictionary.dict index e3f7ba70..ed321060 100644 --- a/harper-core/dictionary.dict +++ b/harper-core/dictionary.dict @@ -45485,7 +45485,7 @@ therapeutically/ therapeutics/1M therapist/1SM therapy/14SM -there/18~ +there/~ there's/ thereabout/S thereafter/1 @@ -45519,7 +45519,7 @@ thermostatic/5 thermostatically/ thesauri/1 thesaurus/1MS -these/8S~ +these/8S~9 thesis/1M thespian/51SM theta/1SM @@ -45544,9 +45544,8 @@ thieve/4DSG thievery/1M thieving/451M thievish/5 -thigh/1M +thigh/1MS thighbone/1MS -thighs/1 thimble/14MS thimbleful/1SM thin/514YSP diff --git a/harper-core/src/linting/lint.rs b/harper-core/src/linting/lint.rs index 8638d2b2..6c92706f 100644 --- a/harper-core/src/linting/lint.rs +++ b/harper-core/src/linting/lint.rs @@ -36,6 +36,7 @@ pub enum LintKind { Repetition, Enhancement, Readability, + WordChoice, #[default] Miscellaneous, } @@ -50,6 +51,7 @@ impl Display for LintKind { LintKind::Readability => "Readability", LintKind::Miscellaneous => "Miscellaneous", LintKind::Enhancement => "Enhancement", + LintKind::WordChoice => "Word Choice", }; write!(f, "{}", s) diff --git a/harper-core/src/linting/lint_group.rs b/harper-core/src/linting/lint_group.rs index b49dfec9..175e2d04 100644 --- a/harper-core/src/linting/lint_group.rs +++ b/harper-core/src/linting/lint_group.rs @@ -14,6 +14,7 @@ use super::long_sentences::LongSentences; use super::matcher::Matcher; use super::multiple_sequential_pronouns::MultipleSequentialPronouns; use super::number_suffix_capitalization::NumberSuffixCapitalization; +use super::plural_conjugate::PluralConjugate; use super::proper_noun_capitalization_linters::{ AmazonNames, Americas, AppleNames, AzureNames, ChineseCommunistParty, GoogleNames, Holidays, Koreas, MetaNames, 
MicrosoftNames, UnitedOrganizations, @@ -173,7 +174,8 @@ create_lint_group_config!( MicrosoftNames => true, AppleNames => true, AzureNames => true, - CompoundWords => true + CompoundWords => true, + PluralConjugate => true ); impl Default for LintGroup { diff --git a/harper-core/src/linting/mod.rs b/harper-core/src/linting/mod.rs index 58217074..254d286a 100644 --- a/harper-core/src/linting/mod.rs +++ b/harper-core/src/linting/mod.rs @@ -14,6 +14,7 @@ mod matcher; mod multiple_sequential_pronouns; mod number_suffix_capitalization; mod pattern_linter; +mod plural_conjugate; mod proper_noun_capitalization_linters; mod repeated_words; mod sentence_capitalization; @@ -63,6 +64,7 @@ pub trait Linter { fn lint(&mut self, document: &Document) -> Vec; fn description(&self) -> &str; } + #[cfg(feature = "concurrent")] pub trait Linter: Send + Sync { fn lint(&mut self, document: &Document) -> Vec; diff --git a/harper-core/src/linting/plural_conjugate.rs b/harper-core/src/linting/plural_conjugate.rs new file mode 100644 index 00000000..af8e92b9 --- /dev/null +++ b/harper-core/src/linting/plural_conjugate.rs @@ -0,0 +1,94 @@ +use crate::{ + patterns::{EitherPattern, Pattern, SequencePattern}, + Token, +}; + +use super::{Lint, LintKind, PatternLinter, Suggestion}; + +pub struct PluralConjugate { + pattern: Box, +} + +impl Default for PluralConjugate { + fn default() -> Self { + let plural_case = SequencePattern::default() + .then_plural_noun() + .then_whitespace() + .then_exact_word("is"); + + let non_plural_case = SequencePattern::default() + .then(Box::new(|tok: &Token, _source: &[char]| { + tok.kind.is_not_plural_noun() && tok.kind.is_noun() + })) + .then_whitespace() + .then_exact_word("are"); + + let pat = EitherPattern::new(vec![Box::new(plural_case), Box::new(non_plural_case)]); + + Self { + pattern: Box::new(pat), + } + } +} + +impl PatternLinter for PluralConjugate { + fn pattern(&self) -> &dyn Pattern { + self.pattern.as_ref() + } + + fn match_to_lint(&self, 
matched_tokens: &[Token], _source: &[char]) -> Lint { + let should_be_plural = matched_tokens.first().unwrap().kind.is_plural_noun(); + + let sug = if should_be_plural { + vec!['a', 'r', 'e'] + } else { + vec!['i', 's'] + }; + + Lint { + span: matched_tokens.last().unwrap().span, + lint_kind: LintKind::WordChoice, + suggestions: vec![Suggestion::ReplaceWith(sug)], + message: "Use the alternative conjugation of this verb to be consistent with the noun's plural nature.".to_owned(), + priority: 63, + } + } + + fn description(&self) -> &'static str { + "Make sure you use the correct conjugation of the verb \"to be\" in plural contexts." + } +} + +#[cfg(test)] +mod tests { + use crate::linting::tests::assert_suggestion_result; + + use super::PluralConjugate; + + #[test] + fn issue_256() { + assert_suggestion_result( + "The bananas is tasty", + PluralConjugate::default(), + "The bananas are tasty", + ); + } + + #[test] + fn plural_students() { + assert_suggestion_result( + "The students is doing their homework.", + PluralConjugate::default(), + "The students are doing their homework.", + ); + } + + #[test] + fn singular_house() { + assert_suggestion_result( + "The house are just sitting there.", + PluralConjugate::default(), + "The house is just sitting there.", + ); + } +} diff --git a/harper-core/src/spell/full_dictionary.rs b/harper-core/src/spell/full_dictionary.rs index 21e7476e..cc20e126 100644 --- a/harper-core/src/spell/full_dictionary.rs +++ b/harper-core/src/spell/full_dictionary.rs @@ -329,4 +329,12 @@ mod tests { assert!(is_sorted_by_dist) } + + #[test] + fn there_is_not_a_pronoun() { + let dict = FullDictionary::curated(); + + assert!(!dict.get_word_metadata_str("there").is_noun()); + assert!(!dict.get_word_metadata_str("there").is_pronoun_noun()); + } } diff --git a/harper-core/src/token.rs b/harper-core/src/token.rs index 5d08ebb7..ef82c703 100644 --- a/harper-core/src/token.rs +++ b/harper-core/src/token.rs @@ -319,6 +319,22 @@ impl TokenKind { 
metadata.is_linking_verb() } + pub fn is_not_plural_noun(&self) -> bool { + let TokenKind::Word(metadata) = self else { + return true; + }; + + metadata.is_not_plural_noun() + } + + pub fn is_plural_noun(&self) -> bool { + let TokenKind::Word(metadata) = self else { + return false; + }; + + metadata.is_plural_noun() + } + pub fn is_noun(&self) -> bool { let TokenKind::Word(metadata) = self else { return false; diff --git a/harper-core/tests/test_sources/obsidian_links.md b/harper-core/tests/test_sources/obsidian_links.md index 9b5cb15f..e4d70f70 100644 --- a/harper-core/tests/test_sources/obsidian_links.md +++ b/harper-core/tests/test_sources/obsidian_links.md @@ -3,7 +3,7 @@ Below, you will find a number of example links that Obsidian is able to process. These should be treated as normal Markdown links. -The stuff inside the square brackets is visible and should be checked by Harper. +The things inside the square brackets are visible and should be checked by Harper. [[Three lws of motion]] [Three las of motion](Three%20laws%20of%20motion.md) From 47d5b195d2fbb3b4f6c95f981da0b40bc16c83c1 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Thu, 2 Jan 2025 15:21:57 -0700 Subject: [PATCH 24/40] feat(cli): added command to expose metadata of individual words --- harper-cli/src/main.rs | 13 ++++++++++++- justfile | 4 ++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/harper-cli/src/main.rs b/harper-cli/src/main.rs index ef57427b..843457c7 100644 --- a/harper-cli/src/main.rs +++ b/harper-cli/src/main.rs @@ -31,7 +31,9 @@ enum Args { /// The file you wish to display the spans. file: PathBuf, }, - /// Emit decompressed, line-separated list of words in Harper's dictionary. + /// Get the metadata associated with a particular word. + Metadata { word: String }, + /// Emit a decompressed, line-separated list of the words in Harper's dictionary. 
Words, } @@ -134,6 +136,15 @@ fn main() -> anyhow::Result<()> { println!("{}", word_str); } + Ok(()) + } + Args::Metadata { word } => { + let dict = FstDictionary::curated(); + let metadata = dict.get_word_metadata_str(&word); + let json = serde_json::to_string_pretty(&metadata).unwrap(); + + println!("{json}"); + Ok(()) } } diff --git a/justfile b/justfile index 7a0662df..c2afc4b2 100644 --- a/justfile +++ b/justfile @@ -239,6 +239,10 @@ userdictoverlap: just searchdictfor $line 2> /dev/null done < $USER_DICT_FILE +# Get the metadata associated with a particular word in Harper's dictionary as JSON. +getmetadata word: + cargo run --bin harper-cli -- metadata {{word}} + bump-versions: #! /bin/bash set -eo pipefail From 7526771e78a7a864a9f5a08e104c3c6e4cb25005 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Thu, 2 Jan 2025 15:28:12 -0700 Subject: [PATCH 25/40] fix(core): made shorthand versions of long function names --- .../proper_noun_capitalization_linters.rs | 344 +++++++++--------- harper-core/src/patterns/sequence_pattern.rs | 20 + 2 files changed, 182 insertions(+), 182 deletions(-) diff --git a/harper-core/src/linting/proper_noun_capitalization_linters.rs b/harper-core/src/linting/proper_noun_capitalization_linters.rs index 5b4c896c..0fabf9c9 100644 --- a/harper-core/src/linting/proper_noun_capitalization_linters.rs +++ b/harper-core/src/linting/proper_noun_capitalization_linters.rs @@ -63,11 +63,11 @@ create_linter_for!( Americas, SequencePattern::default() .then(Box::new(EitherPattern::new(vec![ - Box::new(SequencePattern::default().then_any_capitalization_of("South")), - Box::new(SequencePattern::default().then_any_capitalization_of("North")) + Box::new(SequencePattern::aco("South")), + Box::new(SequencePattern::aco("North")) ]))) .then_whitespace() - .then_any_capitalization_of("America"), + .t_aco("America"), "When referring to the continents, make sure to treat them as a proper noun." 
); @@ -75,40 +75,39 @@ create_linter_for!( Koreas, SequencePattern::default() .then(Box::new(EitherPattern::new(vec![ - Box::new(SequencePattern::default().then_any_capitalization_of("South")), - Box::new(SequencePattern::default().then_any_capitalization_of("North")) + Box::new(SequencePattern::aco("South")), + Box::new(SequencePattern::aco("North")) ]))) .then_whitespace() - .then_any_capitalization_of("Korea"), + .t_aco("Korea"), "When referring to the nations, make sure to treat them as a proper noun." ); create_linter_for!( ChineseCommunistParty, - SequencePattern::default() - .then_any_capitalization_of("Chinese") + SequencePattern::aco("Chinese") .then_whitespace() - .then_any_capitalization_of("Communist") + .t_aco("Communist") .then_whitespace() - .then_any_capitalization_of("Party"), + .t_aco("Party"), "When referring to the political party, make sure to treat them as a proper noun." ); create_linter_for!( UnitedOrganizations, SequencePattern::default() - .then_any_capitalization_of("United") + .t_aco("United") .then_whitespace() .then(Box::new(EitherPattern::new(vec![ - Box::new(SequencePattern::default().then_any_capitalization_of("Nations")), - Box::new(SequencePattern::default().then_any_capitalization_of("States")), - Box::new(SequencePattern::default().then_any_capitalization_of("Kingdom")), - Box::new(SequencePattern::default().then_any_capitalization_of("Airlines")), + Box::new(SequencePattern::aco("Nations")), + Box::new(SequencePattern::aco("States")), + Box::new(SequencePattern::aco("Kingdom")), + Box::new(SequencePattern::aco("Airlines")), Box::new( SequencePattern::default() - .then_any_capitalization_of("Arab") + .t_aco("Arab") .then_whitespace() - .then_any_capitalization_of("Emirates") + .t_aco("Emirates") ) ]))), "When referring to national or international organizations, make sure to treat them as a proper noun." 
@@ -120,62 +119,58 @@ create_linter_for!( Box::new( SequencePattern::default() .then(Box::new(EitherPattern::new(vec![ - Box::new(SequencePattern::default().then_any_capitalization_of("Presidents'")), - Box::new(SequencePattern::default().then_any_capitalization_of("Valentines")), - Box::new(SequencePattern::default().then_any_capitalization_of("Christmas")), - Box::new(SequencePattern::default().then_any_capitalization_of("Easter")), - Box::new(SequencePattern::default().then_any_capitalization_of("Flag")), - Box::new(SequencePattern::default().then_any_capitalization_of("Independence")), - Box::new(SequencePattern::default().then_any_capitalization_of("Mothers'")), + Box::new(SequencePattern::aco("Presidents'")), + Box::new(SequencePattern::aco("Valentines")), + Box::new(SequencePattern::aco("Christmas")), + Box::new(SequencePattern::aco("Easter")), + Box::new(SequencePattern::aco("Flag")), + Box::new(SequencePattern::aco("Independence")), + Box::new(SequencePattern::aco("Mothers'")), + Box::new(SequencePattern::aco("New").t_aco("Years")), + Box::new(SequencePattern::aco("Fathers'")), + Box::new(SequencePattern::aco("Columbus")), + Box::new(SequencePattern::aco("Thanksgiving")), + Box::new(SequencePattern::aco("Memorial")), + Box::new(SequencePattern::aco("May")), + Box::new(SequencePattern::aco("Halloween")), + Box::new(SequencePattern::aco("Tax")), + Box::new(SequencePattern::aco("Parents")), + Box::new(SequencePattern::aco("Veterans")), + Box::new(SequencePattern::aco("Armistice")), + Box::new(SequencePattern::aco("Groundhog")), Box::new( SequencePattern::default() - .then_any_capitalization_of("New") - .then_any_capitalization_of("Years") - ), - Box::new(SequencePattern::default().then_any_capitalization_of("Fathers'")), - Box::new(SequencePattern::default().then_any_capitalization_of("Columbus")), - Box::new(SequencePattern::default().then_any_capitalization_of("Thanksgiving")), - Box::new(SequencePattern::default().then_any_capitalization_of("Memorial")), - 
Box::new(SequencePattern::default().then_any_capitalization_of("May")), - Box::new(SequencePattern::default().then_any_capitalization_of("Halloween")), - Box::new(SequencePattern::default().then_any_capitalization_of("Tax")), - Box::new(SequencePattern::default().then_any_capitalization_of("Parents")), - Box::new(SequencePattern::default().then_any_capitalization_of("Veterans")), - Box::new(SequencePattern::default().then_any_capitalization_of("Armistice")), - Box::new(SequencePattern::default().then_any_capitalization_of("Groundhog")), - Box::new( - SequencePattern::default() - .then_any_capitalization_of("National") + .t_aco("National") .then_whitespace() - .then_any_capitalization_of("Freedom") + .t_aco("Freedom") ), Box::new( SequencePattern::default() - .then_any_capitalization_of("All") + .t_aco("All") .then_whitespace() - .then_any_capitalization_of("Saints") + .t_aco("Saints") ), Box::new( SequencePattern::default() - .then_any_capitalization_of("All") + .t_aco("All") .then_whitespace() - .then_any_capitalization_of("Souls") + .t_aco("Souls") ) ]))) .then_whitespace() - .then_any_capitalization_of("Day") + .t_aco("Day") ), Box::new( SequencePattern::default() - .then_any_capitalization_of("Black") + .t_aco("Black") .then_whitespace() - .then_any_capitalization_of("Friday") + .t_aco("Friday") ), Box::new( SequencePattern::default() - .then_any_capitalization_of("Cyber") + .t_aco("Cyber") .then_whitespace() - .then_any_capitalization_of("Monday") + .t_aco("Monday") ) ]), "When referring to holidays, make sure to treat them as a proper noun." 
@@ -184,66 +179,66 @@ create_linter_for!( create_linter_for!( AmazonNames, SequencePattern::default() - .then_any_capitalization_of("Amazon") + .t_aco("Amazon") .then_whitespace() .then(Box::new(EitherPattern::new(vec![ Box::new( SequencePattern::default() - .then_any_capitalization_of("Shopping") + .t_aco("Shopping") ), Box::new( SequencePattern::default() - .then_any_capitalization_of("Web") + .t_aco("Web") .then_whitespace() - .then_any_capitalization_of("Services") + .t_aco("Services") ), Box::new( SequencePattern::default() - .then_any_capitalization_of("Lambda") + .t_aco("Lambda") ), Box::new( SequencePattern::default() - .then_any_capitalization_of("RDS") + .t_aco("RDS") ), Box::new( SequencePattern::default() - .then_any_capitalization_of("DynamoDB") + .t_aco("DynamoDB") ), Box::new( SequencePattern::default() - .then_any_capitalization_of("SageMaker") + .t_aco("SageMaker") ), Box::new( SequencePattern::default() - .then_any_capitalization_of("Rekognition") + .t_aco("Rekognition") ), Box::new( SequencePattern::default() - .then_any_capitalization_of("CloudFront") + .t_aco("CloudFront") ), Box::new( SequencePattern::default() - .then_any_capitalization_of("ECS") + .t_aco("ECS") ), Box::new( SequencePattern::default() - .then_any_capitalization_of("EKS") + .t_aco("EKS") ), Box::new( SequencePattern::default() - .then_any_capitalization_of("CloudWatch") + .t_aco("CloudWatch") ), Box::new( SequencePattern::default() - .then_any_capitalization_of("IAM") + .t_aco("IAM") ), Box::new( SequencePattern::default() - .then_any_capitalization_of("Prime") + .t_aco("Prime") ), Box::new( SequencePattern::default() - .then_any_capitalization_of("Kindle") + .t_aco("Kindle") ) ]))), "When referring to the various products of Amazon.com, make sure to treat them as a proper noun." 
@@ -252,32 +247,32 @@ create_linter_for!( create_linter_for!( GoogleNames, SequencePattern::default() - .then_any_capitalization_of("Google") + .t_aco("Google") .then_whitespace() .then(Box::new(EitherPattern::new(vec![ - Box::new(SequencePattern::default().then_any_capitalization_of("Search")), - Box::new(SequencePattern::default().then_any_capitalization_of("Cloud")), - Box::new(SequencePattern::default().then_any_capitalization_of("Maps")), - Box::new(SequencePattern::default().then_any_capitalization_of("Docs")), - Box::new(SequencePattern::default().then_any_capitalization_of("Sheets")), - Box::new(SequencePattern::default().then_any_capitalization_of("Slides")), - Box::new(SequencePattern::default().then_any_capitalization_of("Drive")), - Box::new(SequencePattern::default().then_any_capitalization_of("Meet")), - Box::new(SequencePattern::default().then_any_capitalization_of("Gmail")), - Box::new(SequencePattern::default().then_any_capitalization_of("Calendar")), - Box::new(SequencePattern::default().then_any_capitalization_of("Chrome")), - Box::new(SequencePattern::default().then_any_capitalization_of("ChromeOS")), - Box::new(SequencePattern::default().then_any_capitalization_of("Android")), - Box::new(SequencePattern::default().then_any_capitalization_of("Play")), - Box::new(SequencePattern::default().then_any_capitalization_of("Bard")), - Box::new(SequencePattern::default().then_any_capitalization_of("Gemini")), - Box::new(SequencePattern::default().then_any_capitalization_of("YouTube")), - Box::new(SequencePattern::default().then_any_capitalization_of("Photos")), - Box::new(SequencePattern::default().then_any_capitalization_of("Analytics")), - Box::new(SequencePattern::default().then_any_capitalization_of("AdSense")), - Box::new(SequencePattern::default().then_any_capitalization_of("Pixel")), - Box::new(SequencePattern::default().then_any_capitalization_of("Nest")), - Box::new(SequencePattern::default().then_any_capitalization_of("Workspace")) + 
Box::new(SequencePattern::aco("Search")), + Box::new(SequencePattern::aco("Cloud")), + Box::new(SequencePattern::aco("Maps")), + Box::new(SequencePattern::aco("Docs")), + Box::new(SequencePattern::aco("Sheets")), + Box::new(SequencePattern::aco("Slides")), + Box::new(SequencePattern::aco("Drive")), + Box::new(SequencePattern::aco("Meet")), + Box::new(SequencePattern::aco("Gmail")), + Box::new(SequencePattern::aco("Calendar")), + Box::new(SequencePattern::aco("Chrome")), + Box::new(SequencePattern::aco("ChromeOS")), + Box::new(SequencePattern::aco("Android")), + Box::new(SequencePattern::aco("Play")), + Box::new(SequencePattern::aco("Bard")), + Box::new(SequencePattern::aco("Gemini")), + Box::new(SequencePattern::aco("YouTube")), + Box::new(SequencePattern::aco("Photos")), + Box::new(SequencePattern::aco("Analytics")), + Box::new(SequencePattern::aco("AdSense")), + Box::new(SequencePattern::aco("Pixel")), + Box::new(SequencePattern::aco("Nest")), + Box::new(SequencePattern::aco("Workspace")) ]))), "When referring to Google products and services, make sure to treat them as proper nouns." 
); @@ -285,72 +280,72 @@ create_linter_for!( create_linter_for!( AzureNames, SequencePattern::default() - .then_any_capitalization_of("Azure") + .t_aco("Azure") .then_whitespace() .then(Box::new(EitherPattern::new(vec![ - Box::new(SequencePattern::default().then_any_capitalization_of("DevOps")), - Box::new(SequencePattern::default().then_any_capitalization_of("Functions")), + Box::new(SequencePattern::aco("DevOps")), + Box::new(SequencePattern::aco("Functions")), Box::new( SequencePattern::default() - .then_any_capitalization_of("Cosmos") + .t_aco("Cosmos") .then_whitespace() - .then_any_capitalization_of("DB") + .t_aco("DB") ), Box::new( SequencePattern::default() - .then_any_capitalization_of("SQL") + .t_aco("SQL") .then_whitespace() - .then_any_capitalization_of("Database") + .t_aco("Database") ), Box::new( SequencePattern::default() - .then_any_capitalization_of("Kubernetes") + .t_aco("Kubernetes") .then_whitespace() - .then_any_capitalization_of("Service") + .t_aco("Service") ), Box::new( SequencePattern::default() - .then_any_capitalization_of("Virtual") + .t_aco("Virtual") .then_whitespace() - .then_any_capitalization_of("Machines") + .t_aco("Machines") ), - Box::new(SequencePattern::default().then_any_capitalization_of("Monitor")), - Box::new(SequencePattern::default().then_any_capitalization_of("Storage")), + Box::new(SequencePattern::aco("Monitor")), + Box::new(SequencePattern::aco("Storage")), Box::new( SequencePattern::default() - .then_any_capitalization_of("Active") + .t_aco("Active") .then_whitespace() - .then_any_capitalization_of("Directory") + .t_aco("Directory") ), Box::new( SequencePattern::default() - .then_any_capitalization_of("App") + .t_aco("App") .then_whitespace() - .then_any_capitalization_of("Service") + .t_aco("Service") ), Box::new( SequencePattern::default() - .then_any_capitalization_of("Key") + .t_aco("Key") .then_whitespace() - .then_any_capitalization_of("Vault") + .t_aco("Vault") ), Box::new( SequencePattern::default() - 
.then_any_capitalization_of("Cognitive") + .t_aco("Cognitive") .then_whitespace() - .then_any_capitalization_of("Services") + .t_aco("Services") ), Box::new( SequencePattern::default() - .then_any_capitalization_of("Service") + .t_aco("Service") .then_whitespace() - .then_any_capitalization_of("Bus") + .t_aco("Bus") ), Box::new( SequencePattern::default() - .then_any_capitalization_of("Event") + .t_aco("Event") .then_whitespace() - .then_any_capitalization_of("Hub") + .t_aco("Hub") ) ]))), "When referring to Azure cloud services, make sure to treat them as proper nouns." @@ -359,28 +354,28 @@ create_linter_for!( create_linter_for!( MicrosoftNames, SequencePattern::default() - .then_any_capitalization_of("Microsoft") + .t_aco("Microsoft") .then_whitespace() .then(Box::new(EitherPattern::new(vec![ - Box::new(SequencePattern::default().then_any_capitalization_of("Windows")), - Box::new(SequencePattern::default().then_any_capitalization_of("Office")), - Box::new(SequencePattern::default().then_any_capitalization_of("Teams")), - Box::new(SequencePattern::default().then_any_capitalization_of("Excel")), - Box::new(SequencePattern::default().then_any_capitalization_of("PowerPoint")), - Box::new(SequencePattern::default().then_any_capitalization_of("Word")), - Box::new(SequencePattern::default().then_any_capitalization_of("Outlook")), - Box::new(SequencePattern::default().then_any_capitalization_of("OneDrive")), - Box::new(SequencePattern::default().then_any_capitalization_of("SharePoint")), - Box::new(SequencePattern::default().then_any_capitalization_of("Xbox")), - Box::new(SequencePattern::default().then_any_capitalization_of("Surface")), - Box::new(SequencePattern::default().then_any_capitalization_of("Edge")), - Box::new(SequencePattern::default().then_any_capitalization_of("Bing")), - Box::new(SequencePattern::default().then_any_capitalization_of("Dynamics")), + Box::new(SequencePattern::aco("Windows")), + Box::new(SequencePattern::aco("Office")), + 
Box::new(SequencePattern::aco("Teams")), + Box::new(SequencePattern::aco("Excel")), + Box::new(SequencePattern::aco("PowerPoint")), + Box::new(SequencePattern::aco("Word")), + Box::new(SequencePattern::aco("Outlook")), + Box::new(SequencePattern::aco("OneDrive")), + Box::new(SequencePattern::aco("SharePoint")), + Box::new(SequencePattern::aco("Xbox")), + Box::new(SequencePattern::aco("Surface")), + Box::new(SequencePattern::aco("Edge")), + Box::new(SequencePattern::aco("Bing")), + Box::new(SequencePattern::aco("Dynamics")), Box::new( SequencePattern::default() - .then_any_capitalization_of("Visual") + .t_aco("Visual") .then_whitespace() - .then_any_capitalization_of("Studio") + .t_aco("Studio") ) ]))), "When referring to Microsoft products and services, make sure to treat them as proper nouns." @@ -389,63 +384,49 @@ create_linter_for!( create_linter_for!( AppleNames, SequencePattern::default() - .then_any_capitalization_of("Apple") + .t_aco("Apple") .then_whitespace() .then(Box::new(EitherPattern::new(vec![ - Box::new(SequencePattern::default().then_any_capitalization_of("iPhone")), - Box::new(SequencePattern::default().then_any_capitalization_of("iPad")), - Box::new(SequencePattern::default().then_any_capitalization_of("MacBook")), + Box::new(SequencePattern::aco("iPhone")), + Box::new(SequencePattern::aco("iPad")), + Box::new(SequencePattern::aco("iMac")), + Box::new(SequencePattern::aco("MacBook")), Box::new( - SequencePattern::default() - .then_any_capitalization_of("MacBook") + SequencePattern::aco("MacBook") .then_whitespace() - .then_any_capitalization_of("Pro") + .t_aco("Pro") ), Box::new( - SequencePattern::default() - .then_any_capitalization_of("MacBook") + SequencePattern::aco("MacBook") .then_whitespace() - .then_any_capitalization_of("Air") + .t_aco("Air") ), - Box::new(SequencePattern::default().then_any_capitalization_of("iMac")), + Box::new(SequencePattern::aco("Mac").then_whitespace().t_aco("Pro")), + 
Box::new(SequencePattern::aco("Mac").then_whitespace().t_aco("Mini")), + Box::new(SequencePattern::aco("AirPods")), Box::new( - SequencePattern::default() - .then_any_capitalization_of("Mac") + SequencePattern::aco("AirPods") .then_whitespace() - .then_any_capitalization_of("Pro") + .t_aco("Pro") ), Box::new( - SequencePattern::default() - .then_any_capitalization_of("Mac") + SequencePattern::aco("AirPods") .then_whitespace() - .then_any_capitalization_of("Mini") + .t_aco("Max") ), - Box::new(SequencePattern::default().then_any_capitalization_of("AirPods")), + Box::new(SequencePattern::aco("Watch")), + Box::new(SequencePattern::aco("TV")), + Box::new(SequencePattern::aco("Music")), + Box::new(SequencePattern::aco("Arcade")), + Box::new(SequencePattern::aco("iCloud")), + Box::new(SequencePattern::aco("Safari")), + Box::new(SequencePattern::aco("HomeKit")), + Box::new(SequencePattern::aco("CarPlay")), Box::new( SequencePattern::default() - .then_any_capitalization_of("AirPods") + .t_aco("Vision") .then_whitespace() - .then_any_capitalization_of("Pro") - ), - Box::new( - SequencePattern::default() - .then_any_capitalization_of("AirPods") - .then_whitespace() - .then_any_capitalization_of("Max") - ), - Box::new(SequencePattern::default().then_any_capitalization_of("Watch")), - Box::new(SequencePattern::default().then_any_capitalization_of("TV")), - Box::new(SequencePattern::default().then_any_capitalization_of("Music")), - Box::new(SequencePattern::default().then_any_capitalization_of("Arcade")), - Box::new(SequencePattern::default().then_any_capitalization_of("iCloud")), - Box::new(SequencePattern::default().then_any_capitalization_of("Safari")), - Box::new(SequencePattern::default().then_any_capitalization_of("HomeKit")), - Box::new(SequencePattern::default().then_any_capitalization_of("CarPlay")), - Box::new( - SequencePattern::default() - .then_any_capitalization_of("Vision") - .then_whitespace() - .then_any_capitalization_of("Pro") + .t_aco("Pro") ) ]))), "When 
referring to Apple products and services, make sure to treat them as proper nouns." @@ -453,20 +434,19 @@ create_linter_for!( create_linter_for!( MetaNames, - SequencePattern::default() - .then_any_capitalization_of("Meta") + SequencePattern::aco("Meta") .then_whitespace() .then(Box::new(EitherPattern::new(vec![ - Box::new(SequencePattern::default().then_any_capitalization_of("Oculus")), - Box::new(SequencePattern::default().then_any_capitalization_of("Portals")), - Box::new(SequencePattern::default().then_any_capitalization_of("Quest")), - Box::new(SequencePattern::default().then_any_capitalization_of("Gaming")), - Box::new(SequencePattern::default().then_any_capitalization_of("Horizon")), + Box::new(SequencePattern::aco("Oculus")), + Box::new(SequencePattern::aco("Portals")), + Box::new(SequencePattern::aco("Quest")), + Box::new(SequencePattern::aco("Gaming")), + Box::new(SequencePattern::aco("Horizon")), Box::new( SequencePattern::default() - .then_any_capitalization_of("Reality") + .t_aco("Reality") .then_whitespace() - .then_any_capitalization_of("Labs") + .t_aco("Labs") ), ]))), "When referring to Meta products and services, make sure to treat them as proper nouns." diff --git a/harper-core/src/patterns/sequence_pattern.rs b/harper-core/src/patterns/sequence_pattern.rs index afb9eeb0..39d538c3 100644 --- a/harper-core/src/patterns/sequence_pattern.rs +++ b/harper-core/src/patterns/sequence_pattern.rs @@ -45,6 +45,7 @@ macro_rules! gen_then_from_is { impl SequencePattern { gen_then_from_is!(noun); + gen_then_from_is!(plural_noun); gen_then_from_is!(verb); gen_then_from_is!(linking_verb); gen_then_from_is!(pronoun); @@ -80,6 +81,20 @@ impl SequencePattern { self } + /// Shorthand for [`Self::any_capitalization_of`]. 
+ pub fn aco(word: &'static str) -> Self { + Self::any_capitalization_of(word) + } + + pub fn any_capitalization_of(word: &'static str) -> Self { + Self::default().then_any_capitalization_of(word) + } + + /// Shorthand for [`Self::then_any_capitalization_of`]. + pub fn t_aco(self, word: &'static str) -> Self { + self.then_any_capitalization_of(word) + } + /// Match examples of `word` that have any capitalization. pub fn then_any_capitalization_of(mut self, word: &'static str) -> Self { self.token_patterns @@ -104,6 +119,11 @@ impl SequencePattern { self } + /// Shorthand for [`Self::then_exact_word_or_lowercase`]. + pub fn t_eworl(self, word: &'static str) -> Self { + self.then_exact_word_or_lowercase(word) + } + pub fn then_exact_word_or_lowercase(mut self, word: &'static str) -> Self { self.token_patterns .push(Box::new(|tok: &Token, source: &[char]| { From 39b41ae01f7a4c251ca7a109a483bbdff4bb059d Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Fri, 3 Jan 2025 08:43:58 -0700 Subject: [PATCH 26/40] feat(core): expanded `CompoundWords` to cover hyphenated cases --- harper-core/src/linting/compound_words.rs | 42 ++++++++++++++++++++--- harper-core/src/linting/matcher.rs | 1 - harper-core/src/linting/mod.rs | 13 ++++++- 3 files changed, 50 insertions(+), 6 deletions(-) diff --git a/harper-core/src/linting/compound_words.rs b/harper-core/src/linting/compound_words.rs index 47d92533..b7d25491 100644 --- a/harper-core/src/linting/compound_words.rs +++ b/harper-core/src/linting/compound_words.rs @@ -29,6 +29,7 @@ impl Linter for CompoundWords { let mut lints = Vec::new(); let mut merged_word = CharString::new(); + let mut potential_compounds = Vec::new(); for (a, w, b) in document.tokens().tuple_windows() { if !a.kind.is_word() || !w.kind.is_whitespace() || !b.kind.is_word() { @@ -43,17 +44,39 @@ impl Linter for CompoundWords { continue; } + potential_compounds.clear(); + + merged_word.clear(); + merged_word.extend_from_slice(a_chars); + 
merged_word.extend_from_slice(b_chars); + + // Check for closed compound words + if self.dict.contains_word(&merged_word) { + potential_compounds.push(merged_word.clone()); + } + + // Check for hyphenated compound words merged_word.clear(); merged_word.extend_from_slice(a_chars); + merged_word.push('-'); merged_word.extend_from_slice(b_chars); + // Check for closed compound words if self.dict.contains_word(&merged_word) { + potential_compounds.push(merged_word.clone()); + } + + if !potential_compounds.is_empty() { lints.push(Lint { span: Span::new(a.span.start, b.span.end), lint_kind: LintKind::Spelling, - suggestions: vec![Suggestion::ReplaceWith(merged_word.to_vec())], - message: "These two words are often combined to form a closed compound word." - .to_owned(), + suggestions: potential_compounds + .drain(..) + .map(|v| Suggestion::ReplaceWith(v.to_vec())) + .collect(), + message: + "These two words are often combined to form a hyphenated compound word." + .to_owned(), priority: 63, }); } @@ -69,7 +92,9 @@ impl Linter for CompoundWords { #[cfg(test)] mod tests { - use crate::linting::tests::assert_lint_count; + use crate::linting::tests::{ + assert_lint_count, assert_suggestion_count, assert_suggestion_result, + }; use super::CompoundWords; @@ -144,4 +169,13 @@ mod tests { 1, ); } + + #[test] + fn break_up() { + assert_suggestion_count( + "Like if you break up words you shouldn't.", + CompoundWords::default(), + 2, + ); + } } diff --git a/harper-core/src/linting/matcher.rs b/harper-core/src/linting/matcher.rs index cefd4c50..6e34785e 100644 --- a/harper-core/src/linting/matcher.rs +++ b/harper-core/src/linting/matcher.rs @@ -180,7 +180,6 @@ impl Matcher { "The","hing" => "The thing", "need","helps" => "need help", "an","this" => "and this", - "break","up" => "break-up", "case", "sensitive" => "case-sensitive", "Tree", "sitter" => "Tree-sitter", "all", "of", "the" => "all the", diff --git a/harper-core/src/linting/mod.rs b/harper-core/src/linting/mod.rs index 
254d286a..bbd650b6 100644 --- a/harper-core/src/linting/mod.rs +++ b/harper-core/src/linting/mod.rs @@ -74,7 +74,7 @@ pub trait Linter: Send + Sync { #[cfg(test)] mod tests { use super::Linter; - use crate::Document; + use crate::{remove_overlaps, Document}; pub fn assert_lint_count(text: &str, mut linter: impl Linter, count: usize) { let test = Document::new_markdown_curated(text); @@ -83,6 +83,17 @@ mod tests { assert_eq!(lints.len(), count); } + /// Assert the total number of suggestions produced by a [`Linter`], spread across all produced + /// [`Lint`]s. + pub fn assert_suggestion_count(text: &str, mut linter: impl Linter, count: usize) { + let test = Document::new_markdown_curated(text); + let lints = linter.lint(&test); + assert_eq!( + lints.iter().map(|l| l.suggestions.len()).sum::(), + count + ); + } + /// Runs a provided linter on text, applies the first suggestion from each /// lint and asserts that the result is equal to a given value. pub fn assert_suggestion_result(text: &str, mut linter: impl Linter, expected_result: &str) { From 1ee819a5db578a468638f58e9895443f0371719a Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Fri, 3 Jan 2025 08:57:47 -0700 Subject: [PATCH 27/40] fix(ls): allow Neovim-style `gitcommit` language ID for #352 --- harper-ls/src/backend.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harper-ls/src/backend.rs b/harper-ls/src/backend.rs index 73ceedeb..026f0d99 100644 --- a/harper-ls/src/backend.rs +++ b/harper-ls/src/backend.rs @@ -206,7 +206,7 @@ impl Backend { } } else if language_id == "markdown" { Some(Box::new(Markdown)) - } else if language_id == "git-commit" { + } else if language_id == "git-commit" || language_id == "gitcommit" { Some(Box::new(GitCommitParser)) } else if language_id == "html" { Some(Box::new(HtmlParser::default())) From 21fbf09daec3a513d66c1474457a5029b2fd4a5e Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Fri, 3 Jan 2025 08:58:02 -0700 Subject: [PATCH 28/40] fix: remove empty 
module --- harper-comments/src/better.rs | 1 - harper-comments/src/lib.rs | 1 - 2 files changed, 2 deletions(-) delete mode 100644 harper-comments/src/better.rs diff --git a/harper-comments/src/better.rs b/harper-comments/src/better.rs deleted file mode 100644 index 8b137891..00000000 --- a/harper-comments/src/better.rs +++ /dev/null @@ -1 +0,0 @@ - diff --git a/harper-comments/src/lib.rs b/harper-comments/src/lib.rs index ac56cb94..06967166 100644 --- a/harper-comments/src/lib.rs +++ b/harper-comments/src/lib.rs @@ -1,6 +1,5 @@ #![doc = include_str!("../README.md")] -mod better; mod comment_parser; mod comment_parsers; pub use comment_parser::CommentParser; From f70f289842e1d598e62a21b05187cafe1c9c3583 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Fri, 3 Jan 2025 09:08:10 -0700 Subject: [PATCH 29/40] fix(ls): improve error handling for #251 --- harper-ls/src/backend.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/harper-ls/src/backend.rs b/harper-ls/src/backend.rs index 026f0d99..85e2d9f7 100644 --- a/harper-ls/src/backend.rs +++ b/harper-ls/src/backend.rs @@ -54,7 +54,7 @@ impl Backend { fn file_dict_name(url: &Url) -> Option { let mut rewritten = String::new(); - // We assume all URLs are local files and have a base + // We assume all URLs are local files and have a base. 
for seg in url.to_file_path().ok()?.components() { if !matches!(seg, Component::RootDir) { rewritten.push_str(&seg.as_os_str().to_string_lossy()); @@ -120,7 +120,7 @@ impl Backend { ); let Some(file_dictionary) = file_dictionary else { - return Err(anyhow!("Unable to compute dictionary path.")); + return Err(anyhow!("Unable to compute dictionary path for {url}.")); }; let mut global_dictionary = global_dictionary?; @@ -399,9 +399,13 @@ impl LanguageServer for Backend { return; }; - self.update_document(¶ms.text_document.uri, &last.text, None) + if let Err(err) = self + .update_document(¶ms.text_document.uri, &last.text, None) .await - .unwrap(); + { + error!("{err}") + } + self.publish_diagnostics(¶ms.text_document.uri).await; } From 1e0b5b08e05b106a6544480e1f5b543c270066e4 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Fri, 3 Jan 2025 09:26:41 -0700 Subject: [PATCH 30/40] test: make sure linter descriptions are grammatically correct Which helped me discover bugs in the `CompoundWords` linter. --- demo.md | 2 +- .../javadoc_complex.java | 2 +- harper-core/dictionary.dict | 3 +- harper-core/src/lexing/hostname.rs | 27 ++++++++++++++++ harper-core/src/lexing/mod.rs | 16 ++++++++++ harper-core/src/linting/an_a.rs | 2 +- harper-core/src/linting/compound_words.rs | 27 +++------------- .../src/linting/correct_number_suffix.rs | 2 +- harper-core/src/linting/linking_verbs.rs | 2 +- harper-core/src/linting/lint_group.rs | 31 +++++++++++++++++-- harper-core/src/linting/long_sentences.rs | 3 +- harper-core/src/linting/that_which.rs | 2 +- harper-core/src/token.rs | 8 +++++ harper-core/tests/run_tests.rs | 1 + .../tests/test_sources/amazon_hostname.md | 1 + packages/harper.js/package.json | 2 +- 16 files changed, 95 insertions(+), 36 deletions(-) create mode 100644 harper-core/tests/test_sources/amazon_hostname.md diff --git a/demo.md b/demo.md index 12a015d8..5f20a7af 100644 --- a/demo.md +++ b/demo.md @@ -4,7 +4,7 @@ checkers don't cut it. 
That s where Harper comes in handy. Harper is an language checker for developers. it can detect improper capitalization and misspellled words, as well as a number of other issues. -Like if you break up words you shouldn't. +Like if you break up words you shoul dn't. Harper works everywhere, even offline. Since you r data never leaves your device, you don't ned to worry aout us diff --git a/harper-comments/tests/language_support_sources/javadoc_complex.java b/harper-comments/tests/language_support_sources/javadoc_complex.java index a3ae0b53..054f4c46 100644 --- a/harper-comments/tests/language_support_sources/javadoc_complex.java +++ b/harper-comments/tests/language_support_sources/javadoc_complex.java @@ -19,7 +19,7 @@ public static void main(String[] args) { /** * This doc has a link in it: {@link this sould b ignor} but not tis * - * @param name this is an other test. + * @param name this is anoher test. */ public static void greet(String name) { System.out.println("Hello " + name + "."); diff --git a/harper-core/dictionary.dict b/harper-core/dictionary.dict index ed321060..d3753e0b 100644 --- a/harper-core/dictionary.dict +++ b/harper-core/dictionary.dict @@ -28344,7 +28344,7 @@ inimitably/ iniquitous/5Y iniquity/1SM initial/514SGMDY -initialism/1 +initialism/1MS initialization/1 initialize/4DSG initialized/4AU @@ -49636,7 +49636,6 @@ scatterplot/14SMG Wikilink/MS1 stacktrace/SM1 scrollbar/1SM -break-up/1SM sweetgrass/1SM PowerShell/SM WebSocket/SM diff --git a/harper-core/src/lexing/hostname.rs b/harper-core/src/lexing/hostname.rs index 6a130726..5534fbe5 100644 --- a/harper-core/src/lexing/hostname.rs +++ b/harper-core/src/lexing/hostname.rs @@ -1,3 +1,30 @@ +use crate::TokenKind; + +use super::FoundToken; + +/// Lex a hostname token. +pub fn lex_hostname_token(source: &[char]) -> Option { + let len = lex_hostname(source)?; + + // Might be word, just skip it. 
+ if len <= 1 { + return None; + } + + if !source.get(1..len - 1)?.contains(&'.') { + return None; + } + + if source.get(len - 1) == Some(&'.') { + return None; + } + + Some(FoundToken { + next_index: len, + token: TokenKind::Hostname, + }) +} + pub fn lex_hostname(source: &[char]) -> Option { let mut passed_chars = 0; diff --git a/harper-core/src/lexing/mod.rs b/harper-core/src/lexing/mod.rs index a44919ed..a2d03ece 100644 --- a/harper-core/src/lexing/mod.rs +++ b/harper-core/src/lexing/mod.rs @@ -2,6 +2,7 @@ mod email_address; mod hostname; mod url; +use hostname::lex_hostname_token; use url::lex_url; use self::email_address::lex_email_address; @@ -27,6 +28,7 @@ pub fn lex_token(source: &[char]) -> Option { lex_number, lex_url, lex_email_address, + lex_hostname_token, lex_word, lex_catch, ]; @@ -161,11 +163,25 @@ fn lex_catch(_source: &[char]) -> Option { #[cfg(test)] mod tests { + use super::lex_token; use super::lex_word; + use super::{FoundToken, TokenKind}; #[test] fn lexes_cjk_as_unlintable() { let source: Vec<_> = "世".chars().collect(); assert!(lex_word(&source).is_none()); } + + #[test] + fn lexes_youtube_as_hostname() { + let source: Vec<_> = "YouTube.com".chars().collect(); + assert!(matches!( + lex_token(&source), + Some(FoundToken { + token: TokenKind::Hostname, + .. + }) + )); + } } diff --git a/harper-core/src/linting/an_a.rs b/harper-core/src/linting/an_a.rs index fe1b0bb3..a76432b3 100644 --- a/harper-core/src/linting/an_a.rs +++ b/harper-core/src/linting/an_a.rs @@ -64,7 +64,7 @@ impl Linter for AnA { } fn description(&self) -> &'static str { - "A rule that looks for incorrect indefinite articles. For example, \"this is an mule\" would be flagged as incorrect." + "A rule that looks for incorrect indefinite articles. For example, `this is an mule` would be flagged as incorrect." 
} } diff --git a/harper-core/src/linting/compound_words.rs b/harper-core/src/linting/compound_words.rs index b7d25491..c644f5f6 100644 --- a/harper-core/src/linting/compound_words.rs +++ b/harper-core/src/linting/compound_words.rs @@ -51,18 +51,10 @@ impl Linter for CompoundWords { merged_word.extend_from_slice(b_chars); // Check for closed compound words - if self.dict.contains_word(&merged_word) { - potential_compounds.push(merged_word.clone()); - } - - // Check for hyphenated compound words - merged_word.clear(); - merged_word.extend_from_slice(a_chars); - merged_word.push('-'); - merged_word.extend_from_slice(b_chars); - - // Check for closed compound words - if self.dict.contains_word(&merged_word) { + if self.dict.contains_word(&merged_word) + && !a.kind.is_common_word() + && !b.kind.is_common_word() + { potential_compounds.push(merged_word.clone()); } @@ -134,15 +126,6 @@ mod tests { ); } - #[test] - fn makeup() { - assert_lint_count( - "She spent a lot of time doing her make up this morning.", - CompoundWords::default(), - 1, - ); - } - #[test] fn birthday() { assert_lint_count( @@ -175,7 +158,7 @@ mod tests { assert_suggestion_count( "Like if you break up words you shouldn't.", CompoundWords::default(), - 2, + 0, ); } } diff --git a/harper-core/src/linting/correct_number_suffix.rs b/harper-core/src/linting/correct_number_suffix.rs index 0c53a786..8d095156 100644 --- a/harper-core/src/linting/correct_number_suffix.rs +++ b/harper-core/src/linting/correct_number_suffix.rs @@ -33,7 +33,7 @@ impl Linter for CorrectNumberSuffix { } fn description(&self) -> &'static str { - "When making quick edits, it is common for authors to change the value of a number without changing its suffix. This rule looks for these cases, for example: \"2st\"." + "When making quick edits, it is common for authors to change the value of a number without changing its suffix. This rule looks for these cases, for example: `2st`." 
} } diff --git a/harper-core/src/linting/linking_verbs.rs b/harper-core/src/linting/linking_verbs.rs index 55ddeb21..60fea3ca 100644 --- a/harper-core/src/linting/linking_verbs.rs +++ b/harper-core/src/linting/linking_verbs.rs @@ -36,7 +36,7 @@ impl Linter for LinkingVerbs { } fn description(&self) -> &'static str { - "Linking verbs connect nouns to other ideas. Make sure you do not accidentaly link words that aren't nouns." + "Linking verbs connect nouns to other ideas. Make sure you do not accidentally link words that aren't nouns." } } diff --git a/harper-core/src/linting/lint_group.rs b/harper-core/src/linting/lint_group.rs index 175e2d04..753df32d 100644 --- a/harper-core/src/linting/lint_group.rs +++ b/harper-core/src/linting/lint_group.rs @@ -45,6 +45,14 @@ macro_rules! create_lint_group_config { pub spell_check: &'a str } + + impl<'a> LintGroupDescriptions<'a> { + /// Create a [`Vec`] containing the key-value pairs of this struct. + pub fn to_vec_pairs(self) -> Vec<(&'static str, &'a str)>{ + vec![$((stringify!([<$linter:snake>]), self.[<$linter:snake>],),)* ("spell_check", self.spell_check)] + } + } + #[derive(Debug, Clone, Copy, Serialize, Deserialize, Default)] pub struct LintGroupConfig { $( @@ -175,7 +183,7 @@ create_lint_group_config!( AppleNames => true, AzureNames => true, CompoundWords => true, - PluralConjugate => true + PluralConjugate => false ); impl Default for LintGroup { @@ -186,13 +194,30 @@ impl Default for LintGroup { #[cfg(test)] mod tests { - use crate::FullDictionary; + use crate::{linting::Linter, Document, FstDictionary, FullDictionary}; - use super::LintGroup; + use super::{LintGroup, LintGroupConfig}; #[test] fn can_get_all_descriptions() { let group = LintGroup::::default(); group.all_descriptions(); } + + #[test] + fn lint_descriptions_are_clean() { + let mut group = LintGroup::new(LintGroupConfig::default(), FstDictionary::curated()); + let pairs: Vec<_> = group + .all_descriptions() + .to_vec_pairs() + .into_iter() + .map(|(a, 
b)| (a.to_string(), b.to_string())) + .collect(); + + for (key, value) in pairs { + let doc = Document::new_markdown_curated(&value); + eprintln!("{key}: {value}"); + assert!(group.lint(&doc).is_empty()) + } + } } diff --git a/harper-core/src/linting/long_sentences.rs b/harper-core/src/linting/long_sentences.rs index e877f259..666d018e 100644 --- a/harper-core/src/linting/long_sentences.rs +++ b/harper-core/src/linting/long_sentences.rs @@ -27,7 +27,6 @@ impl Linter for LongSentences { } fn description(&self) -> &'static str { - "This rule looks for run-on sentences, which can make your work harder to grok. -" + "This rule looks for run-on sentences, which can make your work harder to grok." } } diff --git a/harper-core/src/linting/that_which.rs b/harper-core/src/linting/that_which.rs index ac152ad0..dfd1eb6a 100644 --- a/harper-core/src/linting/that_which.rs +++ b/harper-core/src/linting/that_which.rs @@ -58,7 +58,7 @@ impl PatternLinter for ThatWhich { } fn description(&self) -> &'static str { - "Repeating the word \"that\" twice is often redundent. \"That which\" is easier to read." + "Repeating the word \"that\" twice is often redundant. `That which` is easier to read." 
} } diff --git a/harper-core/src/token.rs b/harper-core/src/token.rs index ef82c703..b8ef580e 100644 --- a/harper-core/src/token.rs +++ b/harper-core/src/token.rs @@ -327,6 +327,14 @@ impl TokenKind { metadata.is_not_plural_noun() } + pub fn is_common_word(&self) -> bool { + let TokenKind::Word(metadata) = self else { + return true; + }; + + metadata.common + } + pub fn is_plural_noun(&self) -> bool { let TokenKind::Word(metadata) = self else { return false; diff --git a/harper-core/tests/run_tests.rs b/harper-core/tests/run_tests.rs index eece70c2..72bd58dd 100644 --- a/harper-core/tests/run_tests.rs +++ b/harper-core/tests/run_tests.rs @@ -44,3 +44,4 @@ create_test!(chinese_lorem_ipsum.md, 2); create_test!(obsidian_links.md, 2); create_test!(issue_267.md, 0); create_test!(proper_noun_capitalization.md, 2); +create_test!(amazon_hostname.md, 0); diff --git a/harper-core/tests/test_sources/amazon_hostname.md b/harper-core/tests/test_sources/amazon_hostname.md new file mode 100644 index 00000000..48d4be50 --- /dev/null +++ b/harper-core/tests/test_sources/amazon_hostname.md @@ -0,0 +1 @@ +This is a test of whether Amazon.com is considered a URI. 
diff --git a/packages/harper.js/package.json b/packages/harper.js/package.json index 50b39312..0133dd7d 100644 --- a/packages/harper.js/package.json +++ b/packages/harper.js/package.json @@ -17,7 +17,7 @@ "scripts": { "dev": "vite", "build": "tsc && vite build", - "test": "vitest run --browser firefox && vitest run --browser chromium" + "test": "vitest run --browser chromium && vitest run --browser firefox" }, "devDependencies": { "wasm": "link:../../harper-wasm/pkg", From 32bf9f4a9acb587465597cbaed6f71e0033c0a7b Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Fri, 3 Jan 2025 11:43:52 -0700 Subject: [PATCH 31/40] docs(ls): wrote instructions for isolating English in `harper-ls` --- .../routes/docs/integrations/neovim/+page.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/packages/web/src/routes/docs/integrations/neovim/+page.md b/packages/web/src/routes/docs/integrations/neovim/+page.md index 036ad64a..96bf325f 100644 --- a/packages/web/src/routes/docs/integrations/neovim/+page.md +++ b/packages/web/src/routes/docs/integrations/neovim/+page.md @@ -114,3 +114,22 @@ lspconfig.harper_ls.setup { }, } ``` + +If you work with a lot of documents that are a mixture of English and another language, Harper can attempt to automatically detect which is which and only lint the English text. +To enable it, just set the `isolateEnglish` key. + +:::note +This feature is incredibly new and unstable. +Do not expect it to work perfectly. +If improvements are important to you, feel free to [open an issue](https://github.com/Automattic/harper/issues/new?template=Blank+issue) to let us know. 
+::: + +```lua +lspconfig.harper_ls.setup { + settings = { + ["harper-ls"] = { + isolateEnglish = false + } + }, +} +``` From e3f4cc92355446c958db2fd2405bd415677c603b Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Fri, 3 Jan 2025 11:55:26 -0700 Subject: [PATCH 32/40] test(html): check if #156 is still present --- harper-html/tests/run_tests.rs | 1 + harper-html/tests/test_sources/issue_156.html | 4 ++++ 2 files changed, 5 insertions(+) create mode 100644 harper-html/tests/test_sources/issue_156.html diff --git a/harper-html/tests/run_tests.rs b/harper-html/tests/run_tests.rs index 7d6aec2b..320ac7c8 100644 --- a/harper-html/tests/run_tests.rs +++ b/harper-html/tests/run_tests.rs @@ -37,3 +37,4 @@ macro_rules! create_test { } create_test!(run_on.html, 0); +create_test!(issue_156.html, 0); diff --git a/harper-html/tests/test_sources/issue_156.html b/harper-html/tests/test_sources/issue_156.html new file mode 100644 index 00000000..705b0903 --- /dev/null +++ b/harper-html/tests/test_sources/issue_156.html @@ -0,0 +1,4 @@ +

+ foo + bar +

From e7c5a27eb5816173d89efaa66ff09de1bf98e01a Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Fri, 3 Jan 2025 12:01:00 -0700 Subject: [PATCH 33/40] test(core): check if #159 is still present --- harper-core/tests/run_tests.rs | 1 + harper-core/tests/test_sources/issue_159.md | 4 ++++ 2 files changed, 5 insertions(+) create mode 100644 harper-core/tests/test_sources/issue_159.md diff --git a/harper-core/tests/run_tests.rs b/harper-core/tests/run_tests.rs index 72bd58dd..eec35583 100644 --- a/harper-core/tests/run_tests.rs +++ b/harper-core/tests/run_tests.rs @@ -45,3 +45,4 @@ create_test!(obsidian_links.md, 2); create_test!(issue_267.md, 0); create_test!(proper_noun_capitalization.md, 2); create_test!(amazon_hostname.md, 0); +create_test!(issue_159.md, 1); diff --git a/harper-core/tests/test_sources/issue_159.md b/harper-core/tests/test_sources/issue_159.md new file mode 100644 index 00000000..31be34fa --- /dev/null +++ b/harper-core/tests/test_sources/issue_159.md @@ -0,0 +1,4 @@ +The file in question was myfile.txt, and it was glorious. +It was referenced by https://pax.grsecurity.net/docs/pageexec.old.txt. + +this is another test for the sentence capitalization. From 10ea21f8fb627310557fc14243be5b6033da874e Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Fri, 3 Jan 2025 12:31:06 -0700 Subject: [PATCH 34/40] docs: created initial "Frequently Asked Questions" page --- packages/web/src/routes/docs/faq/+page.md | 22 ++++++++++++++++++++++ packages/web/vite.config.ts | 4 ++++ 2 files changed, 26 insertions(+) create mode 100644 packages/web/src/routes/docs/faq/+page.md diff --git a/packages/web/src/routes/docs/faq/+page.md b/packages/web/src/routes/docs/faq/+page.md new file mode 100644 index 00000000..86dde89e --- /dev/null +++ b/packages/web/src/routes/docs/faq/+page.md @@ -0,0 +1,22 @@ +--- +title: Frequently Asked Questions +--- + +## Is There a Chrome or Firefox Extension? + +**Short answer**: not yet. 
+ +**Long answer:** at the time of writing, we've just released our first version of `harper.js` ([documentation here](./harperjs/introduction)), which will make it significantly easier to build such a product. +That said, our road map has higher priority items at the moment, so don't expect the official Harper maintainers to make an attempt in the near future. + +If you're interested in trying to make one, let us know how it goes. +We might be able to help. + +## Why Doesn't Harper Support My Programming Language? + +Harper (particularly `harper-ls`) tries to support a wide variety of programming languages. +If one is missing, it's probably because we haven't had time to get to it yet. +We are entirely open to PRs that add support. + +If you just want to be able to run grammar checking on your code's comments, it's actually quite straightforward. +You can use [this PR as a model for what to do](https://github.com/Automattic/harper/pull/332). diff --git a/packages/web/vite.config.ts b/packages/web/vite.config.ts index 3c0f49ce..3e1c24e4 100644 --- a/packages/web/vite.config.ts +++ b/packages/web/vite.config.ts @@ -115,6 +115,10 @@ export default defineConfig({ } ] }, + { + title: 'FAQ', + to: '/docs/faq' + }, { title: 'Rules', to: '/docs/rules' From 28448ad00609f52c27a3b3774b40374b56ead62d Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Fri, 3 Jan 2025 12:58:15 -0700 Subject: [PATCH 35/40] chore(core): move some types to their own files --- harper-core/src/document.rs | 3 +- harper-core/src/fat_token.rs | 11 + harper-core/src/lexing/mod.rs | 3 +- harper-core/src/lib.rs | 6 +- harper-core/src/linting/compound_words.rs | 4 +- .../src/linting/correct_number_suffix.rs | 4 +- harper-core/src/linting/mod.rs | 2 +- .../src/parsers/collapse_identifiers.rs | 4 +- harper-core/src/parsers/mod.rs | 2 +- harper-core/src/token.rs | 345 +----------------- harper-core/src/token_kind.rs | 335 +++++++++++++++++ 11 files changed, 361 insertions(+), 358 deletions(-) create mode
100644 harper-core/src/fat_token.rs create mode 100644 harper-core/src/token_kind.rs diff --git a/harper-core/src/document.rs b/harper-core/src/document.rs index 1c34ccea..61b75f4d 100644 --- a/harper-core/src/document.rs +++ b/harper-core/src/document.rs @@ -7,10 +7,9 @@ use paste::paste; use crate::parsers::{Markdown, Parser, PlainEnglish}; use crate::patterns::{PatternExt, RepeatingPattern, SequencePattern}; use crate::punctuation::Punctuation; -use crate::token::NumberSuffix; use crate::vec_ext::VecExt; -use crate::Span; use crate::{Dictionary, FatToken, FstDictionary, Lrc, Token, TokenKind, TokenStringExt}; +use crate::{NumberSuffix, Span}; /// A document containing some amount of lexed and parsed English text. #[derive(Debug, Clone)] diff --git a/harper-core/src/fat_token.rs b/harper-core/src/fat_token.rs new file mode 100644 index 00000000..7db6c609 --- /dev/null +++ b/harper-core/src/fat_token.rs @@ -0,0 +1,11 @@ +use serde::{Deserialize, Serialize}; + +use crate::TokenKind; + +/// A [`Token`] that holds its content as a fat [`Vec`] rather than as a +/// [`Span`]. 
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd)] +pub struct FatToken { + pub content: Vec, + pub kind: TokenKind, +} diff --git a/harper-core/src/lexing/mod.rs b/harper-core/src/lexing/mod.rs index a2d03ece..e451d311 100644 --- a/harper-core/src/lexing/mod.rs +++ b/harper-core/src/lexing/mod.rs @@ -8,8 +8,7 @@ use url::lex_url; use self::email_address::lex_email_address; use crate::char_ext::CharExt; use crate::punctuation::{Punctuation, Quote}; -use crate::token::TokenKind; -use crate::WordMetadata; +use crate::{TokenKind, WordMetadata}; #[derive(Debug)] pub struct FoundToken { diff --git a/harper-core/src/lib.rs b/harper-core/src/lib.rs index aa5c16b1..abaefe6d 100644 --- a/harper-core/src/lib.rs +++ b/harper-core/src/lib.rs @@ -4,6 +4,7 @@ mod char_ext; mod char_string; mod document; +mod fat_token; pub mod language_detection; mod lexing; pub mod linting; @@ -16,6 +17,7 @@ mod spell; mod sync; mod title_case; mod token; +mod token_kind; mod vec_ext; mod word_metadata; @@ -23,6 +25,7 @@ use std::collections::VecDeque; pub use char_string::{CharString, CharStringExt}; pub use document::Document; +pub use fat_token::FatToken; use linting::Lint; pub use mask::{Mask, Masker}; pub use punctuation::{Punctuation, Quote}; @@ -30,7 +33,8 @@ pub use span::Span; pub use spell::{Dictionary, FstDictionary, FullDictionary, MergedDictionary}; pub use sync::Lrc; pub use title_case::{make_title_case, make_title_case_str}; -pub use token::{FatToken, Token, TokenKind, TokenStringExt}; +pub use token::{Token, TokenStringExt}; +pub use token_kind::{NumberSuffix, TokenKind}; pub use vec_ext::VecExt; pub use word_metadata::{AdverbData, ConjunctionData, NounData, Tense, VerbData, WordMetadata}; diff --git a/harper-core/src/linting/compound_words.rs b/harper-core/src/linting/compound_words.rs index c644f5f6..1d50f60a 100644 --- a/harper-core/src/linting/compound_words.rs +++ b/harper-core/src/linting/compound_words.rs @@ -84,9 +84,7 @@ impl Linter for 
CompoundWords { #[cfg(test)] mod tests { - use crate::linting::tests::{ - assert_lint_count, assert_suggestion_count, assert_suggestion_result, - }; + use crate::linting::tests::{assert_lint_count, assert_suggestion_count}; use super::CompoundWords; diff --git a/harper-core/src/linting/correct_number_suffix.rs b/harper-core/src/linting/correct_number_suffix.rs index 8d095156..f5f7d5f0 100644 --- a/harper-core/src/linting/correct_number_suffix.rs +++ b/harper-core/src/linting/correct_number_suffix.rs @@ -1,6 +1,6 @@ use super::{Lint, LintKind, Linter, Suggestion}; -use crate::token::{NumberSuffix, TokenStringExt}; -use crate::{Document, Span, TokenKind}; +use crate::token::TokenStringExt; +use crate::{Document, NumberSuffix, Span, TokenKind}; /// Detect and warn that the sentence is too long. #[derive(Debug, Clone, Copy, Default)] diff --git a/harper-core/src/linting/mod.rs b/harper-core/src/linting/mod.rs index bbd650b6..87e26c91 100644 --- a/harper-core/src/linting/mod.rs +++ b/harper-core/src/linting/mod.rs @@ -74,7 +74,7 @@ pub trait Linter: Send + Sync { #[cfg(test)] mod tests { use super::Linter; - use crate::{remove_overlaps, Document}; + use crate::Document; pub fn assert_lint_count(text: &str, mut linter: impl Linter, count: usize) { let test = Document::new_markdown_curated(text); diff --git a/harper-core/src/parsers/collapse_identifiers.rs b/harper-core/src/parsers/collapse_identifiers.rs index 59fd81a1..e98f2c11 100644 --- a/harper-core/src/parsers/collapse_identifiers.rs +++ b/harper-core/src/parsers/collapse_identifiers.rs @@ -3,9 +3,9 @@ use std::sync::Arc; use itertools::Itertools; -use super::{Parser, TokenKind}; +use super::Parser; use crate::patterns::{PatternExt, SequencePattern}; -use crate::{Dictionary, Lrc, Span, Token, VecExt}; +use crate::{Dictionary, Lrc, Span, Token, TokenKind, VecExt}; /// A parser that wraps any other parser to collapse token strings that match /// the pattern `word_word` or `word-word`. 
diff --git a/harper-core/src/parsers/mod.rs b/harper-core/src/parsers/mod.rs index f35f209b..55900667 100644 --- a/harper-core/src/parsers/mod.rs +++ b/harper-core/src/parsers/mod.rs @@ -11,7 +11,7 @@ pub use markdown::Markdown; pub use mask::Mask; pub use plain_english::PlainEnglish; -pub use crate::token::{Token, TokenKind, TokenStringExt}; +use crate::{Token, TokenStringExt}; #[cfg(not(feature = "concurrent"))] #[blanket(derive(Box))] diff --git a/harper-core/src/token.rs b/harper-core/src/token.rs index b8ef580e..5eaa36c7 100644 --- a/harper-core/src/token.rs +++ b/harper-core/src/token.rs @@ -1,13 +1,8 @@ -use is_macro::Is; use itertools::Itertools; -use ordered_float::OrderedFloat; use paste::paste; use serde::{Deserialize, Serialize}; -use crate::punctuation::Punctuation; -use crate::Span; -use crate::{ConjunctionData, NounData}; -use crate::{Quote, WordMetadata}; +use crate::{FatToken, Span, TokenKind}; #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Default)] pub struct Token { @@ -31,344 +26,6 @@ impl Token { } } -/// A [`Token`] that holds its content as a fat [`Vec`] rather than as a -/// [`Span`]. -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd)] -pub struct FatToken { - pub content: Vec, - pub kind: TokenKind, -} - -#[derive( - Debug, Is, Clone, Copy, Serialize, Deserialize, Default, PartialOrd, Hash, Eq, PartialEq, -)] -#[serde(tag = "kind", content = "value")] -pub enum TokenKind { - Word(WordMetadata), - Punctuation(Punctuation), - Number(OrderedFloat, Option), - /// A sequence of " " spaces. - Space(usize), - /// A sequence of "\n" newlines - Newline(usize), - EmailAddress, - Url, - Hostname, - /// A special token used for things like inline code blocks that should be - /// ignored by all linters. 
- #[default] - Unlintable, - ParagraphBreak, -} - -impl TokenKind { - pub fn is_open_square(&self) -> bool { - matches!(self, TokenKind::Punctuation(Punctuation::OpenSquare)) - } - - pub fn is_close_square(&self) -> bool { - matches!(self, TokenKind::Punctuation(Punctuation::CloseSquare)) - } - - pub fn is_pipe(&self) -> bool { - matches!(self, TokenKind::Punctuation(Punctuation::Pipe)) - } - - pub fn is_pronoun(&self) -> bool { - matches!( - self, - TokenKind::Word(WordMetadata { - noun: Some(NounData { - is_pronoun: Some(true), - .. - }), - .. - }) - ) - } - - pub fn is_conjunction(&self) -> bool { - matches!( - self, - TokenKind::Word(WordMetadata { - conjunction: Some(ConjunctionData {}), - .. - }) - ) - } - - fn is_chunk_terminator(&self) -> bool { - if self.is_sentence_terminator() { - return true; - } - - match self { - TokenKind::Punctuation(punct) => { - matches!( - punct, - Punctuation::Comma | Punctuation::Quote { .. } | Punctuation::Colon - ) - } - _ => false, - } - } - - fn is_sentence_terminator(&self) -> bool { - match self { - TokenKind::Punctuation(punct) => [ - Punctuation::Period, - Punctuation::Bang, - Punctuation::Question, - ] - .contains(punct), - TokenKind::ParagraphBreak => true, - _ => false, - } - } - - pub fn is_ellipsis(&self) -> bool { - matches!(self, TokenKind::Punctuation(Punctuation::Ellipsis)) - } - - pub fn is_hyphen(&self) -> bool { - matches!(self, TokenKind::Punctuation(Punctuation::Hyphen)) - } - - pub fn is_adjective(&self) -> bool { - matches!( - self, - TokenKind::Word(WordMetadata { - adjective: Some(_), - .. - }) - ) - } - - pub fn is_adverb(&self) -> bool { - matches!( - self, - TokenKind::Word(WordMetadata { - adverb: Some(_), - .. - }) - ) - } - - pub fn is_swear(&self) -> bool { - matches!( - self, - TokenKind::Word(WordMetadata { - swear: Some(true), - .. - }) - ) - } - - /// Checks that `self` is the same enum variant as `other`, regardless of - /// whether the inner metadata is also equal. 
- pub fn matches_variant_of(&self, other: &Self) -> bool { - self.with_default_data() == other.with_default_data() - } - - /// Produces a copy of `self` with any inner data replaced with it's default - /// value. Useful for making comparisons on just the variant of the - /// enum. - pub fn with_default_data(&self) -> Self { - match self { - TokenKind::Word(_) => TokenKind::Word(Default::default()), - TokenKind::Punctuation(_) => TokenKind::Punctuation(Default::default()), - TokenKind::Number(..) => TokenKind::Number(Default::default(), Default::default()), - TokenKind::Space(_) => TokenKind::Space(Default::default()), - TokenKind::Newline(_) => TokenKind::Newline(Default::default()), - _ => *self, - } - } -} - -impl TokenKind { - /// Construct a [`TokenKind::Word`] with no (default) metadata. - pub fn blank_word() -> Self { - Self::Word(WordMetadata::default()) - } -} - -#[derive( - Debug, Serialize, Deserialize, Default, PartialEq, PartialOrd, Clone, Copy, Is, Hash, Eq, -)] -pub enum NumberSuffix { - #[default] - Th, - St, - Nd, - Rd, -} - -impl NumberSuffix { - pub fn correct_suffix_for(number: impl Into) -> Option { - let number = number.into(); - - if number < 0.0 || number - number.floor() > f64::EPSILON || number > u64::MAX as f64 { - return None; - } - - let integer = number as u64; - - if let 11..=13 = integer % 100 { - return Some(Self::Th); - }; - - match integer % 10 { - 0 => Some(Self::Th), - 1 => Some(Self::St), - 2 => Some(Self::Nd), - 3 => Some(Self::Rd), - 4 => Some(Self::Th), - 5 => Some(Self::Th), - 6 => Some(Self::Th), - 7 => Some(Self::Th), - 8 => Some(Self::Th), - 9 => Some(Self::Th), - _ => None, - } - } - - pub fn to_chars(self) -> Vec { - match self { - NumberSuffix::Th => vec!['t', 'h'], - NumberSuffix::St => vec!['s', 't'], - NumberSuffix::Nd => vec!['n', 'd'], - NumberSuffix::Rd => vec!['r', 'd'], - } - } - - /// Check the first several characters in a buffer to see if it matches a - /// number suffix. 
- pub fn from_chars(chars: &[char]) -> Option { - if chars.len() < 2 { - return None; - } - - match (chars[0], chars[1]) { - ('t', 'h') => Some(NumberSuffix::Th), - ('T', 'h') => Some(NumberSuffix::Th), - ('t', 'H') => Some(NumberSuffix::Th), - ('T', 'H') => Some(NumberSuffix::Th), - ('s', 't') => Some(NumberSuffix::St), - ('S', 't') => Some(NumberSuffix::St), - ('s', 'T') => Some(NumberSuffix::St), - ('S', 'T') => Some(NumberSuffix::St), - ('n', 'd') => Some(NumberSuffix::Nd), - ('N', 'd') => Some(NumberSuffix::Nd), - ('n', 'D') => Some(NumberSuffix::Nd), - ('N', 'D') => Some(NumberSuffix::Nd), - ('r', 'd') => Some(NumberSuffix::Rd), - ('R', 'd') => Some(NumberSuffix::Rd), - ('r', 'D') => Some(NumberSuffix::Rd), - ('R', 'D') => Some(NumberSuffix::Rd), - _ => None, - } - } -} - -impl TokenKind { - pub fn as_mut_quote(&mut self) -> Option<&mut Quote> { - self.as_mut_punctuation()?.as_mut_quote() - } - - pub fn as_quote(&self) -> Option<&Quote> { - self.as_punctuation()?.as_quote() - } - - pub fn is_quote(&self) -> bool { - matches!(self, TokenKind::Punctuation(Punctuation::Quote(_))) - } - - pub fn is_apostrophe(&self) -> bool { - matches!(self, TokenKind::Punctuation(Punctuation::Apostrophe)) - } - - pub fn is_period(&self) -> bool { - matches!(self, TokenKind::Punctuation(Punctuation::Period)) - } - - pub fn is_at(&self) -> bool { - matches!(self, TokenKind::Punctuation(Punctuation::At)) - } - - /// Used by `crate::parsers::CollapseIdentifiers` - /// TODO: Separate this into two functions and add OR functionality to - /// pattern matching - pub fn is_case_separator(&self) -> bool { - matches!(self, TokenKind::Punctuation(Punctuation::Underscore)) - || matches!(self, TokenKind::Punctuation(Punctuation::Hyphen)) - } - - pub fn is_verb(&self) -> bool { - let TokenKind::Word(metadata) = self else { - return false; - }; - - metadata.is_verb() - } - - pub fn is_linking_verb(&self) -> bool { - let TokenKind::Word(metadata) = self else { - return false; - }; - - 
metadata.is_linking_verb() - } - - pub fn is_not_plural_noun(&self) -> bool { - let TokenKind::Word(metadata) = self else { - return true; - }; - - metadata.is_not_plural_noun() - } - - pub fn is_common_word(&self) -> bool { - let TokenKind::Word(metadata) = self else { - return true; - }; - - metadata.common - } - - pub fn is_plural_noun(&self) -> bool { - let TokenKind::Word(metadata) = self else { - return false; - }; - - metadata.is_plural_noun() - } - - pub fn is_noun(&self) -> bool { - let TokenKind::Word(metadata) = self else { - return false; - }; - - metadata.is_noun() - } - - pub fn is_likely_homograph(&self) -> bool { - let TokenKind::Word(metadata) = self else { - return false; - }; - - metadata.is_likely_homograph() - } - - pub fn is_comma(&self) -> bool { - matches!(self, TokenKind::Punctuation(Punctuation::Comma)) - } - - /// Checks whether the token is whitespace. - pub fn is_whitespace(&self) -> bool { - matches!(self, TokenKind::Space(_) | TokenKind::Newline(_)) - } -} - macro_rules! create_decl_for { ($thing:ident) => { paste! { diff --git a/harper-core/src/token_kind.rs b/harper-core/src/token_kind.rs new file mode 100644 index 00000000..b669c040 --- /dev/null +++ b/harper-core/src/token_kind.rs @@ -0,0 +1,335 @@ +use is_macro::Is; +use ordered_float::OrderedFloat; +use serde::{Deserialize, Serialize}; + +use crate::{ConjunctionData, NounData, Punctuation, Quote, WordMetadata}; + +#[derive( + Debug, Is, Clone, Copy, Serialize, Deserialize, Default, PartialOrd, Hash, Eq, PartialEq, +)] +#[serde(tag = "kind", content = "value")] +pub enum TokenKind { + Word(WordMetadata), + Punctuation(Punctuation), + Number(OrderedFloat, Option), + /// A sequence of " " spaces. + Space(usize), + /// A sequence of "\n" newlines + Newline(usize), + EmailAddress, + Url, + Hostname, + /// A special token used for things like inline code blocks that should be + /// ignored by all linters. 
+ #[default] + Unlintable, + ParagraphBreak, +} + +impl TokenKind { + pub fn is_open_square(&self) -> bool { + matches!(self, TokenKind::Punctuation(Punctuation::OpenSquare)) + } + + pub fn is_close_square(&self) -> bool { + matches!(self, TokenKind::Punctuation(Punctuation::CloseSquare)) + } + + pub fn is_pipe(&self) -> bool { + matches!(self, TokenKind::Punctuation(Punctuation::Pipe)) + } + + pub fn is_pronoun(&self) -> bool { + matches!( + self, + TokenKind::Word(WordMetadata { + noun: Some(NounData { + is_pronoun: Some(true), + .. + }), + .. + }) + ) + } + + pub fn is_conjunction(&self) -> bool { + matches!( + self, + TokenKind::Word(WordMetadata { + conjunction: Some(ConjunctionData {}), + .. + }) + ) + } + + pub(crate) fn is_chunk_terminator(&self) -> bool { + if self.is_sentence_terminator() { + return true; + } + + match self { + TokenKind::Punctuation(punct) => { + matches!( + punct, + Punctuation::Comma | Punctuation::Quote { .. } | Punctuation::Colon + ) + } + _ => false, + } + } + + pub(crate) fn is_sentence_terminator(&self) -> bool { + match self { + TokenKind::Punctuation(punct) => [ + Punctuation::Period, + Punctuation::Bang, + Punctuation::Question, + ] + .contains(punct), + TokenKind::ParagraphBreak => true, + _ => false, + } + } + + pub fn is_ellipsis(&self) -> bool { + matches!(self, TokenKind::Punctuation(Punctuation::Ellipsis)) + } + + pub fn is_hyphen(&self) -> bool { + matches!(self, TokenKind::Punctuation(Punctuation::Hyphen)) + } + + pub fn is_adjective(&self) -> bool { + matches!( + self, + TokenKind::Word(WordMetadata { + adjective: Some(_), + .. + }) + ) + } + + pub fn is_adverb(&self) -> bool { + matches!( + self, + TokenKind::Word(WordMetadata { + adverb: Some(_), + .. + }) + ) + } + + pub fn is_swear(&self) -> bool { + matches!( + self, + TokenKind::Word(WordMetadata { + swear: Some(true), + .. + }) + ) + } + + /// Checks that `self` is the same enum variant as `other`, regardless of + /// whether the inner metadata is also equal. 
+ pub fn matches_variant_of(&self, other: &Self) -> bool { + self.with_default_data() == other.with_default_data() + } + + /// Produces a copy of `self` with any inner data replaced with it's default + /// value. Useful for making comparisons on just the variant of the + /// enum. + pub fn with_default_data(&self) -> Self { + match self { + TokenKind::Word(_) => TokenKind::Word(Default::default()), + TokenKind::Punctuation(_) => TokenKind::Punctuation(Default::default()), + TokenKind::Number(..) => TokenKind::Number(Default::default(), Default::default()), + TokenKind::Space(_) => TokenKind::Space(Default::default()), + TokenKind::Newline(_) => TokenKind::Newline(Default::default()), + _ => *self, + } + } +} + +impl TokenKind { + /// Construct a [`TokenKind::Word`] with no (default) metadata. + pub fn blank_word() -> Self { + Self::Word(WordMetadata::default()) + } +} + +#[derive( + Debug, Serialize, Deserialize, Default, PartialEq, PartialOrd, Clone, Copy, Is, Hash, Eq, +)] +pub enum NumberSuffix { + #[default] + Th, + St, + Nd, + Rd, +} + +impl NumberSuffix { + pub fn correct_suffix_for(number: impl Into) -> Option { + let number = number.into(); + + if number < 0.0 || number - number.floor() > f64::EPSILON || number > u64::MAX as f64 { + return None; + } + + let integer = number as u64; + + if let 11..=13 = integer % 100 { + return Some(Self::Th); + }; + + match integer % 10 { + 0 => Some(Self::Th), + 1 => Some(Self::St), + 2 => Some(Self::Nd), + 3 => Some(Self::Rd), + 4 => Some(Self::Th), + 5 => Some(Self::Th), + 6 => Some(Self::Th), + 7 => Some(Self::Th), + 8 => Some(Self::Th), + 9 => Some(Self::Th), + _ => None, + } + } + + pub fn to_chars(self) -> Vec { + match self { + NumberSuffix::Th => vec!['t', 'h'], + NumberSuffix::St => vec!['s', 't'], + NumberSuffix::Nd => vec!['n', 'd'], + NumberSuffix::Rd => vec!['r', 'd'], + } + } + + /// Check the first several characters in a buffer to see if it matches a + /// number suffix. 
+ pub fn from_chars(chars: &[char]) -> Option { + if chars.len() < 2 { + return None; + } + + match (chars[0], chars[1]) { + ('t', 'h') => Some(NumberSuffix::Th), + ('T', 'h') => Some(NumberSuffix::Th), + ('t', 'H') => Some(NumberSuffix::Th), + ('T', 'H') => Some(NumberSuffix::Th), + ('s', 't') => Some(NumberSuffix::St), + ('S', 't') => Some(NumberSuffix::St), + ('s', 'T') => Some(NumberSuffix::St), + ('S', 'T') => Some(NumberSuffix::St), + ('n', 'd') => Some(NumberSuffix::Nd), + ('N', 'd') => Some(NumberSuffix::Nd), + ('n', 'D') => Some(NumberSuffix::Nd), + ('N', 'D') => Some(NumberSuffix::Nd), + ('r', 'd') => Some(NumberSuffix::Rd), + ('R', 'd') => Some(NumberSuffix::Rd), + ('r', 'D') => Some(NumberSuffix::Rd), + ('R', 'D') => Some(NumberSuffix::Rd), + _ => None, + } + } +} + +impl TokenKind { + pub fn as_mut_quote(&mut self) -> Option<&mut Quote> { + self.as_mut_punctuation()?.as_mut_quote() + } + + pub fn as_quote(&self) -> Option<&Quote> { + self.as_punctuation()?.as_quote() + } + + pub fn is_quote(&self) -> bool { + matches!(self, TokenKind::Punctuation(Punctuation::Quote(_))) + } + + pub fn is_apostrophe(&self) -> bool { + matches!(self, TokenKind::Punctuation(Punctuation::Apostrophe)) + } + + pub fn is_period(&self) -> bool { + matches!(self, TokenKind::Punctuation(Punctuation::Period)) + } + + pub fn is_at(&self) -> bool { + matches!(self, TokenKind::Punctuation(Punctuation::At)) + } + + /// Used by `crate::parsers::CollapseIdentifiers` + /// TODO: Separate this into two functions and add OR functionality to + /// pattern matching + pub fn is_case_separator(&self) -> bool { + matches!(self, TokenKind::Punctuation(Punctuation::Underscore)) + || matches!(self, TokenKind::Punctuation(Punctuation::Hyphen)) + } + + pub fn is_verb(&self) -> bool { + let TokenKind::Word(metadata) = self else { + return false; + }; + + metadata.is_verb() + } + + pub fn is_linking_verb(&self) -> bool { + let TokenKind::Word(metadata) = self else { + return false; + }; + + 
metadata.is_linking_verb() + } + + pub fn is_not_plural_noun(&self) -> bool { + let TokenKind::Word(metadata) = self else { + return true; + }; + + metadata.is_not_plural_noun() + } + + pub fn is_common_word(&self) -> bool { + let TokenKind::Word(metadata) = self else { + return true; + }; + + metadata.common + } + + pub fn is_plural_noun(&self) -> bool { + let TokenKind::Word(metadata) = self else { + return false; + }; + + metadata.is_plural_noun() + } + + pub fn is_noun(&self) -> bool { + let TokenKind::Word(metadata) = self else { + return false; + }; + + metadata.is_noun() + } + + pub fn is_likely_homograph(&self) -> bool { + let TokenKind::Word(metadata) = self else { + return false; + }; + + metadata.is_likely_homograph() + } + + pub fn is_comma(&self) -> bool { + matches!(self, TokenKind::Punctuation(Punctuation::Comma)) + } + + /// Checks whether the token is whitespace. + pub fn is_whitespace(&self) -> bool { + matches!(self, TokenKind::Space(_) | TokenKind::Newline(_)) + } +} From c956a022733efdd289d01975e4543a6276a88306 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Fri, 3 Jan 2025 14:17:37 -0700 Subject: [PATCH 36/40] fix(core): overlap removal algorithm missed some --- harper-core/src/lib.rs | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/harper-core/src/lib.rs b/harper-core/src/lib.rs index abaefe6d..868415d6 100644 --- a/harper-core/src/lib.rs +++ b/harper-core/src/lib.rs @@ -21,7 +21,7 @@ mod token_kind; mod vec_ext; mod word_metadata; -use std::collections::VecDeque; +use std::{collections::VecDeque, iter::once}; pub use char_string::{CharString, CharStringExt}; pub use document::Document; @@ -47,22 +47,17 @@ pub fn remove_overlaps(lints: &mut Vec) { return; } - lints.sort_by_key(|l| l.span.start); - let mut remove_indices = VecDeque::new(); + lints.sort_by_key(|l| (l.span.start, !0 - l.span.end)); - for i in 0..lints.len() - 1 { - let cur = &lints[i]; - let next = &lints[i + 1]; + let mut cur = 0; - if 
cur.span.overlaps_with(next.span) { - // Remember, lower priority means higher importance. - if next.priority < cur.priority { - remove_indices.push_back(i); - } else { - remove_indices.push_back(i + 1); - } + for (i, lint) in lints.iter().enumerate() { + if lint.span.start < cur { + remove_indices.push_back(i); + continue; } + cur = lint.span.end; } lints.remove_indices(remove_indices); From 1a8d50dbecbb86a0c338022c55d0f02527c1ed33 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Fri, 3 Jan 2025 14:25:47 -0700 Subject: [PATCH 37/40] chore(core): remove unused import --- harper-core/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harper-core/src/lib.rs b/harper-core/src/lib.rs index 868415d6..c3a71c80 100644 --- a/harper-core/src/lib.rs +++ b/harper-core/src/lib.rs @@ -21,7 +21,7 @@ mod token_kind; mod vec_ext; mod word_metadata; -use std::{collections::VecDeque, iter::once}; +use std::collections::VecDeque; pub use char_string::{CharString, CharStringExt}; pub use document::Document; From b85e5e8c53cbcc8fa800fa7c8edeb4099c0e1e2b Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Fri, 3 Jan 2025 14:37:03 -0700 Subject: [PATCH 38/40] docs: moved information from the `README.md` to the website --- README.md | 33 ++++++----------------- packages/web/src/routes/docs/faq/+page.md | 4 +++ 2 files changed, 12 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 43884f35..2fcf1574 100644 --- a/README.md +++ b/README.md @@ -4,9 +4,10 @@ [![Harper Binaries](https://github.com/automattic/harper/actions/workflows/build_harper_binaries.yml/badge.svg)](https://github.com/automattic/harper/actions/workflows/build_harper_binaries.yml) -[![Web](https://github.com/automattic/harper/actions/workflows/build_web.yml/badge.svg)](https://github.com/automattic/harper/actions/workflows/build_web.yml) 
+[![Website](https://github.com/automattic/harper/actions/workflows/build_web.yml/badge.svg)](https://github.com/automattic/harper/actions/workflows/build_web.yml) [![Precommit](https://github.com/automattic/harper/actions/workflows/precommit.yml/badge.svg)](https://github.com/automattic/harper/actions/workflows/precommit.yml) [![Crates.io](https://img.shields.io/crates/v/harper-ls)](https://crates.io/crates/harper-ls) +![NPM Version](https://img.shields.io/npm/v/harper.js) Harper is an English grammar checker designed to be _just right._ I created it after years of dealing with the shortcomings of the competition. @@ -31,25 +32,6 @@ Harper is even small enough to load via [WebAssembly.](https://writewithharper.c Harper currently only supports American English, but the core is extensible to support other languages, so we welcome contributions that allow for other language support. -## Installation - -If you want to use Harper on your machine, you have three choices. - -### `harper-ls` - -`harper-ls` provides an integration that works for most code editors. - -[Read more here.](https://writewithharper.com/docs/integrations/language-server) - -### Harper Obsidian Integration - -If you use [Obsidian](https://obsidian.md/), you may install the [Harper Obsidian Plugin](https://github.com/automattic/harper-obsidian-plugin) by searching for "Harper" in the community plugin store. - -### Zed Plugin - -If you use [Zed](https://zed.dev/), [Stef16Robbe](https://github.com/Stef16Robbe) has developed a fantastic [plugin](https://github.com/Stef16Robbe/harper_zed) that works out-of-the box. -No setup required. - ## Performance Issues We consider long lint times bugs. @@ -58,15 +40,16 @@ If you encounter any significant performance issues, please create an issue on t If you find a fix to any performance issue, we are open the contribution. 
Just make sure to read [our contribution guidelines first.](https://github.com/automattic/harper/blob/master/CONTRIBUTING.md) -## FAQs - -### Where did the name Harper come from? +## Links -See [this blog post](https://elijahpotter.dev/articles/naming_harper). +- [Frequently Asked Questions](https://writewithharper.com/docs/faq) +- [`harper-ls` Documentation](https://writewithharper.com/docs/integrations/language-server) +- [Neovim Support](https://writewithharper.com/docs/integrations/neovim) +- [`harper.js` Documentation](https://writewithharper.com/docs/integrations/neovim) ## Huge Thanks -This project would not be possible without the hard work from those who [contribute](/CONTRIBUTING.md). +This project would not be possible without the hard work from those who [contribute](https://writewithharper.com/docs/contributors/introduction). diff --git a/packages/web/src/routes/docs/faq/+page.md b/packages/web/src/routes/docs/faq/+page.md index 86dde89e..db4a4bf2 100644 --- a/packages/web/src/routes/docs/faq/+page.md +++ b/packages/web/src/routes/docs/faq/+page.md @@ -20,3 +20,7 @@ We are entirely open to PRs that add support. If you just want to be able to run grammar checking on your code's comments, it's actually quite straightforward. You can use [this PR as a model for what to do](https://github.com/Automattic/harper/pull/332). + +### Where Did the Name Harper Come From? + +See [this blog post](https://elijahpotter.dev/articles/naming_harper). 
From b8ba902a17516f7d2ec6fdca829f066ec4c8ee9c Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Fri, 3 Jan 2025 14:44:58 -0700 Subject: [PATCH 39/40] fix: broken link --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2fcf1574..7576a4b3 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ Just make sure to read [our contribution guidelines first.](https://github.com/a - [Frequently Asked Questions](https://writewithharper.com/docs/faq) - [`harper-ls` Documentation](https://writewithharper.com/docs/integrations/language-server) - [Neovim Support](https://writewithharper.com/docs/integrations/neovim) -- [`harper.js` Documentation](https://writewithharper.com/docs/integrations/neovim) +- [`harper.js` Documentation](https://writewithharper.com/docs/harperjs/introduction) ## Huge Thanks From 7bdc753b54cec79702807fb81a8defafc4a3f1be Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Fri, 3 Jan 2025 14:52:23 -0700 Subject: [PATCH 40/40] docs: fixed minor grammatical errors --- .../web/src/routes/docs/contributors/dictionary/+page.md | 4 ++-- .../web/src/routes/docs/harperjs/introduction/+page.md | 2 +- packages/web/src/routes/docs/harperjs/linting/+page.md | 4 ++-- .../web/src/routes/docs/integrations/obsidian/+page.md | 8 ++++---- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/packages/web/src/routes/docs/contributors/dictionary/+page.md b/packages/web/src/routes/docs/contributors/dictionary/+page.md index 3de8d652..f265a485 100644 --- a/packages/web/src/routes/docs/contributors/dictionary/+page.md +++ b/packages/web/src/routes/docs/contributors/dictionary/+page.md @@ -4,9 +4,9 @@ title: Updating the Curated Dictionary The curated dictionary is the English dictionary Harper uses as reference internally when analyzing or modifying English text. It is common, especially with technical language, to come across words that are not in this dictionary. 
-If this happens to you, please open a PR to get it in. +If this happens to you, please open a PR to get them in. -PR [#343](https://github.com/Automattic/harper/pull/343) is a great example of what is described here. +PR [#343](https://github.com/Automattic/harper/pull/343) is a practical example of the ideas described here. There are two files you need to worry about. [`harper-core/dictionary.dict`](https://github.com/Automattic/harper/blob/master/harper-core/dictionary.dict) and [`harper-core/affixes.json`](https://github.com/Automattic/harper/blob/master/harper-core/affixes.json). diff --git a/packages/web/src/routes/docs/harperjs/introduction/+page.md b/packages/web/src/routes/docs/harperjs/introduction/+page.md index f2cc9b9e..86f1289d 100644 --- a/packages/web/src/routes/docs/harperjs/introduction/+page.md +++ b/packages/web/src/routes/docs/harperjs/introduction/+page.md @@ -8,7 +8,7 @@ If you're a developer, odds are that you are using JavaScript or TypeScript on a Your project probably has a least a little bit of either. Furthermore, a plurality of focused authorship happens inside either a web browser or an [Electron-based app](https://www.electronjs.org/). -Given this, we wanted to create an environment where trivial to integrate fantastic grammar checking into web applications. +Given this, we wanted to create an environment where it would be trivial to integrate fantastic grammar checking into web applications. That's why we created `harper.js`. Today, it serves as the foundation for our [Obsidian plugin](/docs/integrations/obsidian) and our [website](/). diff --git a/packages/web/src/routes/docs/harperjs/linting/+page.md b/packages/web/src/routes/docs/harperjs/linting/+page.md index 082b4510..680d1e1f 100644 --- a/packages/web/src/routes/docs/harperjs/linting/+page.md +++ b/packages/web/src/routes/docs/harperjs/linting/+page.md @@ -15,11 +15,11 @@ Notice how every method returns a `Promise<...>`. 
@code(../../../../../../harper.js/src/Linter.ts) -The `LocalLinter` will instantiate and prepare Harper's WebAssembly module asynchronously, but notably **in the same event loop**. +The `LocalLinter` will instantiate and prepare Harper's WebAssembly module asynchronously, but **in the same event loop**. This can result in high [LCP](https://developer.mozilla.org/en-US/docs/Glossary/Largest_contentful_paint), so this implementation is only recommended in situtations where the event loop will not be doing other latency-sensitive things. In other words: the `LocalLinter` is not for the web. The `WorkerLinter`, on the other hand, will instantiate and prepare Harper's WebAssembly module inside a [Web Worker](https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API), which means it will **not** block the event loop. This is recommended for interactive web applications. -[Visit our page about CDNs](./CDN) to see an example of the `WorkerLinter` in action. +[Visit our page about CDNs](./CDN) to see an example of the `WorkerLinter` in action, or [the page about Node.js](./node) for the `LocalLinter`. diff --git a/packages/web/src/routes/docs/integrations/obsidian/+page.md b/packages/web/src/routes/docs/integrations/obsidian/+page.md index fbe378d9..0bfb5d5d 100644 --- a/packages/web/src/routes/docs/integrations/obsidian/+page.md +++ b/packages/web/src/routes/docs/integrations/obsidian/+page.md @@ -2,16 +2,16 @@ title: Obsidian --- +![A screenshot of Obsidian with Harper installed](/images/obsidian_screenshot.webp) + [Obsidian](https://obsidian.md/) is a popular Markdown editor and [Zettelkasten](https://en.wikipedia.org/wiki/Zettelkasten) knowledge vault. Its extensive plugin ecosystem makes it an attractive choice for note-taking students and documenting professionals. -While one of main features of Obsidian is privacy, server-side LanguageTool is one of the most popular plugins. -Harper aims to replace LanguageTool by being faster, better and entirely private. 
+While one of the main features of Obsidian is privacy, a server-side LanguageTool plugin is one of the most popular on the platform. +Harper aims to replace LanguageTool by being faster, better, and entirely private. ## Installation -![A screenshot of Obsidian with Harper installed](/images/obsidian_screenshot.webp) - To add Harper to [Obsidian](/obsidian), just install the community plugin. More specifically, go to Obsidian's settings and click "Community Plugins." You may be asked to enable them.