From c3219385f0fb6f9827f54f2471e823130828d623 Mon Sep 17 00:00:00 2001 From: Clay Dugo Date: Mon, 30 Dec 2024 22:06:35 -0400 Subject: [PATCH 01/11] Update README.md language server integration link --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f1a2712a..43884f35 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ If you want to use Harper on your machine, you have three choices. `harper-ls` provides an integration that works for most code editors. -[Read more here.](./harper-ls/README.md) +[Read more here.](https://writewithharper.com/docs/integrations/language-server) ### Harper Obsidian Integration From cd03cd49254a885b7cb6dec9509d46066312f358 Mon Sep 17 00:00:00 2001 From: Clay Dugo Date: Mon, 30 Dec 2024 22:08:03 -0400 Subject: [PATCH 02/11] Update harper-ls README.md as well --- harper-ls/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harper-ls/README.md b/harper-ls/README.md index 7ae7d289..36b9c9ef 100644 --- a/harper-ls/README.md +++ b/harper-ls/README.md @@ -1,3 +1,3 @@ # `harper-ls` -Documentation for `harper-ls` has moved to the main [website](https://writewithharper.com/integrations/language-server). +Documentation for `harper-ls` has moved to the main [website](https://writewithharper.com/docs/integrations/language-server). From 8be1d0d12375e8453d1cc11f7ca0ebe9f35cbdfd Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Tue, 31 Dec 2024 08:35:51 -0700 Subject: [PATCH 03/11] fix(obsidian-plugin): config was loaded _after_ settings were rendered --- packages/obsidian-plugin/src/index.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/obsidian-plugin/src/index.js b/packages/obsidian-plugin/src/index.js index 9a1062d6..fe9cc9df 100644 --- a/packages/obsidian-plugin/src/index.js +++ b/packages/obsidian-plugin/src/index.js @@ -120,8 +120,8 @@ export default class HarperPlugin extends Plugin { this.setupCommands(); this.setupStatusBar(); - this.addSettingTab(new HarperSettingTab(this.app, this)); await this.setSettings(await this.loadData()); + this.addSettingTab(new HarperSettingTab(this.app, this)); } setupCommands() { From b9486a57f40d0abd577f9f05435c86bd94f32820 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Tue, 31 Dec 2024 09:12:07 -0700 Subject: [PATCH 04/11] test(harper.js): run tests on both Firefox and Chromium --- packages/harper.js/package.json | 2 +- packages/harper.js/vite.config.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/harper.js/package.json b/packages/harper.js/package.json index 1c4620ef..8c39aee2 100644 --- a/packages/harper.js/package.json +++ b/packages/harper.js/package.json @@ -17,7 +17,7 @@ "scripts": { "dev": "vite", "build": "tsc && vite build", - "test": "vitest run" + "test": "vitest run --browser firefox && vitest run --browser chromium" }, "devDependencies": { "wasm": "link:../../harper-wasm/pkg", diff --git a/packages/harper.js/vite.config.js b/packages/harper.js/vite.config.js index 87c3ee7c..c3b54135 100644 --- a/packages/harper.js/vite.config.js +++ b/packages/harper.js/vite.config.js @@ -46,7 +46,7 @@ export default defineConfig({ browser: { provider: 'playwright', enabled: true, - name: 'chromium' + headless: true } }, assetsInclude: ['**/*.wasm'] From d8227828e672d94f3f84776226a2bba0ff338518 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Tue, 31 Dec 2024 10:23:55 -0700 Subject: [PATCH 05/11] feat: added descriptions to each individual lint rule --- harper-core/src/linting/an_a.rs | 4 ++ harper-core/src/linting/avoid_curses.rs | 4 ++ harper-core/src/linting/boring_words.rs | 4 ++ .../linting/capitalize_personal_pronouns.rs | 4 ++ .../src/linting/correct_number_suffix.rs | 4 ++ harper-core/src/linting/dot_initialisms.rs | 4 ++ harper-core/src/linting/ellipsis_length.rs | 4 ++ harper-core/src/linting/linking_verbs.rs | 4 ++ harper-core/src/linting/lint_group.rs | 38 ++++++++++++++++++- harper-core/src/linting/long_sentences.rs | 5 +++ harper-core/src/linting/matcher.rs | 4 ++ harper-core/src/linting/mod.rs | 2 + .../linting/multiple_sequential_pronouns.rs | 4 ++ .../linting/number_suffix_capitalization.rs | 4 ++ harper-core/src/linting/pattern_linter.rs | 6 +++ harper-core/src/linting/repeated_words.rs | 4 ++ .../src/linting/sentence_capitalization.rs | 4 ++ harper-core/src/linting/spaces.rs | 4 ++ harper-core/src/linting/spell_check.rs | 4 ++ harper-core/src/linting/spelled_numbers.rs | 4 ++ .../src/linting/terminating_conjunctions.rs | 4 ++ harper-core/src/linting/that_which.rs | 4 ++ harper-core/src/linting/unclosed_quotes.rs | 4 ++ harper-core/src/linting/use_genitive.rs | 4 ++ harper-core/src/linting/wrong_quotes.rs | 4 ++ harper-wasm/src/lib.rs | 10 +++++ packages/harper.js/src/Linter.test.ts | 19 ++++++++++ packages/harper.js/src/Linter.ts | 6 +++ packages/harper.js/src/LocalLinter.ts | 10 +++++ packages/harper.js/src/WorkerLinter/index.ts | 8 ++++ 30 files changed, 187 insertions(+), 1 deletion(-) diff --git a/harper-core/src/linting/an_a.rs b/harper-core/src/linting/an_a.rs index c7c008ba..fe1b0bb3 100644 --- a/harper-core/src/linting/an_a.rs +++ b/harper-core/src/linting/an_a.rs @@ -62,6 +62,10 @@ impl Linter for AnA { lints } + + fn description(&self) -> &'static str { + "A rule that looks for incorrect indefinite articles. For example, \"this is an mule\" would be flagged as incorrect." + } } fn to_lower_word(word: &[char]) -> Cow<'_, [char]> { diff --git a/harper-core/src/linting/avoid_curses.rs b/harper-core/src/linting/avoid_curses.rs index ec7e85e6..717327c8 100644 --- a/harper-core/src/linting/avoid_curses.rs +++ b/harper-core/src/linting/avoid_curses.rs @@ -18,6 +18,10 @@ impl Linter for AvoidCurses { }) .collect() } + + fn description(&self) -> &'static str { + "A rule that looks for common offensive language." + } } #[cfg(test)] diff --git a/harper-core/src/linting/boring_words.rs b/harper-core/src/linting/boring_words.rs index b56900be..1e30b3eb 100644 --- a/harper-core/src/linting/boring_words.rs +++ b/harper-core/src/linting/boring_words.rs @@ -44,4 +44,8 @@ impl PatternLinter for BoringWords { priority: 127, } } + + fn description(&self) -> &'static str { + "This rule looks for particularly boring or overused words. Using varied language is an easy way to keep a reader's attention." + } } diff --git a/harper-core/src/linting/capitalize_personal_pronouns.rs b/harper-core/src/linting/capitalize_personal_pronouns.rs index 849d0d81..04a884d6 100644 --- a/harper-core/src/linting/capitalize_personal_pronouns.rs +++ b/harper-core/src/linting/capitalize_personal_pronouns.rs @@ -34,6 +34,10 @@ impl Linter for CapitalizePersonalPronouns { lints } + + fn description(&self) -> &'static str { + "Forgetting to capitalize personal pronouns, like \"I\" or \"I'm\" is one of the most common errors. This rule helps with that." + } } #[cfg(test)] diff --git a/harper-core/src/linting/correct_number_suffix.rs b/harper-core/src/linting/correct_number_suffix.rs index bc06b700..0c53a786 100644 --- a/harper-core/src/linting/correct_number_suffix.rs +++ b/harper-core/src/linting/correct_number_suffix.rs @@ -31,6 +31,10 @@ impl Linter for CorrectNumberSuffix { output } + + fn description(&self) -> &'static str { + "When making quick edits, it is common for authors to change the value of a number without changing its suffix. This rule looks for these cases, for example: \"2st\"." + } } #[cfg(test)] diff --git a/harper-core/src/linting/dot_initialisms.rs b/harper-core/src/linting/dot_initialisms.rs index 05099f3b..5f01de42 100644 --- a/harper-core/src/linting/dot_initialisms.rs +++ b/harper-core/src/linting/dot_initialisms.rs @@ -51,6 +51,10 @@ impl PatternLinter for DotInitialisms { priority: 63, } } + + fn description(&self) -> &'static str { + "Ensures common initialisms (like \"i.e.\") are properly dot-separated." + } } #[cfg(test)] diff --git a/harper-core/src/linting/ellipsis_length.rs b/harper-core/src/linting/ellipsis_length.rs index 493e0ad3..c91757e9 100644 --- a/harper-core/src/linting/ellipsis_length.rs +++ b/harper-core/src/linting/ellipsis_length.rs @@ -35,6 +35,10 @@ impl Linter for EllipsisLength { lints } + + fn description(&self) -> &'static str { + "Make sure you have the correct number of dots in your ellipsis." + } } #[cfg(test)] diff --git a/harper-core/src/linting/linking_verbs.rs b/harper-core/src/linting/linking_verbs.rs index dadb4fde..55ddeb21 100644 --- a/harper-core/src/linting/linking_verbs.rs +++ b/harper-core/src/linting/linking_verbs.rs @@ -34,6 +34,10 @@ impl Linter for LinkingVerbs { output } + + fn description(&self) -> &'static str { + "Linking verbs connect nouns to other ideas. Make sure you do not accidentaly link words that aren't nouns." + } } #[cfg(test)] diff --git a/harper-core/src/linting/lint_group.rs b/harper-core/src/linting/lint_group.rs index 728679e9..d2ecd7ca 100644 --- a/harper-core/src/linting/lint_group.rs +++ b/harper-core/src/linting/lint_group.rs @@ -29,6 +29,16 @@ use crate::{Dictionary, Document}; macro_rules! create_lint_group_config { ($($linter:ident => $default:expr),*) => { paste! { + /// A collection of all the descriptions from the composing linters. + #[derive(Debug, Clone, Copy, Serialize, Deserialize, Default)] + pub struct LintGroupDescriptions<'a> { + $( + #[doc = "The description for the [`" $linter "`] linter."] + pub [<$linter:snake>]: &'a str, + )* + pub spell_check: &'a str + } + #[derive(Debug, Clone, Copy, Serialize, Deserialize, Default)] pub struct LintGroupConfig { $( @@ -85,10 +95,19 @@ macro_rules! create_lint_group_config { config, } } + + pub fn all_descriptions(&self) -> LintGroupDescriptions<'_> { + LintGroupDescriptions { + $( + [<$linter:snake>]: self.[<$linter:snake>].description(), + )* + spell_check: self.spell_check.description(), + } + } } impl Linter for LintGroup { - fn lint(&mut self, document: &Document) -> Vec{ + fn lint(&mut self, document: &Document) -> Vec { let mut lints = Vec::new(); let mut config = self.config.clone(); @@ -107,6 +126,10 @@ macro_rules! create_lint_group_config { lints } + + fn description(&self) -> &'static str { + "A collection of linters that can be run as one." + } } } }; @@ -141,3 +164,16 @@ impl Default for LintGroup { Self::new(LintGroupConfig::default(), T::default()) } } + +#[cfg(test)] +mod tests { + use crate::FullDictionary; + + use super::LintGroup; + + #[test] + fn can_get_all_descriptions() { + let group = LintGroup::::default(); + group.all_descriptions(); + } +} diff --git a/harper-core/src/linting/long_sentences.rs b/harper-core/src/linting/long_sentences.rs index 005a842f..e877f259 100644 --- a/harper-core/src/linting/long_sentences.rs +++ b/harper-core/src/linting/long_sentences.rs @@ -25,4 +25,9 @@ impl Linter for LongSentences { output } + + fn description(&self) -> &'static str { + "This rule looks for run-on sentences, which can make your work harder to grok. +" + } } diff --git a/harper-core/src/linting/matcher.rs b/harper-core/src/linting/matcher.rs index 827d22d4..2a0a109b 100644 --- a/harper-core/src/linting/matcher.rs +++ b/harper-core/src/linting/matcher.rs @@ -364,6 +364,10 @@ impl Linter for Matcher { lints } + + fn description(&self) -> &'static str { + "A collection of curated rules. A catch-all that will be removed in the future." + } } #[cfg(test)] diff --git a/harper-core/src/linting/mod.rs b/harper-core/src/linting/mod.rs index 70d6d237..37aca745 100644 --- a/harper-core/src/linting/mod.rs +++ b/harper-core/src/linting/mod.rs @@ -55,10 +55,12 @@ use crate::Document; #[cfg(not(feature = "concurrent"))] pub trait Linter { fn lint(&mut self, document: &Document) -> Vec; + fn description(&self) -> &str; } #[cfg(feature = "concurrent")] pub trait Linter: Send + Sync { fn lint(&mut self, document: &Document) -> Vec; + fn description(&self) -> &str; } #[cfg(test)] diff --git a/harper-core/src/linting/multiple_sequential_pronouns.rs b/harper-core/src/linting/multiple_sequential_pronouns.rs index 03282cb5..1221ecef 100644 --- a/harper-core/src/linting/multiple_sequential_pronouns.rs +++ b/harper-core/src/linting/multiple_sequential_pronouns.rs @@ -61,6 +61,10 @@ impl PatternLinter for MultipleSequentialPronouns { suggestions, } } + + fn description(&self) -> &'static str { + "When editing work to change point of view (i.e. first-person or third-person) it is common to add pronouns while neglecting to remove old ones. This rule catches cases where you have multiple disparate pronouns in sequence." + } } impl Default for MultipleSequentialPronouns { diff --git a/harper-core/src/linting/number_suffix_capitalization.rs b/harper-core/src/linting/number_suffix_capitalization.rs index 4d617b4f..350b198b 100644 --- a/harper-core/src/linting/number_suffix_capitalization.rs +++ b/harper-core/src/linting/number_suffix_capitalization.rs @@ -33,6 +33,10 @@ impl Linter for NumberSuffixCapitalization { output } + + fn description(&self) -> &'static str { + "You should never capitalize number suffixes." + } } #[cfg(test)] diff --git a/harper-core/src/linting/pattern_linter.rs b/harper-core/src/linting/pattern_linter.rs index fc2cb26f..4fed2072 100644 --- a/harper-core/src/linting/pattern_linter.rs +++ b/harper-core/src/linting/pattern_linter.rs @@ -7,6 +7,7 @@ pub trait PatternLinter { /// A simple getter for the pattern to be searched for. fn pattern(&self) -> &dyn Pattern; fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Lint; + fn description<'a>(&'a self) -> &'a str; } #[cfg(feature = "concurrent")] @@ -14,6 +15,7 @@ pub trait PatternLinter: Send + Sync { /// A simple getter for the pattern to be searched for. fn pattern(&self) -> &dyn Pattern; fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Lint; + fn description(&self) -> &str; } impl Linter for L @@ -48,4 +50,8 @@ where lints } + + fn description(&self) -> &str { + self.description() + } } diff --git a/harper-core/src/linting/repeated_words.rs b/harper-core/src/linting/repeated_words.rs index 8ccd7ffe..068e6053 100644 --- a/harper-core/src/linting/repeated_words.rs +++ b/harper-core/src/linting/repeated_words.rs @@ -63,6 +63,10 @@ impl Linter for RepeatedWords { lints } + + fn description(&self) -> &'static str { + "This rule looks for repetitions of words that are not homographs." + } } #[cfg(test)] diff --git a/harper-core/src/linting/sentence_capitalization.rs b/harper-core/src/linting/sentence_capitalization.rs index 33d8371b..449cb331 100644 --- a/harper-core/src/linting/sentence_capitalization.rs +++ b/harper-core/src/linting/sentence_capitalization.rs @@ -60,6 +60,10 @@ impl Linter for SentenceCapitalization { lints } + + fn description(&self) -> &'static str { + "The opening word of a sentence should almost always be capitalized." + } } fn is_full_sentence(toks: &[Token]) -> bool { diff --git a/harper-core/src/linting/spaces.rs b/harper-core/src/linting/spaces.rs index 5da5eac1..f9faa087 100644 --- a/harper-core/src/linting/spaces.rs +++ b/harper-core/src/linting/spaces.rs @@ -61,6 +61,10 @@ impl Linter for Spaces { output } + + fn description(&self) -> &'static str { + "Words should be separated by at most one space." + } } #[cfg(test)] diff --git a/harper-core/src/linting/spell_check.rs b/harper-core/src/linting/spell_check.rs index f19d99bb..3f5f7c6a 100644 --- a/harper-core/src/linting/spell_check.rs +++ b/harper-core/src/linting/spell_check.rs @@ -93,4 +93,8 @@ impl Linter for SpellCheck { lints } + + fn description(&self) -> &'static str { + "Looks and provides corrections for misspelled words." + } } diff --git a/harper-core/src/linting/spelled_numbers.rs b/harper-core/src/linting/spelled_numbers.rs index e9fc5fd7..8824d78f 100644 --- a/harper-core/src/linting/spelled_numbers.rs +++ b/harper-core/src/linting/spelled_numbers.rs @@ -29,6 +29,10 @@ impl Linter for SpelledNumbers { lints } + + fn description(&self) -> &'static str { + "Most style guides recommend that you spell out numbers less than ten." + } } /// Converts a number to it's spelled-out variant. diff --git a/harper-core/src/linting/terminating_conjunctions.rs b/harper-core/src/linting/terminating_conjunctions.rs index dac69bf0..9b0bb5c2 100644 --- a/harper-core/src/linting/terminating_conjunctions.rs +++ b/harper-core/src/linting/terminating_conjunctions.rs @@ -68,6 +68,10 @@ impl PatternLinter for TerminatingConjunctions { priority: 63, } } + + fn description(&self) -> &'static str { + "Subordinating conjunctions are words that create a grammatical space for another idea or clause. As such, they should never appear at the end of a clause." + } } #[cfg(test)] diff --git a/harper-core/src/linting/that_which.rs b/harper-core/src/linting/that_which.rs index 65357a92..ac152ad0 100644 --- a/harper-core/src/linting/that_which.rs +++ b/harper-core/src/linting/that_which.rs @@ -56,6 +56,10 @@ impl PatternLinter for ThatWhich { priority: 126, } } + + fn description(&self) -> &'static str { + "Repeating the word \"that\" twice is often redundent. \"That which\" is easier to read." + } } #[cfg(test)] diff --git a/harper-core/src/linting/unclosed_quotes.rs b/harper-core/src/linting/unclosed_quotes.rs index 0b108754..ac550431 100644 --- a/harper-core/src/linting/unclosed_quotes.rs +++ b/harper-core/src/linting/unclosed_quotes.rs @@ -25,4 +25,8 @@ impl Linter for UnclosedQuotes { lints } + + fn description(&self) -> &'static str { + "Quotation marks should always be closed. Unpaired quotation marks are a hallmark of sloppy work." + } } diff --git a/harper-core/src/linting/use_genitive.rs b/harper-core/src/linting/use_genitive.rs index 7c7abf99..c6d61b52 100644 --- a/harper-core/src/linting/use_genitive.rs +++ b/harper-core/src/linting/use_genitive.rs @@ -67,6 +67,10 @@ impl PatternLinter for UseGenitive { priority: 31, } } + + fn description(&self) -> &'static str { + "Looks situations where the genitive case of \"there\" should be used." + } } impl Default for UseGenitive { diff --git a/harper-core/src/linting/wrong_quotes.rs b/harper-core/src/linting/wrong_quotes.rs index e22e5848..50ae3a43 100644 --- a/harper-core/src/linting/wrong_quotes.rs +++ b/harper-core/src/linting/wrong_quotes.rs @@ -13,6 +13,10 @@ impl Linter for WrongQuotes { .filter_map(|(quote_idx, quote_token)| lint_quote(document, quote_idx, quote_token)) .collect() } + + fn description(&self) -> &'static str { + "The key on the keyboard often used as a quotation mark is actually a double-apostrophe. Use the correct character." + } } fn lint_quote(document: &Document, quote_idx: usize, quote_token: Token) -> Option { diff --git a/harper-wasm/src/lib.rs b/harper-wasm/src/lib.rs index aeea7fbf..563e9293 100644 --- a/harper-wasm/src/lib.rs +++ b/harper-wasm/src/lib.rs @@ -79,6 +79,11 @@ impl Linter { document.to_string() } + /// Get a JSON map containing the descriptions of all the linting rules. + pub fn get_lint_descriptions_as_json(&self) -> String { + serde_json::to_string(&self.lint_group.all_descriptions()).unwrap() + } + pub fn get_lint_config_as_json(&self) -> String { serde_json::to_string(&self.lint_group.config).unwrap() } @@ -88,6 +93,11 @@ impl Linter { Ok(()) } + /// Get a Record containing the descriptions of all the linting rules. + pub fn get_lint_descriptions_as_object(&self) -> JsValue { + serde_wasm_bindgen::to_value(&self.lint_group.all_descriptions()).unwrap() + } + pub fn get_lint_config_as_object(&self) -> JsValue { // Important for downstream JSON serialization let serializer = serde_wasm_bindgen::Serializer::json_compatible(); diff --git a/packages/harper.js/src/Linter.test.ts b/packages/harper.js/src/Linter.test.ts index 571f1754..ef708661 100644 --- a/packages/harper.js/src/Linter.test.ts +++ b/packages/harper.js/src/Linter.test.ts @@ -102,6 +102,25 @@ for (const [linterName, Linter] of Object.entries(linters)) { expect(titleCase).toBe('This Is a Test for Making Titles'); }); + + test(`${linterName} can get rule descriptions`, async () => { + const linter = new Linter(); + + const descriptions = await linter.getLintDescriptions(); + + expect(descriptions).toBeTypeOf('object'); + }); + + test(`${linterName} rule descriptions are not empty`, async () => { + const linter = new Linter(); + + const descriptions = await linter.getLintDescriptions(); + + for (const value of Object.values(descriptions)) { + expect(value).toBeTypeOf('string'); + expect(value).not.toHaveLength(0); + } + }); } test('Linters have the same config format', async () => { diff --git a/packages/harper.js/src/Linter.ts b/packages/harper.js/src/Linter.ts index 2d489414..6febf738 100644 --- a/packages/harper.js/src/Linter.ts +++ b/packages/harper.js/src/Linter.ts @@ -34,6 +34,12 @@ export default interface Linter { /** Set the linter's current configuration from JSON. */ setLintConfigWithJSON(config: string): Promise; + /** Get the linting rule descriptions as a JSON map. */ + getLintDescriptionsAsJSON(): Promise; + + /** Get the linting rule descriptions as an object */ + getLintDescriptions(): Promise>; + /** Convert a string to Chicago-style title case. */ toTitleCase(text: string): Promise; } diff --git a/packages/harper.js/src/LocalLinter.ts b/packages/harper.js/src/LocalLinter.ts index a6a28ae3..8f9031e0 100644 --- a/packages/harper.js/src/LocalLinter.ts +++ b/packages/harper.js/src/LocalLinter.ts @@ -74,4 +74,14 @@ export default class LocalLinter implements Linter { const wasm = await loadWasm(); return wasm.to_title_case(text); } + + async getLintDescriptions(): Promise> { + await this.initialize(); + return this.inner!.get_lint_descriptions_as_object(); + } + + async getLintDescriptionsAsJSON(): Promise { + await this.initialize(); + return this.inner!.get_lint_descriptions_as_json(); + } } diff --git a/packages/harper.js/src/WorkerLinter/index.ts b/packages/harper.js/src/WorkerLinter/index.ts index bbfd63e3..7710140c 100644 --- a/packages/harper.js/src/WorkerLinter/index.ts +++ b/packages/harper.js/src/WorkerLinter/index.ts @@ -98,6 +98,14 @@ export default class WorkerLinter implements Linter { return this.rpc('toTitleCase', [text]); } + getLintDescriptionsAsJSON(): Promise { + return this.rpc('getLintDescriptionsAsJSON', []); + } + + async getLintDescriptions(): Promise> { + return JSON.parse(await this.getLintDescriptionsAsJSON()) as Record; + } + /** Run a procedure on the remote worker. */ private async rpc(procName: string, args: any[]): Promise { const promise = new Promise((resolve, reject) => { From e4fbe733000923096b894f486b87a9dab56490b3 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Tue, 31 Dec 2024 10:36:16 -0700 Subject: [PATCH 06/11] feat(obsidian-plugin): added rule descriptions to settings page --- packages/obsidian-plugin/src/HarperSettingTab.js | 11 ++++++++++- packages/obsidian-plugin/src/index.js | 6 ++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/packages/obsidian-plugin/src/HarperSettingTab.js b/packages/obsidian-plugin/src/HarperSettingTab.js index 7c4ebc0b..ebf18d1d 100644 --- a/packages/obsidian-plugin/src/HarperSettingTab.js +++ b/packages/obsidian-plugin/src/HarperSettingTab.js @@ -10,12 +10,16 @@ export class HarperSettingTab extends PluginSettingTab { /** @type Record */ settings; + /** @type Record */ + descriptions; + /** @param {App} app * @param {HarperPlugin} plugin */ constructor(app, plugin) { super(app, plugin); this.plugin = plugin; + this.updateDescriptions(); this.updateSettings(); } @@ -23,6 +27,10 @@ export class HarperSettingTab extends PluginSettingTab { this.plugin.getSettings().then((v) => (this.settings = v)); } + updateDescriptions() { + this.plugin.getDescriptions().then((v) => (this.descriptions = v)); + } + display() { const { containerEl } = this; containerEl.empty(); @@ -31,10 +39,11 @@ export class HarperSettingTab extends PluginSettingTab { for (let setting of Object.keys(this.settings.lintSettings)) { let value = this.settings.lintSettings[setting]; + let description = this.descriptions[setting]; new Setting(containerEl) .setName(startCase(setting)) - .setDesc(`Whether to include the ${setting} grammar rule.`) + .setDesc(description) .addDropdown((dropdown) => dropdown .addOption('default', 'Default') diff --git a/packages/obsidian-plugin/src/index.js b/packages/obsidian-plugin/src/index.js index fe9cc9df..d3c39eef 100644 --- a/packages/obsidian-plugin/src/index.js +++ b/packages/obsidian-plugin/src/index.js @@ -78,6 +78,12 @@ export default class HarperPlugin extends Plugin { /** @public */ lintSettingModified = false; + /** @public + * @returns {Promise>} */ + async getDescriptions() { + return await harper.getLintDescriptions(); + } + /** @public * @returns {Promise>} */ async getSettings() { From 5f98d68390f93c2ce1067b48099079fe5ff7d37a Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Tue, 31 Dec 2024 10:59:49 -0700 Subject: [PATCH 07/11] feat(obsidian-plugin): added setting for whether to use Web Worker --- .../obsidian-plugin/src/HarperSettingTab.js | 7 +++++ packages/obsidian-plugin/src/index.js | 30 ++++++++++++++++--- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/packages/obsidian-plugin/src/HarperSettingTab.js b/packages/obsidian-plugin/src/HarperSettingTab.js index ebf18d1d..e75b0845 100644 --- a/packages/obsidian-plugin/src/HarperSettingTab.js +++ b/packages/obsidian-plugin/src/HarperSettingTab.js @@ -37,6 +37,13 @@ export class HarperSettingTab extends PluginSettingTab { console.log(this.settings.lintSettings); + new Setting(containerEl).setName('Use Web Worker').addToggle((toggle) => + toggle.setValue(this.settings.useWebWorker).onChange(async (value) => { + this.settings.useWebWorker = value; + await this.plugin.setSettings(this.settings); + }) + ); + for (let setting of Object.keys(this.settings.lintSettings)) { let value = this.settings.lintSettings[setting]; let description = this.descriptions[setting]; diff --git a/packages/obsidian-plugin/src/index.js b/packages/obsidian-plugin/src/index.js index d3c39eef..68cf5111 100644 --- a/packages/obsidian-plugin/src/index.js +++ b/packages/obsidian-plugin/src/index.js @@ -1,7 +1,7 @@ import logoSvg from '../logo.svg'; import { linter } from './lint'; import { Plugin, addIcon, Menu } from 'obsidian'; -import { WorkerLinter } from 'harper.js'; +import { LocalLinter, WorkerLinter } from 'harper.js'; import { HarperSettingTab } from './HarperSettingTab'; function suggestionToLabel(sug) { @@ -12,8 +12,20 @@ function suggestionToLabel(sug) { } } -let harper = new WorkerLinter(); -harper.setup(); +function initHarperInstance(useWebWorker) { + if (useWebWorker) { + console.log('Switching to `WorkerLinter`'); + harper = new WorkerLinter(); + } else { + console.log('Switching to `LocalLinter`'); + harper = new LocalLinter(); + } + harper.setup(); +} + +let harper; + +initHarperInstance(true); const harperLinter = (plugin) => linter( @@ -91,7 +103,7 @@ export default class HarperPlugin extends Plugin { let lintSettings = await harper.getLintConfig(); - return { lintSettings }; + return { ...this.settings, lintSettings }; } /** @public @@ -102,6 +114,10 @@ export default class HarperPlugin extends Plugin { settings = {}; } + if (settings.useWebWorker == undefined) { + settings.useWebWorker = true; + } + if (settings.lintSettings == undefined) { settings.lintSettings = {}; } @@ -113,6 +129,12 @@ export default class HarperPlugin extends Plugin { await harper.setLintConfig(settings.lintSettings); this.lintSettingChanged(); this.saveData(settings); + + if (this.settings?.useWebWorker != settings.useWebWorker) { + initHarperInstance(settings.useWebWorker); + } + + this.settings = settings; } async onload() { From 6616712a83e9b4dc5a8e00717ec95520df8295fd Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Tue, 31 Dec 2024 12:08:30 -0700 Subject: [PATCH 08/11] docs: created page listing Harper's rules --- .../web/src/routes/docs/rules/+page.svelte | 34 +++++++++++++++++++ packages/web/vite.config.ts | 4 +++ 2 files changed, 38 insertions(+) create mode 100644 packages/web/src/routes/docs/rules/+page.svelte diff --git a/packages/web/src/routes/docs/rules/+page.svelte b/packages/web/src/routes/docs/rules/+page.svelte new file mode 100644 index 00000000..c47d737a --- /dev/null +++ b/packages/web/src/routes/docs/rules/+page.svelte @@ -0,0 +1,34 @@ + + +

This page is an incomplete list of the various grammatical rules Harper checks for.

+ +{#each Object.entries(info) as [name, description]} +

{titles[name]}

+

{description}

+

This rule is also often referred to as {name}.

+{/each} diff --git a/packages/web/vite.config.ts b/packages/web/vite.config.ts index 8975e3b9..53c76f39 100644 --- a/packages/web/vite.config.ts +++ b/packages/web/vite.config.ts @@ -106,6 +106,10 @@ export default defineConfig({ to: '/docs/contributors/architecture' } ] + }, + { + title: 'Rules', + to: '/docs/rules' } ] }, From 90f8629909c7ca6a267116b7e41b09c9fd389b41 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Tue, 31 Dec 2024 12:26:49 -0700 Subject: [PATCH 09/11] Release 0.14.0 harper-comments@0.14.0 harper-core@0.14.0 harper-html@0.14.0 harper-ls@0.14.0 harper-tree-sitter@0.14.0 Generated by cargo-workspaces --- Cargo.lock | 10 +++++----- harper-cli/Cargo.toml | 4 ++-- harper-comments/Cargo.toml | 8 ++++---- harper-core/Cargo.toml | 2 +- harper-html/Cargo.toml | 6 +++--- harper-ls/Cargo.toml | 8 ++++---- harper-tree-sitter/Cargo.toml | 4 ++-- harper-wasm/Cargo.toml | 2 +- justfile | 5 +++++ packages/harper.js/package.json | 2 +- packages/vscode-plugin/package.json | 2 +- 11 files changed, 29 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 224e6ff6..40f47aab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -565,7 +565,7 @@ dependencies = [ [[package]] name = "harper-comments" -version = "0.13.0" +version = "0.14.0" dependencies = [ "harper-core", "harper-html", @@ -593,7 +593,7 @@ dependencies = [ [[package]] name = "harper-core" -version = "0.13.0" +version = "0.14.0" dependencies = [ "blanket", "criterion", @@ -617,7 +617,7 @@ dependencies = [ [[package]] name = "harper-html" -version = "0.13.0" +version = "0.14.0" dependencies = [ "harper-core", "harper-tree-sitter", @@ -628,7 +628,7 @@ dependencies = [ [[package]] name = "harper-ls" -version = "0.13.0" +version = "0.14.0" dependencies = [ "anyhow", "clap", @@ -651,7 +651,7 @@ dependencies = [ [[package]] name = "harper-tree-sitter" -version = "0.13.0" +version = "0.14.0" dependencies = [ "harper-core", "tree-sitter", diff --git a/harper-cli/Cargo.toml b/harper-cli/Cargo.toml index 7c69db72..9281ea37 100644 --- a/harper-cli/Cargo.toml +++ b/harper-cli/Cargo.toml @@ -10,6 +10,6 @@ repository = "https://github.com/automattic/harper" anyhow = "1.0.95" ariadne = "0.4.1" clap = { version = "4.5.23", features = ["derive"] } -harper-core = { path = "../harper-core", version = "0.13.0" } -harper-comments = { path = "../harper-comments", version = "0.13.0" } +harper-core = { path = "../harper-core", version = "0.14.0" } +harper-comments = { path = "../harper-comments", version = "0.14.0" } serde_json = "1.0.133" diff --git a/harper-comments/Cargo.toml b/harper-comments/Cargo.toml index db0b2bb5..15539bfa 100644 --- a/harper-comments/Cargo.toml +++ b/harper-comments/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "harper-comments" -version = "0.13.0" +version = "0.14.0" edition = "2021" description = "The language checker for developers." license = "Apache-2.0" @@ -8,9 +8,9 @@ readme = "README.md" repository = "https://github.com/automattic/harper" [dependencies] -harper-core = { path = "../harper-core", version = "0.13.0" } -harper-html = { path = "../harper-html", version = "0.13.0" } -harper-tree-sitter = { path = "../harper-tree-sitter", version = "0.13.0" } +harper-core = { path = "../harper-core", version = "0.14.0" } +harper-html = { path = "../harper-html", version = "0.14.0" } +harper-tree-sitter = { path = "../harper-tree-sitter", version = "0.14.0" } tree-sitter = "0.20.10" tree-sitter-rust = "0.20.4" tree-sitter-typescript = "0.20.3" diff --git a/harper-core/Cargo.toml b/harper-core/Cargo.toml index 54f2f39f..456b9a65 100644 --- a/harper-core/Cargo.toml +++ b/harper-core/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "harper-core" -version = "0.13.0" +version = "0.14.0" edition = "2021" description = "The language checker for developers." license = "Apache-2.0" diff --git a/harper-html/Cargo.toml b/harper-html/Cargo.toml index 90708562..bcfa535e 100644 --- a/harper-html/Cargo.toml +++ b/harper-html/Cargo.toml @@ -1,14 +1,14 @@ [package] name = "harper-html" -version = "0.13.0" +version = "0.14.0" edition = "2021" description = "The language checker for developers." license = "Apache-2.0" repository = "https://github.com/automattic/harper" [dependencies] -harper-core = { path = "../harper-core", version = "0.13.0" } -harper-tree-sitter = { path = "../harper-tree-sitter", version = "0.13.0" } +harper-core = { path = "../harper-core", version = "0.14.0" } +harper-tree-sitter = { path = "../harper-tree-sitter", version = "0.14.0" } tree-sitter-html = "0.19.0" tree-sitter = "0.20.10" diff --git a/harper-ls/Cargo.toml b/harper-ls/Cargo.toml index 998ac77d..1bf9f91f 100644 --- a/harper-ls/Cargo.toml +++ b/harper-ls/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "harper-ls" -version = "0.13.0" +version = "0.14.0" edition = "2021" description = "The language checker for developers." license = "Apache-2.0" @@ -8,9 +8,9 @@ readme = "README.md" repository = "https://github.com/automattic/harper" [dependencies] -harper-core = { path = "../harper-core", version = "0.13.0", features = ["concurrent"] } -harper-comments = { path = "../harper-comments", version = "0.13.0" } -harper-html = { path = "../harper-html", version = "0.13.0" } +harper-core = { path = "../harper-core", version = "0.14.0", features = ["concurrent"] } +harper-comments = { path = "../harper-comments", version = "0.14.0" } +harper-html = { path = "../harper-html", version = "0.14.0" } tower-lsp = "0.20.0" tokio = { version = "1.42.0", features = ["fs", "rt", "rt-multi-thread", "macros", "io-std", "io-util", "net"] } clap = { version = "4.5.23", features = ["derive"] } diff --git a/harper-tree-sitter/Cargo.toml b/harper-tree-sitter/Cargo.toml index 868b0487..530f4ba4 100644 --- a/harper-tree-sitter/Cargo.toml +++ b/harper-tree-sitter/Cargo.toml @@ -1,11 +1,11 @@ [package] name = "harper-tree-sitter" -version = "0.13.0" +version = "0.14.0" edition = "2021" description = "The language checker for developers." license = "Apache-2.0" repository = "https://github.com/automattic/harper" [dependencies] -harper-core = { path = "../harper-core", version = "0.13.0" } +harper-core = { path = "../harper-core", version = "0.14.0" } tree-sitter = "0.20.10" diff --git a/harper-wasm/Cargo.toml b/harper-wasm/Cargo.toml index e2fdf572..3b918038 100644 --- a/harper-wasm/Cargo.toml +++ b/harper-wasm/Cargo.toml @@ -14,7 +14,7 @@ console_error_panic_hook = "0.1.7" tracing = "0.1.41" tracing-wasm = "0.2.1" wasm-bindgen = "0.2.97" -harper-core = { path = "../harper-core", version = "0.13.0", features = ["concurrent"] } +harper-core = { path = "../harper-core", version = "0.14.0", features = ["concurrent"] } once_cell = "1.20.2" serde-wasm-bindgen = "0.6.5" serde_json = "1.0.133" diff --git a/justfile b/justfile index 8ecc419a..7a0662df 100644 --- a/justfile +++ b/justfile @@ -252,6 +252,11 @@ bump-versions: cat package.json | jq ".version = \"$HARPER_VERSION\"" > package.json.edited mv package.json.edited package.json + cd "{{justfile_directory()}}/packages/vscode-plugin" + + cat package.json | jq ".version = \"$HARPER_VERSION\"" > package.json.edited + mv package.json.edited package.json + just format lazygit diff --git a/packages/harper.js/package.json b/packages/harper.js/package.json index 8c39aee2..50b39312 100644 --- a/packages/harper.js/package.json +++ b/packages/harper.js/package.json @@ -1,6 +1,6 @@ { "name": "harper.js", - "version": "0.13.1", + "version": "0.14.0", "license": "Apache-2.0", "author": "Elijah Potter", "description": "The grammar checker for developers.", diff --git a/packages/vscode-plugin/package.json b/packages/vscode-plugin/package.json index 7b40900e..89ef81f5 100644 --- a/packages/vscode-plugin/package.json +++ b/packages/vscode-plugin/package.json @@ -2,7 +2,7 @@ "name": "harper", "displayName": "Harper", "description": "The grammar checker for developers", - "version": "0.13.0", + "version": "0.14.0", "private": true, "author": "Elijah Potter", "publisher": "elijah-potter", From d8967394b3e4b88837440b4184ec3328b84fad8f Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Tue, 31 Dec 2024 14:48:44 -0700 Subject: [PATCH 10/11] feat: created several linters for multi-token capitalization --- harper-core/src/linting/lint_group.rs | 16 +- harper-core/src/linting/matcher.rs | 6 - harper-core/src/linting/mod.rs | 5 + .../proper_noun_capitalization_linters.rs | 531 ++++++++++++++++++ harper-core/src/patterns/is_not_title_case.rs | 34 ++ harper-core/src/patterns/mod.rs | 2 + harper-core/src/patterns/sequence_pattern.rs | 24 + harper-core/src/title_case.rs | 41 +- harper-core/tests/run_tests.rs | 1 + .../proper_noun_capitalization.md | 3 + 10 files changed, 642 insertions(+), 21 deletions(-) create mode 100644 harper-core/src/linting/proper_noun_capitalization_linters.rs create mode 100644 harper-core/src/patterns/is_not_title_case.rs create mode 100644 harper-core/tests/test_sources/proper_noun_capitalization.md diff --git a/harper-core/src/linting/lint_group.rs b/harper-core/src/linting/lint_group.rs index d2ecd7ca..fd4489a6 100644 --- a/harper-core/src/linting/lint_group.rs +++ b/harper-core/src/linting/lint_group.rs @@ -13,6 +13,10 @@ use super::long_sentences::LongSentences; use super::matcher::Matcher; use super::multiple_sequential_pronouns::MultipleSequentialPronouns; use super::number_suffix_capitalization::NumberSuffixCapitalization; +use super::proper_noun_capitalization_linters::{ + AmazonNames, Americas, AppleNames, AzureNames, ChineseCommunistParty, GoogleNames, Holidays, + MetaNames, MicrosoftNames, UnitedOrganizations, +}; use super::repeated_words::RepeatedWords; use super::sentence_capitalization::SentenceCapitalization; use super::spaces::Spaces; @@ -156,7 +160,17 @@ create_lint_group_config!( BoringWords => false, UseGenitive => false, ThatWhich => true, - CapitalizePersonalPronouns => true + CapitalizePersonalPronouns => true, + Americas => true, + ChineseCommunistParty => true, + UnitedOrganizations => true, + Holidays => true, + AmazonNames => true, + GoogleNames => true, + MetaNames => true, + MicrosoftNames => true, + AppleNames => true, + AzureNames => true ); impl Default for LintGroup { diff --git a/harper-core/src/linting/matcher.rs b/harper-core/src/linting/matcher.rs index 2a0a109b..3349bedb 100644 --- a/harper-core/src/linting/matcher.rs +++ b/harper-core/src/linting/matcher.rs @@ -137,12 +137,6 @@ impl Matcher { "ngram" => "n-gram", "grammer" => "grammar", "There","fore" => "Therefore", - "south","America" => "South America", - "South","america" => "South America", - "south","america" => "South America", - "North","america" => "North America", - "north","America" => "North America", - "north","america" => "North America", "fatal","outcome" => "death", "geiger","counter" => "Geiger counter", "veterans","day" => "Veterans Day", diff --git a/harper-core/src/linting/mod.rs b/harper-core/src/linting/mod.rs index 37aca745..f06e548b 100644 --- a/harper-core/src/linting/mod.rs +++ b/harper-core/src/linting/mod.rs @@ -13,6 +13,7 @@ mod matcher; mod multiple_sequential_pronouns; mod number_suffix_capitalization; mod pattern_linter; +mod proper_noun_capitalization_linters; mod repeated_words; mod sentence_capitalization; mod spaces; @@ -39,6 +40,10 @@ pub use matcher::Matcher; pub use multiple_sequential_pronouns::MultipleSequentialPronouns; pub use number_suffix_capitalization::NumberSuffixCapitalization; pub use pattern_linter::PatternLinter; +pub use proper_noun_capitalization_linters::{ + AmazonNames, Americas, AppleNames, AzureNames, ChineseCommunistParty, GoogleNames, Holidays, + MetaNames, MicrosoftNames, UnitedOrganizations, +}; pub use repeated_words::RepeatedWords; pub use sentence_capitalization::SentenceCapitalization; pub use spaces::Spaces; diff --git a/harper-core/src/linting/proper_noun_capitalization_linters.rs b/harper-core/src/linting/proper_noun_capitalization_linters.rs new file mode 100644 index 00000000..fefea849 --- /dev/null +++ b/harper-core/src/linting/proper_noun_capitalization_linters.rs @@ -0,0 +1,531 @@ +use super::PatternLinter; +use super::{Lint, LintKind, Suggestion}; +use crate::make_title_case; +use crate::patterns::{EitherPattern, IsNotTitleCase, Pattern, SequencePattern}; +use crate::FstDictionary; +use crate::{Token, TokenStringExt}; +use std::sync::Arc; + +/// A macro that will generate a linter to enforce capitalization of a multi-token proper noun. +macro_rules! create_linter_for { + ($name:ident, $pattern:expr, $message:literal) => { + create_linter_for!($name, $pattern, $message, $message); + }; + ($name:ident, $pattern:expr, $message:literal, $description:literal) => { + pub struct $name { + pattern: Box, + dict: Arc, + } + + impl $name { + pub fn new() -> Self { + let dict = FstDictionary::curated(); + + Self { + pattern: Box::new(IsNotTitleCase::new(Box::new($pattern), dict.clone())), + dict, + } + } + } + + impl Default for $name { + fn default() -> Self { + Self::new() + } + } + + impl PatternLinter for $name { + fn pattern(&self) -> &dyn Pattern { + self.pattern.as_ref() + } + + fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Lint { + let proper = make_title_case(matched_tokens, source, &self.dict); + + Lint { + span: matched_tokens.span().unwrap(), + lint_kind: LintKind::Capitalization, + suggestions: vec![Suggestion::ReplaceWith(proper)], + message: $message.to_string(), + priority: 31, + } + } + + fn description(&self) -> &'static str { + $description + } + } + }; +} + +create_linter_for!( + Americas, + SequencePattern::default() + .then(Box::new(EitherPattern::new(vec![ + Box::new(SequencePattern::default().then_any_capitalization_of("South")), + Box::new(SequencePattern::default().then_any_capitalization_of("North")) + ]))) + .then_whitespace() + .then_any_capitalization_of("America"), + "When referring to the continents, make sure to treat them as a proper noun." +); + +create_linter_for!( + ChineseCommunistParty, + SequencePattern::default() + .then_any_capitalization_of("Chinese") + .then_whitespace() + .then_any_capitalization_of("Communist") + .then_whitespace() + .then_any_capitalization_of("Party"), + "When referring to the political party, make sure to treat them as a proper noun." +); + +create_linter_for!( + UnitedOrganizations, + SequencePattern::default() + .then_any_capitalization_of("United") + .then_whitespace() + .then(Box::new(EitherPattern::new(vec![ + Box::new(SequencePattern::default().then_any_capitalization_of("Nations")), + Box::new(SequencePattern::default().then_any_capitalization_of("States")), + Box::new(SequencePattern::default().then_any_capitalization_of("Kingdom")), + Box::new(SequencePattern::default().then_any_capitalization_of("Airlines")), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("Arab") + .then_whitespace() + .then_any_capitalization_of("Emirates") + ) + ]))), + "When referring to national or international organizations, make sure to treat them as a proper noun." +); + +create_linter_for!( + Holidays, + EitherPattern::new(vec![ + Box::new( + SequencePattern::default() + .then(Box::new(EitherPattern::new(vec![ + Box::new(SequencePattern::default().then_any_capitalization_of("Presidents'")), + Box::new(SequencePattern::default().then_any_capitalization_of("Valentines")), + Box::new(SequencePattern::default().then_any_capitalization_of("Christmas")), + Box::new(SequencePattern::default().then_any_capitalization_of("Easter")), + Box::new(SequencePattern::default().then_any_capitalization_of("Flag")), + Box::new(SequencePattern::default().then_any_capitalization_of("Independence")), + Box::new(SequencePattern::default().then_any_capitalization_of("Mothers'")), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("New") + .then_any_capitalization_of("Years") + ), + Box::new(SequencePattern::default().then_any_capitalization_of("Fathers'")), + Box::new(SequencePattern::default().then_any_capitalization_of("Columbus")), + Box::new(SequencePattern::default().then_any_capitalization_of("Thanksgiving")), + Box::new(SequencePattern::default().then_any_capitalization_of("Memorial")), + Box::new(SequencePattern::default().then_any_capitalization_of("May")), + Box::new(SequencePattern::default().then_any_capitalization_of("Halloween")), + Box::new(SequencePattern::default().then_any_capitalization_of("Tax")), + Box::new(SequencePattern::default().then_any_capitalization_of("Parents")), + Box::new(SequencePattern::default().then_any_capitalization_of("Veterans")), + Box::new(SequencePattern::default().then_any_capitalization_of("Armistice")), + Box::new(SequencePattern::default().then_any_capitalization_of("Groundhog")), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("National") + .then_whitespace() + .then_any_capitalization_of("Freedom") + ), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("All") + .then_whitespace() + .then_any_capitalization_of("Saints") + ), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("All") + .then_whitespace() + .then_any_capitalization_of("Souls") + ) + ]))) + .then_whitespace() + .then_any_capitalization_of("Day") + ), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("Black") + .then_whitespace() + .then_any_capitalization_of("Friday") + ), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("Cyber") + .then_whitespace() + .then_any_capitalization_of("Monday") + ) + ]), + "When referring to holidays, make sure to treat them as a proper noun." +); + +create_linter_for!( + AmazonNames, + SequencePattern::default() + .then_any_capitalization_of("Amazon") + .then_whitespace() + .then(Box::new(EitherPattern::new(vec![ + Box::new( + SequencePattern::default() + .then_any_capitalization_of("Shopping") + ), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("Web") + .then_whitespace() + .then_any_capitalization_of("Services") + ), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("Lambda") + ), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("RDS") + ), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("DynamoDB") + ), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("SageMaker") + ), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("Rekognition") + ), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("CloudFront") + ), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("ECS") + ), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("EKS") + ), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("CloudWatch") + ), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("IAM") + ), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("Prime") + ), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("Kindle") + ) + ]))), + "When referring to the various products of Amazon.com, make sure to treat them as a proper noun." +); + +create_linter_for!( + GoogleNames, + SequencePattern::default() + .then_any_capitalization_of("Google") + .then_whitespace() + .then(Box::new(EitherPattern::new(vec![ + Box::new(SequencePattern::default().then_any_capitalization_of("Search")), + Box::new(SequencePattern::default().then_any_capitalization_of("Cloud")), + Box::new(SequencePattern::default().then_any_capitalization_of("Maps")), + Box::new(SequencePattern::default().then_any_capitalization_of("Docs")), + Box::new(SequencePattern::default().then_any_capitalization_of("Sheets")), + Box::new(SequencePattern::default().then_any_capitalization_of("Slides")), + Box::new(SequencePattern::default().then_any_capitalization_of("Drive")), + Box::new(SequencePattern::default().then_any_capitalization_of("Meet")), + Box::new(SequencePattern::default().then_any_capitalization_of("Gmail")), + Box::new(SequencePattern::default().then_any_capitalization_of("Calendar")), + Box::new(SequencePattern::default().then_any_capitalization_of("Chrome")), + Box::new(SequencePattern::default().then_any_capitalization_of("ChromeOS")), + Box::new(SequencePattern::default().then_any_capitalization_of("Android")), + Box::new(SequencePattern::default().then_any_capitalization_of("Play")), + Box::new(SequencePattern::default().then_any_capitalization_of("Bard")), + Box::new(SequencePattern::default().then_any_capitalization_of("Gemini")), + Box::new(SequencePattern::default().then_any_capitalization_of("YouTube")), + Box::new(SequencePattern::default().then_any_capitalization_of("Photos")), + Box::new(SequencePattern::default().then_any_capitalization_of("Analytics")), + Box::new(SequencePattern::default().then_any_capitalization_of("AdSense")), + Box::new(SequencePattern::default().then_any_capitalization_of("Pixel")), + Box::new(SequencePattern::default().then_any_capitalization_of("Nest")), + Box::new(SequencePattern::default().then_any_capitalization_of("Workspace")) + ]))), + "When referring to Google products and services, make sure to treat them as proper nouns." +); + +create_linter_for!( + AzureNames, + SequencePattern::default() + .then_any_capitalization_of("Azure") + .then_whitespace() + .then(Box::new(EitherPattern::new(vec![ + Box::new(SequencePattern::default().then_any_capitalization_of("DevOps")), + Box::new(SequencePattern::default().then_any_capitalization_of("Functions")), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("Cosmos") + .then_whitespace() + .then_any_capitalization_of("DB") + ), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("SQL") + .then_whitespace() + .then_any_capitalization_of("Database") + ), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("Kubernetes") + .then_whitespace() + .then_any_capitalization_of("Service") + ), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("Virtual") + .then_whitespace() + .then_any_capitalization_of("Machines") + ), + Box::new(SequencePattern::default().then_any_capitalization_of("Monitor")), + Box::new(SequencePattern::default().then_any_capitalization_of("Storage")), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("Active") + .then_whitespace() + .then_any_capitalization_of("Directory") + ), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("App") + .then_whitespace() + .then_any_capitalization_of("Service") + ), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("Key") + .then_whitespace() + .then_any_capitalization_of("Vault") + ), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("Cognitive") + .then_whitespace() + .then_any_capitalization_of("Services") + ), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("Service") + .then_whitespace() + .then_any_capitalization_of("Bus") + ), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("Event") + .then_whitespace() + .then_any_capitalization_of("Hub") + ) + ]))), + "When referring to Azure cloud services, make sure to treat them as proper nouns." +); + +create_linter_for!( + MicrosoftNames, + SequencePattern::default() + .then_any_capitalization_of("Microsoft") + .then_whitespace() + .then(Box::new(EitherPattern::new(vec![ + Box::new(SequencePattern::default().then_any_capitalization_of("Windows")), + Box::new(SequencePattern::default().then_any_capitalization_of("Office")), + Box::new(SequencePattern::default().then_any_capitalization_of("Teams")), + Box::new(SequencePattern::default().then_any_capitalization_of("Excel")), + Box::new(SequencePattern::default().then_any_capitalization_of("PowerPoint")), + Box::new(SequencePattern::default().then_any_capitalization_of("Word")), + Box::new(SequencePattern::default().then_any_capitalization_of("Outlook")), + Box::new(SequencePattern::default().then_any_capitalization_of("OneDrive")), + Box::new(SequencePattern::default().then_any_capitalization_of("SharePoint")), + Box::new(SequencePattern::default().then_any_capitalization_of("Xbox")), + Box::new(SequencePattern::default().then_any_capitalization_of("Surface")), + Box::new(SequencePattern::default().then_any_capitalization_of("Edge")), + Box::new(SequencePattern::default().then_any_capitalization_of("Bing")), + Box::new(SequencePattern::default().then_any_capitalization_of("Dynamics")), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("Visual") + .then_whitespace() + .then_any_capitalization_of("Studio") + ) + ]))), + "When referring to Microsoft products and services, make sure to treat them as proper nouns." +); + +create_linter_for!( + AppleNames, + SequencePattern::default() + .then_any_capitalization_of("Apple") + .then_whitespace() + .then(Box::new(EitherPattern::new(vec![ + Box::new(SequencePattern::default().then_any_capitalization_of("iPhone")), + Box::new(SequencePattern::default().then_any_capitalization_of("iPad")), + Box::new(SequencePattern::default().then_any_capitalization_of("MacBook")), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("MacBook") + .then_whitespace() + .then_any_capitalization_of("Pro") + ), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("MacBook") + .then_whitespace() + .then_any_capitalization_of("Air") + ), + Box::new(SequencePattern::default().then_any_capitalization_of("iMac")), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("Mac") + .then_whitespace() + .then_any_capitalization_of("Pro") + ), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("Mac") + .then_whitespace() + .then_any_capitalization_of("Mini") + ), + Box::new(SequencePattern::default().then_any_capitalization_of("AirPods")), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("AirPods") + .then_whitespace() + .then_any_capitalization_of("Pro") + ), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("AirPods") + .then_whitespace() + .then_any_capitalization_of("Max") + ), + Box::new(SequencePattern::default().then_any_capitalization_of("Watch")), + Box::new(SequencePattern::default().then_any_capitalization_of("TV")), + Box::new(SequencePattern::default().then_any_capitalization_of("Music")), + Box::new(SequencePattern::default().then_any_capitalization_of("Arcade")), + Box::new(SequencePattern::default().then_any_capitalization_of("iCloud")), + Box::new(SequencePattern::default().then_any_capitalization_of("Safari")), + Box::new(SequencePattern::default().then_any_capitalization_of("HomeKit")), + Box::new(SequencePattern::default().then_any_capitalization_of("CarPlay")), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("Vision") + .then_whitespace() + .then_any_capitalization_of("Pro") + ) + ]))), + "When referring to Apple products and services, make sure to treat them as proper nouns." +); + +create_linter_for!( + MetaNames, + SequencePattern::default() + .then_any_capitalization_of("Meta") + .then_whitespace() + .then(Box::new(EitherPattern::new(vec![ + Box::new(SequencePattern::default().then_any_capitalization_of("Oculus")), + Box::new(SequencePattern::default().then_any_capitalization_of("Portals")), + Box::new(SequencePattern::default().then_any_capitalization_of("Quest")), + Box::new(SequencePattern::default().then_any_capitalization_of("Gaming")), + Box::new(SequencePattern::default().then_any_capitalization_of("Horizon")), + Box::new( + SequencePattern::default() + .then_any_capitalization_of("Reality") + .then_whitespace() + .then_any_capitalization_of("Labs") + ), + ]))), + "When referring to Meta products and services, make sure to treat them as proper nouns." +); + +#[cfg(test)] +mod tests { + use crate::linting::tests::{assert_lint_count, assert_suggestion_result}; + + use super::{Americas, MetaNames, MicrosoftNames, UnitedOrganizations}; + + #[test] + fn americas_lowercase() { + assert_suggestion_result("south america", Americas::default(), "South America"); + assert_suggestion_result("north america", Americas::default(), "North America"); + } + + #[test] + fn americas_uppercase() { + assert_suggestion_result("SOUTH AMERICA", Americas::default(), "South America"); + assert_suggestion_result("NORTH AMERICA", Americas::default(), "North America"); + } + + #[test] + fn americas_allow_correct() { + assert_lint_count("South America", Americas::default(), 0); + assert_lint_count("North America", Americas::default(), 0); + } + + #[test] + fn united_nations_uppercase() { + assert_suggestion_result( + "UNITED NATIONS", + UnitedOrganizations::default(), + "United Nations", + ); + } + + #[test] + fn united_arab_emirates_lowercase() { + assert_suggestion_result( + "UNITED ARAB EMIRATES", + UnitedOrganizations::default(), + "United Arab Emirates", + ); + } + + #[test] + fn united_nations_allow_correct() { + assert_lint_count("United Nations", UnitedOrganizations::default(), 0); + } + + #[test] + fn meta_allow_correct() { + assert_lint_count("Meta Quest", MetaNames::default(), 0); + } + + #[test] + fn microsoft_lowercase() { + assert_suggestion_result( + "microsoft visual studio", + MicrosoftNames::default(), + "Microsoft Visual Studio", + ); + } + + #[test] + fn microsoft_first_word_is_correct() { + assert_suggestion_result( + "Microsoft visual studio", + MicrosoftNames::default(), + "Microsoft Visual Studio", + ); + } +} diff --git a/harper-core/src/patterns/is_not_title_case.rs b/harper-core/src/patterns/is_not_title_case.rs new file mode 100644 index 00000000..0c400a4a --- /dev/null +++ b/harper-core/src/patterns/is_not_title_case.rs @@ -0,0 +1,34 @@ +use crate::{make_title_case, Dictionary, Token, TokenStringExt}; + +use super::Pattern; + +/// Will match full length of wrapped pattern __only if the matched +/// text is not already title case__. +pub struct IsNotTitleCase { + inner: Box, + dict: D, +} + +impl IsNotTitleCase { + pub fn new(inner: Box, dict: D) -> Self { + Self { inner, dict } + } +} + +impl Pattern for IsNotTitleCase { + fn matches(&self, tokens: &[Token], source: &[char]) -> usize { + let inner_match = self.inner.matches(tokens, source); + + if inner_match == 0 { + return 0; + } + + let matched_chars = tokens[0..inner_match].span().unwrap().get_content(source); + + if make_title_case(tokens, source, &self.dict) != matched_chars { + inner_match + } else { + 0 + } + } +} diff --git a/harper-core/src/patterns/mod.rs b/harper-core/src/patterns/mod.rs index dc6b2edf..e290a314 100644 --- a/harper-core/src/patterns/mod.rs +++ b/harper-core/src/patterns/mod.rs @@ -6,6 +6,7 @@ mod any_pattern; mod consumes_remaining_pattern; mod either_pattern; mod invert; +mod is_not_title_case; mod naive_pattern_group; mod repeating_pattern; mod sequence_pattern; @@ -18,6 +19,7 @@ use blanket::blanket; pub use consumes_remaining_pattern::ConsumesRemainingPattern; pub use either_pattern::EitherPattern; pub use invert::Invert; +pub use is_not_title_case::IsNotTitleCase; pub use naive_pattern_group::NaivePatternGroup; pub use repeating_pattern::RepeatingPattern; pub use sequence_pattern::SequencePattern; diff --git a/harper-core/src/patterns/sequence_pattern.rs b/harper-core/src/patterns/sequence_pattern.rs index c26a2035..239f36db 100644 --- a/harper-core/src/patterns/sequence_pattern.rs +++ b/harper-core/src/patterns/sequence_pattern.rs @@ -67,6 +67,30 @@ impl SequencePattern { self } + /// Match examples of `word` that have any capitalization. + pub fn then_any_capitalization_of(mut self, word: &'static str) -> Self { + self.token_patterns + .push(Box::new(|tok: &Token, source: &[char]| { + if !tok.kind.is_word() { + return false; + } + + let tok_chars = tok.span.get_content(source); + + if tok_chars.len() != word.chars().count() { + return false; + } + + let partial_match = tok_chars + .iter() + .zip(word.chars()) + .all(|(a, b)| a.to_ascii_lowercase() == b.to_ascii_lowercase()); + + partial_match + })); + self + } + pub fn then_exact_word_or_lowercase(mut self, word: &'static str) -> Self { self.token_patterns .push(Box::new(|tok: &Token, source: &[char]| { diff --git a/harper-core/src/title_case.rs b/harper-core/src/title_case.rs index aee11ace..0c7ccd09 100644 --- a/harper-core/src/title_case.rs +++ b/harper-core/src/title_case.rs @@ -1,4 +1,5 @@ use crate::Lrc; +use crate::Token; use hashbrown::HashSet; use lazy_static::lazy_static; @@ -8,22 +9,33 @@ use crate::{parsers::Parser, CharStringExt, Dictionary, Document, TokenStringExt pub fn make_title_case_str( source: &str, parser: &mut impl Parser, - dict: impl Dictionary, + dict: &impl Dictionary, ) -> String { - make_title_case(Lrc::new(source.chars().collect()), parser, dict).to_string() + let source: Vec = source.chars().collect(); + + make_title_case_chars(Lrc::new(source), parser, dict).to_string() } // Make a given string [title case](https://en.wikipedia.org/wiki/Title_case) following the Chicago Manual of Style. -pub fn make_title_case( +pub fn make_title_case_chars( source: Lrc>, parser: &mut impl Parser, - dict: impl Dictionary, + dict: &impl Dictionary, ) -> Vec { - let mut output = source.to_vec(); + let document = Document::new_from_vec(source.clone(), parser, dict); + + make_title_case(document.get_tokens(), source.as_slice(), dict) +} + +pub fn make_title_case(toks: &[Token], source: &[char], dict: &impl Dictionary) -> Vec { + if toks.is_empty() { + return Vec::new(); + } - let document = Document::new_from_vec(source, parser, &dict); + let start_index = toks.first().unwrap().span.start; - let mut words = document.iter_words().enumerate().peekable(); + let mut words = toks.iter_words().enumerate().peekable(); + let mut output = toks.span().unwrap().get_content(source).to_vec(); // Only specific conjunctions are not capitalized. lazy_static! { @@ -34,7 +46,7 @@ pub fn make_title_case( } while let Some((index, word)) = words.next() { - let chars = document.get_span_content(word.span); + let chars = word.span.get_content(source); let chars_lower = chars.to_lower(); let metadata = word @@ -50,16 +62,17 @@ pub fn make_title_case( || words.peek().is_none(); if should_capitalize { - output[word.span.start] = output[word.span.start].to_ascii_uppercase(); + output[word.span.start - start_index] = + output[word.span.start - start_index].to_ascii_uppercase(); // The rest of the word should be lowercase. - for v in &mut output[word.span.start + 1..word.span.end] { + for v in &mut output[word.span.start + 1 - start_index..word.span.end - start_index] { *v = v.to_ascii_lowercase(); } } else { // The whole word should be lowercase. for i in word.span { - output[i] = output[i].to_ascii_lowercase(); + output[i - start_index] = output[i].to_ascii_lowercase(); } } } @@ -78,7 +91,7 @@ mod tests { make_title_case_str( "this is a test", &mut PlainEnglish, - FstDictionary::curated() + &FstDictionary::curated() ), "This Is a Test" ) @@ -90,7 +103,7 @@ mod tests { make_title_case_str( "the first and last words should be capitalized, even if it is \"the\"", &mut PlainEnglish, - FstDictionary::curated() + &FstDictionary::curated() ), "The First and Last Words Should Be Capitalized, Even If It Is \"The\"" ) @@ -102,7 +115,7 @@ mod tests { make_title_case_str( "THIS IS A TEST", &mut PlainEnglish, - FstDictionary::curated() + &FstDictionary::curated() ), "This Is a Test" ) diff --git a/harper-core/tests/run_tests.rs b/harper-core/tests/run_tests.rs index 2832132f..eece70c2 100644 --- a/harper-core/tests/run_tests.rs +++ b/harper-core/tests/run_tests.rs @@ -43,3 +43,4 @@ create_test!(issue_109_ext.md, 0); create_test!(chinese_lorem_ipsum.md, 2); create_test!(obsidian_links.md, 2); create_test!(issue_267.md, 0); +create_test!(proper_noun_capitalization.md, 2); diff --git a/harper-core/tests/test_sources/proper_noun_capitalization.md b/harper-core/tests/test_sources/proper_noun_capitalization.md new file mode 100644 index 00000000..fc68eba7 --- /dev/null +++ b/harper-core/tests/test_sources/proper_noun_capitalization.md @@ -0,0 +1,3 @@ +Apple watch should have been capitalized here. + +Similarly, amazon web seRVices should have been capitalized differently here. From 2c9c3405ba81b321cf220379308017937d5113c0 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Tue, 31 Dec 2024 15:27:49 -0700 Subject: [PATCH 11/11] fix: missing reference from last commit --- harper-wasm/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harper-wasm/src/lib.rs b/harper-wasm/src/lib.rs index 563e9293..cb3c466f 100644 --- a/harper-wasm/src/lib.rs +++ b/harper-wasm/src/lib.rs @@ -138,7 +138,7 @@ impl Default for Linter { #[wasm_bindgen] pub fn to_title_case(text: String) -> String { - harper_core::make_title_case_str(&text, &mut PlainEnglish, FstDictionary::curated()) + harper_core::make_title_case_str(&text, &mut PlainEnglish, &FstDictionary::curated()) } #[wasm_bindgen]