From 5fe070e7c183068d749b8ce85b634fd8727d757e Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Thu, 2 Jan 2025 15:21:21 -0700 Subject: [PATCH] feat(core): added simple linter for the conjugations of "to be" for #256 --- harper-core/affixes.json | 15 ++- harper-core/dictionary.dict | 7 +- harper-core/src/linting/lint.rs | 2 + harper-core/src/linting/lint_group.rs | 4 +- harper-core/src/linting/mod.rs | 2 + harper-core/src/linting/plural_conjugate.rs | 94 +++++++++++++++++++ harper-core/src/spell/full_dictionary.rs | 8 ++ harper-core/src/token.rs | 16 ++++ .../tests/test_sources/obsidian_links.md | 2 +- 9 files changed, 143 insertions(+), 7 deletions(-) create mode 100644 harper-core/src/linting/plural_conjugate.rs diff --git a/harper-core/affixes.json b/harper-core/affixes.json index 57dda979..2c72dce7 100644 --- a/harper-core/affixes.json +++ b/harper-core/affixes.json @@ -483,7 +483,9 @@ "replacements": [], "adds_metadata": {}, "gifts_metadata": { - "noun": {} + "noun": { + "is_plural": false + } } }, "2": { @@ -557,6 +559,17 @@ } } }, + "9": { + "suffix": true, + "cross_product": true, + "replacements": [], + "adds_metadata": {}, + "gifts_metadata": { + "noun": { + "is_plural": true + } + } + }, "~": { "suffix": true, "cross_product": true, diff --git a/harper-core/dictionary.dict b/harper-core/dictionary.dict index e3f7ba70..ed321060 100644 --- a/harper-core/dictionary.dict +++ b/harper-core/dictionary.dict @@ -45485,7 +45485,7 @@ therapeutically/ therapeutics/1M therapist/1SM therapy/14SM -there/18~ +there/~ there's/ thereabout/S thereafter/1 @@ -45519,7 +45519,7 @@ thermostatic/5 thermostatically/ thesauri/1 thesaurus/1MS -these/8S~ +these/8S~9 thesis/1M thespian/51SM theta/1SM @@ -45544,9 +45544,8 @@ thieve/4DSG thievery/1M thieving/451M thievish/5 -thigh/1M +thigh/1MS thighbone/1MS -thighs/1 thimble/14MS thimbleful/1SM thin/514YSP diff --git a/harper-core/src/linting/lint.rs b/harper-core/src/linting/lint.rs index 8638d2b2..6c92706f 100644 --- a/harper-core/src/linting/lint.rs +++ b/harper-core/src/linting/lint.rs @@ -36,6 +36,7 @@ pub enum LintKind { Repetition, Enhancement, Readability, + WordChoice, #[default] Miscellaneous, } @@ -50,6 +51,7 @@ impl Display for LintKind { LintKind::Readability => "Readability", LintKind::Miscellaneous => "Miscellaneous", LintKind::Enhancement => "Enhancement", + LintKind::WordChoice => "Word Choice", }; write!(f, "{}", s) diff --git a/harper-core/src/linting/lint_group.rs b/harper-core/src/linting/lint_group.rs index b49dfec9..175e2d04 100644 --- a/harper-core/src/linting/lint_group.rs +++ b/harper-core/src/linting/lint_group.rs @@ -14,6 +14,7 @@ use super::long_sentences::LongSentences; use super::matcher::Matcher; use super::multiple_sequential_pronouns::MultipleSequentialPronouns; use super::number_suffix_capitalization::NumberSuffixCapitalization; +use super::plural_conjugate::PluralConjugate; use super::proper_noun_capitalization_linters::{ AmazonNames, Americas, AppleNames, AzureNames, ChineseCommunistParty, GoogleNames, Holidays, Koreas, MetaNames, MicrosoftNames, UnitedOrganizations, @@ -173,7 +174,8 @@ create_lint_group_config!( MicrosoftNames => true, AppleNames => true, AzureNames => true, - CompoundWords => true + CompoundWords => true, + PluralConjugate => true ); impl Default for LintGroup { diff --git a/harper-core/src/linting/mod.rs b/harper-core/src/linting/mod.rs index 58217074..254d286a 100644 --- a/harper-core/src/linting/mod.rs +++ b/harper-core/src/linting/mod.rs @@ -14,6 +14,7 @@ mod matcher; mod multiple_sequential_pronouns; mod number_suffix_capitalization; mod pattern_linter; +mod plural_conjugate; mod proper_noun_capitalization_linters; mod repeated_words; mod sentence_capitalization; @@ -63,6 +64,7 @@ pub trait Linter { fn lint(&mut self, document: &Document) -> Vec; fn description(&self) -> &str; } + #[cfg(feature = "concurrent")] pub trait Linter: Send + Sync { fn lint(&mut self, document: &Document) -> Vec; diff --git a/harper-core/src/linting/plural_conjugate.rs b/harper-core/src/linting/plural_conjugate.rs new file mode 100644 index 00000000..af8e92b9 --- /dev/null +++ b/harper-core/src/linting/plural_conjugate.rs @@ -0,0 +1,94 @@ +use crate::{ + patterns::{EitherPattern, Pattern, SequencePattern}, + Token, +}; + +use super::{Lint, LintKind, PatternLinter, Suggestion}; + +pub struct PluralConjugate { + pattern: Box, +} + +impl Default for PluralConjugate { + fn default() -> Self { + let plural_case = SequencePattern::default() + .then_plural_noun() + .then_whitespace() + .then_exact_word("is"); + + let non_plural_case = SequencePattern::default() + .then(Box::new(|tok: &Token, _source: &[char]| { + tok.kind.is_not_plural_noun() && tok.kind.is_noun() + })) + .then_whitespace() + .then_exact_word("are"); + + let pat = EitherPattern::new(vec![Box::new(plural_case), Box::new(non_plural_case)]); + + Self { + pattern: Box::new(pat), + } + } +} + +impl PatternLinter for PluralConjugate { + fn pattern(&self) -> &dyn Pattern { + self.pattern.as_ref() + } + + fn match_to_lint(&self, matched_tokens: &[Token], _source: &[char]) -> Lint { + let should_be_plural = matched_tokens.first().unwrap().kind.is_plural_noun(); + + let sug = if should_be_plural { + vec!['a', 'r', 'e'] + } else { + vec!['i', 's'] + }; + + Lint { + span: matched_tokens.last().unwrap().span, + lint_kind: LintKind::WordChoice, + suggestions: vec![Suggestion::ReplaceWith(sug)], + message: "Use the alternative conjugation of this verb to be consistent with the noun's plural nature.".to_owned(), + priority: 63, + } + } + + fn description(&self) -> &'static str { + "Make sure you use the correct conjugation of the verb \"to be\" in plural contexts." + } +} + +#[cfg(test)] +mod tests { + use crate::linting::tests::assert_suggestion_result; + + use super::PluralConjugate; + + #[test] + fn issue_256() { + assert_suggestion_result( + "The bananas is tasty", + PluralConjugate::default(), + "The bananas are tasty", + ); + } + + #[test] + fn plural_students() { + assert_suggestion_result( + "The students is doing their homework.", + PluralConjugate::default(), + "The students are doing their homework.", + ); + } + + #[test] + fn singular_house() { + assert_suggestion_result( + "The house are just sitting there.", + PluralConjugate::default(), + "The house is just sitting there.", + ); + } +} diff --git a/harper-core/src/spell/full_dictionary.rs b/harper-core/src/spell/full_dictionary.rs index 21e7476e..cc20e126 100644 --- a/harper-core/src/spell/full_dictionary.rs +++ b/harper-core/src/spell/full_dictionary.rs @@ -329,4 +329,12 @@ mod tests { assert!(is_sorted_by_dist) } + + #[test] + fn there_is_not_a_pronoun() { + let dict = FullDictionary::curated(); + + assert!(!dict.get_word_metadata_str("there").is_noun()); + assert!(!dict.get_word_metadata_str("there").is_pronoun_noun()); + } } diff --git a/harper-core/src/token.rs b/harper-core/src/token.rs index 5d08ebb7..ef82c703 100644 --- a/harper-core/src/token.rs +++ b/harper-core/src/token.rs @@ -319,6 +319,22 @@ impl TokenKind { metadata.is_linking_verb() } + pub fn is_not_plural_noun(&self) -> bool { + let TokenKind::Word(metadata) = self else { + return true; + }; + + metadata.is_not_plural_noun() + } + + pub fn is_plural_noun(&self) -> bool { + let TokenKind::Word(metadata) = self else { + return false; + }; + + metadata.is_plural_noun() + } + pub fn is_noun(&self) -> bool { let TokenKind::Word(metadata) = self else { return false; diff --git a/harper-core/tests/test_sources/obsidian_links.md b/harper-core/tests/test_sources/obsidian_links.md index 9b5cb15f..e4d70f70 100644 --- a/harper-core/tests/test_sources/obsidian_links.md +++ b/harper-core/tests/test_sources/obsidian_links.md @@ -3,7 +3,7 @@ Below, you will find a number of example links that Obsidian is able to process. These should be treated as normal Markdown links. -The stuff inside the square brackets is visible and should be checked by Harper. +The things inside the square brackets are visible and should be checked by Harper. [[Three lws of motion]] [Three las of motion](Three%20laws%20of%20motion.md)