From ccebbd09b55ca113f46e530f9618cab9c1e7369e Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Mon, 29 Jan 2024 08:55:48 -0700 Subject: [PATCH] Now correctly recognizes numbers when they open sentences --- harper-core/dictionary.dict | 3 --- harper-core/src/lexing/mod.rs | 1 + .../src/linting/sentence_capitalization.rs | 15 ++++++++++++++- harper-core/src/token.rs | 17 +++++++++++++++++ 4 files changed, 32 insertions(+), 4 deletions(-) diff --git a/harper-core/dictionary.dict b/harper-core/dictionary.dict index 59b48630..dbc6b27e 100644 --- a/harper-core/dictionary.dict +++ b/harper-core/dictionary.dict @@ -10271,12 +10271,10 @@ Vonnegut/M Voronezh/M Vorster/M Voyager/M -Vt Vuitton/M Vulcan/M Vulg Vulgate/SM -W/MDT WA WAC WASP/M @@ -40244,7 +40242,6 @@ rutted rutting rutty/RT rye/M -s/NYXB sabbath/M sabbaths sabbatical/SM diff --git a/harper-core/src/lexing/mod.rs b/harper-core/src/lexing/mod.rs index ef34578d..e5d1d510 100644 --- a/harper-core/src/lexing/mod.rs +++ b/harper-core/src/lexing/mod.rs @@ -117,6 +117,7 @@ fn lex_punctuation(source: &[char]) -> Option { use Punctuation::*; let punct = match c { + '~' => Tilde, '=' => Equal, '<' => LessThan, '>' => GreaterThan, diff --git a/harper-core/src/linting/sentence_capitalization.rs b/harper-core/src/linting/sentence_capitalization.rs index 6d239b61..b0c31cb8 100644 --- a/harper-core/src/linting/sentence_capitalization.rs +++ b/harper-core/src/linting/sentence_capitalization.rs @@ -14,7 +14,11 @@ impl Linter for SentenceCapitalization { let mut lints = Vec::new(); for sentence in document.sentences() { - if let Some(first_word) = sentence.first_sentence_word() { + if let Some(first_word) = sentence.first_non_whitespace() { + if !first_word.kind.is_word() { + continue; + } + let letters = document.get_span_content(first_word.span); if let Some(first_letter) = letters.first() { @@ -61,6 +65,15 @@ mod tests { ) } + #[test] + fn start_with_number() { + assert_lint_count( + "53 is the length of the longest word.", + SentenceCapitalization, + 0, + ); + } + #[test] fn ignores_unlintable() { assert_lint_count( diff --git a/harper-core/src/token.rs b/harper-core/src/token.rs index 7a49933e..223ada31 100644 --- a/harper-core/src/token.rs +++ b/harper-core/src/token.rs @@ -57,6 +57,15 @@ impl TokenKind { pub fn is_apostrophe(&self) -> bool { matches!(self, TokenKind::Punctuation(Punctuation::Apostrophe)) } + + /// Checks whether the token is whitespace. + pub fn is_whitespace(&self) -> bool { + match self { + TokenKind::Space(_) => true, + TokenKind::Newline(_) => true, + _ => false, + } + } } #[derive(Debug, Is, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd)] @@ -104,6 +113,8 @@ pub enum Punctuation { Equal, /// * Star, + /// ~ + Tilde, } #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, PartialOrd)] @@ -118,6 +129,8 @@ pub trait TokenStringExt { /// Will also return [`None`] if there is an unlintable token in the position of the first /// word. fn first_sentence_word(&self) -> Option; + /// Grabs the first token that isn't whitespace from the token string. + fn first_non_whitespace(&self) -> Option; fn iter_word_indices(&self) -> impl Iterator + '_; fn iter_words(&self) -> impl Iterator + '_; fn iter_space_indices(&self) -> impl Iterator + '_; @@ -134,6 +147,10 @@ impl TokenStringExt for [Token] { self.iter().find(|v| v.kind.is_word()).copied() } + fn first_non_whitespace(&self) -> Option { + self.iter().find(|t| !t.kind.is_whitespace()).copied() + } + fn first_sentence_word(&self) -> Option { let (w_idx, word) = self.iter().find_position(|v| v.kind.is_word())?;