Skip to content

Commit

Permalink
Now correctly recognizes numbers when they open sentences
Browse files Browse the repository at this point in the history
  • Loading branch information
elijah-potter committed Jan 29, 2024
1 parent ae429e8 commit ccebbd0
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 4 deletions.
3 changes: 0 additions & 3 deletions harper-core/dictionary.dict
Original file line number Diff line number Diff line change
Expand Up @@ -10271,12 +10271,10 @@ Vonnegut/M
Voronezh/M
Vorster/M
Voyager/M
Vt
Vuitton/M
Vulcan/M
Vulg
Vulgate/SM
W/MDT
WA
WAC
WASP/M
Expand Down Expand Up @@ -40244,7 +40242,6 @@ rutted
rutting
rutty/RT
rye/M
s/NYXB
sabbath/M
sabbaths
sabbatical/SM
Expand Down
1 change: 1 addition & 0 deletions harper-core/src/lexing/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ fn lex_punctuation(source: &[char]) -> Option<FoundToken> {
use Punctuation::*;

let punct = match c {
'~' => Tilde,
'=' => Equal,
'<' => LessThan,
'>' => GreaterThan,
Expand Down
15 changes: 14 additions & 1 deletion harper-core/src/linting/sentence_capitalization.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,11 @@ impl Linter for SentenceCapitalization {
let mut lints = Vec::new();

for sentence in document.sentences() {
if let Some(first_word) = sentence.first_sentence_word() {
if let Some(first_word) = sentence.first_non_whitespace() {
if !first_word.kind.is_word() {
continue;
}

let letters = document.get_span_content(first_word.span);

if let Some(first_letter) = letters.first() {
Expand Down Expand Up @@ -61,6 +65,15 @@ mod tests {
)
}

#[test]
fn start_with_number() {
    // A sentence that opens with a number should produce no capitalization lints.
    let source = "53 is the length of the longest word.";
    let expected_lints = 0;

    assert_lint_count(source, SentenceCapitalization, expected_lints);
}

#[test]
fn ignores_unlintable() {
assert_lint_count(
Expand Down
17 changes: 17 additions & 0 deletions harper-core/src/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,15 @@ impl TokenKind {
/// Checks whether the token is an apostrophe punctuation mark.
pub fn is_apostrophe(&self) -> bool {
matches!(self, TokenKind::Punctuation(Punctuation::Apostrophe))
}

/// Reports whether this token kind is whitespace.
///
/// Both [`TokenKind::Space`] and [`TokenKind::Newline`] count as whitespace;
/// every other kind does not.
pub fn is_whitespace(&self) -> bool {
    matches!(self, TokenKind::Space(_) | TokenKind::Newline(_))
}
}

#[derive(Debug, Is, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd)]
Expand Down Expand Up @@ -104,6 +113,8 @@ pub enum Punctuation {
Equal,
/// *
Star,
/// ~
Tilde,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, PartialOrd)]
Expand All @@ -118,6 +129,8 @@ pub trait TokenStringExt {
/// Will also return [`None`] if there is an unlintable token in the position of the first
/// word.
fn first_sentence_word(&self) -> Option<Token>;
/// Grabs the first token that isn't whitespace from the token string.
///
/// Returns [`None`] if the token string is empty or contains only whitespace tokens.
fn first_non_whitespace(&self) -> Option<Token>;
fn iter_word_indices(&self) -> impl Iterator<Item = usize> + '_;
fn iter_words(&self) -> impl Iterator<Item = &Token> + '_;
fn iter_space_indices(&self) -> impl Iterator<Item = usize> + '_;
Expand All @@ -134,6 +147,10 @@ impl TokenStringExt for [Token] {
self.iter().find(|v| v.kind.is_word()).copied()
}

/// Returns a copy of the first token whose kind is not whitespace, or `None`
/// when no such token exists.
fn first_non_whitespace(&self) -> Option<Token> {
    self.iter()
        .copied()
        .find(|token| !token.kind.is_whitespace())
}

fn first_sentence_word(&self) -> Option<Token> {
let (w_idx, word) = self.iter().find_position(|v| v.kind.is_word())?;

Expand Down

0 comments on commit ccebbd0

Please sign in to comment.