-
Notifications
You must be signed in to change notification settings - Fork 65
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
2d03f8e
commit f9d96c3
Showing
8 changed files
with
250 additions
and
34 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,7 @@ | ||
Harper is a language checker for artists. it can detect | ||
improper capitalization and mispelled words. There are some cases, | ||
improper capitalization and misspelled words. There are some cases, | ||
where the the standard grammar checkers don't cut it. | ||
|
||
That's where Harper comes in handy. | ||
|
||
kid regards, Elijah |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,206 @@ | ||
use crate::{ | ||
spell::DictWord, Document, Lint, LintKind, Linter, Punctuation, Span, Suggestion, Token, | ||
TokenKind, | ||
}; | ||
|
||
#[derive(Debug, PartialEq, PartialOrd, Clone)] | ||
struct PatternToken { | ||
kind: TokenKind, | ||
content: Option<DictWord>, | ||
} | ||
|
||
impl PatternToken { | ||
fn from_token(token: Token, document: &Document) -> Self { | ||
if token.kind.is_word() { | ||
Self { | ||
kind: token.kind, | ||
content: Some(document.get_span_content(token.span).into()), | ||
} | ||
} else { | ||
Self { | ||
kind: token.kind, | ||
content: None, | ||
} | ||
} | ||
} | ||
} | ||
|
||
/// Collects the characters of a string literal into whatever collection the
/// call site expects (for example a `Vec<char>`).
macro_rules! vecword {
    ($text:literal) => {
        ::std::iter::FromIterator::from_iter($text.chars())
    };
}
|
||
/// Shorthand for building pattern tokens and whole rule tables.
///
/// * `pt!("word")` is a word token with exactly that content.
/// * `pt!(Hyphen)` is a hyphen punctuation token.
/// * `pt!(Space)` is a space token of width one.
/// * `pt!("a", "b" => "ab", ...)` is a `Vec<Rule>`; each rule's pattern is the
///   listed words separated by single spaces, and the literal after `=>` is
///   the replacement.
macro_rules! pt {
    ($word:literal) => {
        PatternToken {
            kind: TokenKind::Word,
            content: Some($word.chars().collect()),
        }
    };
    (Hyphen) => {
        PatternToken {
            kind: TokenKind::Punctuation(Punctuation::Hyphen),
            content: None,
        }
    };
    (Space) => {
        PatternToken {
            kind: TokenKind::Space(1),
            content: None,
        }
    };
    ( $($($word:literal),* => $replacement:literal),*) => {
        vec![
            $(
                {
                    // Every word is emitted with a space after it, so the
                    // final space has to be removed again.
                    let mut pattern: Vec<PatternToken> = vec![$(
                        pt!($word),
                        pt!(Space),
                    )*];
                    pattern.truncate(pattern.len().saturating_sub(1));

                    Rule {
                        pattern,
                        replace_with: $replacement.chars().collect(),
                    }
                },
            )*
        ]
    };
}
|
||
struct Rule { | ||
pattern: Vec<PatternToken>, | ||
replace_with: Vec<char>, | ||
} | ||
|
||
/// A linter that uses a variety of curated pattern matches to find and fix common | ||
/// grammatical issues. | ||
pub struct Matcher { | ||
triggers: Vec<Rule>, | ||
} | ||
|
||
impl Matcher { | ||
pub fn new() -> Self { | ||
let mut triggers = pt! { | ||
"There","fore" => "Therefore", | ||
"south","America" => "South America", | ||
"South","america" => "South America", | ||
"south","america" => "South America", | ||
"North","america" => "North America", | ||
"north","America" => "North America", | ||
"north","america" => "North America", | ||
"fatal","outcome" => "death", | ||
"geiger","counter" => "Geiger counter", | ||
"veterans","day" => "Veterans Day", | ||
"presidents","day" => "Presidents' Day", | ||
"president's","day" => "Presidents' Day", | ||
"valentines","day" => "Valentine's Day", | ||
"world","war","2" => "World War II", | ||
"World","war","ii" => "World War II", | ||
"world","War","ii" => "World War II", | ||
"World","War","Ii" => "World War II", | ||
"World","War","iI" => "World War II", | ||
"black","sea" => "Black Sea", | ||
"I","a","m" => "I am", | ||
"We","a","re" => "We are", | ||
"The","re" => "There", | ||
"my","french" => "my French", | ||
"It","cam" => "It can", | ||
"can","be","seem" => "can be seen", | ||
"mu","house" => "my house", | ||
"kid","regards" => "kind regards", | ||
"miss","understand" => "misunderstand", | ||
"miss","use" => "misuse", | ||
"miss","used" => "misused", | ||
"bee","there" => "been there", | ||
"want","be" => "won't be", | ||
"more","then" => "more than", | ||
"gong","to" => "going to", | ||
"then","others" => "than others", | ||
"then","before" => "than before", | ||
"then","last","week" => "than last week", | ||
"then","her" => "than her", | ||
"then","hers" => "than hers", | ||
"then","him" => "than him", | ||
"then","his" => "than his" | ||
}; | ||
|
||
triggers.push(Rule { | ||
pattern: vec![pt!("break"), pt!(Hyphen), pt!("up")], | ||
replace_with: vecword!("break-up"), | ||
}); | ||
|
||
Self { triggers } | ||
} | ||
} | ||
|
||
impl Default for Matcher { | ||
fn default() -> Self { | ||
Self::new() | ||
} | ||
} | ||
|
||
impl Linter for Matcher { | ||
fn lint(&mut self, document: &Document) -> Vec<Lint> { | ||
let mut lints = Vec::new(); | ||
|
||
for (index, _) in document.tokens().enumerate() { | ||
for trigger in &self.triggers { | ||
let mut match_tokens = Vec::new(); | ||
|
||
for (p_index, pattern) in trigger.pattern.iter().enumerate() { | ||
let Some(token) = document.get_token(index + p_index) else { | ||
break; | ||
}; | ||
|
||
let t_pattern = PatternToken::from_token(token, document); | ||
|
||
if t_pattern != *pattern { | ||
break; | ||
} | ||
|
||
match_tokens.push(token); | ||
} | ||
|
||
if match_tokens.len() == trigger.pattern.len() && !match_tokens.is_empty() { | ||
let span = Span::new( | ||
match_tokens.first().unwrap().span.start, | ||
match_tokens.last().unwrap().span.end, | ||
); | ||
|
||
lints.push(Lint { | ||
span, | ||
lint_kind: LintKind::Miscellaneous, | ||
suggestions: vec![Suggestion::ReplaceWith(trigger.replace_with.to_owned())], | ||
message: format!( | ||
"Did you mean “{}”?", | ||
trigger.replace_with.iter().collect::<String>() | ||
), | ||
}) | ||
} | ||
} | ||
} | ||
|
||
lints | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use super::Matcher;
    use crate::{Document, Linter};

    /// "There fore" split across two words should produce exactly one lint.
    #[test]
    fn matches_therefore() {
        let doc = Document::new_plain_english("There fore.");
        let found = Matcher::new().lint(&doc);

        assert_eq!(found.len(), 1);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters