Skip to content

Commit

Permalink
Now ignores code blocks in Markdown
Browse files Browse the repository at this point in the history
  • Loading branch information
elijah-potter committed Jan 24, 2024
1 parent 74be2af commit b361763
Show file tree
Hide file tree
Showing 9 changed files with 142 additions and 109 deletions.
3 changes: 3 additions & 0 deletions harper-core/dictionary.aff
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,9 @@ SFX B e able [^aeiou]e
SFX L Y 1
SFX L 0 ment .

SFX O Y 1
SFX O 0 ful .

REP 90
REP a ei
REP ei a
Expand Down
2 changes: 1 addition & 1 deletion harper-core/dictionary.dict
Original file line number Diff line number Diff line change
Expand Up @@ -27763,7 +27763,7 @@ immutability/M
immutable
immutably
imp/SMR
impact/SMDG
impact/SMDGO
impair/SDGL
impaired/U
impairment/MS
Expand Down
3 changes: 1 addition & 2 deletions harper-core/src/linting/sentence_capitalization.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ impl Linter for SentenceCapitalization {
let mut lints = Vec::new();

for sentence in document.sentences() {
dbg!(sentence);
if let Some(first_word) = sentence.first_word() {
let letters = document.get_span_content(first_word.span);

Expand All @@ -26,7 +25,7 @@ impl Linter for SentenceCapitalization {
suggestions: vec![Suggestion::ReplaceWith(
first_letter.to_uppercase().collect_vec(),
)],
message: "This sentance does not start with a capital letter"
message: "This sentence does not start with a capital letter"
.to_string(),
})
}
Expand Down
90 changes: 4 additions & 86 deletions harper-core/src/linting/spell_check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ impl SpellCheck {
self.word_cache
.entry(word.clone())
.or_insert_with(|| {
suggest_correct_spelling(&word, 10, 3, &self.dictionary)
suggest_correct_spelling(&word, 100, 3, &self.dictionary)
.into_iter()
.map(|v| v.to_vec())
.collect()
Expand All @@ -50,37 +50,9 @@ impl Linter for SpellCheck {

let mut possibilities = self.cached_suggest_correct_spelling(word_chars);

possibilities.sort_by_cached_key(|v| {
let mut key_dist = usize::MAX;

for (o, n) in v.iter().zip(word_chars.iter()) {
if o != n {
key_dist = key_distance(*o, *n)
.map(|v| v as usize)
.unwrap_or(usize::MAX);
break;
}
}

// The error is likely by omission
if key_dist > 2 {
usize::MAX - v.len()
}
// The error is likely by replacement
else {
key_dist
}
});

possibilities.sort_by_key(|v| {
if self.dictionary.is_common_word(v) {
0
} else {
1
}
});

possibilities.shrink_to(5);
if possibilities.len() > 3 {
possibilities.resize_with(3, || panic!());
}

let suggestions = possibilities
.into_iter()
Expand All @@ -100,57 +72,3 @@ impl Linter for SpellCheck {
lints
}
}

/// Calculate the approximate distance between two letters on a QWERTY keyboard.
///
/// Both keys are looked up with `key_location`, and the result is the
/// straight-line (Euclidean) distance between the two returned coordinates.
///
/// Returns `None` when `key_location` yields `None` for either key.
fn key_distance(key_a: char, key_b: char) -> Option<f32> {
    let a = key_location(key_a)?;
    let b = key_location(key_b)?;

    // Use the hypotenuse of the two deltas. Taking the square root of their
    // product instead would give 0 for any two keys that share a row or a
    // column, and NaN whenever the deltas have opposite signs.
    Some((a.0 - b.0).hypot(a.1 - b.1))
}

/// Calculate the approximate position of a letter on a QWERTY keyboard.
///
/// The returned pair is `(x, row)`: `row` is the index of the keyboard row
/// (0 for the digit row, 3 for the bottom letter row) and `x` is the key's
/// index within that row plus the row's horizontal offset.
///
/// Returns `None` for any character that is not one of the 36 listed keys.
fn key_location(key: char) -> Option<(f32, f32)> {
    const KEYS: &str = "1234567890qwertyuiopasdfghjklzxcvbnm";
    // Index into `KEYS` at which each keyboard row begins.
    const ROW_STARTS: [usize; 4] = [0, 10, 20, 29];
    // Horizontal shift applied to each row.
    const ROW_OFFSETS: [f32; 4] = [0.0, 0.5, 0.75, 1.25];

    let key_idx = KEYS.find(key)?;

    // The key belongs to the last row whose start is not past its index.
    let row = ROW_STARTS.iter().rposition(|&start| start <= key_idx)?;
    let column = (key_idx - ROW_STARTS[row]) as f32 + ROW_OFFSETS[row];

    Some((column, row as f32))
}

// Unit tests pinning the coordinates that `key_location` reports for a few keys.
#[cfg(test)]
mod tests {
use super::key_location;

// `q` is the first key of the second row (row index 1), which is shifted right by 0.5.
#[test]
fn correct_q_pos() {
assert_eq!(key_location('q'), Some((0.5, 1.0)))
}

// `a` is the first key of the third row (row index 2), which is shifted right by 0.75.
#[test]
fn correct_a_pos() {
assert_eq!(key_location('a'), Some((0.75, 2.0)))
}

// `g` is the fifth key of the third row: in-row index 4 plus the 0.75 row offset.
#[test]
fn correct_g_pos() {
assert_eq!(key_location('g'), Some((4.75, 2.0)))
}
}
45 changes: 34 additions & 11 deletions harper-core/src/parsing/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,41 @@ pub fn lex_to_end_md(source: &[char]) -> Vec<Token> {
let mut traversed_bytes = 0;
let mut traversed_chars = 0;

let mut stack = Vec::new();

// NOTE: the range spits out __byte__ indices, not char indices.
// This is why we keep track above.
for (event, range) in md_parser.into_offset_iter() {
if let pulldown_cmark::Event::Text(text) = event {
traversed_chars += source_str[traversed_bytes..range.start].chars().count();
traversed_bytes = range.start;

let mut new_tokens = lex_to_end_str(text);

new_tokens
.iter_mut()
.for_each(|token| token.span.offset(traversed_chars));

tokens.append(&mut new_tokens);
match event {
pulldown_cmark::Event::Start(tag) => stack.push(tag),
pulldown_cmark::Event::End(_) => {
stack.pop();
}
pulldown_cmark::Event::Text(text) => {
traversed_chars += source_str[traversed_bytes..range.start].chars().count();
traversed_bytes = range.start;

if let Some(tag) = stack.last() {
use pulldown_cmark::Tag;

if !(matches!(tag, Tag::Paragraph)
|| matches!(tag, Tag::Heading(_, _, _))
|| matches!(tag, Tag::Item))
|| matches!(tag, Tag::TableCell)
{
continue;
}
}

let mut new_tokens = lex_to_end_str(text);

new_tokens
.iter_mut()
.for_each(|token| token.span.offset(traversed_chars));

tokens.append(&mut new_tokens);
}
_ => (),
}
}

Expand Down Expand Up @@ -192,6 +213,8 @@ fn lex_punctuation(source: &[char]) -> Option<FoundToken> {
use Punctuation::*;

let punct = match c {
'/' => ForwardSlash,
'\\' => Backslash,
'%' => Percent,
'’' => Apostrophe,
'\'' => Apostrophe,
Expand Down
4 changes: 4 additions & 0 deletions harper-core/src/parsing/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ pub enum Punctuation {
Apostrophe,
/// %
Percent,
/// /
ForwardSlash,
/// \
Backslash,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
Expand Down
1 change: 1 addition & 0 deletions harper-core/src/spell/hunspell/attributes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ impl<'a> AttributeArgParser<'a> {
.char_indices()
.find(|(_i, c)| !c.is_whitespace())
else {
dbg!(self.cursor);
return Err(Error::UnexpectedEndOfLine);
};

Expand Down
100 changes: 92 additions & 8 deletions harper-core/src/spell/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,22 +40,54 @@ pub fn suggest_correct_spelling<'a>(
}
});

let mut found: Vec<(&[char], u8)> = Vec::with_capacity(result_limit);
let mut found_dist: Vec<(&[char], u8)> = Vec::with_capacity(result_limit);

for (word, dist) in pruned_words {
if found.len() < result_limit {
found.push((word, dist));
found.sort_by_key(|a| a.1);
if found_dist.len() < result_limit {
found_dist.push((word, dist));
found_dist.sort_by_key(|a| a.1);
continue;
}

if dist < found[result_limit - 1].1 {
found[result_limit - 1] = (word, dist);
found.sort_by_key(|a| a.1);
if dist < found_dist[result_limit - 1].1 {
found_dist[result_limit - 1] = (word, dist);
found_dist.sort_by_key(|a| a.1);
}
}

found.into_iter().map(|(word, _dist)| word).collect()
// Remove edit dist
let mut found: Vec<&[char]> = found_dist.into_iter().map(|(word, _dist)| word).collect();

found.sort_by_cached_key(|v| {
let mut key_dist = usize::MAX;

// The error may be by omission at the end of the word.
if v.len() > misspelled_word.len() {
return edit_distance_min_alloc(v, misspelled_word, &mut buf_a, &mut buf_b) as usize;
}

for (o, n) in v.iter().zip(misspelled_word.iter()) {
if o != n {
key_dist = key_distance(*o, *n)
.map(|v| v as usize)
.unwrap_or(usize::MAX);
break;
}
}

// The error is likely by omission somewhere inside the word
if key_dist > 2 {
usize::MAX - v.len()
}
// The error is likely by replacement
else {
key_dist
}
});

found.sort_by_key(|v| if dictionary.is_common_word(v) { 0 } else { 1 });

found
}

/// Convenience function over [suggest_correct_spelling] that does conversions for you.
Expand Down Expand Up @@ -116,6 +148,40 @@ fn edit_distance(source: &[char], target: &[char]) -> u8 {
edit_distance_min_alloc(source, target, &mut Vec::new(), &mut Vec::new())
}

/// Calculate the approximate distance between two letters on a QWERTY keyboard.
///
/// Both keys are looked up with `key_location`, and the result is the
/// straight-line (Euclidean) distance between the two returned coordinates.
///
/// Returns `None` when `key_location` yields `None` for either key.
fn key_distance(key_a: char, key_b: char) -> Option<f32> {
    let a = key_location(key_a)?;
    let b = key_location(key_b)?;

    // Use the hypotenuse of the two deltas. Taking the square root of their
    // product instead would give 0 for any two keys that share a row or a
    // column, and NaN whenever the deltas have opposite signs.
    Some((a.0 - b.0).hypot(a.1 - b.1))
}

/// Calculate the approximate position of a letter on a QWERTY keyboard.
///
/// The returned pair is `(x, row)`: `row` is the index of the keyboard row
/// (0 for the digit row, 3 for the bottom letter row) and `x` is the key's
/// index within that row plus the row's horizontal offset.
///
/// Returns `None` for any character that is not one of the 36 listed keys.
fn key_location(key: char) -> Option<(f32, f32)> {
    const KEYS: &str = "1234567890qwertyuiopasdfghjklzxcvbnm";
    // Index into `KEYS` at which each keyboard row begins.
    const ROW_STARTS: [usize; 4] = [0, 10, 20, 29];
    // Horizontal shift applied to each row.
    const ROW_OFFSETS: [f32; 4] = [0.0, 0.5, 0.75, 1.25];

    let key_idx = KEYS.find(key)?;

    // The key belongs to the last row whose start is not past its index.
    let row = ROW_STARTS.iter().rposition(|&start| start <= key_idx)?;
    let column = (key_idx - ROW_STARTS[row]) as f32 + ROW_OFFSETS[row];

    Some((column, row as f32))
}

#[cfg(test)]
mod tests {
use super::edit_distance;
Expand All @@ -132,8 +198,26 @@ mod tests {
fn simple1() {
assert_edit_dist("kitten", "sitting", 3)
}

// "saturday" -> "sunday" is expected to have an edit distance of 3.
#[test]
fn simple2() {
assert_edit_dist("saturday", "sunday", 3)
}

use super::key_location;

// `q` is the first key of the second row (row index 1), which is shifted right by 0.5.
#[test]
fn correct_q_pos() {
assert_eq!(key_location('q'), Some((0.5, 1.0)))
}

// `a` is the first key of the third row (row index 2), which is shifted right by 0.75.
#[test]
fn correct_a_pos() {
assert_eq!(key_location('a'), Some((0.75, 2.0)))
}

// `g` is the fifth key of the third row: in-row index 4 plus the 0.75 row offset.
#[test]
fn correct_g_pos() {
assert_eq!(key_location('g'), Some((4.75, 2.0)))
}
}
3 changes: 2 additions & 1 deletion web/src/lib/Editor.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@
)}
>
Replace "{content.substring(lint.span.start, lint.span.end)}" with "{suggestion.ReplaceWith.reduce(
(p, c) => p + c
(p, c) => p + c,
''
)}"
</Button>
</div>
Expand Down

0 comments on commit b361763

Please sign in to comment.