Now ignores code blocks in Markdown

Automattic · Jan 24, 2024 · b361763 · b361763
1 parent 74be2af
commit b361763
Show file tree

Hide file tree

Showing 9 changed files with 142 additions and 109 deletions.
diff --git a/harper-core/dictionary.aff b/harper-core/dictionary.aff
@@ -112,6 +112,9 @@ SFX B   e     able       [^aeiou]e
 SFX L Y 1
 SFX L   0     ment       .
 
+SFX O Y 1
+SFX O   0     ful         .
+
 REP 90
 REP a ei
 REP ei a

diff --git a/harper-core/dictionary.dict b/harper-core/dictionary.dict
@@ -27763,7 +27763,7 @@ immutability/M
 immutable
 immutably
 imp/SMR
-impact/SMDG
+impact/SMDGO
 impair/SDGL
 impaired/U
 impairment/MS

diff --git a/harper-core/src/linting/sentence_capitalization.rs b/harper-core/src/linting/sentence_capitalization.rs
@@ -14,7 +14,6 @@ impl Linter for SentenceCapitalization {
         let mut lints = Vec::new();
 
         for sentence in document.sentences() {
-            dbg!(sentence);
             if let Some(first_word) = sentence.first_word() {
                 let letters = document.get_span_content(first_word.span);
 
@@ -26,7 +25,7 @@ impl Linter for SentenceCapitalization {
                             suggestions: vec![Suggestion::ReplaceWith(
                                 first_letter.to_uppercase().collect_vec(),
                             )],
-                            message: "This sentance does not start with a capital letter"
+                            message: "This sentence does not start with a capital letter"
                                 .to_string(),
                         })
                     }

diff --git a/harper-core/src/linting/spell_check.rs b/harper-core/src/linting/spell_check.rs
@@ -29,7 +29,7 @@ impl SpellCheck {
         self.word_cache
             .entry(word.clone())
             .or_insert_with(|| {
-                suggest_correct_spelling(&word, 10, 3, &self.dictionary)
+                suggest_correct_spelling(&word, 100, 3, &self.dictionary)
                     .into_iter()
                     .map(|v| v.to_vec())
                     .collect()
@@ -50,37 +50,9 @@ impl Linter for SpellCheck {
 
             let mut possibilities = self.cached_suggest_correct_spelling(word_chars);
 
-            possibilities.sort_by_cached_key(|v| {
-                let mut key_dist = usize::MAX;
-
-                for (o, n) in v.iter().zip(word_chars.iter()) {
-                    if o != n {
-                        key_dist = key_distance(*o, *n)
-                            .map(|v| v as usize)
-                            .unwrap_or(usize::MAX);
-                        break;
-                    }
-                }
-
-                // The error is likely by omission
-                if key_dist > 2 {
-                    usize::MAX - v.len()
-                }
-                // The error is likely by replacement
-                else {
-                    key_dist
-                }
-            });
-
-            possibilities.sort_by_key(|v| {
-                if self.dictionary.is_common_word(v) {
-                    0
-                } else {
-                    1
-                }
-            });
-
-            possibilities.shrink_to(5);
+            if possibilities.len() > 3 {
+                possibilities.resize_with(3, || panic!());
+            }
 
             let suggestions = possibilities
                 .into_iter()
@@ -100,57 +72,3 @@ impl Linter for SpellCheck {
         lints
     }
 }
-
-/// Calculate the approximate distance between two letters on a querty keyboard
-fn key_distance(key_a: char, key_b: char) -> Option<f32> {
-    let a = key_location(key_a)?;
-    let b = key_location(key_b)?;
-
-    Some(((a.0 - b.0) * (a.1 - b.1)).sqrt())
-}
-
-/// Calculate the approximate position of a letter on a querty keyboard
-fn key_location(key: char) -> Option<(f32, f32)> {
-    let keys = "1234567890qwertyuiopasdfghjklzxcvbnm";
-
-    let idx = keys.find(key)?;
-
-    // The starting index of each row of the keyboard
-    let mut resets = [0, 10, 20, 29].into_iter().enumerate().peekable();
-    // The amount each row is offset (on my keyboard at least)
-    let offsets = [0.0, 0.5, 0.75, 1.25];
-
-    while let Some((r_idx, reset)) = resets.next() {
-        if idx >= reset {
-            if let Some((_, n_reset)) = resets.peek() {
-                if idx < *n_reset {
-                    return Some(((idx - reset) as f32 + offsets[r_idx], r_idx as f32));
-                }
-            } else {
-                return Some(((idx - reset) as f32 + offsets[r_idx], r_idx as f32));
-            }
-        }
-    }
-
-    None
-}
-
-#[cfg(test)]
-mod tests {
-    use super::key_location;
-
-    #[test]
-    fn correct_q_pos() {
-        assert_eq!(key_location('q'), Some((0.5, 1.0)))
-    }
-
-    #[test]
-    fn correct_a_pos() {
-        assert_eq!(key_location('a'), Some((0.75, 2.0)))
-    }
-
-    #[test]
-    fn correct_g_pos() {
-        assert_eq!(key_location('g'), Some((4.75, 2.0)))
-    }
-}
diff --git a/harper-core/src/parsing/lexer.rs b/harper-core/src/parsing/lexer.rs
@@ -23,20 +23,41 @@ pub fn lex_to_end_md(source: &[char]) -> Vec<Token> {
     let mut traversed_bytes = 0;
     let mut traversed_chars = 0;
 
+    let mut stack = Vec::new();
+
     // NOTE: the range spits out __byte__ indices, not char indices.
     // This is why we keep track above.
     for (event, range) in md_parser.into_offset_iter() {
-        if let pulldown_cmark::Event::Text(text) = event {
-            traversed_chars += source_str[traversed_bytes..range.start].chars().count();
-            traversed_bytes = range.start;
-
-            let mut new_tokens = lex_to_end_str(text);
-
-            new_tokens
-                .iter_mut()
-                .for_each(|token| token.span.offset(traversed_chars));
-
-            tokens.append(&mut new_tokens);
+        match event {
+            pulldown_cmark::Event::Start(tag) => stack.push(tag),
+            pulldown_cmark::Event::End(_) => {
+                stack.pop();
+            }
+            pulldown_cmark::Event::Text(text) => {
+                traversed_chars += source_str[traversed_bytes..range.start].chars().count();
+                traversed_bytes = range.start;
+
+                if let Some(tag) = stack.last() {
+                    use pulldown_cmark::Tag;
+
+                    if !(matches!(tag, Tag::Paragraph)
+                        || matches!(tag, Tag::Heading(_, _, _))
+                        || matches!(tag, Tag::Item))
+                        || matches!(tag, Tag::TableCell)
+                    {
+                        continue;
+                    }
+                }
+
+                let mut new_tokens = lex_to_end_str(text);
+
+                new_tokens
+                    .iter_mut()
+                    .for_each(|token| token.span.offset(traversed_chars));
+
+                tokens.append(&mut new_tokens);
+            }
+            _ => (),
         }
     }
 
@@ -192,6 +213,8 @@ fn lex_punctuation(source: &[char]) -> Option<FoundToken> {
     use Punctuation::*;
 
     let punct = match c {
+        '/' => ForwardSlash,
+        '\\' => Backslash,
         '%' => Percent,
         '’' => Apostrophe,
         '\'' => Apostrophe,

diff --git a/harper-core/src/parsing/token.rs b/harper-core/src/parsing/token.rs
@@ -88,6 +88,10 @@ pub enum Punctuation {
     Apostrophe,
     /// %
     Percent,
+    /// /
+    ForwardSlash,
+    /// \
+    Backslash,
 }
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]

diff --git a/harper-core/src/spell/hunspell/attributes.rs b/harper-core/src/spell/hunspell/attributes.rs
@@ -245,6 +245,7 @@ impl<'a> AttributeArgParser<'a> {
             .char_indices()
             .find(|(_i, c)| !c.is_whitespace())
         else {
+            dbg!(self.cursor);
             return Err(Error::UnexpectedEndOfLine);
         };
 

diff --git a/harper-core/src/spell/mod.rs b/harper-core/src/spell/mod.rs
@@ -40,22 +40,54 @@ pub fn suggest_correct_spelling<'a>(
         }
     });
 
-    let mut found: Vec<(&[char], u8)> = Vec::with_capacity(result_limit);
+    let mut found_dist: Vec<(&[char], u8)> = Vec::with_capacity(result_limit);
 
     for (word, dist) in pruned_words {
-        if found.len() < result_limit {
-            found.push((word, dist));
-            found.sort_by_key(|a| a.1);
+        if found_dist.len() < result_limit {
+            found_dist.push((word, dist));
+            found_dist.sort_by_key(|a| a.1);
             continue;
         }
 
-        if dist < found[result_limit - 1].1 {
-            found[result_limit - 1] = (word, dist);
-            found.sort_by_key(|a| a.1);
+        if dist < found_dist[result_limit - 1].1 {
+            found_dist[result_limit - 1] = (word, dist);
+            found_dist.sort_by_key(|a| a.1);
         }
     }
 
-    found.into_iter().map(|(word, _dist)| word).collect()
+    // Remove edit dist
+    let mut found: Vec<&[char]> = found_dist.into_iter().map(|(word, _dist)| word).collect();
+
+    found.sort_by_cached_key(|v| {
+        let mut key_dist = usize::MAX;
+
+        // The error may be by omission at the end of the word.
+        if v.len() > misspelled_word.len() {
+            return edit_distance_min_alloc(v, misspelled_word, &mut buf_a, &mut buf_b) as usize;
+        }
+
+        for (o, n) in v.iter().zip(misspelled_word.iter()) {
+            if o != n {
+                key_dist = key_distance(*o, *n)
+                    .map(|v| v as usize)
+                    .unwrap_or(usize::MAX);
+                break;
+            }
+        }
+
+        // The error is likely by omission somewhere inside the word
+        if key_dist > 2 {
+            usize::MAX - v.len()
+        }
+        // The error is likely by replacement
+        else {
+            key_dist
+        }
+    });
+
+    found.sort_by_key(|v| if dictionary.is_common_word(v) { 0 } else { 1 });
+
+    found
 }
 
 /// Convenience function over [suggest_correct_spelling] that does conversions for you.
@@ -116,6 +148,40 @@ fn edit_distance(source: &[char], target: &[char]) -> u8 {
     edit_distance_min_alloc(source, target, &mut Vec::new(), &mut Vec::new())
 }
 
+/// Calculate the approximate (Euclidean) distance between two keys on a QWERTY keyboard, or `None` if either key has no known location
+fn key_distance(key_a: char, key_b: char) -> Option<f32> {
+    let a = key_location(key_a)?;
+    let b = key_location(key_b)?;
+    // Straight-line distance: multiplying the deltas instead yields 0 whenever either delta is 0 and NaN when their signs differ.
+    Some((a.0 - b.0).hypot(a.1 - b.1))
+}
+
+/// Calculate the approximate `(x, row)` position of a key on a QWERTY keyboard, or `None` if `key` is not one of the listed digits/lowercase letters
+fn key_location(key: char) -> Option<(f32, f32)> {
+    let keys = "1234567890qwertyuiopasdfghjklzxcvbnm";
+    // `find` returns a byte index, which equals the key's position here because `keys` is ASCII-only
+    let idx = keys.find(key)?;
+
+    // The starting index (into `keys`) of each row of the keyboard
+    let mut resets = [0, 10, 20, 29].into_iter().enumerate().peekable();
+    // The horizontal amount each row is offset (on my keyboard at least)
+    let offsets = [0.0, 0.5, 0.75, 1.25];
+    // Find the row whose index range contains `idx`; the last row has no following row to bound it
+    while let Some((r_idx, reset)) = resets.next() {
+        if idx >= reset {
+            if let Some((_, n_reset)) = resets.peek() {
+                if idx < *n_reset {
+                    return Some(((idx - reset) as f32 + offsets[r_idx], r_idx as f32));
+                }
+            } else {
+                return Some(((idx - reset) as f32 + offsets[r_idx], r_idx as f32));
+            }
+        }
+    }
+    // Fallback only: every index into `keys` is matched by one of the rows above
+    None
+}
+
 #[cfg(test)]
 mod tests {
     use super::edit_distance;
@@ -132,8 +198,26 @@ mod tests {
     fn simple1() {
         assert_edit_dist("kitten", "sitting", 3)
     }
+
     #[test]
     fn simple2() {
         assert_edit_dist("saturday", "sunday", 3)
     }
+
+    use super::key_location;
+
+    #[test]
+    fn correct_q_pos() {
+        assert_eq!(key_location('q'), Some((0.5, 1.0)))
+    }
+
+    #[test]
+    fn correct_a_pos() {
+        assert_eq!(key_location('a'), Some((0.75, 2.0)))
+    }
+
+    #[test]
+    fn correct_g_pos() {
+        assert_eq!(key_location('g'), Some((4.75, 2.0)))
+    }
 }
diff --git a/web/src/lib/Editor.svelte b/web/src/lib/Editor.svelte
@@ -87,7 +87,8 @@
 											)}
 									>
 										Replace "{content.substring(lint.span.start, lint.span.end)}" with "{suggestion.ReplaceWith.reduce(
-											(p, c) => p + c
+											(p, c) => p + c,
+											''
 										)}"
 									</Button>
 								</div>