Skip to content

Commit

Permalink
Fixed issues with inline code blocks in Markdown
Browse files Browse the repository at this point in the history
  • Loading branch information
elijah-potter committed Jan 26, 2024
1 parent f2b0b48 commit 6dbcebd
Show file tree
Hide file tree
Showing 7 changed files with 60 additions and 16 deletions.
2 changes: 0 additions & 2 deletions harper-core/src/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -301,13 +301,11 @@ mod tests {

fn assert_condensed_contractions(text: &str, final_tok_count: usize) {
let mut document = Document::new(text, Box::new(PlainEnglish));
dbg!(&document.tokens);
document.condense_contractions();

assert_eq!(document.tokens.len(), final_tok_count);

let mut document = Document::new(text, Box::new(Markdown));
dbg!(&document.tokens);
document.condense_contractions();

assert_eq!(document.tokens.len(), final_tok_count);
Expand Down
5 changes: 3 additions & 2 deletions harper-core/src/linting/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,12 @@ pub trait Linter: Send + Sync {

#[cfg(test)]
mod tests {
use crate::{parsers::PlainEnglish, Document, Linter};
use crate::{parsers::Markdown, Document, Linter};

pub fn assert_lint_count(text: &str, mut linter: impl Linter, count: usize) {
let test = Document::new(text, Box::new(PlainEnglish));
let test = Document::new(text, Box::new(Markdown));
let lints = linter.lint(&test);
dbg!(&lints);
assert_eq!(lints.len(), count);
}
}
24 changes: 19 additions & 5 deletions harper-core/src/linting/sentence_capitalization.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,7 @@ impl Linter for SentenceCapitalization {
let mut lints = Vec::new();

for sentence in document.sentences() {
if let Some(first_word) = sentence.first() {
if !first_word.kind.is_word() {
break;
}

if let Some(first_word) = sentence.first_sentence_word() {
let letters = document.get_span_content(first_word.span);

if let Some(first_letter) = letters.first() {
Expand Down Expand Up @@ -64,4 +60,22 @@ mod tests {
2,
)
}

/// A sentence that opens with a bracketed inline code span should produce
/// no capitalization lints: the expected lint count is zero.
#[test]
fn ignores_unlintable() {
assert_lint_count(
"[`misspelled_word`] is assumed to be quite small (n < 100). ",
SentenceCapitalization,
0,
)
}

/// An inline code span later in the sentence must not change the outcome:
/// exactly one lint is expected here (presumably for the lowercase leading
/// word "the" — confirm against `SentenceCapitalization`).
#[test]
fn unphased_unlintable() {
assert_lint_count(
"the linter should not be affected by `this` unlintable.",
SentenceCapitalization,
1,
)
}
}
23 changes: 20 additions & 3 deletions harper-core/src/parsers/markdown.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
use itertools::Itertools;
use pulldown_cmark::LinkType;

use super::{Parser, PlainEnglish, StrParser};
use crate::{Span, Token, TokenKind};

Expand Down Expand Up @@ -29,10 +32,23 @@ impl Parser for Markdown {
pulldown_cmark::Event::End(_) => {
stack.pop();
}
pulldown_cmark::Event::Code(code) => {
traversed_chars += source_str[traversed_bytes..range.start].chars().count();
traversed_bytes = range.start;

let chunk_len = code.chars().count();

tokens.push(Token {
span: Span::new(traversed_chars, chunk_len),
kind: TokenKind::Unlintable,
});
}
pulldown_cmark::Event::Text(text) => {
traversed_chars += source_str[traversed_bytes..range.start].chars().count();
traversed_bytes = range.start;

let chunk_len = text.chars().count();

if let Some(tag) = stack.last() {
use pulldown_cmark::Tag;

Expand All @@ -45,19 +61,20 @@ impl Parser for Markdown {
}

if !(matches!(tag, Tag::Paragraph)
|| matches!(tag, Tag::Heading(_, _, _))
|| matches!(tag, Tag::Link(..))
|| matches!(tag, Tag::Heading(..))
|| matches!(tag, Tag::Item)
|| matches!(tag, Tag::TableCell)
|| matches!(tag, Tag::Emphasis)
|| matches!(tag, Tag::Strong)
|| matches!(tag, Tag::Link(..))
|| matches!(tag, Tag::Strikethrough))
{
continue;
}
}

let mut new_tokens = english_parser.parse_str(text);
let mut new_tokens =
english_parser.parse(&source[traversed_chars..traversed_chars + chunk_len]);

new_tokens
.iter_mut()
Expand Down
1 change: 0 additions & 1 deletion harper-core/src/spell/hunspell/attributes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,6 @@ impl<'a> AttributeArgParser<'a> {
.char_indices()
.find(|(_i, c)| !c.is_whitespace())
else {
dbg!(self.cursor);
return Err(Error::UnexpectedEndOfLine);
};

Expand Down
2 changes: 1 addition & 1 deletion harper-core/src/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ impl TokenStringExt for [Token] {
return Some(*word);
};

if w_idx > u_idx {
if w_idx < u_idx {
Some(*word)
} else {
None
Expand Down
19 changes: 17 additions & 2 deletions harper-ls/src/tree_sitter_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,22 @@ impl Parser for TreeSitterParser {
let mut tokens = Vec::new();

for span in comments_spans {
let mut new_tokens = markdown_parser.parse(&source[span.start..span.end]);
// Skip over the comment start characters
let actual_start = source[span.start..span.end]
.iter()
.position(|c| !is_comment_character(*c))
.unwrap_or(0)
+ span.start;

if span.end <= actual_start {
continue;
}

let mut new_tokens = markdown_parser.parse(&source[actual_start..span.end]);

new_tokens
.iter_mut()
.for_each(|t| t.span.offset(span.start));
.for_each(|t| t.span.offset(actual_start));

tokens.append(&mut new_tokens);
}
Expand All @@ -71,6 +82,10 @@ impl Parser for TreeSitterParser {
}
}

/// Returns `true` when `c` is one of the punctuation characters this module
/// treats as comment-leader characters: `#`, `-`, or `/`.
fn is_comment_character(c: char) -> bool {
    const COMMENT_CHARACTERS: [char; 3] = ['#', '-', '/'];

    COMMENT_CHARACTERS.contains(&c)
}

/// Converts a set of byte-indexed [`Span`]s to char-index Spans, in-place.
/// NOTE: Will sort the given array by their [`Span::start`].
///
Expand Down

0 comments on commit 6dbcebd

Please sign in to comment.