diff --git a/harper-core/src/document.rs b/harper-core/src/document.rs index 60c09a16..797901b6 100644 --- a/harper-core/src/document.rs +++ b/harper-core/src/document.rs @@ -2,10 +2,11 @@ use std::fmt::Display; use itertools::Itertools; +use crate::parsers::{MarkdownParser, Parser, PlainEnglishParser}; use crate::{ linting::Suggestion, span::Span, - FatToken, MarkdownParser, Parser, PlainEnglishParser, + FatToken, Punctuation::{self}, Token, TokenKind, }; @@ -18,7 +19,6 @@ pub struct Document { impl Document { /// Lexes and parses text to produce a document. - /// /// Choosing to parse with markdown may have a performance penalty pub fn new(text: &str, parser: Box) -> Self { let source: Vec<_> = text.chars().collect(); diff --git a/harper-core/src/lib.rs b/harper-core/src/lib.rs index 6a71d440..7f42a819 100644 --- a/harper-core/src/lib.rs +++ b/harper-core/src/lib.rs @@ -3,7 +3,7 @@ mod document; mod lexing; mod linting; -mod parsers; +pub mod parsers; mod span; mod spell; mod token; @@ -11,7 +11,6 @@ mod token; pub use document::Document; pub use linting::LintSet; pub use linting::{Lint, LintKind, Linter, Suggestion}; -pub use parsers::{MarkdownParser, Parser, PlainEnglishParser}; pub use span::Span; pub use spell::Dictionary; pub use token::{FatToken, Punctuation, Token, TokenKind, TokenStringExt}; diff --git a/harper-core/src/linting/lint_set.rs b/harper-core/src/linting/lint_set.rs index 3025135d..0ace5dd9 100644 --- a/harper-core/src/linting/lint_set.rs +++ b/harper-core/src/linting/lint_set.rs @@ -60,6 +60,12 @@ impl LintSet { } } +impl Default for LintSet { + fn default() -> Self { + Self::new() + } +} + /// Create builder methods for the linters that do not take any arguments. macro_rules! create_simple_builder_methods { ($($linter:ident),*) => { diff --git a/harper-core/src/linting/spell_check.rs b/harper-core/src/linting/spell_check.rs index 5be41a72..a0e30326 100644 --- a/harper-core/src/linting/spell_check.rs +++ b/harper-core/src/linting/spell_check.rs @@ -29,7 +29,7 @@ impl SpellCheck { self.word_cache .entry(word.clone()) .or_insert_with(|| { - suggest_correct_spelling(&word, 100, 3, &self.dictionary) + suggest_correct_spelling(&word, 100, 2, &self.dictionary) .into_iter() .map(|v| v.to_vec()) .collect() diff --git a/harper-core/src/parsers/mod.rs b/harper-core/src/parsers/mod.rs index b3321913..5c8e46d8 100644 --- a/harper-core/src/parsers/mod.rs +++ b/harper-core/src/parsers/mod.rs @@ -25,9 +25,9 @@ where #[cfg(test)] mod tests { - use super::{MarkdownParser, PlainEnglishParser}; + use super::{MarkdownParser, Parser, PlainEnglishParser}; use crate::{ - Parser, Punctuation, + Punctuation, TokenKind::{self, *}, }; diff --git a/harper-ls/src/backend.rs b/harper-ls/src/backend.rs index 8d3003d8..e7a17d25 100644 --- a/harper-ls/src/backend.rs +++ b/harper-ls/src/backend.rs @@ -1,6 +1,9 @@ -use std::{borrow::Borrow, collections::HashMap, fs}; +use std::{collections::HashMap, fs}; -use harper_core::{Dictionary, Document, Lint, LintSet, Linter, MarkdownParser}; +use harper_core::{ + parsers::{MarkdownParser, Parser}, + Dictionary, Document, LintSet, Linter, +}; use tokio::sync::Mutex; use tower_lsp::{ jsonrpc::Result, @@ -18,6 +21,7 @@ use tower_lsp::{ use crate::{ diagnostics::{lint_to_code_actions, lints_to_diagnostics}, pos_conv::range_to_span, + rust_parser::RustParser, }; pub struct Backend { @@ -33,17 +37,17 @@ impl Backend { } async fn update_document(&self, url: &Url, text: &str) { - let doc = Document::new(text, Box::new(MarkdownParser)); - let mut files = self.files.lock().await; - files.insert(url.clone(), doc); - } + let mut parser: Box = Box::new(MarkdownParser); - async fn generate_lints_for_url(&self, url: &Url) -> Option> { - let files = self.files.lock().await; - let file_contents = files.get(url)?; + if let Some(extension) = url.to_file_path().unwrap().extension() { + if extension == "rs" { + parser = Box::new(RustParser) + } + } - let mut linter = self.linter.lock().await; - Some(linter.lint(file_contents)) + let doc = Document::new(text, parser); + let mut files = self.files.lock().await; + files.insert(url.clone(), doc); } async fn generate_code_actions(&self, url: &Url, range: Range) -> Result> { diff --git a/harper-ls/src/comments.rs b/harper-ls/src/comments.rs deleted file mode 100644 index 0003fefe..00000000 --- a/harper-ls/src/comments.rs +++ /dev/null @@ -1,37 +0,0 @@ -use harper_core::Span; -use tree_sitter::{Parser, TreeCursor}; - -/// Extract each comment astris a seperate block. -pub fn extract_comments_rust(text: &str) -> Vec { - let mut parser = Parser::new(); - parser.set_language(tree_sitter_rust::language()).unwrap(); - - // TODO: Use incremental parsing - let Some(root) = parser.parse(text, None) else { - return vec![]; - }; - - let mut comments = Vec::new(); - - extract_comments(&mut root.walk(), &mut comments); - - comments -} - -fn extract_comments(cursor: &mut TreeCursor, comments: &mut Vec) { - if !cursor.goto_first_child() { - return; - } - - while cursor.goto_next_sibling() { - let node = cursor.node(); - - if node.kind().contains("comment") { - comments.push(node.byte_range().into()); - } - - extract_comments(cursor, comments); - } - - cursor.goto_parent(); -} diff --git a/harper-ls/src/main.rs b/harper-ls/src/main.rs index 36d0ad7d..5cfebe35 100644 --- a/harper-ls/src/main.rs +++ b/harper-ls/src/main.rs @@ -1,8 +1,8 @@ use tokio::net::TcpListener; mod backend; -mod comments; mod diagnostics; mod pos_conv; +mod rust_parser; use backend::Backend; use clap::Parser; diff --git a/harper-ls/src/rust_parser.rs b/harper-ls/src/rust_parser.rs new file mode 100644 index 00000000..af4f118c --- /dev/null +++ b/harper-ls/src/rust_parser.rs @@ -0,0 +1,85 @@ +use harper_core::{ + parsers::{Parser, PlainEnglishParser}, + Span, +}; +use tree_sitter::TreeCursor; + +pub struct RustParser; + +impl Parser for RustParser { + fn parse(&mut self, source: &[char]) -> Vec { + let text: String = source.iter().collect(); + + let mut english_parser = PlainEnglishParser; + let mut parser = tree_sitter::Parser::new(); + parser.set_language(tree_sitter_rust::language()).unwrap(); + + // TODO: Use incremental parsing + let Some(root) = parser.parse(&text, None) else { + return vec![]; + }; + + let mut comments_spans = Vec::new(); + + extract_comments(&mut root.walk(), &mut comments_spans); + byte_spans_to_char_spans(&mut comments_spans, &text); + + let mut tokens = Vec::new(); + + for span in comments_spans { + let mut new_tokens = english_parser.parse(&source[span.start..span.end]); + + new_tokens + .iter_mut() + .for_each(|t| t.span.offset(span.start)); + + tokens.append(&mut new_tokens); + } + + tokens + } +} + +/// Converts a set of byte-indexed [`Span`]s to char-index Spans, in-place. +/// NOTE: Will sort the given array by their [`Span::start`]. +/// +/// Assumes that none of the Spans are overlapping. +fn byte_spans_to_char_spans(byte_spans: &mut [Span], source: &str) { + byte_spans.sort_by_key(|s| s.start); + + let mut last_byte_pos = 0; + let mut last_char_pos = 0; + + byte_spans.iter_mut().for_each(|span| { + let byte_span = *span; + + last_char_pos += source[last_byte_pos..byte_span.start].chars().count(); + span.start = last_char_pos; + + last_char_pos += source[byte_span.start..byte_span.end].chars().count(); + span.end = last_char_pos; + + last_byte_pos = byte_span.end; + }) +} + +/// Visits the children of a TreeSitter node, searching for comments. +/// +/// Returns the BYTE spans of the comment position. +fn extract_comments(cursor: &mut TreeCursor, comments: &mut Vec) { + if !cursor.goto_first_child() { + return; + } + + while cursor.goto_next_sibling() { + let node = cursor.node(); + + if node.kind().contains("comment") { + comments.push(node.byte_range().into()); + } + + extract_comments(cursor, comments); + } + + cursor.goto_parent(); +}