Skip to content

Commit

Permalink
Can now parse and lint Rust comments
Browse files Browse the repository at this point in the history
  • Loading branch information
elijah-potter committed Jan 25, 2024
1 parent 61dcf22 commit 17353b2
Show file tree
Hide file tree
Showing 9 changed files with 113 additions and 56 deletions.
4 changes: 2 additions & 2 deletions harper-core/src/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@ use std::fmt::Display;

use itertools::Itertools;

use crate::parsers::{MarkdownParser, Parser, PlainEnglishParser};
use crate::{
linting::Suggestion,
span::Span,
FatToken, MarkdownParser, Parser, PlainEnglishParser,
FatToken,
Punctuation::{self},
Token, TokenKind,
};
Expand All @@ -18,7 +19,6 @@ pub struct Document {

impl Document {
/// Lexes and parses text to produce a document.
///
/// Choosing to parse with markdown may have a performance penalty
pub fn new(text: &str, parser: Box<dyn Parser>) -> Self {
let source: Vec<_> = text.chars().collect();
Expand Down
3 changes: 1 addition & 2 deletions harper-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,14 @@
mod document;
mod lexing;
mod linting;
mod parsers;
pub mod parsers;
mod span;
mod spell;
mod token;

pub use document::Document;
pub use linting::LintSet;
pub use linting::{Lint, LintKind, Linter, Suggestion};
pub use parsers::{MarkdownParser, Parser, PlainEnglishParser};
pub use span::Span;
pub use spell::Dictionary;
pub use token::{FatToken, Punctuation, Token, TokenKind, TokenStringExt};
6 changes: 6 additions & 0 deletions harper-core/src/linting/lint_set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ impl LintSet {
}
}

/// Defaults to the same set of linters produced by [`LintSet::new`],
/// so `LintSet::default()` and `LintSet::new()` are interchangeable.
impl Default for LintSet {
fn default() -> Self {
Self::new()
}
}

/// Create builder methods for the linters that do not take any arguments.
macro_rules! create_simple_builder_methods {
($($linter:ident),*) => {
Expand Down
2 changes: 1 addition & 1 deletion harper-core/src/linting/spell_check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ impl SpellCheck {
self.word_cache
.entry(word.clone())
.or_insert_with(|| {
suggest_correct_spelling(&word, 100, 3, &self.dictionary)
suggest_correct_spelling(&word, 100, 2, &self.dictionary)
.into_iter()
.map(|v| v.to_vec())
.collect()
Expand Down
4 changes: 2 additions & 2 deletions harper-core/src/parsers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ where

#[cfg(test)]
mod tests {
use super::{MarkdownParser, PlainEnglishParser};
use super::{MarkdownParser, Parser, PlainEnglishParser};
use crate::{
Parser, Punctuation,
Punctuation,
TokenKind::{self, *},
};

Expand Down
26 changes: 15 additions & 11 deletions harper-ls/src/backend.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
use std::{borrow::Borrow, collections::HashMap, fs};
use std::{collections::HashMap, fs};

use harper_core::{Dictionary, Document, Lint, LintSet, Linter, MarkdownParser};
use harper_core::{
parsers::{MarkdownParser, Parser},
Dictionary, Document, LintSet, Linter,
};
use tokio::sync::Mutex;
use tower_lsp::{
jsonrpc::Result,
Expand All @@ -18,6 +21,7 @@ use tower_lsp::{
use crate::{
diagnostics::{lint_to_code_actions, lints_to_diagnostics},
pos_conv::range_to_span,
rust_parser::RustParser,
};

pub struct Backend {
Expand All @@ -33,17 +37,17 @@ impl Backend {
}

async fn update_document(&self, url: &Url, text: &str) {
let doc = Document::new(text, Box::new(MarkdownParser));
let mut files = self.files.lock().await;
files.insert(url.clone(), doc);
}
let mut parser: Box<dyn Parser> = Box::new(MarkdownParser);

async fn generate_lints_for_url(&self, url: &Url) -> Option<Vec<Lint>> {
let files = self.files.lock().await;
let file_contents = files.get(url)?;
if let Some(extension) = url.to_file_path().unwrap().extension() {
if extension == "rs" {
parser = Box::new(RustParser)
}
}

let mut linter = self.linter.lock().await;
Some(linter.lint(file_contents))
let doc = Document::new(text, parser);
let mut files = self.files.lock().await;
files.insert(url.clone(), doc);
}

async fn generate_code_actions(&self, url: &Url, range: Range) -> Result<Vec<CodeAction>> {
Expand Down
37 changes: 0 additions & 37 deletions harper-ls/src/comments.rs

This file was deleted.

2 changes: 1 addition & 1 deletion harper-ls/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use tokio::net::TcpListener;
mod backend;
mod comments;
mod diagnostics;
mod pos_conv;
mod rust_parser;

use backend::Backend;
use clap::Parser;
Expand Down
85 changes: 85 additions & 0 deletions harper-ls/src/rust_parser.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
use harper_core::{
parsers::{Parser, PlainEnglishParser},
Span,
};
use tree_sitter::TreeCursor;

/// A [`Parser`] that locates comments in Rust source code and runs the
/// plain-English parser over their contents, so only comment text is linted.
pub struct RustParser;

impl Parser for RustParser {
    fn parse(&mut self, source: &[char]) -> Vec<harper_core::Token> {
        let source_text: String = source.iter().collect();

        // Configure a fresh Tree-sitter parser for the Rust grammar.
        let mut ts_parser = tree_sitter::Parser::new();
        ts_parser.set_language(tree_sitter_rust::language()).unwrap();

        // TODO: Use incremental parsing
        let tree = match ts_parser.parse(&source_text, None) {
            Some(tree) => tree,
            None => return Vec::new(),
        };

        // Collect byte-indexed comment spans, then convert them to char
        // indices so they can slice the `&[char]` source directly.
        let mut comment_spans = Vec::new();
        extract_comments(&mut tree.walk(), &mut comment_spans);
        byte_spans_to_char_spans(&mut comment_spans, &source_text);

        let mut english_parser = PlainEnglishParser;
        let mut all_tokens = Vec::new();

        for comment_span in comment_spans {
            let mut comment_tokens =
                english_parser.parse(&source[comment_span.start..comment_span.end]);

            // Token spans come back relative to the comment slice; shift
            // them so they index into the whole document.
            for token in &mut comment_tokens {
                token.span.offset(comment_span.start);
            }

            all_tokens.extend(comment_tokens);
        }

        all_tokens
    }
}

/// Converts a set of byte-indexed [`Span`]s to char-index Spans, in-place.
/// NOTE: Will sort the given array by their [`Span::start`].
///
/// Assumes that none of the Spans are overlapping.
fn byte_spans_to_char_spans(byte_spans: &mut [Span], source: &str) {
    byte_spans.sort_by_key(|s| s.start);

    // Walk the source exactly once, carrying the byte position where the
    // previous span ended and the running char count up to that point, so
    // each gap between spans is only counted a single time.
    let mut prev_byte_end = 0;
    let mut char_cursor = 0;

    for span in byte_spans.iter_mut() {
        let byte_start = span.start;
        let byte_end = span.end;

        // Chars in the gap between the previous span and this one.
        char_cursor += source[prev_byte_end..byte_start].chars().count();
        span.start = char_cursor;

        // Chars inside the span itself.
        char_cursor += source[byte_start..byte_end].chars().count();
        span.end = char_cursor;

        prev_byte_end = byte_end;
    }
}

/// Visits the children of a TreeSitter node, searching for comments.
///
/// Pushes the BYTE spans of each comment's position into `comments`.
fn extract_comments(cursor: &mut TreeCursor, comments: &mut Vec<Span>) {
    if !cursor.goto_first_child() {
        return;
    }

    // Inspect every child, INCLUDING the first. The previous
    // `while cursor.goto_next_sibling()` loop advanced the cursor before
    // looking at the node, which silently skipped the first child of every
    // node — dropping any comment that appeared first in its parent's
    // child list.
    loop {
        let node = cursor.node();

        if node.kind().contains("comment") {
            comments.push(node.byte_range().into());
        }

        extract_comments(cursor, comments);

        if !cursor.goto_next_sibling() {
            break;
        }
    }

    cursor.goto_parent();
}

0 comments on commit 17353b2

Please sign in to comment.