From 7e3221459c3772d04cc330a3d0999d04101919dc Mon Sep 17 00:00:00 2001 From: WillLillis Date: Wed, 17 Jan 2024 17:25:22 -0500 Subject: [PATCH] Tree-sitter perf --- Cargo.lock | 15 ++++- lsp/Cargo.toml | 1 + lsp/src/handle.rs | 146 ++++++++++++++++++++++++++--------------- lsp/src/lib.rs | 9 ++- lsp/src/text_store.rs | 31 +++++---- lsp/src/tree_sitter.rs | 70 +++++++++++++++----- 6 files changed, 187 insertions(+), 85 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8900162..d911b67 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -278,6 +278,7 @@ dependencies = [ "htmx-lsp-util", "log", "lsp-server", + "lsp-textdocument", "lsp-types", "maplit", "phf", @@ -368,11 +369,21 @@ dependencies = [ "serde_json", ] +[[package]] +name = "lsp-textdocument" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62dcaf776a57a63c3baafa3ab0ae25943049865c862980522a5112b1fd849503" +dependencies = [ + "lsp-types", + "serde_json", +] + [[package]] name = "lsp-types" -version = "0.94.0" +version = "0.94.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b63735a13a1f9cd4f4835223d828ed9c2e35c8c5e61837774399f558b6a1237" +checksum = "c66bfd44a06ae10647fe3f8214762e9369fd4248df1350924b4ef9e770a85ea1" dependencies = [ "bitflags 1.3.2", "serde", diff --git a/lsp/Cargo.toml b/lsp/Cargo.toml index e16324c..a9ab807 100644 --- a/lsp/Cargo.toml +++ b/lsp/Cargo.toml @@ -20,3 +20,4 @@ tree-sitter.workspace = true tree-sitter-html.workspace = true maplit = "1.0.2" phf = { version = "0.11.2", features = ["macros"] } +lsp-textdocument = "0.3.2" diff --git a/lsp/src/handle.rs b/lsp/src/handle.rs index b0bf6a3..c9daa39 100644 --- a/lsp/src/handle.rs +++ b/lsp/src/handle.rs @@ -1,42 +1,15 @@ use crate::{ htmx::{hx_completion, hx_hover, HxCompletion}, - text_store::TEXT_STORE, + text_store::{DocInfo, DOCUMENT_STORE}, + tree_sitter::text_doc_change_to_ts_edit, }; use log::{debug, error, warn}; use lsp_server::{Message, Notification, Request, RequestId}; -use lsp_types::{CompletionContext, CompletionParams, CompletionTriggerKind}; - -#[derive(serde::Deserialize, Debug)] -struct Text { - text: String, -} - -#[derive(serde::Deserialize, Debug)] -struct TextDocumentLocation { - uri: String, -} - -#[derive(serde::Deserialize, Debug)] -struct TextDocumentChanges { - #[serde(rename = "textDocument")] - text_document: TextDocumentLocation, - - #[serde(rename = "contentChanges")] - content_changes: Vec, -} - -#[derive(serde::Deserialize, Debug)] -struct TextDocumentOpened { - uri: String, - - text: String, -} - -#[derive(serde::Deserialize, Debug)] -struct TextDocumentOpen { - #[serde(rename = "textDocument")] - text_document: TextDocumentOpened, -} +use lsp_textdocument::FullTextDocument; +use lsp_types::{ + notification::{DidChangeTextDocument, DidOpenTextDocument}, + CompletionContext, CompletionParams, CompletionTriggerKind, +}; #[derive(Debug)] pub struct HtmxAttributeCompletion { @@ -61,41 +34,84 @@ pub enum HtmxResult { // ignore snakeCase #[allow(non_snake_case)] fn handle_didChange(noti: Notification) -> Option { - let text_document_changes: TextDocumentChanges = serde_json::from_value(noti.params).ok()?; - let uri = text_document_changes.text_document.uri; - let text = text_document_changes.content_changes[0].text.to_string(); - - if text_document_changes.content_changes.len() > 1 { - error!("more than one content change, please be wary"); + match cast_notif::(noti) { + Ok(params) => { + match DOCUMENT_STORE + .get() + .expect("text store not initialized") + .lock() + .expect("text store mutex poisoned") + .get_mut(params.text_document.uri.as_str()) + { + Some(entry) => { + entry + .doc + .update(¶ms.content_changes, params.text_document.version); + + if let Some(ref mut curr_tree) = entry.tree { + for edit in params.content_changes.iter() { + match text_doc_change_to_ts_edit(edit, &entry.doc) { + Ok(edit) => { + curr_tree.edit(&edit); + } + Err(e) => { + error!("handle_didChange Bad edit info, failed to edit tree -- Error: {e}"); + } + } + } + } else { + error!( + "handle_didChange tree for {} is None", + params.text_document.uri.as_str() + ); + } + } + None => { + error!( + "handle_didChange No corresponding doc for supplied edits -- {}", + params.text_document.uri.as_str() + ); + } + } + } + Err(e) => { + error!("Failed the deserialize DidChangeTextDocument params -- Error {e}"); + } } - TEXT_STORE - .get() - .expect("text store not initialized") - .lock() - .expect("text store mutex poisoned") - .insert(uri, text); - None } #[allow(non_snake_case)] fn handle_didOpen(noti: Notification) -> Option { debug!("handle_didOpen params {:?}", noti.params); - let text_document_changes = match serde_json::from_value::(noti.params) { - Ok(p) => p.text_document, + let text_doc_open = match cast_notif::(noti) { + Ok(params) => params, Err(err) => { error!("handle_didOpen parsing params error : {:?}", err); return None; } }; - TEXT_STORE + let doc = FullTextDocument::new( + text_doc_open.text_document.language_id, + text_doc_open.text_document.version, + text_doc_open.text_document.text, + ); + let mut parser = ::tree_sitter::Parser::new(); + parser + .set_language(tree_sitter_html::language()) + .expect("Failed to load HTML grammar"); + let tree = parser.parse(doc.get_content(None), None); + + let doc = DocInfo { doc, parser, tree }; + + DOCUMENT_STORE .get() .expect("text store not initialized") .lock() .expect("text store mutex poisoned") - .insert(text_document_changes.uri, text_document_changes.text); + .insert(text_doc_open.text_document.uri.to_string(), doc); None } @@ -186,10 +202,23 @@ pub fn handle_other(msg: Message) -> Option { None } +fn cast_notif(notif: Notification) -> anyhow::Result +where + R: lsp_types::notification::Notification, + R::Params: serde::de::DeserializeOwned, +{ + match notif.extract(R::METHOD) { + Ok(value) => Ok(value), + Err(e) => Err(anyhow::anyhow!( + "cast_notif Failed to extract params -- Error: {e}" + )), + } +} + #[cfg(test)] mod tests { use super::{handle_request, HtmxResult, Request}; - use crate::text_store::{init_text_store, TEXT_STORE}; + use crate::text_store::{init_text_store, DocInfo, DOCUMENT_STORE}; use std::sync::Once; static SETUP: Once = Once::new(); @@ -198,12 +227,21 @@ mod tests { init_text_store(); }); - TEXT_STORE + let doc = + lsp_textdocument::FullTextDocument::new("html".to_string(), 0, content.to_string()); + let mut parser = ::tree_sitter::Parser::new(); + parser + .set_language(tree_sitter_html::language()) + .expect("Failed to load HTML grammar"); + let tree = parser.parse(doc.get_content(None), None); + let doc_info = DocInfo { doc, parser, tree }; + + DOCUMENT_STORE .get() .expect("text store not initialized") .lock() .expect("text store mutex poisoned") - .insert(file.to_string(), content.to_string()); + .insert(file.to_string(), doc_info); } #[test] diff --git a/lsp/src/lib.rs b/lsp/src/lib.rs index ffffae3..10581f0 100644 --- a/lsp/src/lib.rs +++ b/lsp/src/lib.rs @@ -9,8 +9,8 @@ use htmx::HxCompletion; use log::{debug, error, info, warn}; use lsp_types::{ CompletionItem, CompletionItemKind, CompletionList, HoverContents, InitializeParams, - LanguageString, MarkedString, ServerCapabilities, TextDocumentSyncCapability, - TextDocumentSyncKind, WorkDoneProgressOptions, + LanguageString, MarkedString, PositionEncodingKind, ServerCapabilities, + TextDocumentSyncCapability, TextDocumentSyncKind, WorkDoneProgressOptions, }; use lsp_server::{Connection, Message, Response}; @@ -123,7 +123,10 @@ pub fn start_lsp() -> Result<()> { // Run the server and wait for the two threads to end (typically by trigger LSP Exit event). let server_capabilities = serde_json::to_value(ServerCapabilities { - text_document_sync: Some(TextDocumentSyncCapability::Kind(TextDocumentSyncKind::FULL)), + position_encoding: Some(PositionEncodingKind::UTF16), // compatability with lsp_textdocument crate + text_document_sync: Some(TextDocumentSyncCapability::Kind( + TextDocumentSyncKind::INCREMENTAL, + )), completion_provider: Some(lsp_types::CompletionOptions { resolve_provider: Some(false), trigger_characters: Some(vec!["-".to_string(), "\"".to_string(), " ".to_string()]), diff --git a/lsp/src/text_store.rs b/lsp/src/text_store.rs index d7ed9e5..ab07a48 100644 --- a/lsp/src/text_store.rs +++ b/lsp/src/text_store.rs @@ -4,37 +4,46 @@ use std::{ sync::{Arc, Mutex, OnceLock}, }; -use lsp_types::Url; +use lsp_textdocument::FullTextDocument; +use lsp_types::{Range, Url}; +use tree_sitter::{Parser, Tree}; + +pub struct DocInfo { + pub doc: FullTextDocument, + pub parser: Parser, + pub tree: Option, +} -type TxtStore = HashMap; +type DocStore = HashMap; -pub struct TextStore(TxtStore); +#[derive(Default)] +pub struct DocumentStore(DocStore); -impl Deref for TextStore { - type Target = TxtStore; +impl Deref for DocumentStore { + type Target = DocStore; fn deref(&self) -> &Self::Target { &self.0 } } -impl DerefMut for TextStore { +impl DerefMut for DocumentStore { fn deref_mut(&mut self) -> &mut Self::Target { &mut self.0 } } -pub static TEXT_STORE: OnceLock>> = OnceLock::new(); +pub static DOCUMENT_STORE: OnceLock>> = OnceLock::new(); pub fn init_text_store() { - _ = TEXT_STORE.set(Arc::new(Mutex::new(TextStore(HashMap::new())))); + _ = DOCUMENT_STORE.set(Arc::new(Mutex::new(DocumentStore::default()))); } -pub fn get_text_document(uri: Url) -> Option { - return TEXT_STORE +pub fn get_text_document(uri: &Url, range: Option) -> Option { + return DOCUMENT_STORE .get() .expect("text store not initialized") .lock() .expect("text store mutex poisoned") .get(&uri.to_string()) - .cloned(); + .map(|doc| doc.doc.get_content(range).to_string()); } diff --git a/lsp/src/tree_sitter.rs b/lsp/src/tree_sitter.rs index f305383..7a64bd9 100644 --- a/lsp/src/tree_sitter.rs +++ b/lsp/src/tree_sitter.rs @@ -1,9 +1,11 @@ -use crate::tree_sitter_querier::{ - query_attr_keys_for_completion, query_attr_values_for_completion, +use crate::{ + text_store::DOCUMENT_STORE, + tree_sitter_querier::{query_attr_keys_for_completion, query_attr_values_for_completion}, }; use log::{debug, error}; -use lsp_types::TextDocumentPositionParams; -use tree_sitter::{Node, Parser, Point}; +use lsp_textdocument::FullTextDocument; +use lsp_types::{TextDocumentContentChangeEvent, TextDocumentPositionParams}; +use tree_sitter::{InputEdit, Node, Point}; use crate::text_store::get_text_document; @@ -106,23 +108,61 @@ pub fn get_position_from_lsp_completion( text_params: TextDocumentPositionParams, ) -> Option { error!("get_position_from_lsp_completion"); - let text = get_text_document(text_params.text_document.uri)?; + let text = get_text_document(&text_params.text_document.uri, None)?; error!("get_position_from_lsp_completion: text {}", text); let pos = text_params.position; error!("get_position_from_lsp_completion: pos {:?}", pos); - // TODO: Gallons of perf work can be done starting here - let mut parser = Parser::new(); - - parser - .set_language(tree_sitter_html::language()) - .expect("could not load html grammer"); + if let Some(entry) = DOCUMENT_STORE + .get() + .expect("text store not initialized") + .lock() + .expect("text store mutex poisoned") + .get_mut(text_params.text_document.uri.as_str()) + { + entry.tree = entry + .parser + .parse(entry.doc.get_content(None), entry.tree.as_ref()); + + if let Some(ref curr_tree) = entry.tree { + let trigger_point = Point::new(pos.line as usize, pos.character as usize); + return query_position(curr_tree.root_node(), text.as_str(), trigger_point); + } + } - let tree = parser.parse(&text, None)?; - let root_node = tree.root_node(); - let trigger_point = Point::new(pos.line as usize, pos.character as usize); + None +} - return query_position(root_node, text.as_str(), trigger_point); +/// Convert an `lsp_types::TextDocumentContentChangeEvent` to a `tree_sitter::InputEdit` +pub fn text_doc_change_to_ts_edit( + change: &TextDocumentContentChangeEvent, + doc: &FullTextDocument, +) -> Result { + let range = change.range.ok_or("Invalid edit range")?; + let start = range.start; + let end = range.end; + + let start_byte = doc.offset_at(start) as usize; + let new_end_byte = start_byte + change.text.len(); + let new_end_pos = doc.position_at(new_end_byte as u32); + + Ok(InputEdit { + start_byte, + old_end_byte: doc.offset_at(end) as usize, + new_end_byte, + start_position: Point { + row: start.line as usize, + column: start.character as usize, + }, + old_end_position: Point { + row: end.line as usize, + column: end.character as usize, + }, + new_end_position: Point { + row: new_end_pos.line as usize, + column: new_end_pos.character as usize, + }, + }) } #[cfg(test)]