From 1b3f8e1e9ad14f0777188bd6eba06e42ffd98961 Mon Sep 17 00:00:00 2001 From: Jonne Ransijn Date: Sun, 1 Dec 2024 22:21:18 +0100 Subject: [PATCH] LibJS: Cache source code positions more often The source code position cache was moved from a line-based approach to a "chunk"-based approach to improve performance on large, minified JavaScript files with few lines, but this has had an adverse effect on _multi-line_ source files. Reintroduce some of the old behaviour by caching lines again, with some added sanity limits to avoid caching empty/overly small lines. Source code positions in files with few lines will still be cached less often, since minified JavaScript files can be assumed to be unusually large, and since stack traces for minified JavaScript are less useful as well. On WPT tests with large JavaScript dependencies like `css/css-masking/animations/clip-interpolation.html` this reduces the amount of time spent in `SourceCode::range_from_offsets` by as much as 99.98%, for the small price of 80KB extra memory usage. 
--- Libraries/LibJS/Position.h | 2 ++ Libraries/LibJS/SourceCode.cpp | 25 ++++++++++++++++--------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/Libraries/LibJS/Position.h b/Libraries/LibJS/Position.h index 5d512fb1a0b5..dd33cb4df1af 100644 --- a/Libraries/LibJS/Position.h +++ b/Libraries/LibJS/Position.h @@ -6,6 +6,8 @@ #pragma once +#include + namespace JS { struct Position { diff --git a/Libraries/LibJS/SourceCode.cpp b/Libraries/LibJS/SourceCode.cpp index 6596e5f7b0dd..551648fc7cb8 100644 --- a/Libraries/LibJS/SourceCode.cpp +++ b/Libraries/LibJS/SourceCode.cpp @@ -35,26 +35,31 @@ String const& SourceCode::code() const void SourceCode::fill_position_cache() const { - constexpr size_t minimum_distance_between_cached_positions = 10000; + constexpr size_t predicted_mimimum_cached_positions = 8; + constexpr size_t minimum_distance_between_cached_positions = 32; + constexpr size_t maximum_distance_between_cached_positions = 8192; if (m_code.is_empty()) return; - bool previous_code_point_was_carriage_return = false; + u32 previous_code_point = 0; size_t line = 1; size_t column = 1; size_t offset_of_last_starting_point = 0; - m_cached_positions.ensure_capacity(m_code.bytes().size() / minimum_distance_between_cached_positions); + m_cached_positions.ensure_capacity(predicted_mimimum_cached_positions + m_code.bytes().size() / maximum_distance_between_cached_positions); m_cached_positions.append({ .line = 1, .column = 1, .offset = 0 }); Utf8View const view(m_code); for (auto it = view.begin(); it != view.end(); ++it) { u32 code_point = *it; - bool is_line_terminator = code_point == '\r' || (code_point == '\n' && !previous_code_point_was_carriage_return) || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR; - previous_code_point_was_carriage_return = code_point == '\r'; + bool is_line_terminator = code_point == '\r' || (code_point == '\n' && previous_code_point != '\r') || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR; 
auto byte_offset = view.byte_offset_of(it); - if ((byte_offset - offset_of_last_starting_point) >= minimum_distance_between_cached_positions) { + + bool is_nonempty_line = is_line_terminator && previous_code_point != '\n' && previous_code_point != LINE_SEPARATOR && previous_code_point != PARAGRAPH_SEPARATOR && (code_point == '\n' || previous_code_point != '\r'); + auto distance_between_cached_position = byte_offset - offset_of_last_starting_point; + + if ((distance_between_cached_position >= minimum_distance_between_cached_positions && is_nonempty_line) || distance_between_cached_position >= maximum_distance_between_cached_positions) { m_cached_positions.append({ .line = line, .column = column, .offset = byte_offset }); offset_of_last_starting_point = byte_offset; } @@ -65,6 +70,8 @@ void SourceCode::fill_position_cache() const } else { column += 1; } + + previous_code_point = code_point; } } @@ -93,7 +100,7 @@ SourceRange SourceCode::range_from_offsets(u32 start_offset, u32 end_offset) con Optional start; Optional end; - bool previous_code_point_was_carriage_return = false; + u32 previous_code_point = 0; Utf8View const view(m_code); for (auto it = view.iterator_at_byte_offset_without_validation(current.offset); it != view.end(); ++it) { @@ -119,8 +126,8 @@ SourceRange SourceCode::range_from_offsets(u32 start_offset, u32 end_offset) con u32 code_point = *it; - bool const is_line_terminator = code_point == '\r' || (code_point == '\n' && !previous_code_point_was_carriage_return) || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR; - previous_code_point_was_carriage_return = code_point == '\r'; + bool const is_line_terminator = code_point == '\r' || (code_point == '\n' && previous_code_point != '\r') || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR; + previous_code_point = code_point; if (is_line_terminator) { current.line += 1;