Skip to content

Commit

Permalink
LibWebView: Normalize source-code text before highlighting it
Browse files Browse the repository at this point in the history
The previous code to determine the SourceDocument's lines was too naive:
the source text can contain other newline characters and sequences, and
the HTML/CSS/JS syntax highlighters would take those into account when
determining what line a token is on. This disagreement would cause
incorrect highlighting, or even crashes, if the source didn't solely use
`\n` for its newlines.

In order to have everyone agree on what a line is, this patch first
processes the source to replace all newlines with `\n`. The need to
copy the source like this is unfortunate, but viewing the source is a
rare enough action that this should not cause any noticeable
performance problems.

As the callers have a String, and we want a String, this also changes
the function parameters to keep the source as a String instead of
converting it to StringView and back.

Fixes LadybirdBrowser#3169
  • Loading branch information
AtkinsSJ committed Jan 13, 2025
1 parent c3fbf93 commit 66f1c43
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 12 deletions.
47 changes: 40 additions & 7 deletions Libraries/LibWebView/SourceHighlighter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,45 @@

namespace WebView {

SourceDocument::SourceDocument(StringView source)
: m_source(source)
SourceDocument::SourceDocument(String const& source)
{
m_source.for_each_split_view('\n', AK::SplitBehavior::KeepEmpty, [&](auto line) {
m_lines.append(Syntax::TextDocumentLine { *this, line });
});
// HTML, CSS and JS differ slightly on what they consider a newline to be.
// In order to make them get along in documents that include a mix of the three, process the source to make the
// newlines consistent before doing any highlighting.

// Optimization: If all the newlines are \n, just use the input string.
if (!source.code_points().contains_any_of(Array<u32, 3> { '\r', 0x2028, 0x2029 })) {
m_source = source;
} else {
StringBuilder builder { source.byte_count() };
// Convert any '\r\n', \r, <LS> or <PS> to \n
bool previous_was_cr = false;
for (u32 code_point : source.code_points()) {
if (previous_was_cr && code_point != '\n')
builder.append('\n');
previous_was_cr = false;

switch (code_point) {
case '\r':
previous_was_cr = true;
break;
case JS::LINE_SEPARATOR:
case JS::PARAGRAPH_SEPARATOR:
builder.append('\n');
break;
default:
builder.append_code_point(code_point);
}
}
m_source = builder.to_string_without_validation();
}

m_source.code_points().for_each_split_view(
[](u32 it) { return it == '\n'; },
SplitBehavior::KeepEmpty,
[&](auto line) {
m_lines.append(Syntax::TextDocumentLine { *this, line.as_string() });
});
}

Syntax::TextDocumentLine& SourceDocument::line(size_t line_index)
Expand All @@ -35,7 +68,7 @@ Syntax::TextDocumentLine const& SourceDocument::line(size_t line_index) const
return m_lines[line_index];
}

SourceHighlighterClient::SourceHighlighterClient(StringView source, Syntax::Language language)
SourceHighlighterClient::SourceHighlighterClient(String const& source, Syntax::Language language)
: m_document(SourceDocument::create(source))
{
// HACK: Syntax highlighters require a palette, but we don't actually care about the output styling, only the type of token for each span.
Expand Down Expand Up @@ -114,7 +147,7 @@ void SourceHighlighterClient::highlighter_did_set_folding_regions(Vector<Syntax:
document().set_folding_regions(move(folding_regions));
}

String highlight_source(URL::URL const& url, URL::URL const& base_url, StringView source, Syntax::Language language, HighlightOutputMode mode)
String highlight_source(URL::URL const& url, URL::URL const& base_url, String const& source, Syntax::Language language, HighlightOutputMode mode)
{
SourceHighlighterClient highlighter_client { source, language };
return highlighter_client.to_html_string(url, base_url, mode);
Expand Down
10 changes: 5 additions & 5 deletions Libraries/LibWebView/SourceHighlighter.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ enum class HighlightOutputMode {

class SourceDocument final : public Syntax::Document {
public:
static NonnullRefPtr<SourceDocument> create(StringView source)
static NonnullRefPtr<SourceDocument> create(String const& source)
{
return adopt_ref(*new (nothrow) SourceDocument(source));
}
Expand All @@ -38,18 +38,18 @@ class SourceDocument final : public Syntax::Document {
virtual Syntax::TextDocumentLine& line(size_t line_index) override;

private:
SourceDocument(StringView source);
SourceDocument(String const& source);

// ^ Syntax::Document
virtual void update_views(Badge<Syntax::TextDocumentLine>) override { }

StringView m_source;
String m_source;
Vector<Syntax::TextDocumentLine> m_lines;
};

class SourceHighlighterClient final : public Syntax::HighlighterClient {
public:
SourceHighlighterClient(StringView source, Syntax::Language);
SourceHighlighterClient(String const& source, Syntax::Language);
virtual ~SourceHighlighterClient() = default;

String to_html_string(URL::URL const& url, URL::URL const& base_url, HighlightOutputMode) const;
Expand All @@ -75,7 +75,7 @@ class SourceHighlighterClient final : public Syntax::HighlighterClient {
OwnPtr<Syntax::Highlighter> m_highlighter;
};

String highlight_source(URL::URL const& url, URL::URL const& base_url, StringView, Syntax::Language, HighlightOutputMode);
String highlight_source(URL::URL const& url, URL::URL const& base_url, String const& source, Syntax::Language, HighlightOutputMode);

constexpr inline StringView HTML_HIGHLIGHTER_STYLE = R"~~~(
@media (prefers-color-scheme: dark) {
Expand Down

0 comments on commit 66f1c43

Please sign in to comment.