Skip to content

Commit

Permalink
Implement faster trimming of control characters
Browse files Browse the repository at this point in the history
Due to a V8 bug (https://issues.chromium.org/issues/42204424), negative end-of-string matches are slow, as shown in #286. We can avoid triggering this slowdown for certain inputs by just implementing the trimming using loops.

Closes #286. Closes #288 by superseding it.
  • Loading branch information
domenic authored Feb 9, 2025
1 parent 807353d commit 834cef8
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 7 deletions.
1 change: 1 addition & 0 deletions eslint.config.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ export default [
{
ignores: [
"coverage/",
"_site/",
"test/web-platform-tests/",
"live-viewer/whatwg-url.mjs",
"lib/VoidFunction.js",
Expand Down
18 changes: 16 additions & 2 deletions lib/url-state-machine.js
Original file line number Diff line number Diff line change
Expand Up @@ -441,8 +441,22 @@ function domainToASCII(domain, beStrict = false) {
return result;
}

function trimControlChars(url) {
return url.replace(/^[\u0000-\u001F\u0020]+|[\u0000-\u001F\u0020]+$/ug, "");
function trimControlChars(string) {
// Avoid using regexp because of this V8 bug: https://issues.chromium.org/issues/42204424

let start = 0;
let end = string.length;
for (; start < end; ++start) {
if (string.charCodeAt(start) > 0x20) {
break;
}
}
for (; end > start; --end) {
if (string.charCodeAt(end - 1) > 0x20) {
break;
}
}
return string.substring(start, end);
}

function trimTabAndNewline(url) {
Expand Down
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@
"prepare": "node scripts/transform.js",
"pretest": "node scripts/get-latest-platform-tests.js && node scripts/transform.js",
"build-live-viewer": "esbuild --bundle --format=esm --sourcemap --outfile=live-viewer/whatwg-url.mjs index.js",
"test": "node --test test/*.js"
"test": "node --test test/*.js",
"bench": "node scripts/benchmark.js"
},
"c8": {
"reporter": [
Expand Down
26 changes: 22 additions & 4 deletions scripts/benchmark.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
"use strict";
const { URL } = require("../");
const Benchmark = require("benchmark");
const testData = require("../test/web-platform-tests/resources/urltestdata.json");

const testData = require("../test/web-platform-tests/resources/urltestdata.json");
const testInputs = testData.filter(c => typeof c === "object").map(c => c.input);

const benchmark = new Benchmark(() => {
runBenchmark("URL constructor with WPT data", () => {
for (const input of testInputs) {
try {
// eslint-disable-next-line no-new
Expand All @@ -16,5 +16,23 @@ const benchmark = new Benchmark(() => {
}
});

benchmark.on("cycle", e => console.log(e.target.toString()));
benchmark.run();
runBenchmark("long input not starting or ending with control characters (GH-286)", () => {
try {
// eslint-disable-next-line no-new
new URL(`!!${"\u0000".repeat(100000)}A\rA`);
} catch {
// intentionally empty
}
});

function runBenchmark(name, fn) {
new Benchmark(name, fn, {
onComplete(event) {
console.log(`${name}:`);
console.log(` ${event.target.hz.toFixed(0)} ops/second`);
console.log(` ±${event.target.stats.rme.toFixed(2)}% relative margin of error`);
console.log(` ${event.target.stats.sample.length} samples`);
console.log("");
}
}).run();
}

0 comments on commit 834cef8

Please sign in to comment.