Skip to content

Commit

Permalink
Meta: Include stylesheets and stylesheet-includes in WPT imports
Browse files Browse the repository at this point in the history
Because of this we no longer have to handle ahem.css in a special way.

This should find:
- <link rel=stylesheet>
- CSS `@import`s
- Any resources linked from a stylesheet with `url()`

There's a good chance there are other resources we'll want to copy too,
but CSS was a big hole.
  • Loading branch information
AtkinsSJ committed Nov 6, 2024
1 parent 06154b8 commit 377dd7a
Showing 1 changed file with 29 additions and 4 deletions.
33 changes: 29 additions & 4 deletions Meta/import-wpt-test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from urllib.request import urlopen
from collections import namedtuple
from enum import Enum
import re

# Base URL for WPT resources. NOTE(review): the name suggests downloads are
# resolved against this origin, but its usage is outside this excerpt - confirm.
wpt_base_url = 'https://wpt.live/'

Expand All @@ -35,13 +36,39 @@ def __init__(self, _: str, input_path: str, expected_path: str):
# Module-level accumulator of resource URLs discovered while parsing the page:
# the HTML parser's handlers append to it and modify_sources() iterates over it.
src_values = []


class ScriptSrcValueFinder(HTMLParser):
class LinkedResourceFinder(HTMLParser):
    """Collect the URLs of resources that an HTML page links to.

    Every discovered URL is appended to the module-level ``src_values`` list.
    The parser looks for:

    - ``<script src=...>``
    - ``<link rel=stylesheet href=...>``
    - ``url(...)`` references inside ``<style>`` elements
    - ``@import "..."`` / ``@import '...'`` rules inside ``<style>`` elements
    """

    def __init__(self):
        super().__init__()
        # Open elements, innermost last; used to tell whether text is CSS.
        self._tag_stack_ = []
        # url(foo), url("foo") and url('foo'), with optional inner whitespace.
        # The back-reference makes sure the closing quote matches the opening
        # one, so the quotes never end up inside the captured URL.
        self._match_css_url_ = re.compile(
            r"url\(\s*(?P<quote>[\"']?)(?P<url>[^\"')]+?)\s*(?P=quote)\s*\)")
        # @import with a plain (single- or double-quoted) string argument.
        self._match_css_import_string_ = re.compile(
            r"@import\s+(?P<quote>[\"'])(?P<url>[^\"']+)(?P=quote)")

    def handle_starttag(self, tag, attrs):
        """Record script sources and stylesheet links; track the open tag."""
        self._tag_stack_.append(tag)
        attr_dict = dict(attrs)
        if tag == "script":
            # A valueless ``src`` attribute is reported as None; skip it.
            src = attr_dict.get("src")
            if src:
                src_values.append(src)
        elif tag == "link":
            # ``rel`` is a case-insensitive, space-separated token list and
            # both ``rel`` and ``href`` may be missing altogether.
            rel_tokens = (attr_dict.get("rel") or "").lower().split()
            href = attr_dict.get("href")
            if "stylesheet" in rel_tokens and href:
                src_values.append(href)

    def handle_endtag(self, tag):
        """Close ``tag`` along with any unclosed elements nested inside it."""
        # Stray end tags (no matching open element) must not crash the parser.
        if tag not in self._tag_stack_:
            return
        # Void elements such as <link> or <meta> never receive an end tag, so
        # unwind through them until the matching element has been removed.
        while self._tag_stack_.pop() != tag:
            pass

    def handle_data(self, data):
        """Scan the text content of ``<style>`` elements for linked URLs."""
        if not self._tag_stack_ or self._tag_stack_[-1] != "style":
            return
        # Look for uses of url()
        for match in self._match_css_url_.finditer(data):
            url = match.group("url")
            print(f"Found a url: {url}")
            src_values.append(url)
        # Look for @imports that use plain strings - the url() ones were
        # already picked up by the loop above.
        for match in self._match_css_import_string_.finditer(data):
            url = match.group("url")
            print(f"Found an @import url: {url}")
            src_values.append(url)


class TestTypeIdentifier(HTMLParser):
Expand Down Expand Up @@ -111,8 +138,6 @@ def modify_sources(files):
with open(file, 'r') as f:
page_source = f.read()

page_source = page_source.replace('/fonts/ahem.css', '../' * parent_folder_count + 'fonts/ahem.css')

# Iterate all scripts and overwrite the src attribute
for i, src_value in enumerate(src_values):
if src_value.startswith('/'):
Expand Down Expand Up @@ -208,7 +233,7 @@ def main():
files_to_modify = download_files(main_paths)
create_expectation_files(main_paths)

parser = ScriptSrcValueFinder()
parser = LinkedResourceFinder()
parser.feed(page)

modify_sources(files_to_modify)
Expand Down

0 comments on commit 377dd7a

Please sign in to comment.