Skip to content

Commit 9090fa8

Browse files
authored Jan 23, 2025
Merge pull request #32 from dreadnode/loaders/fix-selenium-website-loader-timeouts-v2
fix: aggressive dns caching approach (closes #31)
2 parents a07c57f + 759ef13 commit 9090fa8

File tree

2 files changed

+74
-34
lines changed

2 files changed

+74
-34
lines changed
 

‎dyana/loaders/website/Dockerfile

+12-8
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,21 @@
11
FROM python:3.10-alpine
22

3-
# install chromedriver
4-
RUN apk update
5-
RUN apk add chromium chromium-chromedriver
3+
# install only essential dependencies
4+
RUN apk update && apk add --no-cache \
5+
chromium \
6+
chromium-chromedriver
67

7-
ENV PYTHONUNBUFFERED 1
8-
RUN pip install --upgrade pip
9-
RUN pip install selenium
8+
ENV PYTHONUNBUFFERED=1
9+
ENV CHROME_BIN=/usr/bin/chromium-browser
10+
ENV CHROME_PATH=/usr/lib/chromium/
11+
ENV DISPLAY=:99
12+
ENV PATH="/usr/lib/chromium/:${PATH}"
13+
14+
# Install Python dependencies
15+
RUN pip install --no-cache-dir selenium
1016

1117
WORKDIR /app
1218
COPY dyana.py .
1319
COPY main.py .
1420

15-
ENV DISPLAY=:99
16-
1721
ENTRYPOINT ["python3", "-W", "ignore", "main.py"]

‎dyana/loaders/website/main.py

+62-26
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,51 @@
11
import argparse
22
import json
33
import os
4-
import shutil
4+
import time
55

66
from selenium import webdriver
77
from selenium.common.exceptions import TimeoutException
8-
from selenium.webdriver.chrome.service import Service
98
from selenium.webdriver.common.by import By
109
from selenium.webdriver.support import expected_conditions as EC
1110
from selenium.webdriver.support.ui import WebDriverWait
1211

1312
from dyana import Profiler # type: ignore[attr-defined]
1413

14+
CHROMIUM_BROWSER_PATH = "/usr/bin/chromium-browser"
15+
CHROMIUM_DRIVER_PATH = "/usr/lib/chromium/chromedriver"
16+
17+
18+
def setup_chrome_options(performance_log: bool) -> webdriver.ChromeOptions:
    """Build the ChromeOptions used to launch headless Chromium.

    Args:
        performance_log: When true, additionally request the "performance",
            "browser" and "network" log types through ``goog:loggingPrefs``.

    Returns:
        A ``webdriver.ChromeOptions`` carrying the browser binary location,
        the command-line switches listed below and the ``excludeSwitches``
        experimental option.
    """
    # Switches are applied in exactly this order.
    switches = (
        # basic headless / container setup
        "--window-size=1920,1080",
        "--headless=new",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-gpu",
        # Google services and non-critical features, turned off because
        # they can cause hangs
        "--disable-sync",
        "--disable-extensions",
        "--disable-background-networking",
        "--disable-domain-reliability",
        "--disable-client-side-phishing-detection",
        "--disable-component-update",
        # DNS prefetch / cache related switches; intended to force a DNS
        # lookup for each request
        # NOTE(review): confirm each switch is still honoured by the
        # bundled Chromium build
        "--dns-prefetch-disable",
        "--disable-http-cache",
        "--disable-browser-side-navigation",
    )

    options = webdriver.ChromeOptions()
    options.binary_location = CHROMIUM_BROWSER_PATH

    for switch in switches:
        options.add_argument(switch)

    options.add_experimental_option("excludeSwitches", ["enable-logging"])

    if performance_log:
        # network logging prefs
        logging_prefs = {"performance": "ALL", "browser": "ALL", "network": "ALL"}
        options.set_capability("goog:loggingPrefs", logging_prefs)

    return options
47+
48+
1549
if __name__ == "__main__":
1650
parser = argparse.ArgumentParser(description="Profile website performance")
1751
parser.add_argument("--url", help="URL to open", required=True)
@@ -23,46 +57,47 @@
2357
parser.add_argument("--performance-log", help="Enable performance logging", action="store_true")
2458
args = parser.parse_args()
2559

26-
# Normalize URL by adding https:// if protocol is missing
60+
# normalize URL - https:// if protocol is missing
2761
if "://" not in args.url:
2862
args.url = f"https://{args.url}"
2963

3064
profiler: Profiler = Profiler()
65+
driver: webdriver.Chrome | None = None
3166

3267
try:
33-
chrome_options = webdriver.ChromeOptions()
34-
chrome_options.add_argument("--no-sandbox")
35-
chrome_options.add_argument("--headless=new")
36-
chrome_options.add_argument("--disable-dev-shm-usage")
37-
chrome_options.add_argument("--window-size=1920,1080")
38-
39-
if args.performance_log:
40-
chrome_options.set_capability("goog:loggingPrefs", {"performance": "ALL"})
41-
42-
service = Service(executable_path=shutil.which("chromedriver"))
43-
service.start()
44-
68+
chrome_options = setup_chrome_options(args.performance_log)
69+
service = webdriver.ChromeService(executable_path=CHROMIUM_DRIVER_PATH)
4570
driver = webdriver.Chrome(options=chrome_options, service=service)
46-
driver.implicitly_wait(10)
71+
72+
# set shorter timeouts
73+
driver.set_page_load_timeout(15)
74+
driver.implicitly_wait(5)
4775

4876
profiler.track_memory("before_load")
77+
try:
78+
profiler.track("dns_start", time.time())
79+
driver.get(args.url)
80+
profiler.track("dns_end", time.time())
81+
except TimeoutException:
82+
profiler.track_error("page_load", f"Timeout loading page: {args.url}")
83+
# continue execution to capture any partial data
4984

50-
driver.get(args.url)
85+
if args.performance_log:
86+
network_logs = driver.get_log("performance")
87+
profiler.track_extra("network_logs", network_logs)
88+
browser_logs = driver.get_log("browser")
89+
profiler.track_extra("browser_logs", browser_logs)
90+
91+
profiler.track_memory("after_load")
5192

5293
if args.wait_for:
53-
# Wait for specific element if requested
5494
try:
5595
WebDriverWait(driver, args.wait_for_timeout).until(
5696
EC.presence_of_element_located((By.CSS_SELECTOR, args.wait_for))
5797
)
5898
except TimeoutException:
5999
profiler.track_error("wait", f"Timeout waiting for element: {args.wait_for}")
60100

61-
profiler.track_memory("after_load")
62-
63-
if args.performance_log:
64-
profiler.track_extra("performance_log", driver.get_log("performance"))
65-
66101
if args.screenshot:
67102
try:
68103
driver.get_screenshot_as_file("/tmp/screenshot.png")
@@ -76,9 +111,10 @@
76111
profiler.track_error("chrome", str(e))
77112
finally:
78113
try:
79-
driver.quit()
80-
profiler.track_memory("after_quit")
81-
except Exception:
114+
if driver:
115+
driver.quit()
116+
except Exception as _:
82117
pass
83118

119+
# ensure we always output something
84120
print(json.dumps(profiler.as_dict()))

0 commit comments

Comments
 (0)
Please sign in to comment.