-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlogic.ts
75 lines (65 loc) · 2.27 KB
/
logic.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import { HTMLElement, parse } from "node-html-parser";
import {
fetchWithAxios,
fetchWithNodeFetch,
fetchWithStealthPuppeteer,
type FetchStrategy,
type OnPageEvaluationFunction,
type StrategyResponse,
} from "./strategies";
import { formatUrl } from "./utils";
/** Alias for the `HTMLElement` class imported from `node-html-parser`. */
export type NodeHTMLElement = HTMLElement;
/**
 * Result object returned by `getHtml`.
 *
 * Several fields are forwarded as-is from the `StrategyResponse` of the
 * strategy that was used; their exact semantics are defined in `./strategies`.
 */
export type HtmlResponse = {
/** `getHtml` returns `true` only when a strategy reported success and produced non-empty HTML. */
success: boolean;
/** The fetched markup, or `null` on failure results. */
html: string | null;
/** Tree produced by node-html-parser's `parse(html)`; `getHtml` sets it only on success. */
root?: NodeHTMLElement;
/** Forwarded from the strategy response. Presumably the return value of the on-page evaluation function — confirm in `./strategies`. */
evaluation_result?: any;
/** Descriptor of the strategy that produced the response; `getHtml` uses `{ name: "unknown", cost: 0 }` on failure results. */
strategy: FetchStrategy;
/** Failure detail, if any. NOTE(review): a union that includes `unknown` is just `unknown` to the type checker. */
error?: Error | null | string | unknown;
/** Forwarded from the strategy response. Presumably an HTTP status code or a textual status — confirm in `./strategies`. */
status?: number | string | null;
/** Forwarded from the strategy response only when `keepBrowserOpen` was truthy. Presumably a live browser page handle — confirm in `./strategies`. */
page?: any;
/** The URL as passed by the caller; `getHtml` sets it only on failure results. */
url?: string;
};
/**
 * Call signature shared by the fetch strategies that `getHtml` iterates over:
 * takes the URL plus an optional on-page evaluation function and an optional
 * keep-browser-open flag, and resolves to a `StrategyResponse`.
 */
export type FetchStrategyFunction = (url: string, evalFunction?: OnPageEvaluationFunction, keepBrowserOpen?: boolean) => Promise<StrategyResponse>;
/**
 * Fetches the HTML of `url` by trying a list of fetch strategies in order and
 * returning the first response that reports success with non-empty HTML.
 *
 * Strategy selection:
 * - `set: "cheap"` tries `fetchWithAxios`, then `fetchWithNodeFetch`.
 * - `set: "js"` tries `fetchWithStealthPuppeteer` only.
 * - no `set` (or `null`) tries all three, in that order.
 * - passing `evalFunction` always forces the `"js"` set.
 *
 * A strategy that throws or rejects is logged and skipped so the remaining
 * strategies still get their turn.
 *
 * @param url - Address to fetch; it is passed through `formatUrl` before use.
 * @param options - Optional strategy set, on-page evaluation function and
 *   keep-browser-open flag. The flag is forwarded to every strategy, and the
 *   strategy's `page` is only returned when the flag is truthy.
 * @returns On success, the strategy's fields plus `root`, the tree produced by
 *   `parse(html)`. On failure, `success: false`, `html: null`, the placeholder
 *   strategy `{ name: "unknown", cost: 0 }`, the caller's `url`, and the most
 *   recent `error` / `status` seen, if any. This function never rejects; errors
 *   are reported through the returned object.
 */
export default async function getHtml(
  url: string,
  options?: {
    set?: "cheap" | "js" | null;
    evalFunction?: OnPageEvaluationFunction;
    keepBrowserOpen?: boolean;
  }
): Promise<HtmlResponse> {
  const evalFunction = options?.evalFunction ?? undefined;
  const keepBrowserOpen = options?.keepBrowserOpen ?? false;
  // An evaluation function overrides whatever set the caller asked for.
  const set = evalFunction ? "js" : (options?.set ?? null);

  // Single place that builds every failure result so all failure paths agree.
  const failure = (error?: unknown, status?: HtmlResponse["status"]): HtmlResponse => ({
    success: false,
    html: null,
    strategy: { name: "unknown", cost: 0 },
    error,
    status,
    page: undefined,
    url,
  });

  try {
    // Format the URL using default options
    const formattedUrl = formatUrl(url);

    let strategies: FetchStrategyFunction[];
    if (set === "js") {
      strategies = [fetchWithStealthPuppeteer];
    } else if (set === "cheap") {
      strategies = [fetchWithAxios, fetchWithNodeFetch];
    } else {
      strategies = [fetchWithAxios, fetchWithNodeFetch, fetchWithStealthPuppeteer];
    }

    // Remember why the most recent attempt failed so the caller is not left
    // with an empty failure object when every strategy gives up.
    let lastError: unknown;
    let lastStatus: HtmlResponse["status"];

    for (const strategyFunction of strategies) {
      let response: StrategyResponse;
      try {
        response = await strategyFunction(formattedUrl, evalFunction, keepBrowserOpen);
      } catch (strategyError) {
        // A throwing strategy must not abort the fallback chain.
        console.error(strategyError);
        lastError = strategyError;
        continue;
      }

      const { success, html, strategy, evaluation_result, error, status, page } = response;
      if (error) console.error(error);
      console.log(`getHTML: [${strategy.name}] [${status}] [${success}] [${html?.length}] [${formattedUrl}]`);

      if (!success || !html) {
        // NOTE(review): if a strategy hands back a `page` on a failed attempt
        // while keepBrowserOpen is set, nothing here closes it — confirm that
        // the strategies clean up after themselves on failure.
        lastError = error ?? lastError;
        lastStatus = status ?? lastStatus;
        continue;
      }

      const root = parse(html);
      return {
        success,
        html,
        root,
        strategy,
        evaluation_result,
        error,
        status,
        page: keepBrowserOpen ? page : undefined,
      };
    }

    return failure(lastError, lastStatus);
  } catch (error) {
    // Reached when formatUrl or parse throws.
    return failure(error);
  }
}