-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbootstrap-puppeteer.js
121 lines (97 loc) · 3.48 KB
/
bootstrap-puppeteer.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
require('./bootstrap');
const {fs,puppeteer,dirname} = global;
// Names of the tasks that have entered run() and have not finished yet.
let runningTasks = [];
// Flag consulted by run() so that the browser is not launched again on later calls.
let isLaunching = false;
// Browser instance shared by every task; assigned by run() when it launches puppeteer.
let browser;
// Keep a value that was already set on global.headless; otherwise headless mode is on
// only when "--headless" appears among the command-line arguments.
global.headless = global.headless ?? process.argv.includes('--headless');
// Logger built from the directory that contains the entry script.
const logger = new Logger(dirname(require.main.filename));
// Promise of the in-flight (or completed) browser launch. Concurrent run() calls await the
// same promise so only one browser is started; it is reset to null once the idle browser
// has been closed so that a later run() launches a fresh one.
let browserLaunch = null;

/**
 * Returns the shared browser, launching it on first use.
 *
 * @returns {Promise<object>} The launched browser.
 * @throws Whatever puppeteer.launch rejects with; the launch state is reset so that the
 *   next call tries again.
 */
async function _acquireBrowser() {
  if (browserLaunch === null) {
    logger.debug('Launching browser');
    isLaunching = true;
    browserLaunch = puppeteer.launch({
      headless: global.headless,
      args: ['--disable-web-security'],
    });
  }
  const pending = browserLaunch;
  try {
    browser = await pending;
    return browser;
  } catch (e) {
    // Only reset if nobody started a newer launch while this one was failing.
    if (browserLaunch === pending) {
      browserLaunch = null;
      isLaunching = false;
    }
    throw e;
  }
}

/**
 * Closes a page, logging (instead of propagating) any failure so cleanup can continue.
 *
 * @param {object} page - Page to close.
 */
async function _closePage(page) {
  try {
    await page.close();
  } catch (e) {
    logger.error(e);
  }
}

/**
 * Opens a new page and navigates it to `url`, retrying on failure.
 *
 * @param {object} activeBrowser - Browser used to create the page.
 * @param {string} url - Address to navigate to.
 * @param {number} attempts - Maximum number of tries.
 * @returns {Promise<object|null>} The loaded page, or null when every attempt failed.
 */
async function _openPage(activeBrowser, url, attempts) {
  for (let attempt = 0; attempt < attempts; attempt++) {
    let page;
    try {
      logger.debug('Opening new page');
      page = await activeBrowser.newPage();
      logger.debug(`Going to "${url}"`);
      await page.goto(url);
      return page;
    } catch (e) {
      logger.error(e);
      // Do not leave a half-opened page behind before the next attempt.
      if (page) await _closePage(page);
    }
  }
  logger.error(`Cannot open page "${url}"`);
  return null;
}

/**
 * Headful mode only: waits for the page to navigate and runs `callable` again on the new
 * document, trying each timeout in turn.
 *
 * @param {object} page - Page being observed.
 * @param {string} url - Address used in the final error message.
 * @param {Function} callable - Task callback; it receives the page.
 * @param {*} fallback - Value returned when no attempt succeeds.
 * @returns {Promise<*>} Result of the first successful re-run, otherwise `fallback`.
 */
async function _rerunAfterNavigation(page, url, callable, fallback) {
  const timeouts = [500, 3000, 10000, 30000, 60000];
  for (const timeout of timeouts) {
    try {
      await page.waitForNavigation({timeout, waitUntil: 'load'});
      // The callback needs the page here just like on its first invocation.
      return await callable(page);
    } catch (e) {
      logger.error(e);
    }
  }
  logger.error(`Error while opening "${url}".`);
  return fallback;
}

/**
 * Closes the shared browser when no task is running any more.
 * Errors are logged because this runs from a timer, where a rejection would go unhandled.
 */
async function _closeIdleBrowser() {
  if (runningTasks.length > 0 || !browser) return;
  const idleBrowser = browser;
  // Reset the state before awaiting so a run() starting meanwhile launches a new browser
  // instead of picking up the one that is being closed.
  browser = undefined;
  browserLaunch = null;
  isLaunching = false;
  try {
    await idleBrowser.close();
  } catch (e) {
    logger.error(e);
  }
}

/**
 * Runs one task: opens `url` in the shared browser, hands the page to `callable`, and
 * writes `{url, date, data}` to `${global.projectPath}/result/${name}.json`.
 *
 * The page's `$eval` / `$$eval` are replaced by the waiting wrappers (the originals stay
 * reachable as `original_$eval` / `original_$$eval`). Errors thrown by `callable` are
 * logged and leave `data` undefined. When `global.headless` is falsy the task additionally
 * waits for a navigation and runs `callable` again on the new document.
 *
 * If the page cannot be opened, `callable` is skipped and the result is written without
 * data. The page is always closed and the task always unregistered, even on failure; the
 * browser is closed two seconds after the last running task finished.
 *
 * @param {string} name - Task name; also the result file name.
 * @param {string} url - Address to open.
 * @param {Function} callable - Async callback receiving the page; its return value is stored as `data`.
 * @returns {Promise<number>} Always 0.
 * @throws When the browser cannot be launched or the result cannot be written.
 */
global.run = async function run(name, url, callable) {
  runningTasks.push(name);
  let data;
  let page = null;
  try {
    const activeBrowser = await _acquireBrowser();
    page = await _openPage(activeBrowser, url, 5);
    if (page !== null) {
      page.original_$eval = page.$eval;
      page.original_$$eval = page.$$eval;
      page.$eval = $eval.bind(page);
      page.$$eval = $$eval.bind(page);
      try {
        data = await callable(page);
      } catch (e) {
        logger.error(e);
      }
      if (!global.headless) {
        data = await _rerunAfterNavigation(page, url, callable, data);
      }
    }
  } finally {
    if (page !== null) {
      logger.debug('Closing page');
      await _closePage(page);
    }
    runningTasks.splice(runningTasks.indexOf(name), 1);
    // Give tasks that are about to start a short grace period before closing the browser.
    setTimeout(_closeIdleBrowser, 2000);
  }
  writeJSON(`${global.projectPath}/result/${name}`, url, {data});
  return 0;
};
/**
 * Waiting variant of `page.$eval`; meant to be called with a page as `this`.
 * Forwards everything to `_genericEval` with the method name '$eval'.
 *
 * @param {string} selector - Selector to wait for and evaluate against.
 * @param {Function} callback - Function handed to the original `$eval`.
 * @param {string} [waitingMessage] - Message logged while waiting.
 * @param {Array} [argumentsWaitForSelector=[]] - Extra arguments for `waitForSelector`.
 * @param {Array} [argumentsEval=[]] - Extra arguments for the original `$eval`.
 * @returns {Promise<*>} Whatever `_genericEval` resolves to.
 */
global.$eval = async function $eval(
  selector,
  callback,
  waitingMessage,
  argumentsWaitForSelector = [],
  argumentsEval = [],
) {
  const forwarded = [selector, callback, waitingMessage, argumentsWaitForSelector, argumentsEval];
  return _genericEval(this, '$eval', ...forwarded);
};
/**
 * Waiting variant of `page.$$eval`; meant to be called with a page as `this`.
 * Forwards everything to `_genericEval` with the method name '$$eval'.
 *
 * @param {string} selector - Selector to wait for and evaluate against.
 * @param {Function} callback - Function handed to the original `$$eval`.
 * @param {string} [waitingMessage] - Message logged while waiting.
 * @param {Array} [argumentsWaitForSelector=[]] - Extra arguments for `waitForSelector`.
 * @param {Array} [argumentsEval=[]] - Extra arguments for the original `$$eval`.
 * @returns {Promise<*>} Whatever `_genericEval` resolves to.
 */
global.$$eval = async function $$eval(
  selector,
  callback,
  waitingMessage,
  argumentsWaitForSelector = [],
  argumentsEval = [],
) {
  const forwarded = [selector, callback, waitingMessage, argumentsWaitForSelector, argumentsEval];
  return _genericEval(this, '$$eval', ...forwarded);
};
/**
 * Shared implementation behind the `$eval` / `$$eval` wrappers: logs a waiting message,
 * waits for `selector`, then calls the page's saved `original_<name>` method.
 * Any error from waiting or evaluating is logged and the function resolves to undefined.
 *
 * @param {object} page - Page exposing `waitForSelector` and `original_<name>`.
 * @param {string} name - Name of the wrapped method ('$eval' or '$$eval').
 * @param {string} selector - Selector to wait for and evaluate against.
 * @param {Function} callback - Function handed to the original method.
 * @param {string} [waitingMessage] - Message to log; a default mentioning the selector is used when nullish.
 * @param {Array} [argumentsWaitForSelector=[]] - Extra arguments spread into `waitForSelector`.
 * @param {Array} [argumentsEval=[]] - Extra arguments spread into the original method.
 * @returns {Promise<*>} The original method's result, or undefined on error.
 */
async function _genericEval(page, name, selector, callback, waitingMessage, argumentsWaitForSelector = [], argumentsEval = []) {
  const announcement = waitingMessage ?? `Waiting for element "${selector}"`;
  logger.info(announcement);
  const originalMethod = `original_${name}`;
  try {
    await page.waitForSelector(selector, ...argumentsWaitForSelector);
    return await page[originalMethod](selector, callback, ...argumentsEval);
  } catch (e) {
    logger.error(e);
    return undefined;
  }
}
/**
 * Serialises `{url, date: now(), ...content}` as JSON into `<path>.json`, creating the
 * containing directory first when it does not exist.
 *
 * @param {string} path - Destination path without the ".json" extension.
 * @param {string} url - Address stored under the `url` key.
 * @param {object} content - Extra properties merged into the written object.
 * @returns {number} Always 0.
 */
function writeJSON(path, url, content) {
  const target = `${path}.json`;
  const folder = dirname(target);
  const folderExists = fs.existsSync(folder);
  if (!folderExists) {
    fs.mkdirSync(folder, {recursive: true});
  }
  const payload = {url, date: now(), ...content};
  fs.writeFileSync(target, JSON.stringify(payload));
  return 0;
}