-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextract.js
59 lines (54 loc) · 1.67 KB
/
extract.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
const jsdom = require('jsdom')
const url = require('url')
module.exports = function extract(pages, store = []) {
let update = []
let delta = []
let requests = []
for (let page of pages) {
let storedPage = store.find((aStoredPage) => page.url === aStoredPage.url && page.selector === aStoredPage.selector)
requests.push(new Promise((resolve) => {
jsdom.env(
page.url,
function (err, window) {
if (err) {
return resolve(err)
}
// record current state with selector
page.finds = []
for (let element of window.document.querySelectorAll(page.selector)) {
if (element.hasAttribute('href')) {
let anchor = {name: element.innerHTML, url: url.resolve(page.url, element.getAttribute('href'))}
page.finds.push(anchor)
} else {
page.finds.push(element.innerHTML)
}
}
update.push(Object.assign({},page))
// check for new results
if (storedPage !== undefined && storedPage.finds !== undefined && storedPage.finds.length && page.finds.length) {
page.finds = page.finds.filter(extract => {
if (typeof extract === 'string') {
return (storedPage.finds.indexOf(extract) === -1)
} else if (extract.url !== undefined) {
return (storedPage.finds.find(storedExtract => extract.url === storedExtract.url) === undefined)
} else {
return true
}
})
}
if (page.finds.length) {
delta.push(page)
}
resolve()
}
)
}))
}
return new Promise((resolve, reject) => {
Promise.all(requests).then((errors) => {
errors = errors.filter(response => response !== undefined)
resolve([errors, update, delta])
})
.catch(reject)
})
}