-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpbt.js
75 lines (55 loc) · 1.4 KB
/
pbt.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
const mongoose = require('mongoose');
const child_process = require('child_process');
// Script path handed to the `python` interpreter as its first argument by scrape().
const file = 'pbt.py';
// Model queried by scrape(); assigned on the first scrape() call that receives
// one and reused by later calls that are made without an argument.
let timemodel;
/**
 * Spawns `cmd` with `args`, buffers everything the child writes to stdout and
 * stderr, and reports both once the child has finished.
 *
 * @param {string} cmd - Executable to run.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {function(string, string): void} callback - Invoked exactly once as
 *   `callback(err, data)`. `err` is the text collected from stderr, with the
 *   spawn failure message appended when the child could not be started; it is
 *   an empty string when nothing was reported. `data` is the text collected
 *   from stdout.
 */
function runCommand(cmd, args, callback) {
  const command = child_process.spawn(cmd, args);
  let data = '';
  let err = '';
  let finished = false;

  // 'error' and 'close' can both fire for the same child; report only once.
  function finish() {
    if (finished) {
      return;
    }
    finished = true;
    callback(err, data);
  }

  // Decode on the stream so a multi-byte UTF-8 character that is split across
  // two chunks is not corrupted by per-chunk Buffer-to-string coercion.
  command.stdout.setEncoding('utf8');
  command.stderr.setEncoding('utf8');

  command.stdout.on('data', (chunk) => {
    data += chunk;
  });
  command.stderr.on('data', (chunk) => {
    err += chunk;
  });

  // Without a listener, a failed spawn (e.g. executable not found) is an
  // unhandled 'error' event, which throws and takes the whole process down.
  command.on('error', (spawnError) => {
    err += spawnError.message;
    finish();
  });
  command.on('close', finish);
}
/**
 * Runs one scrape pass: fetches every record whose `keywords` is null, runs
 * the Python script on each record's `website` one at a time, stores the
 * script's stdout on the record as `keywords`, and schedules the next pass
 * ten seconds after this one ends.
 *
 * @param {object} [_timemodel] - Model exposing `find(query).exec(callback)`
 *   (presumably a Mongoose model — confirm against the caller). Only the first
 *   model ever passed is kept, in the module-level `timemodel`; that is what
 *   lets the timer-driven re-runs call `scrape()` with no argument.
 */
function scrape(_timemodel) {
  const RESCAN_DELAY_MS = 10000;
  let urlsToScrape;
  timemodel = timemodel || _timemodel;
  console.log('START SCRAPE');

  // Ends the current pass and queues the next one.
  function finishScrape() {
    console.log('FINISHED SCRAPE');
    setTimeout(scrape, RESCAN_DELAY_MS);
  }

  // Fetches the records that still need keywords.
  function getUrlsWithNoKeywords(callback) {
    timemodel
      .find({ keywords: null })
      .exec(callback);
  }

  // Scrapes the last pending record, saves it, then moves on to the next one.
  function scrapeNextUrl() {
    const dbEntry = urlsToScrape.pop();
    const urlToScrape = dbEntry.website;
    runCommand('python', [file, urlToScrape], function (err, keywords) {
      if (err) {
        console.error(urlToScrape, err);
      }
      console.log(urlToScrape, keywords);
      // Always persist the result. Checking the queue length before saving
      // (after the pop above) dropped the final record's keywords, so that
      // record stayed at `keywords: null` and was re-scraped on every pass.
      dbEntry.keywords = keywords;
      dbEntry.save(function (saveErr) {
        if (saveErr) {
          console.error(urlToScrape, saveErr);
        }
        if (urlsToScrape.length) {
          scrapeNextUrl();
        } else {
          finishScrape();
        }
      });
    });
  }

  // Query callback: starts the per-record loop, or ends the pass when there
  // is nothing to do (including when the query itself failed).
  function goScrapeStuff(err, _urlsToScrape) {
    if (err) {
      console.error(err);
    }
    if (_urlsToScrape && _urlsToScrape.length) {
      urlsToScrape = _urlsToScrape;
      scrapeNextUrl();
    } else {
      finishScrape();
    }
  }

  getUrlsWithNoKeywords(goScrapeStuff);
}
// Public entry point: starts the scrape loop (which then re-schedules itself).
exports.scrape = scrape
// NOTE(review): constant string export that nothing in this file reads —
// purpose unclear from here; confirm against the modules that import it.
exports.pyw = 'pyw'