-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetLink1.js
113 lines (80 loc) · 2.61 KB
/
getLink1.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
// node "D:/prj/spdJs/getLink1.js"
/**
 * Loads the `log4js`, `mysql` and `cheerio` packages and discards the
 * returned module objects.
 *
 * The only observable effect is whatever loading those packages does
 * (including a throw if one of them is not installed). Nothing in the
 * visible part of this file calls this function.
 *
 * @returns {void}
 */
function imp223() {
  // Same load order as before; the results were never used.
  require('log4js');
  require('mysql');
  require('cheerio');
}
// ---------------------------------------------------------------------------
// Script entry point.
//
// Walks a fixed grid of category ids (104..199) and page numbers (1..30),
// reads the HTML file for each (category, page) pair from D:\prj\data and
// passes its content to getDetailLinkV2().
//
// NOTE(review): logmdx, logger, mdx, connection, i, j, cateid, cate, page and
// fname are assigned without var/let/const, so in non-strict mode they become
// properties of the global object (and would throw in strict mode). Other
// modules (e.g. ./conn.js, loaded after `logger` is set) may rely on these
// globals - confirm before converting them to local declarations.
// ---------------------------------------------------------------------------

// Project logger; `logger` is taken from the `logger` export of ./jsdk/log.js.
logmdx = require('./jsdk/log.js');
logger = logmdx.logger;
logger.info("this is a info msg");
// Database connection exported as `conn` by ./conn.js. In the visible code it
// is only referenced from a commented-out query inside getDetailLinkV2().
mdx = require('./conn.js');
connection = mdx.conn;
var fs = require("fs");
// Outer loop: category id j = 104..199. The unbraced body is the inner loop.
for (j=104;j<200;j++)
// Inner loop: page number i = 1..30 within the current category.
for (i = 1; i <= 30; i++) {
// Earlier variant: a listing URL and a single hard-coded category id.
// url = 'https://cn.pornhub.com/video?c=80&page=' + i;
// cateid = 24;
// The category label is left empty; only the numeric id is passed on.
cateid=j;cate='';
// Earlier variant: a hard-coded category label (originally a Chinese string).
// cate = 'public/outdoor';
page = i;
// Path of the HTML file for this (category, page) pair.
// NOTE(review): presumably written by a separate download step - not shown here.
fname = "D:\\prj\\data\\hornhub" + "_cate" + cateid + "_page" + page + '.html';
console.log(fname);
logger.info(fname)
// A missing/unreadable file or a parsing failure is logged to both the console
// and the project logger, and the loop moves on to the next page.
try{
var html = fs.readFileSync(fname, "utf8");
getDetailLinkV2(html,cateid,cate);
}catch(e){
console.log(e)
logger.error(e)
}
}
/**
 * Extracts the links of one listing page and hands each of them to a
 * `./detailDbInsert.js` child process.
 *
 * Every `<a class="">` element nested inside an `<li>` is treated as one link.
 * For each link a record `{ cateid, cate, LiAattribs, detailurl }` is built,
 * serialized to JSON, encoded with `escape()` and passed to the child process
 * as its single command-line argument.
 *
 * The child processes are started without waiting for one another, so a page
 * with many links starts many processes at once.
 *
 * @param {string} html - Markup of the listing page.
 * @param {number} cateid - Category id stored in every record.
 * @param {string} cate - Category label stored in every record.
 * @returns {void}
 */
function getDetailLinkV2(html, cateid, cate) {
  const cheerio = require('cheerio');
  const childProcess = require('child_process');

  const $ = cheerio.load(html);
  const anchors = $('li a[class=""]').toArray();
  console.log(anchors);

  // Note carried over from the original source: the detail page itself has
  // <div id="player" class="original mainPlayerDiv" data-video-id="...">;
  // that id is not extracted here.

  for (const anchor of anchors) {
    console.log(anchor.attribs.href);
    console.log(anchor.attribs.title);

    const record = {
      cateid: cateid,
      cate: cate,
      LiAattribs: anchor.attribs,
      detailurl: "",
    };
    const payload = JSON.stringify(record);

    // execFile passes the argument straight to the new process instead of
    // building a shell command line. The previous exec() call let the shell
    // see the `*` and `%` characters that escape() leaves in its output and
    // was subject to the shell's command-length limit.
    // escape() is kept so the child receives exactly the same argument as
    // before - presumably detailDbInsert.js calls unescape() on it; confirm
    // before switching to another encoding.
    childProcess.execFile(
      'node',
      ['./detailDbInsert.js', escape(payload)],
      function (error, stdout, stderr) {
        if (error) {
          console.error('error: ' + error);
          return;
        }
        console.log('stdout: ' + stdout);
        // Print what the child wrote to stderr (the old code printed only
        // the type name of the value).
        console.log('stderr: ' + stderr);
      }
    );

    // An earlier variant inserted `payload` directly through the shared MySQL
    // connection (INSERT ... VALUES(?) with [payload]); it was replaced by the
    // per-link child process above.
    // NOTE(review): the original remark suggests this was due to problems with
    // many links being handled at once - confirm.
  }
}
/**
 * Debug helper: prints the markup and then the `href` and `title` of every
 * `<a class="">` element nested inside an `<li>`.
 *
 * Previously the `html` argument was overwritten by the content of
 * `input.txt.html` before being used, so it had no effect. It is now honoured
 * when supplied; calling the function without an argument still reads
 * `input.txt.html` (resolved against the current working directory).
 *
 * @param {string} [html] - Markup to scan. When omitted, the content of
 *     `input.txt.html` is used.
 * @returns {void}
 * @throws {Error} If `html` is omitted and `input.txt.html` cannot be read.
 */
function getDetailLink(html) {
  let markup = html;
  if (markup === undefined || markup === null) {
    const fs = require("fs");
    markup = fs.readFileSync('input.txt.html', "utf8");
  }
  console.log(markup);

  const cheerio = require('cheerio');
  const $ = cheerio.load(markup);
  const anchors = $('li a[class=""]').toArray();
  console.log(anchors);

  // Block-scoped loop variable; the old loop leaked `item` as a global.
  for (const anchor of anchors) {
    console.log(anchor.attribs.href);
    console.log(anchor.attribs.title);
  }
}