-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.js
109 lines (94 loc) · 2.68 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
const superagent = require("superagent")
var path = require('path')
const cheerio = require("cheerio")
const async = require('async');
const { downloadFilePromise, mkdirsSync } = require('./util')
/**
 * Extracts illustration links from the HTML of a character page.
 *
 * For each known illustration keyword, the first `<img>` whose `alt`
 * attribute contains that keyword is looked up; it contributes one entry
 * when both its `alt` and `src` attributes are non-empty.
 *
 * @param {string} data - HTML source of the character page.
 * @returns {Array<[string, string]>} `[alt, src]` pairs, in keyword order.
 */
function parseCharacterPage(data) {
  const $ = cheerio.load(data);
  const links = [];
  for (const keyword of ['立绘', '换装', '换装2', '改造', '誓约']) {
    // `.attr()` reads from the first matched element only.
    const img = $(`img[alt*="${keyword}"]`);
    const url = img.attr('src');
    const label = img.attr('alt');
    if (!label || !url) {
      continue;
    }
    links.push([label, url]);
  }
  return links;
}
/**
 * Downloads a character page and returns the image links found on it.
 *
 * @param {string} characterPageUrl - URL of the character page to fetch.
 * @returns {Promise<Array<[string, string]>>} Resolves with the result of
 *   `parseCharacterPage` applied to the response text; rejects with the
 *   request error when the request fails.
 */
async function getCharacterImgLinks(characterPageUrl) {
  return new Promise((resolve, reject) => {
    const request = superagent.get(characterPageUrl);
    request.end((err, res) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(parseCharacterPage(res.text));
    });
  });
}
/**
 * Downloads every image in `imgLinks` into `saveDir` and waits for the batch.
 *
 * All downloads are started at once. The returned promise settles only after
 * every download has finished or the first one has failed, so callers that
 * `await getImages(...)` really wait for the files. A download failure is
 * logged and swallowed rather than rethrown; downloads still in flight at
 * that point are not cancelled.
 *
 * @param {Array<[string, string]>} imgLinks - `[fileName, url]` pairs, as
 *   produced by `parseCharacterPage`.
 * @param {string} saveDir - Directory the files are written into.
 * @returns {Promise<void>}
 */
async function getImages(imgLinks, saveDir) {
  // downloadFilePromise is assumed to return a promise per file (defined in ./util).
  const downloads = imgLinks.map(([fileName, url]) =>
    downloadFilePromise(url, path.join(saveDir, fileName))
  );
  try {
    // Awaiting here is the fix: the batch used to be left floating, so the
    // function returned before any file had been written.
    await Promise.all(downloads);
    console.log('complete ' + imgLinks.length);
  } catch (err) {
    console.log('err: ' + err);
  }
}
/**
 * Extracts the character index from the HTML of the ship-girl home page.
 *
 * Every `.Flour` element under `#FlourPackage` yields one entry, built from
 * the `title` and `href` attributes of the anchor found inside it.
 *
 * @param {string} data - HTML source of the home page.
 * @returns {Array<[string, string]>} `[title, href]` pairs in document order;
 *   either value is `undefined` when the attribute is missing.
 */
function parseJianniangHome(data) {
  const $ = cheerio.load(data);
  const entries = [];
  $('#FlourPackage')
    .find('.Flour')
    .each((_index, node) => {
      const anchor = $(node).find('a');
      entries.push([anchor.attr('title'), anchor.attr('href')]);
    });
  return entries;
}
/**
 * Downloads the ship-girl home page and returns the character index.
 *
 * @returns {Promise<Array<[string, string]>>} Resolves with the result of
 *   `parseJianniangHome` applied to the response text; rejects with the
 *   request error when the request fails.
 */
async function getCharacterList() {
  const homeUrl = 'http://wiki.joyme.com/blhx/%E8%88%B0%E5%A8%98';
  return new Promise((resolve, reject) => {
    const request = superagent.get(homeUrl);
    request.end((err, res) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(parseJianniangHome(res.text));
    });
  });
}
/**
 * Runs the whole crawl: fetches the character index, then processes the
 * characters one at a time (fetch page, extract image links, download).
 *
 * The returned promise resolves only after the crawl has finished or stopped.
 * An error while processing a character is logged and ends the crawl without
 * rejecting. Failures before the loop (creating the output directory or
 * fetching the index) still reject the returned promise.
 *
 * @returns {Promise<void>}
 */
async function runSpider() {
  console.log('爬虫程序开始运行......');
  const saveDir = 'img';
  mkdirsSync(saveDir);
  const characterList = await getCharacterList();
  // console.log(characterList)
  try {
    // A plain sequential loop keeps one character in flight at a time and
    // lets this function's promise track the crawl, instead of handing the
    // work to a callback-style helper that was never awaited.
    for (const characterInfo of characterList) {
      const imgLinks = await getCharacterImgLinks(encodeURI(characterInfo[1]));
      await getImages(imgLinks, saveDir);
    }
  } catch (err) {
    // The first failure stops the crawl.
    console.log('err: ' + err);
    return;
  }
  console.log('抓取的角色数:' + characterList.length);
}
// Entry point: start the crawl and report any failure that runSpider lets
// through, instead of leaving the promise without a rejection handler.
runSpider().catch(err => {
  console.log('err: ' + err);
  process.exitCode = 1;
});