-
Notifications
You must be signed in to change notification settings - Fork 0
/
interestListSpider.py
120 lines (107 loc) · 4.35 KB
/
interestListSpider.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import requests
import re
import json
import networkx as nx
import matplotlib.pyplot as plt
from spiderUtils import *
# Fetch the list of accounts a user follows
def getInterestList(uid, num):
    """Return the ids of accounts listed in a user's "all follows" card group.

    Requests page 1 of the ``231051_-_followers_-_<uid>`` container from
    the m.weibo.cn ``getIndex`` endpoint via ``getHtmlText`` and scans the
    returned cards.

    Args:
        uid: Weibo user id, interpolated into the request URL.
        num: Maximum number of entries taken from each matching card
            group. Values <= 0 yield no entries.

    Returns:
        A list of ``{'id': <user id>}`` dicts, in the order the API
        returned them.

    Raises:
        KeyError: If the response lacks ``data``/``cards``, or a matching
            group lacks ``card_group`` or a card lacks ``user``/``id``.
    """
    # Titles of the card group to read: "all of her follows" and
    # "all of his follows".
    follow_titles = ('她的全部关注', '他的全部关注')

    url = 'https://m.weibo.cn/api/container/getIndex?containerid=231051_-_followers_-_{}&page=1'.format(uid)
    data = json.loads(getHtmlText(url))

    followed = []
    for group in data['data']['cards']:
        # Membership test instead of `title == a or b`: the latter is always
        # true because the bare string `b` is truthy, which let every titled
        # card through regardless of its title.
        if group.get('title') not in follow_titles:
            continue
        # max() keeps a negative `num` meaning "take nothing" rather than
        # "drop the last -num entries".
        for card in group['card_group'][:max(num, 0)]:
            followed.append({'id': card['user']['id']})
    return followed
# Depth-first crawl collecting user info and follow lists over several levels
def deepSearchList(list, uid, floor, num):
    """Depth-first crawl of the follow graph starting at ``uid``.

    Args:
        list: Crawl cache, mutated in place. Maps uid strings to records
            holding ``'userInfo'`` and, for users that have been expanded,
            ``'interestList'``. (The name shadows the builtin; it is kept
            so existing callers keep working.)
        uid: Id of the user to visit; may be a str or an int, it is
            normalised to str before being used as a cache key.
        floor: Remaining levels to expand. At 0 only the profile is
            stored and the user's follow list is not fetched.
        num: Maximum number of followed accounts fetched and visited per
            user.

    Returns:
        The same ``list`` object that was passed in.
    """
    # Cache keys are always strings (they round-trip through JSON), so
    # normalise once instead of mixing `list[str(uid)]` and `list[uid]`.
    uid = str(uid)
    record = list.get(uid)
    expand = floor != 0

    # A leaf visit is satisfied by any cached record; an expanding visit
    # additionally needs the follow list to be cached already.
    if record is not None and (not expand or 'interestList' in record):
        print('{}有重复'.format(uid))
        return list

    # Reuse a profile cached by an earlier leaf visit rather than fetching
    # it a second time when the same user is now being expanded.
    if record and 'userInfo' in record:
        user_info = record['userInfo']
    else:
        user_info = getUserInfo(uid)

    # Build the record fully before storing it so a failed request does not
    # leave a half-filled entry in the cache.
    record = {'userInfo': user_info}
    if expand:
        record['interestList'] = getInterestList(uid, num)
    list[uid] = record

    print('{}\t{}\t{}\t{}'.format(uid, user_info['name'],
                                  user_info['gender'],
                                  user_info['desc']))

    if expand:
        for followed in record['interestList'][:max(num, 0)]:
            deepSearchList(list, str(followed['id']), floor - 1, num)
    return list
# Draw the relationship graph of the crawled accounts and who they follow
def drawRelationGraph(dataList):
    """Render the crawled follow relations as a directed graph.

    Args:
        dataList: Mapping of uid string -> record. Every record must have
            ``'userInfo'`` with ``'name'`` and ``'gender'``; records that
            also have ``'interestList'`` contribute one edge per entry,
            pointing at the record stored under ``str(entry['id'])``.

    Nodes are labelled with the user's name. A node is drawn light blue
    when the gender field equals '男' (male) and pink otherwise, and its
    size grows with the square of the number of follow edges counted
    towards it. The figure is shown with ``plt.show()``.

    Raises:
        KeyError: If a follow-list entry refers to a uid that has no
            record in ``dataList``.
    """
    graph = nx.DiGraph()
    followers_seen = {}
    fill_colors = {}

    # Pass 1: one node per record. Both dicts are filled in the same order
    # the nodes are added, which is the order nx.draw consumes them in.
    for uid in dataList:
        profile = dataList[uid]['userInfo']
        label = profile['name']
        graph.add_node(label)
        followers_seen[label] = 0
        if profile['gender'] == '男':
            fill_colors[label] = 'lightblue'
        else:
            fill_colors[label] = 'pink'

    # Pass 2: one edge per follow relation, counting edges per target.
    for uid, record in dataList.items():
        if 'interestList' not in record:
            continue
        source = record['userInfo']['name']
        for followed in record['interestList']:
            print('{} -> {}'.format(uid, followed['id']))
            target = dataList[str(followed['id'])]['userInfo']['name']
            graph.add_edge(source, target)
            followers_seen[target] += 1

    layout = nx.spring_layout(graph)
    sizes = [count * count * 200 + 100 for count in followers_seen.values()]
    colors = list(fill_colors.values())
    nx.draw(graph,
            pos=layout,
            with_labels=True,
            node_size=sizes,
            node_color=colors,
            width=0.2,
            font_size=8)

    # NOTE(review): presumably 'YouYuan' is chosen so Chinese labels render;
    # confirm the font is installed on the target machine.
    plt.rcParams['font.sans-serif'] = ['YouYuan']
    plt.rcParams['axes.unicode_minus'] = False
    plt.show()
if __name__ == '__main__':
    # Seed accounts used so far (Weibo nickname -> uid). Change the key
    # below to crawl from a different starting user.
    seeds = {
        '头顶戴朵花': '1913880370',
        'sven_shi': '2382064902',
        '史前怪物嗷呜': '5565609898',
        '张家界事儿': '6000209884',
        '是柠檬呀柠檬呀': '1972174013',
    }
    uid = seeds['是柠檬呀柠檬呀']
    cache_path = './interestList.json'

    # Resume from a previous crawl when a usable cache exists. A missing,
    # unreadable or corrupt file simply means starting from scratch;
    # OSError covers file-system problems and ValueError covers both JSON
    # and text-decoding errors. KeyboardInterrupt/SystemExit are no longer
    # swallowed as they were by the bare `except:`.
    try:
        with open(cache_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError):
        data = {}
    # The crawler needs a uid -> record mapping; ignore any other payload.
    if not isinstance(data, dict):
        data = {}

    # Crawl 3 levels deep, following at most 5 accounts per user.
    data = deepSearchList(data, uid, 3, 5)

    with open(cache_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False)

    print(data)
    print(len(data))
    drawRelationGraph(data)