-
Notifications
You must be signed in to change notification settings - Fork 0
/
fansInfoSpider.py
144 lines (134 loc) · 5.02 KB
/
fansInfoSpider.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import requests
import json
import matplotlib.pyplot as plt
from spiderUtils import *
def getFansList(uid, num):
    """Collect profile information for at most ``num`` fans of a Weibo user.

    Pages through the m.weibo.cn fans listing for ``uid``, resolves every
    listed fan with ``getUserInfo`` and, after each page, rewrites
    ``./fansDataCache.json`` with everything gathered so far.

    Args:
        uid: Id of the user whose fans are listed.
        num: Maximum number of fans to collect.

    Returns:
        A dict with two keys: ``'userInfo'`` (the ``getUserInfo`` result for
        ``uid``) and ``'fans'`` (a list of ``getUserInfo`` results, one per
        collected fan, never longer than ``num``).
    """
    fans_data = {'userInfo': getUserInfo(uid)}
    owner = fans_data['userInfo']
    print('userInfo:{}\t{}\t{}'.format(owner['id'], owner['name'], owner['gender']))
    fans_data['fans'] = []
    fans = fans_data['fans']

    page = 1
    # The limit is checked before each request so that no page is fetched
    # once enough fans have been collected.
    while len(fans) < num:
        url = 'https://m.weibo.cn/api/container/getIndex?containerid=231051_-_fans_-_{}&since_id={}'.format(uid, page)
        payload = json.loads(getHtmlText(url))
        if payload['ok'] == 0:
            # The API reports ok == 0 when it has nothing (more) to return.
            break

        for fans_item in payload['data']['cards'][-1]['card_group']:
            if len(fans) >= num:
                # Stop mid-page so the result never exceeds the requested size.
                break
            fan_info = getUserInfo(fans_item['user']['id'])
            fans.append(fan_info)
            print('fans{}\tInfo:\t{}\t{}\t{}'.format(len(fans),
                                                     fan_info['id'],
                                                     fan_info['name'],
                                                     fan_info['gender']))

        # Persist progress after every page so a crash does not lose the crawl.
        with open('./fansDataCache.json', 'w', encoding='utf-8') as f:
            f.write(json.dumps(fans_data, ensure_ascii=False))
        page += 1

    return fans_data
if __name__ == '__main__':
    # Script entry point: load the crawled fan data (crawling it first if no
    # usable local copy exists) and plot one statistic about the fans.

    # Alternative account (nickname roughly "It's Lemon, Lemon"):
    # uid = '1972174013'
    # Account analysed: Cai Xukun
    uid = '1776448504'

    try:
        with open('./fansData.json', 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError: the file is missing or unreadable.
        # ValueError: the file is not valid UTF-8 / JSON (JSONDecodeError and
        # UnicodeDecodeError are both subclasses).
        # Anything else (e.g. Ctrl-C) is deliberately not caught, so it cannot
        # silently trigger a full re-crawl.
        data = getFansList(uid, 25000)
        with open('./fansData.json', 'w', encoding='utf-8') as f:
            f.write(json.dumps(data, ensure_ascii=False))

    # --- Gender ratio pie chart (disabled) ---
    # The literals '男' / '女' mean male / female.
    # male_count = 0
    # female_count = 0
    # print(data)
    # for fansInfo in data['fans']:
    #     if fansInfo['gender'] == '男':
    #         male_count += 1
    #     else:
    #         female_count += 1
    #
    # name_list = ['男', '女']
    # num_list = [male_count, female_count]
    # colors = ['lightblue', 'pink']
    # # Square figure so the pie is drawn as a circle
    # plt.figure(1, figsize=(6, 6))
    # # Which wedge is pulled out, and by how much
    # expl = [0, 0.1]
    # plt.pie(x=num_list, explode=expl, labels=name_list, autopct='%3.1f %%', colors=colors, shadow=True)
    # plt.rcParams['font.sans-serif'] = ['YouYuan']
    # plt.rcParams['axes.unicode_minus'] = False
    # plt.show()

    # --- Histogram of each fan's own follower count (disabled) ---
    # name_list = ['0-15', '15-50', '50-100', '100-200', '>200']
    # num_list = [0, 0, 0, 0, 0]
    # for fansInfo in data['fans']:
    #     if fansInfo['fans_count'] < 15:
    #         num_list[0] += 1
    #     elif fansInfo['fans_count'] < 50:
    #         num_list[1] += 1
    #     elif fansInfo['fans_count'] < 100:
    #         num_list[2] += 1
    #     elif fansInfo['fans_count'] < 200:
    #         num_list[3] += 1
    #     else:
    #         num_list[4] += 1
    # plt.bar(range(len(num_list)), num_list, color=['r', 'g', 'b'], tick_label=name_list)
    # plt.savefig('./粉丝的个人粉丝数.png')
    # plt.show()

    # --- Histogram of how many accounts each fan follows (disabled) ---
    # name_list = ['0-15', '15-50', '50-100', '100-200', '>200']
    # num_list = [0, 0, 0, 0, 0]
    # for fansInfo in data['fans']:
    #     if fansInfo['follow_count'] < 15:
    #         num_list[0] += 1
    #     elif fansInfo['follow_count'] < 50:
    #         num_list[1] += 1
    #     elif fansInfo['follow_count'] < 100:
    #         num_list[2] += 1
    #     elif fansInfo['follow_count'] < 200:
    #         num_list[3] += 1
    #     else:
    #         num_list[4] += 1
    # plt.bar(range(len(num_list)), num_list, color=['r', 'g', 'b'], tick_label=name_list)
    # plt.savefig('./粉丝关注数情况.png')
    # plt.show()

    # --- Histogram of how many posts each fan has published (active) ---
    # Buckets on 'statuses_count', the same key the fake-fan check below uses
    # for the post count; the previous revision bucketed 'follow_count' here,
    # which reproduced the "follows" chart under the "posts" file name.
    name_list = ['0-15', '15-40', '40-80', '80-150', '>150']
    num_list = [0, 0, 0, 0, 0]
    for fansInfo in data['fans']:
        if fansInfo['statuses_count'] < 15:
            num_list[0] += 1
        elif fansInfo['statuses_count'] < 40:
            num_list[1] += 1
        elif fansInfo['statuses_count'] < 80:
            num_list[2] += 1
        elif fansInfo['statuses_count'] < 150:
            num_list[3] += 1
        else:
            num_list[4] += 1
    # An explicit colour list is required: recent Matplotlib releases no
    # longer accept the string 'rgb' as a sequence of one-letter colours.
    # The list is cycled across the five bars.
    plt.bar(range(len(num_list)), num_list, color=['r', 'g', 'b'], tick_label=name_list)
    plt.savefig('./粉丝发微博数的情况.png')
    plt.show()

    # --- Fake-fan ratio pie chart (disabled) ---
    # A fan is counted as fake when it has at most 15 posts and at most 10
    # followers. The labels '假粉丝' / '真粉丝' mean fake fans / real fans.
    # fans_fake = 0
    # fans_real = 0
    # print(data)
    # for fansInfo in data['fans']:
    #     if fansInfo['statuses_count'] <= 15 and fansInfo['fans_count'] <= 10:
    #         fans_fake += 1
    #     else:
    #         fans_real += 1
    #
    # name_list = ['假粉丝', '真粉丝']
    # num_list = [fans_fake, fans_real]
    # colors = ['gray', 'red']
    # # Square figure so the pie is drawn as a circle
    # plt.figure(1, figsize=(6, 6))
    # # Which wedge is pulled out, and by how much
    # expl = [0, 0.1]
    # plt.pie(x=num_list, explode=expl, labels=name_list, autopct='%3.1f %%', colors=colors, shadow=True)
    # plt.rcParams['font.sans-serif'] = ['YouYuan']
    # plt.rcParams['axes.unicode_minus'] = False
    # plt.show()