forked from wistbean/learn_python3_spider
-
Notifications
You must be signed in to change notification settings - Fork 0
/
wechat_public_account.py
78 lines (62 loc) · 1.95 KB
/
wechat_public_account.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#-*- coding:UTF-8 -*-
import json
import time
import pdfkit
import requests
# Endpoint queried (with action=getmsg) for a public account's message list.
base_url = 'https://mp.weixin.qq.com/mp/profile_ext'

# Do not copy the author's values: every '写你自己的' ("fill in your own")
# placeholder below must be replaced with values captured from your own
# WeChat session, otherwise the request will not be accepted.
headers = {
    # Header tokens must not contain inner spaces ('keep - alive' and
    # '* / *' are not valid values).
    'Connection': 'keep-alive',
    'Accept': '*/*',
    'User-Agent': '写你自己的',
    'Referer': '写你自己的',
    # 'br' is intentionally not advertised: requests only decodes brotli
    # when an optional brotli package is installed, and an undecoded body
    # would break the JSON parsing of the response.
    'Accept-Encoding': 'gzip, deflate',
}

# Session cookies sent with every request; the placeholders must be replaced
# with your own values as well.
cookies = {
    'devicetype': 'iOS12.2',
    'lang': 'zh_CN',
    'pass_ticket': '写你自己的',
    'version': '1700042b',
    'wap_sid2': '写你自己的',
    'wxuin': '3340537333',
}
def get_params(offset):
    """Build the query parameters for one page of the message-list request.

    Args:
        offset: Pagination offset of the page to request; it is sent as a
            string in the ``offset`` parameter.

    Returns:
        A new dict of string parameters. Entries set to '写你自己的'
        ("fill in your own") are placeholders that must be replaced with
        values from your own session.
    """
    return {
        'action': 'getmsg',
        '__biz': '写你自己的',
        # 'f' used to be listed twice in this literal; a single entry
        # produces exactly the same mapping.
        'f': 'json',
        'offset': str(offset),
        'count': '10',
        'is_ok': '1',
        'scene': '126',
        'uin': '777',
        'key': '777',
        'pass_ticket': '写你自己的',
        'appmsg_token': '写你自己的',
        'x5': '0',
    }
def get_list_data(offset):
    """Fetch message-list pages starting at *offset* and save articles as PDFs.

    Each page is requested from ``base_url`` with the module-level
    ``headers`` and ``cookies``. Every entry whose
    ``app_msg_ext_info.copyright_stat`` equals 11 (reported by this script
    as an original article) is rendered to a PDF with pdfkit. Paging
    continues, one second apart, while the response reports
    ``can_msg_continue == 1``.

    Args:
        offset: Offset of the first page to request.

    Raises:
        requests.RequestException: If a page request fails or times out.
        KeyError: If a page response lacks the expected top-level fields
            (presumably what happens when the session values are invalid
            or expired - confirm against a real response).
        ValueError: If a response body is not valid JSON.
    """
    # Iterate rather than recurse once per page, so the number of pages is
    # not limited by the interpreter's recursion depth.
    while True:
        res = requests.get(
            base_url,
            headers=headers,
            params=get_params(offset),
            cookies=cookies,
            timeout=30,  # never hang forever on a stalled connection
        )
        page = json.loads(res.text)
        can_msg_continue = page['can_msg_continue']
        next_offset = page['next_offset']
        # 'general_msg_list' is itself a JSON-encoded string.
        messages = json.loads(page['general_msg_list'])['list']

        for message in messages:
            try:
                msg_info = message['app_msg_ext_info']
                if msg_info['copyright_stat'] != 11:
                    continue
                title = msg_info['title']
                content_url = msg_info['content_url']
            except (KeyError, TypeError):
                # Entry without the article fields: "not an article".
                print('不是图文')
                continue

            try:
                # Choose your own storage path. '/' in a title would be
                # read as a directory separator, so it is replaced.
                pdf_path = ('/home/wistbean/wechat_article/'
                            + title.replace('/', '_') + '.pdf')
                pdfkit.from_url(content_url, pdf_path)
            except Exception as exc:
                # Best effort per article: report the failure ("save
                # failed") and carry on with the remaining entries.
                print('保存失败:%s : %s' % (title, exc))
                continue
            print('获取到原创文章:%s : %s' % (title, content_url))

        if can_msg_continue != 1:
            break
        time.sleep(1)  # pause between page requests
        offset = next_offset
# Script entry point: start crawling from the first page of the message list.
if __name__ == "__main__":
    get_list_data(offset=0)