# douyin_pro_2.py (forked from Jack-Cherish/python-spider)
# -*- coding:utf-8 -*-
from splinter.driver.webdriver.chrome import Options, Chrome
from splinter.browser import Browser
from contextlib import closing
import requests, json, time, re, os, sys, time
from bs4 import BeautifulSoup
class DouYin(object):
    """Interactive helper that downloads the videos posted by a Douyin user.

    The workflow (see ``run``) is: ask for a user ID, resolve it through the
    search API, list the user's posts, then download every video into a
    directory named after the user's nickname.
    """

    # Share description carried by posts that have no description of their
    # own; such posts are saved under a running number instead.
    _DEFAULT_SHARE_DESC = '抖音-原创音乐短视频社区'

    # Upper bound on search requests before giving up on a user ID.
    _MAX_SEARCH_ATTEMPTS = 5

    def __init__(self, width = 500, height = 300):
        """
        Create the downloader.

        Parameters:
            width: accepted for backward compatibility; not used by this class
            height: accepted for backward compatibility; not used by this class
        """

    @staticmethod
    def _find_user(user_list, user_id):
        """Return the ``user_info`` dict whose ``unique_id`` equals *user_id*, or None."""
        for entry in user_list:
            info = entry.get('user_info') or {}
            if info.get('unique_id') == user_id:
                return info
        return None

    @staticmethod
    def _sanitize_filename(name):
        """Return *name* with every ``\\`` and ``/`` removed so it is a single path component."""
        return name.replace('\\', '').replace('/', '')

    def get_video_urls(self, user_id):
        """
        Resolve a user ID and collect the share links of that user's videos.

        Parameters:
            user_id: ID to look up; it is used as the search keyword and
                compared with the ``unique_id`` of every search result
        Returns:
            video_names: list of file names, '<share_desc>.mp4', or '<n>.mp4'
                for posts that only carry the default share description
            video_urls: list of share-page URLs, parallel to video_names
            nickname: nickname of the matched user
        Raises:
            ValueError: if user_id is empty
            LookupError: if no search result matches user_id after
                ``_MAX_SEARCH_ATTEMPTS`` requests
        """
        from urllib.parse import quote

        user_id = str(user_id)
        if not user_id:
            raise ValueError('user_id must not be empty')

        headers = {
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'accept-encoding': 'gzip, deflate, br',
            'accept-language': 'zh-CN,zh;q=0.9',
            'cache-control': 'max-age=0',
            'upgrade-insecure-requests': '1',
            'user-agent': 'Mozilla/5.0 (Linux; U; Android 5.1.1; zh-cn; MI 4S Build/LMY47V) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/53.0.2785.146 Mobile Safari/537.36 XiaoMi/MiuiBrowser/9.1.3',
        }
        # The keyword is percent-encoded so characters such as '&' or spaces
        # cannot corrupt the query string.
        search_url = 'https://api.amemv.com/aweme/v1/discover/search/?cursor=0&keyword=%s&count=10&type=1&retry_type=no_retry&iid=17900846586&device_id=34692364855&ac=wifi&channel=xiaomi&aid=1128&app_name=aweme&version_code=162&version_name=1.6.2&device_platform=android&ssmix=a&device_type=MI+5&device_brand=Xiaomi&os_api=24&os_version=7.0&uuid=861945034132187&openudid=dc451556fc0eeadb&manifest_version_code=162&resolution=1080*1920&dpi=480&update_version_code=1622' % quote(user_id, safe='')

        # Retry a bounded number of times instead of looping forever, and
        # look at every returned user rather than only the first one.
        user_info = None
        for attempt in range(self._MAX_SEARCH_ATTEMPTS):
            if attempt:
                time.sleep(1)
            # NOTE: verify=False disables TLS certificate verification.
            req = requests.get(url=search_url, verify=False)
            html = json.loads(req.text)
            user_info = self._find_user(html.get('user_list') or [], user_id)
            if user_info is not None:
                break
        if user_info is None:
            raise LookupError(
                'no Douyin user with unique_id %r found after %d search attempts'
                % (user_id, self._MAX_SEARCH_ATTEMPTS)
            )

        # Request all posts in one page by using the user's post count as the page size.
        user_url = 'https://www.douyin.com/aweme/v1/aweme/post/?user_id=%s&max_cursor=0&count=%s' % (user_info['uid'], user_info['aweme_count'])
        req = requests.get(url=user_url, headers=headers, verify=False)
        html = json.loads(req.text)

        video_names = []
        video_urls = []
        untitled = 0
        for each in html['aweme_list']:
            share_info = each['share_info']
            share_desc = share_info['share_desc']
            if share_desc == self._DEFAULT_SHARE_DESC:
                # No description of its own: number the file instead.
                untitled += 1
                video_names.append('%d.mp4' % untitled)
            else:
                video_names.append(share_desc + '.mp4')
            video_urls.append(share_info['share_url'])
        return video_names, video_urls, user_info['nickname']

    def get_download_url(self, video_url, watermark_flag):
        """
        Extract the downloadable video address from a share page.

        Parameters:
            video_url: URL of the video's share page
            watermark_flag: truthy to keep the watermarked address, falsy to
                request the address without watermark
        Returns:
            download_url: address the video file can be downloaded from
        """
        # NOTE: verify=False disables TLS certificate verification.
        req = requests.get(url=video_url, verify=False)
        bf = BeautifulSoup(req.text, 'lxml')
        # The page data is expected in the last <script> tag as `var data = [...];`.
        script = bf.find_all('script')[-1]
        video_url_js = re.findall(r'var data = \[(.+)\];', str(script))[0]
        video_html = json.loads(video_url_js)
        download_url = video_html['video']['play_addr']['url_list'][0]
        if not watermark_flag:
            # Swapping 'playwm' for 'play' selects the video without watermark.
            download_url = download_url.replace('playwm', 'play')
        return download_url

    def video_downloader(self, video_url, video_name, watermark_flag=False):
        """
        Download one video to disk while printing progress to stdout.

        Parameters:
            video_url: URL of the video's share page
            video_name: path of the file to write
            watermark_flag: whether to download the watermarked video
        Returns:
            None
        """
        size = 0
        chunk_size = 1024
        video_url = self.get_download_url(video_url, watermark_flag=watermark_flag)
        # NOTE: verify=False disables TLS certificate verification.
        with closing(requests.get(video_url, stream=True, verify=False)) as response:
            if response.status_code != 200:
                sys.stdout.write(' [下载失败]:HTTP %d\n' % response.status_code)
                return
            # The header may be absent (e.g. chunked transfer); 0 means "unknown size".
            content_size = int(response.headers.get('content-length', 0))
            if content_size > 0:
                sys.stdout.write(' [文件大小]:%0.2f MB\n' % (content_size / chunk_size / 1024))
            with open(video_name, "wb") as file:
                for data in response.iter_content(chunk_size=chunk_size):
                    file.write(data)
                    size += len(data)
                    if content_size > 0:
                        sys.stdout.write(' [下载进度]:%.2f%%' % float(size / content_size * 100) + '\r')
                    else:
                        # Total size unknown: report the amount received instead of a percentage.
                        sys.stdout.write(' [已下载]:%0.2f MB' % (size / chunk_size / 1024) + '\r')
                    sys.stdout.flush()

    def run(self):
        """
        Interactive entry point: prompt for input and download every video.

        Parameters:
            None
        Returns:
            None
        """
        self.hello()
        user_id = input('请输入ID(例如145651081):')
        watermark_flag = int(input('是否下载带水印的视频(0-否,1-是):'))
        video_names, video_urls, nickname = self.get_video_urls(user_id)
        # The nickname is free text, so strip path separators before using it as a directory name.
        folder = self._sanitize_filename(nickname)
        os.makedirs(folder, exist_ok=True)
        print('视频下载中:共有%d个作品!\n' % len(video_urls))
        for num, (video_name, video_url) in enumerate(zip(video_names, video_urls), start=1):
            print(' 解析第%d个视频链接 [%s] 中,请稍后!\n' % (num, video_url))
            # Remove both kinds of separator so the name cannot escape the folder.
            target = os.path.join(folder, self._sanitize_filename(video_name))
            # Only the answer 1 selects the watermarked video.
            self.video_downloader(video_url, target, watermark_flag=(watermark_flag == 1))
            print('\n')
        print('下载完成!')

    def hello(self):
        """
        Print the welcome banner.

        Parameters:
            None
        Returns:
            None
        """
        print('*' * 100)
        print('\t\t\t\t抖音App视频下载小助手')
        print('\t\t作者:Jack Cui')
        print('*' * 100)
if __name__ == '__main__':
    # Script entry point: start the interactive downloader only when this
    # file is executed directly, not when it is imported.
    douyin = DouYin()
    douyin.run()