-
Notifications
You must be signed in to change notification settings - Fork 251
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 966ea47
Showing
15 changed files
with
1,062 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# webvideo-downloader | ||
|
||
网站视频下载器,用于下载一些网站中可以在线播放的视频,当前支持Bilibili(单P/多P),爱奇艺,腾讯视频,芒果TV的视频下载。 | ||
|
||
> **What you can watch determines what you can download.** | ||
> | ||
> 你只能下载你或你的账号可以在线观看的视频,本项目没有VIP破解功能。 | ||
|
||
|
||
## 🔨 Usage | ||
|
||
本项目分为两部分,violentmonkey(暴力猴)脚本用于在浏览器中提取视频链接,downloader程序负责视频文件的下载与合并。 | ||
|
||
#### ViolentMonkey | ||
|
||
1. Chrome浏览器安装**ViolentMonkey**插件或者其他浏览器类似插件,都一样 | ||
|
||
2. 导入**violentmonkey**目录中的几个脚本,按需启用 | ||
|
||
3. 打开相对应的视频网站,点击某一个视频 | ||
|
||
4. 网页会自动弹出视频链接的窗口,右键复制链接地址即可 | ||
|
||
data:image/s3,"s3://crabby-images/7f2f6/7f2f6d50d55f10e0dae3ca2f334eac62c5a4561b" alt="bilibili" | ||
|
||
#### Downloader | ||
|
||
- 下载单个视频时,运行`python common.py`,粘贴上面暴力猴解析到的视频链接,然后输入保存的文件名即可 | ||
|
||
- 下载Bilibili的多个分P视频时,运行`python bilibiliMultiPart.py`,粘贴暴力猴链接,输入文件名和首、尾即可 | ||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
# -*- coding:utf-8 -*- | ||
import requests | ||
import re | ||
import json | ||
import dispatcher | ||
import utils | ||
|
||
def getAllPartInfo(url):
    """Fetch a Bilibili video page and list every part it contains.

    The page embeds its state as ``window.__INITIAL_STATE__=...`` inside a
    ``<script>`` tag. When that state has an ``epList`` key its entries are
    used; otherwise ``videoData.pages`` is used.

    Args:
        url: Address of the video page (without a ``?p=`` query).

    Returns:
        A list of dicts with the keys ``'cid'``, ``'name'`` and ``'url'``,
        one per part, in page order.

    Raises:
        ValueError: If the embedded state cannot be found in the page.
    """
    headers = {
        "referer": "https://www.bilibili.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36",
    }
    resp = requests.get(url, headers=headers)
    content = resp.content.decode('utf-8')

    # Read the name and cid of every part from the embedded page state
    match = re.search(r'<script>window\.__INITIAL_STATE__=(.+?});.+?</script>', content)
    if match is None:
        # Fail with a readable message instead of AttributeError on None.group
        raise ValueError('window.__INITIAL_STATE__ not found in page: ' + url)
    data = json.loads(match.group(1))
    isOpera = 'epList' in data
    pages = data['epList'] if isOpera else data['videoData']['pages']

    allPartInfo = []
    for page in pages:
        if isOpera:
            name = page['longTitle']
            # Swap the trailing number of the page url for this entry's id
            partUrl = re.sub(r'\d+$', str(page['id']), url)
        else:
            name = page['part']
            partUrl = url + '?p=' + str(page['page'])

        allPartInfo.append({
            'cid': page['cid'],
            'name': name,
            'url': partUrl,
        })

    return allPartInfo
|
||
|
||
# 获取指定p的视频url | ||
def getPartUrl(partUrl, partCid, basePlayInfoUrl, sessCookie):
    """Resolve the media url(s) of a single part.

    The part page is fetched first and searched for an embedded
    ``window.__playinfo__`` script. If it is absent, the play info is
    requested from ``basePlayInfoUrl`` with ``&cid=<partCid>`` appended,
    sending ``sessCookie`` as the ``Cookie`` header.

    Args:
        partUrl: Page url of the part.
        partCid: cid of the part, used only for the fallback request.
        basePlayInfoUrl: Base url of the play-info request.
        sessCookie: Cookie header value for the fallback request.

    Returns:
        A ``'|'``-joined string: ``"<audio>|<video>"`` (highest bandwidth of
        each) when the payload has a ``dash`` entry, or every segment url in
        order when it has a ``durl`` entry.

    Raises:
        ValueError: If the payload has neither ``dash`` nor ``durl``.
    """
    headers = {
        "referer": "https://www.bilibili.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36",
        "Cookie": "CURRENT_FNVAL=16",
    }
    resp = requests.get(partUrl, headers=headers)
    content = resp.content.decode('utf-8')
    match = re.search(r'<script>window\.__playinfo__=(.+?)</script>', content)

    if match:
        data = json.loads(match.group(1))['data']
    else:
        playInfoUrl = basePlayInfoUrl + '&cid=' + str(partCid)
        # presumably utils.getText returns the response body as text - confirm
        data = json.loads(utils.getText(playInfoUrl, {'Cookie': sessCookie}))['data']

    if 'dash' in data:
        # Audio and video are delivered as separate streams
        dash = data['dash']
        # max() returns the first item with the largest key, the same item a
        # stable descending sort would put first, without mutating the payload
        bestAudio = max(dash['audio'], key=lambda item: item['bandwidth'])
        bestVideo = max(dash['video'], key=lambda item: item['bandwidth'])
        return bestAudio['baseUrl'] + '|' + bestVideo['baseUrl']

    if 'durl' in data:
        # The video is delivered as a list of segments
        return '|'.join(each['url'] for each in data['durl'])

    # Previously this fell through to an UnboundLocalError
    raise ValueError("play info contains neither 'dash' nor 'durl': " + partUrl)
|
||
def downloadRangeParts(linksurl, baseFileName, startP, endP):
    """Download parts ``startP`` through ``endP`` (1-based, inclusive).

    Args:
        linksurl: Either a plain page url, or
            ``"<page url>|<play-info base url>|<session cookie>"``.
        baseFileName: Name inserted into every output file name.
        startP: Number of the first part (int or numeric string).
        endP: Number of the last part (int or numeric string).

    Raises:
        ValueError: If a non-empty range reaches outside the available parts.
    """
    # maxsplit=2 keeps any '|' inside the cookie intact; missing fields
    # default to '' so a bare url or a two-field link does not break unpacking
    fields = linksurl.split('|', 2)
    baseUrl, basePlayInfoUrl, sessCookie = (fields + ['', ''])[:3]

    baseUrl, startP, endP = baseUrl.split('?')[0], int(startP), int(endP)
    allPartInfo = getAllPartInfo(baseUrl)

    # p - 1 is used as a list index below: without this check P0 would
    # silently wrap around to the last part and a too-large P would raise
    # IndexError halfway through the batch
    if startP <= endP and (startP < 1 or endP > len(allPartInfo)):
        raise ValueError(
            'part range %d-%d is outside the available parts 1-%d'
            % (startP, endP, len(allPartInfo))
        )

    print('-- 准备下载第%d - %dP' % (startP, endP))

    for p in range(startP, endP + 1):
        partInfo = allPartInfo[p - 1]
        partUrl, partCid, partName = partInfo['url'], partInfo['cid'], partInfo['name']
        combineVideoUrl = getPartUrl(partUrl, partCid, basePlayInfoUrl, sessCookie)
        fileName = 'P{:03d}__{}__{}'.format(p, baseFileName, partName)

        print('开始下载第{}P: {}'.format(p, fileName))
        dispatcher.download(combineVideoUrl, fileName)
|
||
def main():
    """Interactive loop: ask for a multi-part link, a file name and a part range.

    The range is either two numbers (first and last part) or one number
    (a single part). A malformed range is skipped and the prompts repeat.
    """
    while True:
        linksurl = input('输入油猴多p链接: ')
        baseFileName = input('输入文件名: ')
        # split() without a separator ignores leading, trailing and repeated
        # whitespace, which split(' ') turned into empty tokens
        rangeP = input('输入首、尾P(空格分隔)或单P: ').split()

        if len(rangeP) not in (1, 2) or not all(p.isdecimal() for p in rangeP):
            # Not one or two numbers: ask again instead of crashing
            continue
        startP, endP = rangeP if len(rangeP) > 1 else rangeP * 2

        downloadRangeParts(linksurl, baseFileName, startP, endP)


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# -*- coding:utf-8 -*- | ||
import sys | ||
import os | ||
import dispatcher | ||
|
||
|
||
def main():
    """Interactive loop: read a link (or local m3u8 path) and a file name,
    then hand both to ``dispatcher.download``.
    """
    while True:
        linksurl = input('输入油猴链接或本地m3u8路径:')
        filename = input('输入保存文件名:')

        # Both answers are required; otherwise just prompt again
        if linksurl and filename:
            dispatcher.download(linksurl, filename)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# -*- coding:utf-8 -*- | ||
import os | ||
|
||
# Whether the downloaded temporary files are deleted silently
delSilent = True

# Directory where temporary files are saved
tempFilePath = "../temp/"

# Directory where video files are saved
videoFilePath = "../videos/"


# Create each directory on import when it does not exist yet
for _folder in (tempFilePath, videoFilePath):
    if not os.path.exists(_folder):
        os.makedirs(_folder)
# Keep the module namespace free of the loop variable
del _folder
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
# -*- coding:utf-8 -*- | ||
import re | ||
import json | ||
import os | ||
import threading | ||
import time | ||
import config | ||
import utils | ||
|
||
# 通用m3u8下载: 下载所有ts分段并合并 | ||
def downloadM3u8(m3u8Url, fileName, headers=None):
    """Download every ``.ts`` segment of an m3u8 playlist and merge them.

    Segments are stored under ``config.tempFilePath`` and the merged result
    is written to ``config.videoFilePath`` as ``<fileName>.mp4``.

    Args:
        m3u8Url: Address of the playlist. When the module-level ``isIqiyi``
            flag is set, the address is expected to return JSON whose
            ``data.program.video`` entries carry the playlist text in an
            ``m3u8`` field.
        fileName: Output name without extension.
        headers: Optional request headers; defaults to no extra headers.
    """
    print("-- dispatcher/downloadM3u8")
    headers = {} if headers is None else headers

    # download() publishes the site flags as module globals; fall back to
    # False so calling this function directly does not raise NameError
    if globals().get('isIqiyi', False):
        data = json.loads(utils.getText(m3u8Url, headers))
        videos = [
            each for each in data['data']['program']['video']
            if 'm3u8' in each and each['m3u8'] != ''
        ]
        content = videos[0]['m3u8']
    else:
        content = utils.getText(m3u8Url, headers)

    # NOTE(review): the pattern needs at least one character after ".ts",
    # so entries ending exactly in ".ts" are not matched - confirm intended
    urls = re.findall(r'\S+\.ts\S+', content)
    print('匹配到%d段视频,开始下载...' % len(urls))

    if urls and not urls[0].startswith('http'):
        # Relative entries: prefix them with the first value returned by
        # utils.parseUrl (presumably the playlist's base path - confirm)
        path, _ = utils.parseUrl(m3u8Url)
        urls = [path + ele for ele in urls]

    suffix = '.ts'
    names = [
        os.path.join(config.tempFilePath, '%s_第%d段%s' % (fileName, index, suffix))
        for index in range(1, len(urls) + 1)
    ]

    utils.downloadAll(urls, names, headers, 2)

    fileName = os.path.join(config.videoFilePath, fileName + '.mp4')
    utils.mergePartialVideos(names, fileName)
|
||
# flv/f4v视频分段下载并合并 | ||
def downloadFlv(urls, fileName, headers=None):
    """Download the segments of an flv/f4v video and merge them.

    Segments are stored under ``config.tempFilePath``; the merged file is
    written to ``config.videoFilePath`` with the extension taken from the
    first url.

    Args:
        urls: A list of segment urls, or one string with the urls joined
            by ``'|'``.
        fileName: Output name without extension.
        headers: Optional request headers; defaults to no extra headers.
    """
    print("-- dispatcher/downloadFlv")
    headers = {} if headers is None else headers
    if isinstance(urls, str):
        urls = urls.split('|')
    # The second value of utils.parseUrl is presumably the file name part of
    # the url - confirm; only the text after its last '.' is kept
    _, suffix = utils.parseUrl(urls[0])
    suffix = '.' + suffix.rsplit('.', 1)[-1]

    print('匹配到' + str(len(urls)) + '段视频,开始下载...')

    # download() publishes the site flags as module globals; fall back to
    # False so calling this function directly does not raise NameError
    fromBilibili = globals().get('isBilibili', False)

    names = []
    for i, url in enumerate(urls):
        name = '%s_第%d段%s' % (fileName, i + 1, suffix)
        name = os.path.join(config.tempFilePath, name)
        names.append(name)

        if not fromBilibili:
            utils.download(url, name, headers)
        elif i == 0 and url.find('-80.flv') > 0:
            # The first segment of a 1080p video tends to fail with
            # multi-threaded download (official restriction).
            # Let every thread fetch 500k to reduce the cost of a failure
            fileSize = utils.getFileSize(url, headers)
            threadCount = fileSize // (1024 * 500) + 1
            # Lower parallelism to reduce the failure rate
            utils.multiThreadDownload(url, name, headers, threadCount, 8)
        else:
            utils.multiThreadDownload(url, name, headers, 16)

    fileName = os.path.join(config.videoFilePath, fileName + suffix)
    utils.mergePartialVideos(names, fileName)
|
||
# bilibili专属: 下载m4s音视频并合并 | ||
def downloadM4s(urls, fileName, headers={}):
    """Download one m4s audio stream and one m4s video stream, then merge them.

    Args:
        urls: ``"<audio url>|<video url>"``.
        fileName: Output name without extension; the merged file is written
            to ``config.videoFilePath`` as ``<fileName>.mp4``.
        headers: Request headers passed to both downloads.
    """
    print("-- dispatcher/downloadM4s")

    audioUrl, videoUrl = urls.split('|')
    # Only the text after the last '.' of parseUrl's second value is kept
    _, tail = utils.parseUrl(videoUrl)
    extension = '.' + tail.rsplit('.', 1)[-1]

    tempBase = fileName + extension
    audioName = os.path.join(config.tempFilePath, tempBase + '.audio')
    videoName = os.path.join(config.tempFilePath, tempBase + '.video')
    outputName = os.path.join(config.videoFilePath, fileName + '.mp4')

    print('匹配到一段音频和一段视频,开始下载音频和视频...')

    # Audio first, then video, each into its own temporary file
    for mediaUrl, target in ((audioUrl, audioName), (videoUrl, videoName)):
        utils.download(mediaUrl, target, headers)
    utils.mergeAudio2Video(videoName, audioName, outputName)
|
||
|
||
|
||
def download(linksurl, fileName, headers=None):
    """Pick the right downloader for ``linksurl`` and run it.

    Sets the module-level flags ``isBilibili``, ``isIqiyi`` and ``isMgtv``
    from the link, adds the matching ``referer`` header for Bilibili and
    MGTV links, and dispatches on the link type:

    * contains ``.m3u8`` or ``dash?`` -> ``downloadM3u8``
    * contains ``m4s``                -> ``downloadM4s``
    * anything else (incl. flv/f4v)   -> ``downloadFlv``

    Args:
        linksurl: Link (or ``'|'``-joined links) to download.
        fileName: Output name without extension; characters that are invalid
            in file names are replaced by ``'_'``.
        headers: Optional extra request headers. The mapping is copied and
            never modified.
    """
    global isBilibili, isIqiyi, isMgtv

    # Replace the characters Windows forbids in file names. The backslash is
    # doubled so that it is part of the set; "\:" only escaped the colon
    fileName = re.sub(r'[/\\:*?"<>|]', '_', fileName)

    # Work on a private copy: writing the referer into a shared default dict
    # (or the caller's dict) leaked it into later downloads of other sites
    headers = dict(headers) if headers else {}

    # "in" also matches at index 0, which "find(...) > 0" missed
    isBilibili = 'acgvideo.com' in linksurl or 'bili' in linksurl
    isIqiyi = 'iqiyi.com' in linksurl
    isMgtv = 'mgtv.com' in linksurl

    if isBilibili:
        headers['referer'] = 'https://www.bilibili.com/'
    elif isMgtv:
        headers['referer'] = 'https://www.mgtv.com/'

    if '.m3u8' in linksurl or 'dash?' in linksurl:
        downloadM3u8(linksurl, fileName, headers)
    elif 'm4s' in linksurl:
        downloadM4s(linksurl, fileName, headers)
    else:
        # .flv / .f4v links and every unrecognised link use the segment downloader
        downloadFlv(linksurl, fileName, headers)
Binary file not shown.
Oops, something went wrong.