Skip to content

Commit

Permalink
version 1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
jaysonlong committed May 26, 2020
0 parents commit 966ea47
Show file tree
Hide file tree
Showing 15 changed files with 1,062 additions and 0 deletions.
32 changes: 32 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# webvideo-downloader

网站视频下载器,用于下载一些网站中可以在线播放的视频,当前支持Bilibili(单P/多P),爱奇艺,腾讯视频,芒果TV的视频下载。

> **What you can watch determines what you can download.**
>
> 你只能下载你或你的账号可以在线观看的视频,本项目没有VIP破解功能。


## 🔨 Usage

本项目分为两部分,violentmonkey(暴力猴)脚本用于在浏览器中提取视频链接,downloader程序负责视频文件的下载与合并。

#### ViolentMonkey

1. Chrome浏览器安装**ViolentMonkey**插件或者其他浏览器类似插件,都一样

2. 导入**violentmonkey**目录中的几个脚本,按需启用

3. 打开相对应的视频网站,点击某一个视频

4. 网页会自动弹出视频链接的窗口,右键复制链接地址即可

![bilibili](img/bilibili.png)

#### Downloader

- 下载单个视频时,运行`python common.py`,粘贴上面暴力猴解析到的视频链接,然后输入保存的文件名即可

- 下载Bilibili的多个分P视频时,运行`python bilibiliMultiPart.py`,粘贴暴力猴链接,输入文件名和首、尾P即可

Binary file added downloader/__pycache__/config.cpython-36.pyc
Binary file not shown.
Binary file added downloader/__pycache__/dispatcher.cpython-36.pyc
Binary file not shown.
Binary file added downloader/__pycache__/utils.cpython-36.pyc
Binary file not shown.
105 changes: 105 additions & 0 deletions downloader/bilibiliMultiPart.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# -*- coding:utf-8 -*-
import requests
import re
import json
import dispatcher
import utils

def getAllPartInfo(url):
    """Fetch a Bilibili video page and list every part it contains.

    The page embeds its state as JSON inside a
    ``window.__INITIAL_STATE__`` script tag. When that state has an
    ``epList`` key the entries of ``epList`` are used; otherwise the parts
    are read from ``videoData.pages``.

    Args:
        url: Page URL without a query string. For ``epList`` pages the
            trailing digits of the URL are replaced by each entry's id;
            otherwise ``?p=<page>`` is appended.

    Returns:
        A list of dicts with the keys ``cid``, ``name`` and ``url``, in the
        order the parts appear in the page state.

    Raises:
        ValueError: If the embedded state JSON cannot be found in the page.
    """
    headers = {
        "referer": "https://www.bilibili.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36",
    }
    resp = requests.get(url, headers=headers)
    content = resp.content.decode('utf-8')

    # Extract part names and cids from the embedded page state.
    match = re.search(r'<script>window\.__INITIAL_STATE__=(.+?});.+?</script>', content)
    if match is None:
        # Fail with a clear message instead of AttributeError on None.group().
        raise ValueError('window.__INITIAL_STATE__ not found in page: ' + url)
    data = json.loads(match.group(1))
    isOpera = 'epList' in data
    pages = data['epList'] if isOpera else data['videoData']['pages']

    allPartInfo = []
    for page in pages:
        if isOpera:
            name = page['longTitle']
            partUrl = re.sub(r'\d+$', str(page['id']), url)
        else:
            name = page['part']
            partUrl = url + '?p=' + str(page['page'])

        allPartInfo.append({
            'cid': page['cid'],
            'name': name,
            'url': partUrl,
        })

    return allPartInfo


# Resolve the media URL(s) of the given part.
def getPartUrl(partUrl, partCid, basePlayInfoUrl, sessCookie):
    """Return the media URL(s) of one part, joined with ``'|'``.

    The part page is fetched and searched for an embedded
    ``window.__playinfo__`` JSON object. When the page has none, the play
    info is requested from ``basePlayInfoUrl + '&cid=<partCid>'`` through
    ``utils.getText`` with ``sessCookie`` sent as the ``Cookie`` header.

    Args:
        partUrl: URL of the part's page.
        partCid: cid of the part, used only for the fallback request.
        basePlayInfoUrl: Base URL of the fallback play-info request.
        sessCookie: Cookie header value for the fallback request.

    Returns:
        For ``dash`` data (separate audio and video streams):
        ``'<audio baseUrl>|<video baseUrl>'`` using the entry with the
        highest ``bandwidth`` of each kind. For ``durl`` data (segmented
        video): every segment ``url`` joined with ``'|'``.

    Raises:
        ValueError: If the play info contains neither ``dash`` nor ``durl``
            (previously this surfaced as an UnboundLocalError), or if a
            ``dash`` stream list is empty.
    """
    def getBandwidth(item):
        return item['bandwidth']

    headers = {
        "referer": "https://www.bilibili.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36",
        "Cookie": "CURRENT_FNVAL=16",
    }
    resp = requests.get(partUrl, headers=headers)
    content = resp.content.decode('utf-8')
    match = re.search(r'<script>window\.__playinfo__=(.+?)</script>', content)

    if match:
        rawPlayInfo = match.group(1)
    else:
        playInfoUrl = basePlayInfoUrl + '&cid=' + str(partCid)
        rawPlayInfo = utils.getText(playInfoUrl, {'Cookie': sessCookie})

    # A missing or null "data" member falls through to the ValueError below.
    data = json.loads(rawPlayInfo).get('data') or {}

    if 'dash' in data:
        # Audio and video are delivered as separate streams.
        dash = data['dash']
        # max() returns the first entry with the highest bandwidth, the same
        # entry a stable descending sort would place at index 0.
        bestAudio = max(dash['audio'], key=getBandwidth)
        bestVideo = max(dash['video'], key=getBandwidth)
        return bestAudio['baseUrl'] + '|' + bestVideo['baseUrl']

    if 'durl' in data:
        # The video is delivered as a list of segments.
        return '|'.join(each['url'] for each in data['durl'])

    raise ValueError('play info has neither "dash" nor "durl": ' + partUrl)

def downloadRangeParts(linksurl, baseFileName, startP, endP):
    """Download parts ``startP`` through ``endP`` (1-based, inclusive).

    Args:
        linksurl: Either a plain page URL, or three fields joined with
            ``'|'``: page URL, base play-info URL and cookie string.
        baseFileName: Name placed in the middle of every output file name.
        startP: Number of the first part to download (int or numeric str).
        endP: Number of the last part to download (int or numeric str).
            Values past the last available part are clamped to it.

    Raises:
        ValueError: If ``startP`` is smaller than 1. A non-positive value
            would otherwise wrap around through negative list indexing and
            download the wrong part.
    """
    if '|' in linksurl:
        baseUrl, basePlayInfoUrl, sessCookie = linksurl.split('|')
    else:
        baseUrl, basePlayInfoUrl, sessCookie = linksurl, '', ''

    # Drop any query string; getAllPartInfo builds the per-part URLs itself.
    baseUrl, startP, endP = baseUrl.split('?')[0], int(startP), int(endP)
    allPartInfo = getAllPartInfo(baseUrl)

    totalParts = len(allPartInfo)
    if startP < 1:
        raise ValueError('startP must be >= 1, got %d' % startP)
    if endP > totalParts:
        # Clamp instead of failing with IndexError after the last part.
        print('-- 共%dP,结束P由%d调整为%d' % (totalParts, endP, totalParts))
        endP = totalParts

    print('-- 准备下载第%d - %dP' % (startP, endP))

    for p in range(startP, endP + 1):
        partInfo = allPartInfo[p - 1]
        partUrl, partCid, partName = partInfo['url'], partInfo['cid'], partInfo['name']
        combineVideoUrl = getPartUrl(partUrl, partCid, basePlayInfoUrl, sessCookie)
        fileName = 'P{:03d}__{}__{}'.format(p, baseFileName, partName)

        print('开始下载第{}P: {}'.format(p, fileName))
        dispatcher.download(combineVideoUrl, fileName)

def main():
    """Prompt for a link, a file name and a part range, then download.

    The range prompt accepts one number (a single part) or two numbers
    separated by whitespace (first and last part). Anything else is
    reported and the prompts start over. The loop has no exit condition.
    """
    while True:
        linksurl = input('输入油猴多p链接: ')
        baseFileName = input('输入文件名: ')
        # split() without an argument tolerates repeated or surrounding
        # whitespace, which split(' ') turned into empty fields.
        rangeP = input('输入首、尾P(空格分隔)或单P: ').split()
        if len(rangeP) == 1:
            rangeP = rangeP * 2

        try:
            # Wrong field count and non-numeric text both raise ValueError.
            startP, endP = map(int, rangeP)
        except ValueError:
            print('输入无效: 请输入一个或两个以空格分隔的数字')
            continue

        downloadRangeParts(linksurl, baseFileName, startP, endP)

if __name__ == '__main__':
    main()
18 changes: 18 additions & 0 deletions downloader/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# -*- coding:utf-8 -*-
import sys
import os
import dispatcher


def main():
    """Repeatedly prompt for a link and a file name and download the pair.

    A round in which either answer is empty is skipped and the prompts are
    shown again. The loop has no exit condition.
    """
    while True:
        link = input('输入油猴链接或本地m3u8路径:')
        saveName = input('输入保存文件名:')

        if link and saveName:
            dispatcher.download(link, saveName)


if __name__ == "__main__":
    main()
18 changes: 18 additions & 0 deletions downloader/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# -*- coding:utf-8 -*-
import os

# Whether downloaded temporary files are deleted silently.
delSilent = True

# Directory where temporary files are saved.
tempFilePath = "../temp/"

# Directory where finished video files are saved.
videoFilePath = "../videos/"


# Create both directories at import time. exist_ok=True replaces the
# os.path.exists() check followed by os.makedirs(), which could fail if the
# directory appeared between the check and the creation.
os.makedirs(tempFilePath, exist_ok=True)
os.makedirs(videoFilePath, exist_ok=True)
113 changes: 113 additions & 0 deletions downloader/dispatcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# -*- coding:utf-8 -*-
import re
import json
import os
import threading
import time
import config
import utils

# Generic m3u8 download: fetch every ts segment and merge them.
def downloadM3u8(m3u8Url, fileName, headers=None):
    """Download every ``.ts`` segment of an m3u8 playlist and merge them.

    Segments are saved under ``config.tempFilePath`` and merged into
    ``<fileName>.mp4`` under ``config.videoFilePath``.

    NOTE: reads the module-level flag ``isIqiyi``, which ``download()``
    sets. When it is true, ``m3u8Url`` is expected to return JSON and the
    playlist text is taken from the first entry of
    ``data.program.video`` that has a non-empty ``m3u8`` member.

    Args:
        m3u8Url: URL of the playlist (or of the iqiyi JSON response).
        fileName: Output name without extension.
        headers: Optional HTTP headers passed to the ``utils`` helpers.

    Raises:
        ValueError: If the iqiyi response contains no playlist entry.
    """
    print("-- dispatcher/downloadM3u8")
    # None instead of a shared mutable {} default.
    headers = {} if headers is None else headers

    if isIqiyi:
        data = json.loads(utils.getText(m3u8Url, headers))
        videos = [
            each for each in data['data']['program']['video']
            if 'm3u8' in each and each['m3u8'] != ''
        ]
        if not videos:
            raise ValueError('no m3u8 playlist found in response: ' + m3u8Url)
        content = videos[0]['m3u8']
    else:
        content = utils.getText(m3u8Url, headers)

    # "\S*" after ".ts" so that bare "segment.ts" entries match as well as
    # entries followed by a query string; "\S+" required a trailing character.
    urls = re.findall(r'\S+\.ts\S*', content)
    print('匹配到%d段视频,开始下载...' % len(urls))

    # Relative segment paths are prefixed with the first value returned by
    # utils.parseUrl for the playlist URL.
    if len(urls) > 0 and not urls[0].startswith('http'):
        path, _ = utils.parseUrl(m3u8Url)
        urls = [path + ele for ele in urls]

    suffix = '.ts'
    names = [
        os.path.join(config.tempFilePath, '%s_第%d段%s' % (fileName, i + 1, suffix))
        for i in range(len(urls))
    ]

    utils.downloadAll(urls, names, headers, 2)

    fileName = os.path.join(config.videoFilePath, fileName + '.mp4')
    utils.mergePartialVideos(names, fileName)

# Download flv/f4v video segments and merge them.
def downloadFlv(urls, fileName, headers={}):
    """Download each video segment and merge the segments into one file.

    Segments are saved under ``config.tempFilePath``; the merged file is
    written under ``config.videoFilePath`` with the extension derived from
    the first segment URL.

    NOTE: reads the module-level flag ``isBilibili``, which ``download()``
    sets.

    Args:
        urls: A list of segment URLs, or one string with the URLs joined
            by ``'|'``.
        fileName: Output name without extension.
        headers: HTTP headers passed to the ``utils`` helpers.
    """
    print("-- dispatcher/downloadFlv")
    segmentUrls = urls.split('|') if isinstance(urls, str) else urls
    _, lastPart = utils.parseUrl(segmentUrls[0])
    # Extension = text after the last dot of parseUrl's second value.
    suffix = '.' + lastPart.rsplit('.', 1)[-1]

    print('匹配到' + str(len(segmentUrls)) + '段视频,开始下载...')

    names = []
    for index, segmentUrl in enumerate(segmentUrls):
        segmentName = '%s_第%d段%s' % (fileName, index + 1, suffix)
        name = os.path.join(config.tempFilePath, segmentName)
        names.append(name)

        if not isBilibili:
            utils.download(segmentUrl, name, headers)
            continue

        # The first segment of a 1080p video tends to fail with
        # multi-threaded download (official restriction).
        if index == 0 and segmentUrl.find('-80.flv') > 0:
            # Roughly 500k per thread, to reduce the cost of a failure.
            fileSize = utils.getFileSize(segmentUrl, headers)
            threadCount = fileSize // (1024 * 500) + 1
            # Lower parallelism to reduce the failure rate
            # (presumably the trailing 8 is the parallel limit; confirm in utils).
            utils.multiThreadDownload(segmentUrl, name, headers, threadCount, 8)
        else:
            utils.multiThreadDownload(segmentUrl, name, headers, 16)

    mergedName = os.path.join(config.videoFilePath, fileName + suffix)
    utils.mergePartialVideos(names, mergedName)

# Bilibili only: download the m4s audio and video streams and merge them.
def downloadM4s(urls, fileName, headers={}):
    """Download one audio stream and one video stream and merge them.

    The two streams are saved under ``config.tempFilePath`` as
    ``<fileName><ext>.audio`` and ``<fileName><ext>.video``, where ``<ext>``
    comes from the video URL. The merged result is written to
    ``<fileName>.mp4`` under ``config.videoFilePath``.

    Args:
        urls: ``'<audio url>|<video url>'``, exactly two fields.
        fileName: Output name without extension.
        headers: HTTP headers passed to ``utils.download``.
    """
    print("-- dispatcher/downloadM4s")

    audioUrl, videoUrl = urls.split('|')
    _, lastPart = utils.parseUrl(videoUrl)
    ext = '.' + lastPart.rsplit('.', 1)[-1]

    tempStem = fileName + ext
    audioName = os.path.join(config.tempFilePath, tempStem + '.audio')
    videoName = os.path.join(config.tempFilePath, tempStem + '.video')
    mergedName = os.path.join(config.videoFilePath, fileName + '.mp4')

    print('匹配到一段音频和一段视频,开始下载音频和视频...')

    # Audio first, then video, as two sequential downloads.
    for sourceUrl, targetName in ((audioUrl, audioName), (videoUrl, videoName)):
        utils.download(sourceUrl, targetName, headers)
    utils.mergeAudio2Video(videoName, audioName, mergedName)



def download(linksurl, fileName, headers=None):
    """Pick a download routine from the link's content and run it.

    Sets the module-level flags ``isBilibili``, ``isIqiyi`` and ``isMgtv``
    from substrings of ``linksurl``; ``downloadM3u8`` and ``downloadFlv``
    read them. A site-specific ``referer`` header is added for Bilibili
    and Mgtv links.

    Dispatch:
        * link contains ``.m3u8`` or ``dash?`` -> ``downloadM3u8``
        * link contains ``m4s``                -> ``downloadM4s``
        * anything else (flv/f4v included)     -> ``downloadFlv``

    Args:
        linksurl: Link string; its exact format depends on the site.
        fileName: Output name without extension. The characters
            ``/ \\ : * ? " < > |`` are replaced with ``_``.
        headers: Optional HTTP headers. The mapping is copied, so neither
            the caller's dict nor later calls see the added ``referer``.
    """
    # The backslash must be doubled inside the character class; the former
    # pattern [/\:...] parsed "\:" as an escaped colon and let "\" through.
    fileName = re.sub(r'[/\\:*?"<>|]', '_', fileName)

    global isBilibili, isIqiyi, isMgtv
    isBilibili = linksurl.find('acgvideo.com') > 0 or linksurl.find('bili') > 0
    isIqiyi = linksurl.find('iqiyi.com') > 0
    isMgtv = linksurl.find('mgtv.com') > 0

    # Use a private copy: mutating a shared default dict leaked the
    # referer of one download into every following call.
    headers = dict(headers) if headers else {}
    if isBilibili:
        headers['referer'] = 'https://www.bilibili.com/'
    elif isMgtv:
        headers['referer'] = 'https://www.mgtv.com/'

    if linksurl.find('.m3u8') > 0 or linksurl.find('dash?') > 0:
        downloadM3u8(linksurl, fileName, headers)
    elif linksurl.find('m4s') > 0:
        downloadM4s(linksurl, fileName, headers)
    else:
        # .flv/.f4v links and unrecognized links share the same routine.
        downloadFlv(linksurl, fileName, headers)
Binary file added downloader/ffmpeg.exe
Binary file not shown.
Loading

0 comments on commit 966ea47

Please sign in to comment.