Skip to content

Commit

Permalink
[vimeo] fix album extraction
Browse files Browse the repository at this point in the history
closes #1933
closes #15704
closes #15855
closes #18967
closes #21986
  • Loading branch information
remitamine authored and meunierd committed Feb 13, 2020
1 parent b32c4cc commit 701eda1
Showing 1 changed file with 43 additions and 17 deletions.
60 changes: 43 additions & 17 deletions youtube_dl/extractor/vimeo.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@
from __future__ import unicode_literals

import base64
import functools
import json
import re
import itertools

from .common import InfoExtractor
from ..compat import (
compat_kwargs,
compat_HTTPError,
compat_str,
compat_urlparse,
Expand All @@ -19,6 +21,7 @@
int_or_none,
merge_dicts,
NO_DEFAULT,
OnDemandPagedList,
parse_filesize,
qualities,
RegexNotFoundError,
Expand Down Expand Up @@ -98,6 +101,13 @@ def _extract_xsrft_and_vuid(self, webpage):
webpage, 'vuid', group='vuid')
return xsrft, vuid

def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs):
    """Find the ``vimeo.config`` object literal in *webpage* and parse it.

    The page is searched for either a plain ``vimeo.config = {...};``
    assignment or the ``_extend(<something>, {...});`` form.  Any extra
    positional/keyword arguments are forwarded to ``_search_regex``
    (keyword arguments go through ``compat_kwargs`` first).

    Returns whatever ``_parse_json`` produces for the matched text, or
    None when the search gives back a falsy value.
    """
    config_re = r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));'
    raw_config = self._search_regex(
        config_re, webpage, 'vimeo config',
        *args, **compat_kwargs(kwargs))
    if not raw_config:
        # Nothing usable was matched; callers test the result for truthiness.
        return None
    return self._parse_json(raw_config, video_id)

def _set_vimeo_cookie(self, name, value):
self._set_cookie('vimeo.com', name, value)

Expand Down Expand Up @@ -253,7 +263,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
\.
)?
vimeo(?P<pro>pro)?\.com/
(?!(?:channels|album)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
(?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
(?:.*?/)?
(?:
(?:
Expand Down Expand Up @@ -580,11 +590,9 @@ def _real_extract(self, url):
# and later we extract those that are Vimeo specific.
self.report_extraction(video_id)

vimeo_config = self._search_regex(
r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));', webpage,
'vimeo config', default=None)
vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None)
if vimeo_config:
seed_status = self._parse_json(vimeo_config, video_id).get('seed_status', {})
seed_status = vimeo_config.get('seed_status', {})
if seed_status.get('state') == 'failed':
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, seed_status['title']),
Expand Down Expand Up @@ -905,7 +913,7 @@ def _real_extract(self, url):

class VimeoAlbumIE(VimeoChannelIE):
IE_NAME = 'vimeo:album'
_VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)(?:$|[?#]|/(?!video))'
_VALID_URL = r'https://vimeo\.com/(?:album|showcase)/(?P<id>\d+)(?:$|[?#]|/(?!video))'
_TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
_TESTS = [{
'url': 'https://vimeo.com/album/2632481',
Expand All @@ -925,21 +933,39 @@ class VimeoAlbumIE(VimeoChannelIE):
'params': {
'videopassword': 'youtube-dl',
}
}, {
'url': 'https://vimeo.com/album/2632481/sort:plays/format:thumbnail',
'only_matching': True,
}, {
# TODO: respect page number
'url': 'https://vimeo.com/album/2632481/page:2/sort:plays/format:thumbnail',
'only_matching': True,
}]

def _page_url(self, base_url, pagenum):
return '%s/page:%d/' % (base_url, pagenum)
_PAGE_SIZE = 100

def _fetch_page(self, album_id, authorizaion, hashed_pass, page):
api_page = page + 1
query = {
'fields': 'link',
'page': api_page,
'per_page': self._PAGE_SIZE,
}
if hashed_pass:
query['_hashed_pass'] = hashed_pass
videos = self._download_json(
'https://api.vimeo.com/albums/%s/videos' % album_id,
album_id, 'Downloading page %d' % api_page, query=query, headers={
'Authorization': 'jwt ' + authorizaion,
})['data']
for video in videos:
link = video.get('link')
if not link:
continue
yield self.url_result(link, VimeoIE.ie_key(), VimeoIE._match_id(link))

def _real_extract(self, url):
# Build the playlist result for an album URL.
# Extract the album id from the URL.
album_id = self._match_id(url)
# NOTE(review): this listing is a flattened diff view without +/- markers.
# The return below looks like the pre-change implementation (delegating to
# _extract_videos with a rebuilt album URL) and the statements after it look
# like its replacement -- confirm against the real file before relying on
# the statement order shown here.
return self._extract_videos(album_id, 'https://vimeo.com/album/%s' % album_id)
# Download the album page, then hand it to _login_list_password
# (presumably deals with password-protected lists -- confirm).
webpage = self._download_webpage(url, album_id)
webpage = self._login_list_password(url, album_id, webpage)
# The 'api' section of the embedded vimeo config supplies the 'jwt' token and
# the optional 'hashed_pass' that _fetch_page sends with each request.
api_config = self._extract_vimeo_config(webpage, album_id)['api']
# Bind the per-album arguments so the paged list only has to supply the page
# index; pages hold _PAGE_SIZE entries each.
entries = OnDemandPagedList(functools.partial(
self._fetch_page, album_id, api_config['jwt'],
api_config.get('hashed_pass')), self._PAGE_SIZE)
# The playlist title is taken from the page <title>, without a trailing
# " on Vimeo"; fatal=False is passed to the title search.
return self.playlist_result(entries, album_id, self._html_search_regex(
r'<title>\s*(.+?)(?:\s+on Vimeo)?</title>', webpage, 'title', fatal=False))


class VimeoGroupsIE(VimeoAlbumIE):
Expand Down

0 comments on commit 701eda1

Please sign in to comment.