From 9c7f8ad7bbc5c023189690efcf37bb99eee577a7 Mon Sep 17 00:00:00 2001 From: dirkf Date: Wed, 3 Mar 2021 13:33:33 +0000 Subject: [PATCH 1/7] Get HD formats for Reel --- youtube_dl/extractor/bbc.py | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 92e6f1bea37..db9f655acc1 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -995,6 +995,8 @@ def _real_extract(self, url): } # bbc reel (e.g. https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness) + programme_id = self._search_regex( + r'/reel/video/(?P%s)/' % self._ID_REGEX, url, 'Reel pid', default=None) initial_data = self._parse_json(self._html_search_regex( r']+id=(["\'])initial-data\1[^>]+data-json=(["\'])(?P(?:(?!\2).)+)', webpage, 'initial data', default='{}', group='json'), playlist_id, fatal=False) @@ -1005,25 +1007,39 @@ def _real_extract(self, url): clip_data = try_get(smp_data, lambda x: x['items'][0], dict) or {} version_id = clip_data.get('versionID') if version_id: - title = smp_data['title'] - formats, subtitles = self._download_media_selector(version_id) - self._sort_formats(formats) image_url = smp_data.get('holdingImageURL') display_date = init_data.get('displayDate') topic_title = init_data.get('topicTitle') - - return { - 'id': version_id, - 'title': title, - 'formats': formats, + ret = { + 'title': smp_data.get('title', playlist_id), 'alt_title': init_data.get('shortTitle'), 'thumbnail': image_url.replace('$recipe', 'raw') if image_url else None, 'description': smp_data.get('summary') or init_data.get('shortSummary'), 'upload_date': display_date.replace('-', '') if display_date else None, - 'subtitles': subtitles, 'duration': int_or_none(clip_data.get('duration')), 'categories': [topic_title] if topic_title else None, } + + if not programme_id: + # get the formats from the reel page + formats, subtitles = self._download_media_selector(version_id) + self._sort_formats(formats) + ret.update({ + 'id': version_id, + 'formats': formats, + 'subtitles': subtitles, + }) + else: + # get the formats (including HD) from the programmes page + # avoid https: to help proxying + ret.update({ + '_type': 'url_transparent', + 'url': 'http://bbc.co.uk/programmes/%s' % programme_id + }) + return ret + elif programme_id: + # the Reel page was not as expected: try the programmes page + return self._url_result(programme_id) # Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975) # There are several setPayload calls may be present but the video From fcb4cf1783bf43cd322bbd8ee27c616273b6306e Mon Sep 17 00:00:00 2001 From: dirkf Date: Wed, 3 Mar 2021 13:56:17 +0000 Subject: [PATCH 2/7] Get HD formats for Reel --- youtube_dl/extractor/bbc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index db9f655acc1..3556227cfa9 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -1012,6 +1012,7 @@ def _real_extract(self, url): topic_title = init_data.get('topicTitle') ret = { 'title': smp_data.get('title', playlist_id), + 'id': version_id, 'alt_title': init_data.get('shortTitle'), 'thumbnail': image_url.replace('$recipe', 'raw') if image_url else None, 'description': smp_data.get('summary') or init_data.get('shortSummary'), @@ -1025,7 +1026,6 @@ def _real_extract(self, url): formats, subtitles = self._download_media_selector(version_id) self._sort_formats(formats) ret.update({ - 'id': version_id, 'formats': formats, 'subtitles': subtitles, }) From 24a1061f539076dd98ae21ac0b4b43fec248c95c Mon Sep 17 00:00:00 2001 From: dirkf Date: Wed, 3 Mar 2021 14:11:36 +0000 Subject: [PATCH 3/7] Get HD formats for Reel --- youtube_dl/extractor/bbc.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 3556227cfa9..8046ad10b6b 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -995,7 +995,7 @@ def _real_extract(self, url): } # bbc reel (e.g. https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness) - programme_id = self._search_regex( + programme_id = self._search_regex( r'/reel/video/(?P%s)/' % self._ID_REGEX, url, 'Reel pid', default=None) initial_data = self._parse_json(self._html_search_regex( r']+id=(["\'])initial-data\1[^>]+data-json=(["\'])(?P(?:(?!\2).)+)', @@ -1020,22 +1020,22 @@ def _real_extract(self, url): 'duration': int_or_none(clip_data.get('duration')), 'categories': [topic_title] if topic_title else None, } - + if not programme_id: # get the formats from the reel page formats, subtitles = self._download_media_selector(version_id) self._sort_formats(formats) ret.update({ - 'formats': formats, - 'subtitles': subtitles, + 'formats': formats, + 'subtitles': subtitles, }) else: # get the formats (including HD) from the programmes page # avoid https: to help proxying ret.update({ - '_type': 'url_transparent', - 'url': 'http://bbc.co.uk/programmes/%s' % programme_id - }) + '_type': 'url_transparent', + 'url': 'http://bbc.co.uk/programmes/%s' % programme_id + }) return ret elif programme_id: # the Reel page was not as expected: try the programmes page From d6ed9424cdb28e60b39790f7afe3e2172fb7f3d1 Mon Sep 17 00:00:00 2001 From: dirkf Date: Wed, 3 Mar 2021 14:14:14 +0000 Subject: [PATCH 4/7] Get HD formats for Reel --- youtube_dl/extractor/bbc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 8046ad10b6b..d9d2fb77c95 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -1028,14 +1028,14 @@ def _real_extract(self, url): ret.update({ 'formats': formats, 'subtitles': subtitles, - }) + }) else: # get the formats (including HD) from the programmes page # avoid https: to help proxying ret.update({ '_type': 'url_transparent', 'url': 'http://bbc.co.uk/programmes/%s' % programme_id - }) + }) return ret elif programme_id: # the Reel page was not as expected: try the programmes page From 7a1e11c0456387f09e0218960423e7a5945059c1 Mon Sep 17 00:00:00 2001 From: dirkf Date: Wed, 3 Mar 2021 18:06:35 +0000 Subject: [PATCH 5/7] Extend _MEDIA_SETS to get HD Reel videos --- youtube_dl/extractor/bbc.py | 33 ++++++++++----------------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index d9d2fb77c95..df035ed1e22 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -995,7 +995,7 @@ def _real_extract(self, url): } # bbc reel (e.g. https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness) - programme_id = self._search_regex( + programme_id = self._search_regex( r'/reel/video/(?P%s)/' % self._ID_REGEX, url, 'Reel pid', default=None) initial_data = self._parse_json(self._html_search_regex( r']+id=(["\'])initial-data\1[^>]+data-json=(["\'])(?P(?:(?!\2).)+)', @@ -1007,40 +1007,27 @@ def _real_extract(self, url): clip_data = try_get(smp_data, lambda x: x['items'][0], dict) or {} version_id = clip_data.get('versionID') if version_id: + title = smp_data['title'] + # also try for higher resolutions + self._MEDIA_SETS.insert(0,'iptv-all') + formats, subtitles = self._download_media_selector(version_id) + self._sort_formats(formats) image_url = smp_data.get('holdingImageURL') display_date = init_data.get('displayDate') topic_title = init_data.get('topicTitle') - ret = { - 'title': smp_data.get('title', playlist_id), + return { 'id': version_id, + 'title': title + 'formats': formats, 'alt_title': init_data.get('shortTitle'), 'thumbnail': image_url.replace('$recipe', 'raw') if image_url else None, 'description': smp_data.get('summary') or init_data.get('shortSummary'), 'upload_date': display_date.replace('-', '') if display_date else None, + 'subtitles': subtitles, 'duration': int_or_none(clip_data.get('duration')), 'categories': [topic_title] if topic_title else None, } - if not programme_id: - # get the formats from the reel page - formats, subtitles = self._download_media_selector(version_id) - self._sort_formats(formats) - ret.update({ - 'formats': formats, - 'subtitles': subtitles, - }) - else: - # get the formats (including HD) from the programmes page - # avoid https: to help proxying - ret.update({ - '_type': 'url_transparent', - 'url': 'http://bbc.co.uk/programmes/%s' % programme_id - }) - return ret - elif programme_id: - # the Reel page was not as expected: try the programmes page - return self._url_result(programme_id) - # Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975) # There are several setPayload calls may be present but the video # seems to be always related to the first one From 49d83ee5022be269dbacc5e1300d331350f1a125 Mon Sep 17 00:00:00 2001 From: dirkf Date: Wed, 3 Mar 2021 18:10:22 +0000 Subject: [PATCH 6/7] Extend _MEDIA_SETS to get HD Reel videos --- youtube_dl/extractor/bbc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index df035ed1e22..9ff5ba713ae 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -1017,7 +1017,7 @@ def _real_extract(self, url): topic_title = init_data.get('topicTitle') return { 'id': version_id, - 'title': title + 'title': title, 'formats': formats, 'alt_title': init_data.get('shortTitle'), 'thumbnail': image_url.replace('$recipe', 'raw') if image_url else None, From ff4e7e280b415e87963e5459daef4dbe317d5c4b Mon Sep 17 00:00:00 2001 From: dirkf Date: Wed, 3 Mar 2021 18:21:44 +0000 Subject: [PATCH 7/7] Extend _MEDIA_SETS to get HD Reel videos --- youtube_dl/extractor/bbc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 9ff5ba713ae..ba9098190fb 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -995,7 +995,7 @@ def _real_extract(self, url): } # bbc reel (e.g. https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness) - programme_id = self._search_regex( + programme_id = self._search_regex( r'/reel/video/(?P%s)/' % self._ID_REGEX, url, 'Reel pid', default=None) initial_data = self._parse_json(self._html_search_regex( r']+id=(["\'])initial-data\1[^>]+data-json=(["\'])(?P(?:(?!\2).)+)', @@ -1009,7 +1009,7 @@ def _real_extract(self, url): if version_id: title = smp_data['title'] # also try for higher resolutions - self._MEDIA_SETS.insert(0,'iptv-all') + self._MEDIA_SETS.insert(0, 'iptv-all') formats, subtitles = self._download_media_selector(version_id) self._sort_formats(formats) image_url = smp_data.get('holdingImageURL')