Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix n-tv.de extractor, website format has changed #8414

Merged
merged 10 commits into from
Nov 11, 2023
76 changes: 43 additions & 33 deletions yt_dlp/extractor/ntvde.py
Original file line number Diff line number Diff line change
@@ -1,73 +1,83 @@
import re

from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
int_or_none,
js_to_json,
parse_duration,
url_or_none,
)
from ..utils.traversal import traverse_obj


class NTVDeIE(InfoExtractor):
IE_NAME = 'n-tv.de'
_VALID_URL = r'https?://(?:www\.)?n-tv\.de/mediathek/videos/[^/?#]+/[^/?#]+-article(?P<id>.+)\.html'
_VALID_URL = r'https?://(?:www\.)?n-tv\.de/mediathek/(?:videos|magazine)/[^/?#]+/[^/?#]+-article(?P<id>[^/?#]+)\.html'

_TESTS = [{
'url': 'http://www.n-tv.de/mediathek/videos/panorama/Schnee-und-Glaette-fuehren-zu-zahlreichen-Unfaellen-und-Staus-article14438086.html',
'md5': '6ef2514d4b1e8e03ca24b49e2f167153',
'md5': '6bcf2a6638cb83f45d5561659a1cb498',
'info_dict': {
'id': '14438086',
'ext': 'mp4',
'thumbnail': r're:^https?://.*\.jpg$',
'title': 'Schnee und Glätte führen zu zahlreichen Unfällen und Staus',
'alt_title': 'Winterchaos auf deutschen Straßen',
'description': 'Schnee und Glätte sorgen deutschlandweit für einen chaotischen Start in die Woche: Auf den Straßen kommt es zu kilometerlangen Staus und Dutzenden Glätteunfällen. In Düsseldorf und München wirbelt der Schnee zudem den Flugplan durcheinander. Dutzende Flüge landen zu spät, einige fallen ganz aus.',
'duration': 4020,
'duration': 67,
'timestamp': 1422892797,
'upload_date': '20150202',
},
}, {
'url': 'https://www.n-tv.de/mediathek/magazine/auslandsreport/Juedische-Siedler-wollten-Rache-die-wollten-nur-toeten-article24523089.html',
'md5': 'c5c6014c014ccc3359470e1d34472bfd',
'info_dict': {
'id': '24523089',
'ext': 'mp4',
'thumbnail': r're:^https?://.*\.jpg$',
'title': 'Jüdische Siedler "wollten Rache, die wollten nur töten"',
'alt_title': 'Israelische Gewalt fern von Gaza',
'description': 'Vier Tage nach dem Massaker der Hamas greifen jüdische Siedler das Haus einer palästinensischen Familie im Westjordanland an. Die Überlebenden berichten, sie waren unbewaffnet, die Angreifer seien nur auf "Rache und Töten" aus gewesen. Als die Toten beerdigt werden sollen, eröffnen die Siedler erneut das Feuer.',
'duration': 326,
'timestamp': 1699688294,
'upload_date': '20231111',
},
}]

def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)

info = self._parse_json(self._search_regex(
r'(?s)ntv\.pageInfo\.article\s*=\s*(\{.*?\});', webpage, 'info'),
video_id, transform_source=js_to_json)
timestamp = int_or_none(info.get('publishedDateAsUnixTimeStamp'))
vdata = self._parse_json(self._search_regex(
r'(?s)\$\(\s*"\#player"\s*\)\s*\.data\(\s*"player",\s*(\{.*?\})\);',
webpage, 'player data'), video_id,
transform_source=lambda s: js_to_json(re.sub(r'advertising:\s*{[^}]+},', '', s)))
duration = parse_duration(vdata.get('duration'))
info = self._search_json(
r'article:', webpage, 'info', video_id, transform_source=js_to_json)

vdata = self._search_json(
r'\$\(\s*"#playerwrapper"\s*\)\s*\.data\(\s*"player",',
webpage, 'player data', video_id,
transform_source=lambda s: js_to_json(re.sub(r'ivw:[^},]+', '', s)))['setup']['source']

formats = []
if vdata.get('video'):
formats.append({
'format_id': 'flash',
'url': 'rtmp://fms.n-tv.de/%s' % vdata['video'],
})
if vdata.get('videoMp4'):
if vdata.get('progressive'):
formats.append({
'format_id': 'mobile',
'url': compat_urlparse.urljoin('http://video.n-tv.de', vdata['videoMp4']),
'tbr': 400, # estimation
'format_id': 'http',
'url': vdata['progressive'],
})
if vdata.get('videoM3u8'):
m3u8_url = compat_urlparse.urljoin('http://video.n-tv.de', vdata['videoM3u8'])
if vdata.get('hls'):
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
quality=1, m3u8_id='hls', fatal=False))
vdata['hls'], video_id, 'mp4', m3u8_id='hls', fatal=False))
if vdata.get('dash'):
formats.extend(self._extract_mpd_formats(vdata['dash'], video_id, fatal=False, mpd_id='dash'))

return {
'id': video_id,
'title': info['headline'],
'description': info.get('intro'),
'alt_title': info.get('kicker'),
'timestamp': timestamp,
'thumbnail': vdata.get('html5VideoPoster'),
'duration': duration,
**traverse_obj(info, {
'title': 'headline',
'description': 'intro',
'alt_title': 'kicker',
'timestamp': ('publishedDateAsUnixTimeStamp', {int_or_none}),
}),
**traverse_obj(vdata, {
'thumbnail': ('poster', {url_or_none}),
'duration': ('length', {int_or_none}),
}),
'formats': formats,
}
Loading