Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix tmz #24687

Closed
wants to merge 3 commits into from
Closed

Fix tmz #24687

Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 58 additions & 15 deletions youtube_dl/extractor/tmz.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,38 @@


class TMZIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tmz\.com/videos/(?P<id>[^/?#]+)'
_VALID_URL = r'https?://(?:www\.)?tmz\.com/videos/.*(?P<id>[^/?#]{10,10})'
_TESTS = [{
'url': 'http://www.tmz.com/videos/0_okj015ty/',
'md5': '4d22a51ef205b6c06395d8394f72d560',
'info_dict': {
'id': '0_okj015ty',
'ext': 'mp4',
'title': 'Kim Kardashian\'s Boobs Unlock a Mystery!',
'description': 'Did Kim Kardasain try to one-up Khloe by one-upping Kylie??? Or is she just showing off her amazing boobs?',
'timestamp': 1394747163,
'uploader_id': 'batchUser',
'upload_date': '20140313',
}
}, {
'url': 'http://www.tmz.com/videos/0-cegprt2p/',
'only_matching': True,
'info_dict': {
'id': '0_cegprt2p',
'ext': 'mp4',
'title': "No Charges Against Hillary Clinton? Harvey Says It Ain't Over Yet",
'timestamp': 1467831837,
'uploader_id': 'batchUser',
'upload_date': '20160706',
}
}, {
'url': 'https://www.tmz.com/videos/071119-chris-morgan-women-4590005-0-zcsejvcr/',
'info_dict': {
'id': '0_zcsejvcr',
'ext': 'mxf',
'title': "Angry Bagel Shop Guy Says He Doesn't Trust Women",
'timestamp': 1562889485,
'uploader_id': 'batchUser',
'upload_date': '20190711',
}
}]

def _real_extract(self, url):
Expand All @@ -30,27 +46,54 @@ def _real_extract(self, url):

class TMZArticleIE(InfoExtractor):
    """Extractor for dated TMZ article pages.

    Matches URLs of the form ``tmz.com/YYYY/MM/DD/<slug>`` (see
    ``_VALID_URL``); the slug is used as the temporary video id while the
    article page is downloaded.  The article itself is not parsed for media:
    the extractor only looks up the target of the page's
    ``TMZ.actions.clickLink(...)`` call and hands that URL back to the
    extraction framework via ``url_result``.
    """
    _VALID_URL = r'https?://(?:www\.)?tmz\.com/\d{4}/\d{2}/\d{2}/(?P<id>[^/]+)/?'
    _TESTS = [{
        'url': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert',
        'md5': '5429c85db8bde39a473a56ca8c4c5602',
        'info_dict': {
            'id': '0_6snoelag',
            'ext': 'mp4',
            'title': 'Bobby Brown Tells Crowd ... Bobbi Kristina is Awake',
            'description': 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."',
            'timestamp': 1429467813,
            'upload_date': '20150419',
            'uploader_id': 'batchUser',
        }
    }, {
        'url': 'http://www.tmz.com/2015/09/19/patti-labelle-concert-fan-stripping-kicked-out-nicki-minaj/',
        'info_dict': {
            'id': '0_jerz7s3l',
            'ext': 'mp4',
            'title': 'Patti LaBelle -- Goes Nuclear On Stripping Fan',
            'timestamp': 1442683746,
            'upload_date': '20150919',
            'uploader_id': 'batchUser',
        }
    }, {
        'url': 'http://www.tmz.com/2016/01/28/adam-silver-sting-drake-blake-griffin/',
        'info_dict': {
            'id': '0_ytz87kk7',
            'ext': 'mp4',
            'title': "NBA's Adam Silver -- Blake Griffin's a Great Guy ... He'll Learn from This",
            'timestamp': 1454010989,
            'upload_date': '20160128',
            'uploader_id': 'batchUser',
        }
    }, {
        'url': 'http://www.tmz.com/2016/10/27/donald-trump-star-vandal-arrested-james-otis/',
        'info_dict': {
            'id': '0_isigfatu',
            'ext': 'mp4',
            'title': "Trump Star Vandal -- I'm Not Afraid of Donald or the Cops!",
            'timestamp': 1477500095,
            'upload_date': '20161026',
            'uploader_id': 'batchUser',
        }
    }]

    def _real_extract(self, url):
        """Resolve an article URL to the URL passed to ``clickLink``.

        Downloads the article page, finds the argument list of the first
        ``TMZ.actions.clickLink(...)`` call and treats its first
        comma-separated argument (with surrounding whitespace and quotes
        removed) as the URL to delegate to.

        Returns the ``url_result`` for that URL when it differs from ``url``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        # Dots are escaped so only the literal call name matches; [\s\S]+?
        # lets the argument list span several lines.
        params = self._html_search_regex(
            r'TMZ\.actions\.clickLink\(([\s\S]+?)\)',
            webpage, 'embedded video info').split(',')
        # The first argument is a quoted string literal; drop any padding
        # around it before removing the quote characters.
        new_url = params[0].strip().strip("'\"")
        # Only delegate to a different URL, so the extractor never hands
        # itself the page it has just processed.
        # NOTE(review): when new_url == url this falls through and returns
        # None -- confirm whether an explicit error should be raised here.
        if new_url != url:
            return self.url_result(new_url)