Skip to content

Commit

Permalink
Fix urlhandle_detect_ext() non-ASCII error in Py2, with test
Browse files Browse the repository at this point in the history
  • Loading branch information
dirkf committed Aug 29, 2021
1 parent 1972157 commit 625666f
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 2 deletions.
25 changes: 25 additions & 0 deletions test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@
cli_valueless_option,
cli_bool_option,
parse_codecs,
urlhandle_detect_ext,
)
from youtube_dl.compat import (
compat_chr,
Expand Down Expand Up @@ -1475,6 +1476,30 @@ def test_clean_podcast_url(self):
self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3')
self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3')

def test_urlhandle_detect_ext(self):
    """Exercise urlhandle_detect_ext() with byte-string response headers."""

    class UrlHandle(object):
        # Minimal stand-in for a URL handle: it only exposes ``headers``.
        _info = {}

        def __init__(self, info):
            self._info = info

        @property
        def headers(self):
            return self._info

    def detected_ext(header_map):
        # Wrap the given headers in a fake handle and run the detection.
        return urlhandle_detect_ext(UrlHandle(header_map))

    # The filename carries a non-ASCII (ISO 8859-1) byte and its extension
    # contradicts the Content-Type; 'mp3' from the filename is expected.
    headers_with_disposition = {
        'Content-Disposition': b'attachment; filename="Epis\xf3dio contains non-ASCI ISO 8859-1 character.mp3"',
        'Content-Type': b'audio/aac',
    }
    self.assertEqual(detected_ext(headers_with_disposition), 'mp3')

    # With no Content-Disposition header only the Content-Type is available.
    headers_type_only = {
        'Content-Type': b'audio/mp3',
    }
    self.assertEqual(detected_ext(headers_type_only), 'mp3')


# Run the unittest runner when this file is executed directly as a script.
if __name__ == '__main__':
unittest.main()
8 changes: 6 additions & 2 deletions youtube_dl/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4287,16 +4287,20 @@ def parse_codecs(codecs_str):

def urlhandle_detect_ext(url_handle):
    """Guess a file extension from the headers of an opened URL handle.

    The filename of an ``attachment`` Content-Disposition header is tried
    first. If that header is absent, does not match, or its filename gives
    no extension, the Content-Type header is mapped to an extension instead.

    url_handle -- object whose ``headers`` attribute provides ``get(name)``.
                  Header values may be byte strings (e.g. containing
                  non-ASCII ISO 8859-1 characters); they are passed through
                  encode_compat_str() before any text processing.

    Returns the result of determine_ext() for the attachment filename when
    it is truthy, otherwise the result of mimetype2ext() for the
    Content-Type value (which is None when that header is missing or empty).
    """
    getheader = url_handle.headers.get

    def encode_compat_str_or_none(x, encoding='iso-8859-1', errors='ignore'):
        # Missing or empty header values become None rather than being
        # handed to encode_compat_str().
        return encode_compat_str(x, encoding=encoding, errors=errors) if x else None

    # Convert the header before matching so that the text regex below is
    # never applied to a raw byte string.
    cd = encode_compat_str_or_none(getheader('Content-Disposition'))
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            e = determine_ext(m.group('filename'), default_ext=None)
            if e:
                return e

    # Fall back to the MIME type, converted the same way as above.
    ct = encode_compat_str_or_none(getheader('Content-Type'))
    return mimetype2ext(ct)


def encode_data_uri(data, mime_type):
Expand Down

0 comments on commit 625666f

Please sign in to comment.