Skip to content

Commit

Permalink
[youtube] modify regex to get chapters from description (closes #24819)
Browse files Browse the repository at this point in the history
  • Loading branch information
jaimebl committed Apr 19, 2020
1 parent 00eb865 commit 78d7146
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 1 deletion.
35 changes: 35 additions & 0 deletions test/test_youtube_chapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,41 @@
class TestYoutubeChapters(unittest.TestCase):

_TEST_CASES = [
(
# https://www.youtube.com/watch?v=gBRKnvK1JUE
# pattern: 00:00 - 09:24 <title>
'''Here is Nucleus's 1979 album Out Of The Long Dark: https://www.youtube.com/watch?v=GX4Eh1DPb-E<br /><br />And here is their 1971 live album: https://www.youtube.com/watch?v=cpbM75B8qaE<br /><br /><a href="#" onclick="yt.www.watch.player.seekTo(00*60+00);return false;">00:00</a> - <a href="#" onclick="yt.www.watch.player.seekTo(09*60+24);return false;">09:24</a> roots<br /><a href="#" onclick="yt.www.watch.player.seekTo(09*60+24);return false;">09:24</a> - <a href="#" onclick="yt.www.watch.player.seekTo(14*60+19);return false;">14:19</a> images<br /><a href="#" onclick="yt.www.watch.player.seekTo(14*60+19);return false;">14:19</a> - <a href="#" onclick="yt.www.watch.player.seekTo(18*60+20);return false;">18:20</a> caliban<br /><a href="#" onclick="yt.www.watch.player.seekTo(18*60+20);return false;">18:20</a> - <a href="#" onclick="yt.www.watch.player.seekTo(21*60+42);return false;">21:42</a> whapatiti<br /><a href="#" onclick="yt.www.watch.player.seekTo(21*60+42);return false;">21:42</a> - <a href="#" onclick="yt.www.watch.player.seekTo(26*60+18);return false;">26:18</a> capricorn<br /><a href="#" onclick="yt.www.watch.player.seekTo(26*60+18);return false;">26:18</a> - <a href="#" onclick="yt.www.watch.player.seekTo(29*60+42);return false;">29:42</a> odokamona<br /><a href="#" onclick="yt.www.watch.player.seekTo(29*60+42);return false;">29:42</a> - <a href="#" onclick="yt.www.watch.player.seekTo(37*60+26);return false;">37:26</a> southern roots and celebration<br /><br />Bass Guitar – Roger Sutton<br />Design – Keith Davis (3)<br />Drums – Clive Thacker<br />Engineer – Roger Wake<br />Guitar – Jocelyn Pitchen<br />Percussion – Aureo de Souza<br />Piano, Electric Piano – Dave MacRae<br />Producer – Fritz Fryer<br />Tenor Saxophone, Soprano Saxophone, Flute, Flute [Bamboo] – Brian Smith<br />Trumpet – Ian Carr<br />Vocals – Joy Yates<br />Written-By – Brian Smith (tracks: B1 to B3), Dave MacRae (tracks: B4), Ian Carr (tracks: A) ''',
2246,
[{
'start_time': 0,
'end_time': 564,
'title': 'roots',
}, {
'start_time': 564,
'end_time': 859,
'title': 'images',
}, {
'start_time': 859,
'end_time': 1100,
'title': 'caliban',
}, {
'start_time': 1100,
'end_time': 1302,
'title': 'whapatiti',
}, {
'start_time': 1302,
'end_time': 1578,
'title': 'capricorn',
}, {
'start_time': 1578,
'end_time': 1782,
'title': 'odokamona',
}, {
'start_time': 1782,
'end_time': 2246,
'title': 'southern roots and celebration',
}]
),
(
# https://www.youtube.com/watch?v=A22oy8dFjqc
# pattern: 00:00 - <title>
Expand Down
2 changes: 1 addition & 1 deletion youtube_dl/extractor/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -1621,7 +1621,7 @@ def _extract_chapters(description, duration):
if not description:
return None
chapter_lines = re.findall(
-r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
+r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>(?:[^<]*<a.*?)?[^>]*)(?=$|<br\s*/>)',
description)
if not chapter_lines:
return None
Expand Down

0 comments on commit 78d7146

Please sign in to comment.