diff --git a/cms/djangoapps/contentstore/management/commands/tests/test_migrate_transcripts.py b/cms/djangoapps/contentstore/management/commands/tests/test_migrate_transcripts.py index 446c5703b625..c458b7791160 100644 --- a/cms/djangoapps/contentstore/management/commands/tests/test_migrate_transcripts.py +++ b/cms/djangoapps/contentstore/management/commands/tests/test_migrate_transcripts.py @@ -261,10 +261,7 @@ def test_migrate_transcripts_exception_logging(self): u'[Transcript migration] process for ge transcript started'), (LOGGER_NAME, 'ERROR', - '[Transcript migration] Exception: u"SON([' - '(\'category\', \'asset\'), (\'name\', u\'not_found.srt\'),' - ' (\'course\', u\'{}\'), (\'tag\', \'c4x\'), (\'org\', u\'{}\'),' - ' (\'revision\', None)])"'.format(self.course_2.id.course, self.course_2.id.org)), + "[Transcript migration] Exception: u'No transcript for `ge` language'"), (LOGGER_NAME, 'INFO', u'[Transcript migration] process for course {} ended. Processed 1 transcripts'.format( @@ -272,11 +269,8 @@ def test_migrate_transcripts_exception_logging(self): )), (LOGGER_NAME, 'INFO', - "[Transcript migration] Result: Failed: language ge of video test_edx_video_id_2 with exception SON([" - "('category', 'asset'), ('name', u'not_found.srt'), ('course', u'{}')," - " ('tag', 'c4x'), ('org', u'{}'), ('revision', None)])".format( - self.course_2.id.course, self.course_2.id.org) - ) + "[Transcript migration] Result: Failed: language ge of video test_edx_video_id_2 with exception " + "No transcript for `ge` language") ) with LogCapture(LOGGER_NAME, level=logging.INFO) as logger: diff --git a/cms/djangoapps/contentstore/tests/test_transcripts_utils.py b/cms/djangoapps/contentstore/tests/test_transcripts_utils.py index 9144dce6ede3..bc4c1b6777e8 100644 --- a/cms/djangoapps/contentstore/tests/test_transcripts_utils.py +++ b/cms/djangoapps/contentstore/tests/test_transcripts_utils.py @@ -744,7 +744,7 @@ def setUp(self): edx_video_id=u'1234-5678-90' ) - def create_transcript(self, subs_id, language=u'en', filename='video.srt'): + def create_transcript(self, subs_id, language=u'en', filename='video.srt', youtube_id_1_0='', html5_sources=None): """ create transcript. """ @@ -752,21 +752,26 @@ def create_transcript(self, subs_id, language=u'en', filename='video.srt'): if language != u'en': transcripts = {language: filename} + html5_sources = html5_sources or [] self.video = ItemFactory.create( category='video', parent_location=self.vertical.location, sub=subs_id, + youtube_id_1_0=youtube_id_1_0, transcripts=transcripts, - edx_video_id=u'1234-5678-90' + edx_video_id=u'1234-5678-90', + html5_sources=html5_sources ) - if subs_id: - transcripts_utils.save_subs_to_store( - self.subs_sjson, - subs_id, - self.video, - language=language, - ) + possible_subs = [subs_id, youtube_id_1_0] + transcripts_utils.get_html5_ids(html5_sources) + for possible_sub in possible_subs: + if possible_sub: + transcripts_utils.save_subs_to_store( + self.subs_sjson, + possible_sub, + self.video, + language=language, + ) def create_srt_file(self, content): """ @@ -812,31 +817,69 @@ def test_get_transcript_not_found(self, lang): ) @ddt.data( + # video.sub transcript + { + 'language': u'en', + 'subs_id': 'video_101', + 'youtube_id_1_0': '', + 'html5_sources': [], + 'expected_filename': 'en_video_101.srt', + }, + # if video.sub is present, rest will be skipped. { 'language': u'en', 'subs_id': 'video_101', - 'filename': 'en_video_101.srt', + 'youtube_id_1_0': 'test_yt_id', + 'html5_sources': ['www.abc.com/foo.mp4'], + 'expected_filename': 'en_video_101.srt', + }, + # video.youtube_id_1_0 transcript + { + 'language': u'en', + 'subs_id': '', + 'youtube_id_1_0': 'test_yt_id', + 'html5_sources': [], + 'expected_filename': 'en_test_yt_id.srt', + }, + # video.html5_sources transcript + { + 'language': u'en', + 'subs_id': '', + 'youtube_id_1_0': '', + 'html5_sources': ['www.abc.com/foo.mp4'], + 'expected_filename': 'en_foo.srt', }, + # non-english transcript { 'language': u'ur', 'subs_id': '', - 'filename': 'ur_video_101.srt', + 'youtube_id_1_0': '', + 'html5_sources': [], + 'expected_filename': 'ur_video_101.srt', }, ) @ddt.unpack - def test_get_transcript_from_content_store(self, language, subs_id, filename): + def test_get_transcript_from_contentstore( + self, + language, + subs_id, + youtube_id_1_0, + html5_sources, + expected_filename + ): """ Verify that `get_transcript` function returns correct data when transcript is in content store. """ - self.upload_file(self.create_srt_file(self.subs_srt), self.video.location, filename) - self.create_transcript(subs_id, language, filename) - content, filename, mimetype = transcripts_utils.get_transcript( + base_filename = 'video_101.srt' + self.upload_file(self.create_srt_file(self.subs_srt), self.video.location, base_filename) + self.create_transcript(subs_id, language, base_filename, youtube_id_1_0, html5_sources) + content, file_name, mimetype = transcripts_utils.get_transcript( self.video, language ) self.assertEqual(content, self.subs[language]) - self.assertEqual(filename, filename) + self.assertEqual(file_name, expected_filename) self.assertEqual(mimetype, self.srt_mime_type) def test_get_transcript_from_content_store_for_ur(self): diff --git a/common/lib/xmodule/xmodule/video_module/transcripts_utils.py b/common/lib/xmodule/xmodule/video_module/transcripts_utils.py index c28b5674c477..58f7b54a270d 100644 --- a/common/lib/xmodule/xmodule/video_module/transcripts_utils.py +++ b/common/lib/xmodule/xmodule/video_module/transcripts_utils.py @@ -859,7 +859,7 @@ def get_transcript_from_val(edx_video_id, lang=None, output_format=Transcript.SR """ Get video transcript from edx-val. Arguments: - edx_video_id (unicode): course identifier + edx_video_id (unicode): video identifier lang (unicode): transcript language output_format (unicode): transcript output format Returns: @@ -923,6 +923,7 @@ def get_transcript_from_contentstore(video, language, output_format, transcripts Returns: tuple containing content, filename, mimetype """ + input_format, base_name, transcript_content = None, None, None if output_format not in (Transcript.SRT, Transcript.SJSON, Transcript.TXT): raise NotFoundError('Invalid transcript format `{output_format}`'.format(output_format=output_format)) @@ -930,24 +931,24 @@ def get_transcript_from_contentstore(video, language, output_format, transcripts transcripts = dict(other_languages) # this is sent in case of a translation dispatch and we need to use it as our subs_id. - if youtube_id: - transcripts['en'] = youtube_id - elif sub: - transcripts['en'] = sub - elif video.youtube_id_1_0: - transcripts['en'] = video.youtube_id_1_0 - elif language == u'en': - raise NotFoundError('No transcript for `en` language') - - try: - input_format, base_name, transcript_content = get_transcript_for_video( - video.location, - subs_id=transcripts.get('en'), - file_name=transcripts[language], - language=language - ) - except KeyError: - raise NotFoundError + possible_sub_ids = [youtube_id, sub, video.youtube_id_1_0] + get_html5_ids(video.html5_sources) + for sub_id in possible_sub_ids: + try: + transcripts[u'en'] = sub_id + input_format, base_name, transcript_content = get_transcript_for_video( + video.location, + subs_id=sub_id, + file_name=transcripts[language], + language=language + ) + break + except (KeyError, NotFoundError): + continue + + if transcript_content is None: + raise NotFoundError('No transcript for `{lang}` language'.format( + lang=language + )) # add language prefix to transcript file only if language is not None language_prefix = '{}_'.format(language) if language else ''