Update the get_transcript util with html5_sources #17976

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged

mushtaqak merged 1 commit into transcripts-phase-2 from mushtaq/html5_sources_transcript

Apr 25, 2018

cms/djangoapps/contentstore/management/commands/tests/test_migrate_transcripts.py

Original file line number
Diff line number
Diff line change
@@ -261,22 +261,16 @@ def test_migrate_transcripts_exception_logging(self):
                  u'[Transcript migration] process for ge transcript started'),
                 (LOGGER_NAME,
                  'ERROR',
-                 '[Transcript migration] Exception: u"SON(['
-                 '(\'category\', \'asset\'), (\'name\', u\'not_found.srt\'),'
-                 ' (\'course\', u\'{}\'), (\'tag\', \'c4x\'), (\'org\', u\'{}\'),'
-                 ' (\'revision\', None)])"'.format(self.course_2.id.course, self.course_2.id.org)),
+                 "[Transcript migration] Exception: u'No transcript for `ge` language'"),
                 (LOGGER_NAME,
                  'INFO',
                  u'[Transcript migration] process for course {} ended. Processed 1 transcripts'.format(
                      unicode(self.course_2.id)
                  )),
                 (LOGGER_NAME,
                  'INFO',
-                 "[Transcript migration] Result: Failed: language ge of video test_edx_video_id_2 with exception SON(["
-                 "('category', 'asset'), ('name', u'not_found.srt'), ('course', u'{}'),"
-                 " ('tag', 'c4x'), ('org', u'{}'), ('revision', None)])".format(
-                     self.course_2.id.course, self.course_2.id.org)
-                 )
+                 "[Transcript migration] Result: Failed: language ge of video test_edx_video_id_2 with exception "
+                 "No transcript for `ge` language")
             )
             with LogCapture(LOGGER_NAME, level=logging.INFO) as logger:
@@ Expand Down @@

cms/djangoapps/contentstore/tests/test_transcripts_utils.py

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -744,29 +744,34 @@ def setUp(self):
  
                edx_video_id=u'1234-5678-90'

            )

        def create_transcript(self, subs_id, language=u'en', filename='video.srt'):

        def create_transcript(self, subs_id, language=u'en', filename='video.srt', youtube_id_1_0='', html5_sources=None):

            """

            create transcript.

            """

            transcripts = {}

            if language != u'en':

                transcripts = {language: filename}

            html5_sources = html5_sources or []

            self.video = ItemFactory.create(

                category='video',

                parent_location=self.vertical.location,

                sub=subs_id,

                youtube_id_1_0=youtube_id_1_0,

                transcripts=transcripts,

                edx_video_id=u'1234-5678-90'

                edx_video_id=u'1234-5678-90',

                html5_sources=html5_sources

            )

            if subs_id:

                transcripts_utils.save_subs_to_store(

                    self.subs_sjson,

                    subs_id,

                    self.video,

                    language=language,

                )

            possible_subs = [subs_id, youtube_id_1_0] + transcripts_utils.get_html5_ids(html5_sources)

            for possible_sub in possible_subs:

                if possible_sub:

                    transcripts_utils.save_subs_to_store(

                        self.subs_sjson,

                        possible_sub,

                        self.video,

                        language=language,

                    )

        def create_srt_file(self, content):

            """

    @@ -812,31 +817,69 @@ def test_get_transcript_not_found(self, lang):
  
                )

        @ddt.data(

            # video.sub transcript

            {

                'language': u'en',

                'subs_id': 'video_101',

                'youtube_id_1_0': '',

                'html5_sources': [],

                'expected_filename': 'en_video_101.srt',

            },

            # if video.sub is present, rest will be skipped.

            {

                'language': u'en',

                'subs_id': 'video_101',

                'filename': 'en_video_101.srt',

                'youtube_id_1_0': 'test_yt_id',

                'html5_sources': ['www.abc.com/foo.mp4'],

                'expected_filename': 'en_video_101.srt',

            },

            # video.youtube_id_1_0 transcript

            {

                'language': u'en',

                'subs_id': '',

                'youtube_id_1_0': 'test_yt_id',

                'html5_sources': [],

                'expected_filename': 'en_test_yt_id.srt',

            },

            # video.html5_sources transcript

            {

                'language': u'en',

                'subs_id': '',

                'youtube_id_1_0': '',

                'html5_sources': ['www.abc.com/foo.mp4'],

                'expected_filename': 'en_foo.srt',

            },

            # non-english transcript

            {

                'language': u'ur',

                'subs_id': '',

                'filename': 'ur_video_101.srt',

                'youtube_id_1_0': '',

                'html5_sources': [],

                'expected_filename': 'ur_video_101.srt',

            },

        )

        @ddt.unpack

        def test_get_transcript_from_content_store(self, language, subs_id, filename):

        def test_get_transcript_from_contentstore(

            self,

            language,

            subs_id,

            youtube_id_1_0,

            html5_sources,

            expected_filename

        ):

            """

            Verify that `get_transcript` function returns correct data when transcript is in content store.

            """

            self.upload_file(self.create_srt_file(self.subs_srt), self.video.location, filename)

            self.create_transcript(subs_id, language, filename)

            content, filename, mimetype = transcripts_utils.get_transcript(

            base_filename = 'video_101.srt'

            self.upload_file(self.create_srt_file(self.subs_srt), self.video.location, base_filename)

            self.create_transcript(subs_id, language, base_filename, youtube_id_1_0, html5_sources)

            content, file_name, mimetype = transcripts_utils.get_transcript(

                self.video,

                language

            )

            self.assertEqual(content, self.subs[language])

            self.assertEqual(filename, filename)

            self.assertEqual(file_name, expected_filename)

            self.assertEqual(mimetype, self.srt_mime_type)

        def test_get_transcript_from_content_store_for_ur(self):

common/lib/xmodule/xmodule/video_module/transcripts_utils.py

Original file line number
Diff line number
Diff line change
@@ Expand Up @@
         """
         Get video transcript from edx-val.
         Arguments:
-            edx_video_id (unicode): course identifier
+            edx_video_id (unicode): video identifier
             lang (unicode): transcript language
             output_format (unicode): transcript output format
         Returns:
@@ Expand Down Expand Up @@
         Returns:
             tuple containing content, filename, mimetype
         """
+        input_format, base_name, transcript_content = None, None, None
         if output_format not in (Transcript.SRT, Transcript.SJSON, Transcript.TXT):
             raise NotFoundError('Invalid transcript format `{output_format}`'.format(output_format=output_format))
         sub, other_languages = transcripts_info['sub'], transcripts_info['transcripts']
         transcripts = dict(other_languages)
         # `youtube_id` is sent in case of a translation dispatch; when present, it takes precedence as our subs_id.
-        if youtube_id:
-            transcripts['en'] = youtube_id
-        elif sub:
-            transcripts['en'] = sub
-        elif video.youtube_id_1_0:
-            transcripts['en'] = video.youtube_id_1_0
-        elif language == u'en':
-            raise NotFoundError('No transcript for `en` language')
-        try:
-            input_format, base_name, transcript_content = get_transcript_for_video(
-                video.location,
-                subs_id=transcripts.get('en'),
-                file_name=transcripts[language],
-                language=language
-            )
-        except KeyError:
-            raise NotFoundError
+        possible_sub_ids = [youtube_id, sub, video.youtube_id_1_0] + get_html5_ids(video.html5_sources)
+        for sub_id in possible_sub_ids:
+            try:
+                transcripts[u'en'] = sub_id
+                input_format, base_name, transcript_content = get_transcript_for_video(
+                    video.location,
+                    subs_id=sub_id,
+                    file_name=transcripts[language],
+                    language=language
+                )
+                break
+            except (KeyError, NotFoundError):
+                continue
+        if transcript_content is None:
+            raise NotFoundError('No transcript for `{lang}` language'.format(
+                lang=language
+            ))
         # add language prefix to transcript file only if language is not None
         language_prefix = '{}_'.format(language) if language else ''
@@ Expand Down @@

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Update the get_transcript util with html5_sources #17976

Uh oh!

Diff view

Diff view

There are no files selected for viewing

muhammad-ammar Apr 25, 2018

Uh oh!

mushtaqak Apr 25, 2018

Uh oh!

Update the get_transcript util with html5_sources #17976

Uh oh!

Update the get_transcript util with html5_sources #17976

Uh oh!

Uh oh!

Diff view

Diff view

There are no files selected for viewing

muhammad-ammar Apr 25, 2018

Choose a reason for hiding this comment

Uh oh!

mushtaqak Apr 25, 2018

Choose a reason for hiding this comment

Uh oh!