From 80b1484b82ffb5db003db7ab3bdefcdfa4740004 Mon Sep 17 00:00:00 2001 From: Qubad786 Date: Mon, 2 Apr 2018 19:05:23 +0500 Subject: [PATCH 1/4] Use singular transcript util to retrieve transcripts from contentstore --- .../xmodule/video_module/transcripts_utils.py | 4 +- .../xmodule/video_module/video_handlers.py | 45 +++++++++++-------- 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/common/lib/xmodule/xmodule/video_module/transcripts_utils.py b/common/lib/xmodule/xmodule/video_module/transcripts_utils.py index 99456f21f22f..8d2659293506 100644 --- a/common/lib/xmodule/xmodule/video_module/transcripts_utils.py +++ b/common/lib/xmodule/xmodule/video_module/transcripts_utils.py @@ -637,7 +637,7 @@ def convert(content, input_format, output_format): # With error handling (set to 'ERROR_RAISE'), we will be getting # the exception if something went wrong in parsing the transcript. srt_subs = SubRipFile.from_string( - content.decode('utf8'), + content.decode('utf-8-sig'), error_handling=SubRipFile.ERROR_RAISE ) except Error as ex: # Base exception from pysrt @@ -944,7 +944,7 @@ def get_transcript_from_contentstore(video, language, output_format, transcripts transcripts['en'] = sub elif video.youtube_id_1_0: transcripts['en'] = video.youtube_id_1_0 - elif language == u'en': + elif language == u'en' and not transcripts.get(language, False): raise NotFoundError('No transcript for `en` language') try: diff --git a/common/lib/xmodule/xmodule/video_module/video_handlers.py b/common/lib/xmodule/xmodule/video_module/video_handlers.py index 5b5c57dbf358..1041aaac934d 100644 --- a/common/lib/xmodule/xmodule/video_module/video_handlers.py +++ b/common/lib/xmodule/xmodule/video_module/video_handlers.py @@ -25,6 +25,7 @@ get_or_create_sjson, generate_sjson_for_all_speeds, get_video_transcript_content, + get_transcript_from_contentstore, save_to_store, subs_filename, Transcript, @@ -288,11 +289,14 @@ def transcript(self, request, dispatch): self.transcript_language = language try: - transcript = self.translation(request.GET.get('videoId', None), transcripts) - except (TypeError, TranscriptException, NotFoundError) as ex: - # Catching `TranscriptException` because its also getting raised at places - # when transcript is not found in contentstore. - log.debug(six.text_type(ex)) + transcript_content, __, mime_type = get_transcript_from_contentstore( + video=self, + language=language, + output_format=Transcript.SJSON, + transcripts_info=transcripts, + youtube_id=request.GET.get('videoId', None) + ) + except (TranscriptsGenerationException, UnicodeDecodeError, NotFoundError): # Try to return static URL redirection as last resort # if no translation is required response = self.get_static_transcript(request, transcripts) @@ -314,25 +318,28 @@ def transcript(self, request, dispatch): log.info(six.text_type(ex)) response = Response(status=404) else: - response = Response(transcript, headerlist=[('Content-Language', language)]) - response.content_type = Transcript.mime_types['sjson'] + response = Response(transcript_content, headerlist=[('Content-Language', language)]) + response.content_type = mime_type elif dispatch == 'download': + lang = request.GET.get('lang', None) + # Make sure the language is set. + if not lang: + lang = self.get_default_transcript_language(transcripts) + try: - transcript_content, transcript_filename, transcript_mime_type = self.get_transcript( - transcripts, transcript_format=self.transcript_download_format, lang=lang + transcript_content, filename, mime_type = get_transcript_from_contentstore( + video=self, + language=lang, + output_format=Transcript.SJSON, + transcripts_info=transcripts, + youtube_id=request.GET.get('videoId', None) ) - except (KeyError, UnicodeDecodeError): - return Response(status=404) - except (ValueError, NotFoundError): + except (TranscriptsGenerationException, UnicodeDecodeError, NotFoundError): response = Response(status=404) # Check for transcripts in edx-val as a last resort if corresponding feature is enabled. if feature_enabled: - # Make sure the language is set. - if not lang: - lang = self.get_default_transcript_language(transcripts) - transcript = get_video_transcript_content(edx_video_id=self.edx_video_id, language_code=lang) if transcript: transcript_conversion_props = dict(transcript, output_format=self.transcript_download_format) @@ -354,12 +361,12 @@ def transcript(self, request, dispatch): response = Response( transcript_content, headerlist=[ - ('Content-Disposition', 'attachment; filename="{}"'.format(transcript_filename.encode('utf8'))), - ('Content-Language', self.transcript_language), + ('Content-Disposition', 'attachment; filename="{}"'.format(filename.encode('utf8'))), + ('Content-Language', lang), ], charset='utf8' ) - response.content_type = transcript_mime_type + response.content_type = mime_type elif dispatch.startswith('available_translations'): From d2b65b89fef2aa1d514cdc88eeaca05de83f4760 Mon Sep 17 00:00:00 2001 From: Qubad786 Date: Tue, 3 Apr 2018 11:18:49 +0500 Subject: [PATCH 2/4] fix download output format --- common/lib/xmodule/xmodule/video_module/video_handlers.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/common/lib/xmodule/xmodule/video_module/video_handlers.py b/common/lib/xmodule/xmodule/video_module/video_handlers.py index 1041aaac934d..99562bb2ebf2 100644 --- a/common/lib/xmodule/xmodule/video_module/video_handlers.py +++ b/common/lib/xmodule/xmodule/video_module/video_handlers.py @@ -332,9 +332,8 @@ def transcript(self, request, dispatch): transcript_content, filename, mime_type = get_transcript_from_contentstore( video=self, language=lang, - output_format=Transcript.SJSON, - transcripts_info=transcripts, - youtube_id=request.GET.get('videoId', None) + output_format=Transcript.SRT, + transcripts_info=transcripts ) except (TranscriptsGenerationException, UnicodeDecodeError, NotFoundError): response = Response(status=404) From 6bbfd9713a1526d8ffce0c08e865dd2c6e4c70e3 Mon Sep 17 00:00:00 2001 From: Qubad786 Date: Tue, 3 Apr 2018 11:46:31 +0500 Subject: [PATCH 3/4] refactor tests --- .../courseware/tests/test_video_handlers.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/lms/djangoapps/courseware/tests/test_video_handlers.py b/lms/djangoapps/courseware/tests/test_video_handlers.py index 4f6b7a99db07..3115a37e71b6 100644 --- a/lms/djangoapps/courseware/tests/test_video_handlers.py +++ b/lms/djangoapps/courseware/tests/test_video_handlers.py @@ -435,7 +435,10 @@ def test_download_transcript_not_exist(self): response = self.item.transcript(request=request, dispatch='download') self.assertEqual(response.status, '404 Not Found') - @patch('xmodule.video_module.VideoModule.get_transcript', return_value=('Subs!', 'test_filename.srt', 'application/x-subrip; charset=utf-8')) + @patch( + 'xmodule.video_module.transcripts_utils.get_transcript_from_contentstore', + return_value=('Subs!', 'test_filename.srt', 'application/x-subrip; charset=utf-8') + ) def test_download_srt_exist(self, __): request = Request.blank('/download') response = self.item.transcript(request=request, dispatch='download') @@ -443,7 +446,10 @@ def test_download_srt_exist(self, __): self.assertEqual(response.headers['Content-Type'], 'application/x-subrip; charset=utf-8') self.assertEqual(response.headers['Content-Language'], 'en') - @patch('xmodule.video_module.VideoModule.get_transcript', return_value=('Subs!', 'txt', 'text/plain; charset=utf-8')) + @patch( + 'xmodule.video_module.transcripts_utils.get_transcript_from_contentstore', + return_value=('Subs!', 'txt', 'text/plain; charset=utf-8') + ) def test_download_txt_exist(self, __): self.item.transcript_format = 'txt' request = Request.blank('/download') @@ -460,7 +466,10 @@ def test_download_en_no_sub(self): with self.assertRaises(NotFoundError): self.item.get_transcript(transcripts) - @patch('xmodule.video_module.VideoModule.get_transcript', return_value=('Subs!', u"塞.srt", 'application/x-subrip; charset=utf-8')) + @patch( + 'xmodule.video_module.transcripts_utils.get_transcript_from_contentstore', + return_value=('Subs!', u"塞.srt", 'application/x-subrip; charset=utf-8') + ) def test_download_non_en_non_ascii_filename(self, __): request = Request.blank('/download') response = self.item.transcript(request=request, dispatch='download') From a887cd5dceb88c08aea464781c1456bb72c6595e Mon Sep 17 00:00:00 2001 From: Qubad786 Date: Wed, 4 Apr 2018 14:24:58 +0500 Subject: [PATCH 4/4] allow transcript download in txt format --- common/lib/xmodule/xmodule/video_module/video_handlers.py | 3 ++- lms/djangoapps/courseware/tests/test_video_handlers.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/common/lib/xmodule/xmodule/video_module/video_handlers.py b/common/lib/xmodule/xmodule/video_module/video_handlers.py index 99562bb2ebf2..c0aea72e3413 100644 --- a/common/lib/xmodule/xmodule/video_module/video_handlers.py +++ b/common/lib/xmodule/xmodule/video_module/video_handlers.py @@ -329,10 +329,11 @@ def transcript(self, request, dispatch): lang = self.get_default_transcript_language(transcripts) try: + output_format = self.transcript_download_format if self.transcript_download_format else Transcript.SRT transcript_content, filename, mime_type = get_transcript_from_contentstore( video=self, language=lang, - output_format=Transcript.SRT, + output_format=output_format, transcripts_info=transcripts ) except (TranscriptsGenerationException, UnicodeDecodeError, NotFoundError): diff --git a/lms/djangoapps/courseware/tests/test_video_handlers.py b/lms/djangoapps/courseware/tests/test_video_handlers.py index 3115a37e71b6..4bb548843e36 100644 --- a/lms/djangoapps/courseware/tests/test_video_handlers.py +++ b/lms/djangoapps/courseware/tests/test_video_handlers.py @@ -436,7 +436,7 @@ def test_download_transcript_not_exist(self): self.assertEqual(response.status, '404 Not Found') @patch( - 'xmodule.video_module.transcripts_utils.get_transcript_from_contentstore', + 'xmodule.video_module.video_handlers.get_transcript_from_contentstore', return_value=('Subs!', 'test_filename.srt', 'application/x-subrip; charset=utf-8') ) def test_download_srt_exist(self, __): @@ -447,7 +447,7 @@ def test_download_srt_exist(self, __): self.assertEqual(response.headers['Content-Language'], 'en') @patch( - 'xmodule.video_module.transcripts_utils.get_transcript_from_contentstore', + 'xmodule.video_module.video_handlers.get_transcript_from_contentstore', return_value=('Subs!', 'txt', 'text/plain; charset=utf-8') ) def test_download_txt_exist(self, __): @@ -467,7 +467,7 @@ def test_download_en_no_sub(self): self.item.get_transcript(transcripts) @patch( - 'xmodule.video_module.transcripts_utils.get_transcript_from_contentstore', + 'xmodule.video_module.video_handlers.get_transcript_from_contentstore', return_value=('Subs!', u"塞.srt", 'application/x-subrip; charset=utf-8') ) def test_download_non_en_non_ascii_filename(self, __):