From 99609bdc2be9a7c9e621cae399f13d18e1c6baea Mon Sep 17 00:00:00 2001 From: happyhuman Date: Mon, 16 Jul 2018 13:26:13 -0700 Subject: [PATCH 1/4] Added the sample for Word Level Confidence --- speech/cloud-client/README.rst | 1 + speech/cloud-client/beta_snippets.py | 33 +++++++++++++++++++++++ speech/cloud-client/beta_snippets_test.py | 11 +++++++- 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/speech/cloud-client/README.rst b/speech/cloud-client/README.rst index 01e73fb54c9..394c97c81f2 100644 --- a/speech/cloud-client/README.rst +++ b/speech/cloud-client/README.rst @@ -233,6 +233,7 @@ To run this sample: python beta_snippets.py diarization resources/commercial_mono.wav python beta_snippets.py multi-channel resources/commercial_mono.wav python beta_snippets.py multi-language resources/multi.wav en-US es + python beta_snippets.py word-level-conf resources/commercial_mono.wav positional arguments: command diff --git a/speech/cloud-client/beta_snippets.py b/speech/cloud-client/beta_snippets.py index 87a6cac822b..6382b0b401b 100644 --- a/speech/cloud-client/beta_snippets.py +++ b/speech/cloud-client/beta_snippets.py @@ -24,6 +24,7 @@ python beta_snippets.py diarization resources/commercial_mono.wav python beta_snippets.py multi-channel resources/commercial_mono.wav python beta_snippets.py multi-language resources/multi.wav en-US es + python beta_snippets.py word-level-conf resources/commercial_mono.wav """ import argparse @@ -240,6 +241,36 @@ def transcribe_file_with_multilanguage(speech_file, first_lang, second_lang): # [END speech_transcribe_multilanguage] +def transcribe_file_with_word_level_confidence(speech_file): + """Transcribe the given audio file synchronously with + word level confidence.""" + # [START speech_transcribe_word_level_confidence] + from google.cloud import speech_v1p1beta1 as speech + client = speech.SpeechClient() + + with open(speech_file, 'rb') as audio_file: + content = audio_file.read() + + audio = speech.types.RecognitionAudio(content=content) + + config = speech.types.RecognitionConfig( + encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16, + sample_rate_hertz=16000, + language_code='en-US', + enable_word_confidence=True) + + response = client.recognize(config, audio) + + for i, result in enumerate(response.results): + alternative = result.alternatives[0] + print('-' * 20) + print('First alternative of result {}'.format(i)) + print(u'Transcript: {}'.format(alternative.transcript)) + print(u'First Word and Confidence:{} {}'.format( + alternative.words[0].word, alternative.words[0].confidence)) + # [END speech_transcribe_word_level_confidence] + + if __name__ == '__main__': parser = argparse.ArgumentParser( description=__doc__, @@ -266,3 +297,5 @@ def transcribe_file_with_multilanguage(speech_file, first_lang, second_lang): transcribe_file_with_multichannel(args.path) elif args.command == 'multi-language': transcribe_file_with_multilanguage(args.path, args.first, args.second) + elif args.command == 'word-level-conf': + transcribe_file_with_word_level_confidence(args.path) \ No newline at end of file diff --git a/speech/cloud-client/beta_snippets_test.py b/speech/cloud-client/beta_snippets_test.py index 10f0f4dba36..ed25f7785f3 100644 --- a/speech/cloud-client/beta_snippets_test.py +++ b/speech/cloud-client/beta_snippets_test.py @@ -19,7 +19,8 @@ transcribe_file_with_enhanced_model, transcribe_file_with_metadata, transcribe_file_with_multichannel, - transcribe_file_with_multilanguage) + transcribe_file_with_multilanguage, + transcribe_file_with_word_level_confidence) RESOURCES = os.path.join(os.path.dirname(__file__), 'resources') @@ -70,3 +71,11 @@ def test_transcribe_multilanguage_file(capsys): out, err = capsys.readouterr() assert 'how are you doing estoy bien e tu' in out + + +def test_transcribe_word_level_confidence(capsys): + transcribe_file_with_word_level_confidence( + os.path.join(RESOURCES, 'Google_Gnome.wav')) + out, err = capsys.readouterr() + + assert 'OK Google stream stranger things from Netflix to my TV' in out \ No newline at end of file From 8caa4aadf682d09da0619a85d5cc0f89385a3244 Mon Sep 17 00:00:00 2001 From: happyhuman Date: Mon, 16 Jul 2018 14:16:35 -0700 Subject: [PATCH 2/4] Added the extra line --- speech/cloud-client/beta_snippets.py | 8 +++++--- speech/cloud-client/beta_snippets_test.py | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/speech/cloud-client/beta_snippets.py b/speech/cloud-client/beta_snippets.py index 6382b0b401b..4fffae9521b 100644 --- a/speech/cloud-client/beta_snippets.py +++ b/speech/cloud-client/beta_snippets.py @@ -266,7 +266,7 @@ def transcribe_file_with_word_level_confidence(speech_file): print('-' * 20) print('First alternative of result {}'.format(i)) print(u'Transcript: {}'.format(alternative.transcript)) - print(u'First Word and Confidence:{} {}'.format( + print(u'First Word and Confidence: ({}, {})'.format( alternative.words[0].word, alternative.words[0].confidence)) # [END speech_transcribe_word_level_confidence] @@ -279,9 +279,11 @@ def transcribe_file_with_word_level_confidence(speech_file): parser.add_argument( 'path', help='File for audio file to be recognized') parser.add_argument( - 'first', help='First language in audio file to be recognized') + 'first', help='First language in audio file to be recognized', + nargs='?') parser.add_argument( - 'second', help='Second language in audio file to be recognized') + 'second', help='Second language in audio file to be recognized', + nargs='?') args = parser.parse_args() diff --git a/speech/cloud-client/beta_snippets_test.py b/speech/cloud-client/beta_snippets_test.py index ed25f7785f3..bbb6c75f674 100644 --- a/speech/cloud-client/beta_snippets_test.py +++ b/speech/cloud-client/beta_snippets_test.py @@ -78,4 +78,4 @@ def test_transcribe_word_level_confidence(capsys): os.path.join(RESOURCES, 'Google_Gnome.wav')) out, err = capsys.readouterr() - assert 'OK Google stream stranger things from Netflix to my TV' in out \ No newline at end of file + assert 'OK Google stream stranger things from Netflix to my TV' in out From 3e4b31fbadb03f461e3cd3bb0a2a8522f8fe0b29 Mon Sep 17 00:00:00 2001 From: happyhuman Date: Mon, 16 Jul 2018 14:23:47 -0700 Subject: [PATCH 3/4] Added parameter comment --- speech/cloud-client/beta_snippets.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/speech/cloud-client/beta_snippets.py b/speech/cloud-client/beta_snippets.py index 4fffae9521b..099f5799810 100644 --- a/speech/cloud-client/beta_snippets.py +++ b/speech/cloud-client/beta_snippets.py @@ -248,6 +248,9 @@ def transcribe_file_with_word_level_confidence(speech_file): from google.cloud import speech_v1p1beta1 as speech client = speech.SpeechClient() + # TODO(developer): Uncomment and set to a path to your audio file. + # speech_file = 'path/to/file.wav' + with open(speech_file, 'rb') as audio_file: content = audio_file.read() @@ -300,4 +303,4 @@ def transcribe_file_with_word_level_confidence(speech_file): elif args.command == 'multi-language': transcribe_file_with_multilanguage(args.path, args.first, args.second) elif args.command == 'word-level-conf': - transcribe_file_with_word_level_confidence(args.path) \ No newline at end of file + transcribe_file_with_word_level_confidence(args.path) From e111f93f5824b6cad36080310e999c58d3964447 Mon Sep 17 00:00:00 2001 From: happyhuman Date: Mon, 16 Jul 2018 14:28:04 -0700 Subject: [PATCH 4/4] Removed the line with blank space --- speech/cloud-client/beta_snippets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/speech/cloud-client/beta_snippets.py b/speech/cloud-client/beta_snippets.py index 099f5799810..24e213be356 100644 --- a/speech/cloud-client/beta_snippets.py +++ b/speech/cloud-client/beta_snippets.py @@ -250,7 +250,7 @@ def transcribe_file_with_word_level_confidence(speech_file): # TODO(developer): Uncomment and set to a path to your audio file. # speech_file = 'path/to/file.wav' - + with open(speech_file, 'rb') as audio_file: content = audio_file.read()