From 0c9a0cf3d4b4b0c998c4a97fc8d6e969ebf9f30a Mon Sep 17 00:00:00 2001 From: Shahin Date: Mon, 16 Jul 2018 15:42:28 -0700 Subject: [PATCH] Added the sample for Word Level Confidence [(#1567)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1567) * Added the sample for Word Level Confidence * Added the extra line * Added parameter comment * Removed the line with blank space --- .../samples/snippets/README.rst | 1 + .../samples/snippets/beta_snippets.py | 42 ++++++++++++++++++- .../samples/snippets/beta_snippets_test.py | 11 ++++- 3 files changed, 51 insertions(+), 3 deletions(-) diff --git a/packages/google-cloud-python-speech/samples/snippets/README.rst b/packages/google-cloud-python-speech/samples/snippets/README.rst index 01e73fb54c9a..394c97c81f2f 100644 --- a/packages/google-cloud-python-speech/samples/snippets/README.rst +++ b/packages/google-cloud-python-speech/samples/snippets/README.rst @@ -233,6 +233,7 @@ To run this sample: python beta_snippets.py diarization resources/commercial_mono.wav python beta_snippets.py multi-channel resources/commercial_mono.wav python beta_snippets.py multi-language resources/multi.wav en-US es + python beta_snippets.py word-level-conf resources/commercial_mono.wav positional arguments: command diff --git a/packages/google-cloud-python-speech/samples/snippets/beta_snippets.py b/packages/google-cloud-python-speech/samples/snippets/beta_snippets.py index 87a6cac822b3..24e213be3565 100644 --- a/packages/google-cloud-python-speech/samples/snippets/beta_snippets.py +++ b/packages/google-cloud-python-speech/samples/snippets/beta_snippets.py @@ -24,6 +24,7 @@ python beta_snippets.py diarization resources/commercial_mono.wav python beta_snippets.py multi-channel resources/commercial_mono.wav python beta_snippets.py multi-language resources/multi.wav en-US es + python beta_snippets.py word-level-conf resources/commercial_mono.wav """ import argparse @@ -240,6 +241,39 @@ def transcribe_file_with_multilanguage(speech_file, first_lang, second_lang): # [END speech_transcribe_multilanguage] +def transcribe_file_with_word_level_confidence(speech_file): + """Transcribe the given audio file synchronously with + word level confidence.""" + # [START speech_transcribe_word_level_confidence] + from google.cloud import speech_v1p1beta1 as speech + client = speech.SpeechClient() + + # TODO(developer): Uncomment and set to a path to your audio file. + # speech_file = 'path/to/file.wav' + + with open(speech_file, 'rb') as audio_file: + content = audio_file.read() + + audio = speech.types.RecognitionAudio(content=content) + + config = speech.types.RecognitionConfig( + encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16, + sample_rate_hertz=16000, + language_code='en-US', + enable_word_confidence=True) + + response = client.recognize(config, audio) + + for i, result in enumerate(response.results): + alternative = result.alternatives[0] + print('-' * 20) + print('First alternative of result {}'.format(i)) + print(u'Transcript: {}'.format(alternative.transcript)) + print(u'First Word and Confidence: ({}, {})'.format( + alternative.words[0].word, alternative.words[0].confidence)) + # [END speech_transcribe_word_level_confidence] + + if __name__ == '__main__': parser = argparse.ArgumentParser( description=__doc__, @@ -248,9 +282,11 @@ def transcribe_file_with_multilanguage(speech_file, first_lang, second_lang): parser.add_argument( 'path', help='File for audio file to be recognized') parser.add_argument( - 'first', help='First language in audio file to be recognized') + 'first', help='First language in audio file to be recognized', + nargs='?') parser.add_argument( - 'second', help='Second language in audio file to be recognized') + 'second', help='Second language in audio file to be recognized', + nargs='?') args = parser.parse_args() @@ -266,3 +302,5 @@ def transcribe_file_with_multilanguage(speech_file, first_lang, second_lang): transcribe_file_with_multichannel(args.path) elif args.command == 'multi-language': transcribe_file_with_multilanguage(args.path, args.first, args.second) + elif args.command == 'word-level-conf': + transcribe_file_with_word_level_confidence(args.path) diff --git a/packages/google-cloud-python-speech/samples/snippets/beta_snippets_test.py b/packages/google-cloud-python-speech/samples/snippets/beta_snippets_test.py index 10f0f4dba36e..bbb6c75f674f 100644 --- a/packages/google-cloud-python-speech/samples/snippets/beta_snippets_test.py +++ b/packages/google-cloud-python-speech/samples/snippets/beta_snippets_test.py @@ -19,7 +19,8 @@ transcribe_file_with_enhanced_model, transcribe_file_with_metadata, transcribe_file_with_multichannel, - transcribe_file_with_multilanguage) + transcribe_file_with_multilanguage, + transcribe_file_with_word_level_confidence) RESOURCES = os.path.join(os.path.dirname(__file__), 'resources') @@ -70,3 +71,11 @@ def test_transcribe_multilanguage_file(capsys): out, err = capsys.readouterr() assert 'how are you doing estoy bien e tu' in out + + +def test_transcribe_word_level_confidence(capsys): + transcribe_file_with_word_level_confidence( + os.path.join(RESOURCES, 'Google_Gnome.wav')) + out, err = capsys.readouterr() + + assert 'OK Google stream stranger things from Netflix to my TV' in out