1414# limitations under the License.
1515"""Sample that streams audio to the Google Cloud Speech API via GRPC."""
1616
17- from __future__ import division
18-
1917import contextlib
2018import re
2119import threading
2220
2321from gcloud .credentials import get_credentials
24- from google .cloud .speech .v1beta1 import cloud_speech_pb2 as cloud_speech
22+ from google .cloud .speech .v1 import cloud_speech_pb2 as cloud_speech
2523from google .rpc import code_pb2
2624from grpc .beta import implementations
2725import pyaudio
2826
2927# Audio recording parameters
3028RATE = 16000
3129CHANNELS = 1
32- CHUNK = int ( RATE / 10 ) # 100ms
30+ CHUNK = RATE // 10 # 100ms
3331
3432# Keep the request alive for this many seconds
3533DEADLINE_SECS = 8 * 60 * 60
@@ -45,15 +43,15 @@ def make_channel(host, port):
4543 creds = get_credentials ().create_scoped ([SPEECH_SCOPE ])
4644 # Add a plugin to inject the creds into the header
4745 auth_header = (
48- 'Authorization' ,
49- 'Bearer ' + creds .get_access_token ().access_token )
46+ 'Authorization' ,
47+ 'Bearer ' + creds .get_access_token ().access_token )
5048 auth_plugin = implementations .metadata_call_credentials (
51- lambda _ , cb : cb ([auth_header ], None ),
52- name = 'google_creds' )
49+ lambda _ , cb : cb ([auth_header ], None ),
50+ name = 'google_creds' )
5351
5452 # compose the two together for both ssl and google auth
5553 composite_channel = implementations .composite_channel_credentials (
56- ssl_channel , auth_plugin )
54+ ssl_channel , auth_plugin )
5755
5856 return implementations .secure_channel (host , port , composite_channel )
5957
@@ -77,40 +75,41 @@ def record_audio(channels, rate, chunk):
7775
7876
def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
    """Yields `RecognizeRequest`s constructed from a recording audio stream.

    The first request yielded carries both the stream metadata
    (`InitialRecognizeRequest`) and the first chunk of audio; every
    following request carries audio content only. The generator finishes
    when `stop_audio` is set or when the audio stream returns no data.

    Args:
        stop_audio: A threading.Event object stops the recording when set.
        channels: How many audio channels to record.
        rate: The sampling rate.
        chunk: Buffer audio into chunks of this size before sending to the api.

    Yields:
        `cloud_speech.RecognizeRequest` messages, in the order the audio
        was read from the recording stream.
    """
    with record_audio(channels, rate, chunk) as audio_stream:
        # The initial request must contain metadata about the stream, so the
        # server knows how to interpret it.
        metadata = cloud_speech.InitialRecognizeRequest(
            encoding='LINEAR16', sample_rate=rate,
            # Note that setting interim_results to True means that you'll
            # likely get multiple results for the same bit of audio, as the
            # system re-interprets audio in the context of subsequent audio.
            # However, this will give us quick results without having to tell
            # the server when to finalize a piece of audio.
            interim_results=True, continuous=False,
        )
        data = audio_stream.read(chunk)
        audio_request = cloud_speech.AudioRequest(content=data)

        yield cloud_speech.RecognizeRequest(
            initial_request=metadata,
            audio_request=audio_request)

        while not stop_audio.is_set():
            data = audio_stream.read(chunk)
            if not data:
                # End the generator with a plain return: raising
                # StopIteration inside a generator body is converted to a
                # RuntimeError under PEP 479 (the default since Python 3.7).
                return

            # Subsequent requests can all just have the content
            audio_request = cloud_speech.AudioRequest(content=data)

            yield cloud_speech.RecognizeRequest(audio_request=audio_request)
114113
115114
116115def listen_print_loop (recognize_stream ):
@@ -137,8 +136,7 @@ def main():
137136 make_channel ('speech.googleapis.com' , 443 )) as service :
138137 try :
139138 listen_print_loop (
140- service .StreamingRecognize (
141- request_stream (stop_audio ), DEADLINE_SECS ))
139+ service .Recognize (request_stream (stop_audio ), DEADLINE_SECS ))
142140 finally :
143141 # Stop the request stream once we're done with the loop - otherwise
144142 # it'll keep going in the thread that the grpc lib makes for it..
0 commit comments