diff --git a/packages/google-cloud-speech/package.json b/packages/google-cloud-speech/package.json index 2324cda4331..b0de8a953e6 100644 --- a/packages/google-cloud-speech/package.json +++ b/packages/google-cloud-speech/package.json @@ -1,7 +1,7 @@ { "name": "@google-cloud/speech", "description": "Cloud Speech Client Library for Node.js", - "version": "1.0.1", + "version": "1.1.0", "license": "Apache-2.0", "author": "Google Inc.", "engines": { diff --git a/packages/google-cloud-speech/protos/google/cloud/speech/v1p1beta1/cloud_speech.proto b/packages/google-cloud-speech/protos/google/cloud/speech/v1p1beta1/cloud_speech.proto new file mode 100644 index 00000000000..5c33ba0bb07 --- /dev/null +++ b/packages/google-cloud-speech/protos/google/cloud/speech/v1p1beta1/cloud_speech.proto @@ -0,0 +1,455 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package google.cloud.speech.v1p1beta1; + +import "google/api/annotations.proto"; +import "google/longrunning/operations.proto"; +import "google/protobuf/any.proto"; +import "google/protobuf/duration.proto"; +import "google/protobuf/timestamp.proto"; +import "google/rpc/status.proto"; + +option cc_enable_arenas = true; +option go_package = "google.golang.org/genproto/googleapis/cloud/speech/v1p1beta1;speech"; +option java_multiple_files = true; +option java_outer_classname = "SpeechProto"; +option java_package = "com.google.cloud.speech.v1p1beta1"; + + +// Service that implements Google Cloud Speech API. +service Speech { + // Performs synchronous speech recognition: receive results after all audio + // has been sent and processed. + rpc Recognize(RecognizeRequest) returns (RecognizeResponse) { + option (google.api.http) = { post: "/v1p1beta1/speech:recognize" body: "*" }; + } + + // Performs asynchronous speech recognition: receive results via the + // google.longrunning.Operations interface. Returns either an + // `Operation.error` or an `Operation.response` which contains + // a `LongRunningRecognizeResponse` message. + rpc LongRunningRecognize(LongRunningRecognizeRequest) returns (google.longrunning.Operation) { + option (google.api.http) = { post: "/v1p1beta1/speech:longrunningrecognize" body: "*" }; + } + + // Performs bidirectional streaming speech recognition: receive results while + // sending audio. This method is only available via the gRPC API (not REST). + rpc StreamingRecognize(stream StreamingRecognizeRequest) returns (stream StreamingRecognizeResponse); +} + +// The top-level message sent by the client for the `Recognize` method. +message RecognizeRequest { + // *Required* Provides information to the recognizer that specifies how to + // process the request. + RecognitionConfig config = 1; + + // *Required* The audio data to be recognized. + RecognitionAudio audio = 2; +} + +// The top-level message sent by the client for the `LongRunningRecognize` +// method. 
+message LongRunningRecognizeRequest { + // *Required* Provides information to the recognizer that specifies how to + // process the request. + RecognitionConfig config = 1; + + // *Required* The audio data to be recognized. + RecognitionAudio audio = 2; +} + +// The top-level message sent by the client for the `StreamingRecognize` method. +// Multiple `StreamingRecognizeRequest` messages are sent. The first message +// must contain a `streaming_config` message and must not contain `audio` data. +// All subsequent messages must contain `audio` data and must not contain a +// `streaming_config` message. +message StreamingRecognizeRequest { + // The streaming request, which is either a streaming config or audio content. + oneof streaming_request { + // Provides information to the recognizer that specifies how to process the + // request. The first `StreamingRecognizeRequest` message must contain a + // `streaming_config` message. + StreamingRecognitionConfig streaming_config = 1; + + // The audio data to be recognized. Sequential chunks of audio data are sent + // in sequential `StreamingRecognizeRequest` messages. The first + // `StreamingRecognizeRequest` message must not contain `audio_content` data + // and all subsequent `StreamingRecognizeRequest` messages must contain + // `audio_content` data. The audio bytes must be encoded as specified in + // `RecognitionConfig`. Note: as with all bytes fields, protobuffers use a + // pure binary representation (not base64). See + // [audio limits](https://cloud.google.com/speech/limits#content). + bytes audio_content = 2; + } +} + +// Provides information to the recognizer that specifies how to process the +// request. +message StreamingRecognitionConfig { + // *Required* Provides information to the recognizer that specifies how to + // process the request. + RecognitionConfig config = 1; + + // *Optional* If `false` or omitted, the recognizer will perform continuous + // recognition (continuing to wait for and process audio even if the user + // pauses speaking) until the client closes the input stream (gRPC API) or + // until the maximum time limit has been reached. May return multiple + // `StreamingRecognitionResult`s with the `is_final` flag set to `true`. + // + // If `true`, the recognizer will detect a single spoken utterance. When it + // detects that the user has paused or stopped speaking, it will return an + // `END_OF_SINGLE_UTTERANCE` event and cease recognition. It will return no + // more than one `StreamingRecognitionResult` with the `is_final` flag set to + // `true`. + bool single_utterance = 2; + + // *Optional* If `true`, interim results (tentative hypotheses) may be + // returned as they become available (these interim results are indicated with + // the `is_final=false` flag). + // If `false` or omitted, only `is_final=true` result(s) are returned. + bool interim_results = 3; +} + +// Provides information to the recognizer that specifies how to process the +// request. +message RecognitionConfig { + // The encoding of the audio data sent in the request. + // + // All encodings support only 1 channel (mono) audio. + // + // If you send a `FLAC` or `WAV` audio file format in the request, + // then if you specify an encoding in `AudioEncoding`, it must match the + // encoding described in the audio header. If it does not match, then the + // request returns an + // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error code. 
You can request + // recognition for `WAV` files that contain either `LINEAR16` or `MULAW` + // encoded audio. + // For audio file formats other than `FLAC` or `WAV`, you must + // specify the audio encoding in your `RecognitionConfig`. + // + // For best results, the audio source should be captured and transmitted using + // a lossless encoding (`FLAC` or `LINEAR16`). The accuracy of the speech + // recognition can be reduced if lossy codecs, which include the other codecs + // listed in this section, are used to capture or transmit the audio, + // particularly if background noise is present. + enum AudioEncoding { + // Not specified. + ENCODING_UNSPECIFIED = 0; + + // Uncompressed 16-bit signed little-endian samples (Linear PCM). + LINEAR16 = 1; + + // [`FLAC`](https://xiph.org/flac/documentation.html) (Free Lossless Audio + // Codec) is the recommended encoding because it is + // lossless--therefore recognition is not compromised--and + // requires only about half the bandwidth of `LINEAR16`. `FLAC` stream + // encoding supports 16-bit and 24-bit samples, however, not all fields in + // `STREAMINFO` are supported. + FLAC = 2; + + // 8-bit samples that compand 14-bit audio samples using G.711 PCMU/mu-law. + MULAW = 3; + + // Adaptive Multi-Rate Narrowband codec. `sample_rate_hertz` must be 8000. + AMR = 4; + + // Adaptive Multi-Rate Wideband codec. `sample_rate_hertz` must be 16000. + AMR_WB = 5; + + // Opus encoded audio frames in Ogg container + // ([OggOpus](https://wiki.xiph.org/OggOpus)). + // `sample_rate_hertz` must be one of 8000, 12000, 16000, 24000, or 48000. + OGG_OPUS = 6; + + // Although the use of lossy encodings is not recommended, if a very low + // bitrate encoding is required, `OGG_OPUS` is highly preferred over + // Speex encoding. The [Speex](https://speex.org/) encoding supported by + // Cloud Speech API has a header byte in each block, as in MIME type + // `audio/x-speex-with-header-byte`. + // It is a variant of the RTP Speex encoding defined in + // [RFC 5574](https://tools.ietf.org/html/rfc5574). + // The stream is a sequence of blocks, one block per RTP packet. Each block + // starts with a byte containing the length of the block, in bytes, followed + // by one or more frames of Speex data, padded to an integral number of + // bytes (octets) as specified in RFC 5574. In other words, each RTP header + // is replaced with a single byte containing the block length. Only Speex + // wideband is supported. `sample_rate_hertz` must be 16000. + SPEEX_WITH_HEADER_BYTE = 7; + } + + // *Required* Encoding of audio data sent in all `RecognitionAudio` messages. + AudioEncoding encoding = 1; + + // *Required* Sample rate in Hertz of the audio data sent in all + // `RecognitionAudio` messages. Valid values are: 8000-48000. + // 16000 is optimal. For best results, set the sampling rate of the audio + // source to 16000 Hz. If that's not possible, use the native sample rate of + // the audio source (instead of re-sampling). + int32 sample_rate_hertz = 2; + + // *Required* The language of the supplied audio as a + // [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag. + // Example: "en-US". + // See [Language Support](https://cloud.google.com/speech/docs/languages) + // for a list of the currently supported language codes. + string language_code = 3; + + // *Optional* Maximum number of recognition hypotheses to be returned. + // Specifically, the maximum number of `SpeechRecognitionAlternative` messages + // within each `SpeechRecognitionResult`. 
+ // The server may return fewer than `max_alternatives`. + // Valid values are `0`-`30`. A value of `0` or `1` will return a maximum of + // one. If omitted, will return a maximum of one. + int32 max_alternatives = 4; + + // *Optional* If set to `true`, the server will attempt to filter out + // profanities, replacing all but the initial character in each filtered word + // with asterisks, e.g. "f***". If set to `false` or omitted, profanities + // won't be filtered out. + bool profanity_filter = 5; + + // *Optional* A means to provide context to assist the speech recognition. + repeated SpeechContext speech_contexts = 6; + + // *Optional* If `true`, the top result includes a list of words and + // the start and end time offsets (timestamps) for those words. If + // `false`, no word-level time offset information is returned. The default is + // `false`. + bool enable_word_time_offsets = 8; +} + +// Provides "hints" to the speech recognizer to favor specific words and phrases +// in the results. +message SpeechContext { + // *Optional* A list of strings containing words and phrases "hints" so that + // the speech recognition is more likely to recognize them. This can be used + // to improve the accuracy for specific words and phrases, for example, if + // specific commands are typically spoken by the user. This can also be used + // to add additional words to the vocabulary of the recognizer. See + // [usage limits](https://cloud.google.com/speech/limits#content). + repeated string phrases = 1; +} + +// Contains audio data in the encoding specified in the `RecognitionConfig`. +// Either `content` or `uri` must be supplied. Supplying both or neither +// returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. See +// [audio limits](https://cloud.google.com/speech/limits#content). +message RecognitionAudio { + // The audio source, which is either inline content or a Google Cloud + // Storage uri. + oneof audio_source { + // The audio data bytes encoded as specified in + // `RecognitionConfig`. Note: as with all bytes fields, protobuffers use a + // pure binary representation, whereas JSON representations use base64. + bytes content = 1; + + // URI that points to a file that contains audio data bytes as specified in + // `RecognitionConfig`. Currently, only Google Cloud Storage URIs are + // supported, which must be specified in the following format: + // `gs://bucket_name/object_name` (other URI formats return + // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see + // [Request URIs](https://cloud.google.com/storage/docs/reference-uris). + string uri = 2; + } +} + +// The only message returned to the client by the `Recognize` method. It +// contains the result as zero or more sequential `SpeechRecognitionResult` +// messages. +message RecognizeResponse { + // *Output-only* Sequential list of transcription results corresponding to + // sequential portions of audio. + repeated SpeechRecognitionResult results = 2; +} + +// The only message returned to the client by the `LongRunningRecognize` method. +// It contains the result as zero or more sequential `SpeechRecognitionResult` +// messages. It is included in the `result.response` field of the `Operation` +// returned by the `GetOperation` call of the `google::longrunning::Operations` +// service. +message LongRunningRecognizeResponse { + // *Output-only* Sequential list of transcription results corresponding to + // sequential portions of audio. 
+ repeated SpeechRecognitionResult results = 2; +} + +// Describes the progress of a long-running `LongRunningRecognize` call. It is +// included in the `metadata` field of the `Operation` returned by the +// `GetOperation` call of the `google::longrunning::Operations` service. +message LongRunningRecognizeMetadata { + // Approximate percentage of audio processed thus far. Guaranteed to be 100 + // when the audio is fully processed and the results are available. + int32 progress_percent = 1; + + // Time when the request was received. + google.protobuf.Timestamp start_time = 2; + + // Time of the most recent processing update. + google.protobuf.Timestamp last_update_time = 3; +} + +// `StreamingRecognizeResponse` is the only message returned to the client by +// `StreamingRecognize`. A series of zero or more `StreamingRecognizeResponse` +// messages are streamed back to the client. If there is no recognizable +// audio, and `single_utterance` is set to false, then no messages are streamed +// back to the client. +// +// Here's an example of a series of ten `StreamingRecognizeResponse`s that might +// be returned while processing audio: +// +// 1. results { alternatives { transcript: "tube" } stability: 0.01 } +// +// 2. results { alternatives { transcript: "to be a" } stability: 0.01 } +// +// 3. results { alternatives { transcript: "to be" } stability: 0.9 } +// results { alternatives { transcript: " or not to be" } stability: 0.01 } +// +// 4. results { alternatives { transcript: "to be or not to be" +// confidence: 0.92 } +// alternatives { transcript: "to bee or not to bee" } +// is_final: true } +// +// 5. results { alternatives { transcript: " that's" } stability: 0.01 } +// +// 6. results { alternatives { transcript: " that is" } stability: 0.9 } +// results { alternatives { transcript: " the question" } stability: 0.01 } +// +// 7. results { alternatives { transcript: " that is the question" +// confidence: 0.98 } +// alternatives { transcript: " that was the question" } +// is_final: true } +// +// Notes: +// +// - Only two of the above responses #4 and #7 contain final results; they are +// indicated by `is_final: true`. Concatenating these together generates the +// full transcript: "to be or not to be that is the question". +// +// - The others contain interim `results`. #3 and #6 contain two interim +// `results`: the first portion has a high stability and is less likely to +// change; the second portion has a low stability and is very likely to +// change. A UI designer might choose to show only high stability `results`. +// +// - The specific `stability` and `confidence` values shown above are only for +// illustrative purposes. Actual values may vary. +// +// - In each response, only one of these fields will be set: +// `error`, +// `speech_event_type`, or +// one or more (repeated) `results`. +message StreamingRecognizeResponse { + // Indicates the type of speech event. + enum SpeechEventType { + // No speech event specified. + SPEECH_EVENT_UNSPECIFIED = 0; + + // This event indicates that the server has detected the end of the user's + // speech utterance and expects no additional speech. Therefore, the server + // will not process additional audio (although it may subsequently return + // additional results). The client should stop sending additional audio + // data, half-close the gRPC connection, and wait for any additional results + // until the server closes the gRPC connection. 
This event is only sent if + // `single_utterance` was set to `true`, and is not used otherwise. + END_OF_SINGLE_UTTERANCE = 1; + } + + // *Output-only* If set, returns a [google.rpc.Status][google.rpc.Status] message that + // specifies the error for the operation. + google.rpc.Status error = 1; + + // *Output-only* This repeated list contains zero or more results that + // correspond to consecutive portions of the audio currently being processed. + // It contains zero or one `is_final=true` result (the newly settled portion), + // followed by zero or more `is_final=false` results (the interim results). + repeated StreamingRecognitionResult results = 2; + + // *Output-only* Indicates the type of speech event. + SpeechEventType speech_event_type = 4; +} + +// A streaming speech recognition result corresponding to a portion of the audio +// that is currently being processed. +message StreamingRecognitionResult { + // *Output-only* May contain one or more recognition hypotheses (up to the + // maximum specified in `max_alternatives`). + // These alternatives are ordered in terms of accuracy, with the top (first) + // alternative being the most probable, as ranked by the recognizer. + repeated SpeechRecognitionAlternative alternatives = 1; + + // *Output-only* If `false`, this `StreamingRecognitionResult` represents an + // interim result that may change. If `true`, this is the final time the + // speech service will return this particular `StreamingRecognitionResult`, + // the recognizer will not return any further hypotheses for this portion of + // the transcript and corresponding audio. + bool is_final = 2; + + // *Output-only* An estimate of the likelihood that the recognizer will not + // change its guess about this interim result. Values range from 0.0 + // (completely unstable) to 1.0 (completely stable). + // This field is only provided for interim results (`is_final=false`). + // The default of 0.0 is a sentinel value indicating `stability` was not set. + float stability = 3; +} + +// A speech recognition result corresponding to a portion of the audio. +message SpeechRecognitionResult { + // *Output-only* May contain one or more recognition hypotheses (up to the + // maximum specified in `max_alternatives`). + // These alternatives are ordered in terms of accuracy, with the top (first) + // alternative being the most probable, as ranked by the recognizer. + repeated SpeechRecognitionAlternative alternatives = 1; +} + +// Alternative hypotheses (a.k.a. n-best list). +message SpeechRecognitionAlternative { + // *Output-only* Transcript text representing the words that the user spoke. + string transcript = 1; + + // *Output-only* The confidence estimate between 0.0 and 1.0. A higher number + // indicates an estimated greater likelihood that the recognized words are + // correct. This field is set only for the top alternative of a non-streaming + // result or, of a streaming result where `is_final=true`. + // This field is not guaranteed to be accurate and users should not rely on it + // to be always provided. + // The default of 0.0 is a sentinel value indicating `confidence` was not set. + float confidence = 2; + + // *Output-only* A list of word-specific information for each recognized word. + repeated WordInfo words = 3; +} + +// Word-specific information for recognized words. +message WordInfo { + // *Output-only* Time offset relative to the beginning of the audio, + // and corresponding to the start of the spoken word. 
+ // This field is only set if `enable_word_time_offsets=true` and only + // in the top hypothesis. + // This is an experimental feature and the accuracy of the time offset can + // vary. + google.protobuf.Duration start_time = 1; + + // *Output-only* Time offset relative to the beginning of the audio, + // and corresponding to the end of the spoken word. + // This field is only set if `enable_word_time_offsets=true` and only + // in the top hypothesis. + // This is an experimental feature and the accuracy of the time offset can + // vary. + google.protobuf.Duration end_time = 2; + + // *Output-only* The word corresponding to this set of information. + string word = 3; +} diff --git a/packages/google-cloud-speech/src/index.js b/packages/google-cloud-speech/src/index.js index 360f9e10c59..59889bf3166 100644 --- a/packages/google-cloud-speech/src/index.js +++ b/packages/google-cloud-speech/src/index.js @@ -41,6 +41,7 @@ const helpers = require('./helpers'); // Import the clients for each version supported by this package. const gapic = Object.freeze({ v1: require('./v1'), + v1p1beta1: require('./v1p1beta1'), }); // Augment the SpeechClient objects with the helpers. @@ -82,6 +83,7 @@ module.exports = gapic.v1; * @property {constructor} SpeechClient Reference to {@link v1.SpeechClient}. */ module.exports.v1 = gapic.v1; +module.exports.v1p1beta1 = gapic.v1p1beta1; // Alias `module.exports` as `module.exports.default`, for future-proofing. module.exports.default = Object.assign({}, module.exports); diff --git a/packages/google-cloud-speech/src/v1p1beta1/doc/google/cloud/speech/v1p1beta1/doc_cloud_speech.js b/packages/google-cloud-speech/src/v1p1beta1/doc/google/cloud/speech/v1p1beta1/doc_cloud_speech.js new file mode 100644 index 00000000000..16aab59d70b --- /dev/null +++ b/packages/google-cloud-speech/src/v1p1beta1/doc/google/cloud/speech/v1p1beta1/doc_cloud_speech.js @@ -0,0 +1,604 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Note: this file is purely for documentation. Any contents are not expected +// to be loaded as the JS file. + +/** + * The top-level message sent by the client for the `Recognize` method. + * + * @property {Object} config + * *Required* Provides information to the recognizer that specifies how to + * process the request. + * + * This object should have the same structure as [RecognitionConfig]{@link google.cloud.speech.v1p1beta1.RecognitionConfig} + * + * @property {Object} audio + * *Required* The audio data to be recognized. + * + * This object should have the same structure as [RecognitionAudio]{@link google.cloud.speech.v1p1beta1.RecognitionAudio} + * + * @typedef RecognizeRequest + * @memberof google.cloud.speech.v1p1beta1 + * @see [google.cloud.speech.v1p1beta1.RecognizeRequest definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1p1beta1/cloud_speech.proto} + */ +var RecognizeRequest = { + // This is for documentation. Actual contents will be loaded by gRPC. 
+}; + +/** + * The top-level message sent by the client for the `LongRunningRecognize` + * method. + * + * @property {Object} config + * *Required* Provides information to the recognizer that specifies how to + * process the request. + * + * This object should have the same structure as [RecognitionConfig]{@link google.cloud.speech.v1p1beta1.RecognitionConfig} + * + * @property {Object} audio + * *Required* The audio data to be recognized. + * + * This object should have the same structure as [RecognitionAudio]{@link google.cloud.speech.v1p1beta1.RecognitionAudio} + * + * @typedef LongRunningRecognizeRequest + * @memberof google.cloud.speech.v1p1beta1 + * @see [google.cloud.speech.v1p1beta1.LongRunningRecognizeRequest definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1p1beta1/cloud_speech.proto} + */ +var LongRunningRecognizeRequest = { + // This is for documentation. Actual contents will be loaded by gRPC. +}; + +/** + * The top-level message sent by the client for the `StreamingRecognize` method. + * Multiple `StreamingRecognizeRequest` messages are sent. The first message + * must contain a `streaming_config` message and must not contain `audio` data. + * All subsequent messages must contain `audio` data and must not contain a + * `streaming_config` message. + * + * @property {Object} streamingConfig + * Provides information to the recognizer that specifies how to process the + * request. The first `StreamingRecognizeRequest` message must contain a + * `streaming_config` message. + * + * This object should have the same structure as [StreamingRecognitionConfig]{@link google.cloud.speech.v1p1beta1.StreamingRecognitionConfig} + * + * @property {string} audioContent + * The audio data to be recognized. Sequential chunks of audio data are sent + * in sequential `StreamingRecognizeRequest` messages. The first + * `StreamingRecognizeRequest` message must not contain `audio_content` data + * and all subsequent `StreamingRecognizeRequest` messages must contain + * `audio_content` data. The audio bytes must be encoded as specified in + * `RecognitionConfig`. Note: as with all bytes fields, protobuffers use a + * pure binary representation (not base64). See + * [audio limits](https://cloud.google.com/speech/limits#content). + * + * @typedef StreamingRecognizeRequest + * @memberof google.cloud.speech.v1p1beta1 + * @see [google.cloud.speech.v1p1beta1.StreamingRecognizeRequest definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1p1beta1/cloud_speech.proto} + */ +var StreamingRecognizeRequest = { + // This is for documentation. Actual contents will be loaded by gRPC. +}; + +/** + * Provides information to the recognizer that specifies how to process the + * request. + * + * @property {Object} config + * *Required* Provides information to the recognizer that specifies how to + * process the request. + * + * This object should have the same structure as [RecognitionConfig]{@link google.cloud.speech.v1p1beta1.RecognitionConfig} + * + * @property {boolean} singleUtterance + * *Optional* If `false` or omitted, the recognizer will perform continuous + * recognition (continuing to wait for and process audio even if the user + * pauses speaking) until the client closes the input stream (gRPC API) or + * until the maximum time limit has been reached. May return multiple + * `StreamingRecognitionResult`s with the `is_final` flag set to `true`. 
+ * + * If `true`, the recognizer will detect a single spoken utterance. When it + * detects that the user has paused or stopped speaking, it will return an + * `END_OF_SINGLE_UTTERANCE` event and cease recognition. It will return no + * more than one `StreamingRecognitionResult` with the `is_final` flag set to + * `true`. + * + * @property {boolean} interimResults + * *Optional* If `true`, interim results (tentative hypotheses) may be + * returned as they become available (these interim results are indicated with + * the `is_final=false` flag). + * If `false` or omitted, only `is_final=true` result(s) are returned. + * + * @typedef StreamingRecognitionConfig + * @memberof google.cloud.speech.v1p1beta1 + * @see [google.cloud.speech.v1p1beta1.StreamingRecognitionConfig definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1p1beta1/cloud_speech.proto} + */ +var StreamingRecognitionConfig = { + // This is for documentation. Actual contents will be loaded by gRPC. +}; + +/** + * Provides information to the recognizer that specifies how to process the + * request. + * + * @property {number} encoding + * *Required* Encoding of audio data sent in all `RecognitionAudio` messages. + * + * The number should be among the values of [AudioEncoding]{@link google.cloud.speech.v1p1beta1.AudioEncoding} + * + * @property {number} sampleRateHertz + * *Required* Sample rate in Hertz of the audio data sent in all + * `RecognitionAudio` messages. Valid values are: 8000-48000. + * 16000 is optimal. For best results, set the sampling rate of the audio + * source to 16000 Hz. If that's not possible, use the native sample rate of + * the audio source (instead of re-sampling). + * + * @property {string} languageCode + * *Required* The language of the supplied audio as a + * [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag. + * Example: "en-US". + * See [Language Support](https://cloud.google.com/speech/docs/languages) + * for a list of the currently supported language codes. + * + * @property {number} maxAlternatives + * *Optional* Maximum number of recognition hypotheses to be returned. + * Specifically, the maximum number of `SpeechRecognitionAlternative` messages + * within each `SpeechRecognitionResult`. + * The server may return fewer than `max_alternatives`. + * Valid values are `0`-`30`. A value of `0` or `1` will return a maximum of + * one. If omitted, will return a maximum of one. + * + * @property {boolean} profanityFilter + * *Optional* If set to `true`, the server will attempt to filter out + * profanities, replacing all but the initial character in each filtered word + * with asterisks, e.g. "f***". If set to `false` or omitted, profanities + * won't be filtered out. + * + * @property {Object[]} speechContexts + * *Optional* A means to provide context to assist the speech recognition. + * + * This object should have the same structure as [SpeechContext]{@link google.cloud.speech.v1p1beta1.SpeechContext} + * + * @property {boolean} enableWordTimeOffsets + * *Optional* If `true`, the top result includes a list of words and + * the start and end time offsets (timestamps) for those words. If + * `false`, no word-level time offset information is returned. The default is + * `false`. 
+ * + * @typedef RecognitionConfig + * @memberof google.cloud.speech.v1p1beta1 + * @see [google.cloud.speech.v1p1beta1.RecognitionConfig definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1p1beta1/cloud_speech.proto} + */ +var RecognitionConfig = { + // This is for documentation. Actual contents will be loaded by gRPC. + + /** + * The encoding of the audio data sent in the request. + * + * All encodings support only 1 channel (mono) audio. + * + * If you send a `FLAC` or `WAV` audio file format in the request, + * then if you specify an encoding in `AudioEncoding`, it must match the + * encoding described in the audio header. If it does not match, then the + * request returns an + * google.rpc.Code.INVALID_ARGUMENT error code. You can request + * recognition for `WAV` files that contain either `LINEAR16` or `MULAW` + * encoded audio. + * For audio file formats other than `FLAC` or `WAV`, you must + * specify the audio encoding in your `RecognitionConfig`. + * + * For best results, the audio source should be captured and transmitted using + * a lossless encoding (`FLAC` or `LINEAR16`). The accuracy of the speech + * recognition can be reduced if lossy codecs, which include the other codecs + * listed in this section, are used to capture or transmit the audio, + * particularly if background noise is present. + * + * @enum {number} + * @memberof google.cloud.speech.v1p1beta1 + */ + AudioEncoding: { + + /** + * Not specified. + */ + ENCODING_UNSPECIFIED: 0, + + /** + * Uncompressed 16-bit signed little-endian samples (Linear PCM). + */ + LINEAR16: 1, + + /** + * [`FLAC`](https://xiph.org/flac/documentation.html) (Free Lossless Audio + * Codec) is the recommended encoding because it is + * lossless--therefore recognition is not compromised--and + * requires only about half the bandwidth of `LINEAR16`. `FLAC` stream + * encoding supports 16-bit and 24-bit samples, however, not all fields in + * `STREAMINFO` are supported. + */ + FLAC: 2, + + /** + * 8-bit samples that compand 14-bit audio samples using G.711 PCMU/mu-law. + */ + MULAW: 3, + + /** + * Adaptive Multi-Rate Narrowband codec. `sample_rate_hertz` must be 8000. + */ + AMR: 4, + + /** + * Adaptive Multi-Rate Wideband codec. `sample_rate_hertz` must be 16000. + */ + AMR_WB: 5, + + /** + * Opus encoded audio frames in Ogg container + * ([OggOpus](https://wiki.xiph.org/OggOpus)). + * `sample_rate_hertz` must be one of 8000, 12000, 16000, 24000, or 48000. + */ + OGG_OPUS: 6, + + /** + * Although the use of lossy encodings is not recommended, if a very low + * bitrate encoding is required, `OGG_OPUS` is highly preferred over + * Speex encoding. The [Speex](https://speex.org/) encoding supported by + * Cloud Speech API has a header byte in each block, as in MIME type + * `audio/x-speex-with-header-byte`. + * It is a variant of the RTP Speex encoding defined in + * [RFC 5574](https://tools.ietf.org/html/rfc5574). + * The stream is a sequence of blocks, one block per RTP packet. Each block + * starts with a byte containing the length of the block, in bytes, followed + * by one or more frames of Speex data, padded to an integral number of + * bytes (octets) as specified in RFC 5574. In other words, each RTP header + * is replaced with a single byte containing the block length. Only Speex + * wideband is supported. `sample_rate_hertz` must be 16000. 
+ */ + SPEEX_WITH_HEADER_BYTE: 7 + } +}; + +/** + * Provides "hints" to the speech recognizer to favor specific words and phrases + * in the results. + * + * @property {string[]} phrases + * *Optional* A list of strings containing words and phrases "hints" so that + * the speech recognition is more likely to recognize them. This can be used + * to improve the accuracy for specific words and phrases, for example, if + * specific commands are typically spoken by the user. This can also be used + * to add additional words to the vocabulary of the recognizer. See + * [usage limits](https://cloud.google.com/speech/limits#content). + * + * @typedef SpeechContext + * @memberof google.cloud.speech.v1p1beta1 + * @see [google.cloud.speech.v1p1beta1.SpeechContext definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1p1beta1/cloud_speech.proto} + */ +var SpeechContext = { + // This is for documentation. Actual contents will be loaded by gRPC. +}; + +/** + * Contains audio data in the encoding specified in the `RecognitionConfig`. + * Either `content` or `uri` must be supplied. Supplying both or neither + * returns google.rpc.Code.INVALID_ARGUMENT. See + * [audio limits](https://cloud.google.com/speech/limits#content). + * + * @property {string} content + * The audio data bytes encoded as specified in + * `RecognitionConfig`. Note: as with all bytes fields, protobuffers use a + * pure binary representation, whereas JSON representations use base64. + * + * @property {string} uri + * URI that points to a file that contains audio data bytes as specified in + * `RecognitionConfig`. Currently, only Google Cloud Storage URIs are + * supported, which must be specified in the following format: + * `gs://bucket_name/object_name` (other URI formats return + * google.rpc.Code.INVALID_ARGUMENT). For more information, see + * [Request URIs](https://cloud.google.com/storage/docs/reference-uris). + * + * @typedef RecognitionAudio + * @memberof google.cloud.speech.v1p1beta1 + * @see [google.cloud.speech.v1p1beta1.RecognitionAudio definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1p1beta1/cloud_speech.proto} + */ +var RecognitionAudio = { + // This is for documentation. Actual contents will be loaded by gRPC. +}; + +/** + * The only message returned to the client by the `Recognize` method. It + * contains the result as zero or more sequential `SpeechRecognitionResult` + * messages. + * + * @property {Object[]} results + * *Output-only* Sequential list of transcription results corresponding to + * sequential portions of audio. + * + * This object should have the same structure as [SpeechRecognitionResult]{@link google.cloud.speech.v1p1beta1.SpeechRecognitionResult} + * + * @typedef RecognizeResponse + * @memberof google.cloud.speech.v1p1beta1 + * @see [google.cloud.speech.v1p1beta1.RecognizeResponse definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1p1beta1/cloud_speech.proto} + */ +var RecognizeResponse = { + // This is for documentation. Actual contents will be loaded by gRPC. +}; + +/** + * The only message returned to the client by the `LongRunningRecognize` method. + * It contains the result as zero or more sequential `SpeechRecognitionResult` + * messages. It is included in the `result.response` field of the `Operation` + * returned by the `GetOperation` call of the `google::longrunning::Operations` + * service. 
+ * + * @property {Object[]} results + * *Output-only* Sequential list of transcription results corresponding to + * sequential portions of audio. + * + * This object should have the same structure as [SpeechRecognitionResult]{@link google.cloud.speech.v1p1beta1.SpeechRecognitionResult} + * + * @typedef LongRunningRecognizeResponse + * @memberof google.cloud.speech.v1p1beta1 + * @see [google.cloud.speech.v1p1beta1.LongRunningRecognizeResponse definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1p1beta1/cloud_speech.proto} + */ +var LongRunningRecognizeResponse = { + // This is for documentation. Actual contents will be loaded by gRPC. +}; + +/** + * Describes the progress of a long-running `LongRunningRecognize` call. It is + * included in the `metadata` field of the `Operation` returned by the + * `GetOperation` call of the `google::longrunning::Operations` service. + * + * @property {number} progressPercent + * Approximate percentage of audio processed thus far. Guaranteed to be 100 + * when the audio is fully processed and the results are available. + * + * @property {Object} startTime + * Time when the request was received. + * + * This object should have the same structure as [Timestamp]{@link google.protobuf.Timestamp} + * + * @property {Object} lastUpdateTime + * Time of the most recent processing update. + * + * This object should have the same structure as [Timestamp]{@link google.protobuf.Timestamp} + * + * @typedef LongRunningRecognizeMetadata + * @memberof google.cloud.speech.v1p1beta1 + * @see [google.cloud.speech.v1p1beta1.LongRunningRecognizeMetadata definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1p1beta1/cloud_speech.proto} + */ +var LongRunningRecognizeMetadata = { + // This is for documentation. Actual contents will be loaded by gRPC. +}; + +/** + * `StreamingRecognizeResponse` is the only message returned to the client by + * `StreamingRecognize`. A series of zero or more `StreamingRecognizeResponse` + * messages are streamed back to the client. If there is no recognizable + * audio, and `single_utterance` is set to false, then no messages are streamed + * back to the client. + * + * Here's an example of a series of ten `StreamingRecognizeResponse`s that might + * be returned while processing audio: + * + * 1. results { alternatives { transcript: "tube" } stability: 0.01 } + * + * 2. results { alternatives { transcript: "to be a" } stability: 0.01 } + * + * 3. results { alternatives { transcript: "to be" } stability: 0.9 } + * results { alternatives { transcript: " or not to be" } stability: 0.01 } + * + * 4. results { alternatives { transcript: "to be or not to be" + * confidence: 0.92 } + * alternatives { transcript: "to bee or not to bee" } + * is_final: true } + * + * 5. results { alternatives { transcript: " that's" } stability: 0.01 } + * + * 6. results { alternatives { transcript: " that is" } stability: 0.9 } + * results { alternatives { transcript: " the question" } stability: 0.01 } + * + * 7. results { alternatives { transcript: " that is the question" + * confidence: 0.98 } + * alternatives { transcript: " that was the question" } + * is_final: true } + * + * Notes: + * + * - Only two of the above responses #4 and #7 contain final results; they are + * indicated by `is_final: true`. Concatenating these together generates the + * full transcript: "to be or not to be that is the question". + * + * - The others contain interim `results`. 
#3 and #6 contain two interim + * `results`: the first portion has a high stability and is less likely to + * change; the second portion has a low stability and is very likely to + * change. A UI designer might choose to show only high stability `results`. + * + * - The specific `stability` and `confidence` values shown above are only for + * illustrative purposes. Actual values may vary. + * + * - In each response, only one of these fields will be set: + * `error`, + * `speech_event_type`, or + * one or more (repeated) `results`. + * + * @property {Object} error + * *Output-only* If set, returns a google.rpc.Status message that + * specifies the error for the operation. + * + * This object should have the same structure as [Status]{@link google.rpc.Status} + * + * @property {Object[]} results + * *Output-only* This repeated list contains zero or more results that + * correspond to consecutive portions of the audio currently being processed. + * It contains zero or one `is_final=true` result (the newly settled portion), + * followed by zero or more `is_final=false` results (the interim results). + * + * This object should have the same structure as [StreamingRecognitionResult]{@link google.cloud.speech.v1p1beta1.StreamingRecognitionResult} + * + * @property {number} speechEventType + * *Output-only* Indicates the type of speech event. + * + * The number should be among the values of [SpeechEventType]{@link google.cloud.speech.v1p1beta1.SpeechEventType} + * + * @typedef StreamingRecognizeResponse + * @memberof google.cloud.speech.v1p1beta1 + * @see [google.cloud.speech.v1p1beta1.StreamingRecognizeResponse definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1p1beta1/cloud_speech.proto} + */ +var StreamingRecognizeResponse = { + // This is for documentation. Actual contents will be loaded by gRPC. + + /** + * Indicates the type of speech event. + * + * @enum {number} + * @memberof google.cloud.speech.v1p1beta1 + */ + SpeechEventType: { + + /** + * No speech event specified. + */ + SPEECH_EVENT_UNSPECIFIED: 0, + + /** + * This event indicates that the server has detected the end of the user's + * speech utterance and expects no additional speech. Therefore, the server + * will not process additional audio (although it may subsequently return + * additional results). The client should stop sending additional audio + * data, half-close the gRPC connection, and wait for any additional results + * until the server closes the gRPC connection. This event is only sent if + * `single_utterance` was set to `true`, and is not used otherwise. + */ + END_OF_SINGLE_UTTERANCE: 1 + } +}; + +/** + * A streaming speech recognition result corresponding to a portion of the audio + * that is currently being processed. + * + * @property {Object[]} alternatives + * *Output-only* May contain one or more recognition hypotheses (up to the + * maximum specified in `max_alternatives`). + * These alternatives are ordered in terms of accuracy, with the top (first) + * alternative being the most probable, as ranked by the recognizer. + * + * This object should have the same structure as [SpeechRecognitionAlternative]{@link google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative} + * + * @property {boolean} isFinal + * *Output-only* If `false`, this `StreamingRecognitionResult` represents an + * interim result that may change. 
If `true`, this is the final time the + * speech service will return this particular `StreamingRecognitionResult`, + * the recognizer will not return any further hypotheses for this portion of + * the transcript and corresponding audio. + * + * @property {number} stability + * *Output-only* An estimate of the likelihood that the recognizer will not + * change its guess about this interim result. Values range from 0.0 + * (completely unstable) to 1.0 (completely stable). + * This field is only provided for interim results (`is_final=false`). + * The default of 0.0 is a sentinel value indicating `stability` was not set. + * + * @typedef StreamingRecognitionResult + * @memberof google.cloud.speech.v1p1beta1 + * @see [google.cloud.speech.v1p1beta1.StreamingRecognitionResult definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1p1beta1/cloud_speech.proto} + */ +var StreamingRecognitionResult = { + // This is for documentation. Actual contents will be loaded by gRPC. +}; + +/** + * A speech recognition result corresponding to a portion of the audio. + * + * @property {Object[]} alternatives + * *Output-only* May contain one or more recognition hypotheses (up to the + * maximum specified in `max_alternatives`). + * These alternatives are ordered in terms of accuracy, with the top (first) + * alternative being the most probable, as ranked by the recognizer. + * + * This object should have the same structure as [SpeechRecognitionAlternative]{@link google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative} + * + * @typedef SpeechRecognitionResult + * @memberof google.cloud.speech.v1p1beta1 + * @see [google.cloud.speech.v1p1beta1.SpeechRecognitionResult definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1p1beta1/cloud_speech.proto} + */ +var SpeechRecognitionResult = { + // This is for documentation. Actual contents will be loaded by gRPC. +}; + +/** + * Alternative hypotheses (a.k.a. n-best list). + * + * @property {string} transcript + * *Output-only* Transcript text representing the words that the user spoke. + * + * @property {number} confidence + * *Output-only* The confidence estimate between 0.0 and 1.0. A higher number + * indicates an estimated greater likelihood that the recognized words are + * correct. This field is set only for the top alternative of a non-streaming + * result or, of a streaming result where `is_final=true`. + * This field is not guaranteed to be accurate and users should not rely on it + * to be always provided. + * The default of 0.0 is a sentinel value indicating `confidence` was not set. + * + * @property {Object[]} words + * *Output-only* A list of word-specific information for each recognized word. + * + * This object should have the same structure as [WordInfo]{@link google.cloud.speech.v1p1beta1.WordInfo} + * + * @typedef SpeechRecognitionAlternative + * @memberof google.cloud.speech.v1p1beta1 + * @see [google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1p1beta1/cloud_speech.proto} + */ +var SpeechRecognitionAlternative = { + // This is for documentation. Actual contents will be loaded by gRPC. +}; + +/** + * Word-specific information for recognized words. + * + * @property {Object} startTime + * *Output-only* Time offset relative to the beginning of the audio, + * and corresponding to the start of the spoken word. 
+ * This field is only set if `enable_word_time_offsets=true` and only + * in the top hypothesis. + * This is an experimental feature and the accuracy of the time offset can + * vary. + * + * This object should have the same structure as [Duration]{@link google.protobuf.Duration} + * + * @property {Object} endTime + * *Output-only* Time offset relative to the beginning of the audio, + * and corresponding to the end of the spoken word. + * This field is only set if `enable_word_time_offsets=true` and only + * in the top hypothesis. + * This is an experimental feature and the accuracy of the time offset can + * vary. + * + * This object should have the same structure as [Duration]{@link google.protobuf.Duration} + * + * @property {string} word + * *Output-only* The word corresponding to this set of information. + * + * @typedef WordInfo + * @memberof google.cloud.speech.v1p1beta1 + * @see [google.cloud.speech.v1p1beta1.WordInfo definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1p1beta1/cloud_speech.proto} + */ +var WordInfo = { + // This is for documentation. Actual contents will be loaded by gRPC. +}; \ No newline at end of file diff --git a/packages/google-cloud-speech/src/v1p1beta1/doc/google/protobuf/doc_any.js b/packages/google-cloud-speech/src/v1p1beta1/doc/google/protobuf/doc_any.js new file mode 100644 index 00000000000..f55fa17ff12 --- /dev/null +++ b/packages/google-cloud-speech/src/v1p1beta1/doc/google/protobuf/doc_any.js @@ -0,0 +1,131 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Note: this file is purely for documentation. Any contents are not expected +// to be loaded as the JS file. + +/** + * `Any` contains an arbitrary serialized protocol buffer message along with a + * URL that describes the type of the serialized message. + * + * Protobuf library provides support to pack/unpack Any values in the form + * of utility functions or additional generated methods of the Any type. + * + * Example 1: Pack and unpack a message in C++. + * + * Foo foo = ...; + * Any any; + * any.PackFrom(foo); + * ... + * if (any.UnpackTo(&foo)) { + * ... + * } + * + * Example 2: Pack and unpack a message in Java. + * + * Foo foo = ...; + * Any any = Any.pack(foo); + * ... + * if (any.is(Foo.class)) { + * foo = any.unpack(Foo.class); + * } + * + * Example 3: Pack and unpack a message in Python. + * + * foo = Foo(...) + * any = Any() + * any.Pack(foo) + * ... + * if any.Is(Foo.DESCRIPTOR): + * any.Unpack(foo) + * ... + * + * Example 4: Pack and unpack a message in Go + * + * foo := &pb.Foo{...} + * any, err := ptypes.MarshalAny(foo) + * ... + * foo := &pb.Foo{} + * if err := ptypes.UnmarshalAny(any, foo); err != nil { + * ... 
+ * } + * + * The pack methods provided by protobuf library will by default use + * 'type.googleapis.com/full.type.name' as the type URL and the unpack + * methods only use the fully qualified type name after the last '/' + * in the type URL, for example "foo.bar.com/x/y.z" will yield type + * name "y.z". + * + * + * # JSON + * + * The JSON representation of an `Any` value uses the regular + * representation of the deserialized, embedded message, with an + * additional field `@type` which contains the type URL. Example: + * + * package google.profile; + * message Person { + * string first_name = 1; + * string last_name = 2; + * } + * + * { + * "@type": "type.googleapis.com/google.profile.Person", + * "firstName": , + * "lastName": + * } + * + * If the embedded message type is well-known and has a custom JSON + * representation, that representation will be embedded adding a field + * `value` which holds the custom JSON in addition to the `@type` + * field. Example (for message google.protobuf.Duration): + * + * { + * "@type": "type.googleapis.com/google.protobuf.Duration", + * "value": "1.212s" + * } + * + * @property {string} typeUrl + * A URL/resource name whose content describes the type of the + * serialized protocol buffer message. + * + * For URLs which use the scheme `http`, `https`, or no scheme, the + * following restrictions and interpretations apply: + * + * * If no scheme is provided, `https` is assumed. + * * The last segment of the URL's path must represent the fully + * qualified name of the type (as in `path/google.protobuf.Duration`). + * The name should be in a canonical form (e.g., leading "." is + * not accepted). + * * An HTTP GET on the URL must yield a google.protobuf.Type + * value in binary format, or produce an error. + * * Applications are allowed to cache lookup results based on the + * URL, or have them precompiled into a binary to avoid any + * lookup. Therefore, binary compatibility needs to be preserved + * on changes to types. (Use versioned type names to manage + * breaking changes.) + * + * Schemes other than `http`, `https` (or the empty scheme) might be + * used with implementation specific semantics. + * + * @property {string} value + * Must be a valid serialized protocol buffer of the above specified type. + * + * @typedef Any + * @memberof google.protobuf + * @see [google.protobuf.Any definition in proto format]{@link https://github.com/google/protobuf/blob/master/src/google/protobuf/any.proto} + */ +var Any = { + // This is for documentation. Actual contents will be loaded by gRPC. +}; \ No newline at end of file diff --git a/packages/google-cloud-speech/src/v1p1beta1/doc/google/protobuf/doc_duration.js b/packages/google-cloud-speech/src/v1p1beta1/doc/google/protobuf/doc_duration.js new file mode 100644 index 00000000000..3ea5c376abb --- /dev/null +++ b/packages/google-cloud-speech/src/v1p1beta1/doc/google/protobuf/doc_duration.js @@ -0,0 +1,97 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Note: this file is purely for documentation. Any contents are not expected +// to be loaded as the JS file. + +/** + * A Duration represents a signed, fixed-length span of time represented + * as a count of seconds and fractions of seconds at nanosecond + * resolution. It is independent of any calendar and concepts like "day" + * or "month". It is related to Timestamp in that the difference between + * two Timestamp values is a Duration and it can be added or subtracted + * from a Timestamp. Range is approximately +-10,000 years. + * + * # Examples + * + * Example 1: Compute Duration from two Timestamps in pseudo code. + * + * Timestamp start = ...; + * Timestamp end = ...; + * Duration duration = ...; + * + * duration.seconds = end.seconds - start.seconds; + * duration.nanos = end.nanos - start.nanos; + * + * if (duration.seconds < 0 && duration.nanos > 0) { + * duration.seconds += 1; + * duration.nanos -= 1000000000; + * } else if (durations.seconds > 0 && duration.nanos < 0) { + * duration.seconds -= 1; + * duration.nanos += 1000000000; + * } + * + * Example 2: Compute Timestamp from Timestamp + Duration in pseudo code. + * + * Timestamp start = ...; + * Duration duration = ...; + * Timestamp end = ...; + * + * end.seconds = start.seconds + duration.seconds; + * end.nanos = start.nanos + duration.nanos; + * + * if (end.nanos < 0) { + * end.seconds -= 1; + * end.nanos += 1000000000; + * } else if (end.nanos >= 1000000000) { + * end.seconds += 1; + * end.nanos -= 1000000000; + * } + * + * Example 3: Compute Duration from datetime.timedelta in Python. + * + * td = datetime.timedelta(days=3, minutes=10) + * duration = Duration() + * duration.FromTimedelta(td) + * + * # JSON Mapping + * + * In JSON format, the Duration type is encoded as a string rather than an + * object, where the string ends in the suffix "s" (indicating seconds) and + * is preceded by the number of seconds, with nanoseconds expressed as + * fractional seconds. For example, 3 seconds with 0 nanoseconds should be + * encoded in JSON format as "3s", while 3 seconds and 1 nanosecond should + * be expressed in JSON format as "3.000000001s", and 3 seconds and 1 + * microsecond should be expressed in JSON format as "3.000001s". + * + * @property {number} seconds + * Signed seconds of the span of time. Must be from -315,576,000,000 + * to +315,576,000,000 inclusive. Note: these bounds are computed from: + * 60 sec/min * 60 min/hr * 24 hr/day * 365.25 days/year * 10000 years + * + * @property {number} nanos + * Signed fractions of a second at nanosecond resolution of the span + * of time. Durations less than one second are represented with a 0 + * `seconds` field and a positive or negative `nanos` field. For durations + * of one second or more, a non-zero value for the `nanos` field must be + * of the same sign as the `seconds` field. Must be from -999,999,999 + * to +999,999,999 inclusive. + * + * @typedef Duration + * @memberof google.protobuf + * @see [google.protobuf.Duration definition in proto format]{@link https://github.com/google/protobuf/blob/master/src/google/protobuf/duration.proto} + */ +var Duration = { + // This is for documentation. Actual contents will be loaded by gRPC. 
+}; \ No newline at end of file diff --git a/packages/google-cloud-speech/src/v1p1beta1/doc/google/rpc/doc_status.js b/packages/google-cloud-speech/src/v1p1beta1/doc/google/rpc/doc_status.js new file mode 100644 index 00000000000..7122f1682e0 --- /dev/null +++ b/packages/google-cloud-speech/src/v1p1beta1/doc/google/rpc/doc_status.js @@ -0,0 +1,92 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Note: this file is purely for documentation. Any contents are not expected +// to be loaded as the JS file. + +/** + * The `Status` type defines a logical error model that is suitable for different + * programming environments, including REST APIs and RPC APIs. It is used by + * [gRPC](https://github.com/grpc). The error model is designed to be: + * + * - Simple to use and understand for most users + * - Flexible enough to meet unexpected needs + * + * # Overview + * + * The `Status` message contains three pieces of data: error code, error message, + * and error details. The error code should be an enum value of + * google.rpc.Code, but it may accept additional error codes if needed. The + * error message should be a developer-facing English message that helps + * developers *understand* and *resolve* the error. If a localized user-facing + * error message is needed, put the localized message in the error details or + * localize it in the client. The optional error details may contain arbitrary + * information about the error. There is a predefined set of error detail types + * in the package `google.rpc` that can be used for common error conditions. + * + * # Language mapping + * + * The `Status` message is the logical representation of the error model, but it + * is not necessarily the actual wire format. When the `Status` message is + * exposed in different client libraries and different wire protocols, it can be + * mapped differently. For example, it will likely be mapped to some exceptions + * in Java, but more likely mapped to some error codes in C. + * + * # Other uses + * + * The error model and the `Status` message can be used in a variety of + * environments, either with or without APIs, to provide a + * consistent developer experience across different environments. + * + * Example uses of this error model include: + * + * - Partial errors. If a service needs to return partial errors to the client, + * it may embed the `Status` in the normal response to indicate the partial + * errors. + * + * - Workflow errors. A typical workflow has multiple steps. Each step may + * have a `Status` message for error reporting. + * + * - Batch operations. If a client uses batch request and batch response, the + * `Status` message should be used directly inside batch response, one for + * each error sub-response. + * + * - Asynchronous operations. If an API call embeds asynchronous operation + * results in its response, the status of those operations should be + * represented directly using the `Status` message. + * + * - Logging. 
If some API errors are stored in logs, the message `Status` could + * be used directly after any stripping needed for security/privacy reasons. + * + * @property {number} code + * The status code, which should be an enum value of google.rpc.Code. + * + * @property {string} message + * A developer-facing error message, which should be in English. Any + * user-facing error message should be localized and sent in the + * google.rpc.Status.details field, or localized by the client. + * + * @property {Object[]} details + * A list of messages that carry the error details. There is a common set of + * message types for APIs to use. + * + * This object should have the same structure as [Any]{@link google.protobuf.Any} + * + * @typedef Status + * @memberof google.rpc + * @see [google.rpc.Status definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto} + */ +var Status = { + // This is for documentation. Actual contents will be loaded by gRPC. +}; \ No newline at end of file diff --git a/packages/google-cloud-speech/src/v1p1beta1/index.js b/packages/google-cloud-speech/src/v1p1beta1/index.js new file mode 100644 index 00000000000..7204473c461 --- /dev/null +++ b/packages/google-cloud-speech/src/v1p1beta1/index.js @@ -0,0 +1,19 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +const SpeechClient = require('./speech_client'); + +module.exports.SpeechClient = SpeechClient; diff --git a/packages/google-cloud-speech/src/v1p1beta1/speech_client.js b/packages/google-cloud-speech/src/v1p1beta1/speech_client.js new file mode 100644 index 00000000000..5ce4bbb9008 --- /dev/null +++ b/packages/google-cloud-speech/src/v1p1beta1/speech_client.js @@ -0,0 +1,453 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +const gapicConfig = require('./speech_client_config'); +const gax = require('google-gax'); +const merge = require('lodash.merge'); +const path = require('path'); +const protobuf = require('protobufjs'); + +const VERSION = require('../../package.json').version; + +/** + * Service that implements Google Cloud Speech API. + * + * @class + * @memberof v1p1beta1 + */ +class SpeechClient { + /** + * Construct an instance of SpeechClient. + * + * @param {object} [options] - The configuration object. See the subsequent + * parameters for more details. + * @param {object} [options.credentials] - Credentials object. 
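As a hedged sketch of how the `Status` model documented above typically surfaces in this client: an error thrown by a call carries a numeric `code` (a google.rpc.Code value), a developer-facing `message`, and possibly `details`; the exact surfacing of `details` depends on the grpc and google-gax versions in use.

```js
const speech = require('@google-cloud/speech');

const client = new speech.v1p1beta1.SpeechClient();

// A deliberately incomplete request, used only to show the error surface.
client
  .recognize({config: {}, audio: {uri: 'gs://bucket_name/file_name.flac'}})
  .catch(err => {
    console.error(err.code);    // numeric google.rpc.Code, e.g. 3 = INVALID_ARGUMENT
    console.error(err.message); // developer-facing English message
    console.error(err.details); // additional error details, when provided
  });
```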
+ * @param {string} [options.credentials.client_email] + * @param {string} [options.credentials.private_key] + * @param {string} [options.email] - Account email address. Required when + * using a .pem or .p12 keyFilename. + * @param {string} [options.keyFilename] - Full path to a .json, .pem, or + * .p12 key downloaded from the Google Developers Console. If you provide + * a path to a JSON file, the projectId option above is not necessary. + * NOTE: .pem and .p12 require you to specify options.email as well. + * @param {number} [options.port] - The port on which to connect to + * the remote host. + * @param {string} [options.projectId] - The project ID from the Google + * Developer's Console, e.g. 'grape-spaceship-123'. We will also check + * the environment variable GCLOUD_PROJECT for your project ID. If your + * app is running in an environment which supports + * {@link https://developers.google.com/identity/protocols/application-default-credentials Application Default Credentials}, + * your project ID will be detected automatically. + * @param {function} [options.promise] - Custom promise module to use instead + * of native Promises. + * @param {string} [options.servicePath] - The domain name of the + * API remote host. + */ + constructor(opts) { + this._descriptors = {}; + + // Ensure that options include the service address and port. + opts = Object.assign( + { + clientConfig: {}, + port: this.constructor.port, + servicePath: this.constructor.servicePath, + }, + opts + ); + + // Create a `gaxGrpc` object, with any grpc-specific options + // sent to the client. + opts.scopes = this.constructor.scopes; + var gaxGrpc = gax.grpc(opts); + + // Save the auth object to the client, for use by other methods. + this.auth = gaxGrpc.auth; + + // Determine the client header string. + var clientHeader = [ + `gl-node/${process.versions.node}`, + `grpc/${gaxGrpc.grpcVersion}`, + `gax/${gax.version}`, + `gapic/${VERSION}`, + ]; + if (opts.libName && opts.libVersion) { + clientHeader.push(`${opts.libName}/${opts.libVersion}`); + } + + // Load the applicable protos. + var protos = merge( + {}, + gaxGrpc.loadProto( + path.join(__dirname, '..', '..', 'protos'), + 'google/cloud/speech/v1p1beta1/cloud_speech.proto' + ) + ); + + // Some of the methods on this service provide streaming responses. + // Provide descriptors for these. + this._descriptors.stream = { + streamingRecognize: new gax.StreamDescriptor( + gax.StreamType.BIDI_STREAMING + ), + }; + var protoFilesRoot = new gax.grpc.GoogleProtoFilesRoot(); + protoFilesRoot = protobuf.loadSync( + path.join( + __dirname, + '..', + '..', + 'protos', + 'google/cloud/speech/v1p1beta1/cloud_speech.proto' + ), + protoFilesRoot + ); + + // This API contains "long-running operations", which return + // an Operation object that allows for tracking of the operation, + // rather than holding a request open.
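Taken together, the constructor options above can be exercised as in the following sketch; the key path and project ID are placeholders, and the `clientConfig` override shape mirrors the `speech_client_config.json` file added later in this change (google-gax merges it over those defaults):

```js
const speech = require('@google-cloud/speech');

const client = new speech.v1p1beta1.SpeechClient({
  keyFilename: '/path/to/service-account.json', // with a JSON key, projectId is optional
  projectId: 'my-project-id',                   // placeholder
  clientConfig: {
    // Optional per-method overrides of the bundled retry/timeout defaults.
    interfaces: {
      'google.cloud.speech.v1p1beta1.Speech': {
        methods: {
          Recognize: {timeout_millis: 300000},
        },
      },
    },
  },
});
```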
+ this.operationsClient = new gax.lro({ + auth: gaxGrpc.auth, + grpc: gaxGrpc.grpc, + }).operationsClient(opts); + + var longRunningRecognizeResponse = protoFilesRoot.lookup( + 'google.cloud.speech.v1p1beta1.LongRunningRecognizeResponse' + ); + var longRunningRecognizeMetadata = protoFilesRoot.lookup( + 'google.cloud.speech.v1p1beta1.LongRunningRecognizeMetadata' + ); + + this._descriptors.longrunning = { + longRunningRecognize: new gax.LongrunningDescriptor( + this.operationsClient, + longRunningRecognizeResponse.decode.bind(longRunningRecognizeResponse), + longRunningRecognizeMetadata.decode.bind(longRunningRecognizeMetadata) + ), + }; + + // Put together the default options sent with requests. + var defaults = gaxGrpc.constructSettings( + 'google.cloud.speech.v1p1beta1.Speech', + gapicConfig, + opts.clientConfig, + {'x-goog-api-client': clientHeader.join(' ')} + ); + + // Set up a dictionary of "inner API calls"; the core implementation + // of calling the API is handled in `google-gax`, with this code + // merely providing the destination and request information. + this._innerApiCalls = {}; + + // Put together the "service stub" for + // google.cloud.speech.v1p1beta1.Speech. + var speechStub = gaxGrpc.createStub( + protos.google.cloud.speech.v1p1beta1.Speech, + opts + ); + + // Iterate over each of the methods that the service provides + // and create an API call method for each. + var speechStubMethods = [ + 'recognize', + 'longRunningRecognize', + 'streamingRecognize', + ]; + for (let methodName of speechStubMethods) { + this._innerApiCalls[methodName] = gax.createApiCall( + speechStub.then( + stub => + function() { + var args = Array.prototype.slice.call(arguments, 0); + return stub[methodName].apply(stub, args); + } + ), + defaults[methodName], + this._descriptors.stream[methodName] || + this._descriptors.longrunning[methodName] + ); + } + } + + /** + * The DNS address for this API service. + */ + static get servicePath() { + return 'speech.googleapis.com'; + } + + /** + * The port for this API service. + */ + static get port() { + return 443; + } + + /** + * The scopes needed to make gRPC calls for every method defined + * in this service. + */ + static get scopes() { + return ['https://www.googleapis.com/auth/cloud-platform']; + } + + /** + * Return the project ID used by this class. + * @param {function(Error, string)} callback - the callback to + * be called with the current project Id. + */ + getProjectId(callback) { + return this.auth.getProjectId(callback); + } + + // ------------------- + // -- Service calls -- + // ------------------- + + /** + * Performs synchronous speech recognition: receive results after all audio + * has been sent and processed. + * + * @param {Object} request + * The request object that will be sent. + * @param {Object} request.config + * *Required* Provides information to the recognizer that specifies how to + * process the request. + * + * This object should have the same structure as [RecognitionConfig]{@link google.cloud.speech.v1p1beta1.RecognitionConfig} + * @param {Object} request.audio + * *Required* The audio data to be recognized. + * + * This object should have the same structure as [RecognitionAudio]{@link google.cloud.speech.v1p1beta1.RecognitionAudio} + * @param {Object} [options] + * Optional parameters. You can override the default settings for this call, e.g, timeout, + * retries, paginations, etc. See [gax.CallOptions]{@link https://googleapis.github.io/gax-nodejs/global.html#CallOptions} for the details. 
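Per-call overrides from gax.CallOptions can be supplied as the optional second argument; a minimal sketch (values illustrative, `timeout` in milliseconds):

```js
const speech = require('@google-cloud/speech');
const client = new speech.v1p1beta1.SpeechClient();

const request = {
  config: {encoding: 'FLAC', sampleRateHertz: 44100, languageCode: 'en-US'},
  audio: {uri: 'gs://bucket_name/file_name.flac'},
};

// Raise the deadline for this one call; other CallOptions fields
// (e.g. retry) can be overridden the same way.
client.recognize(request, {timeout: 120000}).then(([response]) => {
  // doThingsWith(response)
});
```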
+ * @param {function(?Error, ?Object)} [callback] + * The function which will be called with the result of the API call. + * + * The second parameter to the callback is an object representing [RecognizeResponse]{@link google.cloud.speech.v1p1beta1.RecognizeResponse}. + * @returns {Promise} - The promise which resolves to an array. + * The first element of the array is an object representing [RecognizeResponse]{@link google.cloud.speech.v1p1beta1.RecognizeResponse}. + * The promise has a method named "cancel" which cancels the ongoing API call. + * + * @example + * + * const speech = require('speech.v1p1beta1'); + * + * var client = new speech.v1p1beta1.SpeechClient({ + * // optional auth parameters. + * }); + * + * var encoding = 'FLAC'; + * var sampleRateHertz = 44100; + * var languageCode = 'en-US'; + * var config = { + * encoding: encoding, + * sampleRateHertz: sampleRateHertz, + * languageCode: languageCode, + * }; + * var uri = 'gs://bucket_name/file_name.flac'; + * var audio = { + * uri: uri, + * }; + * var request = { + * config: config, + * audio: audio, + * }; + * client.recognize(request) + * .then(responses => { + * var response = responses[0]; + * // doThingsWith(response) + * }) + * .catch(err => { + * console.error(err); + * }); + */ + recognize(request, options, callback) { + if (options instanceof Function && callback === undefined) { + callback = options; + options = {}; + } + options = options || {}; + + return this._innerApiCalls.recognize(request, options, callback); + } + + /** + * Performs asynchronous speech recognition: receive results via the + * google.longrunning.Operations interface. Returns either an + * `Operation.error` or an `Operation.response` which contains + * a `LongRunningRecognizeResponse` message. + * + * @param {Object} request + * The request object that will be sent. + * @param {Object} request.config + * *Required* Provides information to the recognizer that specifies how to + * process the request. + * + * This object should have the same structure as [RecognitionConfig]{@link google.cloud.speech.v1p1beta1.RecognitionConfig} + * @param {Object} request.audio + * *Required* The audio data to be recognized. + * + * This object should have the same structure as [RecognitionAudio]{@link google.cloud.speech.v1p1beta1.RecognitionAudio} + * @param {Object} [options] + * Optional parameters. You can override the default settings for this call, e.g, timeout, + * retries, paginations, etc. See [gax.CallOptions]{@link https://googleapis.github.io/gax-nodejs/global.html#CallOptions} for the details. + * @param {function(?Error, ?Object)} [callback] + * The function which will be called with the result of the API call. + * + * The second parameter to the callback is a [gax.Operation]{@link https://googleapis.github.io/gax-nodejs/Operation} object. + * @returns {Promise} - The promise which resolves to an array. + * The first element of the array is a [gax.Operation]{@link https://googleapis.github.io/gax-nodejs/Operation} object. + * The promise has a method named "cancel" which cancels the ongoing API call. + * + * @example + * + * const speech = require('speech.v1p1beta1'); + * + * var client = new speech.v1p1beta1.SpeechClient({ + * // optional auth parameters. 
+ * }); + * + * var encoding = 'FLAC'; + * var sampleRateHertz = 44100; + * var languageCode = 'en-US'; + * var config = { + * encoding: encoding, + * sampleRateHertz: sampleRateHertz, + * languageCode: languageCode, + * }; + * var uri = 'gs://bucket_name/file_name.flac'; + * var audio = { + * uri: uri, + * }; + * var request = { + * config: config, + * audio: audio, + * }; + * + * // Handle the operation using the promise pattern. + * client.longRunningRecognize(request) + * .then(responses => { + * var operation = responses[0]; + * var initialApiResponse = responses[1]; + * + * // Operation#promise starts polling for the completion of the LRO. + * return operation.promise(); + * }) + * .then(responses => { + * // The final result of the operation. + * var result = responses[0]; + * + * // The metadata value of the completed operation. + * var metadata = responses[1]; + * + * // The response of the api call returning the complete operation. + * var finalApiResponse = responses[2]; + * }) + * .catch(err => { + * console.error(err); + * }); + * + * var encoding = 'FLAC'; + * var sampleRateHertz = 44100; + * var languageCode = 'en-US'; + * var config = { + * encoding: encoding, + * sampleRateHertz: sampleRateHertz, + * languageCode: languageCode, + * }; + * var uri = 'gs://bucket_name/file_name.flac'; + * var audio = { + * uri: uri, + * }; + * var request = { + * config: config, + * audio: audio, + * }; + * + * // Handle the operation using the event emitter pattern. + * client.longRunningRecognize(request) + * .then(responses => { + * var operation = responses[0]; + * var initialApiResponse = responses[1]; + * + * // Adding a listener for the "complete" event starts polling for the + * // completion of the operation. + * operation.on('complete', (result, metadata, finalApiResponse) => { + * // doSomethingWith(result); + * }); + * + * // Adding a listener for the "progress" event causes the callback to be + * // called on any change in metadata when the operation is polled. + * operation.on('progress', (metadata, apiResponse) => { + * // doSomethingWith(metadata) + * }); + * + * // Adding a listener for the "error" event handles any errors found during polling. + * operation.on('error', err => { + * // throw(err); + * }); + * }) + * .catch(err => { + * console.error(err); + * }); + */ + longRunningRecognize(request, options, callback) { + if (options instanceof Function && callback === undefined) { + callback = options; + options = {}; + } + options = options || {}; + + return this._innerApiCalls.longRunningRecognize(request, options, callback); + } + + /** + * Performs bidirectional streaming speech recognition: receive results while + * sending audio. This method is only available via the gRPC API (not REST). + * + * @param {Object} [options] + * Optional parameters. You can override the default settings for this call, e.g, timeout, + * retries, paginations, etc. See [gax.CallOptions]{@link https://googleapis.github.io/gax-nodejs/global.html#CallOptions} for the details. + * @returns {Stream} + * An object stream which is both readable and writable. It accepts objects + * representing [StreamingRecognizeRequest]{@link google.cloud.speech.v1p1beta1.StreamingRecognizeRequest} for write() method, and + * will emit objects representing [StreamingRecognizeResponse]{@link google.cloud.speech.v1p1beta1.StreamingRecognizeResponse} on 'data' event asynchronously. 
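Because this method accepts raw StreamingRecognizeRequest objects, the first write must carry only the streaming config and every later write only audio bytes. A hedged sketch of streaming a local raw-PCM file (the file path and audio parameters are placeholders):

```js
const fs = require('fs');
const speech = require('@google-cloud/speech');

const client = new speech.v1p1beta1.SpeechClient();

const stream = client
  .streamingRecognize()
  .on('error', console.error)
  .on('data', response => {
    // Each response may contain interim and/or final results.
    // doThingsWith(response.results)
  });

// First message: configuration only.
stream.write({
  streamingConfig: {
    config: {encoding: 'LINEAR16', sampleRateHertz: 16000, languageCode: 'en-US'},
    interimResults: false,
  },
});

// Subsequent messages: audio bytes only.
fs.createReadStream('/path/to/audio.raw')
  .on('data', chunk => stream.write({audioContent: chunk}))
  .on('end', () => stream.end());
```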
+ * + * @example + * + * const speech = require('speech.v1p1beta1'); + * + * var client = new speech.v1p1beta1.SpeechClient({ + * // optional auth parameters. + * }); + * + * var stream = client.streamingRecognize().on('data', response => { + * // doThingsWith(response) + * }); + * var request = {}; + * // Write request objects. + * stream.write(request); + */ + streamingRecognize(options) { + options = options || {}; + + return this._innerApiCalls.streamingRecognize(options); + } +} + +module.exports = SpeechClient; diff --git a/packages/google-cloud-speech/src/v1p1beta1/speech_client_config.json b/packages/google-cloud-speech/src/v1p1beta1/speech_client_config.json new file mode 100644 index 00000000000..244513dc672 --- /dev/null +++ b/packages/google-cloud-speech/src/v1p1beta1/speech_client_config.json @@ -0,0 +1,41 @@ +{ + "interfaces": { + "google.cloud.speech.v1p1beta1.Speech": { + "retry_codes": { + "idempotent": [ + "DEADLINE_EXCEEDED", + "UNAVAILABLE" + ], + "non_idempotent": [] + }, + "retry_params": { + "default": { + "initial_retry_delay_millis": 100, + "retry_delay_multiplier": 1.3, + "max_retry_delay_millis": 60000, + "initial_rpc_timeout_millis": 1000000, + "rpc_timeout_multiplier": 1.0, + "max_rpc_timeout_millis": 1000000, + "total_timeout_millis": 5000000 + } + }, + "methods": { + "Recognize": { + "timeout_millis": 1000000, + "retry_codes_name": "idempotent", + "retry_params_name": "default" + }, + "LongRunningRecognize": { + "timeout_millis": 60000, + "retry_codes_name": "non_idempotent", + "retry_params_name": "default" + }, + "StreamingRecognize": { + "timeout_millis": 1000000, + "retry_codes_name": "idempotent", + "retry_params_name": "default" + } + } + } + } +} diff --git a/packages/google-cloud-speech/system-test/speech_smoke_test_v1p1beta1.js b/packages/google-cloud-speech/system-test/speech_smoke_test_v1p1beta1.js new file mode 100644 index 00000000000..f40b51a3160 --- /dev/null +++ b/packages/google-cloud-speech/system-test/speech_smoke_test_v1p1beta1.js @@ -0,0 +1,50 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +describe('SpeechSmokeTest', () => { + it('successfully makes a call to the service', done => { + const speech = require('../src'); + + var client = new speech.v1p1beta1.SpeechClient({ + // optional auth parameters. 
+ }); + + var languageCode = 'en-US'; + var sampleRateHertz = 44100; + var encoding = 'FLAC'; + var config = { + languageCode: languageCode, + sampleRateHertz: sampleRateHertz, + encoding: encoding, + }; + var uri = 'gs://gapic-toolkit/hello.flac'; + var audio = { + uri: uri, + }; + var request = { + config: config, + audio: audio, + }; + client + .recognize(request) + .then(responses => { + var response = responses[0]; + console.log(response); + }) + .then(done) + .catch(done); + }); +}); diff --git a/packages/google-cloud-speech/test/gapic-v1p1beta1.js b/packages/google-cloud-speech/test/gapic-v1p1beta1.js new file mode 100644 index 00000000000..73c2ea23fc3 --- /dev/null +++ b/packages/google-cloud-speech/test/gapic-v1p1beta1.js @@ -0,0 +1,249 @@ +// Copyright 2018 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +const assert = require('assert'); + +const speechModule = require('../src'); + +var FAKE_STATUS_CODE = 1; +var error = new Error(); +error.code = FAKE_STATUS_CODE; + +describe('SpeechClient', () => { + describe('recognize', () => { + it('invokes recognize without error', done => { + var client = new speechModule.v1p1beta1.SpeechClient({ + credentials: {client_email: 'bogus', private_key: 'bogus'}, + projectId: 'bogus', + }); + + // Mock request + var encoding = 'FLAC'; + var sampleRateHertz = 44100; + var languageCode = 'en-US'; + var config = { + encoding: encoding, + sampleRateHertz: sampleRateHertz, + languageCode: languageCode, + }; + var uri = 'gs://bucket_name/file_name.flac'; + var audio = { + uri: uri, + }; + var request = { + config: config, + audio: audio, + }; + + // Mock response + var expectedResponse = {}; + + // Mock Grpc layer + client._innerApiCalls.recognize = mockSimpleGrpcMethod( + request, + expectedResponse + ); + + client.recognize(request, (err, response) => { + assert.ifError(err); + assert.deepStrictEqual(response, expectedResponse); + done(); + }); + }); + + it('invokes recognize with error', done => { + var client = new speechModule.v1p1beta1.SpeechClient({ + credentials: {client_email: 'bogus', private_key: 'bogus'}, + projectId: 'bogus', + }); + + // Mock request + var encoding = 'FLAC'; + var sampleRateHertz = 44100; + var languageCode = 'en-US'; + var config = { + encoding: encoding, + sampleRateHertz: sampleRateHertz, + languageCode: languageCode, + }; + var uri = 'gs://bucket_name/file_name.flac'; + var audio = { + uri: uri, + }; + var request = { + config: config, + audio: audio, + }; + + // Mock Grpc layer + client._innerApiCalls.recognize = mockSimpleGrpcMethod( + request, + null, + error + ); + + client.recognize(request, (err, response) => { + assert(err instanceof Error); + assert.equal(err.code, FAKE_STATUS_CODE); + assert(typeof response === 'undefined'); + done(); + }); + }); + }); + + describe('longRunningRecognize', function() { + it('invokes longRunningRecognize without error', done => { + var client = new speechModule.v1p1beta1.SpeechClient({ + credentials: {client_email: 'bogus', private_key: 
'bogus'}, + projectId: 'bogus', + }); + + // Mock request + var encoding = 'FLAC'; + var sampleRateHertz = 44100; + var languageCode = 'en-US'; + var config = { + encoding: encoding, + sampleRateHertz: sampleRateHertz, + languageCode: languageCode, + }; + var uri = 'gs://bucket_name/file_name.flac'; + var audio = { + uri: uri, + }; + var request = { + config: config, + audio: audio, + }; + + // Mock response + var expectedResponse = {}; + + // Mock Grpc layer + client._innerApiCalls.longRunningRecognize = mockLongRunningGrpcMethod( + request, + expectedResponse + ); + + client + .longRunningRecognize(request) + .then(responses => { + var operation = responses[0]; + return operation.promise(); + }) + .then(responses => { + assert.deepStrictEqual(responses[0], expectedResponse); + done(); + }) + .catch(err => { + done(err); + }); + }); + + it('invokes longRunningRecognize with error', done => { + var client = new speechModule.v1p1beta1.SpeechClient({ + credentials: {client_email: 'bogus', private_key: 'bogus'}, + projectId: 'bogus', + }); + + // Mock request + var encoding = 'FLAC'; + var sampleRateHertz = 44100; + var languageCode = 'en-US'; + var config = { + encoding: encoding, + sampleRateHertz: sampleRateHertz, + languageCode: languageCode, + }; + var uri = 'gs://bucket_name/file_name.flac'; + var audio = { + uri: uri, + }; + var request = { + config: config, + audio: audio, + }; + + // Mock Grpc layer + client._innerApiCalls.longRunningRecognize = mockLongRunningGrpcMethod( + request, + null, + error + ); + + client + .longRunningRecognize(request) + .then(responses => { + var operation = responses[0]; + return operation.promise(); + }) + .then(() => { + assert.fail(); + }) + .catch(err => { + assert(err instanceof Error); + assert.equal(err.code, FAKE_STATUS_CODE); + done(); + }); + }); + + it('has longrunning decoder functions', () => { + var client = new speechModule.v1p1beta1.SpeechClient({ + credentials: {client_email: 'bogus', private_key: 'bogus'}, + projectId: 'bogus', + }); + assert( + client._descriptors.longrunning.longRunningRecognize + .responseDecoder instanceof Function + ); + assert( + client._descriptors.longrunning.longRunningRecognize + .metadataDecoder instanceof Function + ); + }); + }); +}); + +function mockSimpleGrpcMethod(expectedRequest, response, error) { + return function(actualRequest, options, callback) { + assert.deepStrictEqual(actualRequest, expectedRequest); + if (error) { + callback(error); + } else if (response) { + callback(null, response); + } else { + callback(null); + } + }; +} + +function mockLongRunningGrpcMethod(expectedRequest, response, error) { + return request => { + assert.deepStrictEqual(request, expectedRequest); + var mockOperation = { + promise: function() { + return new Promise((resolve, reject) => { + if (error) { + reject(error); + } else { + resolve([response]); + } + }); + }, + }; + return Promise.resolve([mockOperation]); + }; +}
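The helpers above cover unary and long-running methods; a companion mock for the bidi-streaming streamingRecognize method could follow the same pattern. A hedged sketch (the helper name is hypothetical, and it reuses the `assert` imported at the top of this test file):

```js
const {PassThrough} = require('stream');

function mockBidiStreamingGrpcMethod(expectedRequest, response, error) {
  return () => {
    // A duplex object stream: each written request is checked against the
    // expectation, then the canned response (or error) is emitted.
    const mockStream = new PassThrough({
      objectMode: true,
      transform: (chunk, enc, callback) => {
        assert.deepStrictEqual(chunk, expectedRequest);
        if (error) {
          callback(error);
        } else {
          callback(null, response);
        }
      },
    });
    return mockStream;
  };
}
```

In a test, such a mock would be assigned to `client._innerApiCalls.streamingRecognize`, written to with a request object, and asserted on via the returned stream's 'data' and 'error' events.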