From 48380c60c3398a8d64fb3b727f656086482b3905 Mon Sep 17 00:00:00 2001
From: Brad Miro
Date: Mon, 11 Nov 2019 18:53:50 -0500
Subject: [PATCH] feat(samples): bump auto-punctuation sample to v1 and move
 diarization to v1 (#463)

* bumped diarization and autocapitalization samples to v1

* Remove extra committed file

* fixed linting error
---
 speech/recognize.js                  | 72 +++++++++++++++++++++++++++-
 speech/system-test/recognize.test.js |  5 ++
 2 files changed, 75 insertions(+), 2 deletions(-)

diff --git a/speech/recognize.js b/speech/recognize.js
index ac3cfa5b6c4..d0274ad5001 100644
--- a/speech/recognize.js
+++ b/speech/recognize.js
@@ -544,12 +544,12 @@ async function syncRecognizeWithAutoPunctuation(
   languageCode
 ) {
   // [START speech_transcribe_auto_punctuation]
-  // Imports the Google Cloud client library for Beta API
+  // Imports the Google Cloud client library
   /**
    * TODO(developer): Update client library import to use new
    * version of API when desired features become available
    */
-  const speech = require('@google-cloud/speech').v1p1beta1;
+  const speech = require('@google-cloud/speech');
   const fs = require('fs');
 
   // Creates a client
@@ -712,6 +712,62 @@ async function syncRecognizeWithMultiChannelGCS(gcsUri) {
   // [END speech_transcribe_multichannel_gcs]
 }
 
+async function speechTranscribeDiarization(fileName) {
+  // [START speech_transcribe_diarization]
+  const fs = require('fs');
+
+  // Imports the Google Cloud client library
+  const speech = require('@google-cloud/speech');
+
+  // Creates a client
+  const client = new speech.SpeechClient();
+
+  // Set config for Diarization
+  const diarizationConfig = {
+    enableSpeakerDiarization: true,
+    maxSpeakerCount: 2,
+  };
+
+  const config = {
+    encoding: `LINEAR16`,
+    sampleRateHertz: 8000,
+    languageCode: `en-US`,
+    diarizationConfig: diarizationConfig,
+    model: `phone_call`,
+  };
+
+  /**
+   * TODO(developer): Uncomment the following lines before running the sample.
+   */
+  // const fileName = 'Local path to audio file, e.g. /path/to/audio.raw';
+
+  const audio = {
+    content: fs.readFileSync(fileName).toString('base64'),
+  };
+
+  const request = {
+    config: config,
+    audio: audio,
+  };
+
+  const [response] = await client.recognize(request);
+  const transcription = response.results
+    .map(result => result.alternatives[0].transcript)
+    .join('\n');
+  console.log(`Transcription: ${transcription}`);
+  console.log(`Speaker Diarization:`);
+  const result = response.results[response.results.length - 1];
+  const wordsInfo = result.alternatives[0].words;
+  // Note: The transcript within each result is separate and sequential per result.
+  // However, the words list within an alternative includes all the words
+  // from all the results thus far. Thus, to get all the words with speaker
+  // tags, you only have to take the words list from the last result:
+  wordsInfo.forEach(a =>
+    console.log(` word: ${a.word}, speakerTag: ${a.speakerTag}`)
+  );
+  // [END speech_transcribe_diarization]
+}
+
 require(`yargs`) // eslint-disable-line
   .demand(1)
   .command(
@@ -883,6 +939,12 @@ require(`yargs`) // eslint-disable-line
       opts.languageCode
     )
   )
+  .command(
+    `Diarization`,
+    `Isolate distinct speakers in an audio file`,
+    {},
+    opts => speechTranscribeDiarization(opts.speechFile)
+  )
   .options({
     encoding: {
       alias: 'e',
@@ -905,6 +967,12 @@ require(`yargs`) // eslint-disable-line
       requiresArg: true,
       type: 'string',
     },
+    speechFile: {
+      alias: 'f',
+      global: true,
+      requiresArg: false,
+      type: 'string',
+    },
   })
   .example(`node $0 sync ./resources/audio.raw -e LINEAR16 -r 16000`)
   .example(`node $0 async-gcs gs://gcs-test-data/vr.flac -e FLAC -r 16000`)
diff --git a/speech/system-test/recognize.test.js b/speech/system-test/recognize.test.js
index 59a696ce350..3a96c344db3 100644
--- a/speech/system-test/recognize.test.js
+++ b/speech/system-test/recognize.test.js
@@ -132,4 +132,9 @@ describe('Recognize', () => {
     );
     assert.match(output, /Channel Tag: 2/);
   });
+
+  it('should run speech diarization on a local file', async () => {
+    const output = execSync(`${cmd} Diarization -f ${filepath2}`);
+    assert.match(output, /speakerTag:/);
+  });
 });
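
Usage note (reviewer sketch, not part of the patch): the new `Diarization` command reads a local file via the `-f`/`--speechFile` flag added above. Assuming a mono LINEAR16 recording sampled at 8 kHz to match the sample's `config` (the path below is a placeholder, not a file shipped with this change), it could be exercised as:

    node recognize.js Diarization -f ./resources/phone_call.raw

With `enableSpeakerDiarization: true`, `maxSpeakerCount: 2`, and the `phone_call` model, the sample prints the transcript followed by one `word: ..., speakerTag: ...` line per word from the last result, which is the output the new system test matches with `/speakerTag:/`.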