Merge pull request #778 from GoogleCloudPlatform/speech-wordtimeoffset

Adds support for word time offset
GoogleCloudPlatform · Jul 31, 2017 · 4bc55e1 · 4bc55e1
2 parents f9e8abf + d4d0c25
commit 4bc55e1
Show file tree

Hide file tree

Showing 4 changed files with 32 additions and 27 deletions.
diff --git a/speech/cloud-client/pom.xml b/speech/cloud-client/pom.xml
@@ -33,25 +33,12 @@
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
   </properties>
 
-  <!-- FIXME(lesv) - temp to fix an issue w/ GA Datastore -->
-<!--
-  <dependencyManagement>
-    <dependencies>
-      <dependency>
-          <groupId>io.grpc</groupId>
-          <artifactId>grpc-core</artifactId>
-          <version>1.2.0</version>
-      </dependency>
-    </dependencies>
-  </dependencyManagement>
- -->
-
   <dependencies>
     <!-- [START dependencies] -->
     <dependency>
       <groupId>com.google.cloud</groupId>
-      <artifactId>google-cloud</artifactId>
-      <version>0.21.0-alpha</version>
+      <artifactId>google-cloud-speech</artifactId>
+      <version>0.21.1-alpha</version>
     </dependency>
     <!-- [END dependencies] -->
 

diff --git a/speech/cloud-client/src/main/java/com/example/speech/Recognize.java b/speech/cloud-client/src/main/java/com/example/speech/Recognize.java
@@ -16,9 +16,9 @@
 
 package com.example.speech;
 
-import com.google.api.gax.grpc.ApiStreamObserver;
-import com.google.api.gax.grpc.OperationFuture;
-import com.google.api.gax.grpc.StreamingCallable;
+import com.google.api.gax.rpc.ApiStreamObserver;
+import com.google.api.gax.rpc.OperationFuture;
+import com.google.api.gax.rpc.StreamingCallable;
 import com.google.cloud.speech.v1.LongRunningRecognizeMetadata;
 import com.google.cloud.speech.v1.LongRunningRecognizeResponse;
 import com.google.cloud.speech.v1.RecognitionAudio;
@@ -32,7 +32,9 @@
 import com.google.cloud.speech.v1.StreamingRecognitionResult;
 import com.google.cloud.speech.v1.StreamingRecognizeRequest;
 import com.google.cloud.speech.v1.StreamingRecognizeResponse;
+import com.google.cloud.speech.v1.WordInfo;
 import com.google.common.util.concurrent.SettableFuture;
+import com.google.longrunning.Operation;
 import com.google.protobuf.ByteString;
 
 import java.io.IOException;
@@ -169,8 +171,10 @@ public static void asyncRecognizeFile(String fileName) throws Exception, IOExcep
         .build();
 
     // Use non-blocking call for getting file transcription
-    OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
+    OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata,
+            Operation> response =
         speech.longRunningRecognizeAsync(config, audio);
+
     while (!response.isDone()) {
       System.out.println("Waiting for response...");
       Thread.sleep(10000);
@@ -202,13 +206,15 @@ public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOExceptio
         .setEncoding(AudioEncoding.FLAC)
         .setLanguageCode("en-US")
         .setSampleRateHertz(16000)
+        .setEnableWordTimeOffsets(true)
         .build();
     RecognitionAudio audio = RecognitionAudio.newBuilder()
         .setUri(gcsUri)
         .build();
 
     // Use non-blocking call for getting file transcription
-    OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
+    OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata,
+            Operation> response =
         speech.longRunningRecognizeAsync(config, audio);
     while (!response.isDone()) {
       System.out.println("Waiting for response...");
@@ -220,7 +226,12 @@ public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOExceptio
     for (SpeechRecognitionResult result: results) {
       List<SpeechRecognitionAlternative> alternatives = result.getAlternativesList();
       for (SpeechRecognitionAlternative alternative: alternatives) {
-        System.out.printf("Transcription: %s%n", alternative.getTranscript());
+        System.out.printf("Transcription: %s\n",alternative.getTranscript());
+        for (WordInfo wordInfo: alternative.getWordsList()) {
+          System.out.println(wordInfo.getWord());
+          System.out.printf("\t%s ns - %s ns\n",
+              wordInfo.getStartTime().getNanos(), wordInfo.getEndTime().getNanos());
+        }
       }
     }
     speech.close();

diff --git a/speech/cloud-client/src/test/java/com/example/speech/QuickstartSampleIT.java b/speech/cloud-client/src/test/java/com/example/speech/QuickstartSampleIT.java
@@ -18,15 +18,15 @@
 
 import static com.google.common.truth.Truth.assertThat;
 
+import java.io.ByteArrayOutputStream;
+import java.io.PrintStream;
+
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;
 
-import java.io.ByteArrayOutputStream;
-import java.io.PrintStream;
-
 /**
  * Tests for quickstart sample.
  */

diff --git a/speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java b/speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java
@@ -18,15 +18,15 @@
 
 import static com.google.common.truth.Truth.assertThat;
 
+import java.io.ByteArrayOutputStream;
+import java.io.PrintStream;
+
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;
 
-import java.io.ByteArrayOutputStream;
-import java.io.PrintStream;
-
 /**
  * Tests for speech recognize sample.
  */
@@ -83,6 +83,13 @@ public void testAsyncRecognizeGcs() throws Exception {
     assertThat(got).contains("how old is the Brooklyn Bridge");
   }
 
+  @Test
+  public void testAsyncWordoffset() throws Exception { // checks per-word time offsets are printed
+    Recognize.asyncRecognizeGcs(gcsPath);
+    String got = bout.toString(); // NOTE(review): assumes some word starts at nanos == 0 - confirm
+    assertThat(got).contains("\t0 ns -"); // sample prints "\t<start> ns - <end> ns" per word
+  }
+
   @Test
   public void testStreamRecognize() throws Exception {
     Recognize.streamingRecognizeFile(fileName);