samples: Updated Infinite streaming sample (#1422)
* adding result end time and color-coded result output

* updated infinite streaming sample to use result_end_time

* lint fixes

* more comments and formatting updates

* Fix speech test.

* More test adjustments.
Kristin Grace Galvin authored and chingor13 committed Aug 15, 2020
1 parent 7fc2cdb commit dd52b3a
Showing 3 changed files with 137 additions and 27 deletions.
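The heart of the change is timing bookkeeping: the sample restarts its streaming request every STREAMING_LIMIT (10 seconds) and now uses each result's result_end_time to print timestamps that stay continuous across restarts. Below is a minimal, self-contained sketch of that arithmetic, assuming the constant and formula shown in the diff; the class name, method name, and sample values are illustrative, not part of the commit.

import com.google.protobuf.Duration;
import java.text.DecimalFormat;

public class CorrectedTimeSketch {

  private static final int STREAMING_LIMIT = 10000; // ms of audio per stream

  // result_end_time is relative to the current stream's start; map it onto
  // the whole session's timeline (formula taken from the diff below).
  static double correctedTimeMs(Duration resultEndTime, double bridgingOffset, int restartCounter) {
    int resultEndTimeInMS =
        (int) ((resultEndTime.getSeconds() * 1000) + (resultEndTime.getNanos() / 1000000));
    // Add back the streams that already elapsed, minus the re-sent
    // ("bridging") audio that would otherwise be counted twice.
    return resultEndTimeInMS - bridgingOffset + (STREAMING_LIMIT * restartCounter);
  }

  public static void main(String[] args) {
    // Hypothetical result ending 2.5 s into the third stream (restartCounter = 2),
    // with 800 ms of audio re-sent from the previous stream.
    Duration end = Duration.newBuilder().setSeconds(2).setNanos(500_000_000).build();
    DecimalFormat format = new DecimalFormat("0.#");
    System.out.println(format.format(correctedTimeMs(end, 800, 2))); // prints 21700
  }
}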
InfiniteStreamRecognize.java
@@ -20,14 +20,17 @@
 import com.google.api.gax.rpc.ClientStream;
 import com.google.api.gax.rpc.ResponseObserver;
 import com.google.api.gax.rpc.StreamController;
-import com.google.cloud.speech.v1.RecognitionConfig;
-import com.google.cloud.speech.v1.SpeechClient;
-import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
-import com.google.cloud.speech.v1.StreamingRecognitionConfig;
-import com.google.cloud.speech.v1.StreamingRecognitionResult;
-import com.google.cloud.speech.v1.StreamingRecognizeRequest;
-import com.google.cloud.speech.v1.StreamingRecognizeResponse;
+import com.google.cloud.speech.v1p1beta1.RecognitionConfig;
+import com.google.cloud.speech.v1p1beta1.SpeechClient;
+import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative;
+import com.google.cloud.speech.v1p1beta1.StreamingRecognitionConfig;
+import com.google.cloud.speech.v1p1beta1.StreamingRecognitionResult;
+import com.google.cloud.speech.v1p1beta1.StreamingRecognizeRequest;
+import com.google.cloud.speech.v1p1beta1.StreamingRecognizeResponse;
 import com.google.protobuf.ByteString;
+import com.google.protobuf.Duration;
+import java.lang.Math;
+import java.text.DecimalFormat;
 import java.util.ArrayList;
 import java.util.concurrent.BlockingQueue;
 import java.util.concurrent.LinkedBlockingQueue;
@@ -39,11 +42,29 @@

 public class InfiniteStreamRecognize {
 
+  private static final int STREAMING_LIMIT = 10000; // 10 seconds
+
+  public static final String RED = "\033[0;31m";
+  public static final String GREEN = "\033[0;32m";
+  public static final String YELLOW = "\033[0;33m";
+
   // Creating shared object
   private static volatile BlockingQueue<byte[]> sharedQueue = new LinkedBlockingQueue();
   private static TargetDataLine targetDataLine;
   private static int BYTES_PER_BUFFER = 6400; // buffer size in bytes
 
+  private static int restartCounter = 0;
+  private static ArrayList<ByteString> audioInput = new ArrayList<ByteString>();
+  private static ArrayList<ByteString> lastAudioInput = new ArrayList<ByteString>();
+  private static int resultEndTimeInMS = 0;
+  private static int isFinalEndTime = 0;
+  private static int finalRequestEndTime = 0;
+  private static boolean newStream = true;
+  private static double bridgingOffset = 0;
+  private static boolean lastTranscriptWasFinal = false;
+  private static StreamController referenceToStreamController;
+  private static ByteString tempByteString;
+
   public static void main(String... args) {
     try {
       infiniteStreamingRecognize();
@@ -60,6 +81,7 @@ class MicBuffer implements Runnable {

     @Override
     public void run() {
+      System.out.println(YELLOW);
       System.out.println("Start speaking...Press Ctrl-C to stop");
       targetDataLine.start();
       byte[] data = new byte[BYTES_PER_BUFFER];
@@ -88,24 +110,48 @@ public void run() {

       ArrayList<StreamingRecognizeResponse> responses = new ArrayList<>();
 
-      public void onStart(StreamController controller) {}
+      public void onStart(StreamController controller) {
+        referenceToStreamController = controller;
+      }
 
       public void onResponse(StreamingRecognizeResponse response) {
+
         responses.add(response);
+
         StreamingRecognitionResult result = response.getResultsList().get(0);
-        // There can be several alternative transcripts for a given chunk of speech. Just
-        // use the first (most likely) one here.
+
+        Duration resultEndTime = result.getResultEndTime();
+
+        resultEndTimeInMS = (int) ((resultEndTime.getSeconds() * 1000)
+            + (resultEndTime.getNanos() / 1000000));
+
+        double correctedTime = resultEndTimeInMS - bridgingOffset
+            + (STREAMING_LIMIT * restartCounter);
+        DecimalFormat format = new DecimalFormat("0.#");
 
         SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
-        System.out.printf("Transcript : %s\n", alternative.getTranscript());
-      }
+        if (result.getIsFinal()) {
+          System.out.print(GREEN);
+          System.out.print("\033[2K\r");
+          System.out.printf("%s: %s\n", format.format(correctedTime),
+              alternative.getTranscript());
 
-      public void onComplete() {
-        System.out.println("Done");
-      }
+          isFinalEndTime = resultEndTimeInMS;
+          lastTranscriptWasFinal = true;
+        } else {
+          System.out.print(RED);
+          System.out.print("\033[2K\r");
+          System.out.printf("%s: %s", format.format(correctedTime),
+              alternative.getTranscript());
 
-      public void onError(Throwable t) {
-        System.out.println(t);
+          lastTranscriptWasFinal = false;
+        }
       }
 
+      public void onComplete() {}
+
+      public void onError(Throwable t) {}
+
     };
 
     clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);
@@ -116,8 +162,12 @@ public void onError(Throwable t) {
             .setLanguageCode("en-US")
             .setSampleRateHertz(16000)
             .build();
+
     StreamingRecognitionConfig streamingRecognitionConfig =
-        StreamingRecognitionConfig.newBuilder().setConfig(recognitionConfig).build();
+        StreamingRecognitionConfig.newBuilder()
+            .setConfig(recognitionConfig)
+            .setInterimResults(true)
+            .build();
 
     StreamingRecognizeRequest request =
         StreamingRecognizeRequest.newBuilder()
@@ -151,23 +201,84 @@ public void onError(Throwable t) {

       long estimatedTime = System.currentTimeMillis() - startTime;
 
-      if (estimatedTime >= 55000) {
+      if (estimatedTime >= STREAMING_LIMIT) {
 
         clientStream.closeSend();
+        referenceToStreamController.cancel(); // remove Observer
+
+        if (resultEndTimeInMS > 0) {
+          finalRequestEndTime = isFinalEndTime;
+        }
+        resultEndTimeInMS = 0;
+
+        lastAudioInput = null;
+        lastAudioInput = audioInput;
+        audioInput = new ArrayList<ByteString>();
+
+        restartCounter++;
+
+        if (!lastTranscriptWasFinal) {
+          System.out.print('\n');
+        }
+
+        newStream = true;
+
         clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);
 
         request =
             StreamingRecognizeRequest.newBuilder()
                 .setStreamingConfig(streamingRecognitionConfig)
                 .build();
 
+        System.out.println(YELLOW);
+        System.out.printf("%d: RESTARTING REQUEST\n", restartCounter * STREAMING_LIMIT);
+
         startTime = System.currentTimeMillis();
 
       } else {
 
+        if ((newStream) && (lastAudioInput.size() > 0)) {
+          // if this is the first audio from a new request
+          // calculate amount of unfinalized audio from last request
+          // resend the audio to the speech client before incoming audio
+          double chunkTime = STREAMING_LIMIT / lastAudioInput.size();
+          // ms length of each chunk in previous request audio arrayList
+          if (chunkTime != 0) {
+            if (bridgingOffset < 0) {
+              // bridging Offset accounts for time of resent audio
+              // calculated from last request
+              bridgingOffset = 0;
+            }
+            if (bridgingOffset > finalRequestEndTime) {
+              bridgingOffset = finalRequestEndTime;
+            }
+            int chunksFromMS = (int) Math.floor((finalRequestEndTime
+                - bridgingOffset) / chunkTime);
+            // chunks from MS is number of chunks to resend
+            bridgingOffset = (int) Math.floor((lastAudioInput.size()
+                - chunksFromMS) * chunkTime);
+            // set bridging offset for next request
+            for (int i = chunksFromMS; i < lastAudioInput.size(); i++) {
+
+              request =
+                  StreamingRecognizeRequest.newBuilder()
+                      .setAudioContent(lastAudioInput.get(i))
+                      .build();
+              clientStream.send(request);
+            }
+          }
+          newStream = false;
+        }
+
+        tempByteString = ByteString.copyFrom(sharedQueue.take());
+
         request =
             StreamingRecognizeRequest.newBuilder()
-                .setAudioContent(ByteString.copyFrom(sharedQueue.take()))
+                .setAudioContent(tempByteString)
                 .build();
 
+        audioInput.add(tempByteString);
+
       }
 
       clientStream.send(request);
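The subtlest part of the hunk above is the bridge between streams: when the request restarts, any chunks recorded after the last finalized result are re-sent to the new stream so no speech is lost, and bridgingOffset records how much re-sent audio the corrected timestamps must discount. Here is a standalone sketch of that chunk arithmetic, assuming the diff's STREAMING_LIMIT and formulas; the method name, the double[] out-parameter, and the sample values are illustrative, and the sketch uses floating-point chunk length where the sample relies on integer division.

import java.util.ArrayList;

public class BridgingSketch {

  private static final int STREAMING_LIMIT = 10000; // ms of audio per stream

  // Returns the index of the first chunk of the previous stream to re-send;
  // chunks before it were covered by a finalized result. Writes the length
  // (ms) of the re-sent tail into bridgingOffset[0] for the next request.
  static int firstChunkToResend(ArrayList<byte[]> lastAudioInput,
      int finalRequestEndTime, double[] bridgingOffset) {
    if (lastAudioInput.isEmpty()) {
      return 0;
    }
    // Approximate ms of audio per chunk in the previous request.
    double chunkTime = (double) STREAMING_LIMIT / lastAudioInput.size();
    // Clamp the carried-over offset into [0, finalRequestEndTime], as the diff does.
    double offset = Math.max(0, Math.min(bridgingOffset[0], finalRequestEndTime));
    int chunksFromMs = (int) Math.floor((finalRequestEndTime - offset) / chunkTime);
    bridgingOffset[0] = Math.floor((lastAudioInput.size() - chunksFromMs) * chunkTime);
    return chunksFromMs;
  }

  public static void main(String[] args) {
    ArrayList<byte[]> chunks = new ArrayList<>();
    for (int i = 0; i < 50; i++) {
      chunks.add(new byte[6400]); // 50 chunks over 10 s, so roughly 200 ms each
    }
    double[] bridgingOffset = {0};
    int start = firstChunkToResend(chunks, 9000, bridgingOffset);
    // Last final result ended at 9 s: re-send chunks 45..49 (about 1 s of audio).
    System.out.println("resend from chunk " + start + ", bridgingOffset = " + bridgingOffset[0]);
  }
}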
Recognize.java
@@ -699,7 +699,6 @@ public void onError(Throwable t) {
System.out.println("Stop speaking.");
targetDataLine.stop();
targetDataLine.close();
break;
}
request =
StreamingRecognizeRequest.newBuilder()
RecognizeIT.java
@@ -113,22 +113,22 @@ public void testStreamRecognize() throws Exception {
   @Test
   public void testAutoPunctuation() throws Exception {
     Recognize.transcribeFileWithAutomaticPunctuation(audioFileName);
-    String got = bout.toString();
-    assertThat(got).contains("How old is the Brooklyn Bridge?");
+    String got = bout.toString().toLowerCase();
+    assertThat(got).contains("how old is the brooklyn bridge");
   }
 
   @Test
   public void testGcsAutoPunctuation() throws Exception {
     Recognize.transcribeGcsWithAutomaticPunctuation(gcsAudioPath);
-    String got = bout.toString();
-    assertThat(got).contains("How old is the Brooklyn Bridge?");
+    String got = bout.toString().toLowerCase();
+    assertThat(got).contains("how old is the brooklyn bridge");
   }
 
   @Test
   public void testStreamAutoPunctuation() throws Exception {
     Recognize.streamingTranscribeWithAutomaticPunctuation(audioFileName);
-    String got = bout.toString();
-    assertThat(got).contains("How old is the Brooklyn Bridge?");
+    String got = bout.toString().toLowerCase();
+    assertThat(got).contains("how old is the brooklyn bridge");
   }
 
   @Test
