Skip to content

Commit

Permalink
samples: Updates to highlight word time offsets (#787)
Browse files Browse the repository at this point in the history
  • Loading branch information
gguuss authored and chingor13 committed Aug 15, 2020
1 parent 23b3862 commit 30bb89c
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 4 deletions.
98 changes: 95 additions & 3 deletions speech/snippets/src/main/java/com/example/speech/Recognize.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public static void main(String... args) throws Exception {
System.out.printf(
"\tjava %s \"<command>\" \"<path-to-image>\"\n"
+ "Commands:\n"
+ "\tsyncrecognize | asyncrecognize | streamrecognize\n"
+ "\tsyncrecognize | asyncrecognize | streamrecognize | wordoffsets\n"
+ "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI "
+ "for a Cloud Storage resource (gs://...)\n",
Recognize.class.getCanonicalName());
Expand All @@ -66,6 +66,12 @@ public static void main(String... args) throws Exception {
} else {
syncRecognizeFile(path);
}
} else if (command.equals("wordoffsets")) {
if (path.startsWith("gs://")) {
asyncRecognizeWords(path);
} else {
syncRecognizeWords(path);
}
} else if (command.equals("asyncrecognize")) {
if (path.startsWith("gs://")) {
asyncRecognizeGcs(path);
Expand Down Expand Up @@ -113,6 +119,51 @@ public static void syncRecognizeFile(String fileName) throws Exception, IOExcept
speech.close();
}

/**
 * Performs synchronous speech recognition on a local audio file and prints the
 * transcription followed by the start and end time offset of every recognized word.
 *
 * <p>Each offset is printed as whole seconds plus one digit of tenths of a second
 * (the nanosecond part is truncated, not rounded).
 *
 * @param fileName the path to a local raw PCM (LINEAR16, 16000 Hz) audio file to
 *     transcribe and get word time offsets for.
 * @throws IOException if the audio file cannot be read.
 * @throws Exception on any failure raised while creating the client or issuing the request.
 */
public static void syncRecognizeWords(String fileName) throws Exception, IOException {
  // try-with-resources guarantees the client is closed even when reading the file
  // or the recognize call throws; the original only closed it on the success path.
  try (SpeechClient speech = SpeechClient.create()) {
    Path path = Paths.get(fileName);
    byte[] data = Files.readAllBytes(path);
    ByteString audioBytes = ByteString.copyFrom(data);

    // Configure request with local raw PCM audio and ask for per-word time offsets
    RecognitionConfig config = RecognitionConfig.newBuilder()
        .setEncoding(AudioEncoding.LINEAR16)
        .setLanguageCode("en-US")
        .setSampleRateHertz(16000)
        .setEnableWordTimeOffsets(true)
        .build();
    RecognitionAudio audio = RecognitionAudio.newBuilder()
        .setContent(audioBytes)
        .build();

    // Use blocking call to get audio transcript
    RecognizeResponse response = speech.recognize(config, audio);
    List<SpeechRecognitionResult> results = response.getResultsList();

    for (SpeechRecognitionResult result : results) {
      List<SpeechRecognitionAlternative> alternatives = result.getAlternativesList();
      for (SpeechRecognitionAlternative alternative : alternatives) {
        System.out.printf("Transcription: %s%n", alternative.getTranscript());
        for (WordInfo wordInfo : alternative.getWordsList()) {
          System.out.println(wordInfo.getWord());
          // nanos / 100000000 keeps only the tenths-of-a-second digit
          System.out.printf("\t%s.%s sec - %s.%s sec\n",
              wordInfo.getStartTime().getSeconds(),
              wordInfo.getStartTime().getNanos() / 100000000,
              wordInfo.getEndTime().getSeconds(),
              wordInfo.getEndTime().getNanos() / 100000000);
        }
      }
    }
  }
}


/**
* Performs speech recognition on remote FLAC file and prints the transcription.
*
Expand Down Expand Up @@ -193,11 +244,11 @@ public static void asyncRecognizeFile(String fileName) throws Exception, IOExcep

/**
* Performs non-blocking speech recognition on remote FLAC file and prints
* the transcription.
* the transcription as well as word time offsets.
*
* @param gcsUri the path to the remote LINEAR16 audio file to transcribe.
*/
public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOException {
public static void asyncRecognizeWords(String gcsUri) throws Exception, IOException {
// Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
SpeechClient speech = SpeechClient.create();

Expand Down Expand Up @@ -240,6 +291,47 @@ public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOExceptio
speech.close();
}

/**
 * Performs non-blocking speech recognition on a remote FLAC file and prints
 * the transcription.
 *
 * <p>The long-running operation is polled every 10 seconds until it completes.
 *
 * @param gcsUri the path to the remote FLAC audio file to transcribe.
 * @throws Exception on any failure raised while creating the client, waiting for the
 *     operation, or reading its result.
 */
public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOException {
  // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS.
  // try-with-resources closes it even if the wait is interrupted or the operation fails;
  // the original only closed it on the success path.
  try (SpeechClient speech = SpeechClient.create()) {

    // Configure remote file request for FLAC
    RecognitionConfig config = RecognitionConfig.newBuilder()
        .setEncoding(AudioEncoding.FLAC)
        .setLanguageCode("en-US")
        .setSampleRateHertz(16000)
        .build();
    RecognitionAudio audio = RecognitionAudio.newBuilder()
        .setUri(gcsUri)
        .build();

    // Use non-blocking call for getting file transcription
    OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata,
        Operation> response =
        speech.longRunningRecognizeAsync(config, audio);
    while (!response.isDone()) {
      System.out.println("Waiting for response...");
      Thread.sleep(10000);
    }

    List<SpeechRecognitionResult> results = response.get().getResultsList();

    for (SpeechRecognitionResult result : results) {
      List<SpeechRecognitionAlternative> alternatives = result.getAlternativesList();
      for (SpeechRecognitionAlternative alternative : alternatives) {
        System.out.printf("Transcription: %s\n",alternative.getTranscript());
      }
    }
  }
}


/**
* Performs streaming speech recognition on raw PCM audio data.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,14 @@ public void testRecognizeFile() throws Exception {
assertThat(got).contains("how old is the Brooklyn Bridge");
}

/**
 * Runs the synchronous word-offset sample on the local audio file and checks that the
 * text collected in bout holds the expected transcript and an offset line starting at 0.0 sec.
 */
@Test
public void testRecognizeWordoffset() throws Exception {
  Recognize.syncRecognizeWords(fileName);

  final String printed = bout.toString();
  assertThat(printed).contains("how old is the Brooklyn Bridge");
  assertThat(printed).contains("\t0.0 sec -");
}

@Test
public void testRecognizeGcs() throws Exception {
Recognize.syncRecognizeGcs(gcsPath);
Expand All @@ -85,8 +93,9 @@ public void testAsyncRecognizeGcs() throws Exception {

/**
 * Runs the asynchronous recognition sample against the remote audio at gcsPath and checks
 * that the contents of bout hold the expected transcript and an offset line starting at 0.0 sec.
 */
@Test
public void testAsyncWordoffset() throws Exception {
// NOTE(review): the next two calls look like the removed and added sides of a single
// changed statement in this diff view — confirm which one the final file keeps.
Recognize.asyncRecognizeGcs(gcsPath);
Recognize.asyncRecognizeWords(gcsPath);
String got = bout.toString();
assertThat(got).contains("how old is the Brooklyn Bridge");
assertThat(got).contains("\t0.0 sec -");
}

Expand Down

0 comments on commit 30bb89c

Please sign in to comment.