Merge pull request #2957 from mozilla/nodejs-stream-wrapper
Return Stream wrapper in JS Model.createStream, add test coverage (Fixes #2956)
reuben authored Apr 29, 2020
2 parents 09b756a + 6f83e05 commit 65b7c41
Showing 6 changed files with 66 additions and 32 deletions.
63 changes: 37 additions & 26 deletions native_client/javascript/client.ts
@@ -30,6 +30,7 @@ parser.addArgument(['--scorer'], {help: 'Path to the external scorer file'});
parser.addArgument(['--audio'], {required: true, help: 'Path to the audio file to run (WAV format)'});
parser.addArgument(['--version'], {action: VersionAction, nargs: 0, help: 'Print version and exits'});
parser.addArgument(['--extended'], {action: 'storeTrue', help: 'Output string from extended metadata'});
+ parser.addArgument(['--stream'], {action: 'storeTrue', help: 'Use streaming code path (for tests)'});
let args = parser.parseArgs();

function totalTime(hrtimeValue: number[]): string {
@@ -86,8 +87,7 @@ function bufferToStream(buffer: Buffer) {
return stream;
}

- let audioStream = new MemoryStream();
- bufferToStream(buffer).
+ let conversionStream = bufferToStream(buffer).
pipe(Sox({
global: {
'no-dither': true,
@@ -102,27 +102,38 @@ bufferToStream(buffer).
compression: 0.0,
type: 'raw'
}
- })).
- pipe(audioStream);
-
- audioStream.on('finish', () => {
-   let audioBuffer = audioStream.toBuffer();
-
-   const inference_start = process.hrtime();
-   console.error('Running inference.');
-   const audioLength = (audioBuffer.length / 2) * (1 / desired_sample_rate);
-
-   // sphinx-doc: js_ref_inference_start
-   if (args['extended']) {
-     let metadata = model.sttWithMetadata(audioBuffer, 1);
-     console.log(candidateTranscriptToString(metadata.transcripts[0]));
-     Ds.FreeMetadata(metadata);
-   } else {
-     console.log(model.stt(audioBuffer));
-   }
-   // sphinx-doc: js_ref_inference_stop
-   const inference_stop = process.hrtime(inference_start);
-   console.error('Inference took %ds for %ds audio file.', totalTime(inference_stop), audioLength.toPrecision(4));
-   Ds.FreeModel(model);
-   process.exit(0);
- });
+ }));
+
+ if (!args['stream']) {
+   let audioStream = new MemoryStream();
+   conversionStream.pipe(audioStream);
+   audioStream.on('finish', () => {
+     let audioBuffer = audioStream.toBuffer();
+
+     const inference_start = process.hrtime();
+     console.error('Running inference.');
+     const audioLength = (audioBuffer.length / 2) * (1 / desired_sample_rate);
+
+     // sphinx-doc: js_ref_inference_start
+     if (args['extended']) {
+       let metadata = model.sttWithMetadata(audioBuffer, 1);
+       console.log(candidateTranscriptToString(metadata.transcripts[0]));
+       Ds.FreeMetadata(metadata);
+     } else {
+       console.log(model.stt(audioBuffer));
+     }
+     // sphinx-doc: js_ref_inference_stop
+     const inference_stop = process.hrtime(inference_start);
+     console.error('Inference took %ds for %ds audio file.', totalTime(inference_stop), audioLength.toPrecision(4));
+     Ds.FreeModel(model);
+     process.exit(0);
+   });
+ } else {
+   let stream = model.createStream();
+   conversionStream.on('data', (chunk: Buffer) => {
+     stream.feedAudioContent(chunk);
+   });
+   conversionStream.on('end', () => {
+     console.log(stream.finishStream());
+   });
+ }
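
Taken together, the new --stream path is an end-to-end exercise of the Stream wrapper: convert the audio through Sox, feed each chunk as it arrives, then finish. A minimal standalone sketch of the same pattern, assuming the deepspeech npm package and a raw 16-bit mono PCM file already at the model's sample rate (both file paths are placeholders):

import * as Ds from 'deepspeech';
import * as Fs from 'fs';

const model = new Ds.Model('output_graph.pbmm');   // placeholder model path
const stream = model.createStream();               // now returns a Stream wrapper

const source = Fs.createReadStream('audio.raw');   // placeholder; raw PCM, not WAV
source.on('data', (chunk: Buffer) => {
  stream.feedAudioContent(chunk);                  // feed audio as it arrives
});
source.on('end', () => {
  console.log(stream.finishStream());              // final transcript; invalidates the stream
  Ds.FreeModel(model);
});
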
8 changes: 4 additions & 4 deletions native_client/javascript/index.d.ts
@@ -112,7 +112,7 @@ sttWithMetadata(aBuffer: object, aNumResults: number): Metadata;
*
* @throws on error
*/
- createStream(): object;
+ createStream(): Stream;
}

/**
@@ -127,14 +127,14 @@ declare class Stream {
* @param aBuffer An array of 16-bit, mono raw audio samples at the
* appropriate sample rate (matching what the model was trained on).
*/
- feedAudioContent(aBuffer: object): void;
+ feedAudioContent(aBuffer: Buffer): void;

/**
* Compute the intermediate decoding of an ongoing streaming inference.
*
* @return The STT intermediate result.
*/
- intermediateDecode(aSctx: object): string;
+ intermediateDecode(aSctx: Stream): string;

/**
* Compute the intermediate decoding of an ongoing streaming inference, return results including metadata.
@@ -188,7 +188,7 @@ export function FreeMetadata(metadata: Metadata): void;
*
* @param stream A streaming state pointer returned by :js:func:`Model.createStream`.
*/
- export function FreeStream(stream: object): void;
+ export function FreeStream(stream: Stream): void;

/**
* Print version of this library and of the linked TensorFlow library on standard output.
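
With the typings tightened from object to Stream and Buffer, the streaming calls type-check end to end, including polling partial results mid-stream. A small sketch, assuming model is an initialized Ds.Model and chunks is an array of 16-bit PCM Buffers; note that the aSctx parameter in the intermediateDecode declaration appears to be a leftover from the pre-wrapper API — the method is assumed here to read its own handle, so it is called with no arguments:

const stream = model.createStream();                       // typed as Stream, not object
for (const chunk of chunks) {
  stream.feedAudioContent(chunk);                          // Buffer, not object
  console.error('partial:', stream.intermediateDecode());  // non-destructive peek
}
console.log('final:', stream.finishStream());              // consumes the stream
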
4 changes: 2 additions & 2 deletions native_client/javascript/index.js
@@ -141,7 +141,7 @@ Model.prototype.createStream = function() {
if (status !== 0) {
throw "CreateStream failed "+binding.ErrorCodeToErrorMessage(status)+" 0x" + status.toString(16);
}
- return ctx;
+ return new Stream(ctx);
}

/**
@@ -192,7 +192,7 @@ Stream.prototype.intermediateDecodeWithMetadata = function(aNumResults) {
* This method will free the stream, it must not be used after this method is called.
*/
Stream.prototype.finishStream = function() {
- result = binding.FinishStream(this._impl);
+ let result = binding.FinishStream(this._impl);
this._impl = null;
return result;
}
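
The one-line change in createStream is the substance of the fix for #2956: the raw binding context used to escape to callers, which therefore never received the documented Stream methods. A sketch of the effect on calling code (the failing call illustrates the old behavior and is not output from the library):

// Before this commit: createStream() returned the bare binding context,
// so none of the documented wrapper methods existed on the returned value.
let broken = model.createStream();
// broken.feedAudioContent(chunk);        // TypeError: not a function

// After: the context is wrapped, and the Stream API works as documented.
let stream = model.createStream();
stream.feedAudioContent(chunk);           // chunk: a Buffer of 16-bit PCM samples
let transcript = stream.finishStream();   // frees the handle; stream is now invalid
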
19 changes: 19 additions & 0 deletions taskcluster/tc-asserts.sh
@@ -511,3 +511,22 @@ run_cpp_only_inference_tests()
set -e
assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm_intermediate_decode}" "$status"
}

+ run_js_streaming_inference_tests()
+ {
+   set +e
+   phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --stream 2>${TASKCLUSTER_TMP_DIR}/stderr | tail -n 1)
+   status=$?
+   set -e
+   assert_correct_ldc93s1_lm "${phrase_pbmodel_withlm}" "$status"
+ }
+
+ run_js_streaming_prod_inference_tests()
+ {
+   local _bitrate=$1
+   set +e
+   phrase_pbmodel_withlm=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer --audio ${TASKCLUSTER_TMP_DIR}/${ldc93s1_sample_filename} --stream 2>${TASKCLUSTER_TMP_DIR}/stderr | tail -n 1)
+   status=$?
+   set -e
+   assert_correct_ldc93s1_prodmodel "${phrase_pbmodel_withlm}" "$status" "${_bitrate}"
+ }
2 changes: 2 additions & 0 deletions taskcluster/tc-node-tests-prod.sh
@@ -36,3 +36,5 @@ npm install --prefix ${NODE_ROOT} --cache ${NODE_CACHE} ${deepspeech_npm_url}
check_runtime_nodejs

run_prod_inference_tests "${bitrate}"

+ run_js_streaming_prod_inference_tests "${bitrate}"
2 changes: 2 additions & 0 deletions taskcluster/tc-node-tests.sh
@@ -32,3 +32,5 @@ check_runtime_nodejs
ensure_cuda_usage "$3"

run_all_inference_tests

+ run_js_streaming_inference_tests
