Skip to content

Commit e0b3004

Browse files
authored
Unify inference chains (#1399)
Used for tensor op registry and model inference
1 parent 28852a2 commit e0b3004

File tree

3 files changed

+30
-18
lines changed

3 files changed

+30
-18
lines changed

src/backends/onnx.js

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,32 @@ export async function createInferenceSession(buffer_or_path, session_options, se
160160
return session;
161161
}
162162

/**
 * Currently, Transformers.js doesn't support simultaneous execution of sessions in WASM/WebGPU.
 * For this reason, we need to chain the inference calls (otherwise we get "Error: Session already started").
 * @type {Promise<any>}
 */
let webInferenceChain = Promise.resolve();

// Serialization is only required when running in a browser context
// (main thread or web worker); elsewhere sessions may run immediately.
const IS_WEB_ENV = apis.IS_BROWSER_ENV || apis.IS_WEBWORKER_ENV;

/**
 * Run an inference session.
 *
 * In web environments, the call is queued on `webInferenceChain` so that at most
 * one `session.run` is in flight at a time. A rejected run is propagated to its
 * caller but deliberately NOT kept on the chain, so a single failed inference
 * cannot poison every subsequent call.
 * @param {import('onnxruntime-common').InferenceSession} session The ONNX inference session.
 * @param {Record<string, import('onnxruntime-common').Tensor>} ortFeed The input tensors.
 * @returns {Promise<Record<string, import('onnxruntime-common').Tensor>>} The output tensors.
 */
export async function runInferenceSession(session, ortFeed) {
    const run = () => session.run(ortFeed);
    if (!IS_WEB_ENV) {
        return run();
    }
    // Start this run only after the previous one has settled.
    const result = webInferenceChain.then(run);
    // Keep the chain itself always-fulfilled: the caller receives any rejection
    // via `result`, while the swallowed copy stored on the chain prevents both
    // (a) the stale error re-rejecting every later call and
    // (b) an unhandled-rejection warning from the retained promise.
    webInferenceChain = result.catch(() => { });
    return result;
}
188+
163189
/**
164190
* Check if an object is an ONNX tensor.
165191
* @param {any} x The object to check

src/models.js

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ import {
4848
createInferenceSession,
4949
isONNXTensor,
5050
isONNXProxy,
51+
runInferenceSession,
5152
} from './backends/onnx.js';
5253
import {
5354
DATA_TYPES,
@@ -419,10 +420,6 @@ function validateInputs(session, inputs) {
419420
return checkedInputs;
420421
}
421422

422-
// Currently, Transformers.js doesn't support simultaneous execution of sessions in WASM/WebGPU.
423-
// For this reason, we need to chain the inference calls (otherwise we get "Error: Session already started").
424-
let webInferenceChain = Promise.resolve();
425-
426423
/**
427424
* Executes an InferenceSession using the specified inputs.
428425
* NOTE: `inputs` must contain at least the input names of the model.
@@ -439,10 +436,7 @@ async function sessionRun(session, inputs) {
439436
try {
440437
// pass the original ort tensor
441438
const ortFeed = Object.fromEntries(Object.entries(checkedInputs).map(([k, v]) => [k, v.ort_tensor]));
442-
const run = () => session.run(ortFeed);
443-
const output = await ((apis.IS_BROWSER_ENV || apis.IS_WEBWORKER_ENV)
444-
? (webInferenceChain = webInferenceChain.then(run))
445-
: run());
439+
const output = await runInferenceSession(session, ortFeed);
446440
return replaceTensors(output);
447441
} catch (e) {
448442
// Error messages can be long (nested) and uninformative. For this reason,

src/ops/registry.js

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
1-
import { createInferenceSession, isONNXProxy } from "../backends/onnx.js";
1+
import { createInferenceSession, runInferenceSession, isONNXProxy } from "../backends/onnx.js";
22
import { Tensor } from "../utils/tensor.js";
3-
import { apis } from "../env.js";
43

5-
const IS_WEB_ENV = apis.IS_BROWSER_ENV || apis.IS_WEBWORKER_ENV;
64
/**
75
* Asynchronously creates a wrapper function for running an ONNX inference session.
86
*
@@ -19,16 +17,10 @@ const wrap = async (session_bytes, session_options, names) => {
1917
new Uint8Array(session_bytes), session_options,
2018
);
2119

22-
/** @type {Promise<any>} */
23-
let chain = Promise.resolve();
24-
2520
return /** @type {any} */(async (/** @type {Record<string, Tensor>} */ inputs) => {
2621
const proxied = isONNXProxy();
2722
const ortFeed = Object.fromEntries(Object.entries(inputs).map(([k, v]) => [k, (proxied ? v.clone() : v).ort_tensor]));
28-
29-
// When running in-browser via WASM, we need to chain calls to session.run to avoid "Error: Session already started"
30-
const outputs = await (chain = IS_WEB_ENV ? chain.then(() => session.run(ortFeed)) : session.run(ortFeed));
31-
23+
const outputs = await runInferenceSession(session, ortFeed);
3224
if (Array.isArray(names)) {
3325
return names.map((n) => new Tensor(outputs[n]));
3426
} else {

0 commit comments

Comments
 (0)