speechmatics · mnemitz · Dec 10, 2024 · Dec 10, 2024 · Dec 10, 2024 · Dec 10, 2024
diff --git a/biome.json b/biome.json
@@ -6,7 +6,12 @@
   "formatter": {
     "indentStyle": "space",
     "indentWidth": 2,
-    "ignore": ["./packages/*/dist/**", "*/**/.next/**", "*/**/node_modules/*"]
+    "ignore": [
+      "./packages/*/dist/**",
+      "*/**/.next/**",
+      "*/**/node_modules/*",
+      "./examples/nextjs/public/*"
+    ]
   },
   "javascript": {
     "formatter": {
@@ -18,6 +23,11 @@
     "rules": {
       "recommended": true
     },
-    "ignore": ["./packages/*/dist/**", "*/**/.next/**", "*/**/node_modules/*"]
+    "ignore": [
+      "./packages/*/dist/**",
+      "*/**/.next/**",
+      "*/**/node_modules/*",
+      "./examples/nextjs/public/*"
+    ]
   }
 }
diff --git a/examples/nextjs/.gitignore b/examples/nextjs/.gitignore
@@ -0,0 +1 @@
+public/js/pcm-audio-worklet.min.js
diff --git a/examples/nextjs/next.config.ts b/examples/nextjs/next.config.ts
@@ -1,7 +1,28 @@
+import path from 'node:path';
+import CopyWebpackPlugin from 'copy-webpack-plugin';
 import type { NextConfig } from 'next';
 
 const nextConfig: NextConfig = {
-  /* config options here */
+  webpack: (config, { isServer }) => {
+    // Use CopyWebpackPlugin to copy the file to the public directory
+    if (!isServer) {
+      config.plugins.push(
+        new CopyWebpackPlugin({
+          patterns: [
+            {
+              from: path.resolve(
+                __dirname,
+                'node_modules/@speechmatics/browser-audio-input/dist/pcm-audio-worklet.min.js',
+              ),
+              to: path.resolve(__dirname, 'public/js/[name][ext]'),
+            },
+          ],
+        }),
+      );
+    }
+
+    return config;
+  },
 };
 
 export default nextConfig;
diff --git a/examples/nextjs/package.json b/examples/nextjs/package.json
@@ -10,10 +10,11 @@
     "lint": "next lint"
   },
   "dependencies": {
-    "@speechmatics/flow-client-react": "workspace:*",
-    "@speechmatics/browser-audio-input-react": "workspace:*",
-    "@speechmatics/auth": "workspace:*",
     "@picocss/pico": "^2.0.6",
+    "@speechmatics/auth": "workspace:*",
+    "@speechmatics/browser-audio-input": "workspace:*",
+    "@speechmatics/browser-audio-input-react": "workspace:*",
+    "@speechmatics/flow-client-react": "workspace:*",
     "next": "15.0.1",
     "react": "19.0.0-rc-69d4b800-20241021",
     "react-dom": "19.0.0-rc-69d4b800-20241021",
@@ -23,6 +24,7 @@
     "@types/node": "^20",
     "@types/react": "^18",
     "@types/react-dom": "^18",
+    "copy-webpack-plugin": "^12.0.2",
     "typescript": "^5"
   }
 }
diff --git a/examples/nextjs/src/app/actions.ts b/examples/nextjs/src/app/actions.ts
@@ -2,11 +2,11 @@
 
 import { createSpeechmaticsJWT } from '@speechmatics/auth';
 
-export async function getJWT() {
+export async function getJWT(type: 'flow' | 'rt') {
   const apiKey = process.env.API_KEY;
   if (!apiKey) {
     throw new Error('Please set the API_KEY environment variable');
   }
 
-  return createSpeechmaticsJWT({ type: 'flow', apiKey, ttl: 60 });
+  return createSpeechmaticsJWT({ type, apiKey, ttl: 60 });
 }
diff --git a/examples/nextjs/src/app/flow/Component.tsx b/examples/nextjs/src/app/flow/Component.tsx
@@ -1,18 +1,19 @@
 'use client';
 
-import { useCallback, useState } from 'react';
+import { use, useCallback, useState } from 'react';
 
-import {
-  usePcmMicrophoneAudio,
-  usePlayPcm16Audio,
-} from '../../lib/audio-hooks';
+import { usePlayPcm16Audio } from '../../lib/audio-hooks';
 import { ErrorBoundary } from 'react-error-boundary';
 import { Controls } from './Controls';
 import { Status } from './Status';
 import { ErrorFallback } from '../../lib/components/ErrorFallback';
 import { OutputView } from './OutputView';
 import { useFlow, useFlowEventListener } from '@speechmatics/flow-client-react';
 import { getJWT } from '../actions';
+import {
+  usePcmAudioListener,
+  usePcmAudioRecorder,
+} from '@speechmatics/browser-audio-input-react';
 
 export default function Component({
   personas,
@@ -31,13 +32,12 @@ export default function Component({
 
   const [loading, setLoading] = useState(false);
 
-  const [mediaStream, setMediaStream] = useState<MediaStream>();
+  const { startRecording, stopRecording, mediaStream, isRecording } =
+    usePcmAudioRecorder();
 
-  const { startRecording, stopRecording, isRecording } = usePcmMicrophoneAudio(
-    (audio) => {
-      sendAudio(audio);
-    },
-  );
+  usePcmAudioListener((audio) => {
+    sendAudio(audio);
+  });
 
   const startSession = useCallback(
     async ({
@@ -47,7 +47,7 @@ export default function Component({
       try {
         setLoading(true);
 
-        const jwt = await getJWT();
+        const jwt = await getJWT('flow');
 
         const audioContext = new AudioContext({ sampleRate: SAMPLE_RATE });
         setAudioContext(audioContext);
@@ -64,8 +64,7 @@ export default function Component({
           },
         });
 
-        const mediaStream = await startRecording(audioContext, deviceId);
-        setMediaStream(mediaStream);
+        await startRecording({ audioContext, deviceId });
       } finally {
         setLoading(false);
       }

diff --git a/examples/nextjs/src/app/flow/page.tsx b/examples/nextjs/src/app/flow/page.tsx
@@ -1,12 +1,15 @@
 import { fetchPersonas, FlowProvider } from '@speechmatics/flow-client-react';
 import Component from './Component';
+import { PcmAudioRecorderProvider } from '@speechmatics/browser-audio-input-react';
 
 export default async function Home() {
   const personas = await fetchPersonas();
 
   return (
-    <FlowProvider appId="nextjs-example">
-      <Component personas={personas} />
-    </FlowProvider>
+    <PcmAudioRecorderProvider workletScriptURL="/js/pcm-audio-worklet.min.js">
+      <FlowProvider appId="nextjs-example">
+        <Component personas={personas} />
+      </FlowProvider>
+    </PcmAudioRecorderProvider>
   );
 }
diff --git a/examples/nextjs/src/lib/audio-hooks.ts b/examples/nextjs/src/lib/audio-hooks.ts
@@ -1,66 +1,4 @@
-import { useRef, useState, useCallback, useEffect } from 'react';
-
-/**
- *
- * Hook for getting PCM (f32) microphone audio in the browser.
- *
- * The Web Audio APIs tend to use f32 over int16, when capturing/playing audio.
- * The Flow service accepts both, so we use f32 here to avoid converting.
- */
-export function usePcmMicrophoneAudio(onAudio: (audio: Float32Array) => void) {
-  const [isRecording, setIsRecording] = useState(false);
-  const mediaStreamRef = useRef<MediaStream>();
-
-  const startRecording = useCallback(
-    async (audioContext: AudioContext, deviceId?: string) => {
-      // If stream is present, it means we're already recording, nothing to do
-      if (mediaStreamRef.current) {
-        return mediaStreamRef.current;
-      }
-
-      const mediaStream = await navigator.mediaDevices.getUserMedia({
-        audio: {
-          deviceId,
-          sampleRate: audioContext?.sampleRate,
-          sampleSize: 16,
-          channelCount: 1,
-          echoCancellation: true,
-          noiseSuppression: true,
-          autoGainControl: true,
-        },
-      });
-
-      setIsRecording(true);
-
-      // TODO see if we can do this without script processor
-      const input = audioContext.createMediaStreamSource(mediaStream);
-      const processor = audioContext.createScriptProcessor(512, 1, 1);
-
-      input.connect(processor);
-      processor.connect(audioContext.destination);
-
-      processor.onaudioprocess = (event) => {
-        const inputBuffer = event.inputBuffer.getChannelData(0);
-        onAudio(inputBuffer);
-      };
-
-      mediaStreamRef.current = mediaStream;
-      return mediaStream;
-    },
-    [onAudio],
-  );
-
-  const stopRecording = useCallback(() => {
-    for (const track of mediaStreamRef.current?.getTracks() ?? []) {
-      track.stop();
-    }
-    mediaStreamRef.current = undefined;
-
-    setIsRecording(false);
-  }, []);
-
-  return { startRecording, stopRecording, isRecording };
-}
+import { useRef, useCallback, useEffect } from 'react';
 
 export function usePlayPcm16Audio(audioContext: AudioContext | undefined) {
   const playbackStartTime = useRef(0);

diff --git a/examples/nextjs/src/lib/constants.ts b/examples/nextjs/src/lib/constants.ts
@@ -0,0 +1,3 @@
+// We recommend using a sample rate of 16_000 Hz for real-time transcription.
+// Anything higher will be downsampled by the server. Lower sample rates are also supported.
+export const RECORDING_SAMPLE_RATE = 16_000;
diff --git a/packages/browser-audio-input-react/README.md b/packages/browser-audio-input-react/README.md
@@ -10,6 +10,8 @@ npm i @speechmatics/browser-audio-input-react
 
 ## Usage
 
+### Microphone selection
+
 Below is an example of a Microphone selection component.
 
 ```TSX
@@ -68,4 +70,116 @@ function MicrophoneSelect({
   }
 }
 
-```
+```
+
+### PCM recording
+
+This package exposes a context provider that can be used like so:
+
+```TSX
+import { PcmAudioRecorderProvider } from '@speechmatics/browser-audio-input-react';
+
+function App() {
+  return (
+    <PcmAudioRecorderProvider workletScriptURL="/path/to/pcm-audio-worklet.min.js">
+      <Component />
+    </PcmAudioRecorderProvider>
+  );
+}
+
+// Now all child components can use the provided hooks
+
+function Component() {
+  const { startRecording, stopRecording, mediaStream, isRecording } =
+    usePcmAudioRecorder();
+
+  usePcmAudioListener((audio) => {
+    // Handle Float32Array of audio however you like
+  });
+}
+
+```
+
+### Note about `AudioWorklet` script URL
+
+When recording audio in the browser, there are generally three approaches:
+
+- ❌ [`createScriptProcessor()`](https://developer.mozilla.org/en-US/docs/Web/API/BaseAudioContext/createScriptProcessor): Can capture PCM data on the main thread, but is deprecated and prone to poor performance.
+- ❌ [`MediaRecorder`](https://developer.mozilla.org/en-US/docs/Web/API/MediaRecorder): Provides a simple API, but cannot capture PCM data (only MPEG/OGG)
+- ✅ [`AudioWorklet`](https://developer.mozilla.org/en-US/docs/Web/API/AudioWorklet): Captures/processes PCM on dedicated thread.
+
+This library leverages `AudioWorklet` to capture PCM audio (specifically 32-bit Float PCM, which is the underlying representation in the browser).
+
+Since `AudioWorklet`s run outside the main thread, their code must be loaded from an external source (i.e. a URL).
+
+### Getting the AudioWorklet script
+
+First make sure the base package (the one this package wraps) is installed:
+
+```
+npm i @speechmatics/browser-audio-input
+```
+
+The code for this PCM audio processor is provided by that library at `/dist/pcm-audio-worklet.min.js`. However, **how this script is loaded depends on your bundler setup**.
+
+### Webpack
+
+At the moment, Webpack doesn't have a great story for `AudioWorklet` scripts (see [GitHub issue](https://github.com/webpack/webpack/issues/11543)). Instead, we recommend using the `copy-webpack-plugin` to copy our `pcm-audio-worklet.min.js` directly into your `/public` folder:
+
+```javascript
+const path = require("path");
+const CopyWebpackPlugin = require("copy-webpack-plugin");
+module.exports = {
+  // ... rest of your Webpack config
+  plugins: [
+    new CopyWebpackPlugin({
+      patterns: [
+        {
+          from: path.resolve(
+            __dirname,
+            'node_modules/@speechmatics/browser-audio-input/dist/pcm-audio-worklet.min.js',
+          ),
+          to: path.resolve(__dirname, 'public/js/[name][ext]'),
+        },
+      ],
+    }),
+  ]
+};
+
+```
+
+See [Webpack documentation](https://webpack.js.org/plugins/copy-webpack-plugin) for more details.
+
+Then use `/js/pcm-audio-worklet.min.js` (or whatever other path you define) as the path to the script:
+
+```TSX
+// WEBPACK EXAMPLE
+import { PcmAudioRecorderProvider } from '@speechmatics/browser-audio-input-react';
+
+function App() {
+  return (
+    <PcmAudioRecorderProvider workletScriptURL="/js/pcm-audio-worklet.min.js">
+      <Component />
+    </PcmAudioRecorderProvider>
+  );
+}
+```
+
+### Vite
+
+Vite supports referencing bundled code by URL for use in Workers. This can be used like so:
+
+
+```TSX
+// VITE EXAMPLE
+import { PcmAudioRecorderProvider } from '@speechmatics/browser-audio-input-react';
+import workletScriptURL from '@speechmatics/browser-audio-input/pcm-audio-worklet.min.js?url';
+
+function App() {
+  return (
+    <PcmAudioRecorderProvider workletScriptURL={workletScriptURL}>
+      <Component />
+    </PcmAudioRecorderProvider>
+  );
+}
+```
diff --git a/packages/browser-audio-input-react/package.json b/packages/browser-audio-input-react/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@speechmatics/browser-audio-input-react",
-  "version": "0.0.1",
+  "version": "0.1.0",
   "description": "React hooks for managing audio inputs and permissions across browsers",
   "exports": ["./dist/index.js"],
   "module": "./dist/index.js",

diff --git a/packages/browser-audio-input-react/rollup.config.mjs b/packages/browser-audio-input-react/rollup.config.mjs
@@ -18,6 +18,7 @@ export default function rollup() {
           format: 'es',
           sourcemap: true,
           strict: false,
+          banner: '"use client";',
         },
       ],
     },