Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add PCM audio capture to browser audio input library #95

Merged
merged 5 commits into from
Dec 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions biome.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,12 @@
"formatter": {
"indentStyle": "space",
"indentWidth": 2,
"ignore": ["./packages/*/dist/**", "*/**/.next/**", "*/**/node_modules/*"]
"ignore": [
"./packages/*/dist/**",
"*/**/.next/**",
"*/**/node_modules/*",
"./examples/nextjs/public/*"
]
},
"javascript": {
"formatter": {
Expand All @@ -18,6 +23,11 @@
"rules": {
"recommended": true
},
"ignore": ["./packages/*/dist/**", "*/**/.next/**", "*/**/node_modules/*"]
"ignore": [
"./packages/*/dist/**",
"*/**/.next/**",
"*/**/node_modules/*",
"./examples/nextjs/public/*"
]
}
}
1 change: 1 addition & 0 deletions examples/nextjs/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
public/js/pcm-audio-worklet.min.js
23 changes: 22 additions & 1 deletion examples/nextjs/next.config.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,28 @@
import path from 'node:path';
import CopyWebpackPlugin from 'copy-webpack-plugin';
import type { NextConfig } from 'next';

/**
 * Next.js configuration for the example app.
 *
 * The custom `webpack` hook registers a copy step on the client compilation
 * that places the prebuilt PCM audio worklet script from
 * `@speechmatics/browser-audio-input` into `public/js`.
 */
const nextConfig: NextConfig = {
  webpack: (config, options) => {
    // The server compilation is returned untouched; only the client build copies the file.
    if (options.isServer) {
      return config;
    }

    const workletSource = path.resolve(
      __dirname,
      'node_modules/@speechmatics/browser-audio-input/dist/pcm-audio-worklet.min.js',
    );
    // `[name][ext]` keeps the original file name in the destination directory.
    const workletDestination = path.resolve(__dirname, 'public/js/[name][ext]');

    const copyWorklet = new CopyWebpackPlugin({
      patterns: [{ from: workletSource, to: workletDestination }],
    });
    config.plugins.push(copyWorklet);

    return config;
  },
};

export default nextConfig;
8 changes: 5 additions & 3 deletions examples/nextjs/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@
"lint": "next lint"
},
"dependencies": {
"@speechmatics/flow-client-react": "workspace:*",
"@speechmatics/browser-audio-input-react": "workspace:*",
"@speechmatics/auth": "workspace:*",
"@picocss/pico": "^2.0.6",
"@speechmatics/auth": "workspace:*",
"@speechmatics/browser-audio-input": "workspace:*",
"@speechmatics/browser-audio-input-react": "workspace:*",
"@speechmatics/flow-client-react": "workspace:*",
"next": "15.0.1",
"react": "19.0.0-rc-69d4b800-20241021",
"react-dom": "19.0.0-rc-69d4b800-20241021",
Expand All @@ -23,6 +24,7 @@
"@types/node": "^20",
"@types/react": "^18",
"@types/react-dom": "^18",
"copy-webpack-plugin": "^12.0.2",
"typescript": "^5"
}
}
4 changes: 2 additions & 2 deletions examples/nextjs/src/app/actions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@

import { createSpeechmaticsJWT } from '@speechmatics/auth';

/**
 * Creates a Speechmatics JWT with a 60 second TTL using the key in the
 * `API_KEY` environment variable.
 *
 * NOTE: this listing is a diff without +/- markers, so the signature line and
 * the `return` line each appear twice (previous version first, then the new
 * one that accepts and forwards `type`).
 *
 * @param type - Which kind of token to request: `'flow'` or `'rt'`.
 * @returns The result of `createSpeechmaticsJWT`.
 * @throws Error when `API_KEY` is not set.
 */
export async function getJWT() {
export async function getJWT(type: 'flow' | 'rt') {
const apiKey = process.env.API_KEY;
if (!apiKey) {
throw new Error('Please set the API_KEY environment variable');
}

return createSpeechmaticsJWT({ type: 'flow', apiKey, ttl: 60 });
return createSpeechmaticsJWT({ type, apiKey, ttl: 60 });
}
27 changes: 13 additions & 14 deletions examples/nextjs/src/app/flow/Component.tsx
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
'use client';

import { useCallback, useState } from 'react';
import { use, useCallback, useState } from 'react';

import {
usePcmMicrophoneAudio,
usePlayPcm16Audio,
} from '../../lib/audio-hooks';
import { usePlayPcm16Audio } from '../../lib/audio-hooks';
import { ErrorBoundary } from 'react-error-boundary';
import { Controls } from './Controls';
import { Status } from './Status';
import { ErrorFallback } from '../../lib/components/ErrorFallback';
import { OutputView } from './OutputView';
import { useFlow, useFlowEventListener } from '@speechmatics/flow-client-react';
import { getJWT } from '../actions';
import {
usePcmAudioListener,
usePcmAudioRecorder,
} from '@speechmatics/browser-audio-input-react';

export default function Component({
personas,
Expand All @@ -31,13 +32,12 @@ export default function Component({

const [loading, setLoading] = useState(false);

const [mediaStream, setMediaStream] = useState<MediaStream>();
const { startRecording, stopRecording, mediaStream, isRecording } =
usePcmAudioRecorder();

const { startRecording, stopRecording, isRecording } = usePcmMicrophoneAudio(
(audio) => {
sendAudio(audio);
},
);
usePcmAudioListener((audio) => {
sendAudio(audio);
});

const startSession = useCallback(
async ({
Expand All @@ -47,7 +47,7 @@ export default function Component({
try {
setLoading(true);

const jwt = await getJWT();
const jwt = await getJWT('flow');

const audioContext = new AudioContext({ sampleRate: SAMPLE_RATE });
setAudioContext(audioContext);
Expand All @@ -64,8 +64,7 @@ export default function Component({
},
});

const mediaStream = await startRecording(audioContext, deviceId);
setMediaStream(mediaStream);
await startRecording({ audioContext, deviceId });
} finally {
setLoading(false);
}
Expand Down
9 changes: 6 additions & 3 deletions examples/nextjs/src/app/flow/page.tsx
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import { fetchPersonas, FlowProvider } from '@speechmatics/flow-client-react';
import Component from './Component';
import { PcmAudioRecorderProvider } from '@speechmatics/browser-audio-input-react';

/**
 * Async page component for the Flow example: awaits `fetchPersonas()` and
 * passes the result to `Component` as the `personas` prop.
 *
 * NOTE: this listing is a diff without +/- markers. The first
 * `<FlowProvider>` element is the previous markup; the version wrapped in
 * `<PcmAudioRecorderProvider>` is the new markup.
 */
export default async function Home() {
const personas = await fetchPersonas();

return (
<FlowProvider appId="nextjs-example">
<Component personas={personas} />
</FlowProvider>
<PcmAudioRecorderProvider workletScriptURL="/js/pcm-audio-worklet.min.js">
<FlowProvider appId="nextjs-example">
<Component personas={personas} />
</FlowProvider>
</PcmAudioRecorderProvider>
);
}
64 changes: 1 addition & 63 deletions examples/nextjs/src/lib/audio-hooks.ts
Original file line number Diff line number Diff line change
@@ -1,66 +1,4 @@
import { useRef, useState, useCallback, useEffect } from 'react';

/**
 * Hook for getting PCM (f32) microphone audio in the browser.
 *
 * The Web Audio APIs tend to use f32 over int16 when capturing/playing audio.
 * The Flow service accepts both, so we use f32 here to avoid converting.
 *
 * @param onAudio - Called with the first channel's samples for every
 *   `audioprocess` event while recording.
 * @returns An object with:
 *   - `startRecording(audioContext, deviceId?)`: requests the microphone, wires
 *     it into `audioContext` and resolves with the `MediaStream`. If a
 *     recording is already active, the existing stream is returned.
 *   - `stopRecording()`: tears down the audio graph and stops all tracks.
 *   - `isRecording`: whether a recording is currently active.
 */
export function usePcmMicrophoneAudio(onAudio: (audio: Float32Array) => void) {
  const [isRecording, setIsRecording] = useState(false);
  const mediaStreamRef = useRef<MediaStream | undefined>(undefined);
  // The graph nodes are retained so that stopRecording can disconnect them.
  const inputRef = useRef<MediaStreamAudioSourceNode | undefined>(undefined);
  const processorRef = useRef<ScriptProcessorNode | undefined>(undefined);

  const startRecording = useCallback(
    async (audioContext: AudioContext, deviceId?: string) => {
      // If stream is present, it means we're already recording, nothing to do
      if (mediaStreamRef.current) {
        return mediaStreamRef.current;
      }

      const mediaStream = await navigator.mediaDevices.getUserMedia({
        audio: {
          deviceId,
          sampleRate: audioContext.sampleRate,
          sampleSize: 16,
          channelCount: 1,
          echoCancellation: true,
          noiseSuppression: true,
          autoGainControl: true,
        },
      });

      try {
        // TODO see if we can do this without script processor
        const input = audioContext.createMediaStreamSource(mediaStream);
        const processor = audioContext.createScriptProcessor(512, 1, 1);

        processor.onaudioprocess = (event) => {
          // NOTE(review): the buffer belongs to the event; consumers that keep
          // the samples beyond this call should probably copy them — confirm.
          onAudio(event.inputBuffer.getChannelData(0));
        };

        input.connect(processor);
        processor.connect(audioContext.destination);

        inputRef.current = input;
        processorRef.current = processor;
      } catch (error) {
        // Wiring failed: release the microphone instead of leaking a live
        // stream that stopRecording would have no reference to.
        for (const track of mediaStream.getTracks()) {
          track.stop();
        }
        throw error;
      }

      mediaStreamRef.current = mediaStream;
      // Only report "recording" once the graph is fully set up.
      setIsRecording(true);
      return mediaStream;
    },
    [onAudio],
  );

  const stopRecording = useCallback(() => {
    // Detach the processor first so no further audio callbacks are delivered
    // after the caller asked to stop.
    const processor = processorRef.current;
    if (processor) {
      processor.onaudioprocess = null;
      processor.disconnect();
    }
    inputRef.current?.disconnect();
    processorRef.current = undefined;
    inputRef.current = undefined;

    for (const track of mediaStreamRef.current?.getTracks() ?? []) {
      track.stop();
    }
    mediaStreamRef.current = undefined;

    setIsRecording(false);
  }, []);

  return { startRecording, stopRecording, isRecording };
}
import { useRef, useCallback, useEffect } from 'react';

export function usePlayPcm16Audio(audioContext: AudioContext | undefined) {
const playbackStartTime = useRef(0);
Expand Down
3 changes: 3 additions & 0 deletions examples/nextjs/src/lib/constants.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Recording sample rate in Hz.
// We recommend using a sample rate of 16_000 Hz for real-time transcription.
// Anything higher will be downsampled by the server. Lower sample rates are also supported.
export const RECORDING_SAMPLE_RATE = 16_000;
116 changes: 115 additions & 1 deletion packages/browser-audio-input-react/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ npm i @speechmatics/browser-audio-input-react

## Usage

### Microphone selection

Below is an example of a Microphone selection component.

```TSX
Expand Down Expand Up @@ -68,4 +70,116 @@ function MicrophoneSelect({
}
}

```
```

### PCM recording

This package exposes a context provider that can be used like so:

```TSX
import { PcmAudioRecorderProvider } from '@speechmatics/browser-audio-input-react';

function App() {
return (
<PcmAudioRecorderProvider workletScriptURL="/path/to/pcm-audio-worklet.min.js">
<Component />
</PcmAudioRecorderProvider>
);
}

// Now all child components can use the provided hooks

function Component() {
const { startRecording, stopRecording, mediaStream, isRecording } =
usePcmAudioRecorder();

usePcmAudioListener((audio) => {
// Handle Float32Array of audio however you like
});
}

```

### Note about `AudioWorklet` script URL

When recording audio in the browser, there are generally three approaches:

- ❌ [`createScriptProcessor()`](https://developer.mozilla.org/en-US/docs/Web/API/BaseAudioContext/createScriptProcessor): Can capture PCM data on the main thread, but is deprecated and prone to poor performance.
- ❌ [`MediaRecorder`](https://developer.mozilla.org/en-US/docs/Web/API/MediaRecorder): Provides a simple API, but cannot capture PCM data (only MPEG/OGG)
- ✅ [`AudioWorklet`](https://developer.mozilla.org/en-US/docs/Web/API/AudioWorklet): Captures/processes PCM on dedicated thread.

This library leverages `AudioWorklet` to capture PCM audio (specifically 32-bit Float PCM, which is the underlying representation in the browser).

Since `AudioWorklet`s run outside the main thread, their code must be loaded from an external source (i.e. a URL).

### Getting the AudioWorklet script

First make sure the base package (the one this package wraps) is installed:

```
npm i @speechmatics/browser-audio-input
```

The code for this PCM audio processor is provided by that library at `/dist/pcm-audio-worklet.min.js`. However, **how this script is loaded depends on your bundler setup**.

### Webpack

At the moment, Webpack doesn't have a great story for `AudioWorklet` scripts (see [GitHub issue](https://github.com/webpack/webpack/issues/11543)). Instead, we recommend using the `copy-webpack-plugin` to copy our `pcm-audio-worklet.min.js` directly into your `/public` folder:

```javascript
const path = require("path");
const CopyWebpackPlugin = require("copy-webpack-plugin");

module.exports = {
// ... rest of your Webpack config
plugins: [
new CopyWebpackPlugin({
patterns: [
{
from: path.resolve(
__dirname,
'node_modules/@speechmatics/browser-audio-input/dist/pcm-audio-worklet.min.js',
),
to: path.resolve(__dirname, 'public/js/[name][ext]'),
},
],
}),
]
};

```

See [Webpack documentation](https://webpack.js.org/plugins/copy-webpack-plugin) for more details.

Then use `/js/pcm-audio-worklet.min.js` (or whatever other path you define) as the path to the script:

```TSX
// WEBPACK EXAMPLE
import { PcmAudioRecorderProvider } from '@speechmatics/browser-audio-input-react';

function App() {
return (
<PcmAudioRecorderProvider workletScriptURL="/js/pcm-audio-worklet.min.js">
<Component />
</PcmAudioRecorderProvider>
);
}
```

### Vite

Vite supports referencing bundled code by URL for use in Workers. This can be used like so:


```TSX
// VITE EXAMPLE
import { PcmAudioRecorderProvider } from '@speechmatics/browser-audio-input-react';
import workletScriptURL from '@speechmatics/browser-audio-input/pcm-audio-worklet.min.js?url';

function App() {
return (
<PcmAudioRecorderProvider workletScriptURL={workletScriptURL}>
<Component />
</PcmAudioRecorderProvider>
);
}
```
2 changes: 1 addition & 1 deletion packages/browser-audio-input-react/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@speechmatics/browser-audio-input-react",
"version": "0.0.1",
"version": "0.1.0",
"description": "React hooks for managing audio inputs and permissions across browsers",
"exports": ["./dist/index.js"],
"module": "./dist/index.js",
Expand Down
1 change: 1 addition & 0 deletions packages/browser-audio-input-react/rollup.config.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ export default function rollup() {
format: 'es',
sourcemap: true,
strict: false,
banner: '"use client";',
},
],
},
Expand Down
Loading
Loading