Skip to content

Commit

Permalink
Add packages for browser input audio device selection (#77)
Browse files Browse the repository at this point in the history
Add two new packages based on the microphone selection flow implemented
in the portal

- `@speechmatics/browser-audio-input`:
  - Contains just the store/singleton class managing audio devices.
  - Also provides a minified build in case someone wants to use this in an
    inline script tag
- `@speechmatics/browser-audio-input-react`:
  - React bindings for the above package
  - Constrain the types of the main exported hook
- Updated the NextJS example to use the react package

**Note**: Both these packages export only ESM (apart from the raw client
which also provides a minified build). My thinking was there are
practically 0 CommonJS use cases for browser microphone selection, so I
omitted it from the build.
  • Loading branch information
mnemitz authored Oct 31, 2024
1 parent 9a8cb4b commit ed56997
Show file tree
Hide file tree
Showing 13 changed files with 497 additions and 8 deletions.
1 change: 1 addition & 0 deletions examples/nextjs/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
},
"dependencies": {
"@speechmatics/flow-client-react": "workspace:*",
"@speechmatics/browser-audio-input-react": "workspace:*",
"@picocss/pico": "^2.0.6",
"next": "15.0.1",
"react": "19.0.0-rc-69d4b800-20241021",
Expand Down
7 changes: 5 additions & 2 deletions examples/nextjs/src/app/flow/Component.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,10 @@ export default function Component({
);

const startSession = useCallback(
async (personaId: string) => {
async ({
personaId,
deviceId,
}: { personaId: string; deviceId?: string }) => {
try {
setLoading(true);
const audioContext = new AudioContext({ sampleRate: SAMPLE_RATE });
Expand All @@ -57,7 +60,7 @@ export default function Component({
sample_rate: SAMPLE_RATE,
},
});
const mediaStream = await startRecording(audioContext);
const mediaStream = await startRecording(audioContext, deviceId);
setMediaStream(mediaStream);
} finally {
setLoading(false);
Expand Down
74 changes: 69 additions & 5 deletions examples/nextjs/src/app/flow/Controls.tsx
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { useState } from 'react';
import { type ChangeEvent, useState } from 'react';
import { useFlow } from '@speechmatics/flow-client-react';
import { useAudioDevices } from '@speechmatics/browser-audio-input-react';

export function Controls({
loading,
Expand All @@ -9,21 +10,27 @@ export function Controls({
}: {
loading: boolean;
personas: Record<string, { name: string }>;
startSession: (personaId: string) => Promise<void>;
startSession: ({
deviceId,
personaId,
}: { deviceId?: string; personaId: string }) => Promise<void>;
stopSession: () => Promise<void>;
}) {
const { socketState } = useFlow();
const connected = socketState === 'open';
const [persona, setPersona] = useState(Object.keys(personas)[0]);
const [personaId, setPersonaId] = useState(Object.keys(personas)[0]);

const [deviceId, setDeviceId] = useState<string>();

return (
<article>
<div className="grid">
<MicrophoneSelect setDeviceId={setDeviceId} />
<label>
Select persona
<select
onChange={(e) => {
setPersona(e.target.value);
setPersonaId(e.target.value);
}}
>
{Object.entries(personas).map(([id, { name }]) => (
Expand All @@ -39,11 +46,68 @@ export function Controls({
type="button"
className={connected ? 'secondary' : undefined}
aria-busy={loading}
onClick={connected ? stopSession : () => startSession(persona)}
onClick={
connected
? stopSession
: () => startSession({ personaId, deviceId })
}
>
{connected ? 'Stop conversation' : 'Start conversation'}
</button>
</div>
</article>
);
}

/**
 * Microphone picker whose markup depends on the permission state reported by
 * `useAudioDevices`.
 *
 * - 'prompt': an empty select that calls `promptPermissions` on click or key press.
 * - 'prompting': an empty select marked `aria-busy`.
 * - 'granted': a select listing every entry of `deviceList`; picking one
 *   forwards its `deviceId` to `setDeviceId`.
 * - 'denied': a disabled, empty select.
 *
 * @param setDeviceId - Receives the `deviceId` of the option the user selects.
 */
function MicrophoneSelect({
  setDeviceId,
}: { setDeviceId: (deviceId: string) => void }) {
  const devices = useAudioDevices();

  if (devices.permissionState === 'prompt') {
    const requestAccess = devices.promptPermissions;
    return (
      <label>
        Enable mic permissions
        <select onClick={requestAccess} onKeyDown={requestAccess} />
      </label>
    );
  }

  if (devices.permissionState === 'prompting') {
    return (
      <label>
        Enable mic permissions
        <select aria-busy="true" />
      </label>
    );
  }

  if (devices.permissionState === 'granted') {
    const handleDeviceChange = (event: ChangeEvent<HTMLSelectElement>) => {
      setDeviceId(event.target.value);
    };
    const options = devices.deviceList.map((device) => (
      <option key={device.deviceId} value={device.deviceId}>
        {device.label}
      </option>
    ));
    return (
      <label>
        Select audio device
        <select onChange={handleDeviceChange}>{options}</select>
      </label>
    );
  }

  if (devices.permissionState === 'denied') {
    return (
      <label>
        Microphone permission disabled
        <select disabled />
      </label>
    );
  }

  // Compile-time exhaustiveness check: every variant was handled above.
  devices satisfies never;
  return null;
}
3 changes: 2 additions & 1 deletion examples/nextjs/src/lib/audio-hooks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@ export function usePcmMicrophoneAudio(onAudio: (audio: Float32Array) => void) {
const mediaStreamRef = useRef<MediaStream>();

const startRecording = useCallback(
async (audioContext: AudioContext) => {
async (audioContext: AudioContext, deviceId?: string) => {
// If stream is present, it means we're already recording, nothing to do
if (mediaStreamRef.current) {
return mediaStreamRef.current;
}

const mediaStream = await navigator.mediaDevices.getUserMedia({
audio: {
deviceId,
sampleRate: audioContext?.sampleRate,
sampleSize: 16,
channelCount: 1,
Expand Down
71 changes: 71 additions & 0 deletions packages/browser-audio-input-react/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Browser audio input (React)

React bindings for the `@speechmatics/browser-audio-input` package, letting you manage audio input devices and permissions across browsers.

## Installation

```
npm i @speechmatics/browser-audio-input-react
```

## Usage

Below is an example of a microphone selection component built with the `useAudioDevices` hook.

```TSX
import { useAudioDevices } from "@speechmatics/browser-audio-input-react";

/**
 * Example microphone picker driven by `useAudioDevices`.
 *
 * Renders a different `<select>` for each `permissionState`, and reports the
 * chosen device through `setDeviceId` once permission has been granted.
 *
 * @param setDeviceId - Receives the `deviceId` of the option the user selects.
 */
function MicrophoneSelect({
  setDeviceId,
}: { setDeviceId: (deviceId: string) => void }) {
  const devices = useAudioDevices();

  switch (devices.permissionState) {
    case 'prompt':
      // Interacting with the empty select triggers the permission prompt.
      return (
        <label>
          Enable mic permissions
          <select
            onClick={devices.promptPermissions}
            onKeyDown={devices.promptPermissions}
          />
        </label>
      );
    case 'prompting':
      return (
        <label>
          Enable mic permissions
          <select aria-busy="true" />
        </label>
      );
    case 'granted':
      // The inline handler is typed contextually by `onChange`, so the
      // example needs no extra `ChangeEvent` import from React.
      return (
        <label>
          Select audio device
          <select onChange={(e) => setDeviceId(e.target.value)}>
            {devices.deviceList.map((d) => (
              <option key={d.deviceId} value={d.deviceId}>
                {d.label}
              </option>
            ))}
          </select>
        </label>
      );
    case 'denied':
      return (
        <label>
          Microphone permission disabled
          <select disabled />
        </label>
      );
    default:
      // Compile-time exhaustiveness check: all variants are handled above.
      devices satisfies never;
      return null;
  }
}

```
36 changes: 36 additions & 0 deletions packages/browser-audio-input-react/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
"name": "@speechmatics/browser-audio-input-react",
"version": "0.0.1",
"description": "React hooks for managing audio inputs and permissions across browsers",
"exports": ["./dist/index.js"],
"module": "./dist/index.js",
"typings": "./dist/index.d.ts",
"files": ["dist/", "README.md"],
"scripts": {
"build": "rm -rf dist/ && pnpm -C ../browser-audio-input build && pnpm rollup -c",
"prepare": "pnpm build",
"format": "biome format --write .",
"lint": "biome lint --write ."
},
"keywords": [
"Flow",
"API",
"React",
"hooks",
"transcription",
"speech",
"intelligence"
],
"dependencies": {
"@speechmatics/browser-audio-input": "workspace:*"
},
"author": "",
"license": "MIT",
"peerDependencies": {
"react": "^18 || ^19"
},
"devDependencies": {
"@types/react": "^18.3.12",
"typescript-event-target": "^1.1.1"
}
}
39 changes: 39 additions & 0 deletions packages/browser-audio-input-react/rollup.config.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import esbuild from 'rollup-plugin-esbuild';
import dts from 'rollup-plugin-dts';

import packageJSON from './package.json' assert { type: 'json' };

// Based on gist
//https://gist.github.com/aleclarson/9900ed2a9a3119d865286b218e14d226

/**
 * Builds the Rollup configuration for this package.
 *
 * Two bundles are produced from `src/index.ts`:
 *  1. an ES module written to the path in `package.json`'s `module` field
 *     (transpiled by the esbuild plugin, with a sourcemap), and
 *  2. a bundled type declaration file written next to it, using the same
 *     path with the `.js` extension swapped for `.d.ts`.
 *
 * @returns {import("rollup").RollupOptions[]}
 */
export default function rollup() {
  // Anchor the pattern so only a trailing ".js" extension is stripped; a plain
  // string pattern would remove the first ".js" found anywhere in the path.
  const declarationFile = `${packageJSON.module.replace(/\.js$/, '')}.d.ts`;

  return [
    {
      plugins: [esbuild()],
      input: 'src/index.ts',
      output: [
        {
          file: packageJSON.module,
          format: 'es',
          sourcemap: true,
          strict: false,
        },
      ],
    },

    {
      plugins: [
        dts({
          compilerOptions: {
            removeComments: true,
          },
        }),
      ],
      input: 'src/index.ts',
      output: {
        file: declarationFile,
      },
    },
  ];
}
77 changes: 77 additions & 0 deletions packages/browser-audio-input-react/src/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import { useCallback, useSyncExternalStore } from 'react';
import { getAudioDevicesStore } from '@speechmatics/browser-audio-input';

/**
 * Subscribe function handed to `useSyncExternalStore` for the device list.
 *
 * Registers `callback` as a listener for the store's 'changeDevices' event.
 *
 * @param callback - Invoked whenever the store dispatches 'changeDevices'.
 * @returns A cleanup function that removes the listener again.
 */
function subscribeDevices(callback: () => void) {
  const store = getAudioDevicesStore();
  store.addEventListener('changeDevices', callback);

  return function unsubscribe() {
    store.removeEventListener('changeDevices', callback);
  };
}
/** Snapshot getter: reads the current `devices` value from the audio devices store. */
const getDevices = () => {
  const store = getAudioDevicesStore();
  return store.devices;
};

/**
 * Hook returning the store's current device list, re-rendering the caller
 * when the store signals a device change.
 *
 * `getDevices` is passed as both the client and the server snapshot getter.
 */
function useAudioDeviceList() {
  const deviceList = useSyncExternalStore(
    subscribeDevices,
    getDevices,
    getDevices,
  );
  return deviceList;
}

/**
 * Subscribe function handed to `useSyncExternalStore` for the permission state.
 *
 * Registers `callback` as a listener for the store's 'changePermissions' event.
 *
 * @param callback - Invoked whenever the store dispatches 'changePermissions'.
 * @returns A cleanup function that removes the listener again.
 */
function subscribePermissionState(callback: () => void) {
  const store = getAudioDevicesStore();
  store.addEventListener('changePermissions', callback);

  return function unsubscribe() {
    store.removeEventListener('changePermissions', callback);
  };
}
/** Snapshot getter: reads the current `permissionState` from the audio devices store. */
const getPermissionState = () => {
  const store = getAudioDevicesStore();
  return store.permissionState;
};
/**
 * Hook returning the store's current permission state, re-rendering the
 * caller when the store signals a permission change.
 *
 * `getPermissionState` is passed as both the client and the server snapshot getter.
 */
function useAudioPermissionState() {
  const permissionState = useSyncExternalStore(
    subscribePermissionState,
    getPermissionState,
    getPermissionState,
  );
  return permissionState;
}

/**
 * Hook returning a memoized async callback that awaits the store's
 * `promptPermissions()`.
 *
 * The dependency list is empty, so the same callback is returned on every render.
 */
function usePromptAudioPermission() {
  const prompt = useCallback(async (): Promise<void> => {
    const store = getAudioDevicesStore();
    await store.promptPermissions();
  }, []);

  return prompt;
}

/**
 * Result of `useAudioDevices`, discriminated on `permissionState`.
 *
 * Each variant exposes only the fields that apply to it, so consumers must
 * narrow on `permissionState` before reading `promptPermissions` or
 * `deviceList`.
 */
export type AudioDevices =
// Only this variant carries the callback used to request permission.
| { permissionState: 'prompt'; promptPermissions: () => void }
// No extra fields (presumably a permission request is in flight — confirm against the store).
| { permissionState: 'prompting' }
// Only this variant carries the list of devices.
| {
permissionState: 'granted';
deviceList: ReadonlyArray<MediaDeviceInfo>;
}
// No extra fields.
| { permissionState: 'denied' };

/**
 * Main hook of the package: combines the permission state, the permission
 * prompt callback and the device list into a single `AudioDevices` value.
 *
 * Only the fields relevant to the current `permissionState` are returned:
 * `promptPermissions` for 'prompt', `deviceList` for 'granted', and nothing
 * extra for 'prompting' and 'denied'.
 *
 * @returns The `AudioDevices` variant matching the current permission state.
 * @throws Error if the store reports a permission state outside the four known values.
 */
export function useAudioDevices(): AudioDevices {
  // All three hooks run unconditionally and in a fixed order on every render.
  const permissionState = useAudioPermissionState();
  const promptPermissions = usePromptAudioPermission();
  const deviceList = useAudioDeviceList();

  if (permissionState === 'prompt') {
    return { permissionState, promptPermissions };
  }

  if (permissionState === 'granted') {
    return { permissionState, deviceList };
  }

  if (permissionState === 'prompting' || permissionState === 'denied') {
    return { permissionState };
  }

  // Compile-time exhaustiveness check; the throw guards against unexpected runtime values.
  permissionState satisfies never;
  throw new Error(`Unexpected permission state: ${permissionState}`);
}
3 changes: 3 additions & 0 deletions packages/browser-audio-input-react/tsconfig.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"extends": "../../tsconfig.json"
}
Loading

0 comments on commit ed56997

Please sign in to comment.