Add packages for browser input audio device selection (#77)

Add two new packages based on the microphone selection flow implemented in the portal:

- `@speechmatics/browser-audio-input`:
  - Contains just the store/singleton class managing audio devices.
  - Also provides a minified build in case someone wants to use this in an inline script tag.
- `@speechmatics/browser-audio-input-react`:
  - React bindings for the above package.
  - Constrain the types of the main exported hook.
- Updated the NextJS example to use the react package.

**Note**: Both these packages export only ESM (apart from the raw client, which also provides a minified build). My thinking was there are practically 0 CommonJS use cases for browser microphone selection, so I omitted it from the build.
speechmatics · Oct 31, 2024 · ed56997 · ed56997
1 parent 9a8cb4b
commit ed56997
Show file tree

Hide file tree

Showing 13 changed files with 497 additions and 8 deletions.
diff --git a/examples/nextjs/package.json b/examples/nextjs/package.json
@@ -11,6 +11,7 @@
   },
   "dependencies": {
     "@speechmatics/flow-client-react": "workspace:*",
+    "@speechmatics/browser-audio-input-react": "workspace:*",
     "@picocss/pico": "^2.0.6",
     "next": "15.0.1",
     "react": "19.0.0-rc-69d4b800-20241021",

diff --git a/examples/nextjs/src/app/flow/Component.tsx b/examples/nextjs/src/app/flow/Component.tsx
@@ -41,7 +41,10 @@ export default function Component({
   );
 
   const startSession = useCallback(
-    async (personaId: string) => {
+    async ({
+      personaId,
+      deviceId,
+    }: { personaId: string; deviceId?: string }) => {
       try {
         setLoading(true);
         const audioContext = new AudioContext({ sampleRate: SAMPLE_RATE });
@@ -57,7 +60,7 @@ export default function Component({
             sample_rate: SAMPLE_RATE,
           },
         });
-        const mediaStream = await startRecording(audioContext);
+        const mediaStream = await startRecording(audioContext, deviceId);
         setMediaStream(mediaStream);
       } finally {
         setLoading(false);

diff --git a/examples/nextjs/src/app/flow/Controls.tsx b/examples/nextjs/src/app/flow/Controls.tsx
@@ -1,5 +1,6 @@
-import { useState } from 'react';
+import { type ChangeEvent, useState } from 'react';
 import { useFlow } from '@speechmatics/flow-client-react';
+import { useAudioDevices } from '@speechmatics/browser-audio-input-react';
 
 export function Controls({
   loading,
@@ -9,21 +10,27 @@ export function Controls({
 }: {
   loading: boolean;
   personas: Record<string, { name: string }>;
-  startSession: (personaId: string) => Promise<void>;
+  startSession: ({
+    deviceId,
+    personaId,
+  }: { deviceId?: string; personaId: string }) => Promise<void>;
   stopSession: () => Promise<void>;
 }) {
   const { socketState } = useFlow();
   const connected = socketState === 'open';
-  const [persona, setPersona] = useState(Object.keys(personas)[0]);
+  const [personaId, setPersonaId] = useState(Object.keys(personas)[0]);
+
+  const [deviceId, setDeviceId] = useState<string>();
 
   return (
     <article>
       <div className="grid">
+        <MicrophoneSelect setDeviceId={setDeviceId} />
         <label>
           Select persona
           <select
             onChange={(e) => {
-              setPersona(e.target.value);
+              setPersonaId(e.target.value);
             }}
           >
             {Object.entries(personas).map(([id, { name }]) => (
@@ -39,11 +46,68 @@ export function Controls({
           type="button"
           className={connected ? 'secondary' : undefined}
           aria-busy={loading}
-          onClick={connected ? stopSession : () => startSession(persona)}
+          onClick={
+            connected
+              ? stopSession
+              : () => startSession({ personaId, deviceId })
+          }
         >
           {connected ? 'Stop conversation' : 'Start conversation'}
         </button>
       </div>
     </article>
   );
 }
+
+// Microphone picker whose markup depends on the permission state reported by
+// useAudioDevices. While permission is granted, `setDeviceId` is called with
+// the value of the chosen <option> each time the selection changes.
+function MicrophoneSelect({
+  setDeviceId,
+}: { setDeviceId: (deviceId: string) => void }) {
+  const devices = useAudioDevices();
+
+  // Not asked yet: clicking or pressing a key on the empty select prompts.
+  if (devices.permissionState === 'prompt') {
+    return (
+      <label>
+        Enable mic permissions
+        <select
+          onClick={devices.promptPermissions}
+          onKeyDown={devices.promptPermissions}
+        />
+      </label>
+    );
+  }
+
+  // Prompt in progress: same label, select marked as busy.
+  if (devices.permissionState === 'prompting') {
+    return (
+      <label>
+        Enable mic permissions
+        <select aria-busy="true" />
+      </label>
+    );
+  }
+
+  // Permission granted: one option per entry in the device list.
+  if (devices.permissionState === 'granted') {
+    const handleChange = (event: ChangeEvent<HTMLSelectElement>) => {
+      setDeviceId(event.target.value);
+    };
+    const options = devices.deviceList.map((device) => (
+      <option key={device.deviceId} value={device.deviceId}>
+        {device.label}
+      </option>
+    ));
+    return (
+      <label>
+        Select audio device
+        <select onChange={handleChange}>{options}</select>
+      </label>
+    );
+  }
+
+  // Permission refused: disabled, empty select.
+  if (devices.permissionState === 'denied') {
+    return (
+      <label>
+        Microphone permission disabled
+        <select disabled />
+      </label>
+    );
+  }
+
+  // Every variant is handled above, so `devices` must have narrowed to never.
+  devices satisfies never;
+  return null;
+}
diff --git a/examples/nextjs/src/lib/audio-hooks.ts b/examples/nextjs/src/lib/audio-hooks.ts
@@ -12,14 +12,15 @@ export function usePcmMicrophoneAudio(onAudio: (audio: Float32Array) => void) {
   const mediaStreamRef = useRef<MediaStream>();
 
   const startRecording = useCallback(
-    async (audioContext: AudioContext) => {
+    async (audioContext: AudioContext, deviceId?: string) => {
       // If stream is present, it means we're already recording, nothing to do
       if (mediaStreamRef.current) {
         return mediaStreamRef.current;
       }
 
       const mediaStream = await navigator.mediaDevices.getUserMedia({
         audio: {
+          deviceId,
           sampleRate: audioContext?.sampleRate,
           sampleSize: 16,
           channelCount: 1,

diff --git a/packages/browser-audio-input-react/README.md b/packages/browser-audio-input-react/README.md
@@ -0,0 +1,71 @@
+# Browser audio input (React)
+
+React bindings for the `@speechmatics/browser-audio-input` package, letting you manage audio input devices and permissions across browsers.
+
+## Installation
+
+```
+npm i @speechmatics/browser-audio-input-react
+```
+
+## Usage
+
+Below is an example of a Microphone selection component.
+
+```TSX
+import type { ChangeEvent } from "react";
+import { useAudioDevices } from "@speechmatics/browser-audio-input-react";
+
+function MicrophoneSelect({
+  setDeviceId,
+}: { setDeviceId: (deviceId: string) => void }) {
+  const devices = useAudioDevices();
+
+  switch (devices.permissionState) {
+    case 'prompt':
+      return (
+        <label>
+          Enable mic permissions
+          <select
+            onClick={devices.promptPermissions}
+            onKeyDown={devices.promptPermissions}
+          />
+        </label>
+      );
+    case 'prompting':
+      return (
+        <label>
+          Enable mic permissions
+          <select aria-busy="true" />
+        </label>
+      );
+    case 'granted': {
+      const onChange = (e: ChangeEvent<HTMLSelectElement>) => {
+        setDeviceId(e.target.value);
+      };
+      return (
+        <label>
+          Select audio device
+          <select onChange={onChange}>
+            {devices.deviceList.map((d) => (
+              <option key={d.deviceId} value={d.deviceId}>
+                {d.label}
+              </option>
+            ))}
+          </select>
+        </label>
+      );
+    }
+    case 'denied':
+      return (
+        <label>
+          Microphone permission disabled
+          <select disabled />
+        </label>
+      );
+    default:
+      devices satisfies never;
+      return null;
+  }
+}
+
+```
diff --git a/packages/browser-audio-input-react/package.json b/packages/browser-audio-input-react/package.json
@@ -0,0 +1,36 @@
+{
+  "name": "@speechmatics/browser-audio-input-react",
+  "version": "0.0.1",
+  "description": "React hooks for managing audio inputs and permissions across browsers",
+  "exports": ["./dist/index.js"],
+  "module": "./dist/index.js",
+  "typings": "./dist/index.d.ts",
+  "files": ["dist/", "README.md"],
+  "scripts": {
+    "build": "rm -rf dist/ && pnpm -C ../browser-audio-input build && pnpm rollup -c",
+    "prepare": "pnpm build",
+    "format": "biome format --write .",
+    "lint": "biome lint --write ."
+  },
+  "keywords": [
+    "Flow",
+    "API",
+    "React",
+    "hooks",
+    "transcription",
+    "speech",
+    "intelligence"
+  ],
+  "dependencies": {
+    "@speechmatics/browser-audio-input": "workspace:*"
+  },
+  "author": "",
+  "license": "MIT",
+  "peerDependencies": {
+    "react": "^18 || ^19"
+  },
+  "devDependencies": {
+    "@types/react": "^18.3.12",
+    "typescript-event-target": "^1.1.1"
+  }
+}
diff --git a/packages/browser-audio-input-react/rollup.config.mjs b/packages/browser-audio-input-react/rollup.config.mjs
@@ -0,0 +1,39 @@
+import esbuild from 'rollup-plugin-esbuild';
+import dts from 'rollup-plugin-dts';
+
+import packageJSON from './package.json' assert { type: 'json' };
+
+// Based on gist
+//https://gist.github.com/aleclarson/9900ed2a9a3119d865286b218e14d226
+
+/** @returns {import("rollup").RollupOptions[]} */
+export default function rollup() {
+  return [
+    {
+      plugins: [esbuild()],
+      input: 'src/index.ts',
+      output: [
+        {
+          file: packageJSON.module,
+          format: 'es',
+          sourcemap: true,
+          strict: false,
+        },
+      ],
+    },
+
+    {
+      plugins: [
+        dts({
+          compilerOptions: {
+            removeComments: true,
+          },
+        }),
+      ],
+      input: 'src/index.ts',
+      output: {
+        file: `${packageJSON.module.replace('.js', '')}.d.ts`,
+      },
+    },
+  ];
+}
diff --git a/packages/browser-audio-input-react/src/index.ts b/packages/browser-audio-input-react/src/index.ts
@@ -0,0 +1,77 @@
+import { useCallback, useSyncExternalStore } from 'react';
+import { getAudioDevicesStore } from '@speechmatics/browser-audio-input';
+
+// Subscribe function for the device list, in the shape useSyncExternalStore
+// expects: registers `callback` as a 'changeDevices' listener on the audio
+// devices store and returns a function that removes that listener again.
+function subscribeDevices(callback: () => void) {
+  const store = getAudioDevicesStore();
+  const unsubscribe = () => {
+    store.removeEventListener('changeDevices', callback);
+  };
+  store.addEventListener('changeDevices', callback);
+  return unsubscribe;
+}
+// Snapshot reader: the device list currently held by the store.
+function getDevices() {
+  return getAudioDevicesStore().devices;
+}
+
+// Hook returning the store's device list. The same reader is passed as both
+// the client snapshot and the server snapshot getter.
+function useAudioDeviceList() {
+  const deviceList = useSyncExternalStore(
+    subscribeDevices,
+    getDevices,
+    getDevices,
+  );
+  return deviceList;
+}
+
+// Subscribe function for the permission state, in the shape
+// useSyncExternalStore expects: registers `callback` as a 'changePermissions'
+// listener on the audio devices store and returns a function that removes it.
+function subscribePermissionState(callback: () => void) {
+  const store = getAudioDevicesStore();
+  const unsubscribe = () => {
+    store.removeEventListener('changePermissions', callback);
+  };
+  store.addEventListener('changePermissions', callback);
+  return unsubscribe;
+}
+// Snapshot reader: the permission state currently held by the store.
+function getPermissionState() {
+  return getAudioDevicesStore().permissionState;
+}
+
+// Hook returning the store's permission state. The same reader is passed as
+// both the client snapshot and the server snapshot getter.
+function useAudioPermissionState() {
+  const permissionState = useSyncExternalStore(
+    subscribePermissionState,
+    getPermissionState,
+    getPermissionState,
+  );
+  return permissionState;
+}
+
+// Hook returning a memoized async function (empty dependency list) that
+// awaits the store's promptPermissions() and resolves with no value.
+function usePromptAudioPermission() {
+  const prompt = useCallback(async () => {
+    const store = getAudioDevicesStore();
+    await store.promptPermissions();
+  }, []);
+  return prompt;
+}
+
+/**
+ * Value returned by `useAudioDevices`, discriminated on `permissionState`.
+ *
+ * - `'prompt'`: carries `promptPermissions`, a function taking no arguments.
+ * - `'prompting'`: carries no extra fields.
+ * - `'granted'`: carries `deviceList`, a read-only array of `MediaDeviceInfo`.
+ * - `'denied'`: carries no extra fields.
+ */
+export type AudioDevices =
+  | { permissionState: 'prompt'; promptPermissions: () => void }
+  | { permissionState: 'prompting' }
+  | {
+      permissionState: 'granted';
+      deviceList: ReadonlyArray<MediaDeviceInfo>;
+    }
+  | { permissionState: 'denied' };
+
+/**
+ * Combines the store's permission state, device list and prompt action into
+ * a single value discriminated on `permissionState`.
+ *
+ * @returns `promptPermissions` alongside the state while it is `'prompt'`,
+ *   `deviceList` alongside the state while it is `'granted'`, and the state
+ *   alone while it is `'prompting'` or `'denied'`.
+ * @throws Error if the permission state is none of the four handled values.
+ */
+export function useAudioDevices(): AudioDevices {
+  // All three hooks run on every render, whichever variant ends up returned.
+  const permissionState = useAudioPermissionState();
+  const promptPermissions = usePromptAudioPermission();
+  const deviceList = useAudioDeviceList();
+
+  if (permissionState === 'prompt') {
+    return { permissionState, promptPermissions };
+  }
+
+  if (permissionState === 'granted') {
+    return { permissionState, deviceList };
+  }
+
+  if (permissionState === 'prompting' || permissionState === 'denied') {
+    return { permissionState };
+  }
+
+  // Compile-time exhaustiveness check: nothing should be left to handle here.
+  permissionState satisfies never;
+  throw new Error(`Unexpected permission state: ${permissionState}`);
+}
diff --git a/packages/browser-audio-input-react/tsconfig.json b/packages/browser-audio-input-react/tsconfig.json
@@ -0,0 +1,3 @@
+{
+  "extends": "../../tsconfig.json"
+}