Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improved error handling in the voice client. #210

Merged
merged 11 commits into from
Dec 11, 2023
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/fixie/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "fixie",
"version": "6.3.1",
"version": "6.3.2",
"license": "MIT",
"repository": "fixie-ai/fixie-sdk",
"bugs": "https://github.com/fixie-ai/fixie-sdk/issues",
Expand Down
66 changes: 41 additions & 25 deletions packages/fixie/src/voice.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
/** This file defines the Fixie Voice SDK. */

import { AgentId, ConversationId } from './types.js';
import {
createLocalTracks,
Expand All @@ -11,10 +13,11 @@ import {
TrackEvent,
} from 'livekit-client';

/**
* Represents the state of a VoiceSession.
*/
/** Represents the state of a VoiceSession. */
export enum VoiceSessionState {
DISCONNECTED = 'disconnected',
CONNECTING = 'connecting',
CONNECTED = 'connected',
IDLE = 'idle',
LISTENING = 'listening',
THINKING = 'thinking',
Expand All @@ -32,9 +35,7 @@ export interface VoiceSessionInit {
webrtcUrl?: string;
}

/**
* Web Audio AnalyserNode for an audio stream.
*/
/** Web Audio AnalyserNode for an audio stream. */
export class StreamAnalyzer {
source: MediaStreamAudioSourceNode;
analyzer: AnalyserNode;
Expand All @@ -48,15 +49,20 @@ export class StreamAnalyzer {
}
}

/**
* Manages a single voice session with a Fixie agent.
*/
/**
 * Error type reported by a VoiceSession through its `onError` callback.
 *
 * The `name` property is always `'VoiceSessionError'`, so callers can
 * distinguish these errors from other `Error` instances either by name or
 * with `instanceof`.
 */
export class VoiceSessionError extends Error {
  /**
   * @param message Human-readable description of what went wrong.
   */
  constructor(message: string) {
    super(message);
    // When compiled to an ES5 target, subclasses of built-ins such as Error
    // lose their prototype chain; restore it so `instanceof` keeps working
    // for this class and for anything that extends it.
    Object.setPrototypeOf(this, new.target.prototype);
    this.name = 'VoiceSessionError';
  }
}

/** Manages a single voice session with a Fixie agent. */
export class VoiceSession {
private readonly audioContext = new AudioContext();
private readonly audioElement = new Audio();
private readonly textEncoder = new TextEncoder();
private readonly textDecoder = new TextDecoder();
private _state = VoiceSessionState.IDLE;
private _state = VoiceSessionState.DISCONNECTED;
private socket?: WebSocket;
private audioStarted = false;
private started = false;
Expand All @@ -81,15 +87,14 @@ export class VoiceSession {
onLatencyChange?: (metric: string, value: number) => void;

/** Called when an error occurs. */
onError?: () => void;
onError?: (error: VoiceSessionError) => void;

constructor(
private readonly agentId: AgentId,
private readonly conversationId?: ConversationId,
public conversationId?: ConversationId,
mdwelsh marked this conversation as resolved.
Show resolved Hide resolved
private readonly params?: VoiceSessionInit
) {
console.log('[voiceSession] creating VoiceSession');
this.warmup();
}

/** Returns the current state of this VoiceSession. */
Expand All @@ -112,11 +117,18 @@ export class VoiceSession {
console.log('[voiceSession] warming up');
this.audioStarted = false;
this.started = false;
const url = this.params?.webrtcUrl ?? 'wss://wsapi.fixie.ai';
this.socket = new WebSocket(url);
this.socket.onopen = () => this.handleSocketOpen();
this.socket.onmessage = (event) => this.handleSocketMessage(event);
this.socket.onclose = (event) => this.handleSocketClose(event);
try {
const url = this.params?.webrtcUrl ?? 'wss://wsapi.fixie.ai';
this.socket = new WebSocket(url);
this.socket.onopen = () => this.handleSocketOpen();
this.socket.onmessage = (event) => this.handleSocketMessage(event);
this.socket.onclose = (event) => this.handleSocketClose(event);
this.changeState(VoiceSessionState.CONNECTING);
} catch (e) {
mdwelsh marked this conversation as resolved.
Show resolved Hide resolved
const err = e as Error;
console.error('[voiceSession] failed to create socket', e);
this.onError?.(new VoiceSessionError(`Failed to create socket: ${err.message}`));
}
}

/** Start the audio channels associated with this VoiceSession. This will request microphone permissions from the user. */
Expand Down Expand Up @@ -164,9 +176,9 @@ export class VoiceSession {
this.localAudioTrack = undefined;
this.socket?.close();
this.socket = undefined;
this.changeState(VoiceSessionState.IDLE);
this.audioStarted = false;
this.started = false;
this.changeState(VoiceSessionState.DISCONNECTED);
}

/** Interrupt this VoiceSession. */
Expand All @@ -193,6 +205,7 @@ export class VoiceSession {
console.log('[voiceSession] publishing local audio track');
const opts = { name: 'audio', simulcast: false, source: Track.Source.Microphone };
this.room.localParticipant.publishTrack(this.localAudioTrack, opts);
this.changeState(VoiceSessionState.IDLE);
} else {
console.log(
`[voiceSession] not publishing local audio track - room state is ${this.room?.state}, local audio is ${
Expand All @@ -208,6 +221,7 @@ export class VoiceSession {

private handleSocketOpen() {
console.log('[voiceSession] socket opened');
this.changeState(VoiceSessionState.CONNECTED);
const obj = {
type: 'init',
params: {
Expand Down Expand Up @@ -250,28 +264,28 @@ export class VoiceSession {

private handleSocketClose(event: CloseEvent) {
console.log(`[voiceSession] socket closed: ${event.code} ${event.reason}`);
this.changeState(VoiceSessionState.DISCONNECTED);
if (event.code === 1000) {
// We initiated this shutdown, so we've already cleaned up.
// Reconnect to prepare for the next session.
console.log('[voiceSession] socket closed normally - calling warmup again');
this.warmup();
console.log('[voiceSession] socket closed normally');
} else if (event.code === 1006) {
// This occurs when running a Next.js app in debug mode and the VoiceSession is
// initialized twice; the first socket receives this error, which we can ignore.
console.log('[voiceSession] got event 1006');
} else {
console.warn(`[voiceSession] socket closed unexpectedly: ${event.code} ${event.reason}`);
this.onError?.();
this.onError?.(new VoiceSessionError(`Socket closed unexpectedly: ${event.code} ${event.reason}`));
}
}

private handleTrackSubscribed(track: RemoteTrack) {
console.log(`[voiceSession] subscribed to remote audio track ${track.sid}`);
const audioTrack = track as RemoteAudioTrack;
audioTrack.on(TrackEvent.AudioPlaybackStarted, () => console.log('[voiceSession] audio playback started'));
audioTrack.on(TrackEvent.AudioPlaybackFailed, (err: any) =>
console.error('[voiceSession] audio playback failed', err)
);
audioTrack.on(TrackEvent.AudioPlaybackFailed, (err: any) => {
mdwelsh marked this conversation as resolved.
Show resolved Hide resolved
console.error('[voiceSession] audio playback failed', err);
});
audioTrack.attach(this.audioElement);
this.outAnalyzer = new StreamAnalyzer(this.audioContext, track.mediaStream!);
if (this.delayedSpeakingState) {
Expand Down Expand Up @@ -304,6 +318,8 @@ export class VoiceSession {
this.handleOutputChange(msg.text, msg.final);
} else if (msg.type == 'latency') {
this.handleLatency(msg.kind, msg.value);
} else if (msg.type == 'conversation_created') {
this.conversationId = msg.conversationId;
}
}

Expand Down