Skip to content

Commit af163dc

Browse files
committed
feat(openai): support input audio noise reduction
1 parent 9a58cd3 commit af163dc

File tree

3 files changed

+19
-4
lines changed

3 files changed

+19
-4
lines changed

.changeset/violet-tigers-count.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@livekit/agents-plugin-openai": minor
3+
---
4+
5+
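Allow agents to pass OpenAI's `input_audio_noise_reduction` setting through the realtime session payload via the new `inputAudioNoiseReduction` option (`type` of `near_field` or `far_field`; defaults to `null`).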

plugins/openai/src/realtime/api_proto.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,10 @@ export type InputAudioTranscription = {
113113
prompt?: string;
114114
};
115115

116+
export interface InputAudioNoiseReduction {
117+
type?: 'near_field' | 'far_field';
118+
}
119+
116120
export interface InputTextContent {
117121
type: 'input_text';
118122
text: string;
@@ -196,6 +200,7 @@ export interface SessionResource {
196200
input_audio_format: AudioFormat; // default: "pcm16"
197201
output_audio_format: AudioFormat; // default: "pcm16"
198202
input_audio_transcription: InputAudioTranscription | null;
203+
input_audio_noise_reduction: InputAudioNoiseReduction | null;
199204
turn_detection: TurnDetectionType | null;
200205
tools: Tool[];
201206
tool_choice: ToolChoice; // default: "auto"
@@ -273,6 +278,7 @@ export interface SessionUpdateEvent extends BaseClientEvent {
273278
input_audio_format: AudioFormat;
274279
output_audio_format: AudioFormat;
275280
input_audio_transcription: InputAudioTranscription | null;
281+
input_audio_noise_reduction: InputAudioNoiseReduction | null;
276282
turn_detection: TurnDetectionType | null;
277283
tools: Tool[];
278284
tool_choice: ToolChoice;

plugins/openai/src/realtime/realtime_model.ts

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ interface RealtimeOptions {
4242
temperature: number;
4343
toolChoice?: llm.ToolChoice;
4444
inputAudioTranscription?: api_proto.InputAudioTranscription | null;
45-
// TODO(shubhra): add inputAudioNoiseReduction
45+
inputAudioNoiseReduction?: api_proto.InputAudioNoiseReduction | null;
4646
turnDetection?: api_proto.TurnDetectionType | null;
4747
maxResponseOutputTokens?: number | 'inf';
4848
speed?: number;
@@ -102,6 +102,7 @@ const DEFAULT_TURN_DETECTION: api_proto.TurnDetectionType = {
102102
const DEFAULT_INPUT_AUDIO_TRANSCRIPTION: api_proto.InputAudioTranscription = {
103103
model: 'gpt-4o-mini-transcribe',
104104
};
105+
const DEFAULT_INPUT_AUDIO_NOISE_REDUCTION: api_proto.InputAudioNoiseReduction | null = null;
105106
const DEFAULT_TOOL_CHOICE: llm.ToolChoice = 'auto';
106107
const DEFAULT_MAX_RESPONSE_OUTPUT_TOKENS: number | 'inf' = 'inf';
107108

@@ -124,6 +125,7 @@ const DEFAULT_REALTIME_MODEL_OPTIONS = {
124125
voice: 'marin',
125126
temperature: DEFAULT_TEMPERATURE,
126127
inputAudioTranscription: DEFAULT_INPUT_AUDIO_TRANSCRIPTION,
128+
inputAudioNoiseReduction: DEFAULT_INPUT_AUDIO_NOISE_REDUCTION,
127129
turnDetection: DEFAULT_TURN_DETECTION,
128130
toolChoice: DEFAULT_TOOL_CHOICE,
129131
maxResponseOutputTokens: DEFAULT_MAX_RESPONSE_OUTPUT_TOKENS,
@@ -149,7 +151,7 @@ export class RealtimeModel extends llm.RealtimeModel {
149151
baseURL?: string;
150152
modalities?: Modality[];
151153
inputAudioTranscription?: api_proto.InputAudioTranscription | null;
152-
// TODO(shubhra): add inputAudioNoiseReduction
154+
inputAudioNoiseReduction?: api_proto.InputAudioNoiseReduction | null;
153155
turnDetection?: api_proto.TurnDetectionType | null;
154156
speed?: number;
155157
// TODO(shubhra): add openai tracing options
@@ -243,6 +245,7 @@ export class RealtimeModel extends llm.RealtimeModel {
243245
turnDetection = AZURE_DEFAULT_TURN_DETECTION,
244246
temperature = 0.8,
245247
speed,
248+
inputAudioNoiseReduction = DEFAULT_INPUT_AUDIO_NOISE_REDUCTION,
246249
}: {
247250
azureDeployment: string;
248251
azureEndpoint?: string;
@@ -252,7 +255,7 @@ export class RealtimeModel extends llm.RealtimeModel {
252255
baseURL?: string;
253256
voice?: string;
254257
inputAudioTranscription?: api_proto.InputAudioTranscription;
255-
// TODO(shubhra): add inputAudioNoiseReduction
258+
inputAudioNoiseReduction?: api_proto.InputAudioNoiseReduction | null;
256259
turnDetection?: api_proto.TurnDetectionType;
257260
temperature?: number;
258261
speed?: number;
@@ -284,6 +287,7 @@ export class RealtimeModel extends llm.RealtimeModel {
284287
return new RealtimeModel({
285288
voice,
286289
inputAudioTranscription,
290+
inputAudioNoiseReduction,
287291
turnDetection,
288292
temperature,
289293
speed,
@@ -417,7 +421,7 @@ export class RealtimeSession extends llm.RealtimeSession {
417421
modalities: modalities,
418422
turn_detection: this.oaiRealtimeModel._options.turnDetection,
419423
input_audio_transcription: this.oaiRealtimeModel._options.inputAudioTranscription,
420-
// TODO(shubhra): add inputAudioNoiseReduction
424+
input_audio_noise_reduction: this.oaiRealtimeModel._options.inputAudioNoiseReduction,
421425
temperature: this.oaiRealtimeModel._options.temperature,
422426
tool_choice: toOaiToolChoice(this.oaiRealtimeModel._options.toolChoice),
423427
max_response_output_tokens:

0 commit comments

Comments
 (0)