diff --git a/src/Infrastructure/BotSharp.Core.Realtime/Services/WaveStreamChannel.cs b/src/Infrastructure/BotSharp.Core.Realtime/Services/WaveStreamChannel.cs index 4b194efbe..b10004534 100644 --- a/src/Infrastructure/BotSharp.Core.Realtime/Services/WaveStreamChannel.cs +++ b/src/Infrastructure/BotSharp.Core.Realtime/Services/WaveStreamChannel.cs @@ -25,7 +25,7 @@ public async Task ConnectAsync(string conversationId) _waveIn = new WaveInEvent { DeviceNumber = 0, // Default recording device - WaveFormat = new WaveFormat(24000, 16, 1), // 24000 Hz, 16-bit PCM, Mono + WaveFormat = new WaveFormat(16000, 16, 1), // 16000 Hz, 16-bit PCM, Mono BufferMilliseconds = 100 }; diff --git a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs index dcadd25d6..f9ef590ab 100644 --- a/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs +++ b/src/Plugins/BotSharp.Plugin.GoogleAI/Providers/Realtime/RealTimeCompletionProvider.cs @@ -79,7 +79,7 @@ public async Task Connect(RealtimeHubConnection conn, await AttachEvents(_client); - await _client.ConnectAsync(); + await _client.ConnectAsync(false); } public async Task Disconnect() @@ -96,7 +96,7 @@ public async Task AppenAudioBuffer(string message) public async Task AppenAudioBuffer(ArraySegment data, int length) { var buffer = data.AsSpan(0, length).ToArray(); - await _client.SendAudioAsync(buffer); + await _client.SendAudioAsync(buffer,"audio/pcm;rate=16000"); } public async Task TriggerModelInference(string? 
instructions = null) @@ -285,10 +285,14 @@ await HookEmitter.Emit(_services, }); } + // if(request.Tools.Count == 0) + // request.Tools = null; + // config.MaxOutputTokens = null; + await _client.SendSetupAsync(new BidiGenerateContentSetup() { GenerationConfig = config, - Model = Model, + Model = Model.ToModelId(), SystemInstruction = request.SystemInstruction, Tools = request.Tools?.ToArray(), }); diff --git a/tests/BotSharp.Test.RealtimeVoice/appsettings.json b/tests/BotSharp.Test.RealtimeVoice/appsettings.json index 28ada7ddb..d8c2730f6 100644 --- a/tests/BotSharp.Test.RealtimeVoice/appsettings.json +++ b/tests/BotSharp.Test.RealtimeVoice/appsettings.json @@ -27,7 +27,7 @@ "Models": [ { "Id": "gemini-2.0", - "Name": "gemini-2.0-flash-exp", + "Name": "gemini-2.0-flash-live-001", "Version": "20240620", "ApiKey": "", "Type": "realtime", @@ -48,8 +48,11 @@ }, "RealtimeModel": { + "Provider": "google-ai", + "Model": "gemini-2.0-flash-live-001", "InputAudioFormat": "pcm16", - "OutputAudioFormat": "pcm16" + "OutputAudioFormat": "pcm16", + "InterruptResponse": false, }, "PluginLoader": {