diff --git a/src/GenerativeAI.Live/Models/MultiModalLiveClient.cs b/src/GenerativeAI.Live/Models/MultiModalLiveClient.cs index 712ab64..a85c5a4 100644 --- a/src/GenerativeAI.Live/Models/MultiModalLiveClient.cs +++ b/src/GenerativeAI.Live/Models/MultiModalLiveClient.cs @@ -105,6 +105,10 @@ private async Task GetClient() /// public bool UseCodeExecutor { get; set; } = false; + public bool InputAudioTranscriptionEnabled { get; set; } = false; + + public bool OutputAudioTranscriptionEnabled { get; set; } = false; + #endregion #region Constructors @@ -115,6 +119,7 @@ private async Task GetClient() public MultiModalLiveClient(IPlatformAdapter platformAdapter, string modelName, GenerationConfig? config = null, ICollection? safetySettings = null, string? systemInstruction = null, + bool inputAudioTranscriptionEnabled = false, bool outputAudioTranscriptionEnabled = false, ILogger? logger = null) { _platformAdapter = platformAdapter ?? throw new ArgumentNullException(nameof(platformAdapter)); @@ -123,6 +128,8 @@ public MultiModalLiveClient(IPlatformAdapter platformAdapter, string modelName, { ResponseModalities = new List { Modality.TEXT } }; + InputAudioTranscriptionEnabled = inputAudioTranscriptionEnabled; + OutputAudioTranscriptionEnabled = outputAudioTranscriptionEnabled; SafetySettings = safetySettings; SystemInstruction = systemInstruction; _connectionId = Guid.NewGuid(); @@ -550,6 +557,8 @@ public async Task SendSetupAsync(CancellationToken cancellationToken = default) ? new Content(this.SystemInstruction, Roles.System) : null, Tools = tools.Count > 0 ? tools.ToArray() : null, + InputAudioTranscription = InputAudioTranscriptionEnabled ? new AudioTranscriptionConfig(): null, + OutputAudioTranscription = OutputAudioTranscriptionEnabled ? new AudioTranscriptionConfig() : null, }; await SendSetupAsync(setup, cancellationToken).ConfigureAwait(false); } diff --git a/src/GenerativeAI/Types/MultimodalLive/BidiGenerateContentSetup.cs b/src/GenerativeAI/Types/MultimodalLive/BidiGenerateContentSetup.cs index 5c5061c..10b40be 100644 --- a/src/GenerativeAI/Types/MultimodalLive/BidiGenerateContentSetup.cs +++ b/src/GenerativeAI/Types/MultimodalLive/BidiGenerateContentSetup.cs @@ -44,9 +44,15 @@ public class BidiGenerateContentSetup [JsonPropertyName("tools")] public Tool[]? Tools { get; set; } + /// + /// Configures output audio transcription settings. + /// [JsonPropertyName("outputAudioTranscription")] - public AudioTranscriptionConfig? OutputAudioTranscription { get; set; } + public AudioTranscriptionConfig? OutputAudioTranscription { get; set; } + /// + /// Configures input audio transcription settings. + /// [JsonPropertyName("inputAudioTranscription")] public AudioTranscriptionConfig? InputAudioTranscription { get; set; } ///