diff --git a/Cargo.lock b/Cargo.lock index b62e6694d7..2ca2f4c146 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -966,6 +966,7 @@ dependencies = [ "bytes", "cidre", "cpal", + "dasp", "data", "ebur128", "futures-channel", diff --git a/apps/desktop/src/components/editor-area/note-header/listen-button.tsx b/apps/desktop/src/components/editor-area/note-header/listen-button.tsx index 4c69f5fede..987a7194dd 100644 --- a/apps/desktop/src/components/editor-area/note-header/listen-button.tsx +++ b/apps/desktop/src/components/editor-area/note-header/listen-button.tsx @@ -1,6 +1,16 @@ import { Trans } from "@lingui/react/macro"; import { useMutation, useQuery } from "@tanstack/react-query"; -import { MicIcon, MicOffIcon, PauseIcon, PlayIcon, StopCircleIcon, Volume2Icon, VolumeOffIcon } from "lucide-react"; +import { + CheckIcon, + ChevronDownIcon, + MicIcon, + MicOffIcon, + PauseIcon, + PlayIcon, + StopCircleIcon, + Volume2Icon, + VolumeOffIcon, +} from "lucide-react"; import { useEffect, useState } from "react"; import SoundIndicator from "@/components/sound-indicator"; @@ -319,16 +329,14 @@ function RecordingControls({ return ( <> -
- + toggleMicMuted.mutate()} - type="mic" + onToggleMuted={() => toggleMicMuted.mutate()} /> - toggleSpeakerMuted.mutate()} - type="speaker" />
@@ -377,35 +385,147 @@ function RecordingControls({ ); } -function AudioControlButton({ - type, +function MicrophoneSelector({ + isMuted, + onToggleMuted, + disabled, +}: { + isMuted?: boolean; + onToggleMuted: () => void; + disabled?: boolean; +}) { + const [isOpen, setIsOpen] = useState(false); + + const allDevicesQuery = useQuery({ + queryKey: ["microphone", "devices"], + queryFn: () => listenerCommands.listMicrophoneDevices(), + }); + + const currentDeviceQuery = useQuery({ + queryKey: ["microphone", "current-device"], + queryFn: () => listenerCommands.getCurrentMicrophoneDevice(), + }); + + const handleSelectDevice = (device: string) => { + listenerCommands.setMicrophoneDevice(device).then(() => { + currentDeviceQuery.refetch(); + }); + }; + + useEffect(() => { + console.log("currentDeviceQuery.data", currentDeviceQuery.data); + console.log("allDevicesQuery.data", allDevicesQuery.data); + }, [currentDeviceQuery.data, allDevicesQuery.data]); + + const Icon = isMuted ? MicOffIcon : MicIcon; + + return ( +
+ +
+ + + + + +
+ + +
+
+ Microphone +
+ + {allDevicesQuery.isLoading + ? ( +
+
+

Loading devices...

+
+ ) + : allDevicesQuery.data?.length === 0 + ? ( +
+

No microphones found

+
+ ) + : ( +
+ {allDevicesQuery.data?.map((device) => { + const isSelected = device === currentDeviceQuery.data; + return ( + + ); + })} +
+ )} +
+
+
+
+ ); +} + +function SpeakerButton({ isMuted, onClick, disabled, }: { - type: "mic" | "speaker"; isMuted?: boolean; onClick: () => void; disabled?: boolean; }) { - const Icon = type === "mic" - ? isMuted - ? MicOffIcon - : MicIcon - : isMuted - ? VolumeOffIcon - : Volume2Icon; + const Icon = isMuted ? VolumeOffIcon : Volume2Icon; return ( - +
+ +
); } diff --git a/apps/desktop/src/locales/en/messages.po b/apps/desktop/src/locales/en/messages.po index 8cf92a8a76..03b55fa809 100644 --- a/apps/desktop/src/locales/en/messages.po +++ b/apps/desktop/src/locales/en/messages.po @@ -256,8 +256,8 @@ msgstr "(Beta) Upcoming meeting notifications" #. placeholder {0}: disabled ? "Wait..." : isHovered ? "Resume" : "Ended" #: src/components/settings/views/templates.tsx:194 #: src/components/settings/components/wer-modal.tsx:116 -#: src/components/editor-area/note-header/listen-button.tsx:179 -#: src/components/editor-area/note-header/listen-button.tsx:218 +#: src/components/editor-area/note-header/listen-button.tsx:189 +#: src/components/editor-area/note-header/listen-button.tsx:228 msgid "{0}" msgstr "{0}" @@ -887,7 +887,7 @@ msgstr "No speech-to-text models available or failed to load." #~ msgid "No Template" #~ msgstr "No Template" -#: src/components/editor-area/note-header/listen-button.tsx:342 +#: src/components/editor-area/note-header/listen-button.tsx:350 msgid "No Template (Default)" msgstr "No Template (Default)" @@ -955,7 +955,7 @@ msgstr "Optional for participant suggestions" msgid "Owner" msgstr "Owner" -#: src/components/editor-area/note-header/listen-button.tsx:365 +#: src/components/editor-area/note-header/listen-button.tsx:373 msgid "Pause" msgstr "Pause" @@ -967,7 +967,7 @@ msgstr "people" msgid "Performance difference between languages" msgstr "Performance difference between languages" -#: src/components/editor-area/note-header/listen-button.tsx:198 +#: src/components/editor-area/note-header/listen-button.tsx:208 msgid "Play video" msgstr "Play video" @@ -1011,7 +1011,7 @@ msgstr "Required to transcribe other people's voice during meetings" msgid "Required to transcribe your voice during meetings" msgstr "Required to transcribe your voice during meetings" -#: src/components/editor-area/note-header/listen-button.tsx:107 +#: src/components/editor-area/note-header/listen-button.tsx:117 msgid "Resume" msgstr "Resume" @@ -1109,11 +1109,11 @@ msgstr "Start Annual Plan" msgid "Start Monthly Plan" msgstr "Start Monthly Plan" -#: src/components/editor-area/note-header/listen-button.tsx:154 +#: src/components/editor-area/note-header/listen-button.tsx:164 msgid "Start recording" msgstr "Start recording" -#: src/components/editor-area/note-header/listen-button.tsx:373 +#: src/components/editor-area/note-header/listen-button.tsx:381 msgid "Stop" msgstr "Stop" diff --git a/apps/desktop/src/locales/ko/messages.po b/apps/desktop/src/locales/ko/messages.po index 8f91e752cd..b6cfb26f95 100644 --- a/apps/desktop/src/locales/ko/messages.po +++ b/apps/desktop/src/locales/ko/messages.po @@ -256,8 +256,8 @@ msgstr "" #. placeholder {0}: disabled ? "Wait..." : isHovered ? "Resume" : "Ended" #: src/components/settings/views/templates.tsx:194 #: src/components/settings/components/wer-modal.tsx:116 -#: src/components/editor-area/note-header/listen-button.tsx:179 -#: src/components/editor-area/note-header/listen-button.tsx:218 +#: src/components/editor-area/note-header/listen-button.tsx:189 +#: src/components/editor-area/note-header/listen-button.tsx:228 msgid "{0}" msgstr "" @@ -887,7 +887,7 @@ msgstr "" #~ msgid "No Template" #~ msgstr "" -#: src/components/editor-area/note-header/listen-button.tsx:342 +#: src/components/editor-area/note-header/listen-button.tsx:350 msgid "No Template (Default)" msgstr "" @@ -955,7 +955,7 @@ msgstr "" msgid "Owner" msgstr "" -#: src/components/editor-area/note-header/listen-button.tsx:365 +#: src/components/editor-area/note-header/listen-button.tsx:373 msgid "Pause" msgstr "" @@ -967,7 +967,7 @@ msgstr "" msgid "Performance difference between languages" msgstr "" -#: src/components/editor-area/note-header/listen-button.tsx:198 +#: src/components/editor-area/note-header/listen-button.tsx:208 msgid "Play video" msgstr "" @@ -1011,7 +1011,7 @@ msgstr "" msgid "Required to transcribe your voice during meetings" msgstr "" -#: src/components/editor-area/note-header/listen-button.tsx:107 +#: src/components/editor-area/note-header/listen-button.tsx:117 msgid "Resume" msgstr "" @@ -1109,11 +1109,11 @@ msgstr "" msgid "Start Monthly Plan" msgstr "" -#: src/components/editor-area/note-header/listen-button.tsx:154 +#: src/components/editor-area/note-header/listen-button.tsx:164 msgid "Start recording" msgstr "" -#: src/components/editor-area/note-header/listen-button.tsx:373 +#: src/components/editor-area/note-header/listen-button.tsx:381 msgid "Stop" msgstr "" diff --git a/apps/desktop/src/utils/broadcast.ts b/apps/desktop/src/utils/broadcast.ts index 7a7a48eff2..794f2f6d96 100644 --- a/apps/desktop/src/utils/broadcast.ts +++ b/apps/desktop/src/utils/broadcast.ts @@ -41,12 +41,6 @@ export function broadcastQueryClient(queryClient: QueryClient) { const keys = event.payload.queryKey as string[]; - if (keys.some((key) => key?.includes("extension"))) { - queryClient.invalidateQueries({ - predicate: (query) => query.queryKey.some((key) => typeof key === "string" && key.includes("extension")), - }); - } - if (keys.some((key) => key?.includes("flags"))) { queryClient.invalidateQueries({ predicate: (query) => query.queryKey.some((key) => typeof key === "string" && key.includes("flags")), diff --git a/crates/audio/Cargo.toml b/crates/audio/Cargo.toml index bfe6a8e05f..abde9554ef 100644 --- a/crates/audio/Cargo.toml +++ b/crates/audio/Cargo.toml @@ -17,6 +17,7 @@ futures-util = { workspace = true } tokio = { workspace = true, features = ["rt", "macros"] } cpal = { workspace = true } +dasp = { workspace = true } rodio = { workspace = true } ebur128 = "0.1.10" diff --git a/crates/audio/src/lib.rs b/crates/audio/src/lib.rs index 5f7aeec1f3..0acdb80192 100644 --- a/crates/audio/src/lib.rs +++ b/crates/audio/src/lib.rs @@ -11,6 +11,7 @@ pub use speaker::*; pub use stream::*; pub use cpal; +use cpal::traits::{DeviceTrait, HostTrait}; use futures_util::Stream; pub use kalosm_sound::AsyncSource; @@ -70,6 +71,23 @@ pub struct AudioInput { } impl AudioInput { + pub fn get_default_mic_device_name() -> String { + let host = cpal::default_host(); + let device = host.default_input_device().unwrap(); + device.name().unwrap().to_string() + } + + pub fn list_mic_devices() -> Vec { + let host = cpal::default_host(); + + let devices = host.input_devices().unwrap(); + + devices + .filter_map(|d| d.name().ok()) + .filter(|d| d != "hypr-audio-tap") + .collect() + } + pub fn from_mic() -> Self { Self { source: AudioSource::RealtimeMic, @@ -79,6 +97,15 @@ impl AudioInput { } } + pub fn from_mic_with_device_name(device_name: String) -> Self { + Self { + source: AudioSource::RealtimeMic, + mic: Some(MicInput::from_device(device_name)), + speaker: None, + data: None, + } + } + pub fn from_speaker(sample_rate_override: Option) -> Self { Self { source: AudioSource::RealtimeSpeaker, @@ -97,6 +124,14 @@ impl AudioInput { } } + pub fn device_name(&self) -> String { + match &self.source { + AudioSource::RealtimeMic => self.mic.as_ref().unwrap().device_name(), + AudioSource::RealtimeSpeaker => "TODO".to_string(), + AudioSource::Recorded => "TODO".to_string(), + } + } + pub fn stream(&mut self) -> AudioStream { match &self.source { AudioSource::RealtimeMic => AudioStream::RealtimeMic { diff --git a/crates/audio/src/mic.rs b/crates/audio/src/mic.rs index f32164b1ac..9d478e4bb9 100644 --- a/crates/audio/src/mic.rs +++ b/crates/audio/src/mic.rs @@ -1,4 +1,186 @@ -pub use kalosm_sound::{MicInput, MicStream}; +use cpal::{ + traits::{DeviceTrait, HostTrait, StreamTrait}, + SizedSample, +}; +use dasp::sample::ToSample; +use futures_channel::mpsc; +use futures_util::{Stream, StreamExt}; +use std::pin::Pin; + +use crate::AsyncSource; + +pub struct MicInput { + #[allow(dead_code)] + host: cpal::Host, + device: cpal::Device, + config: cpal::SupportedStreamConfig, +} + +impl Default for MicInput { + fn default() -> Self { + let host = cpal::default_host(); + let device = host + .default_input_device() + .expect("Failed to get default input device"); + let config = device + .default_input_config() + .expect("Failed to get default input config"); + + Self { + host, + device, + config, + } + } +} + +impl MicInput { + pub fn device_name(&self) -> String { + self.device.name().expect("Failed to get input device name") + } + + pub fn from_device(device_name: impl AsRef) -> Self { + let host = cpal::default_host(); + let device = host + .input_devices() + .expect("Failed to get input devices") + .find(|d| d.name().expect("Failed to get input device name") == device_name.as_ref()) + .expect("Failed to get input device"); + let config = device + .default_input_config() + .expect("Failed to get default input config"); + + Self { + host, + device, + config, + } + } +} + +impl MicInput { + pub fn stream(&self) -> MicStream { + let (tx, rx) = mpsc::unbounded::>(); + + let config = self.config.clone(); + let device = self.device.clone(); + let (drop_tx, drop_rx) = std::sync::mpsc::channel(); + + std::thread::spawn(move || { + fn build_stream + SizedSample>( + device: &cpal::Device, + config: &cpal::SupportedStreamConfig, + mut tx: mpsc::UnboundedSender>, + ) -> Result { + let channels = config.channels() as usize; + device.build_input_stream::( + &config.config(), + move |data: &[S], _input_callback_info: &_| { + let _ = tx.start_send( + data.iter() + .step_by(channels) + .map(|&x| x.to_sample()) + .collect(), + ); + }, + |err| { + tracing::error!("an error occurred on stream: {}", err); + }, + None, + ) + } + + let start_stream = || { + let stream = match config.sample_format() { + cpal::SampleFormat::I8 => build_stream::(&device, &config, tx), + cpal::SampleFormat::I16 => build_stream::(&device, &config, tx), + cpal::SampleFormat::I32 => build_stream::(&device, &config, tx), + cpal::SampleFormat::F32 => build_stream::(&device, &config, tx), + sample_format => { + tracing::error!("Unsupported sample format '{sample_format}'"); + return None; + } + }; + + let stream = match stream { + Ok(stream) => stream, + Err(err) => { + tracing::error!("Error starting stream: {}", err); + return None; + } + }; + + if let Err(err) = stream.play() { + tracing::error!("Error playing stream: {}", err); + } + + Some(stream) + }; + + let stream = match start_stream() { + Some(stream) => stream, + None => { + return; + } + }; + + // Wait for the stream to be dropped + drop_rx.recv().unwrap(); + + // Then drop the stream + drop(stream); + }); + + let receiver = rx.map(futures_util::stream::iter).flatten(); + MicStream { + drop_tx, + config: self.config.clone(), + receiver: Box::pin(receiver), + read_data: Vec::new(), + } + } +} + +pub struct MicStream { + drop_tx: std::sync::mpsc::Sender<()>, + config: cpal::SupportedStreamConfig, + read_data: Vec, + receiver: Pin + Send + Sync>>, +} + +impl Drop for MicStream { + fn drop(&mut self) { + self.drop_tx.send(()).unwrap(); + } +} + +impl Stream for MicStream { + type Item = f32; + + fn poll_next( + mut self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + match self.receiver.as_mut().poll_next_unpin(cx) { + std::task::Poll::Ready(Some(data_chunk)) => { + self.read_data.push(data_chunk); + std::task::Poll::Ready(Some(data_chunk)) + } + std::task::Poll::Ready(None) => std::task::Poll::Ready(None), + std::task::Poll::Pending => std::task::Poll::Pending, + } + } +} + +impl AsyncSource for MicStream { + fn as_stream(&mut self) -> impl Stream + '_ { + self + } + + fn sample_rate(&self) -> u32 { + self.config.sample_rate().0 + } +} #[cfg(test)] mod tests { diff --git a/crates/chunker/src/lib.rs b/crates/chunker/src/lib.rs index 45b6e86e00..ca7144d08e 100644 --- a/crates/chunker/src/lib.rs +++ b/crates/chunker/src/lib.rs @@ -70,7 +70,8 @@ pub trait VadExt: AsyncSource + Sized { Self: Unpin, { let config = VadConfig { - post_speech_pad: Duration::from_millis(50), + redemption_time: Duration::from_millis(600), + min_speech_time: Duration::from_millis(50), ..Default::default() }; diff --git a/crates/whisper-local/src/model.rs b/crates/whisper-local/src/model.rs index de04f62e14..15528e7ab3 100644 --- a/crates/whisper-local/src/model.rs +++ b/crates/whisper-local/src/model.rs @@ -139,33 +139,61 @@ impl Whisper { let mut segments = Vec::new(); for i in 0..num_segments { - let text = self.state.full_get_segment_text_lossy(i)?; + let text = TRAILING_DOTS + .replace(&self.state.full_get_segment_text_lossy(i)?, "") + .to_string(); + let (start, end) = ( self.state.full_get_segment_t0(i)?, self.state.full_get_segment_t1(i)?, ); let confidence = self.calculate_segment_confidence(i); - let mut segment = Segment { + segments.push(Segment { text, start: start as f32 / 1000.0, end: end as f32 / 1000.0, confidence, ..Default::default() - }; - segment.trim(); - segments.push(segment); + }); } - self.dynamic_prompt = segments + let segments = Self::filter_segments(segments); + + let full_text = segments .iter() .map(|s| s.text()) .collect::>() .join(" "); + if !full_text.is_empty() { + self.dynamic_prompt = full_text; + } + Ok(segments) } + fn filter_segments(segments: Vec) -> Vec { + segments + .into_iter() + .filter(|s| { + let t = s.text.trim().to_lowercase(); + + if s.confidence < 0.005 + || t == "you" + || t == "thank you" + || t == "you." + || t == "thank you." + || t == "🎵" + { + false + } else { + true + } + }) + .collect() + } + // https://github.com/ggml-org/whisper.cpp/pull/971/files#diff-2d3599a9fad195f2c3c60bd06691bc1815325b3560b5feda41a91fa71194e805R310-R327 fn calculate_segment_confidence(&self, segment_idx: i32) -> f32 { let n_tokens = self.state.full_n_tokens(segment_idx).unwrap_or(0); @@ -262,10 +290,6 @@ impl Segment { pub fn meta(&self) -> Option { self.meta.clone() } - - pub fn trim(&mut self) { - self.text = TRAILING_DOTS.replace(&self.text, "").to_string(); - } } #[cfg(test)] @@ -273,36 +297,6 @@ mod tests { use super::*; use futures_util::StreamExt; - #[test] - fn test_trim() { - { - let mut segment = Segment { - text: "Hello...".to_string(), - ..Default::default() - }; - segment.trim(); - assert_eq!(segment.text, "Hello"); - } - - { - let mut segment = Segment { - text: "Hello".to_string(), - ..Default::default() - }; - segment.trim(); - assert_eq!(segment.text, "Hello"); - } - - { - let mut segment = Segment { - text: "Hello.".to_string(), - ..Default::default() - }; - segment.trim(); - assert_eq!(segment.text, "Hello."); - } - } - #[test] fn test_whisper() { let mut whisper = Whisper::builder() diff --git a/plugins/listener/build.rs b/plugins/listener/build.rs index 39b2a7fb8e..d4b0b38521 100644 --- a/plugins/listener/build.rs +++ b/plugins/listener/build.rs @@ -1,5 +1,7 @@ const COMMANDS: &[&str] = &[ "list_microphone_devices", + "get_current_microphone_device", + "set_microphone_device", "check_microphone_access", "check_system_audio_access", "request_microphone_access", diff --git a/plugins/listener/js/bindings.gen.ts b/plugins/listener/js/bindings.gen.ts index da49cfd16d..74092637fc 100644 --- a/plugins/listener/js/bindings.gen.ts +++ b/plugins/listener/js/bindings.gen.ts @@ -10,6 +10,12 @@ export const commands = { async listMicrophoneDevices() : Promise { return await TAURI_INVOKE("plugin:listener|list_microphone_devices"); }, +async getCurrentMicrophoneDevice() : Promise { + return await TAURI_INVOKE("plugin:listener|get_current_microphone_device"); +}, +async setMicrophoneDevice(deviceName: string) : Promise { + return await TAURI_INVOKE("plugin:listener|set_microphone_device", { deviceName }); +}, async checkMicrophoneAccess() : Promise { return await TAURI_INVOKE("plugin:listener|check_microphone_access"); }, diff --git a/plugins/listener/permissions/autogenerated/commands/get_current_microphone_device.toml b/plugins/listener/permissions/autogenerated/commands/get_current_microphone_device.toml new file mode 100644 index 0000000000..e1908c17de --- /dev/null +++ b/plugins/listener/permissions/autogenerated/commands/get_current_microphone_device.toml @@ -0,0 +1,13 @@ +# Automatically generated - DO NOT EDIT! + +"$schema" = "../../schemas/schema.json" + +[[permission]] +identifier = "allow-get-current-microphone-device" +description = "Enables the get_current_microphone_device command without any pre-configured scope." +commands.allow = ["get_current_microphone_device"] + +[[permission]] +identifier = "deny-get-current-microphone-device" +description = "Denies the get_current_microphone_device command without any pre-configured scope." +commands.deny = ["get_current_microphone_device"] diff --git a/plugins/listener/permissions/autogenerated/commands/set_microphone_device.toml b/plugins/listener/permissions/autogenerated/commands/set_microphone_device.toml new file mode 100644 index 0000000000..803f563dec --- /dev/null +++ b/plugins/listener/permissions/autogenerated/commands/set_microphone_device.toml @@ -0,0 +1,13 @@ +# Automatically generated - DO NOT EDIT! + +"$schema" = "../../schemas/schema.json" + +[[permission]] +identifier = "allow-set-microphone-device" +description = "Enables the set_microphone_device command without any pre-configured scope." +commands.allow = ["set_microphone_device"] + +[[permission]] +identifier = "deny-set-microphone-device" +description = "Denies the set_microphone_device command without any pre-configured scope." +commands.deny = ["set_microphone_device"] diff --git a/plugins/listener/permissions/autogenerated/reference.md b/plugins/listener/permissions/autogenerated/reference.md index c9f0890609..a44206bf14 100644 --- a/plugins/listener/permissions/autogenerated/reference.md +++ b/plugins/listener/permissions/autogenerated/reference.md @@ -5,6 +5,8 @@ Default permissions for the plugin #### This default permission set includes the following: - `allow-list-microphone-devices` +- `allow-get-current-microphone-device` +- `allow-set-microphone-device` - `allow-check-microphone-access` - `allow-check-system-audio-access` - `allow-request-microphone-access` @@ -85,6 +87,32 @@ Denies the check_system_audio_access command without any pre-configured scope. +`listener:allow-get-current-microphone-device` + + + + +Enables the get_current_microphone_device command without any pre-configured scope. + + + + + + + +`listener:deny-get-current-microphone-device` + + + + +Denies the get_current_microphone_device command without any pre-configured scope. + + + + + + + `listener:allow-get-mic-muted` @@ -397,6 +425,32 @@ Denies the set_mic_muted command without any pre-configured scope. +`listener:allow-set-microphone-device` + + + + +Enables the set_microphone_device command without any pre-configured scope. + + + + + + + +`listener:deny-set-microphone-device` + + + + +Denies the set_microphone_device command without any pre-configured scope. + + + + + + + `listener:allow-set-speaker-muted` diff --git a/plugins/listener/permissions/default.toml b/plugins/listener/permissions/default.toml index f7316703c6..010548327b 100644 --- a/plugins/listener/permissions/default.toml +++ b/plugins/listener/permissions/default.toml @@ -2,6 +2,8 @@ description = "Default permissions for the plugin" permissions = [ "allow-list-microphone-devices", + "allow-get-current-microphone-device", + "allow-set-microphone-device", "allow-check-microphone-access", "allow-check-system-audio-access", "allow-request-microphone-access", diff --git a/plugins/listener/permissions/schemas/schema.json b/plugins/listener/permissions/schemas/schema.json index 05f5258f72..4b10ef8e55 100644 --- a/plugins/listener/permissions/schemas/schema.json +++ b/plugins/listener/permissions/schemas/schema.json @@ -318,6 +318,18 @@ "const": "deny-check-system-audio-access", "markdownDescription": "Denies the check_system_audio_access command without any pre-configured scope." }, + { + "description": "Enables the get_current_microphone_device command without any pre-configured scope.", + "type": "string", + "const": "allow-get-current-microphone-device", + "markdownDescription": "Enables the get_current_microphone_device command without any pre-configured scope." + }, + { + "description": "Denies the get_current_microphone_device command without any pre-configured scope.", + "type": "string", + "const": "deny-get-current-microphone-device", + "markdownDescription": "Denies the get_current_microphone_device command without any pre-configured scope." + }, { "description": "Enables the get_mic_muted command without any pre-configured scope.", "type": "string", @@ -462,6 +474,18 @@ "const": "deny-set-mic-muted", "markdownDescription": "Denies the set_mic_muted command without any pre-configured scope." }, + { + "description": "Enables the set_microphone_device command without any pre-configured scope.", + "type": "string", + "const": "allow-set-microphone-device", + "markdownDescription": "Enables the set_microphone_device command without any pre-configured scope." + }, + { + "description": "Denies the set_microphone_device command without any pre-configured scope.", + "type": "string", + "const": "deny-set-microphone-device", + "markdownDescription": "Denies the set_microphone_device command without any pre-configured scope." + }, { "description": "Enables the set_speaker_muted command without any pre-configured scope.", "type": "string", @@ -499,10 +523,10 @@ "markdownDescription": "Denies the stop_session command without any pre-configured scope." }, { - "description": "Default permissions for the plugin\n#### This default permission set includes:\n\n- `allow-list-microphone-devices`\n- `allow-check-microphone-access`\n- `allow-check-system-audio-access`\n- `allow-request-microphone-access`\n- `allow-request-system-audio-access`\n- `allow-open-microphone-access-settings`\n- `allow-open-system-audio-access-settings`\n- `allow-start-session`\n- `allow-stop-session`\n- `allow-pause-session`\n- `allow-resume-session`\n- `allow-get-mic-muted`\n- `allow-set-mic-muted`\n- `allow-get-speaker-muted`\n- `allow-set-speaker-muted`\n- `allow-get-state`", + "description": "Default permissions for the plugin\n#### This default permission set includes:\n\n- `allow-list-microphone-devices`\n- `allow-get-current-microphone-device`\n- `allow-set-microphone-device`\n- `allow-check-microphone-access`\n- `allow-check-system-audio-access`\n- `allow-request-microphone-access`\n- `allow-request-system-audio-access`\n- `allow-open-microphone-access-settings`\n- `allow-open-system-audio-access-settings`\n- `allow-start-session`\n- `allow-stop-session`\n- `allow-pause-session`\n- `allow-resume-session`\n- `allow-get-mic-muted`\n- `allow-set-mic-muted`\n- `allow-get-speaker-muted`\n- `allow-set-speaker-muted`\n- `allow-get-state`", "type": "string", "const": "default", - "markdownDescription": "Default permissions for the plugin\n#### This default permission set includes:\n\n- `allow-list-microphone-devices`\n- `allow-check-microphone-access`\n- `allow-check-system-audio-access`\n- `allow-request-microphone-access`\n- `allow-request-system-audio-access`\n- `allow-open-microphone-access-settings`\n- `allow-open-system-audio-access-settings`\n- `allow-start-session`\n- `allow-stop-session`\n- `allow-pause-session`\n- `allow-resume-session`\n- `allow-get-mic-muted`\n- `allow-set-mic-muted`\n- `allow-get-speaker-muted`\n- `allow-set-speaker-muted`\n- `allow-get-state`" + "markdownDescription": "Default permissions for the plugin\n#### This default permission set includes:\n\n- `allow-list-microphone-devices`\n- `allow-get-current-microphone-device`\n- `allow-set-microphone-device`\n- `allow-check-microphone-access`\n- `allow-check-system-audio-access`\n- `allow-request-microphone-access`\n- `allow-request-system-audio-access`\n- `allow-open-microphone-access-settings`\n- `allow-open-system-audio-access-settings`\n- `allow-start-session`\n- `allow-stop-session`\n- `allow-pause-session`\n- `allow-resume-session`\n- `allow-get-mic-muted`\n- `allow-set-mic-muted`\n- `allow-get-speaker-muted`\n- `allow-set-speaker-muted`\n- `allow-get-state`" } ] } diff --git a/plugins/listener/src/commands.rs b/plugins/listener/src/commands.rs index 3be35dc4ab..f6d4811dab 100644 --- a/plugins/listener/src/commands.rs +++ b/plugins/listener/src/commands.rs @@ -10,6 +10,27 @@ pub async fn list_microphone_devices( .map_err(|e| e.to_string()) } +#[tauri::command] +#[specta::specta] +pub async fn get_current_microphone_device( + app: tauri::AppHandle, +) -> Result, String> { + app.get_current_microphone_device() + .await + .map_err(|e| e.to_string()) +} + +#[tauri::command] +#[specta::specta] +pub async fn set_microphone_device( + app: tauri::AppHandle, + device_name: String, +) -> Result<(), String> { + app.set_microphone_device(device_name) + .await + .map_err(|e| e.to_string()) +} + #[tauri::command] #[specta::specta] pub async fn check_microphone_access( diff --git a/plugins/listener/src/ext.rs b/plugins/listener/src/ext.rs index ab8a2fdf0a..3ab75fc91d 100644 --- a/plugins/listener/src/ext.rs +++ b/plugins/listener/src/ext.rs @@ -1,7 +1,6 @@ use std::future::Future; use futures_util::StreamExt; -use hypr_audio::cpal::traits::{DeviceTrait, HostTrait}; #[cfg(target_os = "macos")] use { @@ -11,6 +10,13 @@ use { pub trait ListenerPluginExt { fn list_microphone_devices(&self) -> impl Future, crate::Error>>; + fn get_current_microphone_device( + &self, + ) -> impl Future, crate::Error>>; + fn set_microphone_device( + &self, + device_name: impl Into, + ) -> impl Future>; fn check_microphone_access(&self) -> impl Future>; fn check_system_audio_access(&self) -> impl Future>; @@ -34,13 +40,30 @@ pub trait ListenerPluginExt { impl> ListenerPluginExt for T { #[tracing::instrument(skip_all)] async fn list_microphone_devices(&self) -> Result, crate::Error> { - let host = hypr_audio::cpal::default_host(); - let devices = host.input_devices()?; + Ok(hypr_audio::AudioInput::list_mic_devices()) + } + + #[tracing::instrument(skip_all)] + async fn get_current_microphone_device(&self) -> Result, crate::Error> { + let state = self.state::(); + let s = state.lock().await; + Ok(s.fsm.get_current_mic_device()) + } + + #[tracing::instrument(skip_all)] + async fn set_microphone_device( + &self, + device_name: impl Into, + ) -> Result<(), crate::Error> { + let state = self.state::(); - Ok(devices - .filter_map(|d| d.name().ok()) - .filter(|d| d != "hypr-audio-tap") - .collect()) + { + let mut guard = state.lock().await; + let event = crate::fsm::StateEvent::MicChange(Some(device_name.into())); + guard.fsm.handle(&event).await; + } + + Ok(()) } #[tracing::instrument(skip_all)] diff --git a/plugins/listener/src/fsm.rs b/plugins/listener/src/fsm.rs index e30a877e60..efd14a93c4 100644 --- a/plugins/listener/src/fsm.rs +++ b/plugins/listener/src/fsm.rs @@ -169,6 +169,7 @@ impl AudioChannels { pub struct Session { app: tauri::AppHandle, session_id: Option, + mic_device_name: Option, mic_muted_tx: Option>, mic_muted_rx: Option>, speaker_muted_tx: Option>, @@ -180,9 +181,12 @@ pub struct Session { impl Session { pub fn new(app: tauri::AppHandle) -> Self { + let mic_device_name = hypr_audio::AudioInput::get_default_mic_device_name(); + Self { app, session_id: None, + mic_device_name: Some(mic_device_name), mic_muted_tx: None, mic_muted_rx: None, speaker_muted_tx: None, @@ -197,8 +201,8 @@ impl Session { async fn setup_resources(&mut self, id: impl Into) -> Result<(), crate::Error> { use tauri_plugin_db::DatabasePluginExt; - let user_id = self.app.db_user_id().await?.unwrap(); let session_id = id.into(); + let user_id = self.app.db_user_id().await?.unwrap(); self.session_id = Some(session_id.clone()); let (record, language, jargons) = { @@ -240,7 +244,12 @@ impl Session { let listen_client = setup_listen_client(&self.app, language, jargons).await?; let mic_sample_stream = { - let mut input = hypr_audio::AudioInput::from_mic(); + let mut input = match &self.mic_device_name { + Some(device_name) => { + hypr_audio::AudioInput::from_mic_with_device_name(device_name.clone()) + } + None => hypr_audio::AudioInput::from_mic(), + }; input.stream() }; let mic_stream = mic_sample_stream @@ -520,6 +529,14 @@ impl Session { None => false, } } + + pub fn get_available_mic_devices() -> Vec { + hypr_audio::AudioInput::list_mic_devices() + } + + pub fn get_current_mic_device(&self) -> Option { + self.mic_device_name.clone() + } } async fn setup_listen_client( @@ -589,6 +606,7 @@ pub enum StateEvent { Resume, MicMuted(bool), SpeakerMuted(bool), + MicChange(Option), } #[state_machine( @@ -614,6 +632,18 @@ impl Session { } Handled } + StateEvent::MicChange(device_name) => { + self.mic_device_name = device_name.clone(); + + if self.session_id.is_some() && self.tasks.is_some() { + if let Some(session_id) = self.session_id.clone() { + self.teardown_resources().await; + self.setup_resources(&session_id).await.unwrap(); + } + } + + Handled + } _ => Super, } } diff --git a/plugins/listener/src/lib.rs b/plugins/listener/src/lib.rs index db02f2f6e9..002e18e834 100644 --- a/plugins/listener/src/lib.rs +++ b/plugins/listener/src/lib.rs @@ -29,6 +29,8 @@ fn make_specta_builder() -> tauri_specta::Builder { .plugin_name(PLUGIN_NAME) .commands(tauri_specta::collect_commands![ commands::list_microphone_devices::, + commands::get_current_microphone_device::, + commands::set_microphone_device::, commands::check_microphone_access::, commands::check_system_audio_access::, commands::request_microphone_access::, diff --git a/plugins/local-stt/src/server.rs b/plugins/local-stt/src/server.rs index 023e60c4bc..2e6126d82d 100644 --- a/plugins/local-stt/src/server.rs +++ b/plugins/local-stt/src/server.rs @@ -207,10 +207,7 @@ async fn process_transcription_stream( let duration = chunk.duration() as u64; let confidence = chunk.confidence(); - if confidence < 0.1 { - tracing::warn!(confidence, "skipping_transcript: {}", text); - continue; - } + let source = meta.and_then(|meta| meta.get("source")