From 2f34cb1ccffe58279adb0c26cd2299da12143edb Mon Sep 17 00:00:00 2001 From: Yujong Lee Date: Tue, 19 Aug 2025 23:57:14 -0700 Subject: [PATCH 1/5] wip --- .../settings/components/ai/stt-view-local.tsx | 2 +- plugins/local-stt/src/events.rs | 90 ++++++++----------- plugins/local-stt/src/ext.rs | 14 ++- 3 files changed, 49 insertions(+), 57 deletions(-) diff --git a/apps/desktop/src/components/settings/components/ai/stt-view-local.tsx b/apps/desktop/src/components/settings/components/ai/stt-view-local.tsx index 451152059e..b4fbee4bd5 100644 --- a/apps/desktop/src/components/settings/components/ai/stt-view-local.tsx +++ b/apps/desktop/src/components/settings/components/ai/stt-view-local.tsx @@ -224,7 +224,7 @@ function ProModelsManagement( {proModels.data?.map((model) => ( (app: &tauri::AppHandle, event: &tauri::Run let state = app.state::(); match event { - tauri::RunEvent::ExitRequested { .. } | tauri::RunEvent::Exit => { - tokio::task::block_in_place(|| { - tokio::runtime::Handle::current().block_on(async { - let mut guard = state.lock().await; - - if let Some(server) = guard.internal_server.take() { - let _ = server.terminate(); - guard.internal_server = None; - } - if let Some(server) = guard.external_server.take() { - let _ = server.terminate(); - guard.external_server = None; - } - for (_, (task, token)) in guard.download_task.drain() { - token.cancel(); - task.abort(); + tauri::RunEvent::WindowEvent { label, event, .. } => match event { + tauri::WindowEvent::CloseRequested { .. } | tauri::WindowEvent::Destroyed => { + let hypr_window = match label.parse::() { + Ok(window) => window, + Err(e) => { + tracing::warn!("window_parse_error: {:?}", e); + return; } - }); - }); - } - tauri::RunEvent::WindowEvent { label, event, .. 
} => { - let hypr_window = match label.parse::() { - Ok(window) => window, - Err(e) => { - tracing::warn!("window_parse_error: {:?}", e); + }; + + if hypr_window != HyprWindow::Main { return; } - }; - if hypr_window != HyprWindow::Main { - return; - } + tracing::info!("events: stopping servers"); - match event { - tauri::WindowEvent::CloseRequested { .. } | tauri::WindowEvent::Destroyed => { - tokio::task::block_in_place(|| { - tokio::runtime::Handle::current().block_on(async { - let mut guard = state.lock().await; + tokio::task::block_in_place(|| { + tokio::runtime::Handle::current().block_on(async { + let mut guard = state.lock().await; - if let Some(server) = guard.internal_server.take() { - let _ = server.terminate(); - guard.internal_server = None; - } - if let Some(server) = guard.external_server.take() { - let _ = server.terminate(); - guard.external_server = None; - } - for (_, (task, token)) in guard.download_task.drain() { - token.cancel(); - task.abort(); - } - }); + if let Some(server) = guard.internal_server.take() { + let _ = server.terminate(); + guard.internal_server = None; + } + if let Some(server) = guard.external_server.take() { + let _ = server.terminate(); + guard.external_server = None; + } + for (_, (task, token)) in guard.download_task.drain() { + token.cancel(); + task.abort(); + } }); - } - tauri::WindowEvent::Focused(true) => { - tokio::task::block_in_place(|| { - tokio::runtime::Handle::current().block_on(async { - let _ = app.start_server(None).await; - }); + }); + } + tauri::WindowEvent::Focused(true) => { + tokio::task::block_in_place(|| { + tokio::runtime::Handle::current().block_on(async { + let _ = app.start_server(None).await; }); - } - _ => {} + }); } - } + _ => {} + }, _ => {} } } diff --git a/plugins/local-stt/src/ext.rs b/plugins/local-stt/src/ext.rs index 184eaebceb..896465220c 100644 --- a/plugins/local-stt/src/ext.rs +++ b/plugins/local-stt/src/ext.rs @@ -271,6 +271,8 @@ impl> LocalSttPluginExt for T { let state = 
self.state::(); let mut s = state.lock().await; + tracing::info!("ext: stopping servers"); + let mut stopped = false; match server_type { Some(ServerType::External) => { @@ -303,12 +305,17 @@ impl> LocalSttPluginExt for T { #[tracing::instrument(skip_all)] async fn get_servers(&self) -> Result>, crate::Error> { let state = self.state::(); - let guard = state.lock().await; + let mut guard = state.lock().await; let internal_url = if let Some(server) = &guard.internal_server { if server.health().await { Some(server.base_url.clone()) } else { + if let Some(server) = guard.internal_server.take() { + server.terminate().ok(); + guard.internal_server = None; + } + None } } else { @@ -319,6 +326,11 @@ impl> LocalSttPluginExt for T { if server.health().await { Some(server.base_url.clone()) } else { + if let Some(server) = guard.external_server.take() { + server.terminate().ok(); + guard.external_server = None; + } + None } } else { From 000eb11046bd9ea56cb4aa494095392f8907dae7 Mon Sep 17 00:00:00 2001 From: Yujong Lee Date: Wed, 20 Aug 2025 14:15:48 -0700 Subject: [PATCH 2/5] done --- .../editor-area/note-header/listen-button.tsx | 23 +- .../settings/components/ai/stt-view-local.tsx | 388 +++++++++++------- crates/am/src/model.rs | 2 +- crates/am/src/types.rs | 4 +- .../src/service/streaming.rs | 2 +- owhisper/owhisper-client/src/lib.rs | 2 +- plugins/listener/src/fsm.rs | 2 +- plugins/local-stt/js/bindings.gen.ts | 3 +- plugins/local-stt/src/commands.rs | 5 +- plugins/local-stt/src/ext.rs | 37 +- plugins/local-stt/src/server/external.rs | 80 +++- plugins/local-stt/src/server/internal.rs | 10 +- plugins/local-stt/src/server/mod.rs | 10 + 13 files changed, 358 insertions(+), 210 deletions(-) diff --git a/apps/desktop/src/components/editor-area/note-header/listen-button.tsx b/apps/desktop/src/components/editor-area/note-header/listen-button.tsx index a7d8f3b1df..1b7d835740 100644 --- a/apps/desktop/src/components/editor-area/note-header/listen-button.tsx +++ 
b/apps/desktop/src/components/editor-area/note-header/listen-button.tsx @@ -59,9 +59,20 @@ export default function ListenButton({ sessionId, isCompact = false }: { session const { onboardingSessionId } = useHypr(); const isOnboarding = sessionId === onboardingSessionId; + const ongoingSessionStatus = useOngoingSession((s) => s.status); + const ongoingSessionId = useOngoingSession((s) => s.sessionId); + const ongoingSessionStore = useOngoingSession((s) => ({ + start: s.start, + resume: s.resume, + pause: s.pause, + stop: s.stop, + loading: s.loading, + })); + const modelDownloaded = useQuery({ queryKey: ["check-stt-model-downloaded"], - refetchInterval: 1000, + refetchInterval: 1500, + enabled: ongoingSessionStatus === "inactive", queryFn: async () => { const currentModel = await localSttCommands.getCurrentModel(); const isDownloaded = await localSttCommands.isModelDownloaded(currentModel); @@ -84,16 +95,6 @@ export default function ListenButton({ sessionId, isCompact = false }: { session enabled: isOnboarding, }); - const ongoingSessionStatus = useOngoingSession((s) => s.status); - const ongoingSessionId = useOngoingSession((s) => s.sessionId); - const ongoingSessionStore = useOngoingSession((s) => ({ - start: s.start, - resume: s.resume, - pause: s.pause, - stop: s.stop, - loading: s.loading, - })); - const sessionWords = useSession(sessionId, (s) => s.session.words); // don't show consent notification if the session already has transcript diff --git a/apps/desktop/src/components/settings/components/ai/stt-view-local.tsx b/apps/desktop/src/components/settings/components/ai/stt-view-local.tsx index b4fbee4bd5..cf1b2a3da3 100644 --- a/apps/desktop/src/components/settings/components/ai/stt-view-local.tsx +++ b/apps/desktop/src/components/settings/components/ai/stt-view-local.tsx @@ -1,19 +1,49 @@ import { useQuery } from "@tanstack/react-query"; import { openPath } from "@tauri-apps/plugin-opener"; import { arch, platform } from "@tauri-apps/plugin-os"; -import { 
DownloadIcon, FolderIcon } from "lucide-react"; +import { DownloadIcon, FolderIcon, InfoIcon } from "lucide-react"; import { useEffect, useMemo } from "react"; -import { commands as localSttCommands, SupportedSttModel, type WhisperModel } from "@hypr/plugin-local-stt"; +import { useLicense } from "@/hooks/use-license"; +import { + commands as localSttCommands, + ServerHealth, + SupportedSttModel, + type WhisperModel, +} from "@hypr/plugin-local-stt"; import { Button } from "@hypr/ui/components/ui/button"; import { cn } from "@hypr/ui/lib/utils"; import { SharedSTTProps, STTModel } from "./shared"; +const DEFAULT_MODEL_KEYS = ["QuantizedSmall"]; +const OTHER_MODEL_KEYS = [ + "QuantizedTiny", + "QuantizedTinyEn", + "QuantizedBase", + "QuantizedBaseEn", + "QuantizedSmallEn", + "QuantizedLargeTurbo", +]; + +const REFETCH_INTERVALS = { + servers: 1000, + downloadStatus: 3000, +} as const; + interface STTViewProps extends SharedSTTProps { isWerModalOpen: boolean; setIsWerModalOpen: (open: boolean) => void; } +interface ModelSectionProps { + status?: ServerHealth; + modelsToShow: STTModel[]; + selectedSTTModel: string; + setSelectedSTTModel: (model: string) => void; + downloadingModels: Set; + handleModelDownload: (model: string) => void; +} + export function STTViewLocal({ selectedSTTModel, setSelectedSTTModel, @@ -22,10 +52,16 @@ export function STTViewLocal({ downloadingModels, handleModelDownload, }: STTViewProps) { + const amAvailable = useMemo(() => platform() === "macos" && arch() === "aarch64", []); + const servers = useQuery({ queryKey: ["local-stt-servers"], - queryFn: () => localSttCommands.getServers(), - refetchInterval: 1000, + queryFn: async () => { + const servers = await localSttCommands.getServers(); + console.log(servers); + return servers; + }, + refetchInterval: REFETCH_INTERVALS.servers, }); const currentSTTModel = useQuery({ @@ -33,41 +69,42 @@ export function STTViewLocal({ queryFn: () => localSttCommands.getCurrentModel(), }); - useEffect(() => { 
- if (currentSTTModel.data) { - setSelectedSTTModel(currentSTTModel.data); - } - }, [currentSTTModel.data, setSelectedSTTModel]); - - const amAvailable = useMemo(() => platform() === "macos" && arch() === "aarch64", []); - const sttModelDownloadStatus = useQuery({ queryKey: ["stt-model-download-status"], queryFn: async () => { - const statusChecks = await Promise.all([ - localSttCommands.isModelDownloaded("QuantizedTiny"), - localSttCommands.isModelDownloaded("QuantizedTinyEn"), - localSttCommands.isModelDownloaded("QuantizedBase"), - localSttCommands.isModelDownloaded("QuantizedBaseEn"), - localSttCommands.isModelDownloaded("QuantizedSmall"), - localSttCommands.isModelDownloaded("QuantizedSmallEn"), - localSttCommands.isModelDownloaded("QuantizedLargeTurbo"), - localSttCommands.isModelDownloaded("am-parakeet-v2"), - localSttCommands.isModelDownloaded("am-whisper-large-v3"), - ]); - return { - "QuantizedTiny": statusChecks[0], - "QuantizedTinyEn": statusChecks[1], - "QuantizedBase": statusChecks[2], - "QuantizedBaseEn": statusChecks[3], - "QuantizedSmall": statusChecks[4], - "QuantizedSmallEn": statusChecks[5], - "QuantizedLargeTurbo": statusChecks[6], - } as Record; + const models = [ + "QuantizedTiny", + "QuantizedTinyEn", + "QuantizedBase", + "QuantizedBaseEn", + "QuantizedSmall", + "QuantizedSmallEn", + "QuantizedLargeTurbo", + "am-parakeet-v2", + "am-whisper-large-v3", + ]; + + const statusChecks = await Promise.all( + models.map(model => localSttCommands.isModelDownloaded(model as SupportedSttModel)), + ); + + return models.reduce((acc, model, index) => ({ + ...acc, + [model]: statusChecks[index], + }), {} as Record); }, - refetchInterval: 3000, + refetchInterval: REFETCH_INTERVALS.downloadStatus, }); + // ---------------------------------------- + // Effects + // ---------------------------------------- + useEffect(() => { + if (currentSTTModel.data) { + setSelectedSTTModel(currentSTTModel.data); + } + }, [currentSTTModel.data, setSelectedSTTModel]); + 
useEffect(() => { if (sttModelDownloadStatus.data) { setSttModels(prev => @@ -79,81 +116,77 @@ export function STTViewLocal({ } }, [sttModelDownloadStatus.data, setSttModels]); - const defaultModelKeys = ["QuantizedSmall"]; - const otherModelKeys = [ - "QuantizedTiny", - "QuantizedTinyEn", - "QuantizedBase", - "QuantizedBaseEn", - "QuantizedSmallEn", - "QuantizedLargeTurbo", - ]; - - const modelsToShow = sttModels.filter(model => { - if (defaultModelKeys.includes(model.key)) { - return true; - } - - if (otherModelKeys.includes(model.key) && model.downloaded) { - return true; - } - - return false; - }); + // ---------------------------------------- + // Model Filtering + // ---------------------------------------- + const modelsToShow = useMemo(() => + sttModels.filter(model => + DEFAULT_MODEL_KEYS.includes(model.key) + || (OTHER_MODEL_KEYS.includes(model.key) && model.downloaded) + ), [sttModels]); + // ---------------------------------------- + // Render + // ---------------------------------------- return ( -
- + {/* Basic Models Section */} + + + {/* Divider - only show if pro models available */} {amAvailable && ( - + <> +
+ + {/* Pro Models Section */} + + )}
); } -function BasicModelsManagement({ - on, +// ============================================ +// BASIC MODELS SECTION +// ============================================ +function BasicModelsSection({ + status, modelsToShow, selectedSTTModel, setSelectedSTTModel, downloadingModels, handleModelDownload, -}: { - on: boolean; - modelsToShow: STTModel[]; - selectedSTTModel: string; - setSelectedSTTModel: (model: string) => void; - downloadingModels: Set; - handleModelDownload: (model: string) => void; -}) { +}: ModelSectionProps) { const handleShowFileLocation = async () => { - localSttCommands.modelsDir().then((path) => openPath(path)); + const path = await localSttCommands.modelsDir(); + openPath(path); }; return ( -
-
-
-

Basic Models

- -
-

Default inference mode powered by Whisper.cpp.

-
+
+ {/* Section Header */} + -
+ {/* Models List */} +
{modelsToShow.map((model) => ( ))}
-
+
); } -function ProModelsManagement( - { on, selectedSTTModel, setSelectedSTTModel, downloadingModels, handleModelDownload }: { - on: boolean; - selectedSTTModel: string; - setSelectedSTTModel: (model: string) => void; - downloadingModels: Set; - handleModelDownload: (model: string) => void; - }, -) { - // const { getLicense } = useLicense(); +// ============================================ +// PRO MODELS SECTION +// ============================================ +function ProModelsSection({ + status, + selectedSTTModel, + setSelectedSTTModel, + downloadingModels, + handleModelDownload, +}: Omit) { + const { getLicense } = useLicense(); + const handleShowFileLocation = async () => { - localSttCommands.modelsDir().then((path) => openPath(path)); + const path = await localSttCommands.modelsDir(); + openPath(path); }; const proModels = useQuery({ queryKey: ["pro-models"], queryFn: async () => { const models = await localSttCommands.listSupportedModels().then((models) => - models.filter((model) => - model.key === "am-whisper-large-v3" || model.key === "am-parakeet-v2" || model.key === "am-parakeet-v3" - ) + models.filter((model) => ["am-whisper-large-v3", "am-parakeet-v2", "am-parakeet-v3"].includes(model.key)) ); + const downloaded = await Promise.all( models.map(({ key }) => localSttCommands.isModelDownloaded(key)), ); @@ -204,38 +239,84 @@ function ProModelsManagement( fileName: "", })); }, - refetchInterval: 3000, + refetchInterval: REFETCH_INTERVALS.downloadStatus, }); return ( -
-
-
-
-

Pro Models (Available soon)

- -
-

- Latency and resource optimized. Only for pro plan users. -

-
+
+ -
- {proModels.data?.map((model) => ( - - ))} -
+ {/* Models List */} +
+ {proModels.data?.map((model) => ( + + ))}
-
+ + ); +} + +// ============================================ +// SHARED COMPONENTS +// ============================================ +function SectionHeader({ + title, + subtitle, + description, + status, + docsUrl, +}: { + title: string; + subtitle?: string; + description: string; + status?: ServerHealth; + docsUrl?: string; +}) { + return ( +
+
+

+ {title} + {subtitle && {subtitle}} +

+ + {docsUrl && ( + + + + )} +
+

{description}

+
); } @@ -246,7 +327,7 @@ function ModelEntry({ downloadingModels, handleModelDownload, handleShowFileLocation, - disabled, + disabled = false, }: { model: STTModel; selectedSTTModel: string; @@ -256,26 +337,39 @@ function ModelEntry({ handleShowFileLocation: () => void; disabled?: boolean; }) { + const isSelected = selectedSTTModel === model.key && model.downloaded; + const isSelectable = model.downloaded && !disabled; + const isDownloading = downloadingModels.has(model.key); + + const handleClick = () => { + if (isSelectable) { + setSelectedSTTModel(model.key as WhisperModel); + localSttCommands.setCurrentModel(model.key as WhisperModel); + localSttCommands.stopServer(null); + localSttCommands.startServer(null); + } + }; + + const getCardStyles = () => { + if (isSelected) { + return "border-solid border-blue-500 bg-blue-50"; + } + if (isSelectable) { + return "border-dashed border-gray-300 hover:border-gray-400 bg-white"; + } + return "border-dashed border-gray-200 bg-gray-50 cursor-not-allowed"; + }; + return (
{ - if (model.downloaded && !disabled) { - setSelectedSTTModel(model.key as WhisperModel); - localSttCommands.setCurrentModel(model.key as WhisperModel); - localSttCommands.stopServer(null); - localSttCommands.startServer(null); - } - }} + onClick={handleClick} > + {/* Model Info */}

+ {/* Action Buttons */}
{model.downloaded ? ( @@ -296,20 +391,23 @@ function ModelEntry({ size="sm" disabled={disabled} variant="outline" - onClick={handleShowFileLocation} + onClick={(e) => { + e.stopPropagation(); + handleShowFileLocation(); + }} className="text-xs h-7 px-2 flex items-center gap-1" > Show in Finder ) - : downloadingModels.has(model.key) + : isDownloading ? ( diff --git a/crates/am/src/model.rs b/crates/am/src/model.rs index fe71b346c7..02860150e4 100644 --- a/crates/am/src/model.rs +++ b/crates/am/src/model.rs @@ -87,7 +87,7 @@ impl AmModel { match self { AmModel::ParakeetV2 => 1906983049, AmModel::ParakeetV3 => 3016060540, - AmModel::WhisperLargeV3 => 3016060540, + AmModel::WhisperLargeV3 => 1964673816, } } diff --git a/crates/am/src/types.rs b/crates/am/src/types.rs index 54fb0dbb77..b70c63b4c8 100644 --- a/crates/am/src/types.rs +++ b/crates/am/src/types.rs @@ -19,6 +19,7 @@ common_derives! { } common_derives! { + #[derive(Eq, PartialEq)] #[serde(rename_all = "lowercase")] pub enum ServerStatusType { Ready, @@ -29,7 +30,8 @@ common_derives! { } common_derives! 
{ - #[serde(rename_all = "PascalCase")] + #[derive(Eq, PartialEq)] + #[serde(rename_all = "lowercase")] pub enum ModelState { Unloading, Unloaded, diff --git a/crates/transcribe-whisper-local/src/service/streaming.rs b/crates/transcribe-whisper-local/src/service/streaming.rs index 4bd91d6f0f..9719455ba0 100644 --- a/crates/transcribe-whisper-local/src/service/streaming.rs +++ b/crates/transcribe-whisper-local/src/service/streaming.rs @@ -134,7 +134,7 @@ async fn handle_websocket_connection( let redemption_time = params .redemption_time_ms .map(|ms| Duration::from_millis(ms)) - .unwrap_or(Duration::from_millis(500)); + .unwrap_or(Duration::from_millis(400)); match params.channels { 1 => { diff --git a/owhisper/owhisper-client/src/lib.rs b/owhisper/owhisper-client/src/lib.rs index 38199a684d..fa5c85f823 100644 --- a/owhisper/owhisper-client/src/lib.rs +++ b/owhisper/owhisper-client/src/lib.rs @@ -83,7 +83,7 @@ impl ListenClientBuilder { .append_pair("channels", &channels.to_string()) .append_pair( "redemption_time_ms", - ¶ms.redemption_time_ms.unwrap_or(500).to_string(), + ¶ms.redemption_time_ms.unwrap_or(400).to_string(), ); } diff --git a/plugins/listener/src/fsm.rs b/plugins/listener/src/fsm.rs index 14d99220cf..232fb88839 100644 --- a/plugins/listener/src/fsm.rs +++ b/plugins/listener/src/fsm.rs @@ -585,7 +585,7 @@ async fn setup_listen_client( .api_key(conn.api_key.unwrap_or_default()) .params(owhisper_interface::ListenParams { languages, - redemption_time_ms: Some(if is_onboarding { 70 } else { 500 }), + redemption_time_ms: Some(if is_onboarding { 60 } else { 400 }), ..Default::default() }) .build_dual()) diff --git a/plugins/local-stt/js/bindings.gen.ts b/plugins/local-stt/js/bindings.gen.ts index 7dbfa033c3..339ffcbd35 100644 --- a/plugins/local-stt/js/bindings.gen.ts +++ b/plugins/local-stt/js/bindings.gen.ts @@ -28,7 +28,7 @@ async getCurrentModel() : Promise { async setCurrentModel(model: SupportedSttModel) : Promise { return await 
TAURI_INVOKE("plugin:local-stt|set_current_model", { model }); }, -async getServers() : Promise> { +async getServers() : Promise> { return await TAURI_INVOKE("plugin:local-stt|get_servers"); }, async startServer(model: SupportedSttModel | null) : Promise { @@ -58,6 +58,7 @@ async listSupportedLanguages(model: SupportedSttModel) : Promise { export type AmModel = "am-parakeet-v2" | "am-parakeet-v3" | "am-whisper-large-v3" export type GgmlBackend = { kind: string; name: string; description: string; total_memory_mb: number; free_memory_mb: number } export type Language = { iso639: string } +export type ServerHealth = "unreachable" | "loading" | "ready" export type ServerType = "internal" | "external" export type SttModelInfo = { key: SupportedSttModel; display_name: string; size_bytes: number } export type SupportedSttModel = WhisperModel | AmModel diff --git a/plugins/local-stt/src/commands.rs b/plugins/local-stt/src/commands.rs index 37aad9887d..3ed1e2743d 100644 --- a/plugins/local-stt/src/commands.rs +++ b/plugins/local-stt/src/commands.rs @@ -2,7 +2,8 @@ use std::collections::HashMap; use tauri::ipc::Channel; use crate::{ - server::ServerType, LocalSttPluginExt, SttModelInfo, SupportedSttModel, SUPPORTED_MODELS, + server::{ServerHealth, ServerType}, + LocalSttPluginExt, SttModelInfo, SupportedSttModel, SUPPORTED_MODELS, }; #[tauri::command] @@ -100,7 +101,7 @@ pub async fn stop_server( #[specta::specta] pub async fn get_servers( app: tauri::AppHandle, -) -> Result>, String> { +) -> Result, String> { app.get_servers().await.map_err(|e| e.to_string()) } diff --git a/plugins/local-stt/src/ext.rs b/plugins/local-stt/src/ext.rs index 896465220c..5263694aa4 100644 --- a/plugins/local-stt/src/ext.rs +++ b/plugins/local-stt/src/ext.rs @@ -11,7 +11,7 @@ use tokio_util::sync::CancellationToken; use crate::{ model::SupportedSttModel, - server::{external, internal, ServerType}, + server::{external, internal, ServerHealth, ServerType}, Connection, }; @@ -32,7 +32,7 @@ pub 
trait LocalSttPluginExt { ) -> impl Future>; fn get_servers( &self, - ) -> impl Future>, crate::Error>>; + ) -> impl Future, crate::Error>>; fn get_current_model(&self) -> Result; fn set_current_model( @@ -271,8 +271,6 @@ impl> LocalSttPluginExt for T { let state = self.state::(); let mut s = state.lock().await; - tracing::info!("ext: stopping servers"); - let mut stopped = false; match server_type { Some(ServerType::External) => { @@ -303,38 +301,21 @@ impl> LocalSttPluginExt for T { } #[tracing::instrument(skip_all)] - async fn get_servers(&self) -> Result>, crate::Error> { + async fn get_servers(&self) -> Result, crate::Error> { let state = self.state::(); - let mut guard = state.lock().await; + let guard = state.lock().await; let internal_url = if let Some(server) = &guard.internal_server { - if server.health().await { - Some(server.base_url.clone()) - } else { - if let Some(server) = guard.internal_server.take() { - server.terminate().ok(); - guard.internal_server = None; - } - - None - } + let status = server.health().await; + status } else { - None + ServerHealth::Unreachable }; let external_url = if let Some(server) = &guard.external_server { - if server.health().await { - Some(server.base_url.clone()) - } else { - if let Some(server) = guard.external_server.take() { - server.terminate().ok(); - guard.external_server = None; - } - - None - } + server.health().await } else { - None + ServerHealth::Unreachable }; Ok([ diff --git a/plugins/local-stt/src/server/external.rs b/plugins/local-stt/src/server/external.rs index e58af7853b..b56e9b02e0 100644 --- a/plugins/local-stt/src/server/external.rs +++ b/plugins/local-stt/src/server/external.rs @@ -1,3 +1,5 @@ +use super::ServerHealth; + pub struct ServerHandle { pub base_url: String, api_key: Option, @@ -7,17 +9,29 @@ pub struct ServerHandle { } impl ServerHandle { - pub async fn health(&self) -> bool { + pub async fn health(&self) -> ServerHealth { let res = self.client.status().await; if res.is_err() { - 
return false; + tracing::error!("{:?}", res); + return ServerHealth::Unreachable; + } + + let res = res.unwrap(); + + if res.model_state == hypr_am::ModelState::Loading { + return ServerHealth::Loading; + } + + if res.model_state == hypr_am::ModelState::Loaded { + return ServerHealth::Ready; } - matches!(res.unwrap().status, hypr_am::ServerStatusType::Ready) + ServerHealth::Unreachable } pub fn terminate(self) -> Result<(), crate::Error> { let _ = self.shutdown.send(()); + std::thread::sleep(std::time::Duration::from_millis(250)); self.child.kill().map_err(|e| crate::Error::ShellError(e))?; Ok(()) } @@ -43,8 +57,11 @@ pub async fn run_server( cmd: tauri_plugin_shell::process::Command, am_key: String, ) -> Result { - let port = 8282; - let _ = port_killer::kill(port); + let port = 50060; + + if port_killer::kill(port).is_ok() { + tokio::time::sleep(std::time::Duration::from_millis(100)).await; + } let (mut rx, child) = cmd.args(["--port", &port.to_string()]).spawn()?; let base_url = format!("http://localhost:{}", port); @@ -55,35 +72,66 @@ pub async fn run_server( loop { tokio::select! 
{ _ = shutdown_rx.changed() => { - tracing::info!("external_server_shutdown"); + tracing::info!("shutdown_signal_received"); break; } event = rx.recv() => { - if event.is_none() { - break; - } - - match event.unwrap() { - tauri_plugin_shell::process::CommandEvent::Stdout(bytes) => { + match event { + Some(tauri_plugin_shell::process::CommandEvent::Stdout(bytes)) => { if let Ok(text) = String::from_utf8(bytes) { let text = text.trim(); - tracing::info!("{}", text); + if !text.is_empty() { + tracing::info!("{}", text); + } } } - tauri_plugin_shell::process::CommandEvent::Stderr(bytes) => { + Some(tauri_plugin_shell::process::CommandEvent::Stderr(bytes)) => { if let Ok(text) = String::from_utf8(bytes) { let text = text.trim(); - tracing::info!("{}", text); + if !text.is_empty() { + tracing::info!("{}", text); + } } } + Some(tauri_plugin_shell::process::CommandEvent::Terminated(payload)) => { + // Only log error if it's not a normal exit (code 0) + if payload.code != Some(0) { + tracing::error!("Server process terminated unexpectedly: {:?}", payload); + } + break; + } + Some(tauri_plugin_shell::process::CommandEvent::Error(error)) => { + tracing::error!("{}", error); + break; + } + None => { + tracing::warn!("closed"); + break; + } _ => {} - } } } } }); + // Wait a bit for server to start up before returning + // The server needs time to bind to the port and initialize + tokio::time::sleep(std::time::Duration::from_millis(1000)).await; + + // Verify the server started successfully by checking if we can connect + // But don't check status as it may require initialization first + match client.status().await { + Ok(_) => { + tracing::info!("Server is ready and responding"); + } + Err(e) => { + // Server may need initialization, which happens after this function returns + // Just log the status check result + tracing::info!("Server status check: {:?} (may need initialization)", e); + } + } + Ok(ServerHandle { api_key: Some(am_key), base_url, diff --git 
a/plugins/local-stt/src/server/internal.rs b/plugins/local-stt/src/server/internal.rs index 50785f3168..7a1a02cd96 100644 --- a/plugins/local-stt/src/server/internal.rs +++ b/plugins/local-stt/src/server/internal.rs @@ -6,6 +6,7 @@ use std::{ use axum::{http::StatusCode, response::IntoResponse, routing::get, Router}; use tower_http::cors::{self, CorsLayer}; +use super::ServerHealth; use hypr_whisper_local_model::WhisperModel; #[derive(Default)] @@ -53,9 +54,14 @@ pub struct ServerHandle { } impl ServerHandle { - pub async fn health(&self) -> bool { + pub async fn health(&self) -> ServerHealth { let response = reqwest::get(format!("{}/health", self.base_url)).await; - response.is_ok() + if response.is_err() { + tracing::error!("{:?}", response); + ServerHealth::Unreachable + } else { + ServerHealth::Ready + } } pub fn terminate(self) -> Result<(), crate::Error> { diff --git a/plugins/local-stt/src/server/mod.rs b/plugins/local-stt/src/server/mod.rs index 8116c20226..e57ee6892c 100644 --- a/plugins/local-stt/src/server/mod.rs +++ b/plugins/local-stt/src/server/mod.rs @@ -10,3 +10,13 @@ pub enum ServerType { #[serde(rename = "external")] External, } + +#[derive( + Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize, specta::Type, +)] +#[serde(rename_all = "lowercase")] +pub enum ServerHealth { + Unreachable, + Loading, + Ready, +} From 171c5339d828391521d0ced995c153d2bdf7c90d Mon Sep 17 00:00:00 2001 From: Yujong Lee Date: Wed, 20 Aug 2025 15:32:33 -0700 Subject: [PATCH 3/5] fix multi-channel transcroption --- .../src/service/streaming.rs | 13 +++++++------ plugins/listener/src/manager.rs | 13 ++++++++++++- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/crates/transcribe-whisper-local/src/service/streaming.rs b/crates/transcribe-whisper-local/src/service/streaming.rs index 9719455ba0..373d3ab3be 100644 --- a/crates/transcribe-whisper-local/src/service/streaming.rs +++ b/crates/transcribe-whisper-local/src/service/streaming.rs 
@@ -159,7 +159,7 @@ async fn handle_single_channel( let chunked = hypr_whisper_local::AudioChunkStream(process_vad_stream(vad_chunks, "mixed")); let stream = hypr_whisper_local::TranscribeMetadataAudioStreamExt::transcribe(chunked, model); - process_transcription_stream(ws_sender, stream, guard).await; + process_transcription_stream(ws_sender, stream, guard, 1).await; } async fn handle_dual_channel( @@ -190,13 +190,14 @@ async fn handle_dual_channel( let stream = hypr_whisper_local::TranscribeMetadataAudioStreamExt::transcribe(merged_stream, model); - process_transcription_stream(ws_sender, stream, guard).await; + process_transcription_stream(ws_sender, stream, guard, 2).await; } async fn process_transcription_stream( mut ws_sender: futures_util::stream::SplitSink, mut stream: impl futures_util::Stream + Unpin, guard: ConnectionGuard, + channels: i32, ) { loop { tokio::select! { @@ -221,16 +222,16 @@ async fn process_transcription_stream( ); let (speaker, channel_index) = match source.as_deref() { - Some("mic") => (Some(0), vec![0]), - Some("speaker") => (Some(1), vec![1]), - _ => (None, vec![0]), + Some("mic") => (Some(0), vec![0, channels]), + Some("speaker") => (Some(1), vec![1, channels]), + _ => (None, vec![0, 1]), }; let words: Vec = text .split_whitespace() .filter(|w| !w.is_empty()) .map(|w| Word { - word: w.to_string(), + word: w.trim().to_string(), start: start_f64, end: start_f64 + duration_f64, confidence, diff --git a/plugins/listener/src/manager.rs b/plugins/listener/src/manager.rs index 93db30f269..4f5120c87c 100644 --- a/plugins/listener/src/manager.rs +++ b/plugins/listener/src/manager.rs @@ -45,7 +45,10 @@ impl TranscriptManager { Self::log(self.id, &response); if let owhisper_interface::StreamResponse::TranscriptResponse { - is_final, channel, .. + is_final, + channel, + channel_index, + .. 
} = response { let data = &channel.alternatives[0]; @@ -62,6 +65,14 @@ impl TranscriptManager { Some(w) } }) + .map(|mut w| { + if w.speaker.is_none() { + let speaker = channel_index.first().unwrap().clone(); + w.speaker = Some(speaker); + } + + w + }) .collect::>(); if is_final { From f2252444965ecb57d3b06209b73efd4547862d03 Mon Sep 17 00:00:00 2001 From: Yujong Lee Date: Wed, 20 Aug 2025 16:55:48 -0700 Subject: [PATCH 4/5] more wip --- apps/desktop/src/locales/en/messages.po | 98 ++++++++++++------------- apps/desktop/src/locales/ko/messages.po | 98 ++++++++++++------------- owhisper/owhisper-client/src/lib.rs | 22 +++++- owhisper/owhisper-interface/src/lib.rs | 6 +- plugins/listener/src/fsm.rs | 15 +++- plugins/listener/src/manager.rs | 53 ++++++++----- 6 files changed, 168 insertions(+), 124 deletions(-) diff --git a/apps/desktop/src/locales/en/messages.po b/apps/desktop/src/locales/en/messages.po index 70122f0450..53e03fac29 100644 --- a/apps/desktop/src/locales/en/messages.po +++ b/apps/desktop/src/locales/en/messages.po @@ -264,14 +264,14 @@ msgstr "(Optional for localhost)" msgid "(Optional)" msgstr "(Optional)" -#. placeholder {0}: isViewingTemplate ? "Back" : "Save and close" -#. placeholder {0}: isCompact ? "Go" : "Resume" +#. placeholder {0}: disabled ? "Wait..." : "Play again" +#. placeholder {0}: disabled ? "Wait..." : "Play video" #. placeholder {0}: disabled ? "Wait..." : isHovered ? (isCompact ? "Go" : "Resume") : (isCompact ? 
"End" : "Ended") +#: src/components/editor-area/note-header/listen-button.tsx:153 +#: src/components/editor-area/note-header/listen-button.tsx:231 +#: src/components/editor-area/note-header/listen-button.tsx:253 +#: src/components/editor-area/note-header/listen-button.tsx:273 #: src/components/settings/views/templates.tsx:217 -#: src/components/editor-area/note-header/listen-button.tsx:152 -#: src/components/editor-area/note-header/listen-button.tsx:230 -#: src/components/editor-area/note-header/listen-button.tsx:252 -#: src/components/editor-area/note-header/listen-button.tsx:272 msgid "{0}" msgstr "{0}" @@ -280,8 +280,8 @@ msgstr "{0}" msgid "{0} calendars selected" msgstr "{0} calendars selected" -#: src/components/welcome-modal/audio-permissions-view.tsx:82 #: src/components/settings/views/sound.tsx:64 +#: src/components/welcome-modal/audio-permissions-view.tsx:82 msgid "{buttonText}" msgstr "{buttonText}" @@ -318,9 +318,9 @@ msgstr "<0>Create Note" msgid "Access granted" msgstr "Access granted" -#: src/components/welcome-modal/calendar-permissions-view.tsx:50 -#: src/components/welcome-modal/audio-permissions-view.tsx:58 #: src/components/settings/views/sound.tsx:44 +#: src/components/welcome-modal/audio-permissions-view.tsx:58 +#: src/components/welcome-modal/calendar-permissions-view.tsx:50 msgid "Access Granted" msgstr "Access Granted" @@ -398,20 +398,20 @@ msgstr "and {0} more members" msgid "Anyone with the link can view this page" msgstr "Anyone with the link can view this page" -#: src/components/welcome-modal/custom-endpoint-view.tsx:498 #: src/components/settings/components/ai/llm-custom-view.tsx:603 +#: src/components/welcome-modal/custom-endpoint-view.tsx:498 msgid "API Base URL" msgstr "API Base URL" -#: src/components/welcome-modal/custom-endpoint-view.tsx:294 -#: src/components/welcome-modal/custom-endpoint-view.tsx:361 -#: src/components/welcome-modal/custom-endpoint-view.tsx:438 -#: src/components/welcome-modal/custom-endpoint-view.tsx:518 -#: 
src/components/settings/views/integrations.tsx:203 #: src/components/settings/components/ai/llm-custom-view.tsx:314 #: src/components/settings/components/ai/llm-custom-view.tsx:409 #: src/components/settings/components/ai/llm-custom-view.tsx:514 #: src/components/settings/components/ai/llm-custom-view.tsx:625 +#: src/components/settings/views/integrations.tsx:203 +#: src/components/welcome-modal/custom-endpoint-view.tsx:294 +#: src/components/welcome-modal/custom-endpoint-view.tsx:361 +#: src/components/welcome-modal/custom-endpoint-view.tsx:438 +#: src/components/welcome-modal/custom-endpoint-view.tsx:518 msgid "API Key" msgstr "API Key" @@ -419,8 +419,8 @@ msgstr "API Key" msgid "Apple" msgstr "Apple" -#: src/components/toolbar/buttons/delete-note-button.tsx:43 #: src/components/left-sidebar/notes-list.tsx:269 +#: src/components/toolbar/buttons/delete-note-button.tsx:43 msgid "Are you sure you want to delete this note?" msgstr "Are you sure you want to delete this note?" @@ -464,8 +464,8 @@ msgstr "Base URL" #~ msgid "Billing features are currently under development and will be available in a future update." #~ msgstr "Billing features are currently under development and will be available in a future update." 
-#: src/components/settings/views/templates.tsx:319 #: src/components/settings/components/templates-sidebar.tsx:68 +#: src/components/settings/views/templates.tsx:319 msgid "Built-in Templates" msgstr "Built-in Templates" @@ -601,12 +601,12 @@ msgstr "Connect your Obsidian vault to export notes" msgid "Contacts Access" msgstr "Contacts Access" -#: src/components/welcome-modal/model-selection-view.tsx:94 -#: src/components/welcome-modal/llm-selection-view.tsx:94 -#: src/components/welcome-modal/download-progress-view.tsx:255 -#: src/components/welcome-modal/custom-endpoint-view.tsx:595 -#: src/components/welcome-modal/calendar-permissions-view.tsx:153 #: src/components/welcome-modal/audio-permissions-view.tsx:189 +#: src/components/welcome-modal/calendar-permissions-view.tsx:153 +#: src/components/welcome-modal/custom-endpoint-view.tsx:595 +#: src/components/welcome-modal/download-progress-view.tsx:255 +#: src/components/welcome-modal/llm-selection-view.tsx:94 +#: src/components/welcome-modal/model-selection-view.tsx:94 msgid "Continue" msgstr "Continue" @@ -630,8 +630,8 @@ msgstr "Control how autonomous the AI enhancement should be." 
#~ msgid "Control how creative the AI enhancement should be" #~ msgstr "Control how creative the AI enhancement should be" -#: src/routes/app.human.$id.tsx:535 #: src/components/editor-area/note-header/chips/participants-chip.tsx:518 +#: src/routes/app.human.$id.tsx:535 msgid "Create" msgstr "Create" @@ -659,8 +659,8 @@ msgstr "Create your first template to get started" #~ msgid "Current Plan" #~ msgstr "Current Plan" -#: src/components/settings/views/ai-stt.tsx:66 #: src/components/settings/views/ai-llm.tsx:671 +#: src/components/settings/views/ai-stt.tsx:66 msgid "Custom" msgstr "Custom" @@ -676,8 +676,8 @@ msgstr "Custom" msgid "Custom Vocabulary" msgstr "Custom Vocabulary" -#: src/components/settings/views/ai-stt.tsx:63 #: src/components/settings/views/ai-llm.tsx:668 +#: src/components/settings/views/ai-stt.tsx:63 msgid "Default" msgstr "Default" @@ -685,9 +685,9 @@ msgstr "Default" #~ msgid "Default (llama-3.2-3b-q4)" #~ msgstr "Default (llama-3.2-3b-q4)" -#: src/components/settings/views/template.tsx:218 -#: src/components/settings/views/team.tsx:165 #: src/components/left-sidebar/notes-list.tsx:336 +#: src/components/settings/views/team.tsx:165 +#: src/components/settings/views/template.tsx:218 msgid "Delete" msgstr "Delete" @@ -873,10 +873,10 @@ msgstr "Grant both permissions to continue" #~ msgid "Help us improve the Hyprnote experience by providing feedback." #~ msgstr "Help us improve the Hyprnote experience by providing feedback." 
-#: src/components/individualization-modal/role-view.tsx:24 -#: src/components/individualization-modal/org-size-view.tsx:24 -#: src/components/individualization-modal/industry-view.tsx:63 #: src/components/individualization-modal/how-heard-view.tsx:33 +#: src/components/individualization-modal/industry-view.tsx:63 +#: src/components/individualization-modal/org-size-view.tsx:24 +#: src/components/individualization-modal/role-view.tsx:24 msgid "Help us tailor your Hyprnote experience" msgstr "Help us tailor your Hyprnote experience" @@ -904,8 +904,8 @@ msgstr "Important Q&As" #~ msgid "Integration with other apps like Notion and Google Calendar" #~ msgstr "Integration with other apps like Notion and Google Calendar" -#: src/routes/app.settings.tsx:67 #: src/components/settings/views/integrations.tsx:124 +#: src/routes/app.settings.tsx:67 msgid "Integrations" msgstr "Integrations" @@ -1047,22 +1047,22 @@ msgstr "Member" msgid "Members" msgstr "Members" -#: src/components/welcome-modal/audio-permissions-view.tsx:165 #: src/components/settings/views/sound.tsx:127 +#: src/components/welcome-modal/audio-permissions-view.tsx:165 msgid "Microphone Access" msgstr "Microphone Access" -#: src/components/welcome-modal/custom-endpoint-view.tsx:315 -#: src/components/welcome-modal/custom-endpoint-view.tsx:382 -#: src/components/welcome-modal/custom-endpoint-view.tsx:459 #: src/components/settings/components/ai/llm-custom-view.tsx:334 #: src/components/settings/components/ai/llm-custom-view.tsx:429 #: src/components/settings/components/ai/llm-custom-view.tsx:534 +#: src/components/welcome-modal/custom-endpoint-view.tsx:315 +#: src/components/welcome-modal/custom-endpoint-view.tsx:382 +#: src/components/welcome-modal/custom-endpoint-view.tsx:459 msgid "Model" msgstr "Model" -#: src/components/welcome-modal/custom-endpoint-view.tsx:544 #: src/components/settings/components/ai/llm-custom-view.tsx:650 +#: src/components/welcome-modal/custom-endpoint-view.tsx:544 msgid "Model Name" 
msgstr "Model Name" @@ -1090,8 +1090,8 @@ msgstr "My Templates" msgid "New note" msgstr "New note" -#: src/components/left-sidebar/notes-list.tsx:313 #: src/components/left-sidebar/events-list.tsx:181 +#: src/components/left-sidebar/notes-list.tsx:313 msgid "New window" msgstr "New window" @@ -1135,7 +1135,7 @@ msgstr "No recent notes for this organization" #~ msgid "No Template" #~ msgstr "No Template" -#: src/components/editor-area/note-header/listen-button.tsx:527 +#: src/components/editor-area/note-header/listen-button.tsx:528 msgid "No Template (Default)" msgstr "No Template (Default)" @@ -1183,9 +1183,9 @@ msgstr "Only works with Custom Endpoints. Please configure one of the above firs msgid "Open finder view" msgstr "Open finder view" -#: src/components/toolbar/buttons/new-window-button.tsx:35 #: src/components/left-sidebar/search-list.tsx:298 #: src/components/left-sidebar/search-list.tsx:368 +#: src/components/toolbar/buttons/new-window-button.tsx:35 msgid "Open in new window" msgstr "Open in new window" @@ -1219,7 +1219,7 @@ msgstr "Others" msgid "Owner" msgstr "Owner" -#: src/components/editor-area/note-header/listen-button.tsx:373 +#: src/components/editor-area/note-header/listen-button.tsx:374 msgid "Pause" msgstr "Pause" @@ -1284,8 +1284,8 @@ msgstr "Recent Notes" #~ msgid "Remove {0} from list" #~ msgstr "Remove {0} from list" -#: src/components/welcome-modal/audio-permissions-view.tsx:79 #: src/components/settings/views/sound.tsx:61 +#: src/components/welcome-modal/audio-permissions-view.tsx:79 msgid "Requesting..." msgstr "Requesting..." @@ -1315,7 +1315,7 @@ msgstr "Role" #~ msgid "Save and close" #~ msgstr "Save and close" -#: src/components/editor-area/note-header/listen-button.tsx:497 +#: src/components/editor-area/note-header/listen-button.tsx:498 msgid "Save current recording" msgstr "Save current recording" @@ -1339,8 +1339,8 @@ msgstr "Saving..." 
msgid "Search names or emails" msgstr "Search names or emails" -#: src/components/settings/components/templates-sidebar.tsx:33 #: src/components/settings/components/template-list.tsx:43 +#: src/components/settings/components/templates-sidebar.tsx:33 msgid "Search templates..." msgstr "Search templates..." @@ -1456,11 +1456,11 @@ msgstr "Spoken languages" #~ msgid "Start Monthly Plan" #~ msgstr "Start Monthly Plan" -#: src/components/editor-area/note-header/listen-button.tsx:201 +#: src/components/editor-area/note-header/listen-button.tsx:202 msgid "Start recording" msgstr "Start recording" -#: src/components/editor-area/note-header/listen-button.tsx:474 +#: src/components/editor-area/note-header/listen-button.tsx:475 msgid "Stop" msgstr "Stop" @@ -1484,8 +1484,8 @@ msgstr "Summary language" #~ msgid "Synchronization across multiple devices" #~ msgstr "Synchronization across multiple devices" -#: src/components/welcome-modal/audio-permissions-view.tsx:175 #: src/components/settings/views/sound.tsx:137 +#: src/components/welcome-modal/audio-permissions-view.tsx:175 msgid "System Audio Access" msgstr "System Audio Access" @@ -1509,7 +1509,7 @@ msgstr "Team management features are currently under development and will be ava msgid "Teamspace" msgstr "Teamspace" -#: src/components/editor-area/note-header/listen-button.tsx:518 +#: src/components/editor-area/note-header/listen-button.tsx:519 msgid "Template" msgstr "Template" @@ -1649,8 +1649,8 @@ msgstr "Vault Name" msgid "View calendar" msgstr "View calendar" -#: src/components/left-sidebar/events-list.tsx:193 #: src/components/editor-area/note-header/chips/event-chip.tsx:209 +#: src/components/left-sidebar/events-list.tsx:193 msgid "View in calendar" msgstr "View in calendar" @@ -1722,7 +1722,7 @@ msgstr "Your LinkedIn username (the part after linkedin.com/in/)" msgid "Your Name" msgstr "Your Name" -#: src/components/settings/views/templates.tsx:255 #: src/components/settings/components/templates-sidebar.tsx:45 +#: 
src/components/settings/views/templates.tsx:255 msgid "Your Templates" msgstr "Your Templates" diff --git a/apps/desktop/src/locales/ko/messages.po b/apps/desktop/src/locales/ko/messages.po index 51b7401b73..febd729f1c 100644 --- a/apps/desktop/src/locales/ko/messages.po +++ b/apps/desktop/src/locales/ko/messages.po @@ -264,14 +264,14 @@ msgstr "" msgid "(Optional)" msgstr "" -#. placeholder {0}: isViewingTemplate ? "Back" : "Save and close" -#. placeholder {0}: isCompact ? "Go" : "Resume" +#. placeholder {0}: disabled ? "Wait..." : "Play again" +#. placeholder {0}: disabled ? "Wait..." : "Play video" #. placeholder {0}: disabled ? "Wait..." : isHovered ? (isCompact ? "Go" : "Resume") : (isCompact ? "End" : "Ended") +#: src/components/editor-area/note-header/listen-button.tsx:153 +#: src/components/editor-area/note-header/listen-button.tsx:231 +#: src/components/editor-area/note-header/listen-button.tsx:253 +#: src/components/editor-area/note-header/listen-button.tsx:273 #: src/components/settings/views/templates.tsx:217 -#: src/components/editor-area/note-header/listen-button.tsx:152 -#: src/components/editor-area/note-header/listen-button.tsx:230 -#: src/components/editor-area/note-header/listen-button.tsx:252 -#: src/components/editor-area/note-header/listen-button.tsx:272 msgid "{0}" msgstr "" @@ -280,8 +280,8 @@ msgstr "" msgid "{0} calendars selected" msgstr "" -#: src/components/welcome-modal/audio-permissions-view.tsx:82 #: src/components/settings/views/sound.tsx:64 +#: src/components/welcome-modal/audio-permissions-view.tsx:82 msgid "{buttonText}" msgstr "" @@ -318,9 +318,9 @@ msgstr "" msgid "Access granted" msgstr "" -#: src/components/welcome-modal/calendar-permissions-view.tsx:50 -#: src/components/welcome-modal/audio-permissions-view.tsx:58 #: src/components/settings/views/sound.tsx:44 +#: src/components/welcome-modal/audio-permissions-view.tsx:58 +#: src/components/welcome-modal/calendar-permissions-view.tsx:50 msgid "Access Granted" msgstr "" @@ 
-398,20 +398,20 @@ msgstr "" msgid "Anyone with the link can view this page" msgstr "" -#: src/components/welcome-modal/custom-endpoint-view.tsx:498 #: src/components/settings/components/ai/llm-custom-view.tsx:603 +#: src/components/welcome-modal/custom-endpoint-view.tsx:498 msgid "API Base URL" msgstr "" -#: src/components/welcome-modal/custom-endpoint-view.tsx:294 -#: src/components/welcome-modal/custom-endpoint-view.tsx:361 -#: src/components/welcome-modal/custom-endpoint-view.tsx:438 -#: src/components/welcome-modal/custom-endpoint-view.tsx:518 -#: src/components/settings/views/integrations.tsx:203 #: src/components/settings/components/ai/llm-custom-view.tsx:314 #: src/components/settings/components/ai/llm-custom-view.tsx:409 #: src/components/settings/components/ai/llm-custom-view.tsx:514 #: src/components/settings/components/ai/llm-custom-view.tsx:625 +#: src/components/settings/views/integrations.tsx:203 +#: src/components/welcome-modal/custom-endpoint-view.tsx:294 +#: src/components/welcome-modal/custom-endpoint-view.tsx:361 +#: src/components/welcome-modal/custom-endpoint-view.tsx:438 +#: src/components/welcome-modal/custom-endpoint-view.tsx:518 msgid "API Key" msgstr "" @@ -419,8 +419,8 @@ msgstr "" msgid "Apple" msgstr "" -#: src/components/toolbar/buttons/delete-note-button.tsx:43 #: src/components/left-sidebar/notes-list.tsx:269 +#: src/components/toolbar/buttons/delete-note-button.tsx:43 msgid "Are you sure you want to delete this note?" msgstr "" @@ -464,8 +464,8 @@ msgstr "" #~ msgid "Billing features are currently under development and will be available in a future update." 
#~ msgstr "" -#: src/components/settings/views/templates.tsx:319 #: src/components/settings/components/templates-sidebar.tsx:68 +#: src/components/settings/views/templates.tsx:319 msgid "Built-in Templates" msgstr "" @@ -601,12 +601,12 @@ msgstr "" msgid "Contacts Access" msgstr "" -#: src/components/welcome-modal/model-selection-view.tsx:94 -#: src/components/welcome-modal/llm-selection-view.tsx:94 -#: src/components/welcome-modal/download-progress-view.tsx:255 -#: src/components/welcome-modal/custom-endpoint-view.tsx:595 -#: src/components/welcome-modal/calendar-permissions-view.tsx:153 #: src/components/welcome-modal/audio-permissions-view.tsx:189 +#: src/components/welcome-modal/calendar-permissions-view.tsx:153 +#: src/components/welcome-modal/custom-endpoint-view.tsx:595 +#: src/components/welcome-modal/download-progress-view.tsx:255 +#: src/components/welcome-modal/llm-selection-view.tsx:94 +#: src/components/welcome-modal/model-selection-view.tsx:94 msgid "Continue" msgstr "" @@ -630,8 +630,8 @@ msgstr "" #~ msgid "Control how creative the AI enhancement should be" #~ msgstr "" -#: src/routes/app.human.$id.tsx:535 #: src/components/editor-area/note-header/chips/participants-chip.tsx:518 +#: src/routes/app.human.$id.tsx:535 msgid "Create" msgstr "" @@ -659,8 +659,8 @@ msgstr "" #~ msgid "Current Plan" #~ msgstr "" -#: src/components/settings/views/ai-stt.tsx:66 #: src/components/settings/views/ai-llm.tsx:671 +#: src/components/settings/views/ai-stt.tsx:66 msgid "Custom" msgstr "" @@ -676,8 +676,8 @@ msgstr "" msgid "Custom Vocabulary" msgstr "" -#: src/components/settings/views/ai-stt.tsx:63 #: src/components/settings/views/ai-llm.tsx:668 +#: src/components/settings/views/ai-stt.tsx:63 msgid "Default" msgstr "" @@ -685,9 +685,9 @@ msgstr "" #~ msgid "Default (llama-3.2-3b-q4)" #~ msgstr "" -#: src/components/settings/views/template.tsx:218 -#: src/components/settings/views/team.tsx:165 #: src/components/left-sidebar/notes-list.tsx:336 +#: 
src/components/settings/views/team.tsx:165 +#: src/components/settings/views/template.tsx:218 msgid "Delete" msgstr "" @@ -873,10 +873,10 @@ msgstr "" #~ msgid "Help us improve the Hyprnote experience by providing feedback." #~ msgstr "" -#: src/components/individualization-modal/role-view.tsx:24 -#: src/components/individualization-modal/org-size-view.tsx:24 -#: src/components/individualization-modal/industry-view.tsx:63 #: src/components/individualization-modal/how-heard-view.tsx:33 +#: src/components/individualization-modal/industry-view.tsx:63 +#: src/components/individualization-modal/org-size-view.tsx:24 +#: src/components/individualization-modal/role-view.tsx:24 msgid "Help us tailor your Hyprnote experience" msgstr "" @@ -904,8 +904,8 @@ msgstr "" #~ msgid "Integration with other apps like Notion and Google Calendar" #~ msgstr "" -#: src/routes/app.settings.tsx:67 #: src/components/settings/views/integrations.tsx:124 +#: src/routes/app.settings.tsx:67 msgid "Integrations" msgstr "" @@ -1047,22 +1047,22 @@ msgstr "" msgid "Members" msgstr "" -#: src/components/welcome-modal/audio-permissions-view.tsx:165 #: src/components/settings/views/sound.tsx:127 +#: src/components/welcome-modal/audio-permissions-view.tsx:165 msgid "Microphone Access" msgstr "" -#: src/components/welcome-modal/custom-endpoint-view.tsx:315 -#: src/components/welcome-modal/custom-endpoint-view.tsx:382 -#: src/components/welcome-modal/custom-endpoint-view.tsx:459 #: src/components/settings/components/ai/llm-custom-view.tsx:334 #: src/components/settings/components/ai/llm-custom-view.tsx:429 #: src/components/settings/components/ai/llm-custom-view.tsx:534 +#: src/components/welcome-modal/custom-endpoint-view.tsx:315 +#: src/components/welcome-modal/custom-endpoint-view.tsx:382 +#: src/components/welcome-modal/custom-endpoint-view.tsx:459 msgid "Model" msgstr "" -#: src/components/welcome-modal/custom-endpoint-view.tsx:544 #: src/components/settings/components/ai/llm-custom-view.tsx:650 +#: 
src/components/welcome-modal/custom-endpoint-view.tsx:544 msgid "Model Name" msgstr "" @@ -1090,8 +1090,8 @@ msgstr "" msgid "New note" msgstr "" -#: src/components/left-sidebar/notes-list.tsx:313 #: src/components/left-sidebar/events-list.tsx:181 +#: src/components/left-sidebar/notes-list.tsx:313 msgid "New window" msgstr "" @@ -1135,7 +1135,7 @@ msgstr "" #~ msgid "No Template" #~ msgstr "" -#: src/components/editor-area/note-header/listen-button.tsx:527 +#: src/components/editor-area/note-header/listen-button.tsx:528 msgid "No Template (Default)" msgstr "" @@ -1183,9 +1183,9 @@ msgstr "" msgid "Open finder view" msgstr "" -#: src/components/toolbar/buttons/new-window-button.tsx:35 #: src/components/left-sidebar/search-list.tsx:298 #: src/components/left-sidebar/search-list.tsx:368 +#: src/components/toolbar/buttons/new-window-button.tsx:35 msgid "Open in new window" msgstr "" @@ -1219,7 +1219,7 @@ msgstr "" msgid "Owner" msgstr "" -#: src/components/editor-area/note-header/listen-button.tsx:373 +#: src/components/editor-area/note-header/listen-button.tsx:374 msgid "Pause" msgstr "" @@ -1284,8 +1284,8 @@ msgstr "" #~ msgid "Remove {0} from list" #~ msgstr "" -#: src/components/welcome-modal/audio-permissions-view.tsx:79 #: src/components/settings/views/sound.tsx:61 +#: src/components/welcome-modal/audio-permissions-view.tsx:79 msgid "Requesting..." msgstr "" @@ -1315,7 +1315,7 @@ msgstr "" #~ msgid "Save and close" #~ msgstr "" -#: src/components/editor-area/note-header/listen-button.tsx:497 +#: src/components/editor-area/note-header/listen-button.tsx:498 msgid "Save current recording" msgstr "" @@ -1339,8 +1339,8 @@ msgstr "" msgid "Search names or emails" msgstr "" -#: src/components/settings/components/templates-sidebar.tsx:33 #: src/components/settings/components/template-list.tsx:43 +#: src/components/settings/components/templates-sidebar.tsx:33 msgid "Search templates..." 
msgstr "" @@ -1456,11 +1456,11 @@ msgstr "" #~ msgid "Start Monthly Plan" #~ msgstr "" -#: src/components/editor-area/note-header/listen-button.tsx:201 +#: src/components/editor-area/note-header/listen-button.tsx:202 msgid "Start recording" msgstr "" -#: src/components/editor-area/note-header/listen-button.tsx:474 +#: src/components/editor-area/note-header/listen-button.tsx:475 msgid "Stop" msgstr "" @@ -1484,8 +1484,8 @@ msgstr "" #~ msgid "Synchronization across multiple devices" #~ msgstr "" -#: src/components/welcome-modal/audio-permissions-view.tsx:175 #: src/components/settings/views/sound.tsx:137 +#: src/components/welcome-modal/audio-permissions-view.tsx:175 msgid "System Audio Access" msgstr "" @@ -1509,7 +1509,7 @@ msgstr "" msgid "Teamspace" msgstr "" -#: src/components/editor-area/note-header/listen-button.tsx:518 +#: src/components/editor-area/note-header/listen-button.tsx:519 msgid "Template" msgstr "" @@ -1649,8 +1649,8 @@ msgstr "" msgid "View calendar" msgstr "" -#: src/components/left-sidebar/events-list.tsx:193 #: src/components/editor-area/note-header/chips/event-chip.tsx:209 +#: src/components/left-sidebar/events-list.tsx:193 msgid "View in calendar" msgstr "" @@ -1722,7 +1722,7 @@ msgstr "" msgid "Your Name" msgstr "" -#: src/components/settings/views/templates.tsx:255 #: src/components/settings/components/templates-sidebar.tsx:45 +#: src/components/settings/views/templates.tsx:255 msgid "Your Templates" msgstr "" diff --git a/owhisper/owhisper-client/src/lib.rs b/owhisper/owhisper-client/src/lib.rs index fa5c85f823..6014b275b6 100644 --- a/owhisper/owhisper-client/src/lib.rs +++ b/owhisper/owhisper-client/src/lib.rs @@ -69,11 +69,27 @@ impl ListenClientBuilder { { let mut query_pairs = url.query_pairs_mut(); - // TODO // https://developers.deepgram.com/docs/language-detection#restricting-the-detectable-languages - for lang in &params.languages { - query_pairs.append_pair("languages", lang.iso639().code()); + // 
https://www.rfc-editor.org/info/bcp47 + match params.languages.len() { + 0 => { + query_pairs.append_pair("detect_language", "true"); + } + 1 => { + query_pairs.append_pair("language", params.languages[0].iso639().code()); + } + _ => { + query_pairs.append_pair("language", params.languages[0].iso639().code()); + + for lang in &params.languages { + let code = lang.iso639().code(); + + query_pairs.append_pair("languages", code); + query_pairs.append_pair("detect_language", code); + } + } } + query_pairs // https://developers.deepgram.com/reference/speech-to-text-api/listen-streaming#handshake .append_pair("model", &params.model.unwrap_or("hypr-whisper".to_string())) diff --git a/owhisper/owhisper-interface/src/lib.rs b/owhisper/owhisper-interface/src/lib.rs index 7ee29841be..c7f0c2c29f 100644 --- a/owhisper/owhisper-interface/src/lib.rs +++ b/owhisper/owhisper-interface/src/lib.rs @@ -33,13 +33,13 @@ common_derives! { impl From<Word> for Word2 { fn from(word: Word) -> Self { Word2 { - text: word.word.trim().to_string(), + text: word.word.to_string(), speaker: word .speaker .map(|s| SpeakerIdentity::Unassigned { index: s as u8 }), confidence: Some(word.confidence as f32), - start_ms: Some(word.start as u64), - end_ms: Some(word.end as u64), + start_ms: Some((word.start * 1000.0) as u64), + end_ms: Some((word.end * 1000.0) as u64), } } } diff --git a/plugins/listener/src/fsm.rs b/plugins/listener/src/fsm.rs index 232fb88839..e14b4713b0 100644 --- a/plugins/listener/src/fsm.rs +++ b/plugins/listener/src/fsm.rs @@ -468,11 +468,16 @@ impl Session { Ok(Some(response)) => { let diff = manager.append(response.clone()); - let partial_words = diff + let mut partial_words = diff .partial_words .iter() .map(|w| owhisper_interface::Word2::from(w.clone())) .collect::>(); + partial_words.sort_by(|a, b| { + a.start_ms + .partial_cmp(&b.start_ms) + .unwrap_or(std::cmp::Ordering::Equal) + }); SessionEvent::PartialWords { words: partial_words, @@ -480,11 +485,17 @@ impl Session { .emit(&app) 
.unwrap(); - let final_words = diff + let mut final_words = diff .final_words .iter() .map(|w| owhisper_interface::Word2::from(w.clone())) .collect::>(); + println!("final_words: {:#?}", final_words); + final_words.sort_by(|a, b| { + a.start_ms + .partial_cmp(&b.start_ms) + .unwrap_or(std::cmp::Ordering::Equal) + }); update_session(&app, &session.id, final_words.clone()) .await diff --git a/plugins/listener/src/manager.rs b/plugins/listener/src/manager.rs index 4f5120c87c..53ac4c2931 100644 --- a/plugins/listener/src/manager.rs +++ b/plugins/listener/src/manager.rs @@ -53,27 +53,44 @@ impl TranscriptManager { { let data = &channel.alternatives[0]; - let words = data - .words - .clone() - .into_iter() - .filter_map(|mut w| { - w.word = w.word.trim().to_string(); - if w.word.is_empty() { - None + let words = { + let mut ws = data + .words + .clone() + .into_iter() + .filter_map(|mut w| { + w.word = w.word.trim().to_string(); + if w.word.is_empty() { + None + } else { + Some(w) + } + }) + .map(|mut w| { + if w.speaker.is_none() { + let speaker = channel_index.first().unwrap().clone(); + w.speaker = Some(speaker); + } + + w + }) + .collect::>(); + + let mut i = 1; + while i < ws.len() { + if ws[i].word.starts_with('\'') { + let current_word = ws[i].word.clone(); + let current_end = ws[i].end; + ws[i - 1].word.push_str(&current_word); + ws[i - 1].end = current_end; + ws.remove(i); } else { - Some(w) - } - }) - .map(|mut w| { - if w.speaker.is_none() { - let speaker = channel_index.first().unwrap().clone(); - w.speaker = Some(speaker); + i += 1; } + } - w - }) - .collect::>(); + ws + }; if is_final { let last_final_word_end = words.last().unwrap().end; From 18762f3dde971bd3fe30932029b492ba15a3f2f5 Mon Sep 17 00:00:00 2001 From: Yujong Lee Date: Wed, 20 Aug 2025 17:11:38 -0700 Subject: [PATCH 5/5] single language fixes --- owhisper/owhisper-client/src/lib.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/owhisper/owhisper-client/src/lib.rs 
b/owhisper/owhisper-client/src/lib.rs index 6014b275b6..29aa0937d5 100644 --- a/owhisper/owhisper-client/src/lib.rs +++ b/owhisper/owhisper-client/src/lib.rs @@ -76,7 +76,9 @@ impl ListenClientBuilder { query_pairs.append_pair("detect_language", "true"); } 1 => { - query_pairs.append_pair("language", params.languages[0].iso639().code()); + let code = params.languages[0].iso639().code(); + query_pairs.append_pair("language", code); + query_pairs.append_pair("languages", code); } _ => { query_pairs.append_pair("language", params.languages[0].iso639().code());