Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions apps/web/content/docs/developers/13.languages.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
title: "Language Support"
section: "Developers"
description: "Learn about language support in Hyprnote"
---

## Every Provider/Model has its own supported languages

- Whisper (50-60 languages)
- Deepgram (supported languages vary by model)

## Every Provider/Model has different ways to specify languages
34 changes: 18 additions & 16 deletions crates/ws/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,9 @@ use futures_util::{
future::{pending, FutureExt},
SinkExt, Stream, StreamExt,
};
use tokio_tungstenite::{
connect_async,
tungstenite::{client::IntoClientRequest, Utf8Bytes},
};
use tokio_tungstenite::{connect_async, tungstenite::client::IntoClientRequest};

pub use tokio_tungstenite::tungstenite::{protocol::Message, ClientRequestBuilder};
pub use tokio_tungstenite::tungstenite::{protocol::Message, ClientRequestBuilder, Utf8Bytes};

#[derive(Debug)]
enum ControlCommand {
Expand Down Expand Up @@ -70,6 +67,7 @@ impl WebSocketClient {

pub async fn from_audio<T: WebSocketIO>(
&self,
initial_message: Option<Message>,
mut audio_stream: impl Stream<Item = T::Data> + Send + Unpin + 'static,
) -> Result<
(
Expand Down Expand Up @@ -99,6 +97,14 @@ impl WebSocketClient {
let handle = WebSocketHandle { control_tx };

let _send_task = tokio::spawn(async move {
if let Some(msg) = initial_message {
if let Err(e) = ws_sender.send(msg).await {
tracing::error!("ws_initial_message_failed: {:?}", e);
let _ = error_tx.send(e.into());
return;
}
}

let mut last_outbound_at = tokio::time::Instant::now();
loop {
let mut keep_alive_fut = if let Some(cfg) = keep_alive_config.as_ref() {
Expand Down Expand Up @@ -131,18 +137,14 @@ impl WebSocketClient {
}
last_outbound_at = tokio::time::Instant::now();
}
Some(cmd) = control_rx.recv() => {
match cmd {
ControlCommand::Finalize(maybe_msg) => {
if let Some(msg) = maybe_msg {
if let Err(e) = ws_sender.send(msg).await {
tracing::error!("ws_finalize_failed: {:?}", e);
let _ = error_tx.send(e.into());
break;
}
last_outbound_at = tokio::time::Instant::now();
}
Some(ControlCommand::Finalize(maybe_msg)) = control_rx.recv() => {
if let Some(msg) = maybe_msg {
if let Err(e) = ws_sender.send(msg).await {
tracing::error!("ws_finalize_failed: {:?}", e);
let _ = error_tx.send(e.into());
break;
}
last_outbound_at = tokio::time::Instant::now();
}
}
else => break,
Expand Down
8 changes: 4 additions & 4 deletions crates/ws/tests/client_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ async fn test_basic_echo() {
];

let stream = futures_util::stream::iter(messages.clone());
let (output, _handle) = client.from_audio::<TestIO>(stream).await.unwrap();
let (output, _handle) = client.from_audio::<TestIO>(None, stream).await.unwrap();

let received = collect_messages::<TestIO>(output, 2).await;
assert_eq!(received, messages);
Expand All @@ -115,7 +115,7 @@ async fn test_finalize() {
text: "initial".to_string(),
count: 1,
}]);
let (output, handle) = client.from_audio::<TestIO>(stream).await.unwrap();
let (output, handle) = client.from_audio::<TestIO>(None, stream).await.unwrap();

let final_msg = TestMessage {
text: "final".to_string(),
Expand Down Expand Up @@ -169,7 +169,7 @@ async fn test_keep_alive() {
);

let stream = futures_util::stream::pending::<TestMessage>();
let (output, _handle) = client.from_audio::<TestIO>(stream).await.unwrap();
let (output, _handle) = client.from_audio::<TestIO>(None, stream).await.unwrap();

let received = collect_messages::<TestIO>(output, 1).await;
assert_eq!(received[0].text, "done");
Expand Down Expand Up @@ -216,7 +216,7 @@ async fn test_retry() {
text: "retry_test".to_string(),
count: 1,
}]);
let (output, _handle) = client.from_audio::<TestIO>(stream).await.unwrap();
let (output, _handle) = client.from_audio::<TestIO>(None, stream).await.unwrap();

let received = collect_messages::<TestIO>(output, 1).await;
assert_eq!(received[0].text, "retry_test");
Expand Down
4 changes: 3 additions & 1 deletion owhisper/owhisper-client/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@ hypr-ws = { workspace = true }
owhisper-interface = { workspace = true }

futures-util = { workspace = true }
reqwest = { workspace = true, features = ["json"] }
reqwest = { workspace = true, features = ["json", "multipart"] }
tokio = { workspace = true }
tokio-stream = { workspace = true }

bytes = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }
Expand Down
93 changes: 93 additions & 0 deletions owhisper/owhisper-client/src/adapter/argmax.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
use std::path::Path;

use hypr_ws::client::Message;
use owhisper_interface::stream::StreamResponse;
use owhisper_interface::ListenParams;

use super::{BatchFuture, DeepgramAdapter, SttAdapter};

/// STT adapter that wraps a [`DeepgramAdapter`].
///
/// The `SttAdapter` implementation for this type forwards every call to the
/// wrapped adapter, except `supports_native_multichannel`, which it answers
/// itself with `false`.
#[derive(Clone, Default)]
pub struct ArgmaxAdapter {
/// Wrapped adapter that receives all forwarded `SttAdapter` calls.
inner: DeepgramAdapter,
}

/// `SttAdapter` for [`ArgmaxAdapter`]: everything except the multichannel
/// capability flag is forwarded unchanged to the adapter held in `self.inner`.
impl SttAdapter for ArgmaxAdapter {
    /// Always reports `false`; this is the only method that is not forwarded.
    fn supports_native_multichannel(&self) -> bool {
        false
    }

    /// Returns whatever URL the wrapped adapter builds for the same arguments.
    fn build_ws_url(&self, api_base: &str, params: &ListenParams, channels: u8) -> url::Url {
        let delegate = &self.inner;
        delegate.build_ws_url(api_base, params, channels)
    }

    /// Returns the auth header (name, value) chosen by the wrapped adapter, if any.
    fn build_auth_header(&self, api_key: Option<&str>) -> Option<(&'static str, String)> {
        let delegate = &self.inner;
        delegate.build_auth_header(api_key)
    }

    /// Returns the wrapped adapter's keep-alive message, if it defines one.
    fn keep_alive_message(&self) -> Option<Message> {
        let delegate = &self.inner;
        delegate.keep_alive_message()
    }

    /// Returns the wrapped adapter's finalize message.
    fn finalize_message(&self) -> Message {
        let delegate = &self.inner;
        delegate.finalize_message()
    }

    /// Parses `raw` with the wrapped adapter's parser; `None` is passed through as-is.
    fn parse_response(&self, raw: &str) -> Option<StreamResponse> {
        let delegate = &self.inner;
        delegate.parse_response(raw)
    }

    /// Hands the batch transcription request for `file_path` to the wrapped
    /// adapter and returns its future untouched.
    fn transcribe_file<'a, P: AsRef<Path> + Send + 'a>(
        &'a self,
        client: &'a reqwest::Client,
        api_base: &'a str,
        api_key: &'a str,
        params: &'a ListenParams,
        file_path: P,
    ) -> BatchFuture<'a> {
        let delegate = &self.inner;
        delegate.transcribe_file(client, api_base, api_key, params, file_path)
    }
}

#[cfg(test)]
mod tests {
    use super::ArgmaxAdapter;

    use futures_util::StreamExt;
    use hypr_audio_utils::AudioFormatExt;

    use crate::live::ListenClientInput;
    use crate::ListenClientBuilder;

    /// Streams the sample at `hypr_data::english_1::AUDIO_PATH` through a
    /// single-channel client built with [`ArgmaxAdapter`] and prints every
    /// item the response stream yields.
    ///
    /// This is a manual integration test: it connects to
    /// `ws://localhost:50060/v1` and unwraps the result, so it fails whenever
    /// no server is listening there. It is therefore ignored by default; run
    /// it explicitly with `cargo test test_client -- --ignored --nocapture`.
    #[tokio::test]
    #[ignore = "requires a speech-to-text server listening on ws://localhost:50060"]
    async fn test_client() {
        // Decode the bundled sample and split it into i16 little-endian chunks.
        // NOTE(review): both `16000` arguments are passed straight through to
        // `to_i16_le_chunks`; confirm their meaning against that helper.
        let audio = rodio::Decoder::new(std::io::BufReader::new(
            std::fs::File::open(hypr_data::english_1::AUDIO_PATH).unwrap(),
        ))
        .unwrap()
        .to_i16_le_chunks(16000, 16000);

        // Throttle the chunk stream so items are emitted at most once every 20 ms
        // instead of all at once.
        let input = Box::pin(tokio_stream::StreamExt::throttle(
            audio.map(|chunk| ListenClientInput::Audio(bytes::Bytes::from(chunk.to_vec()))),
            std::time::Duration::from_millis(20),
        ));

        let client = ListenClientBuilder::default()
            .api_base("ws://localhost:50060/v1")
            .api_key("".to_string())
            .params(owhisper_interface::ListenParams {
                model: Some("large-v3-v20240930_626MB".to_string()),
                languages: vec![hypr_language::ISO639::En.into()],
                ..Default::default()
            })
            .adapter::<ArgmaxAdapter>()
            .build_single();

        let (stream, _) = client.from_realtime_audio(input).await.unwrap();
        futures_util::pin_mut!(stream);

        // Drain the response stream until the server closes it.
        while let Some(result) = stream.next().await {
            println!("{:?}", result);
        }
    }
}
Loading