Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

stenographer interface #175

Open
wants to merge 23 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
10107d4
chore: Update ONNX Runtime version to 1.19.2 and adjust corresponding…
royshil Sep 14, 2024
0b34106
Merge commit '10107d482e3dd41057c5c9c930f6710aabf674c5' into roy.sten…
royshil Oct 1, 2024
6a5e1e7
refactor: Add stenographer options and resample utils
royshil Oct 1, 2024
b3a0316
refactor: Add stenographer delay option
royshil Oct 6, 2024
db155bb
refactor: Add resample-utils.cpp and update stenographer delay
royshil Oct 7, 2024
fcb79ef
refactor: Update stenographer delay variable name
royshil Oct 7, 2024
49538e7
refactor: Update stenographer interface buttons and add pause/resume …
royshil Oct 7, 2024
37f8439
Revert "refactor: Update stenographer interface buttons and add pause…
royshil Oct 8, 2024
bca37a7
refactor: Clear stenographer delay buffers when resetting caption state
royshil Oct 8, 2024
f5b2189
Merge commit 'e26819cf9a34eaa859d877bc9fe05796fd74a8f8' into roy.sten…
royshil Oct 9, 2024
aefd87d
Refactor CMakeLists.txt and stenographer.cpp
royshil Oct 10, 2024
1de9357
Refactor CMakeLists.txt and stenographer.cpp
royshil Oct 10, 2024
0c1ee70
Refactor CMakeLists.txt and stenographer.cpp
royshil Oct 10, 2024
6ccc44b
Update asio URL_HASH in FetchWebsocketpp.cmake
royshil Oct 10, 2024
283bf34
Refactor CMakeLists.txt to include the correct path for asio library
royshil Oct 10, 2024
f5dc4c8
Refactor WebSocket server initialization and communication in stenogr…
royshil Oct 10, 2024
e979fca
Refactor stenographer-util.h and stenographer-util.cpp
royshil Oct 10, 2024
f173aa8
Fix wspp cmake
royshil Oct 10, 2024
f6639e9
Merge branch 'master' into roy.stenographer_interface
royshil Oct 14, 2024
fe8875b
Refactor translation.cpp to remove commented code and improve detoken…
royshil Oct 14, 2024
5d441fc
Refactor translation.cpp to improve detokenization logic and handle t…
royshil Oct 14, 2024
a6b670e
Refactor en-US.ini to add translation_remove_punctuation_from_start o…
royshil Oct 14, 2024
704ca5d
Refactor stenographer_interface.html and transcription-filter.cpp
royshil Oct 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 30 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,32 @@ include(cmake/BuildICU.cmake)
target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE ICU)
target_include_directories(${CMAKE_PROJECT_NAME} SYSTEM PUBLIC ${ICU_INCLUDE_DIR})

# Add the include directories needed for the WebSocket++/ASIO based code to the plugin target.
# Windows/macOS: use the include directory of the obs-deps package under .deps/.
# NOTE(review): presumably that prebuilt package ships the websocketpp/asio headers - confirm.
if(WIN32 OR APPLE)
# Load buildspec.json unless a `buildspec` variable has already been set.
if(NOT buildspec)
file(READ "${CMAKE_CURRENT_SOURCE_DIR}/buildspec.json" buildspec)
endif()
# Read dependencies.prebuilt.version from the buildspec JSON into `version`.
string(
JSON
version
GET
${buildspec}
dependencies
prebuilt
version)
# Choose the architecture suffix used in the obs-deps directory name.
# NOTE(review): `arch` stays unset for a non-MSVC Windows toolchain, and CMAKE_GENERATOR_PLATFORM
# may be empty depending on the generator - confirm those configurations are not expected here.
if(MSVC)
set(arch ${CMAKE_GENERATOR_PLATFORM})
elseif(APPLE)
set(arch universal)
endif()
set(deps_root "${CMAKE_CURRENT_SOURCE_DIR}/.deps/obs-deps-${version}-${arch}")
message(STATUS "deps_root: ${deps_root}")
target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE "${deps_root}/include")
else()
# Other platforms: download WebSocket++ and ASIO via cmake/FetchWebsocketpp.cmake, link the
# `websocketpp` target and add ASIO's include directory from the fetched source tree.
include(cmake/FetchWebsocketpp.cmake)
target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE websocketpp)
target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE ${asio_SOURCE_DIR}/asio/include/)
endif()

target_sources(
${CMAKE_PROJECT_NAME}
PRIVATE src/plugin-main.c
Expand All @@ -120,12 +146,15 @@ target_sources(
src/whisper-utils/silero-vad-onnx.cpp
src/whisper-utils/token-buffer-thread.cpp
src/whisper-utils/vad-processing.cpp
src/whisper-utils/resample-utils.cpp
src/translation/language_codes.cpp
src/translation/translation.cpp
src/translation/translation-utils.cpp
src/ui/filter-replace-utils.cpp
src/translation/translation-language-utils.cpp
src/ui/filter-replace-dialog.cpp)
src/ui/filter-replace-dialog.cpp
src/stenographer/stenographer.cpp
src/stenographer/stenographer-util.cpp)

set_target_properties_plugin(${CMAKE_PROJECT_NAME} PROPERTIES OUTPUT_NAME ${_name})

Expand Down
24 changes: 24 additions & 0 deletions cmake/FetchWebsocketpp.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Downloads the WebSocket++ (tag 0.8.2) and ASIO (tag asio-1-28-0) source archives with FetchContent.
# Defines the `websocketpp` INTERFACE target whose include directory is the fetched source root.
# This file itself defines no target for ASIO; consumers use ${asio_SOURCE_DIR} for its include path.
include(FetchContent)

# Archive is pinned by SHA256 so a changed download fails the configure step.
FetchContent_Declare(
websocketpp
URL https://github.com/zaphoyd/websocketpp/archive/refs/tags/0.8.2.tar.gz
URL_HASH SHA256=6ce889d85ecdc2d8fa07408d6787e7352510750daa66b5ad44aacb47bea76755)

# Only download the content, don't configure or build it
FetchContent_GetProperties(websocketpp)
if(NOT websocketpp_POPULATED)
FetchContent_Populate(websocketpp)
endif()

# Add WebSocket++ as an interface library
add_library(websocketpp INTERFACE)
target_include_directories(websocketpp INTERFACE ${websocketpp_SOURCE_DIR})

# Fetch ASIO
FetchContent_Declare(
asio
URL https://github.com/chriskohlhoff/asio/archive/asio-1-28-0.tar.gz
URL_HASH SHA256=226438b0798099ad2a202563a83571ce06dd13b570d8fded4840dbc1f97fa328)

# NOTE(review): websocketpp was already populated manually above and is passed here again;
# presumably this is a no-op for it and only ASIO is fetched at this point - confirm.
FetchContent_MakeAvailable(websocketpp asio)
8 changes: 4 additions & 4 deletions cmake/linux/compilerconfig.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ set(_obs_gcc_c_options
-Wformat-security
-Wno-conversion
-Wno-deprecated-declarations
-Wno-error=conversion
-Wno-error=deprecated-declarations
-Wno-float-conversion
-Wno-implicit-fallthrough
Expand All @@ -42,14 +43,13 @@ set(_obs_gcc_c_options
-Wvla)

# gcc options for C++
set(_obs_gcc_cxx_options
# cmake-format: sortable
${_obs_gcc_c_options} -Wconversion -Wfloat-conversion -Winvalid-offsetof -Wno-overloaded-virtual)
set(_obs_gcc_cxx_options # cmake-format: sortable
${_obs_gcc_c_options} -Winvalid-offsetof -Wno-overloaded-virtual)

add_compile_options(
-fopenmp-simd
"$<$<COMPILE_LANG_AND_ID:C,GNU>:${_obs_gcc_c_options}>"
"$<$<COMPILE_LANG_AND_ID:C,GNU>:-Wint-conversion;-Wno-missing-prototypes;-Wno-strict-prototypes;-Wpointer-sign>"
"$<$<COMPILE_LANG_AND_ID:C,GNU>:-Wno-missing-prototypes;-Wno-strict-prototypes;-Wpointer-sign>"
"$<$<COMPILE_LANG_AND_ID:CXX,GNU>:${_obs_gcc_cxx_options}>"
"$<$<COMPILE_LANG_AND_ID:C,Clang>:${_obs_clang_c_options}>"
"$<$<COMPILE_LANG_AND_ID:CXX,Clang>:${_obs_clang_cxx_options}>")
Expand Down
5 changes: 4 additions & 1 deletion data/locale/en-US.ini
Original file line number Diff line number Diff line change
Expand Up @@ -87,4 +87,7 @@ Active_VAD="Active VAD"
Hybrid_VAD="Hybrid VAD"
translate_only_full_sentences="Translate only full sentences"
duration_filter_threshold="Duration filter"
segment_duration="Segment duration"
segment_duration="Segment duration"
stenographer_parameters="Stenographer Options"
stenographer_delay="Audio Delay"
translation_remove_punctuation_from_start="Remove punctuation from sentence start"
67 changes: 67 additions & 0 deletions src/stenographer/stenographer-util.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@

#include "stenographer-util.h"
#include "transcription-filter-data.h"
#include "transcription-utils.h"

#include <obs.h>

#include <algorithm>
#include <cstring>
#include <vector>

/**
 * @brief Applies a simple delay to the audio data for stenographer mode.
 *
 * Each call appends the incoming frames of every channel to that channel's
 * delay buffer in gf->stenographer_delay_buffers. Once channel 0's buffer holds
 * at least the configured delay (gf->stenographer_delay_ms converted to frames
 * at gf->sample_rate) plus the current packet, the oldest audio->frames samples
 * of each channel are removed from the buffer and written over the packet in
 * place. Until then the packet is overwritten with silence while the incoming
 * audio stays queued.
 *
 * Samples are copied directly between the packet and the delay buffers; no
 * temporary containers are allocated per call.
 *
 * @param gf Pointer to the transcription filter data structure containing the
 *           delay buffers and configuration (channels, sample_rate,
 *           stenographer_delay_ms).
 * @param audio Pointer to the audio data structure containing the audio frames
 *              to be processed. Each channel plane is read and written as
 *              float samples.
 * @return The same audio pointer that was passed in, now holding either the
 *         delayed audio or silence.
 */
struct obs_audio_data *stenographer_simple_delay(transcription_filter_data *gf,
						 struct obs_audio_data *audio)
{
	const auto frames = audio->frames;

	// With no channels there is nothing to queue or emit, and channel 0's
	// buffer must not be inspected below.
	if (gf->channels == 0) {
		return audio;
	}

	// Queue the incoming samples at the back of each channel's delay buffer.
	for (size_t c = 0; c < gf->channels; c++) {
		const float *incoming = reinterpret_cast<const float *>(audio->data[c]);
		auto &delay_buffer = gf->stenographer_delay_buffers[c];
		delay_buffer.insert(delay_buffer.end(), incoming, incoming + frames);
	}

	// Number of queued frames required before delayed audio can be emitted:
	// the configured delay plus the packet that was just queued.
	const size_t delay_frames =
		static_cast<size_t>(static_cast<float>(gf->sample_rate) *
				    static_cast<float>(gf->stenographer_delay_ms) / 1000.0f) +
		frames;

	if (gf->stenographer_delay_buffers[0].size() >= delay_frames) {
		// Replace the packet contents with the oldest queued samples.
		for (size_t c = 0; c < gf->channels; c++) {
			auto &delay_buffer = gf->stenographer_delay_buffers[c];
			float *outgoing = reinterpret_cast<float *>(audio->data[c]);

			// Take exactly `frames` samples from the front of the buffer,
			// then drop them from the queue.
			const auto oldest_end = delay_buffer.begin() + frames;
			std::copy(delay_buffer.begin(), oldest_end, outgoing);
			delay_buffer.erase(delay_buffer.begin(), oldest_end);
		}
	} else {
		// Not enough audio queued yet: emit silence for this packet.
		for (size_t c = 0; c < gf->channels; c++) {
			memset(audio->data[c], 0, frames * sizeof(float));
		}
	}
	return audio;
}
10 changes: 10 additions & 0 deletions src/stenographer/stenographer-util.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#ifndef STENOGRAPHER_UTIL_H
#define STENOGRAPHER_UTIL_H

// Forward declarations: this header only passes pointers to these types, so
// it does not include their full definitions.
struct transcription_filter_data;
struct obs_audio_data;

/**
 * @brief Delays an audio packet in place for stenographer mode.
 *
 * Queues the incoming frames of each channel in gf->stenographer_delay_buffers
 * and overwrites the packet with the frames queued gf->stenographer_delay_ms
 * earlier, or with silence while not enough audio has been queued yet.
 *
 * @param gf Filter state holding the delay buffers, channel count, sample rate
 *           and delay setting.
 * @param audio Audio packet to queue and overwrite.
 * @return The audio pointer that was passed in.
 */
struct obs_audio_data *stenographer_simple_delay(transcription_filter_data *gf,
struct obs_audio_data *audio);

#endif /* STENOGRAPHER_UTIL_H */
Loading
Loading