locaal-ai · royshil · Sep 14, 2024 · Oct 1, 2024 · Oct 1, 2024 · Oct 6, 2024
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -101,6 +101,32 @@ include(cmake/BuildICU.cmake)
 target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE ICU)
 target_include_directories(${CMAKE_PROJECT_NAME} SYSTEM PUBLIC ${ICU_INCLUDE_DIR})
 
+if(WIN32 OR APPLE)
+  # Use the headers of the prebuilt obs-deps tree whose version is pinned in buildspec.json.
+  if(NOT buildspec)
+    file(READ "${CMAKE_CURRENT_SOURCE_DIR}/buildspec.json" buildspec)
+  endif()
+  # Quoted: an unquoted expansion would be split into several arguments at any ';'.
+  string(JSON version GET "${buildspec}" dependencies prebuilt version)
+  if(MSVC)
+    set(arch "${CMAKE_GENERATOR_PLATFORM}")
+  elseif(APPLE)
+    set(arch universal)
+  endif()
+  if(NOT arch)
+    # e.g. a non-MSVC Windows toolchain, or a generator without a platform setting.
+    message(WARNING "obs-deps architecture is unknown; the deps_root below may not exist")
+  endif()
+  set(deps_root "${CMAKE_CURRENT_SOURCE_DIR}/.deps/obs-deps-${version}-${arch}")
+  message(STATUS "deps_root: ${deps_root}")
+  target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE "${deps_root}/include")
+else()
+  # Elsewhere, download WebSocket++ and ASIO through FetchContent instead.
+  include(cmake/FetchWebsocketpp.cmake)
+  target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE websocketpp)
+  target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE ${asio_SOURCE_DIR}/asio/include/)
+endif()
+
 target_sources(
   ${CMAKE_PROJECT_NAME}
   PRIVATE src/plugin-main.c
@@ -120,12 +146,15 @@ target_sources(
           src/whisper-utils/silero-vad-onnx.cpp
           src/whisper-utils/token-buffer-thread.cpp
           src/whisper-utils/vad-processing.cpp
+          src/whisper-utils/resample-utils.cpp
           src/translation/language_codes.cpp
           src/translation/translation.cpp
           src/translation/translation-utils.cpp
           src/ui/filter-replace-utils.cpp
           src/translation/translation-language-utils.cpp
-          src/ui/filter-replace-dialog.cpp)
+          src/ui/filter-replace-dialog.cpp
+          src/stenographer/stenographer.cpp
+          src/stenographer/stenographer-util.cpp)
 
 set_target_properties_plugin(${CMAKE_PROJECT_NAME} PROPERTIES OUTPUT_NAME ${_name})
 

diff --git a/cmake/FetchWebsocketpp.cmake b/cmake/FetchWebsocketpp.cmake
@@ -0,0 +1,24 @@
+# WebSocket++ and ASIO sources, downloaded at configure time through FetchContent.
+include(FetchContent)
+
+# Declare both archives up front; each is pinned to a release tarball and its SHA256.
+FetchContent_Declare(
+  websocketpp
+  URL https://github.com/zaphoyd/websocketpp/archive/refs/tags/0.8.2.tar.gz
+  URL_HASH SHA256=6ce889d85ecdc2d8fa07408d6787e7352510750daa66b5ad44aacb47bea76755)
+FetchContent_Declare(
+  asio
+  URL https://github.com/chriskohlhoff/asio/archive/asio-1-28-0.tar.gz
+  URL_HASH SHA256=226438b0798099ad2a202563a83571ce06dd13b570d8fded4840dbc1f97fa328)
+
+# Populate WebSocket++ by hand: download only, do not configure or build it.
+FetchContent_GetProperties(websocketpp)
+if(NOT websocketpp_POPULATED)
+  FetchContent_Populate(websocketpp)
+endif()
+
+# Expose the unpacked WebSocket++ tree as an INTERFACE target carrying its include path.
+add_library(websocketpp INTERFACE)
+target_include_directories(websocketpp INTERFACE ${websocketpp_SOURCE_DIR})
+
+FetchContent_MakeAvailable(websocketpp asio)
diff --git a/cmake/linux/compilerconfig.cmake b/cmake/linux/compilerconfig.cmake
@@ -21,6 +21,7 @@ set(_obs_gcc_c_options
     -Wformat-security
     -Wno-conversion
     -Wno-deprecated-declarations
+    -Wno-error=conversion
     -Wno-error=deprecated-declarations
     -Wno-float-conversion
     -Wno-implicit-fallthrough
@@ -42,14 +43,13 @@ set(_obs_gcc_c_options
     -Wvla)
 
 # gcc options for C++
-set(_obs_gcc_cxx_options
-    # cmake-format: sortable
-    ${_obs_gcc_c_options} -Wconversion -Wfloat-conversion -Winvalid-offsetof -Wno-overloaded-virtual)
+set(_obs_gcc_cxx_options # cmake-format: sortable
+                         ${_obs_gcc_c_options} -Winvalid-offsetof -Wno-overloaded-virtual)
 
 add_compile_options(
   -fopenmp-simd
   "$<$<COMPILE_LANG_AND_ID:C,GNU>:${_obs_gcc_c_options}>"
-  "$<$<COMPILE_LANG_AND_ID:C,GNU>:-Wint-conversion;-Wno-missing-prototypes;-Wno-strict-prototypes;-Wpointer-sign>"
+  "$<$<COMPILE_LANG_AND_ID:C,GNU>:-Wno-missing-prototypes;-Wno-strict-prototypes;-Wpointer-sign>"
   "$<$<COMPILE_LANG_AND_ID:CXX,GNU>:${_obs_gcc_cxx_options}>"
   "$<$<COMPILE_LANG_AND_ID:C,Clang>:${_obs_clang_c_options}>"
   "$<$<COMPILE_LANG_AND_ID:CXX,Clang>:${_obs_clang_cxx_options}>")

diff --git a/data/locale/en-US.ini b/data/locale/en-US.ini
@@ -87,4 +87,7 @@ Active_VAD="Active VAD"
 Hybrid_VAD="Hybrid VAD"
 translate_only_full_sentences="Translate only full sentences"
 duration_filter_threshold="Duration filter"
-segment_duration="Segment duration"
+segment_duration="Segment duration"
+stenographer_parameters="Stenographer Options"
+stenographer_delay="Audio Delay"
+translation_remove_punctuation_from_start="Remove punctuation from sentence start"
diff --git a/src/stenographer/stenographer-util.cpp b/src/stenographer/stenographer-util.cpp
@@ -0,0 +1,67 @@
+
+#include "stenographer-util.h"
+#include "transcription-filter-data.h"
+#include "transcription-utils.h"
+
+#include <obs.h>
+
+#include <cstring>
+#include <vector>
+
+/**
+ * @brief Applies a fixed delay to the audio stream for stenographer mode.
+ *
+ * Incoming frames are appended to a per-channel delay buffer. Once the buffer
+ * holds the configured delay plus the current chunk, the oldest audio->frames
+ * samples are removed from it and written over the incoming audio in place.
+ * Until then the incoming audio is overwritten with silence.
+ *
+ * @param gf Filter state; reads channels, sample_rate, stenographer_delay_ms
+ *           and appends to / drains stenographer_delay_buffers.
+ * @param audio Audio chunk to delay; data[c] is treated as audio->frames floats
+ *              per channel and is modified in place.
+ * @return The same audio pointer that was passed in.
+ */
+struct obs_audio_data *stenographer_simple_delay(transcription_filter_data *gf,
+						 struct obs_audio_data *audio)
+{
+	// Nothing to delay; also avoids indexing stenographer_delay_buffers[0] below.
+	if (gf->channels == 0) {
+		return audio;
+	}
+
+	const size_t frames = audio->frames;
+
+	// Append the incoming samples straight from the audio planes (no temporary copy).
+	for (size_t c = 0; c < gf->channels; c++) {
+		const float *src = (const float *)audio->data[c];
+		gf->stenographer_delay_buffers[c].insert(
+			gf->stenographer_delay_buffers[c].end(), src, src + frames);
+	}
+
+	// The chunk just appended is included in the buffer size, so it is added to the
+	// threshold: after emitting, at least the configured delay remains buffered.
+	const size_t delay_frames =
+		(size_t)((float)gf->sample_rate * (float)gf->stenographer_delay_ms / 1000.0f) +
+		frames;
+
+	if (gf->stenographer_delay_buffers[0].size() >= delay_frames) {
+		for (size_t c = 0; c < gf->channels; c++) {
+			// Copy exactly `frames` of the oldest samples over the audio plane ...
+			float *dst = (float *)audio->data[c];
+			auto oldest = gf->stenographer_delay_buffers[c].begin();
+			for (size_t i = 0; i < frames; i++) {
+				dst[i] = *(oldest + i);
+			}
+			// ... then drop them from the delay buffer.
+			gf->stenographer_delay_buffers[c].erase(oldest, oldest + frames);
+		}
+	} else {
+		// Not enough audio buffered yet: output silence.
+		for (size_t c = 0; c < gf->channels; c++) {
+			memset(audio->data[c], 0, frames * sizeof(float));
+		}
+	}
+
+	return audio;
+}
diff --git a/src/stenographer/stenographer-util.h b/src/stenographer/stenographer-util.h
@@ -0,0 +1,10 @@
+#ifndef STENOGRAPHER_UTIL_H
+#define STENOGRAPHER_UTIL_H
+
+// Forward declarations; this header only passes pointers to these types.
+struct obs_audio_data;
+struct transcription_filter_data;
+// Delays `audio` in place via gf's stenographer delay buffers and returns `audio`.
+struct obs_audio_data *stenographer_simple_delay(transcription_filter_data *gf,
+						 struct obs_audio_data *audio);
+#endif /* STENOGRAPHER_UTIL_H */