Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update whisper-utils.h and whisper-utils.cpp to use update_whisper_mo… #16

Merged
merged 17 commits into from
May 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/scripts/.Aptfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ package 'jq'
package 'ninja-build', bin: 'ninja'
package 'pkg-config'
package 'libopenblas-dev'
package 'libavformat-dev'
81 changes: 80 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,55 @@ if(ENABLE_QT)
AUTORCC ON)
endif()

# Locate FFmpeg's libav* libraries and attach them to the ${CMAKE_PROJECT_NAME} target.
#
# * Linux (UNIX AND NOT APPLE): libavformat, libavcodec, libavutil and libswresample are resolved through pkg-config
#   and linked via the imported target PkgConfig::FFMPEG.
# * Everything else (MSVC, macOS): the static libraries are taken from the prebuilt obs-deps tree under
#   ${CMAKE_CURRENT_SOURCE_DIR}/.deps, whose version is read from the `dependencies.prebuilt.version` field of
#   buildspec.json.
#
# Takes no arguments. Example: find_libav()
function(find_libav)
  if(UNIX AND NOT APPLE)
    find_package(PkgConfig REQUIRED)
    # REQUIRED makes pkg_check_modules() abort configuration on its own when a module is missing, so no separate
    # FFMPEG_FOUND check is needed afterwards.
    pkg_check_modules(
      FFMPEG
      REQUIRED
      IMPORTED_TARGET
      libavformat
      libavcodec
      libavutil
      libswresample)
    target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE PkgConfig::FFMPEG)
    return()
  endif()

  # Reuse a buildspec that is already loaded in the calling scope; otherwise read it from disk.
  if(NOT buildspec)
    file(READ "${CMAKE_CURRENT_SOURCE_DIR}/buildspec.json" buildspec)
  endif()
  # The JSON text is quoted so that a ';' anywhere in the file cannot split it into several arguments.
  string(
    JSON
    version
    GET
    "${buildspec}"
    dependencies
    prebuilt
    version)

  if(MSVC)
    set(arch "${CMAKE_GENERATOR_PLATFORM}")
  elseif(APPLE)
    set(arch universal)
  endif()
  # An empty architecture would silently produce a non-existent "obs-deps-<version>-" directory and only fail at link
  # time, so stop early with a readable message instead.
  if("${arch}" STREQUAL "")
    message(
      FATAL_ERROR "find_libav: unable to determine the obs-deps architecture (is CMAKE_GENERATOR_PLATFORM set?)")
  endif()
  set(deps_root "${CMAKE_CURRENT_SOURCE_DIR}/.deps/obs-deps-${version}-${arch}")

  target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE "${deps_root}/include")
  target_link_libraries(
    ${CMAKE_PROJECT_NAME}
    PRIVATE "${deps_root}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}avcodec${CMAKE_STATIC_LIBRARY_SUFFIX}"
            "${deps_root}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}avformat${CMAKE_STATIC_LIBRARY_SUFFIX}"
            "${deps_root}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}avutil${CMAKE_STATIC_LIBRARY_SUFFIX}"
            "${deps_root}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}swresample${CMAKE_STATIC_LIBRARY_SUFFIX}")
endfunction()

find_libav()

set(USE_SYSTEM_CURL
OFF
CACHE STRING "Use system cURL")
Expand All @@ -50,14 +99,44 @@ endif()
include(cmake/BuildWhispercpp.cmake)
target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE Whispercpp)

# ONNX Runtime configuration.
#
# USE_SYSTEM_ONNXRUNTIME   - link against an ONNX Runtime found on the system instead of fetching one.
# DISABLE_ONNXRUNTIME_GPU  - adds the DISABLE_ONNXRUNTIME_GPU compile definition to the plugin target.
set(USE_SYSTEM_ONNXRUNTIME
    OFF
    CACHE STRING "Use system ONNX Runtime")
set(DISABLE_ONNXRUNTIME_GPU
    OFF
    CACHE STRING "Disables GPU support of ONNX Runtime (Only valid on Linux)")

if(DISABLE_ONNXRUNTIME_GPU)
  target_compile_definitions(${CMAKE_PROJECT_NAME} PRIVATE DISABLE_ONNXRUNTIME_GPU)
endif()

if(NOT USE_SYSTEM_ONNXRUNTIME)
  # Default path: fetch and wire up ONNX Runtime through the bundled script.
  include(cmake/FetchOnnxruntime.cmake)
elseif(OS_LINUX)
  find_package(Onnxruntime 1.16.3 REQUIRED)
  # Header directories added to the plugin target: the include root plus three onnxruntime sub-directories.
  set(Onnxruntime_INCLUDE_PATH
      ${Onnxruntime_INCLUDE_DIR} ${Onnxruntime_INCLUDE_DIR}/onnxruntime
      ${Onnxruntime_INCLUDE_DIR}/onnxruntime/core/session ${Onnxruntime_INCLUDE_DIR}/onnxruntime/core/providers/cpu)
  target_include_directories(${CMAKE_PROJECT_NAME} SYSTEM PUBLIC "${Onnxruntime_INCLUDE_PATH}")
  target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE "${Onnxruntime_LIBRARIES}")
else()
  message(FATAL_ERROR "System ONNX Runtime is only supported on Linux!")
endif()

target_sources(
${CMAKE_PROJECT_NAME}
PRIVATE src/plugin-main.c
src/cleanstream-filter.cpp
src/cleanstream-filter.c
src/audio-utils/read-audio-file.cpp
src/model-utils/model-downloader.cpp
src/model-utils/model-downloader-ui.cpp
src/model-utils/model-infos.cpp
src/whisper-utils/whisper-utils.cpp
src/whisper-utils/whisper-processing.cpp)
src/whisper-utils/whisper-processing.cpp
src/whisper-utils/silero-vad-onnx.cpp)

set_target_properties_plugin(${CMAKE_PROJECT_NAME} PROPERTIES OUTPUT_NAME ${_name})
2 changes: 1 addition & 1 deletion buildspec.json
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
}
},
"name": "obs-cleanstream",
"version": "0.0.5",
"version": "0.0.6",
"author": "Roy Shilkrot",
"website": "https://github.com/occ-ai/obs-cleanstream/",
"email": "roy.shil@gmail.com",
Expand Down
97 changes: 97 additions & 0 deletions cmake/FetchOnnxruntime.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
include(FetchContent)

# Version used to build the predefined download URLs and the versioned library file names.
set(Onnxruntime_VERSION "1.17.1")

# Optional override of the ONNX Runtime archive. When a custom URL is given, its hash must be given too.
set(CUSTOM_ONNXRUNTIME_URL
    ""
    CACHE STRING "URL of a downloaded ONNX Runtime tarball")
set(CUSTOM_ONNXRUNTIME_HASH
    ""
    CACHE STRING "Hash of a downloaded ONNX Runtime tarball")

# USE_PREDEFINED_ONNXRUNTIME is ON when no custom URL was supplied and OFF when a complete URL/hash pair was supplied.
if(CUSTOM_ONNXRUNTIME_URL STREQUAL "")
  set(USE_PREDEFINED_ONNXRUNTIME ON)
elseif(CUSTOM_ONNXRUNTIME_HASH STREQUAL "")
  message(FATAL_ERROR "Both of CUSTOM_ONNXRUNTIME_URL and CUSTOM_ONNXRUNTIME_HASH must be present!")
else()
  set(USE_PREDEFINED_ONNXRUNTIME OFF)
endif()

# Select the ONNX Runtime archive (Onnxruntime_URL) and its checksum (Onnxruntime_HASH), then download it.
#
# With USE_PREDEFINED_ONNXRUNTIME the official GitHub release matching Onnxruntime_VERSION is used: the universal2
# archive on Apple, the win-x64 archive on MSVC, and on other platforms either the linux-aarch64 archive or the
# linux-x64-gpu archive depending on CMAKE_SYSTEM_PROCESSOR. Otherwise the user-supplied CUSTOM_ONNXRUNTIME_URL /
# CUSTOM_ONNXRUNTIME_HASH pair is used.
if(USE_PREDEFINED_ONNXRUNTIME)
  set(Onnxruntime_BASEURL "https://github.com/microsoft/onnxruntime/releases/download/v${Onnxruntime_VERSION}")

  if(APPLE)
    set(Onnxruntime_URL "${Onnxruntime_BASEURL}/onnxruntime-osx-universal2-${Onnxruntime_VERSION}.tgz")
    set(Onnxruntime_HASH SHA256=9FA57FA6F202A373599377EF75064AE568FDA8DA838632B26A86024C7378D306)
  elseif(MSVC)
    set(Onnxruntime_URL "${Onnxruntime_BASEURL}/onnxruntime-win-x64-${Onnxruntime_VERSION}.zip")
    # This must be Onnxruntime_HASH (it was previously misspelled), otherwise URL_HASH below receives no value on
    # Windows.
    set(Onnxruntime_HASH SHA256=4802AF9598DB02153D7DA39432A48823FF69B2FB4B59155461937F20782AA91C)
  else()
    if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
      set(Onnxruntime_URL "${Onnxruntime_BASEURL}/onnxruntime-linux-aarch64-${Onnxruntime_VERSION}.tgz")
      set(Onnxruntime_HASH SHA256=70B6F536BB7AB5961D128E9DBD192368AC1513BFFB74FE92F97AAC342FBD0AC1)
    else()
      set(Onnxruntime_URL "${Onnxruntime_BASEURL}/onnxruntime-linux-x64-gpu-${Onnxruntime_VERSION}.tgz")
      set(Onnxruntime_HASH SHA256=613C53745EA4960ED368F6B3AB673558BB8561C84A8FA781B4EA7FB4A4340BE4)
    endif()
  endif()
else()
  set(Onnxruntime_URL "${CUSTOM_ONNXRUNTIME_URL}")
  set(Onnxruntime_HASH "${CUSTOM_ONNXRUNTIME_HASH}")
endif()

# Download and unpack the archive; its contents are then available under ${onnxruntime_SOURCE_DIR}.
FetchContent_Declare(
  onnxruntime
  URL ${Onnxruntime_URL}
  URL_HASH ${Onnxruntime_HASH})
FetchContent_MakeAvailable(onnxruntime)

# Wire the fetched ONNX Runtime into the plugin target. Each platform links the library found under
# ${onnxruntime_SOURCE_DIR} and arranges for the shared library to ship with the plugin.
if(APPLE)
  # macOS: link the versioned dylib and place it in the bundle's Frameworks directory.
  set(Onnxruntime_LIB "${onnxruntime_SOURCE_DIR}/lib/libonnxruntime.${Onnxruntime_VERSION}.dylib")
  target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE "${Onnxruntime_LIB}")
  target_include_directories(${CMAKE_PROJECT_NAME} SYSTEM PUBLIC "${onnxruntime_SOURCE_DIR}/include")
  target_sources(${CMAKE_PROJECT_NAME} PRIVATE "${Onnxruntime_LIB}")
  set_source_files_properties("${Onnxruntime_LIB}" PROPERTIES MACOSX_PACKAGE_LOCATION Frameworks)
  source_group("Frameworks" FILES "${Onnxruntime_LIB}")
  # Sign the dylib with CODESIGN_IDENTITY before the plugin is built.
  add_custom_command(
    TARGET "${CMAKE_PROJECT_NAME}"
    PRE_BUILD
    COMMAND /usr/bin/codesign --force --verify --verbose --sign "${CODESIGN_IDENTITY}" "${Onnxruntime_LIB}"
    VERBATIM)
  # After the build, rewrite the plugin's reference to the dylib from @rpath to the Frameworks directory.
  add_custom_command(
    TARGET "${CMAKE_PROJECT_NAME}"
    POST_BUILD
    COMMAND
      ${CMAKE_INSTALL_NAME_TOOL} -change "@rpath/libonnxruntime.${Onnxruntime_VERSION}.dylib"
      "@loader_path/../Frameworks/libonnxruntime.${Onnxruntime_VERSION}.dylib" $<TARGET_FILE:${CMAKE_PROJECT_NAME}>)
elseif(MSVC)
  # Windows: one imported shared library per DLL, gathered behind the `Ort` interface target.
  add_library(Ort INTERFACE)
  set(Onnxruntime_LIB_NAMES onnxruntime;onnxruntime_providers_shared)
  foreach(lib_name IN LISTS Onnxruntime_LIB_NAMES)
    add_library(Ort::${lib_name} SHARED IMPORTED)
    set_target_properties(
      Ort::${lib_name}
      PROPERTIES IMPORTED_IMPLIB "${onnxruntime_SOURCE_DIR}/lib/${lib_name}.lib"
                 IMPORTED_LOCATION "${onnxruntime_SOURCE_DIR}/lib/${lib_name}.dll"
                 INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_SOURCE_DIR}/include")
    target_link_libraries(Ort INTERFACE Ort::${lib_name})
    install(FILES "${onnxruntime_SOURCE_DIR}/lib/${lib_name}.dll" DESTINATION "obs-plugins/64bit")
  endforeach()

  target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE Ort)
else()
  # Other platforms: link the versioned shared object; everything except aarch64 also installs the shared providers
  # library next to it.
  set(Onnxruntime_LINK_LIBS "${onnxruntime_SOURCE_DIR}/lib/libonnxruntime.so.${Onnxruntime_VERSION}")
  set(Onnxruntime_INSTALL_LIBS ${Onnxruntime_LINK_LIBS})
  if(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
    list(APPEND Onnxruntime_INSTALL_LIBS "${onnxruntime_SOURCE_DIR}/lib/libonnxruntime_providers_shared.so")
  endif()
  target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE ${Onnxruntime_LINK_LIBS})
  target_include_directories(${CMAKE_PROJECT_NAME} SYSTEM PUBLIC "${onnxruntime_SOURCE_DIR}/include")
  install(FILES ${Onnxruntime_INSTALL_LIBS} DESTINATION "${CMAKE_INSTALL_LIBDIR}/obs-plugins/${CMAKE_PROJECT_NAME}")
  # The installed libraries live in a sub-directory named after the plugin, relative to the plugin binary.
  set_target_properties(${CMAKE_PROJECT_NAME} PROPERTIES INSTALL_RPATH "$ORIGIN/${CMAKE_PROJECT_NAME}")
endif()
15 changes: 15 additions & 0 deletions data/locale/en-US.ini
Original file line number Diff line number Diff line change
@@ -1,2 +1,17 @@
CleanStreamAudioFilter="Clean stream audio filter"
CleanStreamFilterPlugin="Clean stream filter plugin"
detect_regex="Detect regex"
advanced_settings="Advanced settings"
filler_p_threshold="Filler p threshold"
do_silence="Do silence"
vad_enabled="VAD enabled"
log_level="Log level"
log_words="Log words"
whisper_model="Whisper model"
Whisper_Parameters="Whisper Parameters"
whisper_sampling_method="Whisper sampling method"
n_threads="Number of threads"
n_max_text_ctx="Number of max text context"
no_context="No context"
replace_sound_path="Replace Sound Path"
replace_sound="Replace Sound"
Binary file added data/models/silero-vad/silero_vad.onnx
Binary file not shown.
Binary file added data/sounds/beep.wav
Binary file not shown.
Binary file added data/sounds/horn.wav
Binary file not shown.
137 changes: 137 additions & 0 deletions src/audio-utils/read-audio-file.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
#if defined(_WIN32) || defined(__APPLE__)

extern "C" {
#include <libavformat/avformat.h>
#include <libavcodec/avcodec.h>
#include <libavutil/frame.h>
#include <libavutil/mem.h>
#include <libavutil/opt.h>
#include <libswresample/swresample.h>
}

#include <iostream>
#include <vector>

#include "read-audio-file.h"
#include "obs.h"
#include "plugin-support.h"

/**
 * Decode the first audio stream of a media file into mono float samples.
 *
 * The file is opened with libavformat, decoded with libavcodec and converted
 * with libswresample to a single channel of AV_SAMPLE_FMT_FLT samples at
 * targetSampleRate.
 *
 * @param filename          Path handed to avformat_open_input().
 * @param targetSampleRate  Sample rate (Hz) of the returned audio.
 * @return The converted samples. On any failure an error is logged through
 *         obs_log() and an empty AudioDataFloat is returned.
 */
AudioDataFloat read_audio_file(const char *filename, int targetSampleRate)
{
	// Every FFmpeg resource is declared up front so that a single cleanup
	// routine can release whatever has been acquired, on success and on
	// every error path alike.
	AVFormatContext *formatContext = nullptr;
	AVCodecContext *codecContext = nullptr;
	AVFrame *frame = nullptr;
	AVPacket *packet = nullptr;
	SwrContext *swr = nullptr;
	AVChannelLayout ch_layout = {};
	float *convertBuffer[1] = {nullptr};
	int convertCapacity = 0; // size of convertBuffer[0], in samples

	AudioDataFloat audioFrames;

	// All of the free functions below accept null / already-released handles.
	auto releaseAll = [&]() {
		av_free(convertBuffer[0]);
		convertBuffer[0] = nullptr;
		swr_free(&swr);
		av_channel_layout_uninit(&ch_layout);
		av_packet_free(&packet);
		av_frame_free(&frame);
		avcodec_free_context(&codecContext);
		avformat_close_input(&formatContext);
	};

	// Log a plain message, release everything and produce the empty result.
	auto fail = [&](const char *message) -> AudioDataFloat {
		obs_log(LOG_ERROR, "%s", message);
		releaseAll();
		return {};
	};

	// Same as fail(), with the FFmpeg error text for `code` appended.
	auto failWithCode = [&](const char *message, int code) -> AudioDataFloat {
		char errbuf[AV_ERROR_MAX_STRING_SIZE];
		av_strerror(code, errbuf, AV_ERROR_MAX_STRING_SIZE);
		obs_log(LOG_ERROR, "%s: %s", message, errbuf);
		releaseAll();
		return {};
	};

	if (avformat_open_input(&formatContext, filename, nullptr, nullptr) != 0)
		return fail("Error opening file");

	if (avformat_find_stream_info(formatContext, nullptr) < 0)
		return fail("Error finding stream information");

	// Pick the first stream that carries audio.
	int audioStreamIndex = -1;
	for (unsigned int i = 0; i < formatContext->nb_streams; i++) {
		if (formatContext->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
			audioStreamIndex = static_cast<int>(i);
			break;
		}
	}
	if (audioStreamIndex == -1)
		return fail("No audio stream found");

	AVCodecParameters *codecParams = formatContext->streams[audioStreamIndex]->codecpar;
	const AVCodec *codec = avcodec_find_decoder(codecParams->codec_id);
	if (!codec)
		return fail("Decoder not found");

	codecContext = avcodec_alloc_context3(codec);
	if (!codecContext)
		return fail("Failed to allocate codec context");

	if (avcodec_parameters_to_context(codecContext, codecParams) < 0)
		return fail("Failed to copy codec parameters to codec context");

	if (avcodec_open2(codecContext, codec, nullptr) < 0)
		return fail("Failed to open codec");

	frame = av_frame_alloc();
	packet = av_packet_alloc();
	if (!frame || !packet)
		return fail("Failed to allocate frame or packet");

	// set up swresample: decoder's native format -> mono float at the target rate
	int ret = av_channel_layout_from_string(&ch_layout, "mono");
	if (ret < 0)
		return failWithCode("Failed to set up swr context", ret);

	ret = swr_alloc_set_opts2(&swr, &ch_layout, AV_SAMPLE_FMT_FLT, targetSampleRate,
				  &(codecContext->ch_layout), codecContext->sample_fmt,
				  codecContext->sample_rate, 0, nullptr);
	if (ret < 0)
		return failWithCode("Failed to set up swr context", ret);

	// init swr
	ret = swr_init(swr);
	if (ret < 0)
		return failWithCode("Failed to initialize swr context", ret);

	convertCapacity = 4096;
	convertBuffer[0] = (float *)av_malloc((size_t)convertCapacity * sizeof(float));
	if (!convertBuffer[0])
		return fail("Failed to allocate conversion buffer");

	// Resample one block of decoded input and append the result to
	// audioFrames. The output buffer is grown to the upper bound reported by
	// swr_get_out_samples() so that no converted audio is left behind inside
	// the resampler. Returns 0 on success or a negative AVERROR code.
	auto resample = [&](const uint8_t **input, int inputSamples) -> int {
		const int needed = swr_get_out_samples(swr, inputSamples);
		if (needed < 0)
			return needed;
		if (needed > convertCapacity) {
			av_free(convertBuffer[0]);
			convertBuffer[0] = (float *)av_malloc((size_t)needed * sizeof(float));
			if (!convertBuffer[0]) {
				convertCapacity = 0;
				return AVERROR(ENOMEM);
			}
			convertCapacity = needed;
		}
		const int produced = swr_convert(swr, (uint8_t **)convertBuffer, convertCapacity,
						 input, inputSamples);
		if (produced < 0)
			return produced;
		audioFrames.insert(audioFrames.end(), convertBuffer[0],
				   convertBuffer[0] + produced);
		return 0;
	};

	// Pull every frame the decoder currently has available and resample it.
	// Returns 0 on success or a negative AVERROR code from the resampler.
	auto drainDecoder = [&]() -> int {
		while (avcodec_receive_frame(codecContext, frame) == 0) {
			const int status =
				resample((const uint8_t **)frame->data, frame->nb_samples);
			if (status < 0)
				return status;
		}
		return 0;
	};

	while (av_read_frame(formatContext, packet) >= 0) {
		// Packets the decoder rejects are skipped, as are other streams.
		if (packet->stream_index == audioStreamIndex &&
		    avcodec_send_packet(codecContext, packet) == 0) {
			ret = drainDecoder();
			if (ret < 0)
				return failWithCode("Failed to convert audio frame", ret);
		}
		av_packet_unref(packet);
	}

	// End of file: a null packet puts the decoder into draining mode so that
	// any frames it is still holding are returned.
	if (avcodec_send_packet(codecContext, nullptr) == 0) {
		ret = drainDecoder();
		if (ret < 0)
			return failWithCode("Failed to convert audio frame", ret);
	}

	// Flush the samples still buffered inside the resampler.
	for (;;) {
		const int produced =
			swr_convert(swr, (uint8_t **)convertBuffer, convertCapacity, nullptr, 0);
		if (produced < 0)
			return failWithCode("Failed to convert audio frame", produced);
		if (produced == 0)
			break;
		audioFrames.insert(audioFrames.end(), convertBuffer[0],
				   convertBuffer[0] + produced);
	}

	// %zu matches size_t on every platform (%lu does not on 64-bit Windows).
	obs_log(LOG_INFO,
		"Converted %zu frames of audio data (orig: %d, %s sample format, %d channels, %s)",
		audioFrames.size(), codecContext->sample_rate,
		av_get_sample_fmt_name(codecContext->sample_fmt),
		codecContext->ch_layout.nb_channels,
		av_sample_fmt_is_planar(codecContext->sample_fmt) ? "planar" : "packed");

	releaseAll();

	return audioFrames;
}

#endif
Loading
Loading