Skip to content

Commit 163d9a2

Browse files
committed
initial changes for silero-vad-cpp
1 parent 8ad8369 commit 163d9a2

21 files changed

+990
-50
lines changed

Dockerfile

+2-6
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,12 @@ RUN apt-get update \
1111
&& apt-get -y --quiet --no-install-recommends install \
1212
gcc \
1313
git \
14-
wget \
15-
portaudio19-dev \
16-
python3 \
17-
python3-pip
14+
curl
1815

1916
WORKDIR /root/ros2_ws/src
2017
RUN git clone https://github.com/mgonzs13/audio_common.git
21-
WORKDIR /root/ros2_ws
2218

23-
RUN pip3 install -r src/requirements.txt
19+
WORKDIR /root/ros2_ws
2420
RUN rosdep install --from-paths src --ignore-src -r -y
2521

2622
# Install CUDA nvcc

README.md

-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ To run whisper_ros with CUDA, first, you must install the [CUDA Toolkit](https:/
3333
$ cd ~/ros2_ws/src
3434
$ git clone https://github.com/mgonzs13/audio_common.git
3535
$ git clone https://github.com/mgonzs13/whisper_ros.git
36-
$ pip3 install -r whisper_ros/requirements.txt
3736
$ cd ~/ros2_ws
3837
$ rosdep install --from-paths src --ignore-src -r -y
3938
$ colcon build --cmake-args -DGGML_CUDA=ON # add this for CUDA

onnxruntime_vendor/CMakeLists.txt

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
cmake_minimum_required(VERSION 3.8)
project(onnxruntime_vendor)

# Vendor package: downloads a prebuilt ONNX Runtime release (Linux x64 archive)
# at build time and installs its headers and libraries into this package's
# install prefix so downstream packages can find_package(onnxruntime_vendor).

# Release to fetch. The archive is expected to unpack into a single directory
# named after the release (onnxruntime-linux-x64-<version>).
set(ONNXRUNTIME_VERSION "1.18.1")
set(ONNXRUNTIME_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNXRUNTIME_VERSION}/onnxruntime-linux-x64-${ONNXRUNTIME_VERSION}.tgz")

# ROS 2 package configuration
find_package(ament_cmake REQUIRED)

# Everything generated lives in this project's binary directory, which is also
# the working directory of the download/extract commands below.
set(ONNXRUNTIME_ARCHIVE "${CMAKE_CURRENT_BINARY_DIR}/onnxruntime.tgz")
set(ONNXRUNTIME_INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/onnxruntime-linux-x64-${ONNXRUNTIME_VERSION}")
set(ONNXRUNTIME_STAMP "${CMAKE_CURRENT_BINARY_DIR}/onnxruntime-${ONNXRUNTIME_VERSION}.stamp")

# curl performs the download. The REQUIRED keyword of find_program() only
# exists since CMake 3.18, so the result is checked explicitly instead.
find_program(CURL_EXECUTABLE curl)

if(NOT CURL_EXECUTABLE)
  message(FATAL_ERROR "curl is required to download ONNX Runtime but was not found.")
endif()

# Download and extract once. The stamp file is only written after both steps
# succeed, so a failed or interrupted download is retried on the next build
# while a completed one is not repeated.
add_custom_command(
  OUTPUT "${ONNXRUNTIME_STAMP}"
  # --fail turns HTTP errors into a non-zero exit code instead of saving the
  # error page as the archive; --silent --show-error keeps the log quiet
  # without hiding failures.
  COMMAND "${CURL_EXECUTABLE}" --fail --silent --show-error --location
          --output "${ONNXRUNTIME_ARCHIVE}" "${ONNXRUNTIME_URL}"
  COMMAND "${CMAKE_COMMAND}" -E tar xzf "${ONNXRUNTIME_ARCHIVE}"
  COMMAND "${CMAKE_COMMAND}" -E touch "${ONNXRUNTIME_STAMP}"
  WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
  COMMENT "Downloading and extracting ONNX Runtime ${ONNXRUNTIME_VERSION}"
  VERBATIM
)

add_custom_target(download_onnxruntime ALL
  DEPENDS "${ONNXRUNTIME_STAMP}"
)

# Install the ONNX Runtime library and include files. The trailing slash
# copies the directory contents into the destination rather than nesting
# lib/lib or include/include.
install(DIRECTORY "${ONNXRUNTIME_INSTALL_DIR}/lib/" DESTINATION lib)
install(DIRECTORY "${ONNXRUNTIME_INSTALL_DIR}/include/" DESTINATION include)

# Export the installed locations (relative to the install prefix), not the
# build tree, so downstream packages keep working after the build directory
# is removed.
ament_export_include_directories(include)
ament_export_libraries(onnxruntime)

# Export the package
ament_package()

onnxruntime_vendor/package.xml

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
<?xml version="1.0"?>
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
<package format="3">
  <name>onnxruntime_vendor</name>
  <version>1.3.1</version>
  <description>Vendor package for onnxruntime</description>
  <maintainer email="mgons@unileon.es">Miguel Ángel González Santamarta</maintainer>
  <license>MIT</license>

  <buildtool_depend>ament_cmake</buildtool_depend>

  <!-- CMakeLists.txt invokes curl at build time to fetch the prebuilt ONNX
       Runtime archive; declaring it lets rosdep install it. -->
  <build_depend>curl</build_depend>

  <test_depend>ament_lint_auto</test_depend>
  <test_depend>ament_lint_common</test_depend>

  <export>
    <build_type>ament_cmake</build_type>
  </export>
</package>

requirements.txt

-4
This file was deleted.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
# MIT License
2+
3+
# Copyright (c) 2023 Miguel Ángel González Santamarta
4+
5+
# Permission is hereby granted, free of charge, to any person obtaining a copy
6+
# of this software and associated documentation files (the "Software"), to deal
7+
# in the Software without restriction, including without limitation the rights
8+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
# copies of the Software, and to permit persons to whom the Software is
10+
# furnished to do so, subject to the following conditions:
11+
12+
# The above copyright notice and this permission notice shall be included in all
13+
# copies or substantial portions of the Software.
14+
15+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
# SOFTWARE.
22+
23+
24+
from launch_ros.actions import Node
25+
from launch import LaunchDescription, LaunchContext
26+
from launch.substitutions import LaunchConfiguration
27+
from launch.actions import OpaqueFunction, DeclareLaunchArgument
28+
from huggingface_hub import hf_hub_download
29+
30+
31+
def generate_launch_description():
32+
33+
def run_silero_vad(context: LaunchContext, repo, file, model_path):
34+
repo = str(context.perform_substitution(repo))
35+
file = str(context.perform_substitution(file))
36+
model_path = str(context.perform_substitution(model_path))
37+
38+
if not model_path:
39+
model_path = hf_hub_download(
40+
repo_id=repo, filename=file, force_download=False
41+
)
42+
43+
return (
44+
Node(
45+
package="whisper_ros",
46+
executable="silero_vad_node",
47+
name="silero_vad_node",
48+
namespace="whisper",
49+
parameters=[
50+
{
51+
"enabled": LaunchConfiguration("enabled", default=True),
52+
"model_path": model_path,
53+
"sample_rate": LaunchConfiguration("sample_rate", default=16000),
54+
"frame_size_ms": LaunchConfiguration("frame_size_ms", default=32),
55+
"threshold": LaunchConfiguration("threshold", default=0.5),
56+
"min_silence_ms": LaunchConfiguration(
57+
"min_silence_ms", default=0
58+
),
59+
"speech_pad_ms": LaunchConfiguration("speech_pad_ms", default=32),
60+
"min_speech_ms": LaunchConfiguration("min_speech_ms", default=32),
61+
"max_speech_s": LaunchConfiguration(
62+
"max_speech_s", default=float("inf")
63+
),
64+
}
65+
],
66+
remappings=[("audio", "/audio/in")],
67+
),
68+
)
69+
70+
model_repo = LaunchConfiguration("model_repo")
71+
model_repo_cmd = DeclareLaunchArgument(
72+
"model_repo",
73+
default_value="deepghs/silero-vad-onnx",
74+
description="Hugging Face model repo",
75+
)
76+
77+
model_filename = LaunchConfiguration("model_filename")
78+
model_filename_cmd = DeclareLaunchArgument(
79+
"model_filename",
80+
default_value="silero_vad.onnx",
81+
description="Hugging Face model filename",
82+
)
83+
84+
model_path = LaunchConfiguration("model_path")
85+
model_path_cmd = DeclareLaunchArgument(
86+
"model_path", default_value="", description="Local path to the model file"
87+
)
88+
89+
return LaunchDescription(
90+
[
91+
model_repo_cmd,
92+
model_filename_cmd,
93+
model_path_cmd,
94+
OpaqueFunction(
95+
function=run_silero_vad, args=[model_repo, model_filename, model_path]
96+
),
97+
]
98+
)

whisper_bringup/launch/whisper.launch.py

+54-22
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,15 @@
2121
# SOFTWARE.
2222

2323

24-
from launch import LaunchDescription, LaunchContext
24+
import os
2525
from launch_ros.actions import Node
26+
from launch import LaunchDescription, LaunchContext
27+
from launch.conditions import IfCondition, UnlessCondition
2628
from launch.substitutions import LaunchConfiguration, PythonExpression
27-
from launch.actions import OpaqueFunction, DeclareLaunchArgument
29+
from launch.launch_description_sources import PythonLaunchDescriptionSource
30+
from launch.actions import OpaqueFunction, DeclareLaunchArgument, IncludeLaunchDescription
31+
from ament_index_python.packages import get_package_share_directory
2832
from huggingface_hub import hf_hub_download
29-
from launch.conditions import IfCondition, UnlessCondition
3033

3134

3235
def generate_launch_description():
@@ -126,19 +129,42 @@ def run_whisper(context: LaunchContext, repo, file, model_path):
126129
model_repo_cmd = DeclareLaunchArgument(
127130
"model_repo",
128131
default_value="ggerganov/whisper.cpp",
129-
description="Hugging Face model repo",
132+
description="Hugging Face model repo for Whisper",
130133
)
131134

132135
model_filename = LaunchConfiguration("model_filename")
133136
model_filename_cmd = DeclareLaunchArgument(
134137
"model_filename",
135138
default_value="ggml-large-v3-turbo-q5_0.bin",
136-
description="Hugging Face model filename",
139+
description="Hugging Face model filename for Whisper",
137140
)
138141

139142
model_path = LaunchConfiguration("model_path")
140143
model_path_cmd = DeclareLaunchArgument(
141-
"model_path", default_value="", description="Local path to the model file"
144+
"model_path",
145+
default_value="",
146+
description="Local path to the model file for Whisper",
147+
)
148+
149+
silero_vad_model_repo = LaunchConfiguration("silero_vad_model_repo")
150+
silero_vad_model_repo_cmd = DeclareLaunchArgument(
151+
"silero_vad_model_repo",
152+
default_value="onnx-community/silero-vad",
153+
description="Hugging Face model repo for SileroVAD",
154+
)
155+
156+
silero_vad_model_filename = LaunchConfiguration("silero_vad_model_filename")
157+
silero_vad_model_filename_cmd = DeclareLaunchArgument(
158+
"silero_vad_model_filename",
159+
default_value="onnx/model.onnx",
160+
description="Hugging Face model filename for SileroVAD",
161+
)
162+
163+
silero_vad_model_path = LaunchConfiguration("silero_vad_model_path")
164+
silero_vad_model_path_cmd = DeclareLaunchArgument(
165+
"silero_vad_model_path",
166+
default_value="",
167+
description="Local path to the model file for SileroVAD",
142168
)
143169

144170
return LaunchDescription(
@@ -147,24 +173,30 @@ def run_whisper(context: LaunchContext, repo, file, model_path):
147173
model_repo_cmd,
148174
model_filename_cmd,
149175
model_path_cmd,
176+
silero_vad_model_repo_cmd,
177+
silero_vad_model_filename_cmd,
178+
silero_vad_model_path_cmd,
150179
OpaqueFunction(
151-
function=run_whisper, args=[model_repo, model_filename, model_path]
180+
function=run_whisper,
181+
args=[model_repo, model_filename, model_path],
152182
),
153-
Node(
154-
package="whisper_ros",
155-
executable="silero_vad_node",
156-
name="silero_vad_node",
157-
namespace="whisper",
158-
parameters=[
159-
{
160-
"enabled": LaunchConfiguration(
161-
"vad_enabled",
162-
default=PythonExpression([LaunchConfiguration("stream")]),
163-
),
164-
"threshold": LaunchConfiguration("vad_threshold", default=0.5),
165-
}
166-
],
167-
remappings=[("audio", "/audio/in")],
183+
IncludeLaunchDescription(
184+
PythonLaunchDescriptionSource(
185+
os.path.join(
186+
get_package_share_directory("whisper_bringup"),
187+
"launch",
188+
"silero-vad.launch.py",
189+
)
190+
),
191+
launch_arguments={
192+
"enabled": LaunchConfiguration(
193+
"vad_enabled",
194+
default=PythonExpression([LaunchConfiguration("stream")]),
195+
),
196+
"model_repo": silero_vad_model_repo,
197+
"model_filename": silero_vad_model_filename,
198+
"model_path": silero_vad_model_path,
199+
}.items(),
168200
),
169201
Node(
170202
package="audio_common",

whisper_ros/CMakeLists.txt

+31-6
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,18 @@ find_package(rclcpp_action REQUIRED)
1212
find_package(rclcpp_lifecycle REQUIRED)
1313
find_package(std_msgs REQUIRED)
1414
find_package(std_srvs REQUIRED)
15+
find_package(audio_common_msgs REQUIRED)
1516
find_package(whisper_msgs REQUIRED)
1617
find_package(whisper_cpp_vendor REQUIRED)
18+
find_package(onnxruntime_vendor REQUIRED)
19+
find_library(PORTAUDIO_LIB portaudio REQUIRED)
1720

18-
include_directories(include)
21+
include_directories(
22+
include
23+
${PORTAUDIO_INCLUDE_DIR}
24+
)
1925

26+
# whisper_node
2027
add_executable(whisper_node
2128
src/whisper_main.cpp
2229
src/whisper_ros/whisper_node.cpp
@@ -36,6 +43,7 @@ ament_target_dependencies(whisper_node
3643
whisper_cpp_vendor
3744
)
3845

46+
# whisper_server_node
3947
add_executable(whisper_server_node
4048
src/whisper_server_main.cpp
4149
src/whisper_ros/whisper_server_node.cpp
@@ -56,10 +64,28 @@ ament_target_dependencies(whisper_server_node
5664
whisper_cpp_vendor
5765
)
5866

59-
ament_export_dependencies(whisper_cpp_vendor)
67+
# silero_vad_node
68+
add_executable(silero_vad_node
69+
src/silero_vad_main.cpp
70+
src/silero_vad/silero_vad_node.cpp
71+
src/silero_vad/vad_iterator.cpp
72+
src/silero_vad/timestamp.cpp
73+
)
74+
target_link_libraries(silero_vad_node ${PORTAUDIO_LIB})
75+
ament_target_dependencies(silero_vad_node
76+
rclcpp
77+
rclcpp_lifecycle
78+
std_msgs
79+
std_srvs
80+
audio_common_msgs
81+
onnxruntime_vendor
82+
)
6083

84+
# Export dependencies
85+
ament_export_dependencies(whisper_cpp_vendor)
86+
ament_export_dependencies(onnxruntime_vendor)
6187

62-
# INSTALL
88+
# Install
6389
install(TARGETS
6490
whisper_node
6591
DESTINATION lib/${PROJECT_NAME}
@@ -70,10 +96,9 @@ install(TARGETS
7096
DESTINATION lib/${PROJECT_NAME}
7197
)
7298

73-
install(PROGRAMS
74-
whisper_ros/silero_vad_node.py
99+
install(TARGETS
100+
silero_vad_node
75101
DESTINATION lib/${PROJECT_NAME}
76-
RENAME silero_vad_node
77102
)
78103

79104
ament_package()

0 commit comments

Comments
 (0)