Skip to content

Commit fd7a48e

Browse files
committed
replacing the Python HF Hub with a C++ version
1 parent 750ff91 commit fd7a48e

File tree

13 files changed

+306
-190
lines changed

13 files changed

+306
-190
lines changed

Dockerfile

+1-8
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,12 @@ SHELL ["/bin/bash", "-c"]
77
COPY . /root/ros2_ws/src
88

99
# Install dependencies
10-
RUN apt-get update
11-
RUN apt-get -y --quiet --no-install-recommends install python3-pip
12-
RUN if [ "$ROS_DISTRO" = "jazzy" ] || [ "$ROS_DISTRO" = "rolling" ]; then \
13-
pip3 install -r src/requirements.txt --break-system-packages; \
14-
else \
15-
pip3 install -r src/requirements.txt; \
16-
fi
17-
1810
WORKDIR /root/ros2_ws/src
1911
RUN git clone https://github.com/mgonzs13/audio_common.git
2012

2113
WORKDIR /root/ros2_ws
2214
RUN source /opt/ros/${ROS_DISTRO}/setup.bash
15+
RUN apt-get update
2316
RUN rosdep update --include-eol-distros && rosdep install --from-paths src --ignore-src -r -y
2417
RUN rosdep install --from-paths src --ignore-src -r -y
2518

README.md

-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ To run whisper_ros with CUDA, first, you must install the [CUDA Toolkit](https:/
3535
cd ~/ros2_ws/src
3636
git clone https://github.com/mgonzs13/audio_common.git
3737
git clone https://github.com/mgonzs13/whisper_ros.git
38-
pip3 install -r whisper_ros/requirements.txt
3938
cd ~/ros2_ws
4039
rosdep install --from-paths src --ignore-src -r -y
4140
colcon build --cmake-args -DGGML_CUDA=ON # add this for CUDA

requirements.txt

-1
This file was deleted.

whisper_bringup/launch/silero-vad.launch.py

+10-47
Original file line numberDiff line numberDiff line change
@@ -22,25 +22,14 @@
2222

2323

2424
from launch_ros.actions import Node
25-
from launch import LaunchDescription, LaunchContext
25+
from launch import LaunchDescription
2626
from launch.substitutions import LaunchConfiguration
27-
from launch.actions import OpaqueFunction, DeclareLaunchArgument
28-
from huggingface_hub import hf_hub_download
2927

3028

3129
def generate_launch_description():
3230

33-
def run_silero_vad(context: LaunchContext, repo, file, model_path):
34-
repo = str(context.perform_substitution(repo))
35-
file = str(context.perform_substitution(file))
36-
model_path = str(context.perform_substitution(model_path))
37-
38-
if not model_path:
39-
model_path = hf_hub_download(
40-
repo_id=repo, filename=file, force_download=False
41-
)
42-
43-
return (
31+
return LaunchDescription(
32+
[
4433
Node(
4534
package="whisper_ros",
4635
executable="silero_vad_node",
@@ -49,7 +38,13 @@ def run_silero_vad(context: LaunchContext, repo, file, model_path):
4938
parameters=[
5039
{
5140
"enabled": LaunchConfiguration("enabled", default=True),
52-
"model_path": model_path,
41+
"model_repo": LaunchConfiguration(
42+
"model_repo", default="mgonzs13/silero-vad-onnx"
43+
),
44+
"model_filename": LaunchConfiguration(
45+
"model_filename", default="silero_vad.onnx"
46+
),
47+
"model_path": LaunchConfiguration("model_path", default=""),
5348
"sample_rate": LaunchConfiguration("sample_rate", default=16000),
5449
"frame_size_ms": LaunchConfiguration("frame_size_ms", default=32),
5550
"threshold": LaunchConfiguration("threshold", default=0.5),
@@ -61,37 +56,5 @@ def run_silero_vad(context: LaunchContext, repo, file, model_path):
6156
],
6257
remappings=[("audio", "/audio/in")],
6358
),
64-
)
65-
66-
model_repo = LaunchConfiguration("model_repo")
67-
model_repo_cmd = DeclareLaunchArgument(
68-
"model_repo",
69-
default_value="mgonzs13/silero-vad-onnx",
70-
description="Hugging Face model repo",
71-
)
72-
73-
model_filename = LaunchConfiguration("model_filename")
74-
model_filename_cmd = DeclareLaunchArgument(
75-
"model_filename",
76-
default_value="silero_vad.onnx",
77-
description="Hugging Face model filename",
78-
)
79-
80-
model_path = LaunchConfiguration("model_path")
81-
model_path_cmd = DeclareLaunchArgument(
82-
"model_path",
83-
default_value="",
84-
description="Local path to the model file",
85-
)
86-
87-
return LaunchDescription(
88-
[
89-
model_repo_cmd,
90-
model_filename_cmd,
91-
model_path_cmd,
92-
OpaqueFunction(
93-
function=run_silero_vad,
94-
args=[model_repo, model_filename, model_path],
95-
),
9659
]
9760
)

whisper_bringup/launch/whisper.launch.py

+72-112
Original file line numberDiff line numberDiff line change
@@ -23,126 +23,75 @@
2323

2424
import os
2525
from launch_ros.actions import Node
26-
from launch import LaunchDescription, LaunchContext
26+
from launch import LaunchDescription
2727
from launch.conditions import IfCondition, UnlessCondition
2828
from launch.substitutions import LaunchConfiguration, PythonExpression
2929
from launch.launch_description_sources import PythonLaunchDescriptionSource
30-
from launch.actions import OpaqueFunction, DeclareLaunchArgument, IncludeLaunchDescription
30+
from launch.actions import DeclareLaunchArgument, IncludeLaunchDescription
3131
from ament_index_python.packages import get_package_share_directory
32-
from huggingface_hub import hf_hub_download
3332

3433

3534
def generate_launch_description():
3635

37-
def run_whisper(context: LaunchContext, repo, file, model_path):
38-
repo = str(context.perform_substitution(repo))
39-
file = str(context.perform_substitution(file))
40-
model_path = str(context.perform_substitution(model_path))
41-
42-
if not model_path:
43-
model_path = hf_hub_download(
44-
repo_id=repo, filename=file, force_download=False
45-
)
46-
47-
params = {
48-
"sampling_strategy": LaunchConfiguration(
49-
"sampling_strategy", default="beam_search"
50-
),
51-
"model": LaunchConfiguration("model", default=model_path),
52-
"openvino_encode_device": LaunchConfiguration(
53-
"openvino_encode_device", default="CPU"
54-
),
55-
"n_threads": LaunchConfiguration("n_threads", default=4),
56-
"n_max_text_ctx": LaunchConfiguration("n_max_text_ctx", default=16384),
57-
"offset_ms": LaunchConfiguration("offset_ms", default=0),
58-
"duration_ms": LaunchConfiguration("duration_ms", default=0),
59-
"translate": LaunchConfiguration("translate", default=False),
60-
"no_context": LaunchConfiguration("no_context", default=True),
61-
"single_segment": LaunchConfiguration("single_segment", default=True),
62-
"token_timestamps": LaunchConfiguration("token_timestamps", default=False),
63-
"thold_pt": LaunchConfiguration("thold_pt", default=0.01),
64-
"thold_ptsum": LaunchConfiguration("thold_ptsum", default=0.01),
65-
"max_len": LaunchConfiguration("max_len", default=0),
66-
"split_on_word": LaunchConfiguration("split_on_word", default=False),
67-
"max_tokens": LaunchConfiguration("max_tokens", default=0),
68-
"audio_ctx": LaunchConfiguration("audio_ctx", default=0),
69-
"suppress_regex": LaunchConfiguration("suppress_regex", default=""),
70-
"language": LaunchConfiguration("language", default="en"),
71-
"detect_language": LaunchConfiguration("detect_language", default=False),
72-
"suppress_blank": LaunchConfiguration("suppress_blank", default=True),
73-
"suppress_nst": LaunchConfiguration("suppress_nst", default=False),
74-
"temperature": LaunchConfiguration("temperature", default=0.00),
75-
"max_initial_ts": LaunchConfiguration("max_initial_ts", default=1.00),
76-
"length_penalty": LaunchConfiguration("length_penalty", default=-1.00),
77-
"temperature_inc": LaunchConfiguration("temperature_inc", default=0.40),
78-
"entropy_thold": LaunchConfiguration("entropy_thold", default=2.40),
79-
"logprob_thold": LaunchConfiguration("logprob_thold", default=-1.00),
80-
"no_speech_thold": LaunchConfiguration("no_speech_thold", default=0.60),
81-
"greedy_best_of": LaunchConfiguration("greedy_best_of", default=5),
82-
"beam_search_beam_size": LaunchConfiguration(
83-
"beam_search_beam_size", default=5
84-
),
85-
"beam_search_patience": LaunchConfiguration(
86-
"beam_search_patience", default=-1.00
87-
),
88-
"n_processors": LaunchConfiguration("n_processors", default=1),
89-
"use_gpu": LaunchConfiguration("use_gpu", default=True),
90-
"gpu_device": LaunchConfiguration("gpu_device", default=0),
91-
"flash_attn": LaunchConfiguration("flash_attn", default=False),
92-
"dtw_n_top": LaunchConfiguration("dtw_n_top", default=-1),
93-
"dtw_token_timestamps": LaunchConfiguration(
94-
"dtw_token_timestamps", default=False
95-
),
96-
"dtw_aheads": LaunchConfiguration("dtw_aheads", default="none"),
97-
}
98-
99-
return (
100-
Node(
101-
package="whisper_ros",
102-
executable="whisper_server_node",
103-
name="whisper_node",
104-
namespace="whisper",
105-
parameters=[params],
106-
condition=UnlessCondition(
107-
PythonExpression([LaunchConfiguration("stream")])
108-
),
109-
),
110-
Node(
111-
package="whisper_ros",
112-
executable="whisper_node",
113-
name="whisper_node",
114-
namespace="whisper",
115-
parameters=[params],
116-
condition=IfCondition(PythonExpression([LaunchConfiguration("stream")])),
117-
),
118-
)
119-
12036
stream_cmd = DeclareLaunchArgument(
12137
"stream",
12238
default_value="False",
12339
description="Whether to launch stream or server node",
12440
)
12541

126-
model_repo = LaunchConfiguration("model_repo")
127-
model_repo_cmd = DeclareLaunchArgument(
128-
"model_repo",
129-
default_value="ggerganov/whisper.cpp",
130-
description="Hugging Face model repo for Whisper",
131-
)
132-
133-
model_filename = LaunchConfiguration("model_filename")
134-
model_filename_cmd = DeclareLaunchArgument(
135-
"model_filename",
136-
default_value="ggml-large-v3-turbo-q5_0.bin",
137-
description="Hugging Face model filename for Whisper",
138-
)
139-
140-
model_path = LaunchConfiguration("model_path")
141-
model_path_cmd = DeclareLaunchArgument(
142-
"model_path",
143-
default_value="",
144-
description="Local path to the model file for Whisper",
145-
)
42+
whisper_params = {
43+
"sampling_strategy": LaunchConfiguration(
44+
"sampling_strategy", default="beam_search"
45+
),
46+
"model_repo": LaunchConfiguration("model_repo", default="ggerganov/whisper.cpp"),
47+
"model_filename": LaunchConfiguration(
48+
"model_filename", default="ggml-large-v3-turbo-q5_0.bin"
49+
),
50+
"model": LaunchConfiguration("model", default=""),
51+
"openvino_encode_device": LaunchConfiguration(
52+
"openvino_encode_device", default="CPU"
53+
),
54+
"n_threads": LaunchConfiguration("n_threads", default=4),
55+
"n_max_text_ctx": LaunchConfiguration("n_max_text_ctx", default=16384),
56+
"offset_ms": LaunchConfiguration("offset_ms", default=0),
57+
"duration_ms": LaunchConfiguration("duration_ms", default=0),
58+
"translate": LaunchConfiguration("translate", default=False),
59+
"no_context": LaunchConfiguration("no_context", default=True),
60+
"single_segment": LaunchConfiguration("single_segment", default=True),
61+
"token_timestamps": LaunchConfiguration("token_timestamps", default=False),
62+
"thold_pt": LaunchConfiguration("thold_pt", default=0.01),
63+
"thold_ptsum": LaunchConfiguration("thold_ptsum", default=0.01),
64+
"max_len": LaunchConfiguration("max_len", default=0),
65+
"split_on_word": LaunchConfiguration("split_on_word", default=False),
66+
"max_tokens": LaunchConfiguration("max_tokens", default=0),
67+
"audio_ctx": LaunchConfiguration("audio_ctx", default=0),
68+
"suppress_regex": LaunchConfiguration("suppress_regex", default=""),
69+
"language": LaunchConfiguration("language", default="en"),
70+
"detect_language": LaunchConfiguration("detect_language", default=False),
71+
"suppress_blank": LaunchConfiguration("suppress_blank", default=True),
72+
"suppress_nst": LaunchConfiguration("suppress_nst", default=False),
73+
"temperature": LaunchConfiguration("temperature", default=0.00),
74+
"max_initial_ts": LaunchConfiguration("max_initial_ts", default=1.00),
75+
"length_penalty": LaunchConfiguration("length_penalty", default=-1.00),
76+
"temperature_inc": LaunchConfiguration("temperature_inc", default=0.40),
77+
"entropy_thold": LaunchConfiguration("entropy_thold", default=2.40),
78+
"logprob_thold": LaunchConfiguration("logprob_thold", default=-1.00),
79+
"no_speech_thold": LaunchConfiguration("no_speech_thold", default=0.60),
80+
"greedy_best_of": LaunchConfiguration("greedy_best_of", default=5),
81+
"beam_search_beam_size": LaunchConfiguration("beam_search_beam_size", default=5),
82+
"beam_search_patience": LaunchConfiguration(
83+
"beam_search_patience", default=-1.00
84+
),
85+
"n_processors": LaunchConfiguration("n_processors", default=1),
86+
"use_gpu": LaunchConfiguration("use_gpu", default=True),
87+
"gpu_device": LaunchConfiguration("gpu_device", default=0),
88+
"flash_attn": LaunchConfiguration("flash_attn", default=False),
89+
"dtw_n_top": LaunchConfiguration("dtw_n_top", default=-1),
90+
"dtw_token_timestamps": LaunchConfiguration(
91+
"dtw_token_timestamps", default=False
92+
),
93+
"dtw_aheads": LaunchConfiguration("dtw_aheads", default="none"),
94+
}
14695

14796
silero_vad_model_repo = LaunchConfiguration("silero_vad_model_repo")
14897
silero_vad_model_repo_cmd = DeclareLaunchArgument(
@@ -168,15 +117,26 @@ def run_whisper(context: LaunchContext, repo, file, model_path):
168117
return LaunchDescription(
169118
[
170119
stream_cmd,
171-
model_repo_cmd,
172-
model_filename_cmd,
173-
model_path_cmd,
174120
silero_vad_model_repo_cmd,
175121
silero_vad_model_filename_cmd,
176122
silero_vad_model_path_cmd,
177-
OpaqueFunction(
178-
function=run_whisper,
179-
args=[model_repo, model_filename, model_path],
123+
Node(
124+
package="whisper_ros",
125+
executable="whisper_server_node",
126+
name="whisper_node",
127+
namespace="whisper",
128+
parameters=[whisper_params],
129+
condition=UnlessCondition(
130+
PythonExpression([LaunchConfiguration("stream")])
131+
),
132+
),
133+
Node(
134+
package="whisper_ros",
135+
executable="whisper_node",
136+
name="whisper_node",
137+
namespace="whisper",
138+
parameters=[whisper_params],
139+
condition=IfCondition(PythonExpression([LaunchConfiguration("stream")])),
180140
),
181141
IncludeLaunchDescription(
182142
PythonLaunchDescriptionSource(

whisper_hfhub_vendor/CMakeLists.txt

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
cmake_minimum_required(VERSION 3.8)
2+
project(whisper_hfhub_vendor)
3+
4+
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
5+
add_compile_options(-Wall -Wextra -Wpedantic)
6+
endif()
7+
8+
include(FetchContent)
9+
find_package(ament_cmake REQUIRED)
10+
find_package(CURL REQUIRED) # Add CURL as a required package
11+
12+
# Declare hfhub from Git repository
13+
FetchContent_Declare(
14+
hfhub
15+
GIT_REPOSITORY https://github.com/agonzc34/huggingface-hub-cpp
16+
GIT_TAG v1.0.0
17+
GIT_SHALLOW TRUE
18+
)
19+
20+
FetchContent_MakeAvailable(hfhub)
21+
22+
# Export targets and include directories
23+
install(
24+
TARGETS hfhub
25+
EXPORT export_hfhub
26+
LIBRARY DESTINATION lib
27+
INCLUDES DESTINATION include
28+
)
29+
30+
# Export include directories and dependencies (CURL)
31+
ament_export_include_directories(include)
32+
ament_export_dependencies(CURL) # Export CURL dependency
33+
ament_export_targets(export_hfhub HAS_LIBRARY_TARGET)
34+
35+
ament_package()

whisper_hfhub_vendor/package.xml

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
<?xml version="1.0"?>
2+
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
3+
<package format="3">
4+
<name>whisper_hfhub_vendor</name>
5+
<version>3.0.3</version>
6+
<description>huggingface-hub-cpp vendor package for whisper_ros</description>
7+
<maintainer email="mgons@unileon.es">Miguel Ángel González Santamarta</maintainer>
8+
<license>MIT</license>
9+
10+
<buildtool_depend>ament_cmake</buildtool_depend>
11+
12+
<depend>curl</depend>
13+
14+
<export>
15+
<build_type>ament_cmake</build_type>
16+
</export>
17+
</package>

0 commit comments

Comments (0)