23
23
24
24
import os
25
25
from launch_ros .actions import Node
26
- from launch import LaunchDescription , LaunchContext
26
+ from launch import LaunchDescription
27
27
from launch .conditions import IfCondition , UnlessCondition
28
28
from launch .substitutions import LaunchConfiguration , PythonExpression
29
29
from launch .launch_description_sources import PythonLaunchDescriptionSource
30
- from launch .actions import OpaqueFunction , DeclareLaunchArgument , IncludeLaunchDescription
30
+ from launch .actions import DeclareLaunchArgument , IncludeLaunchDescription
31
31
from ament_index_python .packages import get_package_share_directory
32
- from huggingface_hub import hf_hub_download
33
32
34
33
35
34
def generate_launch_description ():
36
35
37
- def run_whisper (context : LaunchContext , repo , file , model_path ):
38
- repo = str (context .perform_substitution (repo ))
39
- file = str (context .perform_substitution (file ))
40
- model_path = str (context .perform_substitution (model_path ))
41
-
42
- if not model_path :
43
- model_path = hf_hub_download (
44
- repo_id = repo , filename = file , force_download = False
45
- )
46
-
47
- params = {
48
- "sampling_strategy" : LaunchConfiguration (
49
- "sampling_strategy" , default = "beam_search"
50
- ),
51
- "model" : LaunchConfiguration ("model" , default = model_path ),
52
- "openvino_encode_device" : LaunchConfiguration (
53
- "openvino_encode_device" , default = "CPU"
54
- ),
55
- "n_threads" : LaunchConfiguration ("n_threads" , default = 4 ),
56
- "n_max_text_ctx" : LaunchConfiguration ("n_max_text_ctx" , default = 16384 ),
57
- "offset_ms" : LaunchConfiguration ("offset_ms" , default = 0 ),
58
- "duration_ms" : LaunchConfiguration ("duration_ms" , default = 0 ),
59
- "translate" : LaunchConfiguration ("translate" , default = False ),
60
- "no_context" : LaunchConfiguration ("no_context" , default = True ),
61
- "single_segment" : LaunchConfiguration ("single_segment" , default = True ),
62
- "token_timestamps" : LaunchConfiguration ("token_timestamps" , default = False ),
63
- "thold_pt" : LaunchConfiguration ("thold_pt" , default = 0.01 ),
64
- "thold_ptsum" : LaunchConfiguration ("thold_ptsum" , default = 0.01 ),
65
- "max_len" : LaunchConfiguration ("max_len" , default = 0 ),
66
- "split_on_word" : LaunchConfiguration ("split_on_word" , default = False ),
67
- "max_tokens" : LaunchConfiguration ("max_tokens" , default = 0 ),
68
- "audio_ctx" : LaunchConfiguration ("audio_ctx" , default = 0 ),
69
- "suppress_regex" : LaunchConfiguration ("suppress_regex" , default = "" ),
70
- "language" : LaunchConfiguration ("language" , default = "en" ),
71
- "detect_language" : LaunchConfiguration ("detect_language" , default = False ),
72
- "suppress_blank" : LaunchConfiguration ("suppress_blank" , default = True ),
73
- "suppress_nst" : LaunchConfiguration ("suppress_nst" , default = False ),
74
- "temperature" : LaunchConfiguration ("temperature" , default = 0.00 ),
75
- "max_initial_ts" : LaunchConfiguration ("max_initial_ts" , default = 1.00 ),
76
- "length_penalty" : LaunchConfiguration ("length_penalty" , default = - 1.00 ),
77
- "temperature_inc" : LaunchConfiguration ("temperature_inc" , default = 0.40 ),
78
- "entropy_thold" : LaunchConfiguration ("entropy_thold" , default = 2.40 ),
79
- "logprob_thold" : LaunchConfiguration ("logprob_thold" , default = - 1.00 ),
80
- "no_speech_thold" : LaunchConfiguration ("no_speech_thold" , default = 0.60 ),
81
- "greedy_best_of" : LaunchConfiguration ("greedy_best_of" , default = 5 ),
82
- "beam_search_beam_size" : LaunchConfiguration (
83
- "beam_search_beam_size" , default = 5
84
- ),
85
- "beam_search_patience" : LaunchConfiguration (
86
- "beam_search_patience" , default = - 1.00
87
- ),
88
- "n_processors" : LaunchConfiguration ("n_processors" , default = 1 ),
89
- "use_gpu" : LaunchConfiguration ("use_gpu" , default = True ),
90
- "gpu_device" : LaunchConfiguration ("gpu_device" , default = 0 ),
91
- "flash_attn" : LaunchConfiguration ("flash_attn" , default = False ),
92
- "dtw_n_top" : LaunchConfiguration ("dtw_n_top" , default = - 1 ),
93
- "dtw_token_timestamps" : LaunchConfiguration (
94
- "dtw_token_timestamps" , default = False
95
- ),
96
- "dtw_aheads" : LaunchConfiguration ("dtw_aheads" , default = "none" ),
97
- }
98
-
99
- return (
100
- Node (
101
- package = "whisper_ros" ,
102
- executable = "whisper_server_node" ,
103
- name = "whisper_node" ,
104
- namespace = "whisper" ,
105
- parameters = [params ],
106
- condition = UnlessCondition (
107
- PythonExpression ([LaunchConfiguration ("stream" )])
108
- ),
109
- ),
110
- Node (
111
- package = "whisper_ros" ,
112
- executable = "whisper_node" ,
113
- name = "whisper_node" ,
114
- namespace = "whisper" ,
115
- parameters = [params ],
116
- condition = IfCondition (PythonExpression ([LaunchConfiguration ("stream" )])),
117
- ),
118
- )
119
-
120
36
stream_cmd = DeclareLaunchArgument (
121
37
"stream" ,
122
38
default_value = "False" ,
123
39
description = "Whether to launch stream or server node" ,
124
40
)
125
41
126
- model_repo = LaunchConfiguration ("model_repo" )
127
- model_repo_cmd = DeclareLaunchArgument (
128
- "model_repo" ,
129
- default_value = "ggerganov/whisper.cpp" ,
130
- description = "Hugging Face model repo for Whisper" ,
131
- )
132
-
133
- model_filename = LaunchConfiguration ("model_filename" )
134
- model_filename_cmd = DeclareLaunchArgument (
135
- "model_filename" ,
136
- default_value = "ggml-large-v3-turbo-q5_0.bin" ,
137
- description = "Hugging Face model filename for Whisper" ,
138
- )
139
-
140
- model_path = LaunchConfiguration ("model_path" )
141
- model_path_cmd = DeclareLaunchArgument (
142
- "model_path" ,
143
- default_value = "" ,
144
- description = "Local path to the model file for Whisper" ,
145
- )
42
+ whisper_params = {
43
+ "sampling_strategy" : LaunchConfiguration (
44
+ "sampling_strategy" , default = "beam_search"
45
+ ),
46
+ "model_repo" : LaunchConfiguration ("model_repo" , default = "ggerganov/whisper.cpp" ),
47
+ "model_filename" : LaunchConfiguration (
48
+ "model_filename" , default = "ggml-large-v3-turbo-q5_0.bin"
49
+ ),
50
+ "model" : LaunchConfiguration ("model" , default = "" ),
51
+ "openvino_encode_device" : LaunchConfiguration (
52
+ "openvino_encode_device" , default = "CPU"
53
+ ),
54
+ "n_threads" : LaunchConfiguration ("n_threads" , default = 4 ),
55
+ "n_max_text_ctx" : LaunchConfiguration ("n_max_text_ctx" , default = 16384 ),
56
+ "offset_ms" : LaunchConfiguration ("offset_ms" , default = 0 ),
57
+ "duration_ms" : LaunchConfiguration ("duration_ms" , default = 0 ),
58
+ "translate" : LaunchConfiguration ("translate" , default = False ),
59
+ "no_context" : LaunchConfiguration ("no_context" , default = True ),
60
+ "single_segment" : LaunchConfiguration ("single_segment" , default = True ),
61
+ "token_timestamps" : LaunchConfiguration ("token_timestamps" , default = False ),
62
+ "thold_pt" : LaunchConfiguration ("thold_pt" , default = 0.01 ),
63
+ "thold_ptsum" : LaunchConfiguration ("thold_ptsum" , default = 0.01 ),
64
+ "max_len" : LaunchConfiguration ("max_len" , default = 0 ),
65
+ "split_on_word" : LaunchConfiguration ("split_on_word" , default = False ),
66
+ "max_tokens" : LaunchConfiguration ("max_tokens" , default = 0 ),
67
+ "audio_ctx" : LaunchConfiguration ("audio_ctx" , default = 0 ),
68
+ "suppress_regex" : LaunchConfiguration ("suppress_regex" , default = "" ),
69
+ "language" : LaunchConfiguration ("language" , default = "en" ),
70
+ "detect_language" : LaunchConfiguration ("detect_language" , default = False ),
71
+ "suppress_blank" : LaunchConfiguration ("suppress_blank" , default = True ),
72
+ "suppress_nst" : LaunchConfiguration ("suppress_nst" , default = False ),
73
+ "temperature" : LaunchConfiguration ("temperature" , default = 0.00 ),
74
+ "max_initial_ts" : LaunchConfiguration ("max_initial_ts" , default = 1.00 ),
75
+ "length_penalty" : LaunchConfiguration ("length_penalty" , default = - 1.00 ),
76
+ "temperature_inc" : LaunchConfiguration ("temperature_inc" , default = 0.40 ),
77
+ "entropy_thold" : LaunchConfiguration ("entropy_thold" , default = 2.40 ),
78
+ "logprob_thold" : LaunchConfiguration ("logprob_thold" , default = - 1.00 ),
79
+ "no_speech_thold" : LaunchConfiguration ("no_speech_thold" , default = 0.60 ),
80
+ "greedy_best_of" : LaunchConfiguration ("greedy_best_of" , default = 5 ),
81
+ "beam_search_beam_size" : LaunchConfiguration ("beam_search_beam_size" , default = 5 ),
82
+ "beam_search_patience" : LaunchConfiguration (
83
+ "beam_search_patience" , default = - 1.00
84
+ ),
85
+ "n_processors" : LaunchConfiguration ("n_processors" , default = 1 ),
86
+ "use_gpu" : LaunchConfiguration ("use_gpu" , default = True ),
87
+ "gpu_device" : LaunchConfiguration ("gpu_device" , default = 0 ),
88
+ "flash_attn" : LaunchConfiguration ("flash_attn" , default = False ),
89
+ "dtw_n_top" : LaunchConfiguration ("dtw_n_top" , default = - 1 ),
90
+ "dtw_token_timestamps" : LaunchConfiguration (
91
+ "dtw_token_timestamps" , default = False
92
+ ),
93
+ "dtw_aheads" : LaunchConfiguration ("dtw_aheads" , default = "none" ),
94
+ }
146
95
147
96
silero_vad_model_repo = LaunchConfiguration ("silero_vad_model_repo" )
148
97
silero_vad_model_repo_cmd = DeclareLaunchArgument (
@@ -168,15 +117,26 @@ def run_whisper(context: LaunchContext, repo, file, model_path):
168
117
return LaunchDescription (
169
118
[
170
119
stream_cmd ,
171
- model_repo_cmd ,
172
- model_filename_cmd ,
173
- model_path_cmd ,
174
120
silero_vad_model_repo_cmd ,
175
121
silero_vad_model_filename_cmd ,
176
122
silero_vad_model_path_cmd ,
177
- OpaqueFunction (
178
- function = run_whisper ,
179
- args = [model_repo , model_filename , model_path ],
123
+ Node (
124
+ package = "whisper_ros" ,
125
+ executable = "whisper_server_node" ,
126
+ name = "whisper_node" ,
127
+ namespace = "whisper" ,
128
+ parameters = [whisper_params ],
129
+ condition = UnlessCondition (
130
+ PythonExpression ([LaunchConfiguration ("stream" )])
131
+ ),
132
+ ),
133
+ Node (
134
+ package = "whisper_ros" ,
135
+ executable = "whisper_node" ,
136
+ name = "whisper_node" ,
137
+ namespace = "whisper" ,
138
+ parameters = [whisper_params ],
139
+ condition = IfCondition (PythonExpression ([LaunchConfiguration ("stream" )])),
180
140
),
181
141
IncludeLaunchDescription (
182
142
PythonLaunchDescriptionSource (
0 commit comments