# Unless specified, these settings have been tested to work on a single L4.


# MiniCPM-O
def run_minicpmo(question: str, audio_count: int):
    """Build the engine, prompt and stop tokens for MiniCPM-o 2.6.

    Args:
        question: Text question placed after the audio placeholders.
        audio_count: Number of audio items in the prompt; also used as the
            per-prompt audio limit passed to the engine.

    Returns:
        A ``(llm, prompt, stop_token_ids)`` tuple, where ``stop_token_ids``
        holds the ids of ``<|im_end|>`` and ``<|endoftext|>``.
    """
    model_name = "openbmb/MiniCPM-o-2_6"
    tokenizer = AutoTokenizer.from_pretrained(model_name,
                                              trust_remote_code=True)
    llm = LLM(model=model_name,
              trust_remote_code=True,
              max_model_len=4096,
              max_num_seqs=5,
              limit_mm_per_prompt={"audio": audio_count})

    stop_tokens = ['<|im_end|>', '<|endoftext|>']
    stop_token_ids = [
        tokenizer.convert_tokens_to_ids(token) for token in stop_tokens
    ]

    # One placeholder per audio item, all placed ahead of the question.
    audio_placeholder = "(<audio>./</audio>)" * audio_count
    # Explicit template (instead of the tokenizer's default) whose generation
    # prompt ends with the speaker / TTS marker tokens.
    audio_chat_template = "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n<|spk_bos|><|spk|><|spk_eos|><|tts_bos|>' }}{% endif %}"  # noqa: E501
    messages = [{
        'role': 'user',
        'content': f'{audio_placeholder}\n{question}'
    }]
    prompt = tokenizer.apply_chat_template(messages,
                                           tokenize=False,
                                           add_generation_prompt=True,
                                           chat_template=audio_chat_template)
    return llm, prompt, stop_token_ids
4752
4853
@@ -68,36 +73,49 @@ def run_qwen2_audio(question: str, audio_count: int):
6873 return llm , prompt , stop_token_ids
6974
7075
# Ultravox 0.5-1B
def run_ultravox(question: str, audio_count: int):
    """Build the engine and prompt for Ultravox v0.5 (Llama 3.2 1B).

    Args:
        question: Text question appended after the audio placeholders.
        audio_count: Number of audio items in the prompt; also used as the
            per-prompt audio limit passed to the engine.

    Returns:
        A ``(llm, prompt, stop_token_ids)`` tuple; ``stop_token_ids`` is
        ``None`` for this model.
    """
    model_name = "fixie-ai/ultravox-v0_5-llama-3_2-1b"

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    messages = [{
        'role': 'user',
        # One "<|audio|>" line per audio item, followed by the question.
        'content': "<|audio|>\n" * audio_count + question
    }]
    prompt = tokenizer.apply_chat_template(messages,
                                           tokenize=False,
                                           add_generation_prompt=True)

    llm = LLM(model=model_name,
              max_model_len=4096,
              max_num_seqs=5,
              trust_remote_code=True,
              limit_mm_per_prompt={"audio": audio_count})
    stop_token_ids = None
    return llm, prompt, stop_token_ids
96+
97+
# Whisper
def run_whisper(question: str, audio_count: int):
    """Build the engine and prompt for Whisper large-v3-turbo.

    Args:
        question: Accepted for signature parity with the other ``run_*``
            helpers; it is not used when building the prompt.
        audio_count: Number of audio items in the prompt; must be 1.

    Returns:
        A ``(llm, prompt, stop_token_ids)`` tuple; ``stop_token_ids`` is
        ``None`` for this model.

    Raises:
        AssertionError: If ``audio_count`` is not 1.
    """
    assert audio_count == 1, (
        "Whisper only support single audio input per prompt")
    model_name = "openai/whisper-large-v3-turbo"

    # The prompt is a fixed start token, independent of ``question``.
    prompt = "<|startoftranscript|>"

    llm = LLM(model=model_name,
              max_model_len=448,
              max_num_seqs=5,
              limit_mm_per_prompt={"audio": audio_count})
    stop_token_ids = None
    return llm, prompt, stop_token_ids
95112
96113
# Maps a model key to the helper that builds its engine, prompt and stop
# token ids.
model_example_map = {
    "minicpmo": run_minicpmo,
    "qwen2_audio": run_qwen2_audio,
    "ultravox": run_ultravox,
    "whisper": run_whisper,
}
102120
103121
0 commit comments