Skip to content

Commit

Permalink
1. Remove chunk_length_s=30; 2. Set task to "translate" for GenAI
Browse files Browse the repository at this point in the history
  • Loading branch information
wgzintel committed Nov 5, 2024
1 parent e9e8601 commit b4563cf
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 6 deletions.
4 changes: 1 addition & 3 deletions tools/llm_bench/llm_bench_utils/ov_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from llm_bench_utils.hook_common import get_bench_hook
from llm_bench_utils.config_class import OV_MODEL_CLASSES_MAPPING, TOKENIZE_CLASSES_MAPPING, DEFAULT_MODEL_CLASSES
import openvino.runtime.opset13 as opset
from transformers import pipeline


def generate_simplified(self, *args, **kwargs):
Expand Down Expand Up @@ -324,12 +325,9 @@ def create_speech_2txt_model(model_path, device, **kwargs):
from_pretrained_time = end - start
log.info(f'From pretrained time: {from_pretrained_time:.2f}s')
processor = AutoProcessor.from_pretrained(model_path)

from transformers import pipeline
pipe = pipeline(
"automatic-speech-recognition",
model=ov_model,
chunk_length_s=30,
tokenizer=processor.tokenizer,
feature_extractor=processor.feature_extractor
)
Expand Down
11 changes: 8 additions & 3 deletions tools/llm_bench/task/speech_to_text_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def run_speech_2_txt_generation(input_param, args, md5_list, iter_data_list):
max_new_tokens=max_gen_tokens,
# 'task' and 'language' parameters are supported for multilingual models only
language=speech_language,
task="transcribe",
task="translate",
return_timestamps=ret_timestamps
)
end = time.perf_counter()
Expand All @@ -55,13 +55,18 @@ def run_speech_2_txt_generation(input_param, args, md5_list, iter_data_list):
result_text = result_text.texts[0]
else:
start = time.perf_counter()
result_text = pipe(raw_speech, generate_kwargs={"task": 'translate'}, return_timestamps=ret_timestamps)["text"]
result_text = pipe(
raw_speech,
generate_kwargs={"task": 'translate', "language": speech_language},
return_timestamps=ret_timestamps
)["text"]
end = time.perf_counter()
tm_list = whisper_hook.get_time_list()
tm_infer_list = whisper_hook.get_time_infer_list()
log.debug('latency of all tokens:')
[log.debug('[{}]{:.4f}'.format(idx, tm)) for idx, tm in enumerate(tm_list)]

log.debug('latency of all infers:')
[log.debug('[{}]{:.4f}'.format(idx, tm)) for idx, tm in enumerate(tm_infer_list)]
generation_time = end - start
out_data = processor.tokenizer(result_text, return_tensors='pt')
out_tokens = out_data['input_ids'] if 'input_ids' in out_data else out_data
Expand Down

0 comments on commit b4563cf

Please sign in to comment.