diff --git a/examples/whisper/README.md b/examples/whisper/README.md index 6766607f0..44be097d2 100755 --- a/examples/whisper/README.md +++ b/examples/whisper/README.md @@ -127,7 +127,8 @@ python3 convert_checkpoint.py \ --use_weight_only \ --weight_only_precision $WEIGHT_ONLY_PRECISION \ --output_dir $checkpoint_dir \ - --model_name distil-medium.en + --model_name distil-medium.en \ + --chunk_length 15 ```
Now, we can build and run the model like before:

diff --git a/examples/whisper/convert_checkpoint.py b/examples/whisper/convert_checkpoint.py index 9e5ab01bf..1dae0e33b 100644 --- a/examples/whisper/convert_checkpoint.py +++ b/examples/whisper/convert_checkpoint.py @@ -25,6 +25,7 @@ from tensorrt_llm.functional import LayerNormPositionType, LayerNormType from tensorrt_llm.models.convert_utils import weight_only_quantize_dict from tensorrt_llm.quantization import QuantAlgo +from whisper_utils import SAMPLE_RATE, HOP_LENGTH def parse_arguments(): @@ -100,7 +101,7 @@ def get_encoder_config(model_metadata: dict, dtype: str, chunk_length: int, 'hidden_size': model_metadata['n_audio_state'], 'n_mels': model_metadata['n_mels'], 'n_audio_ctx': model_metadata['n_audio_ctx'], - 'chunk_length': chunk_length * 100, + 'chunk_length': chunk_length * SAMPLE_RATE // HOP_LENGTH, 'vocab_size': model_metadata['n_vocab'], 'hidden_act': "gelu", 'num_languages': num_languages,