From 986940a9230b0d04fc203acadb6eccbc9943a1c1 Mon Sep 17 00:00:00 2001
From: Junjun Dong
Date: Tue, 28 Oct 2025 22:28:45 -0700
Subject: [PATCH] fix: add clear error message when mistral-common is missing for AutoTokenizer loading Voxtral

---
 src/transformers/models/auto/tokenization_auto.py | 4 ++++
 tests/models/voxtral/test_tokenization_voxtral.py | 11 +++++++++++
 2 files changed, 15 insertions(+)
 create mode 100644 tests/models/voxtral/test_tokenization_voxtral.py

diff --git a/src/transformers/models/auto/tokenization_auto.py b/src/transformers/models/auto/tokenization_auto.py
index a861aee12c57..1e0e328eca98 100644
--- a/src/transformers/models/auto/tokenization_auto.py
+++ b/src/transformers/models/auto/tokenization_auto.py
@@ -1131,6 +1131,10 @@ def from_pretrained(
 
         model_type = config_class_to_model_type(type(config).__name__)
         if model_type is not None:
+            if model_type == "voxtral" and not is_mistral_common_available():
+                raise ImportError(
+                    "The Voxtral tokenizer requires the 'mistral-common' package. Use `pip install mistral-common` to install the package."
+                )
             tokenizer_class_py, tokenizer_class_fast = TOKENIZER_MAPPING[type(config)]
 
             if tokenizer_class_fast and (use_fast or tokenizer_class_py is None):
diff --git a/tests/models/voxtral/test_tokenization_voxtral.py b/tests/models/voxtral/test_tokenization_voxtral.py
new file mode 100644
index 000000000000..2dc6c5f3f319
--- /dev/null
+++ b/tests/models/voxtral/test_tokenization_voxtral.py
@@ -0,0 +1,11 @@
+import pytest
+
+from transformers import AutoTokenizer
+from transformers.models.auto import tokenization_auto
+from transformers.models.voxtral import VoxtralConfig
+
+def test_voxtral_tokenizer_requires_mistral_common(monkeypatch):
+    monkeypatch.setattr(tokenization_auto, "is_mistral_common_available", lambda: False)
+    monkeypatch.setattr(tokenization_auto, "get_tokenizer_config", lambda *args, **kwargs: {})
+    with pytest.raises(ImportError, match="mistral-common"):
+        AutoTokenizer.from_pretrained("dummy", config=VoxtralConfig())
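
With this change, loading a Voxtral checkpoint through AutoTokenizer in an
environment without mistral-common fails fast with an explicit install hint,
since the guard runs before the TOKENIZER_MAPPING lookup. A minimal sketch of
the user-facing behavior (the checkpoint name below is only illustrative, and
the snippet assumes mistral-common is not installed):

    from transformers import AutoTokenizer

    try:
        # Any Voxtral checkpoint reaches the new guard before tokenizer resolution.
        AutoTokenizer.from_pretrained("mistralai/Voxtral-Mini-3B-2507")
    except ImportError as err:
        print(err)
        # The Voxtral tokenizer requires the 'mistral-common' package. Use
        # `pip install mistral-common` to install the package.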