diff --git a/src/transformers/models/auto/tokenization_auto.py b/src/transformers/models/auto/tokenization_auto.py
index a861aee12c57..b7fe884af075 100644
--- a/src/transformers/models/auto/tokenization_auto.py
+++ b/src/transformers/models/auto/tokenization_auto.py
@@ -1096,6 +1096,12 @@ def from_pretrained(
             trust_remote_code, pretrained_model_name_or_path, has_local_code, has_remote_code, upstream_repo
         )
 
+        # Detect missing dependency for Voxtral early and provide a clear error message
+        if getattr(config, "model_type", None) == "voxtral" and not is_mistral_common_available():
+            raise ImportError(
+                "The Voxtral tokenizer requires the 'mistral-common' package. Please install it using `pip install mistral-common`."
+            )
+
         if has_remote_code and trust_remote_code:
             tokenizer_class = get_class_from_dynamic_module(class_ref, pretrained_model_name_or_path, **kwargs)
             _ = kwargs.pop("code_revision", None)
diff --git a/tests/models/voxtral/test_tokenization_voxtral.py b/tests/models/voxtral/test_tokenization_voxtral.py
new file mode 100644
index 000000000000..d227eae7886c
--- /dev/null
+++ b/tests/models/voxtral/test_tokenization_voxtral.py
@@ -0,0 +1,17 @@
+import pytest
+
+from transformers import AutoTokenizer
+from transformers.models.voxtral import VoxtralConfig
+import transformers.models.auto.tokenization_auto as ta
+
+
+def test_voxtral_tokenizer_requires_mistral_common(monkeypatch):
+    # Simulate that mistral_common is not available for the auto-tokenizer logic
+    monkeypatch.setattr(ta, "is_mistral_common_available", lambda: False)
+    # Avoid network access by short-circuiting tokenizer_config retrieval
+    monkeypatch.setattr(ta, "get_tokenizer_config", lambda *args, **kwargs: {})
+    with pytest.raises(ImportError, match="mistral-common"):
+        # Using a dummy path since the guard should raise before any file access
+        AutoTokenizer.from_pretrained("dummy", config=VoxtralConfig())
+
+
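
For reviewers, a minimal sketch of the user-facing behavior the new guard is meant to produce (the checkpoint id below is illustrative only, and the snippet assumes an environment where `mistral-common` is not installed):

```python
# Illustrative only: with the guard in place, loading a Voxtral checkpoint
# without mistral-common installed should fail fast with an actionable
# message, rather than surfacing a less obvious error later in tokenizer
# resolution.
from transformers import AutoTokenizer

try:
    AutoTokenizer.from_pretrained("mistralai/Voxtral-Mini-3B-2507")  # example repo id
except ImportError as err:
    print(err)
    # -> The Voxtral tokenizer requires the 'mistral-common' package.
    #    Please install it using `pip install mistral-common`.
```

The new test runs in isolation with `pytest tests/models/voxtral/test_tokenization_voxtral.py -q`; it monkeypatches the availability check rather than uninstalling the package, so it exercises only the guard itself.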