Commit 87d166a

jeejeelee authored and eicherseiji committed
[Model] Remove quantized mixtral (vllm-project#24437)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
1 parent 832f314 commit 87d166a

File tree

4 files changed: 0 additions, 472 deletions


tests/models/registry.py

Lines changed: 0 additions & 1 deletion
@@ -285,7 +285,6 @@ def check_available_online(
     "MistralForCausalLM": _HfExamplesInfo("mistralai/Mistral-7B-Instruct-v0.1"),
     "MixtralForCausalLM": _HfExamplesInfo("mistralai/Mixtral-8x7B-Instruct-v0.1", # noqa: E501
                                           {"tiny": "TitanML/tiny-mixtral"}), # noqa: E501
-    "QuantMixtralForCausalLM": _HfExamplesInfo("mistral-community/Mixtral-8x22B-v0.1-AWQ"), # noqa: E501
     "MptForCausalLM": _HfExamplesInfo("mpt", is_available_online=False),
     "MPTForCausalLM": _HfExamplesInfo("mosaicml/mpt-7b"),
     "NemotronForCausalLM": _HfExamplesInfo("nvidia/Minitron-8B-Base"),

vllm/model_executor/model_loader/utils.py

Lines changed: 0 additions & 16 deletions
@@ -169,22 +169,6 @@ def get_model_architecture(
         model_config: ModelConfig) -> tuple[type[nn.Module], str]:
     architectures = getattr(model_config.hf_config, "architectures", [])
 
-    # Special handling for quantized Mixtral.
-    # FIXME(woosuk): This is a temporary hack.
-    mixtral_supported = [
-        "fp8",
-        "compressed-tensors",
-        "gptq_marlin",
-        "awq_marlin",
-        "quark",
-        "bitsandbytes",
-    ]
-
-    if (model_config.quantization is not None
-            and model_config.quantization not in mixtral_supported
-            and "MixtralForCausalLM" in architectures):
-        architectures = ["QuantMixtralForCausalLM"]
-
     model_cls, arch = model_config.registry.resolve_model_cls(
         architectures,
         model_config=model_config,
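With the special case removed, quantized Mixtral checkpoints resolve through the same registry lookup as every other architecture. Below is a minimal sketch of get_model_architecture() after this commit, reconstructed from the diff's surviving context lines; the import paths and the final return statement are assumptions for illustration, not verbatim vLLM source.

from torch import nn

from vllm.config import ModelConfig  # assumed import path


def get_model_architecture(
        model_config: ModelConfig) -> tuple[type[nn.Module], str]:
    # Architecture names come straight from the HF config.
    architectures = getattr(model_config.hf_config, "architectures", [])

    # No quantization-specific rewrite of `architectures` anymore: a
    # checkpoint such as mistral-community/Mixtral-8x22B-v0.1-AWQ now
    # resolves directly to MixtralForCausalLM, with quantization applied
    # by the regular quantized-layer machinery.
    model_cls, arch = model_config.registry.resolve_model_cls(
        architectures,
        model_config=model_config,
    )
    return model_cls, arch  # assumed: the diff's context ends above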
