4 files changed, +0 -472 lines changed

@@ -285,7 +285,6 @@ def check_available_online(
     "MistralForCausalLM": _HfExamplesInfo("mistralai/Mistral-7B-Instruct-v0.1"),
     "MixtralForCausalLM": _HfExamplesInfo("mistralai/Mixtral-8x7B-Instruct-v0.1",  # noqa: E501
                                           {"tiny": "TitanML/tiny-mixtral"}),  # noqa: E501
-    "QuantMixtralForCausalLM": _HfExamplesInfo("mistral-community/Mixtral-8x22B-v0.1-AWQ"),  # noqa: E501
     "MptForCausalLM": _HfExamplesInfo("mpt", is_available_online=False),
     "MPTForCausalLM": _HfExamplesInfo("mosaicml/mpt-7b"),
     "NemotronForCausalLM": _HfExamplesInfo("nvidia/Minitron-8B-Base"),
@@ -169,22 +169,6 @@ def get_model_architecture(
         model_config: ModelConfig) -> tuple[type[nn.Module], str]:
     architectures = getattr(model_config.hf_config, "architectures", [])
 
-    # Special handling for quantized Mixtral.
-    # FIXME(woosuk): This is a temporary hack.
-    mixtral_supported = [
-        "fp8",
-        "compressed-tensors",
-        "gptq_marlin",
-        "awq_marlin",
-        "quark",
-        "bitsandbytes",
-    ]
-
-    if (model_config.quantization is not None
-            and model_config.quantization not in mixtral_supported
-            and "MixtralForCausalLM" in architectures):
-        architectures = ["QuantMixtralForCausalLM"]
-
     model_cls, arch = model_config.registry.resolve_model_cls(
         architectures,
         model_config=model_config,
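With the special case gone, a quantized Mixtral checkpoint resolves through the ordinary "MixtralForCausalLM" path like any other model. A minimal usage sketch, assuming vLLM's public LLM entry point; the checkpoint name is the one removed from the test registry above, and the prompt is illustrative:

# Sketch, not part of this diff: after this change, an AWQ-quantized Mixtral
# checkpoint is no longer rewritten to "QuantMixtralForCausalLM"; the registry
# resolves "MixtralForCausalLM" straight from hf_config.architectures.
from vllm import LLM

llm = LLM(
    model="mistral-community/Mixtral-8x22B-v0.1-AWQ",  # checkpoint from the removed test entry
    quantization="awq",  # methods outside the old mixtral_supported list used to trigger the rewrite
)
outputs = llm.generate("Hello, my name is")
print(outputs[0].outputs[0].text)

Routing every quantization method through the one resolve_model_cls() path removes the per-method special case that the deleted FIXME comment flagged as a temporary hack.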