fix cuda UT torch_dtype #423

Merged 2 commits on Jan 24, 2025

Changes from 1 commit
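The change is the same in every hunk below: the CUDA unit tests stop loading quantized VLM checkpoints with `torch_dtype="auto"` and pin `torch_dtype="float16"` instead. The diff gives no rationale, but a plausible one is that `"auto"` resolves the dtype from each checkpoint's config and can come up as float32 or bfloat16, which the fp16-oriented kernels for these quantized models then trip over. A minimal sketch of the before/after pattern, with a hypothetical checkpoint path standing in for the ones the tests build:

```python
# Sketch of the pattern this PR applies across the tests; the path below is
# hypothetical and mirrors the quantized checkpoints the suites save locally.
from transformers import Qwen2VLForConditionalGeneration

quantized_model_path = "./saved/Qwen2-VL-2B-Instruct-w4g128-auto_round"

# Before: let transformers infer the dtype from the checkpoint config.
#   torch_dtype="auto"
# After: pin fp16 explicitly so every test loads the same dtype.
model = Qwen2VLForConditionalGeneration.from_pretrained(
    quantized_model_path,
    torch_dtype="float16",
    device_map="cuda:0",
)
```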
11 changes: 6 additions & 5 deletions test_cuda/test_support_vlms.py
@@ -35,7 +35,7 @@ def test_qwen2(self):
         from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
         model = Qwen2VLForConditionalGeneration.from_pretrained(
             quantized_model_path,
-            torch_dtype="auto",
+            torch_dtype="float16",
             device_map=f"cuda:{self.device}",
         )
         processor = AutoProcessor.from_pretrained(quantized_model_path)
@@ -92,7 +92,7 @@ def test_phi3(self):
             quantized_model_path,
             device_map=f"cuda:{self.device}",
             trust_remote_code=True,
-            torch_dtype="auto"
+            torch_dtype="float16",
         )
         processor = AutoProcessor.from_pretrained(quantized_model_path,
                                                   trust_remote_code=True,
@@ -244,7 +244,7 @@ def test_llama(self):
         quantized_model_path = os.path.join(self.save_dir, "Llama-3.2-11B-Vision-Instruct-w4g128-auto_round")
         model = MllamaForConditionalGeneration.from_pretrained(
             quantized_model_path,
-            torch_dtype="auto",
+            torch_dtype="float16",
             device_map=f"cuda:{self.device}",
         )
         processor = AutoProcessor.from_pretrained(quantized_model_path)
@@ -288,7 +288,7 @@ def test_cogvlm(self):
         )
         model = AutoModelForCausalLM.from_pretrained(
             quantized_model_path,
-            torch_dtype="auto",
+            torch_dtype="float16",
             trust_remote_code=True,
             device_map=DEVICE,
         ).to(DEVICE).eval()
@@ -352,7 +352,7 @@ def test_deepseek_vl2(self):
             quantized_model_path,
             trust_remote_code=True,
             device_map=f"cuda:{self.device}",
-            torch_dtype="auto",
+            torch_dtype="float16",
         )
         vl_gpt = vl_gpt.eval()

@@ -399,3 +399,4 @@ def test_deepseek_vl2(self):

 if __name__ == "__main__":
     unittest.main()
+
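None of the hunks above assert what dtype actually materializes after loading. If such a guard were ever wanted, one rough option (not part of this PR; helper name hypothetical) is to inspect the floating-point parameters of a freshly loaded model:

```python
# Hypothetical guard, not part of this PR: confirm a checkpoint loaded with
# torch_dtype="float16" really holds fp16 floating-point weights.
import torch
from transformers import AutoModelForCausalLM

def assert_loads_as_fp16(model_path: str, device: int = 0) -> None:
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype="float16",
        device_map=f"cuda:{device}",
        trust_remote_code=True,
    )
    # Quantized weights are often packed as integer tensors, so only look at
    # floating-point parameters; a few models deliberately keep some modules
    # in fp32, which is why this checks membership rather than equality.
    float_dtypes = {p.dtype for p in model.parameters() if p.is_floating_point()}
    assert torch.float16 in float_dtypes, float_dtypes
```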
7 changes: 4 additions & 3 deletions test_cuda/test_vlms.py
@@ -1,4 +1,4 @@
-import copy
+iimport copy
 import shutil
 import sys
 import unittest
@@ -46,7 +46,7 @@ def qwen_inference(self, quantized_model_dir):
         processor = AutoProcessor.from_pretrained(quantized_model_dir, trust_remote_code=True)
         model = Qwen2VLForConditionalGeneration.from_pretrained(
             quantized_model_dir,
-            torch_dtype="auto",
+            torch_dtype="float16",
             device_map="auto",
             ##revision="df7f44c" ##AutoGPTQ format
         )
@@ -120,7 +120,7 @@ def phi3_infernece(self, quantized_model_dir):
             quantized_model_path,
             device_map="auto",
             trust_remote_code=True,
-            torch_dtype="auto"
+            torch_dtype="float16",
         )
         processor = AutoProcessor.from_pretrained(quantized_model_path,
                                                   trust_remote_code=True,
@@ -198,3 +198,4 @@ def test_quant_not_text_fp_layers(self):

 if __name__ == "__main__":
     unittest.main()
+
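Both files keep their `unittest.main()` entry points, so each suite can still be run directly (`python test_cuda/test_support_vlms.py` or `python test_cuda/test_vlms.py`) on a CUDA machine with the quantized checkpoints in place.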