From 4337f2958a7f822b249cd02f9048f1190a6cfb76 Mon Sep 17 00:00:00 2001
From: "Zhang, Weiwei1"
Date: Fri, 24 Jan 2025 17:28:48 +0800
Subject: [PATCH 1/2] fix cuda UT torch_dtype

Signed-off-by: Zhang, Weiwei1
---
 test_cuda/test_support_vlms.py | 11 ++++++-----
 test_cuda/test_vlms.py         |  7 ++++---
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/test_cuda/test_support_vlms.py b/test_cuda/test_support_vlms.py
index a2627df3..b84836a8 100644
--- a/test_cuda/test_support_vlms.py
+++ b/test_cuda/test_support_vlms.py
@@ -35,7 +35,7 @@ def test_qwen2(self):
         from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
         model = Qwen2VLForConditionalGeneration.from_pretrained(
             quantized_model_path,
-            torch_dtype="auto",
+            torch_dtype="float16",
             device_map=f"cuda:{self.device}",
         )
         processor = AutoProcessor.from_pretrained(quantized_model_path)
@@ -92,7 +92,7 @@ def test_phi3(self):
             quantized_model_path,
             device_map=f"cuda:{self.device}",
             trust_remote_code=True,
-            torch_dtype="auto"
+            torch_dtype="float16",
         )
         processor = AutoProcessor.from_pretrained(quantized_model_path,
                                                   trust_remote_code=True,
@@ -244,7 +244,7 @@ def test_llama(self):
         quantized_model_path = os.path.join(self.save_dir, "Llama-3.2-11B-Vision-Instruct-w4g128-auto_round")
         model = MllamaForConditionalGeneration.from_pretrained(
             quantized_model_path,
-            torch_dtype="auto",
+            torch_dtype="float16",
             device_map=f"cuda:{self.device}",
         )
         processor = AutoProcessor.from_pretrained(quantized_model_path)
@@ -288,7 +288,7 @@ def test_cogvlm(self):
         )
         model = AutoModelForCausalLM.from_pretrained(
             quantized_model_path,
-            torch_dtype="auto",
+            torch_dtype="float16",
             trust_remote_code=True,
             device_map=DEVICE,
         ).to(DEVICE).eval()
@@ -352,7 +352,7 @@ def test_deepseek_vl2(self):
             quantized_model_path,
             trust_remote_code=True,
             device_map=f"cuda:{self.device}",
-            torch_dtype="auto",
+            torch_dtype="float16",
         )
 
         vl_gpt = vl_gpt.eval()
@@ -399,3 +399,4 @@ def test_deepseek_vl2(self):
 
 if __name__ == "__main__":
     unittest.main()
+
diff --git a/test_cuda/test_vlms.py b/test_cuda/test_vlms.py
index b40322da..1044e363 100644
--- a/test_cuda/test_vlms.py
+++ b/test_cuda/test_vlms.py
@@ -1,4 +1,4 @@
-import copy
+iimport copy
 import shutil
 import sys
 import unittest
@@ -46,7 +46,7 @@ def qwen_inference(self, quantized_model_dir):
         processor = AutoProcessor.from_pretrained(quantized_model_dir, trust_remote_code=True)
         model = Qwen2VLForConditionalGeneration.from_pretrained(
             quantized_model_dir,
-            torch_dtype="auto",
+            torch_dtype="float16",
             device_map="auto",
             ##revision="df7f44c" ##AutoGPTQ format
         )
@@ -120,7 +120,7 @@ def phi3_infernece(self, quantized_model_dir):
             quantized_model_path,
             device_map="auto",
             trust_remote_code=True,
-            torch_dtype="auto"
+            torch_dtype="float16",
         )
         processor = AutoProcessor.from_pretrained(quantized_model_path,
                                                   trust_remote_code=True,
@@ -198,3 +198,4 @@ def test_quant_not_text_fp_layers(self):
 
 if __name__ == "__main__":
     unittest.main()
+

From ead838d3bb86c9b5cae7f20491cabe8c16155daf Mon Sep 17 00:00:00 2001
From: n1ck-guo
Date: Fri, 24 Jan 2025 06:24:00 -0500
Subject: [PATCH 2/2] fix

Signed-off-by: n1ck-guo
---
 test_cuda/test_vlms.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test_cuda/test_vlms.py b/test_cuda/test_vlms.py
index 1044e363..ab0438e0 100644
--- a/test_cuda/test_vlms.py
+++ b/test_cuda/test_vlms.py
@@ -1,4 +1,4 @@
-iimport copy
+import copy
 import shutil
 import sys
 import unittest
@@ -94,7 +94,7 @@ def test_vlm_tune(self):
         ## load the model
         model_name = "/models/Qwen2-VL-2B-Instruct"
         model = Qwen2VLForConditionalGeneration.from_pretrained(
-            model_name, trust_remote_code=True)
+            model_name, trust_remote_code=True, device_map="auto")
         tokenizer = AutoTokenizer.from_pretrained(model_name)
         processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
 
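For reference, a minimal sketch (not part of the patches) of the loading pattern this series converges on: pinning torch_dtype="float16" instead of relying on "auto" when loading an AutoRound-quantized VLM checkpoint. The checkpoint path below is a hypothetical placeholder; the transformers classes are the same ones the tests import.

    # Minimal sketch, assuming a local AutoRound-quantized Qwen2-VL checkpoint.
    # The path below is a placeholder, not a checkpoint produced by these tests.
    from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

    quantized_model_path = "./Qwen2-VL-2B-Instruct-w4g128-auto_round"

    model = Qwen2VLForConditionalGeneration.from_pretrained(
        quantized_model_path,
        torch_dtype="float16",  # explicit fp16, as in the patch, rather than "auto"
        device_map="auto",
    )
    processor = AutoProcessor.from_pretrained(quantized_model_path)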