huggingface · younesbelkada · Jun 20, 2023 · Jun 20, 2023
diff --git a/tests/test_common_gpu.py b/tests/test_common_gpu.py
@@ -54,6 +54,8 @@ def tearDown(self):
         gc.collect()
 
     @require_bitsandbytes
+    @pytest.mark.multi_gpu_tests
+    @pytest.mark.single_gpu_tests
     def test_lora_bnb_8bit_quantization(self):
         r"""
         Test that tests if the 8bit quantization using LoRA works as expected
@@ -104,6 +106,7 @@ def test_lora_bnb_8bit_quantization(self):
 
     @require_bitsandbytes
     @pytest.mark.multi_gpu_tests
+    @pytest.mark.single_gpu_tests
     def test_lora_bnb_4bit_quantization_from_pretrained_safetensors(self):
         r"""
         Test that tests if the 4bit quantization using LoRA works as expected with safetensors weights.
@@ -114,9 +117,11 @@ def test_lora_bnb_4bit_quantization_from_pretrained_safetensors(self):
         model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True)
         model = PeftModel.from_pretrained(model, peft_model_id)
 
-        _ = model.generate(torch.LongTensor([[0, 2, 3, 1]]).to(0))
+        _ = model.generate(input_ids=torch.LongTensor([[0, 2, 3, 1]]).to(0))
 
     @require_bitsandbytes
+    @pytest.mark.multi_gpu_tests
+    @pytest.mark.single_gpu_tests
     def test_lora_bnb_4bit_quantization(self):
         r"""
         Test that tests if the 4bit quantization using LoRA works as expected