huggingface · BenjaminBossan · Jun 13, 2024 · Jun 12, 2024 · Jun 12, 2024
diff --git a/src/peft/tuners/adalora/layer.py b/src/peft/tuners/adalora/layer.py
@@ -35,7 +35,8 @@ class AdaLoraLayer(LoraLayer):
     # List all names of layers that may contain adapter weights
     # Note: ranknum doesn't need to be included as it is not an nn.Module
     adapter_layer_names = ("lora_A", "lora_B", "lora_E", "lora_embedding_A", "lora_embedding_B")
-    # other_param_names is defined in LoraLayer
+    # Names of all other attributes that may hold adapter-related parameters
+    other_param_names = ("r", "lora_alpha", "scaling", "lora_dropout", "ranknum")
 
     def __init__(self, base_layer: nn.Module) -> None:
         super().__init__(base_layer)

diff --git a/tests/test_common_gpu.py b/tests/test_common_gpu.py
@@ -1085,6 +1085,7 @@ def test_8bit_dora_merging(self):
 
 
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires a CUDA GPU")
+@pytest.mark.single_gpu_tests
 class TestSameAdapterDifferentDevices:
     # 1639
     # The original issue comes down to the following problem: If the user has a base layer on CUDA, moves the adapter to