
Commit ba75bb1

BenjaminBossan authored
FIX: More VeRA tests, fix tests, more checks (#1900)
* FIX More VeRA tests, fix tests, more checks

  - Fixes an incorrect config for VeRA in a test
  - Adds VeRA to the multi-adapter tests
  - Adds more checks on the VeRA A/B shapes

  The latter becomes necessary when more than one VeRA adapter is added. The shapes of VeRA A and B are determined only once, when the first VeRA adapter is created; after that they are fixed. However, users may add a second VeRA adapter. As long as that adapter targets the same layers and has the same rank, we're fine. But if it targets other, bigger layers, or has a higher rank, the shapes of VeRA A and/or VeRA B will be too small, resulting in an error during the forward pass. To prevent this, the shapes are now checked during initialization of the new adapter and an error is raised right away.

* Reviewer feedback: wording, better error message

* Reviewer feedback: clarify tests

---------

Co-authored-by: BenjaminBossan <b.bossan@gmail.com>
1 parent 6472061 commit ba75bb1
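As a rough illustration of the new behavior described in the commit message (not part of the commit itself): the two-layer `MLP` below is an assumed stand-in that mirrors the model used in the new tests, while `VeraConfig`, `get_peft_model`, and `add_adapter` are the PEFT APIs exercised by this diff. A compatible second adapter can still be added, whereas an incompatible one now fails immediately in `add_adapter` instead of later during the forward pass.

import torch.nn as nn
from peft import VeraConfig, get_peft_model


class MLP(nn.Module):  # assumed model, mirroring the MLP used in the tests below
    def __init__(self):
        super().__init__()
        self.lin0 = nn.Linear(10, 20)
        self.lin1 = nn.Linear(20, 2)

    def forward(self, X):
        return self.lin1(self.lin0(X))


model = get_peft_model(MLP(), VeraConfig(target_modules=["lin0"], r=8))

# Same target layer and rank: compatible with the shared vera_A/vera_B, so this works.
model.add_adapter("same", VeraConfig(target_modules=["lin0"], r=8))

# lin1 has 20 input features, but the shared vera_A was sized for lin0's 10 input
# features when the first adapter was created, so this now raises right away.
try:
    model.add_adapter("other", VeraConfig(target_modules=["lin1"], r=8))
except ValueError as exc:
    print(exc)  # "vera_A has a size of 10 but 20 or greater is required; ..."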

File tree

3 files changed (+86, -2 lines)

src/peft/tuners/vera/layer.py

Lines changed: 22 additions & 0 deletions
@@ -100,6 +100,28 @@ def update_layer(
         # we can take any of the existing adapter's parameters, as they should all be identical
         vera_A_param = list(self.vera_A.values())[0]
         vera_B_param = list(self.vera_B.values())[0]
+
+        error_tmpl = (
+            "{} has a size of {} but {} or greater is required; this probably happened because an additional VeRA "
+            "adapter was added after the first one with incompatible shapes."
+        )
+        # check input size
+        if vera_A_param.shape[1] < self.in_features:
+            raise ValueError(error_tmpl.format("vera_A", vera_A_param.shape[1], self.in_features))
+        # check output size
+        if vera_B_param.shape[0] < self.out_features:
+            raise ValueError(error_tmpl.format("vera_B", vera_B_param.shape[0], self.out_features))
+        # check r
+        error_tmpl = (
+            "{} has a size of {} but {} or greater is required; this probably happened because an additional VeRA "
+            "adapter with a lower rank was added after the first one; loading the adapters "
+            "in reverse order may solve this."
+        )
+        if vera_A_param.shape[0] < self.r[adapter_name]:
+            raise ValueError(error_tmpl.format("vera_A", vera_A_param.shape[0], self.r[adapter_name]))
+        if vera_B_param.shape[1] < self.r[adapter_name]:
+            raise ValueError(error_tmpl.format("vera_B", vera_B_param.shape[1], self.r[adapter_name]))
+
         self.vera_A[adapter_name] = vera_A_param
         self.vera_B[adapter_name] = vera_B_param
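The comparisons above imply the layout of the shared projections: vera_A is indexed as (rank, input features) and vera_B as (output features, rank), both fixed when the first adapter is created. The snippet below only illustrates those comparisons with made-up numbers; it is an assumption inferred from the checks, not code from the repository.

import torch

# Assumed layout, inferred from the checks in update_layer above:
# vera_A ~ (r, in_features), vera_B ~ (out_features, r)
r, in_features, out_features = 8, 10, 20
vera_A = torch.randn(r, in_features)
vera_B = torch.randn(out_features, r)

# A hypothetical second adapter targeting a layer with 20 input features and rank 8:
new_in_features, new_r = 20, 8
if vera_A.shape[1] < new_in_features:  # 10 < 20, so the input-size check trips
    print(f"vera_A has a size of {vera_A.shape[1]} but {new_in_features} or greater is required")
if vera_A.shape[0] < new_r:  # 8 < 8 is False, so the rank check passes
    print("the rank check would also fail")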

tests/test_custom_models.py

Lines changed: 18 additions & 0 deletions
@@ -391,6 +391,13 @@
     ),
 ]
 
+# For this test matrix, each tuple consists of:
+# - test name
+# - tuner method
+# - config_cls
+# - 1st config kwargs
+# - 2nd config kwargs
+# The model used for this test is `MLP`, which uses linear layers `lin0` and `lin1`
 MULTIPLE_ACTIVE_ADAPTERS_TEST_CASES = [
     (
         "LoRA Same",
@@ -464,6 +471,16 @@
         {"n_frequency": 10, "target_modules": ["lin0"]},
         {"n_frequency": 10, "target_modules": ["lin1"]},
     ),
+    # Note: Currently, we cannot target lin0 and lin1 with different adapters when using VeRA. The reason is that the
+    # first adapter being created will result in a vera_A or vera_B shape that is too small for the next adapter
+    # (remember that VeRA shares these parameters across all layers), which results in an error.
+    (
+        "VeRA Same",
+        "vera",
+        VeraConfig,
+        {"target_modules": ["lin0"], "init_weights": False},
+        {"target_modules": ["lin0"], "init_weights": False},
+    ),
     (
         "HRA Same",
         "hra",
@@ -479,6 +496,7 @@
         {"target_modules": ["lin1"], "init_weights": False},
     ),
 ]
+
 PREFIXES = {
     IA3Config: "ia3_",
     LoraConfig: "lora_",

tests/test_initialization.py

Lines changed: 46 additions & 2 deletions
@@ -1098,21 +1098,65 @@ def test_use_prompt_tuning_init_text_raises(self):
         with pytest.raises(ValueError, match="When prompt_tuning_init='TEXT', prompt_tuning_init_text can't be None"):
             PromptTuningConfig(prompt_tuning_init="TEXT", tokenizer_name_or_path="t5-base")
 
+
+class TestVeraInitialization:
+    torch_device = infer_device()
+
+    def get_model(self):
+        class MLP(nn.Module):
+            def __init__(self, bias=True):
+                super().__init__()
+                self.lin0 = nn.Linear(10, 20, bias=bias)
+                self.lin1 = nn.Linear(20, 2, bias=bias)
+
+            def forward(self, X):
+                X = self.lin0(X)
+                X = self.lin1(X)
+                return X
+
+        return MLP().to(self.torch_device)
+
     def test_vera_mixing_save_projection_raises(self):
         # it is unclear what the right thing to do would be if some adapters save the projection weights and some don't
         # so we better raise an error
 
-        config0 = VeraConfig(target_modules="linear", init_weights=False, save_projection=True)
+        config0 = VeraConfig(target_modules=["lin0"], init_weights=False, save_projection=True)
         model = self.get_model()
         model = get_peft_model(model, config0)
-        config1 = VeraConfig(target_modules="linear", init_weights=False, save_projection=False)
+        config1 = VeraConfig(target_modules=["lin0"], init_weights=False, save_projection=False)
         msg = re.escape(
             "VeRA projection weights must be saved for all adapters or none, but got multiple different values: "
             "[False, True]"
         )
         with pytest.raises(ValueError, match=msg):
             model.add_adapter("other", config1)
 
+    def test_vera_add_second_adapter_with_incompatible_input_shape(self):
+        config0 = VeraConfig(target_modules=["lin0"], r=8)
+        config1 = VeraConfig(target_modules=["lin1"])
+
+        base_model = self.get_model()
+        lin0_in_feat = base_model.lin0.in_features
+        lin1_in_feat = base_model.lin1.in_features
+        model = get_peft_model(base_model, config0)
+        # not full message but enough to identify the error
+        msg = f"vera_A has a size of {lin0_in_feat} but {lin1_in_feat} or greater is required"
+        with pytest.raises(ValueError, match=msg):
+            model.add_adapter("other", config1)
+
+    def test_vera_add_second_adapter_with_higher_rank(self):
+        rank0 = 123
+        rank1 = 456
+        config0 = VeraConfig(target_modules=["lin0"], r=rank0)
+        # second adapter has higher rank
+        config1 = VeraConfig(target_modules=["lin0"], r=rank1)
+
+        model = get_peft_model(self.get_model(), config0)
+        # not full message but enough to identify the error
+        msg = f"vera_A has a size of {rank0} but {rank1} or greater is required"
+        with pytest.raises(ValueError, match=msg):
+            model.add_adapter("other", config1)
+
 
 class TestNoInfiniteRecursionDeepspeed:
     # see #1892 for details
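The error message for the rank check suggests loading the adapters in reverse order. A small sketch of that ordering, with a single-layer model assumed purely for illustration: when the higher-rank adapter is added first, the shared vera_A/vera_B are sized for the larger rank, so the shape checks pass for the lower-rank adapter added afterwards (this only shows that no ValueError is raised at `add_adapter` time).

import torch.nn as nn
from peft import VeraConfig, get_peft_model


class Tiny(nn.Module):  # assumed model, for illustration only
    def __init__(self):
        super().__init__()
        self.lin0 = nn.Linear(10, 20)

    def forward(self, X):
        return self.lin0(X)


# Higher-rank adapter first: the shared vera_A/vera_B are created for r=456.
model = get_peft_model(Tiny(), VeraConfig(target_modules=["lin0"], r=456))
# 123 <= 456, so the rank checks in update_layer pass and no error is raised.
model.add_adapter("low_rank", VeraConfig(target_modules=["lin0"], r=123))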
