ENH Argument to enable bias for LoRA B #2237

Merged
16 changes: 15 additions & 1 deletion src/peft/tuners/lora/aqlm.py
@@ -35,13 +35,27 @@ def __init__(
lora_dropout: float = 0.0,
init_lora_weights: bool = True,
use_rslora: bool = False,
use_dora: bool = False,
lora_bias: bool = False,
Member

Should this be lora_B_bias? I find that to be a bit more informative.

Member Author

Indeed, I considered this. My main reasoning for going with the more generic lora_bias was that it leaves the door open for extending this argument in the future. Say someone finds that LoRA works much better when also adding a bias to LoRA A; then we can adapt this argument to allow that too. Otherwise, we'd have to add a new argument (and we don't want to rename arguments for obvious reasons). LMK what you think of that reasoning.

Member
@sayakpaul Nov 27, 2024

> Otherwise, we'd have to add a new argument (and we don't want to rename arguments for obvious reasons).

I think that would still be preferable to having a single argument for controlling the bias setup for LoRAs, as I think it's still in its infancy.

Later, if it becomes a common standard to add biases for both LoRA matrices, we can deprecate lora_B_bias and lora_A_bias (if we introduce such an argument) in favor of a single argument called lora_bias.

This is where I stand, but I am not too opinionated about it.

Collaborator

Do we care about reproducibility after upgrading PEFT? If so, it seems detrimental to possibly merge control of the A and B biases into one flag in the future, and they should be separated into two flags from the start.

Otherwise, in terms of opportunity cost for experimentation on the user's side, I think having two separate parameters (lora_bias_A, lora_bias_B) is better. That said, having only one parameter appears to be simpler: let the implementation decide what the current best thing is for adding biases. So if you just want to follow current LoRA best practice, it would be helpful to have only one flag. This becomes harder with two flags, since there is no obvious 'no bias at all' vs. 'best-practice' setting. If we put simplicity first (and don't care about reproducibility after upgrading), then one parameter is the way to go, I think. What's the stance here?

Ideally there would be another, lower-level layer of abstraction that has two bias parameters, and one above it that decides what the best choice is at the moment, i.e. BaseLoRA(..., lora_bias_A, lora_bias_B) -> LoRA(..., lora_bias).

Member Author

To clarify, my idea is that if we want to later add the possibility for a bias for LoRA A, the option would be something like lora_bias="a", or for both, lora_bias="both". We should not change the meaning of lora_bias=True, in order to ensure reproducibility, as you mentioned.

If we find that the parameter gets overloaded, we can add the option for a sub-config, so LoraConfig(..., lora_bias=LoraBiasConfig(bias_a=True, bias_b=True, ...)).

Member

Seems like lora_bias should be fine for now.

Member Author

Thanks for the feedback, I merged the PR as is.
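As a concrete illustration of what the flag discussed above controls, here is a minimal sketch (not the actual PEFT implementation) of how lora_bias is expected to be wired into the adapter sub-layers: only the lora_B up-projection receives a bias term, while lora_A stays bias-free.

import torch.nn as nn

# Sketch only: in_features, out_features, and r are placeholders.
def build_lora_sublayers(in_features: int, out_features: int, r: int, lora_bias: bool):
    lora_A = nn.Linear(in_features, r, bias=False)       # down-projection, always bias-free
    lora_B = nn.Linear(r, out_features, bias=lora_bias)  # up-projection, optional bias
    return lora_A, lora_B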

**kwargs,
):
if use_dora:
raise ValueError(f"{self.__class__.__name__} does not support DoRA yet, please set it to False")

super().__init__()
LoraLayer.__init__(self, base_layer)

self._active_adapter = adapter_name
self.update_layer(adapter_name, r, lora_alpha, lora_dropout, init_lora_weights, use_rslora)
self.update_layer(
adapter_name,
r,
lora_alpha=lora_alpha,
lora_dropout=lora_dropout,
init_lora_weights=init_lora_weights,
use_rslora=use_rslora,
use_dora=use_dora,
lora_bias=lora_bias,
)

def forward(self, x: torch.Tensor):
# note: logic differs from default Linear because merging is not supported
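Quantized wrappers such as the AQLM layer above cannot merge the adapter into the base weights, so the LoRA path (and its optional bias inside lora_B) is always applied at forward time. A rough sketch of that forward logic, with names taken from the diff and dtype handling and per-adapter bookkeeping omitted:

import torch

# Simplified sketch of the quantized-layer forward pass; not the exact PEFT code.
def lora_forward(quant_linear_module, lora_A, lora_B, dropout, scaling, x: torch.Tensor) -> torch.Tensor:
    result = quant_linear_module(x)                          # frozen, quantized base layer
    result = result + lora_B(lora_A(dropout(x))) * scaling   # LoRA update; bias (if any) lives in lora_B
    return result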
16 changes: 15 additions & 1 deletion src/peft/tuners/lora/awq.py
@@ -36,8 +36,13 @@ def __init__(
lora_dropout: float = 0.0,
init_lora_weights: bool = True,
use_rslora: bool = False,
use_dora: bool = False,
lora_bias: bool = False,
**kwargs,
):
if use_dora:
raise ValueError(f"{self.__class__.__name__} does not support DoRA yet, please set it to False")

super().__init__()
LoraLayer.__init__(self, base_layer)

@@ -46,7 +51,16 @@ def __init__(
self.quant_linear_module = base_layer

self._active_adapter = adapter_name
self.update_layer(adapter_name, r, lora_alpha, lora_dropout, init_lora_weights, use_rslora)
self.update_layer(
adapter_name,
r,
lora_alpha=lora_alpha,
lora_dropout=lora_dropout,
init_lora_weights=init_lora_weights,
use_rslora=use_rslora,
use_dora=use_dora,
lora_bias=lora_bias,
)

def forward(self, x: torch.Tensor):
result = self.quant_linear_module(x)
25 changes: 25 additions & 0 deletions src/peft/tuners/lora/bnb.py
@@ -41,6 +41,7 @@ def __init__(
init_lora_weights: bool = True,
use_rslora: bool = False,
use_dora: bool = False,
lora_bias: bool = False,
**kwargs,
) -> None:
super().__init__()
@@ -56,6 +57,7 @@ def __init__(
init_lora_weights=init_lora_weights,
use_rslora=use_rslora,
use_dora=use_dora,
lora_bias=lora_bias,
)

def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = None) -> None:
@@ -118,6 +120,14 @@ def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = N
self.get_base_layer().weight = bnb.nn.Int8Params(
w_data.to("cpu"), requires_grad=False, has_fp16_weights=weight.has_fp16_weights
).to(weight.device)
if self.lora_bias[active_adapter]:
bias_data = self.get_base_layer().bias.data + self.lora_B[active_adapter].bias
if safe_merge and not torch.isfinite(bias_data).all():
raise ValueError(
f"NaNs detected in the merged weights. The adapter {active_adapter} seems to be broken"
)
self.get_base_layer().bias.data = bias_data

state.reset_grads()
self.merged_adapters.append(active_adapter)

@@ -154,6 +164,9 @@ def unmerge(self) -> None:
self.get_base_layer().weight = bnb.nn.Int8Params(
w_data.to("cpu"), requires_grad=False, has_fp16_weights=weight.has_fp16_weights
).to(weight.device)

if self.lora_bias[active_adapter]:
self.get_base_layer().bias.data -= self.lora_B[active_adapter].bias
state.reset_grads()

def get_delta_weight(self, adapter):
@@ -298,6 +311,7 @@ def __init__(
init_lora_weights: bool = True,
use_rslora: bool = False,
use_dora: bool = False,
lora_bias: bool = False,
**kwargs,
) -> None:
super().__init__()
Expand All @@ -313,6 +327,7 @@ def __init__(
init_lora_weights=init_lora_weights,
use_rslora=use_rslora,
use_dora=use_dora,
lora_bias=lora_bias,
)

def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = None) -> None:
@@ -372,6 +387,14 @@ def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = N
kwargs["requires_grad"] = False
kwargs.pop("data", None)
self.get_base_layer().weight = bnb.nn.Params4bit(w_data.to("cpu"), **kwargs).to(weight.device)
if self.lora_bias[active_adapter]:
bias_data = self.get_base_layer().bias.data + self.lora_B[active_adapter].bias
if safe_merge and not torch.isfinite(bias_data).all():
raise ValueError(
f"NaNs detected in the merged weights. The adapter {active_adapter} seems to be broken"
)
self.get_base_layer().bias.data = bias_data

self.merged_adapters.append(active_adapter)

def unmerge(self) -> None:
@@ -407,6 +430,8 @@ def unmerge(self) -> None:
kwargs["requires_grad"] = False
kwargs.pop("data", None)
self.get_base_layer().weight = bnb.nn.Params4bit(w_data.to("cpu"), **kwargs).to(weight.device)
if self.lora_bias[active_adapter]:
self.get_base_layer().bias.data -= self.lora_B[active_adapter].bias

def get_delta_weight(self, adapter):
return (
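The bias handling added to merge() and unmerge() above reduces to simple addition and subtraction on the base layer's bias. A minimal sketch of that arithmetic, assuming the base layer already owns a bias tensor (plain tensors stand in for the real bitsandbytes parameters):

import torch

def merge_bias(base_bias: torch.Tensor, lora_B_bias: torch.Tensor, safe_merge: bool = False) -> torch.Tensor:
    merged = base_bias + lora_B_bias
    if safe_merge and not torch.isfinite(merged).all():
        raise ValueError("NaNs detected in the merged bias")
    return merged

def unmerge_bias(merged_bias: torch.Tensor, lora_B_bias: torch.Tensor) -> torch.Tensor:
    return merged_bias - lora_B_bias  # exact inverse of merge_bias, up to floating-point error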
23 changes: 23 additions & 0 deletions src/peft/tuners/lora/config.py
@@ -212,6 +212,10 @@ class LoraConfig(PeftConfig):
all have separate LoRA adapters attached to them.
runtime_config (`LoraRuntimeConfig`):
Runtime configurations (which are not saved or restored).
lora_bias (`bool`):
Defaults to `False`. Whether to enable the bias term for the LoRA B parameter. Typically, this should be
disabled. The main use case for this is when the LoRA weights were extracted from fully fine-tuned
parameters so the bias of those parameters can be taken into account.
"""

r: int = field(default=8, metadata={"help": "Lora attention dimension"})
@@ -391,6 +395,16 @@ class LoraConfig(PeftConfig):
runtime_config: LoraRuntimeConfig = field(
default_factory=LoraRuntimeConfig, metadata={"help": "Runtime configurations"}
)
lora_bias: bool = field(
default=False,
metadata={
"help": (
"Whether to enable the bias term for the LoRA B parameter. Typically, this should be disabled. The "
"main use case for this is when the LoRA weights were extracted from fully fine-tuned parameters so "
"the bias of those parameters can be taken into account."
)
},
)

def to_dict(self):
"""
@@ -446,6 +460,15 @@ def __post_init__(self):
elif self.init_lora_weights != "eva" and self.eva_config is not None:
warnings.warn("`eva_config` specified but will be ignored when `init_lora_weights` is not 'eva'.")

if self.lora_bias:
if self.init_lora_weights not in (True, False):
raise ValueError(
f"The argument lora_bias=True is only supported with init_lora_weights=True or False, got "
f"init_lora_weights={self.init_lora_weights} instead."
)
if self.use_dora:
raise ValueError("The argument lora_bias=True is not supported for DoRA, please pass use_dora=False")

# Using post training conversion of modified base weights to restore their initial values (PiSSA, OLoRA) cannot
# be correctly done when using rslora + rank_pattern/alpha_pattern. We can't really know if the user intends
# this when they'll eventually call save_pretrained (i.e. if they'll pass
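Putting the new field and the __post_init__ checks together, a usage sketch (the target module names are placeholders): lora_bias=True works with the default boolean weight init, but is rejected together with DoRA or with non-boolean init_lora_weights values such as "pissa".

from peft import LoraConfig

config = LoraConfig(
    r=8,
    target_modules=["q_proj", "v_proj"],  # placeholder target modules
    lora_bias=True,                       # add a trainable bias to each LoRA B projection
)

# These combinations are rejected in __post_init__:
# LoraConfig(lora_bias=True, use_dora=True)              -> ValueError
# LoraConfig(lora_bias=True, init_lora_weights="pissa")  -> ValueError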
16 changes: 15 additions & 1 deletion src/peft/tuners/lora/eetq.py
@@ -33,8 +33,13 @@ def __init__(
lora_dropout: float = 0.0,
init_lora_weights: bool = True,
use_rslora: bool = False,
use_dora: bool = False,
lora_bias: bool = False,
**kwargs,
):
if use_dora:
raise ValueError(f"{self.__class__.__name__} does not support DoRA yet, please set it to False")

super().__init__()
LoraLayer.__init__(self, base_layer)

@@ -43,7 +48,16 @@ def __init__(
self.quant_linear_module = base_layer

self._active_adapter = adapter_name
self.update_layer(adapter_name, r, lora_alpha, lora_dropout, init_lora_weights, use_rslora)
self.update_layer(
adapter_name,
r,
lora_alpha=lora_alpha,
lora_dropout=lora_dropout,
init_lora_weights=init_lora_weights,
use_rslora=use_rslora,
use_dora=use_dora,
lora_bias=lora_bias,
)

def forward(self, x: torch.Tensor):
result = self.quant_linear_module(x)
1 change: 1 addition & 0 deletions src/peft/tuners/lora/eva.py
@@ -497,6 +497,7 @@ def _load_eva_state_dict(
"lora_dropout": peft_config.lora_dropout,
"use_rslora": peft_config.use_rslora,
"use_dora": peft_config.use_dora,
"lora_bias": peft_config.lora_bias,
}
missing_eva_inits = []
new_target_modules = []
2 changes: 2 additions & 0 deletions src/peft/tuners/lora/gptq.py
@@ -32,6 +32,7 @@ def __init__(
init_lora_weights: bool = True,
use_rslora: bool = False,
use_dora: bool = False,
lora_bias: bool = False,
**kwargs,
):
super().__init__()
@@ -52,6 +53,7 @@ def __init__(
init_lora_weights=init_lora_weights,
use_rslora=use_rslora,
use_dora=use_dora,
lora_bias=lora_bias,
)

def forward(self, x: torch.Tensor):
5 changes: 5 additions & 0 deletions src/peft/tuners/lora/hqq.py
@@ -41,8 +41,12 @@ def __init__(
init_lora_weights: bool = True,
use_rslora: bool = False,
use_dora: bool = False,
lora_bias: bool = False,
**kwargs,
) -> None:
if lora_bias:
raise ValueError(f"{self.__class__.__name__} does not support lora_bias yet, set it to False")

super().__init__()
LoraLayer.__init__(self, base_layer)
self.fan_in_fan_out = False
Expand All @@ -56,6 +60,7 @@ def __init__(
init_lora_weights=init_lora_weights,
use_rslora=use_rslora,
use_dora=use_dora,
lora_bias=lora_bias,
)

def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = None) -> None: