diff --git a/src/peft/tuners/lora/layer.py b/src/peft/tuners/lora/layer.py
index 2dc8b2e68a..3972012de0 100644
--- a/src/peft/tuners/lora/layer.py
+++ b/src/peft/tuners/lora/layer.py
@@ -194,6 +194,7 @@ def loftq_init(self, adapter_name):
             "num_bits": self.kwargs.get("loftq_bits", 4),
             "reduced_rank": self.r[adapter_name],
             "num_iter": self.kwargs.get("loftq_iter", 1),
+            "scaling": self.scaling[adapter_name],
         }
 
         qweight, lora_A, lora_B = loftq_init(weight, **kwargs)
diff --git a/src/peft/utils/loftq_utils.py b/src/peft/utils/loftq_utils.py
index f8323485a7..e8681a5124 100644
--- a/src/peft/utils/loftq_utils.py
+++ b/src/peft/utils/loftq_utils.py
@@ -187,7 +187,7 @@ def _low_rank_decomposition(weight, reduced_rank=32):
 
 
 @torch.no_grad()
-def loftq_init(weight: Union[torch.Tensor, torch.nn.Parameter], num_bits: int, reduced_rank: int, num_iter=1):
+def loftq_init(weight: Union[torch.Tensor, torch.nn.Parameter], num_bits: int, reduced_rank: int, num_iter=1, scaling=1):
     if is_bnb_available():
         import bitsandbytes as bnb
     else:
@@ -233,7 +233,7 @@ def loftq_init(weight: Union[torch.Tensor, torch.nn.Parameter], num_bits: int, r
         L, R, reduced_rank = output["L"], output["R"], output["reduced_rank"]
         res = weight - torch.mm(L, R)
 
-    lora_A, lora_B = R, L
+    lora_A, lora_B = R, L / scaling
 
     return dequantized_weight.to(device=device, dtype=dtype), lora_A, lora_B
 
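
Note (not part of the patch): a minimal sketch of why `lora_B` is stored as `L / scaling`. LoftQ approximates the weight as `W ≈ Q + L @ R`, but the LoRA forward pass later computes `Q + scaling * (lora_B @ lora_A)` with `scaling = lora_alpha / r`. Dividing `L` by `scaling` up front cancels that multiplier, so the effective initialization still matches the decomposition. The sizes and values below are hypothetical, and the SVD split only mirrors `_low_rank_decomposition`:

```python
import torch

# Hypothetical sizes; `scaling` stands in for lora_alpha / r.
out_features, in_features, rank = 8, 16, 4
scaling = 2.0

W = torch.randn(out_features, in_features)   # original weight
Q = W + 0.01 * torch.randn_like(W)           # stand-in for the dequantized weight

# Rank-r decomposition of the residual, mirroring _low_rank_decomposition.
U, S, Vh = torch.linalg.svd(W - Q, full_matrices=False)
sqrt_S = torch.diag(torch.sqrt(S[:rank]))
L = U[:, :rank] @ sqrt_S        # (out_features, rank)
R = sqrt_S @ Vh[:rank]          # (rank, in_features)

# As in the patch: divide lora_B by the adapter's scaling factor.
lora_A, lora_B = R, L / scaling

# LoRA's forward computes Q + scaling * (lora_B @ lora_A); the division
# cancels the multiplier, recovering the LoftQ approximation Q + L @ R.
W_eff = Q + scaling * (lora_B @ lora_A)
assert torch.allclose(W_eff, Q + L @ R, atol=1e-5)
```

Without the division, the adapter's first forward pass would see `Q + scaling * L @ R` instead of the `Q + L @ R` that LoftQ optimized for, so any `lora_alpha != r` would distort the initialization.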