diff --git a/src/peft/tuners/lora/layer.py b/src/peft/tuners/lora/layer.py
index 2dc8b2e68a..3972012de0 100644
--- a/src/peft/tuners/lora/layer.py
+++ b/src/peft/tuners/lora/layer.py
@@ -194,6 +194,7 @@ def loftq_init(self, adapter_name):
             "num_bits": self.kwargs.get("loftq_bits", 4),
             "reduced_rank": self.r[adapter_name],
             "num_iter": self.kwargs.get("loftq_iter", 1),
+            "scaling": self.scaling[adapter_name],
         }
 
         qweight, lora_A, lora_B = loftq_init(weight, **kwargs)
diff --git a/src/peft/utils/loftq_utils.py b/src/peft/utils/loftq_utils.py
index f8323485a7..e8681a5124 100644
--- a/src/peft/utils/loftq_utils.py
+++ b/src/peft/utils/loftq_utils.py
@@ -187,7 +187,7 @@ def _low_rank_decomposition(weight, reduced_rank=32):
 
 
 @torch.no_grad()
-def loftq_init(weight: Union[torch.Tensor, torch.nn.Parameter], num_bits: int, reduced_rank: int, num_iter=1):
+def loftq_init(weight: Union[torch.Tensor, torch.nn.Parameter], num_bits: int, reduced_rank: int, num_iter=1, scaling=1):
     if is_bnb_available():
         import bitsandbytes as bnb
     else:
@@ -233,7 +233,7 @@ def loftq_init(weight: Union[torch.Tensor, torch.nn.Parameter], num_bits: int, r
         L, R, reduced_rank = output["L"], output["R"], output["reduced_rank"]
         res = weight - torch.mm(L, R)
 
-    lora_A, lora_B = R, L
+    lora_A, lora_B = R, L / scaling
 
     return dequantized_weight.to(device=device, dtype=dtype), lora_A, lora_B
 
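
Note (not part of the patch): a minimal sketch of why `lora_B` is stored as `L / scaling`. LoftQ approximates the weight as `W ≈ Q + L @ R`, but the LoRA forward pass later computes `Q + scaling * (lora_B @ lora_A)` with `scaling = lora_alpha / r`. Dividing `L` by `scaling` up front cancels that multiplier, so the effective initialization still matches the decomposition. The sizes and values below are hypothetical, and the SVD split only mirrors `_low_rank_decomposition`:

```python
import torch

# Hypothetical sizes; `scaling` stands in for lora_alpha / r.
out_features, in_features, rank = 8, 16, 4
scaling = 2.0

W = torch.randn(out_features, in_features)   # original weight
Q = W + 0.01 * torch.randn_like(W)           # stand-in for the dequantized weight

# Rank-r decomposition of the residual, mirroring _low_rank_decomposition.
U, S, Vh = torch.linalg.svd(W - Q, full_matrices=False)
sqrt_S = torch.diag(torch.sqrt(S[:rank]))
L = U[:, :rank] @ sqrt_S        # (out_features, rank)
R = sqrt_S @ Vh[:rank]          # (rank, in_features)

# As in the patch: divide lora_B by the adapter's scaling factor.
lora_A, lora_B = R, L / scaling

# LoRA's forward computes Q + scaling * (lora_B @ lora_A); the division
# cancels the multiplier, recovering the LoftQ approximation Q + L @ R.
W_eff = Q + scaling * (lora_B @ lora_A)
assert torch.allclose(W_eff, Q + L @ R, atol=1e-5)
```

Without the division, the adapter's first forward pass would see `Q + scaling * L @ R` instead of the `Q + L @ R` that LoftQ optimized for, so any `lora_alpha != r` would distort the initialization.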