From 41a71e320a70be5ab175855922195eb34b4f280c Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Fri, 29 Sep 2023 16:24:53 +0300 Subject: [PATCH 01/33] Initial commit for LoKr implementation --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 13f4e88a04..ced6e17882 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ Supported methods: 5. AdaLoRA: [Adaptive Budget Allocation for Parameter-Efficient Fine-Tuning](https://arxiv.org/abs/2303.10512) 6. $(IA)^3$: [Few-Shot Parameter-Efficient Fine-Tuning is Better and Cheaper than In-Context Learning](https://arxiv.org/abs/2205.05638) 7. MultiTask Prompt Tuning: [Multitask Prompt Tuning Enables Parameter-Efficient Transfer Learning](https://arxiv.org/abs/2303.02861) +8. LoKr: [KronA: Parameter Efficient Tuning with Kronecker Adapter](https://arxiv.org/abs/2212.10650) based on [Navigating Text-To-Image Customization:From LyCORIS Fine-Tuning to Model Evaluation](https://arxiv.org/abs/2309.14859) implementation ## Getting started From 0f6089356dad288558b8ec38c875b330991492ae Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Tue, 3 Oct 2023 19:26:14 +0300 Subject: [PATCH 02/33] Added current implementation of LoKr --- src/peft/__init__.py | 2 + src/peft/mapping.py | 4 + src/peft/peft_model.py | 2 + src/peft/tuners/__init__.py | 1 + src/peft/tuners/lokr/__init__.py | 20 ++ src/peft/tuners/lokr/config.py | 130 +++++++++ src/peft/tuners/lokr/layer.py | 452 +++++++++++++++++++++++++++++++ src/peft/tuners/lokr/model.py | 285 +++++++++++++++++++ src/peft/utils/peft_types.py | 1 + src/peft/utils/save_and_load.py | 6 +- 10 files changed, 902 insertions(+), 1 deletion(-) create mode 100644 src/peft/tuners/lokr/__init__.py create mode 100644 src/peft/tuners/lokr/config.py create mode 100644 src/peft/tuners/lokr/layer.py create mode 100644 src/peft/tuners/lokr/model.py diff --git a/src/peft/__init__.py b/src/peft/__init__.py index a22fc87a08..53ba2bd568 100644 --- a/src/peft/__init__.py +++ b/src/peft/__init__.py @@ -51,6 +51,8 @@ LoraModel, LoHaConfig, LoHaModel, + LoKrConfig, + LoKrModel, IA3Config, IA3Model, AdaLoraConfig, diff --git a/src/peft/mapping.py b/src/peft/mapping.py index ecb12be37c..f69e89ec3e 100644 --- a/src/peft/mapping.py +++ b/src/peft/mapping.py @@ -37,6 +37,8 @@ IA3Model, LoHaConfig, LoHaModel, + LoKrConfig, + LoKrModel, LoraConfig, LoraModel, MultitaskPromptTuningConfig, @@ -67,6 +69,7 @@ "P_TUNING": PromptEncoderConfig, "LORA": LoraConfig, "LOHA": LoHaConfig, + "LOKR": LoKrConfig, "ADALORA": AdaLoraConfig, "IA3": IA3Config, "MULTITASK_PROMPT_TUNING": MultitaskPromptTuningConfig, @@ -75,6 +78,7 @@ PEFT_TYPE_TO_TUNER_MAPPING = { "LORA": LoraModel, "LOHA": LoHaModel, + "LOKR": LoKrModel, "ADALORA": AdaLoraModel, "IA3": IA3Model, } diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index 126a536d7e..c2c6959632 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -40,6 +40,7 @@ AdaptionPromptModel, IA3Model, LoHaModel, + LoKrModel, LoraModel, MultitaskPromptEmbedding, PrefixEncoder, @@ -68,6 +69,7 @@ PEFT_TYPE_TO_MODEL_MAPPING = { PeftType.LORA: LoraModel, PeftType.LOHA: LoHaModel, + PeftType.LOKR: LoKrModel, PeftType.PROMPT_TUNING: PromptEmbedding, PeftType.P_TUNING: PromptEncoder, PeftType.PREFIX_TUNING: PrefixEncoder, diff --git a/src/peft/tuners/__init__.py b/src/peft/tuners/__init__.py index 20f0bb2b31..dd4c94b947 100644 --- a/src/peft/tuners/__init__.py +++ b/src/peft/tuners/__init__.py @@ -20,6 +20,7 @@ from .adaption_prompt import AdaptionPromptConfig, 
AdaptionPromptModel from .lora import LoraConfig, LoraModel from .loha import LoHaConfig, LoHaModel +from .lokr import LoKrConfig, LoKrModel from .ia3 import IA3Config, IA3Model from .adalora import AdaLoraConfig, AdaLoraModel from .p_tuning import PromptEncoder, PromptEncoderConfig, PromptEncoderReparameterizationType diff --git a/src/peft/tuners/lokr/__init__.py b/src/peft/tuners/lokr/__init__.py new file mode 100644 index 0000000000..bb138202fd --- /dev/null +++ b/src/peft/tuners/lokr/__init__.py @@ -0,0 +1,20 @@ +# coding=utf-8 +# Copyright 2023-present the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .config import LoKrConfig +from .model import LoKrModel + + +__all__ = ["LoKrConfig", "LoKrModel"] diff --git a/src/peft/tuners/lokr/config.py b/src/peft/tuners/lokr/config.py new file mode 100644 index 0000000000..eb36c5673f --- /dev/null +++ b/src/peft/tuners/lokr/config.py @@ -0,0 +1,130 @@ +# coding=utf-8 +# Copyright 2023-present the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass, field +from typing import List, Optional, Union + +from peft.config import PeftConfig +from peft.utils import PeftType + + +@dataclass +class LoKrConfig(PeftConfig): + """ + This is the configuration class to store the configuration of a [`LoKrModel`]. + + Args: + r (`int`): LoKr rank. + alpha (`int`): The alpha parameter for LoKr scaling. + rank_dropout (`int`): The dropout probability for rank dimension during training. + module_dropout (`int`): The dropout probability for disabling LoHa modules during training. + use_effective_conv2d (`bool`): + Use parameter effective decomposition for Conv2d with ksize > 1 ("Proposition 3" from FedPara paper). + decompose_both (`bool`): Perform rank decomposition of left kronecker product matrix. + decompose_factor (`int`): Kronecker product decomposition factor. + target_modules (`Union[List[str],str]`): The names of the modules to apply LoKr to. + init_weights (`bool`): Whether to perform initialization of LoKr weights. + layers_to_transform (`Union[List[int],int]`): + The layer indexes to transform, if this argument is specified, it will apply the LoHa transformations on + the layer indexes that are specified in this list. If a single integer is passed, it will apply the LoKr + transformations on the layer at this index. 
+ layers_pattern (`str`): + The layer pattern name, used only if `layers_to_transform` is different from `None` and if the layer + pattern is not in the common layers pattern. + rank_pattern (`dict`): + The mapping from layer names or regexp expression to ranks which are different from the default rank + specified by `r`. + alpha_pattern (`dict`): + The mapping from layer names or regexp expression to alphas which are different from the default alpha + specified by `alpha`. + modules_to_save (`List[str]`): The names of modules to be set as trainable except LoHa parameters. + """ + + r: int = field(default=8, metadata={"help": "LoHa rank"}) + alpha: int = field(default=8, metadata={"help": "LoHa alpha"}) + rank_dropout: float = field( + default=0.0, metadata={"help": "The dropout probability for rank dimension during training"} + ) + module_dropout: float = field( + default=0.0, metadata={"help": "The dropout probability for disabling LoHa modules during training"} + ) + use_effective_conv2d: bool = field( + default=False, + metadata={ + "help": 'Use parameter effective decomposition for Conv2d 3x3 with ksize > 1 ("Proposition 3" from FedPara paper)' + }, + ) + decompose_both: bool = field( + default=False, + metadata={"help": "Perform rank decomposition of left kronecker product matrix."}, + ) + decompose_factor: int = field(default=-1, metadata={"help": "Kronecker product decomposition factor."}) + target_modules: Optional[Union[List[str], str]] = field( + default=None, + metadata={ + "help": "List of module names or regex expression of the module names to replace with LoHa." + "For example, ['q', 'v'] or '.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$' " + }, + ) + init_weights: bool = field( + default=True, + metadata={ + "help": ( + "Whether to initialize the weights of the LoHa layers with their default initialization. Don't change " + "this setting, except if you know exactly what you're doing." + ), + }, + ) + layers_to_transform: Optional[Union[List[int], int]] = field( + default=None, + metadata={ + "help": "The layer indexes to transform, is this argument is specified, PEFT will transform only the layers indexes that are specified inside this list. If a single integer is passed, PEFT will transform only the layer at this index." + }, + ) + layers_pattern: Optional[str] = field( + default=None, + metadata={ + "help": "The layer pattern name, used only if `layers_to_transform` is different to None and if the layer pattern is not in the common layers pattern." + }, + ) + rank_pattern: Optional[dict] = field( + default_factory=dict, + metadata={ + "help": ( + "The mapping from layer names or regexp expression to ranks which are different from the default rank specified by `r`. " + "For example, `{model.decoder.layers.0.encoder_attn.k_proj: 8`}" + ) + }, + ) + alpha_pattern: Optional[dict] = field( + default_factory=dict, + metadata={ + "help": ( + "The mapping from layer names or regexp expression to alphas which are different from the default alpha specified by `alpha`. " + "For example, `{model.decoder.layers.0.encoder_attn.k_proj: 32`}" + ) + }, + ) + modules_to_save: Optional[List[str]] = field( + default=None, + metadata={ + "help": "List of modules apart from LoHA layers to be set as trainable and saved in the final checkpoint. " + "For example, in Sequence Classification or Token Classification tasks, " + "the final layer `classifier/score` are randomly initialized and as such need to be trainable and saved." 
+ }, + ) + + def __post_init__(self): + self.peft_type = PeftType.LOKR diff --git a/src/peft/tuners/lokr/layer.py b/src/peft/tuners/lokr/layer.py new file mode 100644 index 0000000000..e78fc7c32f --- /dev/null +++ b/src/peft/tuners/lokr/layer.py @@ -0,0 +1,452 @@ +# coding=utf-8 +# Copyright 2023-present the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +import warnings +from itertools import chain +from typing import Iterable, Optional, Tuple, Union + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from peft.tuners.tuners_utils import BaseTunerLayer + + +class LoKrLayer(BaseTunerLayer, nn.Module): + # List all names of layers that may contain adapter weights + adapter_layer_names = [ + "lokr_w1", + "lokr_w1_a", + "lokr_w1_b", + "lokr_w2", + "lokr_w2_a", + "lokr_w2_b", + "lokr_t2", + ] + + def __init__(self): + super(nn.Module, self).__init__() + + # LoKr info + self.r = {} + self.alpha = {} + self.scaling = {} + self.lokr_w1 = nn.ParameterDict({}) + self.lokr_w1_a = nn.ParameterDict({}) + self.lokr_w1_b = nn.ParameterDict({}) + self.lokr_w2 = nn.ParameterDict({}) + self.lokr_w2_a = nn.ParameterDict({}) + self.lokr_w2_b = nn.ParameterDict({}) + self.lokr_t2 = nn.ParameterDict({}) + self.rank_dropout = {} + self.module_dropout = {} + + # Tuner info + self.merged = False + self._disable_adapters = False + self.merged_adapters = [] + + def _init_empty_weights(self, cls, *args, **kwargs) -> None: + # A helper method that allows to initialize the layer of the given class without spending time to initialize the + # model weights. The implementation is inspired by + # https://pytorch.org/docs/stable/generated/torch.nn.utils.skip_init.html but this function cannot be used + # directly. + # Instead of this approach, it would be possible to bypass the __init__ of the class but that runs the risk of + # omitting important logic inside that __init__. 
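+ # Concretely: build the layer with its parameters on the "meta" device (no storage is allocated),
+ # then materialize uninitialized storage on the requested device via to_empty().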
+ kwargs = kwargs.copy() + final_device = kwargs.pop("device", "cpu") + cls.__init__(self, *args, device="meta", **kwargs) + self.to_empty(device=final_device) + + @property + def _available_adapters(self) -> Iterable[str]: + return set( + chain( + self.lokr_w1.keys(), + self.lokr_w1_a.keys(), + self.lokr_w1_b.keys(), + self.lokr_w2.keys(), + self.lokr_w2_a.keys(), + self.lokr_w2_b.keys(), + self.lokr_t2.keys(), + ) + ) + + def create_lokr_parameters( + self, + adapter_name: str, + r: int, + shape, + use_w1: bool, + use_w2: bool, + use_effective_conv2d: bool, + ): + if use_w1: + self.lokr_w1[adapter_name] = nn.Parameter(torch.empty(shape[0][0], shape[1][0])) + else: + self.lokr_w1_a[adapter_name] = nn.Parameter(torch.empty(shape[0][0], r)) + self.lokr_w1_b[adapter_name] = nn.Parameter(torch.empty(r, shape[1][0])) + + if len(shape) == 4: + # Conv2d + if use_w2: + self.lokr_w2[adapter_name] = nn.Parameter(torch.empty(shape[0][1], shape[1][1], *shape[2:])) + elif use_effective_conv2d: + self.lokr_t2[adapter_name] = nn.Parameter(torch.empty(r, r, shape[2], shape[3])) + self.lokr_w2_a[adapter_name] = nn.Parameter(torch.empty(r, shape[0][1])) # b, 1-mode + self.lokr_w2_b[adapter_name] = nn.Parameter(torch.empty(r, shape[1][1])) # d, 2-mode + else: + self.lokr_w2_a[adapter_name] = nn.Parameter(torch.empty(shape[0][1], r)) + self.lokr_w2_b[adapter_name] = nn.Parameter(torch.empty(r, shape[1][1] * shape[2] * shape[3])) + else: + # Linear + if use_w2: + self.lokr_w2[adapter_name] = nn.Parameter(torch.empty(shape[0][1], shape[1][1])) + else: + self.lokr_w2_a[adapter_name] = nn.Parameter(torch.empty(shape[0][1], r)) + self.lokr_w2_b[adapter_name] = nn.Parameter(torch.empty(r, shape[1][1])) + + def reset_lokr_parameters(self, adapter_name: str): + if adapter_name in self.lokr_w1: + nn.init.kaiming_uniform_(self.lokr_w1[adapter_name], a=math.sqrt(5)) + if adapter_name in self.lokr_w2: + nn.init.kaiming_uniform_(self.lokr_w2[adapter_name], a=math.sqrt(5)) + if adapter_name in self.lokr_w1_a: + nn.init.kaiming_uniform_(self.lokr_w1_a[adapter_name], a=math.sqrt(5)) + nn.init.zeros_(self.lokr_w1_b[adapter_name]) + if adapter_name in self.lokr_w2_a: + nn.init.kaiming_uniform_(self.lokr_w2_a[adapter_name], a=math.sqrt(5)) + nn.init.zeros_(self.lokr_w2_b[adapter_name]) + if adapter_name in self.lokr_t2: + nn.init.kaiming_uniform_(self.lokr_t2[adapter_name], a=math.sqrt(5)) + + def update_layer( + self, + adapter_name: str, + r: int, + alpha: float, + rank_dropout: float, + module_dropout: float, + init_weights: bool, + use_effective_conv2d: bool, + decompose_both: bool, + decompose_factor: int, + **kwargs, + ) -> None: + """Internal function to create lokr adapter + + Args: + shape (`Tuple[int, ...]`): Shape of weights to produce + adapter_name (`str`): Name for the adapter to add + r (`int`): Rank for the added adapter + alpha (`float`): Alpha for the added adapter + rank_dropout (`float`): The dropout probability for rank dimension during training + module_dropout (`float`): The dropout probability for disabling adapter during training. 
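+ use_effective_conv2d (`bool`): Use parameter effective decomposition for Conv2d with ksize > 1
+ decompose_both (`bool`): Perform rank decomposition of left kronecker product matrix
+ decompose_factor (`int`): Kronecker product decomposition factor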
+ init_weights (`bool`): Whether to initialize weights + """ + + self.r[adapter_name] = r + self.alpha[adapter_name] = alpha + self.scaling[adapter_name] = alpha / r + self.rank_dropout[adapter_name] = rank_dropout + self.module_dropout[adapter_name] = module_dropout + + # Determine shape of LoKr weights + if isinstance(self, nn.Linear): + in_dim, out_dim = self.in_features, self.out_features + + in_m, in_n = factorization(in_dim, decompose_factor) + out_l, out_k = factorization(out_dim, decompose_factor) + shape = ((out_l, out_k), (in_m, in_n)) # ((a, b), (c, d)), out_dim = a*c, in_dim = b*d + + use_w1 = not (decompose_both and r < max(shape[0][0], shape[1][0]) / 2) + use_w2 = not (r < max(shape[0][1], shape[1][1]) / 2) + use_effective_conv2d = False + elif isinstance(self, nn.Conv2d): + in_dim, out_dim = self.in_channels, self.out_channels + k_size = self.kernel_size + + in_m, in_n = factorization(in_dim, decompose_factor) + out_l, out_k = factorization(out_dim, decompose_factor) + shape = ((out_l, out_k), (in_m, in_n), *k_size) # ((a, b), (c, d), *k_size) + + use_w1 = not (decompose_both and r < max(shape[0][0], shape[1][0]) / 2) + use_w2 = r >= max(shape[0][1], shape[1][1]) / 2 + use_effective_conv2d = use_effective_conv2d and self.kernel_size != (1, 1) + else: + raise NotImplementedError(f"LoHa is not implemented for {type(self).__name__} layer") + + # Create weights with provided shape + self.create_lokr_parameters(adapter_name, r, shape, use_w1, use_w2, use_effective_conv2d) + + # Initialize weights + if init_weights: + self.reset_lokr_parameters(adapter_name) + + # Move new weights to device + weight = getattr(self, "weight", None) + if weight is not None: + # the layer is already completely initialized, this is an update + if weight.dtype.is_floating_point or weight.dtype.is_complex: + self.to(weight.device, dtype=weight.dtype) + else: + self.to(weight.device) + self.set_adapter(self.active_adapters) + + def get_delta_weight(self, adapter_name: str) -> torch.Tensor: + # https://github.com/KohakuBlueleaf/LyCORIS/blob/eb460098187f752a5d66406d3affade6f0a07ece/lycoris/modules/loha.py#L178 + if adapter_name in self.lokr_w1: + w1 = self.lokr_w1[adapter_name] + else: + w1 = self.lokr_w1_a[adapter_name] @ self.lokr_w1_b[adapter_name] + + if adapter_name in self.lokr_w2: + w2 = self.lokr_w2[adapter_name] + elif adapter_name in self.lokr_t2: + w2 = make_weight_cp(self.lokr_t2[adapter_name], self.lokr_w2_a[adapter_name], self.lokr_w2_b[adapter_name]) + else: + w2 = self.lokr_w2_a[adapter_name] @ self.lokr_w2_b[adapter_name] + + # Make weights with Kronecker product + weight = make_kron(w1, w2) + weight = weight.reshape(self.weight.shape) + + # Perform rank dropout during training - drop rows of addition weights + rank_dropout = self.rank_dropout[adapter_name] + if self.training and rank_dropout: + drop = (torch.rand(weight.size(0)) > rank_dropout).float() + drop = drop.view(-1, *[1] * len(weight.shape[1:])).to(weight.device) + drop /= drop.mean() + weight *= drop + + return weight + + def merge(self) -> None: + if self.merged: + warnings.warn( + f"Already following adapters were merged {','.join(self.merged_adapters)}. " + f"You are now additionally merging {','.join(self.active_adapters)}." 
+ ) + for active_adapter in self.active_adapters: + # if active_adapter in self.hada_w1_a.keys(): + if active_adapter in self._available_adapters: + self.weight.data += self.get_delta_weight(active_adapter) + self.merged_adapters.append(active_adapter) + self.merged = True + + def unmerge(self) -> None: + if not self.merged: + warnings.warn("Already unmerged. Nothing to do.") + return + while len(self.merged_adapters) > 0: + active_adapter = self.merged_adapters.pop() + # if active_adapter in self.hada_w1_a.keys(): + if active_adapter in self._available_adapters: + self.weight.data -= self.get_delta_weight(active_adapter) + self.merged = False + + def _op(self, x: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: + raise NotImplementedError + + def forward(self, x: torch.Tensor) -> torch.Tensor: + previous_dtype = x.dtype + + if self.disable_adapters: + if self.merged: + self.unmerge() + result = self._op(x, self.weight) + elif self.merged: + result = self._op(x, self.weight) + else: + # Get base weights + weight = self.weight.data + + # Execute all the adapters + for active_adapter in self.active_adapters: + # if active_adapter not in self.hada_w1_a.keys(): + if active_adapter not in self._available_adapters: + continue + + module_dropout = self.module_dropout[active_adapter] + + # Modify current execution weights + if (not self.training) or (self.training and torch.rand(1) > module_dropout): + weight = weight + self.get_delta_weight(active_adapter) + + # Perform actual operation + result = self._op(x, weight) + + result = result.to(previous_dtype) + return result + + def scale_layer(self, scale_factor: float) -> None: + if scale_factor != 1: + for active_adapter in self.active_adapters: + alpha = self.alpha[active_adapter] + r = self.r[active_adapter] + self.scaling[active_adapter] = (alpha / r) * scale_factor + + def unscale_layer(self) -> None: + for active_adapter in self.active_adapters: + alpha = self.alpha[active_adapter] + r = self.r[active_adapter] + self.scaling[active_adapter] = alpha / r + + +class Linear(LoKrLayer, nn.Linear): + """LoKr implemented in Linear layer""" + + def __init__( + self, + in_features: int, + out_features: int, + bias: bool = True, + device: Optional[Union[str, torch.device]] = None, + dtype: Optional[torch.dtype] = None, + adapter_name: str = "default", + r: int = 0, + alpha: float = 0.0, + rank_dropout: float = 0.0, + module_dropout: float = 0.0, + **kwargs, + ): + init_weights = kwargs.pop("init_weights", True) + self._init_empty_weights(nn.Linear, in_features, out_features, bias, device=device, dtype=dtype) + + LoKrLayer.__init__(self) + + # Create adapter and set it active + self.update_layer(adapter_name, r, alpha, rank_dropout, module_dropout, init_weights, **kwargs) + self.set_adapter(adapter_name) + + def _op(self, input: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: + return F.linear(input, weight, bias=self.bias) + + +class Conv2d(LoKrLayer, nn.Conv2d): + """LoKr implemented in Conv2d layer""" + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: Union[int, Tuple[int]], + stride: Union[int, Tuple[int]] = 1, + padding: Union[int, Tuple[int]] = 0, + dilation: int = 1, + groups: int = 1, + bias: bool = True, + padding_mode: str = "zeros", + device: Optional[Union[str, torch.device]] = None, + dtype: Optional[torch.dtype] = None, + adapter_name: str = "default", + r: int = 0, + alpha: float = 0.0, + rank_dropout: float = 0.0, + module_dropout: float = 0.0, + use_effective_conv2d: bool = False, + **kwargs, + ): 
+ init_weights = kwargs.pop("init_weights", True) + self._init_empty_weights( + nn.Conv2d, + in_channels, + out_channels, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=bias, + padding_mode=padding_mode, + device=device, + dtype=dtype, + ) + + LoKrLayer.__init__(self) + + # Create adapter and set it active + self.update_layer( + adapter_name, r, alpha, rank_dropout, module_dropout, init_weights, use_effective_conv2d, **kwargs + ) + self.set_adapter(adapter_name) + + def _op(self, input: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: + return F.conv2d( + input, + weight, + bias=self.bias, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + groups=self.groups, + ) + + +# Below code is a direct copy from https://github.com/KohakuBlueleaf/LyCORIS/blob/eb460098187f752a5d66406d3affade6f0a07ece/lycoris/modules/lokr.py#L11 + + +def factorization(dimension: int, factor: int = -1) -> Tuple[int, int]: + """ + return a tuple of two value of input dimension decomposed by the number closest to factor second value is higher or + equal than first value. + + In LoRA with Kroneckor Product, first value is a value for weight scale. secon value is a value for weight. + + Becuase of non-commutative property, A⊗B ≠ B⊗A. Meaning of two matrices is slightly different. + + examples) factor + -1 2 4 8 16 ... + 127 -> 127, 1 127 -> 127, 1 127 -> 127, 1 127 -> 127, 1 127 -> 127, 1 128 -> 16, 8 128 -> 64, 2 128 -> 32, 4 128 -> + 16, 8 128 -> 16, 8 250 -> 125, 2 250 -> 125, 2 250 -> 125, 2 250 -> 125, 2 250 -> 125, 2 360 -> 45, 8 360 -> 180, 2 + 360 -> 90, 4 360 -> 45, 8 360 -> 45, 8 512 -> 32, 16 512 -> 256, 2 512 -> 128, 4 512 -> 64, 8 512 -> 32, 16 1024 -> + 32, 32 1024 -> 512, 2 1024 -> 256, 4 1024 -> 128, 8 1024 -> 64, 16 + """ + + if factor > 0 and (dimension % factor) == 0: + m = factor + n = dimension // factor + return m, n + if factor == -1: + factor = dimension + m, n = 1, dimension + length = m + n + while m < n: + new_m = m + 1 + while dimension % new_m != 0: + new_m += 1 + new_n = dimension // new_m + if new_m + new_n > length or new_m > factor: + break + else: + m, n = new_m, new_n + if m > n: + n, m = m, n + return m, n + + +def make_weight_cp(t, wa, wb): + rebuild2 = torch.einsum("i j k l, i p, j r -> p r k l", t, wa, wb) # [c, d, k1, k2] + return rebuild2 + + +def make_kron(w1, w2, scale=1.0): + if len(w2.shape) == 4: + w1 = w1.unsqueeze(2).unsqueeze(2) + w2 = w2.contiguous() + rebuild = torch.kron(w1, w2) + + return rebuild * scale diff --git a/src/peft/tuners/lokr/model.py b/src/peft/tuners/lokr/model.py new file mode 100644 index 0000000000..3757ecac22 --- /dev/null +++ b/src/peft/tuners/lokr/model.py @@ -0,0 +1,285 @@ +# coding=utf-8 +# Copyright 2023-present the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding=utf-8 +# Copyright 2023-present the HuggingFace Inc. team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +import warnings +from itertools import chain +from typing import Union + +import torch +from torch import nn +from tqdm import tqdm + +from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer, check_target_module_exists +from peft.utils import ( + ModulesToSaveWrapper, + _get_submodules, +) + +from .layer import Conv2d, Linear, LoKrLayer + + +class LoKrModel(BaseTuner): + """ + Creates Low-Rank Kronecker Product model from a pretrained model. The original method is partially described in + https://arxiv.org/abs/2108.06098 and in https://arxiv.org/abs/2309.14859 Current implementation heavily borrows + from + https://github.com/KohakuBlueleaf/LyCORIS/blob/eb460098187f752a5d66406d3affade6f0a07ece/lycoris/modules/lokr.py + + Args: + model (`torch.nn.Module`): The model to which the adapter tuner layers will be attached. + config ([`LoKrConfig`]): The configuration of the LoKr model. + adapter_name (`str`): The name of the adapter, defaults to `"default"`. + + Returns: + `torch.nn.Module`: The LoKr model. + + Example: + ```py + >>> from diffusers import StableDiffusionPipeline + >>> from peft import LoKrModel, LoKrConfig + + >>> config_te = LoKrConfig( + ... r=8, + ... lora_alpha=32, + ... target_modules=["k_proj", "q_proj", "v_proj", "out_proj", "fc1", "fc2"], + ... rank_dropout=0.0, + ... module_dropout=0.0, + ... init_weights=True, + ... ) + >>> config_unet = LoKrConfig( + ... r=8, + ... lora_alpha=32, + ... target_modules=[ + ... "proj_in", + ... "proj_out", + ... "to_k", + ... "to_q", + ... "to_v", + ... "to_out.0", + ... "ff.net.0.proj", + ... "ff.net.2", + ... ], + ... rank_dropout=0.0, + ... module_dropout=0.0, + ... init_weights=True, + ... use_effective_conv2d=True, + ... ) + + >>> model = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5") + >>> model.text_encoder = LoKrModel(model.text_encoder, config_te, "default") + >>> model.unet = LoKrModel(model.unet, config_unet, "default") + ``` + + **Attributes**: + - **model** ([`~torch.nn.Module`]) -- The model to be adapted. + - **peft_config** ([`LoKrConfig`]): The configuration of the LoKr model. 
+ """ + + def __init__(self, model, config, adapter_name): + super().__init__(model, config, adapter_name) + + def __getattr__(self, name: str): + """Forward missing attributes to the wrapped module.""" + try: + return super().__getattr__(name) # defer to nn.Module's logic + except AttributeError: + return getattr(self.model, name) + + def _set_adapter_layers(self, enabled=True): + for module in self.model.modules(): + if isinstance(module, (BaseTunerLayer, ModulesToSaveWrapper)): + module.enable_adapters(enabled) + + def enable_adapter_layers(self): + self._set_adapter_layers(enabled=True) + + def disable_adapter_layers(self): + self._set_adapter_layers(enabled=False) + + def set_adapter(self, adapter_name): + for module in self.model.modules(): + if isinstance(module, LoKrLayer): + if module.merged: + warnings.warn("Adapter cannot be set when the model is merged. Unmerging the model first.") + module.unmerge() + module.set_adapter(adapter_name) + + @staticmethod + def _prepare_adapter_config(peft_config, model_config): + if peft_config.target_modules is None: + raise ValueError("Please specify `target_modules` in `peft_config`") + return peft_config + + @staticmethod + def _check_target_module_exists(lokr_config, key): + return check_target_module_exists(lokr_config, key) + + def _create_and_replace( + self, + lokr_config, + adapter_name: str, + target: Union[LoKrLayer, nn.Module], + target_name, + parent, + current_key, + **optional_kwargs, + ): + """ + A private method to create and replace the target module with the adapter module. + """ + + # Regexp matching - Find key which matches current target_name in patterns provided + pattern_keys = list(chain(lokr_config.rank_pattern.keys(), lokr_config.alpha_pattern.keys())) + target_name_key = next(filter(lambda key: re.match(f"(.*\.)?{key}$", current_key), pattern_keys), target_name) + + r = lokr_config.rank_pattern.get(target_name_key, lokr_config.r) + alpha = lokr_config.alpha_pattern.get(target_name_key, lokr_config.alpha) + + kwargs = { + "r": r, + "alpha": alpha, + "rank_dropout": lokr_config.rank_dropout, + "module_dropout": lokr_config.module_dropout, + "use_effective_conv2d": lokr_config.use_effective_conv2d, + "init_weights": lokr_config.init_weights, + "decompose_both": lokr_config.decompose_both, + "decompose_factor": lokr_config.decompose_factor, + } + + if isinstance(target, LoKrLayer): + target.update_layer(adapter_name, **kwargs) + else: + new_module = self._create_new_module(lokr_config, adapter_name, target, **kwargs) + self._replace_module(parent, target_name, new_module, target) + + @staticmethod + def _create_new_module(lokr_config, adapter_name, target, **kwargs) -> LoKrLayer: + if isinstance(target, torch.nn.Conv2d): + new_module = Conv2d( + target.in_channels, + target.out_channels, + target.weight.size()[2:], + stride=target.stride, + padding=target.padding, + dilation=target.dilation, + groups=target.groups, + bias=target.bias is not None, + padding_mode=target.padding_mode, + device=target.weight.device, + dtype=target.weight.dtype, + adapter_name=adapter_name, + **kwargs, + ) + elif isinstance(target, torch.nn.Linear): + new_module = Linear( + target.in_features, + target.out_features, + bias=target.bias is not None, + device=target.weight.device, + dtype=target.weight.dtype, + adapter_name=adapter_name, + **kwargs, + ) + else: + raise ValueError( + "Target module not found, currently only adapters for nn.Linear and nn.Conv2d are supported" + ) + return new_module + + @staticmethod + def _replace_module(parent, 
child_name, new_module, child): + setattr(parent, child_name, new_module) + # It's not necessary to set requires_grad here, as that is handled by + # _mark_only_adapters_as_trainable + new_module.weight = child.weight + if hasattr(child, "bias"): + new_module.bias = child.bias + + if getattr(child, "state", None) is not None: + new_module.state = child.state + new_module.to(child.weight.device) + + # dispatch to correct device + for name, module in new_module.named_modules(): + if "hada_" in name: + module.to(child.weight.device) + + def _mark_only_adapters_as_trainable(self) -> None: + for n, p in self.model.named_parameters(): + if "hada_" not in n: + p.requires_grad = False + + def merge_and_unload(self, progressbar: bool = False): + return self._unload_and_optionally_merge(progressbar=progressbar) + + def _unload_and_optionally_merge(self, merge=True, progressbar: bool = False): + if merge: + if getattr(self.model, "quantization_method", None) == "gptq": + raise ValueError("Cannot merge LOKR layers when the model is gptq quantized") + + key_list = [key for key, _ in self.model.named_modules() if "hada" not in key] + desc = "Unloading " + ("and merging " if merge else "") + "model" + for key in tqdm(key_list, disable=not progressbar, desc=desc): + try: + parent, target, target_name = _get_submodules(self.model, key) + except AttributeError: + continue + if isinstance(target, LoKrLayer): + if isinstance(target, nn.Conv2d): + new_module = torch.nn.Conv2d( + target.in_channels, + target.out_channels, + kernel_size=target.kernel_size, + stride=target.stride, + padding=target.padding, + dilation=target.dilation, + ) + elif isinstance(target, nn.Linear): + bias = target.bias is not None + new_module = torch.nn.Linear( + target.in_features, + target.out_features, + bias=bias, + device=target.weight.device, + ) + else: + raise ValueError( + "Cannot convert current module to torch module, currently only adapters for nn.Linear and nn.Conv2d are supported" + ) + if merge: + target.merge() + self._replace_module(parent, target_name, new_module, target) + + # save any additional trainable modules part of `modules_to_save` + if isinstance(target, ModulesToSaveWrapper): + setattr(parent, target_name, target.modules_to_save[target.active_adapter]) + + return self.model diff --git a/src/peft/utils/peft_types.py b/src/peft/utils/peft_types.py index d073be81c3..29c764a08f 100644 --- a/src/peft/utils/peft_types.py +++ b/src/peft/utils/peft_types.py @@ -29,6 +29,7 @@ class PeftType(str, enum.Enum): ADAPTION_PROMPT = "ADAPTION_PROMPT" IA3 = "IA3" LOHA = "LOHA" + LOKR = "LOKR" class TaskType(str, enum.Enum): diff --git a/src/peft/utils/save_and_load.py b/src/peft/utils/save_and_load.py index ff00541121..cd8088e93e 100644 --- a/src/peft/utils/save_and_load.py +++ b/src/peft/utils/save_and_load.py @@ -75,6 +75,9 @@ def get_peft_model_state_dict(model, state_dict=None, adapter_name="default", un elif config.peft_type == PeftType.LOHA: to_return = {k: state_dict[k] for k in state_dict if "hada_" in k} + elif config.peft_type == PeftType.LOKR: + to_return = {k: state_dict[k] for k in state_dict if "lokr_" in k} + elif config.peft_type == PeftType.ADAPTION_PROMPT: to_return = {k: state_dict[k] for k in state_dict if k.split(".")[-1].startswith("adaption_")} elif config.is_prompt_learning: @@ -123,13 +126,14 @@ def set_peft_model_state_dict(model, peft_model_state_dict, adapter_name="defaul else: state_dict = peft_model_state_dict - if config.peft_type in (PeftType.LORA, PeftType.LOHA, PeftType.ADALORA, 
PeftType.IA3): + if config.peft_type in (PeftType.LORA, PeftType.LOHA, PeftType.LOKR, PeftType.ADALORA, PeftType.IA3): peft_model_state_dict = {} parameter_prefix = { PeftType.IA3: "ia3_", PeftType.LORA: "lora_", PeftType.ADALORA: "lora_", PeftType.LOHA: "hada_", + PeftType.LOKR: "lokr_", }[config.peft_type] for k, v in state_dict.items(): if parameter_prefix in k: From e7d6e238e5efa8e433c9d6939e9c146ec4a9f221 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Tue, 3 Oct 2023 19:32:06 +0300 Subject: [PATCH 03/33] Fixed setting requires_grad for lokr modules --- src/peft/tuners/lokr/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/peft/tuners/lokr/model.py b/src/peft/tuners/lokr/model.py index 3757ecac22..458978176f 100644 --- a/src/peft/tuners/lokr/model.py +++ b/src/peft/tuners/lokr/model.py @@ -234,7 +234,7 @@ def _replace_module(parent, child_name, new_module, child): def _mark_only_adapters_as_trainable(self) -> None: for n, p in self.model.named_parameters(): - if "hada_" not in n: + if "lokr_" not in n: p.requires_grad = False def merge_and_unload(self, progressbar: bool = False): From bb45764b339dd004f522bbe5129f07762c65e0ea Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Wed, 4 Oct 2023 13:26:08 +0300 Subject: [PATCH 04/33] Updated initialization of LoKr adapter weights --- src/peft/tuners/lokr/layer.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/peft/tuners/lokr/layer.py b/src/peft/tuners/lokr/layer.py index e78fc7c32f..eb280d440b 100644 --- a/src/peft/tuners/lokr/layer.py +++ b/src/peft/tuners/lokr/layer.py @@ -121,15 +121,17 @@ def create_lokr_parameters( def reset_lokr_parameters(self, adapter_name: str): if adapter_name in self.lokr_w1: - nn.init.kaiming_uniform_(self.lokr_w1[adapter_name], a=math.sqrt(5)) + nn.init.zeros_(self.lokr_w1[adapter_name]) + else: + nn.init.zeros_(self.lokr_w1_a[adapter_name]) + nn.init.kaiming_uniform_(self.lokr_w1_b[adapter_name], a=math.sqrt(5)) + if adapter_name in self.lokr_w2: nn.init.kaiming_uniform_(self.lokr_w2[adapter_name], a=math.sqrt(5)) - if adapter_name in self.lokr_w1_a: - nn.init.kaiming_uniform_(self.lokr_w1_a[adapter_name], a=math.sqrt(5)) - nn.init.zeros_(self.lokr_w1_b[adapter_name]) - if adapter_name in self.lokr_w2_a: + else: nn.init.kaiming_uniform_(self.lokr_w2_a[adapter_name], a=math.sqrt(5)) - nn.init.zeros_(self.lokr_w2_b[adapter_name]) + nn.init.kaiming_uniform_(self.lokr_w2_b[adapter_name], a=math.sqrt(5)) + if adapter_name in self.lokr_t2: nn.init.kaiming_uniform_(self.lokr_t2[adapter_name], a=math.sqrt(5)) @@ -187,7 +189,7 @@ def update_layer( use_w2 = r >= max(shape[0][1], shape[1][1]) / 2 use_effective_conv2d = use_effective_conv2d and self.kernel_size != (1, 1) else: - raise NotImplementedError(f"LoHa is not implemented for {type(self).__name__} layer") + raise NotImplementedError(f"LoKr is not implemented for {type(self).__name__} layer") # Create weights with provided shape self.create_lokr_parameters(adapter_name, r, shape, use_w1, use_w2, use_effective_conv2d) @@ -207,7 +209,7 @@ def update_layer( self.set_adapter(self.active_adapters) def get_delta_weight(self, adapter_name: str) -> torch.Tensor: - # https://github.com/KohakuBlueleaf/LyCORIS/blob/eb460098187f752a5d66406d3affade6f0a07ece/lycoris/modules/loha.py#L178 + # https://github.com/KohakuBlueleaf/LyCORIS/blob/e4259b870d3354a9615a96be61cb5d07455c58ea/lycoris/modules/lokr.py#L224 if adapter_name in self.lokr_w1: w1 = self.lokr_w1[adapter_name] else: From 
0c33d8c3d1a1e9be6a7bce469504262816bbf5ca Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Wed, 4 Oct 2023 13:27:07 +0300 Subject: [PATCH 05/33] Updated docstrings for LoKr params --- src/peft/tuners/lokr/__init__.py | 3 ++- src/peft/tuners/lokr/config.py | 18 +++++++++--------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/peft/tuners/lokr/__init__.py b/src/peft/tuners/lokr/__init__.py index bb138202fd..b137f22c96 100644 --- a/src/peft/tuners/lokr/__init__.py +++ b/src/peft/tuners/lokr/__init__.py @@ -14,7 +14,8 @@ # limitations under the License. from .config import LoKrConfig +from .layer import Conv2d, Linear, LoKrLayer from .model import LoKrModel -__all__ = ["LoKrConfig", "LoKrModel"] +__all__ = ["LoKrConfig", "LoKrModel", "Conv2d", "Linear", "LoKrLayer"] diff --git a/src/peft/tuners/lokr/config.py b/src/peft/tuners/lokr/config.py index eb36c5673f..c2e3d0536c 100644 --- a/src/peft/tuners/lokr/config.py +++ b/src/peft/tuners/lokr/config.py @@ -29,7 +29,7 @@ class LoKrConfig(PeftConfig): r (`int`): LoKr rank. alpha (`int`): The alpha parameter for LoKr scaling. rank_dropout (`int`): The dropout probability for rank dimension during training. - module_dropout (`int`): The dropout probability for disabling LoHa modules during training. + module_dropout (`int`): The dropout probability for disabling LoKr modules during training. use_effective_conv2d (`bool`): Use parameter effective decomposition for Conv2d with ksize > 1 ("Proposition 3" from FedPara paper). decompose_both (`bool`): Perform rank decomposition of left kronecker product matrix. @@ -37,7 +37,7 @@ class LoKrConfig(PeftConfig): target_modules (`Union[List[str],str]`): The names of the modules to apply LoKr to. init_weights (`bool`): Whether to perform initialization of LoKr weights. layers_to_transform (`Union[List[int],int]`): - The layer indexes to transform, if this argument is specified, it will apply the LoHa transformations on + The layer indexes to transform, if this argument is specified, it will apply the LoKr transformations on the layer indexes that are specified in this list. If a single integer is passed, it will apply the LoKr transformations on the layer at this index. layers_pattern (`str`): @@ -49,16 +49,16 @@ class LoKrConfig(PeftConfig): alpha_pattern (`dict`): The mapping from layer names or regexp expression to alphas which are different from the default alpha specified by `alpha`. - modules_to_save (`List[str]`): The names of modules to be set as trainable except LoHa parameters. + modules_to_save (`List[str]`): The names of modules to be set as trainable except LoKr parameters. """ - r: int = field(default=8, metadata={"help": "LoHa rank"}) - alpha: int = field(default=8, metadata={"help": "LoHa alpha"}) + r: int = field(default=8, metadata={"help": "LoKr rank"}) + alpha: int = field(default=8, metadata={"help": "LoKr alpha"}) rank_dropout: float = field( default=0.0, metadata={"help": "The dropout probability for rank dimension during training"} ) module_dropout: float = field( - default=0.0, metadata={"help": "The dropout probability for disabling LoHa modules during training"} + default=0.0, metadata={"help": "The dropout probability for disabling LoKr modules during training"} ) use_effective_conv2d: bool = field( default=False, @@ -74,7 +74,7 @@ class LoKrConfig(PeftConfig): target_modules: Optional[Union[List[str], str]] = field( default=None, metadata={ - "help": "List of module names or regex expression of the module names to replace with LoHa." 
+ "help": "List of module names or regex expression of the module names to replace with LoKr." "For example, ['q', 'v'] or '.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$' " }, ) @@ -82,7 +82,7 @@ class LoKrConfig(PeftConfig): default=True, metadata={ "help": ( - "Whether to initialize the weights of the LoHa layers with their default initialization. Don't change " + "Whether to initialize the weights of the LoKr layers with their default initialization. Don't change " "this setting, except if you know exactly what you're doing." ), }, @@ -120,7 +120,7 @@ class LoKrConfig(PeftConfig): modules_to_save: Optional[List[str]] = field( default=None, metadata={ - "help": "List of modules apart from LoHA layers to be set as trainable and saved in the final checkpoint. " + "help": "List of modules apart from LoKr layers to be set as trainable and saved in the final checkpoint. " "For example, in Sequence Classification or Token Classification tasks, " "the final layer `classifier/score` are randomly initialized and as such need to be trainable and saved." }, From 84b890bb0c23216081ee208d36eef6b6a5ebe167 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Wed, 4 Oct 2023 15:36:52 +0300 Subject: [PATCH 06/33] Removed unneccessary comments --- src/peft/tuners/lokr/layer.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/peft/tuners/lokr/layer.py b/src/peft/tuners/lokr/layer.py index eb280d440b..92da95f80d 100644 --- a/src/peft/tuners/lokr/layer.py +++ b/src/peft/tuners/lokr/layer.py @@ -243,7 +243,6 @@ def merge(self) -> None: f"You are now additionally merging {','.join(self.active_adapters)}." ) for active_adapter in self.active_adapters: - # if active_adapter in self.hada_w1_a.keys(): if active_adapter in self._available_adapters: self.weight.data += self.get_delta_weight(active_adapter) self.merged_adapters.append(active_adapter) @@ -255,7 +254,6 @@ def unmerge(self) -> None: return while len(self.merged_adapters) > 0: active_adapter = self.merged_adapters.pop() - # if active_adapter in self.hada_w1_a.keys(): if active_adapter in self._available_adapters: self.weight.data -= self.get_delta_weight(active_adapter) self.merged = False @@ -278,7 +276,6 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: # Execute all the adapters for active_adapter in self.active_adapters: - # if active_adapter not in self.hada_w1_a.keys(): if active_adapter not in self._available_adapters: continue From fd4a7541c01c2068958e2a34a0b08929d00b784c Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Wed, 4 Oct 2023 15:38:09 +0300 Subject: [PATCH 07/33] Modified sd dreambooth script to be able to train LoRA, LoHa, LoKr adapters --- ...dreambooth_loha.py => train_dreambooth.py} | 274 ++++++++++++++---- 1 file changed, 216 insertions(+), 58 deletions(-) rename examples/stable_diffusion/{train_dreambooth_loha.py => train_dreambooth.py} (84%) diff --git a/examples/stable_diffusion/train_dreambooth_loha.py b/examples/stable_diffusion/train_dreambooth.py similarity index 84% rename from examples/stable_diffusion/train_dreambooth_loha.py rename to examples/stable_diffusion/train_dreambooth.py index 944a8394b6..8364603a0b 100644 --- a/examples/stable_diffusion/train_dreambooth_loha.py +++ b/examples/stable_diffusion/train_dreambooth.py @@ -8,7 +8,7 @@ import threading import warnings from pathlib import Path -from typing import Optional +from typing import Optional, Union import datasets import diffusers @@ -38,7 +38,7 @@ from tqdm.auto import tqdm from transformers import AutoTokenizer, PretrainedConfig 
-from peft import LoHaConfig, get_peft_model +from peft import LoHaConfig, LoKrConfig, LoraConfig, get_peft_model # Will error if the minimal version of diffusers is not installed. Remove at your own risks. @@ -85,6 +85,80 @@ def import_model_class_from_model_name_or_path(pretrained_model_name_or_path: st raise ValueError(f"{model_class} is not supported.") +def create_unet_adapter_config(args: argparse.Namespace) -> Union[LoraConfig, LoHaConfig, LoKrConfig]: + if args.adapter == "full": + raise ValueError("Cannot create unet adapter config for full parameter") + + if args.adapter == "lora": + config = LoraConfig( + r=args.unet_r, + lora_alpha=args.unet_alpha, + target_modules=UNET_TARGET_MODULES, + lora_dropout=args.unet_dropout, + bias=args.unet_bias, + init_lora_weights=True, + ) + elif args.adapter == "loha": + config = LoHaConfig( + r=args.unet_r, + alpha=args.unet_alpha, + target_modules=UNET_TARGET_MODULES, + rank_dropout=args.unet_rank_dropout, + module_dropout=args.unet_module_dropout, + use_effective_conv2d=args.unet_use_effective_conv2d, + init_weights=True, + ) + elif args.adapter == "lokr": + config = LoKrConfig( + r=args.unet_r, + alpha=args.unet_alpha, + target_modules=UNET_TARGET_MODULES, + rank_dropout=args.unet_rank_dropout, + module_dropout=args.unet_module_dropout, + use_effective_conv2d=args.unet_use_effective_conv2d, + decompose_both=args.unet_decompose_both, + decompose_factor=args.unet_decompose_factor, + init_weights=True, + ) + return config + + +def create_te_adapter_config(args: argparse.Namespace) -> Union[LoraConfig, LoHaConfig, LoKrConfig]: + if args.adapter == "full": + raise ValueError("Cannot create text_encoder adapter config for full parameter") + + if args.adapter == "lora": + config = LoraConfig( + r=args.te_r, + lora_alpha=args.te_alpha, + target_modules=TEXT_ENCODER_TARGET_MODULES, + lora_dropout=args.te_dropout, + bias=args.te_bias, + init_lora_weights=True, + ) + elif args.adapter == "loha": + config = LoHaConfig( + r=args.te_r, + alpha=args.te_alpha, + target_modules=TEXT_ENCODER_TARGET_MODULES, + rank_dropout=args.te_rank_dropout, + module_dropout=args.te_module_dropout, + init_weights=True, + ) + elif args.adapter == "lokr": + config = LoKrConfig( + r=args.te_r, + alpha=args.te_alpha, + target_modules=TEXT_ENCODER_TARGET_MODULES, + rank_dropout=args.te_rank_dropout, + module_dropout=args.te_module_dropout, + decompose_both=args.te_decompose_both, + decompose_factor=args.te_decompose_factor, + init_weights=True, + ) + return config + + def parse_args(input_args=None): parser = argparse.ArgumentParser(description="Simple example of a training script.") parser.add_argument( @@ -192,41 +266,6 @@ def parse_args(input_args=None): ) parser.add_argument("--train_text_encoder", action="store_true", help="Whether to train the text encoder") - # loha args - parser.add_argument("--use_loha", action="store_true", help="Whether to use LoHa for parameter efficient tuning") - parser.add_argument("--r", type=int, default=8, help="LoHa rank, only used if use_loha is True") - parser.add_argument("--alpha", type=int, default=32, help="LoHa alpha, only used if use_loha is True") - parser.add_argument("--rank_dropout", type=float, default=0.0, help="LoHa dropout for rank") - parser.add_argument("--module_dropout", type=float, default=0.0, help="LoHa dropout for disabling module at all") - parser.add_argument( - "--use_effective_conv2d", - action="store_true", - help="Use parameter effective decomposition for Conv2d 3x3 with ksize > 1", - ) - parser.add_argument( - 
"--loha_text_encoder_r", - type=int, - default=8, - help="LoHa rank for text encoder, only used if `use_loha` and `train_text_encoder` are True", - ) - parser.add_argument( - "--loha_text_encoder_alpha", - type=int, - default=32, - help="LoHa alpha for text encoder, only used if `use_loha` and `train_text_encoder` are True", - ) - parser.add_argument( - "--loha_text_encoder_rank_dropout", - type=float, - default=0.0, - help="LoHa dropout for text encoder for rank, only used if `use_loha` and `train_text_encoder` are True", - ) - parser.add_argument( - "--loha_text_encoder_module_dropout", - type=float, - default=0.0, - help="LoHa dropout for text encoder for modules, only used if `use_loha` and `train_text_encoder` are True", - ) parser.add_argument( "--train_batch_size", type=int, default=4, help="Batch size (per device) for the training dataloader." ) @@ -381,6 +420,132 @@ def parse_args(input_args=None): "--enable_xformers_memory_efficient_attention", action="store_true", help="Whether or not to use xformers." ) + # Adapter arguments + subparsers = parser.add_subparsers(dest="adapter") + + # Dummy subparser to train whole model + subparsers.add_parser("full", help="Train full model without adapters") + + # LoRA adapter + lora = subparsers.add_parser("lora", help="Use LoRA adapter") + lora.add_argument("--unet_r", type=int, default=8, help="LoRA rank for unet") + lora.add_argument("--unet_alpha", type=int, default=8, help="LoRA alpha for unet") + lora.add_argument("--unet_dropout", type=float, default=0.0, help="LoRA dropout probability for unet") + lora.add_argument( + "--unet_bias", + type=str, + default="none", + help="Bias type for LoRA. Can be 'none', 'all' or 'lora_only'", + ) + lora.add_argument( + "--te_r", type=int, default=8, help="LoRA rank for text_encoder, only used if `train_text_encoder` is True" + ) + lora.add_argument( + "--te_alpha", + type=int, + default=8, + help="LoRA alpha for text_encoder, only used if `train_text_encoder` is True", + ) + lora.add_argument( + "--te_dropout", + type=float, + default=0.0, + help="LoRA dropout probability for text_encoder, only used if `train_text_encoder` is True", + ) + lora.add_argument( + "--te_bias", + type=str, + default="none", + help="Bias type for LoRA. 
Can be 'none', 'all' or 'lora_only', only used if `train_text_encoder` is True", + ) + + # LoHa adapter + loha = subparsers.add_parser("loha", help="Use LoHa adapter") + loha.add_argument("--unet_r", type=int, default=8, help="LoHa rank for unet") + loha.add_argument("--unet_alpha", type=int, default=8, help="LoHa alpha for unet") + loha.add_argument("--unet_rank_dropout", type=float, default=0.0, help="LoHa rank_dropout probability for unet") + loha.add_argument( + "--unet_module_dropout", type=float, default=0.0, help="LoHa module_dropout probability for unet" + ) + loha.add_argument( + "--unet_use_effective_conv2d", + action="store_true", + help="Use parameter effective decomposition in unet for Conv2d 3x3 with ksize > 1", + ) + loha.add_argument( + "--te_r", type=int, default=8, help="LoHa rank for text_encoder, only used if `train_text_encoder` is True" + ) + loha.add_argument( + "--te_alpha", + type=int, + default=8, + help="LoHa alpha for text_encoder, only used if `train_text_encoder` is True", + ) + loha.add_argument( + "--te_rank_dropout", + type=float, + default=0.0, + help="LoHa rank_dropout probability for text_encoder, only used if `train_text_encoder` is True", + ) + loha.add_argument( + "--te_module_dropout", + type=float, + default=0.0, + help="LoHa module_dropout probability for text_encoder, only used if `train_text_encoder` is True", + ) + + # LoKr adapter + lokr = subparsers.add_parser("lokr", help="Use LoKr adapter") + lokr.add_argument("--unet_r", type=int, default=8, help="LoKr rank for unet") + lokr.add_argument("--unet_alpha", type=int, default=8, help="LoKr alpha for unet") + lokr.add_argument("--unet_rank_dropout", type=float, default=0.0, help="LoKr rank_dropout probability for unet") + lokr.add_argument( + "--unet_module_dropout", type=float, default=0.0, help="LoKr module_dropout probability for unet" + ) + lokr.add_argument( + "--unet_use_effective_conv2d", + action="store_true", + help="Use parameter effective decomposition in unet for Conv2d 3x3 with ksize > 1", + ) + lokr.add_argument( + "--unet_decompose_both", action="store_true", help="Decompose left matrix in kronecker product for unet" + ) + lokr.add_argument( + "--unet_decompose_factor", type=int, default=-1, help="Decompose factor in kronecker product for unet" + ) + lokr.add_argument( + "--te_r", type=int, default=8, help="LoKr rank for text_encoder, only used if `train_text_encoder` is True" + ) + lokr.add_argument( + "--te_alpha", + type=int, + default=8, + help="LoKr alpha for text_encoder, only used if `train_text_encoder` is True", + ) + lokr.add_argument( + "--te_rank_dropout", + type=float, + default=0.0, + help="LoKr rank_dropout probability for text_encoder, only used if `train_text_encoder` is True", + ) + lokr.add_argument( + "--te_module_dropout", + type=float, + default=0.0, + help="LoKr module_dropout probability for text_encoder, only used if `train_text_encoder` is True", + ) + lokr.add_argument( + "--te_decompose_both", + action="store_true", + help="Decompose left matrix in kronecker product for text_encoder, only used if `train_text_encoder` is True", + ) + lokr.add_argument( + "--te_decompose_factor", + type=int, + default=-1, + help="Decompose factor in kronecker product for text_encoder, only used if `train_text_encoder` is True", + ) + if input_args is not None: args = parser.parse_args(input_args) else: @@ -723,16 +888,8 @@ def main(args): args.pretrained_model_name_or_path, subfolder="unet", revision=args.revision ) - if args.use_loha: - config = LoHaConfig( - r=args.r, - 
alpha=args.alpha, - target_modules=UNET_TARGET_MODULES, - rank_dropout=args.rank_dropout, - module_dropout=args.module_dropout, - use_effective_conv2d=args.use_effective_conv2d, - init_weights=True, - ) + if args.adapter != "full": + config = create_unet_adapter_config(args) unet = get_peft_model(unet, config) unet.print_trainable_parameters() print(unet) @@ -740,15 +897,8 @@ def main(args): vae.requires_grad_(False) if not args.train_text_encoder: text_encoder.requires_grad_(False) - elif args.train_text_encoder and args.use_loha: - config = LoHaConfig( - r=args.loha_text_encoder_r, - alpha=args.loha_text_encoder_alpha, - target_modules=TEXT_ENCODER_TARGET_MODULES, - rank_dropout=args.loha_text_encoder_rank_dropout, - module_dropout=args.loha_text_encoder_module_dropout, - init_weights=True, - ) + elif args.train_text_encoder and args.adapter != "full": + config = create_te_adapter_config(args) text_encoder = get_peft_model(text_encoder, config) text_encoder.print_trainable_parameters() print(text_encoder) @@ -761,7 +911,7 @@ def main(args): if args.gradient_checkpointing: unet.enable_gradient_checkpointing() - if args.train_text_encoder and not args.use_loha: + if args.train_text_encoder and not args.adapter != "full": text_encoder.gradient_checkpointing_enable() # Enable TF32 for faster training on Ampere GPUs, @@ -1018,6 +1168,10 @@ def main(args): pipeline = pipeline.to(accelerator.device) pipeline.set_progress_bar_config(disable=True) + # Set evaliation mode + pipeline.unet.eval() + pipeline.text_encoder.eval() + # run inference if args.seed is not None: generator = torch.Generator(device=accelerator.device).manual_seed(args.seed) @@ -1044,6 +1198,10 @@ def main(args): } ) + # Set evaliation mode + pipeline.unet.train() + pipeline.text_encoder.train() + del pipeline torch.cuda.empty_cache() @@ -1071,7 +1229,7 @@ def main(args): # Create the pipeline using using the trained modules and save it. 
accelerator.wait_for_everyone() if accelerator.is_main_process: - if args.use_loha: + if args.adapter != "full": unwarpped_unet = accelerator.unwrap_model(unet) unwarpped_unet.save_pretrained( os.path.join(args.output_dir, "unet"), state_dict=accelerator.get_state_dict(unet) From ddfae52641143645c774be03418053660dab4d4d Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Wed, 4 Oct 2023 18:39:57 +0300 Subject: [PATCH 08/33] Updated conversion script to incorporate LoKr --- .../convert_sd_adapter_to_peft.py | 201 ++++++++++++++++-- 1 file changed, 187 insertions(+), 14 deletions(-) diff --git a/examples/stable_diffusion/convert_sd_adapter_to_peft.py b/examples/stable_diffusion/convert_sd_adapter_to_peft.py index d15537b348..e0bc2e7716 100644 --- a/examples/stable_diffusion/convert_sd_adapter_to_peft.py +++ b/examples/stable_diffusion/convert_sd_adapter_to_peft.py @@ -1,15 +1,19 @@ import argparse +import json import os from collections import Counter from dataclasses import dataclass +from functools import reduce from typing import Dict, List, Optional, Union import safetensors import torch +import torch.nn as nn from diffusers import UNet2DConditionModel from transformers import CLIPTextModel -from peft import LoHaConfig, LoraConfig, PeftType, get_peft_model, set_peft_model_state_dict +from peft import LoHaConfig, LoKrConfig, LoraConfig, PeftType, get_peft_model, set_peft_model_state_dict +from peft.tuners.lokr.layer import factorization # Default kohya_ss LoRA replacement modules @@ -21,6 +25,11 @@ PREFIX_TEXT_ENCODER = "lora_te" +def get_module_by_name(module: Union[torch.Tensor, nn.Module], access_string: str): + names = access_string.split(sep=".") + return reduce(getattr, names, module) + + @dataclass class LoRAInfo: kohya_key: str @@ -35,7 +44,7 @@ def peft_state_dict(self) -> Dict[str, torch.Tensor]: raise ValueError("At least one of lora_A or lora_B is None, they must both be provided") return { f"base_model.model{self.peft_key}.lora_A.weight": self.lora_A, - f"base_model.model.{self.peft_key}.lora_B.weight": self.lora_A, + f"base_model.model.{self.peft_key}.lora_B.weight": self.lora_B, } @@ -73,7 +82,49 @@ def peft_state_dict(self) -> Dict[str, torch.Tensor]: return state_dict -def construct_peft_loraconfig(info: Dict[str, LoRAInfo]) -> LoraConfig: +@dataclass +class LoKrInfo: + kohya_key: str + peft_key: str + alpha: Optional[float] = None + rank: Optional[int] = None + lokr_w1: Optional[torch.Tensor] = None + lokr_w1_a: Optional[torch.Tensor] = None + lokr_w1_b: Optional[torch.Tensor] = None + lokr_w2: Optional[torch.Tensor] = None + lokr_w2_a: Optional[torch.Tensor] = None + lokr_w2_b: Optional[torch.Tensor] = None + lokr_t2: Optional[torch.Tensor] = None + + def peft_state_dict(self) -> Dict[str, torch.Tensor]: + if (self.lokr_w1 is None and self.lokr_w1_a is None and self.lokr_w1_b is None) or ( + self.lokr_w2 is None and self.lokr_w2_a is None and self.lokr_w2_b is None + ): + raise ValueError( + "At least one of lokr_w1, lokr_w1_a, lokr_w1_b, lokr_w2, lokr_w2_a, lokr_w2_b is missing, they all must be provided" + ) + + state_dict = {} + + if self.lokr_w1 is not None: + state_dict[f"base_model.model.{self.peft_key}.lokr_w1"] = self.lokr_w1 + elif self.lokr_w1_a is not None: + state_dict[f"base_model.model.{self.peft_key}.lokr_w1_a"] = self.lokr_w1_a + state_dict[f"base_model.model.{self.peft_key}.lokr_w1_b"] = self.lokr_w1_b + + if self.lokr_w2 is not None: + state_dict[f"base_model.model.{self.peft_key}.lokr_w2"] = self.lokr_w2 + elif self.lokr_w2_a is not None: + 
state_dict[f"base_model.model.{self.peft_key}.lokr_w2_a"] = self.lokr_w2_a + state_dict[f"base_model.model.{self.peft_key}.lokr_w2_b"] = self.lokr_w2_b + + if self.lokr_t2 is not None: + state_dict[f"base_model.model.{self.peft_key}.lokr_t2"] = self.lokr_t2 + + return state_dict + + +def construct_peft_loraconfig(info: Dict[str, LoRAInfo], **kwargs) -> LoraConfig: """Constructs LoraConfig from data extracted from adapter checkpoint Args: @@ -91,8 +142,8 @@ def construct_peft_loraconfig(info: Dict[str, LoRAInfo]) -> LoraConfig: target_modules = sorted(info.keys()) # Determine most common rank and alpha - r = Counter(ranks.values()).most_common(1)[0] - lora_alpha = Counter(alphas.values()).most_common(1)[0] + r = int(Counter(ranks.values()).most_common(1)[0][0]) + lora_alpha = Counter(alphas.values()).most_common(1)[0][0] # Determine which modules have different rank and alpha rank_pattern = dict(sorted(filter(lambda x: x[1] != r, ranks.items()), key=lambda x: x[0])) @@ -112,7 +163,7 @@ def construct_peft_loraconfig(info: Dict[str, LoRAInfo]) -> LoraConfig: return config -def construct_peft_lohaconfig(info: Dict[str, LoHaInfo]) -> LoHaConfig: +def construct_peft_lohaconfig(info: Dict[str, LoHaInfo], **kwargs) -> LoHaConfig: """Constructs LoHaConfig from data extracted from adapter checkpoint Args: @@ -130,8 +181,8 @@ def construct_peft_lohaconfig(info: Dict[str, LoHaInfo]) -> LoHaConfig: target_modules = sorted(info.keys()) # Determine most common rank and alpha - r = Counter(ranks.values()).most_common(1)[0] - alpha = Counter(alphas.values()).most_common(1)[0] + r = int(Counter(ranks.values()).most_common(1)[0][0]) + alpha = Counter(alphas.values()).most_common(1)[0][0] # Determine which modules have different rank and alpha rank_pattern = dict(sorted(filter(lambda x: x[1] != r, ranks.items()), key=lambda x: x[0])) @@ -155,6 +206,77 @@ def construct_peft_lohaconfig(info: Dict[str, LoHaInfo]) -> LoHaConfig: return config +def construct_peft_lokrconfig(info: Dict[str, LoKrInfo], decompose_factor: int = -1, **kwargs) -> LoKrConfig: + """Constructs LoKrConfig from data extracted from adapter checkpoint + + Args: + info (Dict[str, LoKrInfo]): Information extracted from adapter checkpoint + + Returns: + LoKrConfig: config for constructing LoKr + """ + + # Unpack all ranks and alphas + ranks = {x[0]: x[1].rank for x in info.items()} + alphas = {x[0]: x[1].alpha or x[1].rank for x in info.items()} + + # Determine which modules needs to be transformed + target_modules = sorted(info.keys()) + + # Determine most common rank and alpha + r = int(Counter(ranks.values()).most_common(1)[0][0]) + alpha = Counter(alphas.values()).most_common(1)[0][0] + + # Determine which modules have different rank and alpha + rank_pattern = dict(sorted(filter(lambda x: x[1] != r, ranks.items()), key=lambda x: x[0])) + alpha_pattern = dict(sorted(filter(lambda x: x[1] != alpha, alphas.items()), key=lambda x: x[0])) + + # Determine whether any of modules have effective conv2d decomposition + use_effective_conv2d = any(((val.lokr_t2 is not None) for val in info.values())) + + # decompose_both should be enabled if any w1 matrix in any layer is decomposed into 2 + decompose_both = any((val.lokr_w1_a is not None and val.lokr_w1_b is not None) for val in info.values()) + + # Determining decompose factor is a bit tricky (but it is most often -1) + # Check that decompose_factor is equal to provided + for val in info.values(): + # Determine shape of first matrix + if val.lokr_w1 is not None: + w1_shape = tuple(val.lokr_w1.shape) + 
else: + w1_shape = (val.lokr_w1_a.shape[0], val.lokr_w1_b.shape[1]) + + # Determine shape of second matrix + if val.lokr_w2 is not None: + w2_shape = tuple(val.lokr_w2.shape[:2]) + elif val.lokr_t2 is not None: + w2_shape = (val.lokr_w2_a.shape[1], val.lokr_w2_b.shape[1]) + else: + # We may iterate over Conv2d layer, for which second item in shape is multiplied by ksize^2 + w2_shape = (val.lokr_w2_a.shape[0], val.lokr_w2_b.shape[1]) + + # We need to check, whether decompose_factor is really -1 or not + shape = (w1_shape[0], w2_shape[0]) + if factorization(shape[0] * shape[1], factor=-1) != shape: + raise ValueError("Cannot infer decompose_factor, probably it is not equal to -1") + + config = LoKrConfig( + r=r, + alpha=alpha, + target_modules=target_modules, + rank_dropout=0.0, + module_dropout=0.0, + init_weights=False, + rank_pattern=rank_pattern, + alpha_pattern=alpha_pattern, + use_effective_conv2d=use_effective_conv2d, + decompose_both=decompose_both, + decompose_factor=decompose_factor, + ) + + return config + + def combine_peft_state_dict(info: Dict[str, Union[LoRAInfo, LoHaInfo]]) -> Dict[str, torch.Tensor]: result = {} for key_info in info.values(): @@ -179,7 +301,7 @@ def detect_adapter_type(keys: List[str]) -> PeftType: elif any(x in key for x in ["lokr_w1", "lokr_w2", "lokr_t1", "lokr_t2"]): # LoKr may have the following keys: # lokr_w1, lokr_w2, lokr_w1_a, lokr_w1_b, lokr_w2_a, lokr_w2_b, lokr_t1, lokr_t2 - raise ValueError("Currently LoKr adapters are not implemented") + return PeftType.LOKR elif "diff" in key: raise ValueError("Currently full diff adapters are not implemented") else: @@ -221,22 +343,40 @@ def detect_adapter_type(keys: List[str]) -> PeftType: } ) - # Store conversion info (model_type -> peft_key -> LoRAInfo | LoHaInfo) - adapter_info: Dict[str, Dict[str, Union[LoRAInfo, LoHaInfo]]] = { + # Store conversion info (model_type -> peft_key -> LoRAInfo | LoHaInfo | LoKrInfo) + adapter_info: Dict[str, Dict[str, Union[LoRAInfo, LoHaInfo, LoKrInfo]]] = { "text_encoder": {}, "unet": {}, } + # Store decompose_factor for LoKr + decompose_factor = -1 + # Open adapter checkpoint with safetensors.safe_open(args.adapter_path, framework="pt", device="cpu") as f: # Extract information about adapter structure metadata = f.metadata() + # It may be difficult to determine rank for LoKr adapters + # If checkpoint was trained with large rank it may not be utilized during weights creation at all + # So we need to get it from checkpoint metadata (along with decompose_factor) + rank, conv_rank = None, None + if metadata is not None: + rank = metadata.get("ss_network_dim", None) + rank = int(rank) if rank else None + if "ss_network_args" in metadata: + network_args = json.loads(metadata["ss_network_args"]) + conv_rank = network_args.get("conv_dim", None) + conv_rank = int(conv_rank) if conv_rank else rank + decompose_factor = network_args.get("factor", -1) + decompose_factor = int(decompose_factor) + # Detect adapter type based on keys adapter_type = detect_adapter_type(f.keys()) adapter_info_cls = { PeftType.LORA: LoRAInfo, PeftType.LOHA: LoHaInfo, + PeftType.LOKR: LoKrInfo, }[adapter_type] # Iterate through available info and unpack all the values @@ -245,9 +385,9 @@ def detect_adapter_type(keys: List[str]) -> PeftType: # Find which model this key belongs to if kohya_key.startswith(PREFIX_TEXT_ENCODER): - model_type = "text_encoder" + model_type, model = "text_encoder", text_encoder elif kohya_key.startswith(PREFIX_UNET): - model_type = "unet" + model_type, model = "unet", unet else: 
raise ValueError(f"Cannot determine model for key: {key}") @@ -256,6 +396,9 @@ def detect_adapter_type(keys: List[str]) -> PeftType: raise ValueError(f"Cannot find corresponding key for diffusers/transformers model: {kohya_key}") peft_key = models_keys[kohya_key] + # Retrieve corresponding layer of model + layer = get_module_by_name(model, peft_key) + # Create a corresponding adapter info if peft_key not in adapter_info[model_type]: adapter_info[model_type][peft_key] = adapter_info_cls(kohya_key=kohya_key, peft_key=peft_key) @@ -285,6 +428,35 @@ def detect_adapter_type(keys: List[str]) -> PeftType: elif kohya_type == "hada_t2": adapter_info[model_type][peft_key].hada_t2 = tensor adapter_info[model_type][peft_key].rank = tensor.shape[0] + elif kohya_type == "lokr_t2": + adapter_info[model_type][peft_key].lokr_t2 = tensor + adapter_info[model_type][peft_key].rank = tensor.shape[0] + elif kohya_type == "lokr_w1": + adapter_info[model_type][peft_key].lokr_w1 = tensor + if isinstance(layer, nn.Linear) or ( + isinstance(layer, nn.Conv2d) and tuple(layer.weight.shape[2:]) == (1, 1) + ): + adapter_info[model_type][peft_key].rank = rank + elif isinstance(layer, nn.Conv2d): + adapter_info[model_type][peft_key].rank = conv_rank + elif kohya_type == "lokr_w2": + adapter_info[model_type][peft_key].lokr_w2 = tensor + if isinstance(layer, nn.Linear) or ( + isinstance(layer, nn.Conv2d) and tuple(layer.weight.shape[2:]) == (1, 1) + ): + adapter_info[model_type][peft_key].rank = rank + elif isinstance(layer, nn.Conv2d): + adapter_info[model_type][peft_key].rank = conv_rank + elif kohya_type == "lokr_w1_a": + adapter_info[model_type][peft_key].lokr_w1_a = tensor + adapter_info[model_type][peft_key].rank = tensor.shape[1] + elif kohya_type == "lokr_w1_b": + adapter_info[model_type][peft_key].lokr_w1_b = tensor + adapter_info[model_type][peft_key].rank = tensor.shape[0] + elif kohya_type == "lokr_w2_a": + adapter_info[model_type][peft_key].lokr_w2_a = tensor + elif kohya_type == "lokr_w2_b": + adapter_info[model_type][peft_key].lokr_w2_b = tensor else: raise ValueError(f"Unknown weight name in key: {key} - {kohya_type}") @@ -292,11 +464,12 @@ def detect_adapter_type(keys: List[str]) -> PeftType: construct_config_fn = { PeftType.LORA: construct_peft_loraconfig, PeftType.LOHA: construct_peft_lohaconfig, + PeftType.LOKR: construct_peft_lokrconfig, }[adapter_type] # Process each model sequentially for model, model_name in [(text_encoder, "text_encoder"), (unet, "unet")]: - config = construct_config_fn(adapter_info[model_name]) + config = construct_config_fn(adapter_info[model_name], decompose_factor=decompose_factor) model = get_peft_model(model, config) set_peft_model_state_dict(model, combine_peft_state_dict(adapter_info[model_name])) From 7526aa224f5fb06d1b5bfe5e94d14f83094f7f06 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Wed, 4 Oct 2023 18:57:06 +0300 Subject: [PATCH 09/33] Added simple tests for LoKr adapter --- tests/test_custom_models.py | 21 ++++++++++++++++++++- tests/test_stablediffusion.py | 4 +++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/tests/test_custom_models.py b/tests/test_custom_models.py index 65dab6c66f..1f0e089327 100644 --- a/tests/test_custom_models.py +++ b/tests/test_custom_models.py @@ -23,7 +23,7 @@ from torch import nn from transformers.pytorch_utils import Conv1D -from peft import AdaLoraConfig, IA3Config, LoHaConfig, LoraConfig, PeftModel, get_peft_model +from peft import AdaLoraConfig, IA3Config, LoHaConfig, LoKrConfig, LoraConfig, PeftModel, 
get_peft_model from peft.tuners.tuners_utils import BaseTunerLayer from .testing_common import PeftCommonTester @@ -73,6 +73,24 @@ ), ("Conv2d 1 LOHA", "Conv2d", LoHaConfig, {"target_modules": ["conv2d"]}), ("Conv2d 2 LOHA", "Conv2d", LoHaConfig, {"target_modules": ["conv2d", "lin0"]}), + # LoKr + ("Vanilla MLP 1 LOKR", "MLP", LoKrConfig, {"target_modules": "lin0"}), + ("Vanilla MLP 2 LOKR", "MLP", LoKrConfig, {"target_modules": ["lin0"]}), + ("Vanilla MLP 3 LOKR", "MLP", LoKrConfig, {"target_modules": ["lin1"]}), + ("Vanilla MLP 4 LOKR", "MLP", LoKrConfig, {"target_modules": ["lin0", "lin1"]}), + ("Vanilla MLP 5 LOKR", "MLP", LoKrConfig, {"target_modules": ["lin0"], "modules_to_save": ["lin1"]}), + ( + "Vanilla MLP 6 LOKR", + "MLP", + LoKrConfig, + { + "target_modules": ["lin0"], + "alpha": 4, + "module_dropout": 0.1, + }, + ), + ("Conv2d 1 LOKR", "Conv2d", LoKrConfig, {"target_modules": ["conv2d"]}), + ("Conv2d 2 LOKR", "Conv2d", LoKrConfig, {"target_modules": ["conv2d", "lin0"]}), ] MULTIPLE_ACTIVE_ADAPTERS_TEST_CASES = [ @@ -138,6 +156,7 @@ PREFIXES = { LoraConfig: "lora_", LoHaConfig: "hada_", + LoKrConfig: "lokr_", } diff --git a/tests/test_stablediffusion.py b/tests/test_stablediffusion.py index 5910287927..f14717326e 100644 --- a/tests/test_stablediffusion.py +++ b/tests/test_stablediffusion.py @@ -64,6 +64,7 @@ CLASSES_MAPPING = { "lora": (LoraConfig, CONFIG_TESTING_KWARGS[0]), "loha": (LoHaConfig, CONFIG_TESTING_KWARGS[1]), + "lokr": (LoHaConfig, CONFIG_TESTING_KWARGS[1]), } @@ -143,7 +144,7 @@ def test_merge_layers(self, test_name, model_id, config_cls, config_kwargs): "model_ids": PEFT_DIFFUSERS_SD_MODELS_TO_TEST, "lora_kwargs": {"init_lora_weights": [False]}, }, - filter_params_func=lambda tests: [x for x in tests if "loha" not in x[0]], + filter_params_func=lambda tests: [x for x in tests if all(s not in x[0] for s in ["loha", "lokr"])], ) ) def test_add_weighted_adapter_base_unchanged(self, test_name, model_id, config_cls, config_kwargs): @@ -172,6 +173,7 @@ def test_add_weighted_adapter_base_unchanged(self, test_name, model_id, config_c "model_ids": PEFT_DIFFUSERS_SD_MODELS_TO_TEST, "lora_kwargs": {"init_lora_weights": [False]}, "loha_kwargs": {"init_weights": [False]}, + "lokr_kwargs": {"init_weights": [False]}, }, ) ) From 8dc5e9881ce8ce41097efc8fbe86dbcb0630a9b2 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Mon, 9 Oct 2023 19:28:26 +0300 Subject: [PATCH 10/33] Modified 'merged' property --- src/peft/tuners/lokr/layer.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/peft/tuners/lokr/layer.py b/src/peft/tuners/lokr/layer.py index 92da95f80d..9a6cca0ec2 100644 --- a/src/peft/tuners/lokr/layer.py +++ b/src/peft/tuners/lokr/layer.py @@ -55,10 +55,13 @@ def __init__(self): self.module_dropout = {} # Tuner info - self.merged = False self._disable_adapters = False self.merged_adapters = [] + @property + def merged(self) -> bool: + return bool(self.merged_adapters) + def _init_empty_weights(self, cls, *args, **kwargs) -> None: # A helper method that allows to initialize the layer of the given class without spending time to initialize the # model weights. 
The implementation is inspired by @@ -246,7 +249,6 @@ def merge(self) -> None: if active_adapter in self._available_adapters: self.weight.data += self.get_delta_weight(active_adapter) self.merged_adapters.append(active_adapter) - self.merged = True def unmerge(self) -> None: if not self.merged: @@ -256,7 +258,6 @@ def unmerge(self) -> None: active_adapter = self.merged_adapters.pop() if active_adapter in self._available_adapters: self.weight.data -= self.get_delta_weight(active_adapter) - self.merged = False def _op(self, x: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: raise NotImplementedError From c1cef3835c2eee52b21497a91127688ee67350c0 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Mon, 9 Oct 2023 19:42:20 +0300 Subject: [PATCH 11/33] Removed duplicated comments --- src/peft/tuners/lokr/model.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/peft/tuners/lokr/model.py b/src/peft/tuners/lokr/model.py index 458978176f..0f93b2fba3 100644 --- a/src/peft/tuners/lokr/model.py +++ b/src/peft/tuners/lokr/model.py @@ -13,21 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -# coding=utf-8 -# Copyright 2023-present the HuggingFace Inc. team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- import re import warnings from itertools import chain From ad525e4b5a1b584fdd1d54ca6be981c175dbd8b7 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Tue, 10 Oct 2023 12:34:57 +0300 Subject: [PATCH 12/33] Replaced wrong keys for LoKr --- src/peft/tuners/lokr/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/peft/tuners/lokr/model.py b/src/peft/tuners/lokr/model.py index 0f93b2fba3..78fe71bde3 100644 --- a/src/peft/tuners/lokr/model.py +++ b/src/peft/tuners/lokr/model.py @@ -214,7 +214,7 @@ def _replace_module(parent, child_name, new_module, child): # dispatch to correct device for name, module in new_module.named_modules(): - if "hada_" in name: + if "lokr_" in name: module.to(child.weight.device) def _mark_only_adapters_as_trainable(self) -> None: @@ -230,7 +230,7 @@ def _unload_and_optionally_merge(self, merge=True, progressbar: bool = False): if getattr(self.model, "quantization_method", None) == "gptq": raise ValueError("Cannot merge LOKR layers when the model is gptq quantized") - key_list = [key for key, _ in self.model.named_modules() if "hada" not in key] + key_list = [key for key, _ in self.model.named_modules() if "lokr" not in key] desc = "Unloading " + ("and merging " if merge else "") + "model" for key in tqdm(key_list, disable=not progressbar, desc=desc): try: From ba458816b77f015363fc569eca9a213a16b9a0fb Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Tue, 10 Oct 2023 16:36:45 +0300 Subject: [PATCH 13/33] Refactored LoHaModel and LoKrModel --- src/peft/tuners/loha/config.py | 5 +- src/peft/tuners/loha/layer.py | 13 +- src/peft/tuners/loha/model.py | 163 +--------------------- src/peft/tuners/lokr/config.py | 5 +- src/peft/tuners/lokr/layer.py | 13 +- src/peft/tuners/lokr/model.py | 165 +---------------------- src/peft/tuners/lycoris_utils.py | 224 +++++++++++++++++++++++++++++++ 7 files changed, 244 insertions(+), 344 deletions(-) create mode 100644 src/peft/tuners/lycoris_utils.py diff --git a/src/peft/tuners/loha/config.py b/src/peft/tuners/loha/config.py index 9081883461..00d24c374b 100644 --- a/src/peft/tuners/loha/config.py +++ b/src/peft/tuners/loha/config.py @@ -16,12 +16,13 @@ from dataclasses import dataclass, field from typing import List, Optional, Union -from peft.config import PeftConfig +# from peft.config import PeftConfig +from peft.tuners.lycoris_utils import LyCORISConfig from peft.utils import PeftType @dataclass -class LoHaConfig(PeftConfig): +class LoHaConfig(LyCORISConfig): """ This is the configuration class to store the configuration of a [`LoHaModel`]. 
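For orientation, the refactored configs plug into `get_peft_model` exactly as before. Below is a minimal, self-contained sketch of applying the LoKr adapter introduced in this series to a toy model; the module names `lin0`/`lin1` and the toy model itself are placeholders for illustration, not taken from the patch.

import torch.nn as nn
from peft import LoKrConfig, get_peft_model

# Toy model whose submodule names match target_modules below (placeholders).
base_model = nn.Sequential()
base_model.add_module("lin0", nn.Linear(10, 20))
base_model.add_module("lin1", nn.Linear(20, 2))

config = LoKrConfig(
    r=8,
    alpha=8,
    target_modules=["lin0", "lin1"],
    rank_dropout=0.0,
    module_dropout=0.0,
    decompose_both=False,
    decompose_factor=-1,
    init_weights=True,
)
peft_model = get_peft_model(base_model, config)
peft_model.print_trainable_parameters()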
diff --git a/src/peft/tuners/loha/layer.py b/src/peft/tuners/loha/layer.py index ec9e5fc694..0e5ee1af00 100644 --- a/src/peft/tuners/loha/layer.py +++ b/src/peft/tuners/loha/layer.py @@ -21,14 +21,15 @@ import torch.nn as nn import torch.nn.functional as F -from peft.tuners.tuners_utils import BaseTunerLayer +from peft.tuners.lycoris_utils import LyCORISLayer -class LoHaLayer(BaseTunerLayer, nn.Module): +class LoHaLayer(LyCORISLayer, nn.Module): # List all names of layers that may contain adapter weights adapter_layer_names = ["hada_w1_a", "hada_w1_b", "hada_w2_a", "hada_w2_b", "hada_t1", "hada_t2"] def __init__(self): + LyCORISLayer.__init__(self) super(nn.Module, self).__init__() # LoHa info @@ -44,14 +45,6 @@ def __init__(self): self.rank_dropout = {} self.module_dropout = {} - # Tuner info - self._disable_adapters = False - self.merged_adapters = [] - - @property - def merged(self) -> bool: - return bool(self.merged_adapters) - def _init_empty_weights(self, cls, *args, **kwargs) -> None: # A helper method that allows to initialize the layer of the given class without spending time to initialize the # model weights. The implementation is inspired by diff --git a/src/peft/tuners/loha/model.py b/src/peft/tuners/loha/model.py index c9403b76eb..824649f1b3 100644 --- a/src/peft/tuners/loha/model.py +++ b/src/peft/tuners/loha/model.py @@ -13,25 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import re -import warnings -from itertools import chain -from typing import Union - import torch -from torch import nn -from tqdm import tqdm - -from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer, check_target_module_exists -from peft.utils import ( - ModulesToSaveWrapper, - _get_submodules, -) +from ..lycoris_utils import LyCORISTuner from .layer import Conv2d, Linear, LoHaLayer -class LoHaModel(BaseTuner): +class LoHaModel(LyCORISTuner): """ Creates Low-Rank Hadamard Product model from a pretrained model. The method is partially described in https://arxiv.org/abs/2108.06098 Current implementation heavily borrows from @@ -87,83 +75,10 @@ class LoHaModel(BaseTuner): - **peft_config** ([`LoHaConfig`]): The configuration of the LoHa model. """ - def __init__(self, model, config, adapter_name): - super().__init__(model, config, adapter_name) - - def __getattr__(self, name: str): - """Forward missing attributes to the wrapped module.""" - try: - return super().__getattr__(name) # defer to nn.Module's logic - except AttributeError: - return getattr(self.model, name) - - def _set_adapter_layers(self, enabled=True): - for module in self.model.modules(): - if isinstance(module, (BaseTunerLayer, ModulesToSaveWrapper)): - module.enable_adapters(enabled) - - def enable_adapter_layers(self): - self._set_adapter_layers(enabled=True) - - def disable_adapter_layers(self): - self._set_adapter_layers(enabled=False) - - def set_adapter(self, adapter_name): - for module in self.model.modules(): - if isinstance(module, LoHaLayer): - if module.merged: - warnings.warn("Adapter cannot be set when the model is merged. 
Unmerging the model first.") - module.unmerge() - module.set_adapter(adapter_name) + prefix: str = "hada_" @staticmethod - def _prepare_adapter_config(peft_config, model_config): - if peft_config.target_modules is None: - raise ValueError("Please specify `target_modules` in `peft_config`") - return peft_config - - @staticmethod - def _check_target_module_exists(loha_config, key): - return check_target_module_exists(loha_config, key) - - def _create_and_replace( - self, - loha_config, - adapter_name: str, - target: Union[LoHaLayer, nn.Module], - target_name, - parent, - current_key, - **optional_kwargs, - ): - """ - A private method to create and replace the target module with the adapter module. - """ - - # Regexp matching - Find key which matches current target_name in patterns provided - pattern_keys = list(chain(loha_config.rank_pattern.keys(), loha_config.alpha_pattern.keys())) - target_name_key = next(filter(lambda key: re.match(f"(.*\.)?{key}$", current_key), pattern_keys), target_name) - - r = loha_config.rank_pattern.get(target_name_key, loha_config.r) - alpha = loha_config.alpha_pattern.get(target_name_key, loha_config.alpha) - - kwargs = { - "r": r, - "alpha": alpha, - "rank_dropout": loha_config.rank_dropout, - "module_dropout": loha_config.module_dropout, - "use_effective_conv2d": loha_config.use_effective_conv2d, - "init_weights": loha_config.init_weights, - } - - if isinstance(target, LoHaLayer): - target.update_layer(adapter_name, **kwargs) - else: - new_module = self._create_new_module(loha_config, adapter_name, target, **kwargs) - self._replace_module(parent, target_name, new_module, target) - - @staticmethod - def _create_new_module(loha_config, adapter_name, target, **kwargs) -> LoHaLayer: + def _create_new_module(config, adapter_name, target, **kwargs) -> LoHaLayer: if isinstance(target, torch.nn.Conv2d): new_module = Conv2d( target.in_channels, @@ -195,73 +110,3 @@ def _create_new_module(loha_config, adapter_name, target, **kwargs) -> LoHaLayer "Target module not found, currently only adapters for nn.Linear and nn.Conv2d are supported" ) return new_module - - @staticmethod - def _replace_module(parent, child_name, new_module, child): - setattr(parent, child_name, new_module) - # It's not necessary to set requires_grad here, as that is handled by - # _mark_only_adapters_as_trainable - new_module.weight = child.weight - if hasattr(child, "bias"): - new_module.bias = child.bias - - if getattr(child, "state", None) is not None: - new_module.state = child.state - new_module.to(child.weight.device) - - # dispatch to correct device - for name, module in new_module.named_modules(): - if "hada_" in name: - module.to(child.weight.device) - - def _mark_only_adapters_as_trainable(self) -> None: - for n, p in self.model.named_parameters(): - if "hada_" not in n: - p.requires_grad = False - - def merge_and_unload(self, progressbar: bool = False): - return self._unload_and_optionally_merge(progressbar=progressbar) - - def _unload_and_optionally_merge(self, merge=True, progressbar: bool = False): - if merge: - if getattr(self.model, "quantization_method", None) == "gptq": - raise ValueError("Cannot merge LOHA layers when the model is gptq quantized") - - key_list = [key for key, _ in self.model.named_modules() if "hada" not in key] - desc = "Unloading " + ("and merging " if merge else "") + "model" - for key in tqdm(key_list, disable=not progressbar, desc=desc): - try: - parent, target, target_name = _get_submodules(self.model, key) - except AttributeError: - continue - if 
isinstance(target, LoHaLayer): - if isinstance(target, nn.Conv2d): - new_module = torch.nn.Conv2d( - target.in_channels, - target.out_channels, - kernel_size=target.kernel_size, - stride=target.stride, - padding=target.padding, - dilation=target.dilation, - ) - elif isinstance(target, nn.Linear): - bias = target.bias is not None - new_module = torch.nn.Linear( - target.in_features, - target.out_features, - bias=bias, - device=target.weight.device, - ) - else: - raise ValueError( - "Cannot convert current module to torch module, currently only adapters for nn.Linear and nn.Conv2d are supported" - ) - if merge: - target.merge() - self._replace_module(parent, target_name, new_module, target) - - # save any additional trainable modules part of `modules_to_save` - if isinstance(target, ModulesToSaveWrapper): - setattr(parent, target_name, target.modules_to_save[target.active_adapter]) - - return self.model diff --git a/src/peft/tuners/lokr/config.py b/src/peft/tuners/lokr/config.py index c2e3d0536c..258fc277e2 100644 --- a/src/peft/tuners/lokr/config.py +++ b/src/peft/tuners/lokr/config.py @@ -16,12 +16,13 @@ from dataclasses import dataclass, field from typing import List, Optional, Union -from peft.config import PeftConfig +# from peft.config import PeftConfig +from peft.tuners.lycoris_utils import LyCORISConfig from peft.utils import PeftType @dataclass -class LoKrConfig(PeftConfig): +class LoKrConfig(LyCORISConfig): """ This is the configuration class to store the configuration of a [`LoKrModel`]. diff --git a/src/peft/tuners/lokr/layer.py b/src/peft/tuners/lokr/layer.py index 9a6cca0ec2..b4995426ea 100644 --- a/src/peft/tuners/lokr/layer.py +++ b/src/peft/tuners/lokr/layer.py @@ -22,10 +22,10 @@ import torch.nn as nn import torch.nn.functional as F -from peft.tuners.tuners_utils import BaseTunerLayer +from peft.tuners.lycoris_utils import LyCORISLayer -class LoKrLayer(BaseTunerLayer, nn.Module): +class LoKrLayer(LyCORISLayer, nn.Module): # List all names of layers that may contain adapter weights adapter_layer_names = [ "lokr_w1", @@ -38,6 +38,7 @@ class LoKrLayer(BaseTunerLayer, nn.Module): ] def __init__(self): + LyCORISLayer.__init__(self) super(nn.Module, self).__init__() # LoKr info @@ -54,14 +55,6 @@ def __init__(self): self.rank_dropout = {} self.module_dropout = {} - # Tuner info - self._disable_adapters = False - self.merged_adapters = [] - - @property - def merged(self) -> bool: - return bool(self.merged_adapters) - def _init_empty_weights(self, cls, *args, **kwargs) -> None: # A helper method that allows to initialize the layer of the given class without spending time to initialize the # model weights. The implementation is inspired by diff --git a/src/peft/tuners/lokr/model.py b/src/peft/tuners/lokr/model.py index 78fe71bde3..7f329c1ccd 100644 --- a/src/peft/tuners/lokr/model.py +++ b/src/peft/tuners/lokr/model.py @@ -13,25 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import re -import warnings -from itertools import chain -from typing import Union - import torch -from torch import nn -from tqdm import tqdm - -from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer, check_target_module_exists -from peft.utils import ( - ModulesToSaveWrapper, - _get_submodules, -) +from ..lycoris_utils import LyCORISTuner from .layer import Conv2d, Linear, LoKrLayer -class LoKrModel(BaseTuner): +class LoKrModel(LyCORISTuner): """ Creates Low-Rank Kronecker Product model from a pretrained model. 
The original method is partially described in https://arxiv.org/abs/2108.06098 and in https://arxiv.org/abs/2309.14859 Current implementation heavily borrows @@ -88,85 +76,10 @@ class LoKrModel(BaseTuner): - **peft_config** ([`LoKrConfig`]): The configuration of the LoKr model. """ - def __init__(self, model, config, adapter_name): - super().__init__(model, config, adapter_name) - - def __getattr__(self, name: str): - """Forward missing attributes to the wrapped module.""" - try: - return super().__getattr__(name) # defer to nn.Module's logic - except AttributeError: - return getattr(self.model, name) - - def _set_adapter_layers(self, enabled=True): - for module in self.model.modules(): - if isinstance(module, (BaseTunerLayer, ModulesToSaveWrapper)): - module.enable_adapters(enabled) - - def enable_adapter_layers(self): - self._set_adapter_layers(enabled=True) - - def disable_adapter_layers(self): - self._set_adapter_layers(enabled=False) - - def set_adapter(self, adapter_name): - for module in self.model.modules(): - if isinstance(module, LoKrLayer): - if module.merged: - warnings.warn("Adapter cannot be set when the model is merged. Unmerging the model first.") - module.unmerge() - module.set_adapter(adapter_name) + prefix: str = "lokr_" @staticmethod - def _prepare_adapter_config(peft_config, model_config): - if peft_config.target_modules is None: - raise ValueError("Please specify `target_modules` in `peft_config`") - return peft_config - - @staticmethod - def _check_target_module_exists(lokr_config, key): - return check_target_module_exists(lokr_config, key) - - def _create_and_replace( - self, - lokr_config, - adapter_name: str, - target: Union[LoKrLayer, nn.Module], - target_name, - parent, - current_key, - **optional_kwargs, - ): - """ - A private method to create and replace the target module with the adapter module. 
- """ - - # Regexp matching - Find key which matches current target_name in patterns provided - pattern_keys = list(chain(lokr_config.rank_pattern.keys(), lokr_config.alpha_pattern.keys())) - target_name_key = next(filter(lambda key: re.match(f"(.*\.)?{key}$", current_key), pattern_keys), target_name) - - r = lokr_config.rank_pattern.get(target_name_key, lokr_config.r) - alpha = lokr_config.alpha_pattern.get(target_name_key, lokr_config.alpha) - - kwargs = { - "r": r, - "alpha": alpha, - "rank_dropout": lokr_config.rank_dropout, - "module_dropout": lokr_config.module_dropout, - "use_effective_conv2d": lokr_config.use_effective_conv2d, - "init_weights": lokr_config.init_weights, - "decompose_both": lokr_config.decompose_both, - "decompose_factor": lokr_config.decompose_factor, - } - - if isinstance(target, LoKrLayer): - target.update_layer(adapter_name, **kwargs) - else: - new_module = self._create_new_module(lokr_config, adapter_name, target, **kwargs) - self._replace_module(parent, target_name, new_module, target) - - @staticmethod - def _create_new_module(lokr_config, adapter_name, target, **kwargs) -> LoKrLayer: + def _create_new_module(config, adapter_name, target, **kwargs) -> LoKrLayer: if isinstance(target, torch.nn.Conv2d): new_module = Conv2d( target.in_channels, @@ -198,73 +111,3 @@ def _create_new_module(lokr_config, adapter_name, target, **kwargs) -> LoKrLayer "Target module not found, currently only adapters for nn.Linear and nn.Conv2d are supported" ) return new_module - - @staticmethod - def _replace_module(parent, child_name, new_module, child): - setattr(parent, child_name, new_module) - # It's not necessary to set requires_grad here, as that is handled by - # _mark_only_adapters_as_trainable - new_module.weight = child.weight - if hasattr(child, "bias"): - new_module.bias = child.bias - - if getattr(child, "state", None) is not None: - new_module.state = child.state - new_module.to(child.weight.device) - - # dispatch to correct device - for name, module in new_module.named_modules(): - if "lokr_" in name: - module.to(child.weight.device) - - def _mark_only_adapters_as_trainable(self) -> None: - for n, p in self.model.named_parameters(): - if "lokr_" not in n: - p.requires_grad = False - - def merge_and_unload(self, progressbar: bool = False): - return self._unload_and_optionally_merge(progressbar=progressbar) - - def _unload_and_optionally_merge(self, merge=True, progressbar: bool = False): - if merge: - if getattr(self.model, "quantization_method", None) == "gptq": - raise ValueError("Cannot merge LOKR layers when the model is gptq quantized") - - key_list = [key for key, _ in self.model.named_modules() if "lokr" not in key] - desc = "Unloading " + ("and merging " if merge else "") + "model" - for key in tqdm(key_list, disable=not progressbar, desc=desc): - try: - parent, target, target_name = _get_submodules(self.model, key) - except AttributeError: - continue - if isinstance(target, LoKrLayer): - if isinstance(target, nn.Conv2d): - new_module = torch.nn.Conv2d( - target.in_channels, - target.out_channels, - kernel_size=target.kernel_size, - stride=target.stride, - padding=target.padding, - dilation=target.dilation, - ) - elif isinstance(target, nn.Linear): - bias = target.bias is not None - new_module = torch.nn.Linear( - target.in_features, - target.out_features, - bias=bias, - device=target.weight.device, - ) - else: - raise ValueError( - "Cannot convert current module to torch module, currently only adapters for nn.Linear and nn.Conv2d are supported" - ) - if merge: 
- target.merge() - self._replace_module(parent, target_name, new_module, target) - - # save any additional trainable modules part of `modules_to_save` - if isinstance(target, ModulesToSaveWrapper): - setattr(parent, target_name, target.modules_to_save[target.active_adapter]) - - return self.model diff --git a/src/peft/tuners/lycoris_utils.py b/src/peft/tuners/lycoris_utils.py new file mode 100644 index 0000000000..2ed85872b8 --- /dev/null +++ b/src/peft/tuners/lycoris_utils.py @@ -0,0 +1,224 @@ +# coding=utf-8 +# Copyright 2023-present the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +import warnings +from dataclasses import dataclass, field +from itertools import chain +from typing import Optional, Union + +import torch +import torch.nn as nn +from tqdm import tqdm + +from peft.config import PeftConfig +from peft.utils import ( + ModulesToSaveWrapper, + _get_submodules, +) + +from .tuners_utils import BaseTuner, BaseTunerLayer, check_target_module_exists + + +@dataclass +class LyCORISConfig(PeftConfig): + r""" + A base config for LyCORIS like adapters + """ + rank_pattern: Optional[dict] = field( + default_factory=dict, + metadata={ + "help": ( + "The mapping from layer names or regexp expression to ranks which are different from the default rank specified by `r`. " + "For example, `{model.decoder.layers.0.encoder_attn.k_proj: 8`}" + ) + }, + ) + alpha_pattern: Optional[dict] = field( + default_factory=dict, + metadata={ + "help": ( + "The mapping from layer names or regexp expression to alphas which are different from the default alpha specified by `alpha`. " + "For example, `{model.decoder.layers.0.encoder_attn.k_proj: 32`}" + ) + }, + ) + + +class LyCORISLayer(BaseTunerLayer): + r""" + A base layer for LyCORIS like adapters + """ + + def __init__(self): + # Tuner info + self._disable_adapters = False + self.merged_adapters = [] + + @property + def merged(self) -> bool: + return bool(self.merged_adapters) + + def update_layer(self, adapter_name: str, r: int, alpha: float, **kwargs): + ... + + +class LyCORISTuner(BaseTuner): + r""" + A base tuner for LyCORIS like adapters + """ + + prefix: str + + def __init__(self, model, config, adapter_name): + super().__init__(model, config, adapter_name) + + def __getattr__(self, name: str): + """Forward missing attributes to the wrapped module.""" + try: + return super().__getattr__(name) # defer to nn.Module's logic + except AttributeError: + return getattr(self.model, name) + + @staticmethod + def _check_target_module_exists(config, key): + return check_target_module_exists(config, key) + + def _create_and_replace( + self, + config: LyCORISConfig, + adapter_name: str, + target: Union[LyCORISLayer, nn.Module], + target_name, + parent, + current_key, + **optional_kwargs, + ): + """ + A private method to create and replace the target module with the adapter module. 
+ """ + + # Regexp matching - Find key which matches current target_name in patterns provided + pattern_keys = list(chain(config.rank_pattern.keys(), config.alpha_pattern.keys())) + target_name_key = next(filter(lambda key: re.match(f"(.*\.)?{key}$", current_key), pattern_keys), target_name) + + kwargs = config.to_dict() + kwargs["r"] = config.rank_pattern.get(target_name_key, config.r) + kwargs["alpha"] = config.alpha_pattern.get(target_name_key, config.alpha) + + if isinstance(target, LyCORISLayer): + target.update_layer(adapter_name, **kwargs) + else: + new_module = self._create_new_module(config, adapter_name, target, **kwargs) + self._replace_module(parent, target_name, new_module, target) + + @staticmethod + def _create_new_module(config: LyCORISConfig, adapter_name: str, target: nn.Module, **kwargs) -> LyCORISLayer: + ... + + def _mark_only_adapters_as_trainable(self) -> None: + for n, p in self.model.named_parameters(): + if self.prefix not in n: + p.requires_grad = False + + @staticmethod + def _prepare_adapter_config(peft_config, model_config): + if peft_config.target_modules is None: + raise ValueError("Please specify `target_modules` in `peft_config`") + return peft_config + + @classmethod + def _replace_module(cls, parent, child_name, new_module, child): + setattr(parent, child_name, new_module) + # It's not necessary to set requires_grad here, as that is handled by + # _mark_only_adapters_as_trainable + new_module.weight = child.weight + if hasattr(child, "bias"): + new_module.bias = child.bias + + if getattr(child, "state", None) is not None: + new_module.state = child.state + new_module.to(child.weight.device) + + # dispatch to correct device + for name, module in new_module.named_modules(): + if cls.prefix in name: + module.to(child.weight.device) + + def _set_adapter_layers(self, enabled=True): + for module in self.model.modules(): + if isinstance(module, (BaseTunerLayer, ModulesToSaveWrapper)): + module.enable_adapters(enabled) + + def _unload_and_optionally_merge(self, merge=True, progressbar: bool = False): + if merge: + if getattr(self.model, "quantization_method", None) == "gptq": + raise ValueError("Cannot merge LOHA layers when the model is gptq quantized") + + key_list = [key for key, _ in self.model.named_modules() if "hada" not in key] + desc = "Unloading " + ("and merging " if merge else "") + "model" + for key in tqdm(key_list, disable=not progressbar, desc=desc): + try: + parent, target, target_name = _get_submodules(self.model, key) + except AttributeError: + continue + if isinstance(target, LyCORISLayer): + if isinstance(target, nn.Conv2d): + new_module = torch.nn.Conv2d( + target.in_channels, + target.out_channels, + kernel_size=target.kernel_size, + stride=target.stride, + padding=target.padding, + dilation=target.dilation, + ) + elif isinstance(target, nn.Linear): + bias = target.bias is not None + new_module = torch.nn.Linear( + target.in_features, + target.out_features, + bias=bias, + device=target.weight.device, + ) + else: + raise ValueError( + "Cannot convert current module to torch module, currently only adapters for nn.Linear and nn.Conv2d are supported" + ) + if merge: + target.merge() + self._replace_module(parent, target_name, new_module, target) + + # save any additional trainable modules part of `modules_to_save` + if isinstance(target, ModulesToSaveWrapper): + setattr(parent, target_name, target.modules_to_save[target.active_adapter]) + + return self.model + + def enable_adapter_layers(self): + self._set_adapter_layers(enabled=True) + + def 
disable_adapter_layers(self): + self._set_adapter_layers(enabled=False) + + def merge_and_unload(self, progressbar: bool = False): + return self._unload_and_optionally_merge(progressbar=progressbar) + + def set_adapter(self, adapter_name): + for module in self.model.modules(): + if isinstance(module, LyCORISLayer): + if module.merged: + warnings.warn("Adapter cannot be set when the model is merged. Unmerging the model first.") + module.unmerge() + module.set_adapter(adapter_name) From 2401bf10e070a60ffc533d0e260fcff5dd7dea90 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Tue, 10 Oct 2023 17:06:59 +0300 Subject: [PATCH 14/33] Refactored LoHaModel and LoKrModel again --- src/peft/tuners/loha/model.py | 40 ++++-------------------- src/peft/tuners/lokr/model.py | 40 ++++-------------------- src/peft/tuners/lycoris_utils.py | 53 +++++++++++++++++++++++++++++--- 3 files changed, 61 insertions(+), 72 deletions(-) diff --git a/src/peft/tuners/loha/model.py b/src/peft/tuners/loha/model.py index 824649f1b3..f4c60ab020 100644 --- a/src/peft/tuners/loha/model.py +++ b/src/peft/tuners/loha/model.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Dict, Type + import torch from ..lycoris_utils import LyCORISTuner @@ -76,37 +78,7 @@ class LoHaModel(LyCORISTuner): """ prefix: str = "hada_" - - @staticmethod - def _create_new_module(config, adapter_name, target, **kwargs) -> LoHaLayer: - if isinstance(target, torch.nn.Conv2d): - new_module = Conv2d( - target.in_channels, - target.out_channels, - target.weight.size()[2:], - stride=target.stride, - padding=target.padding, - dilation=target.dilation, - groups=target.groups, - bias=target.bias is not None, - padding_mode=target.padding_mode, - device=target.weight.device, - dtype=target.weight.dtype, - adapter_name=adapter_name, - **kwargs, - ) - elif isinstance(target, torch.nn.Linear): - new_module = Linear( - target.in_features, - target.out_features, - bias=target.bias is not None, - device=target.weight.device, - dtype=target.weight.dtype, - adapter_name=adapter_name, - **kwargs, - ) - else: - raise ValueError( - "Target module not found, currently only adapters for nn.Linear and nn.Conv2d are supported" - ) - return new_module + layers_mapping: Dict[Type[torch.nn.Module], Type[LoHaLayer]] = { + torch.nn.Conv2d: Conv2d, + torch.nn.Linear: Linear, + } diff --git a/src/peft/tuners/lokr/model.py b/src/peft/tuners/lokr/model.py index 7f329c1ccd..778a7dcfe3 100644 --- a/src/peft/tuners/lokr/model.py +++ b/src/peft/tuners/lokr/model.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from typing import Dict, Type + import torch from ..lycoris_utils import LyCORISTuner @@ -77,37 +79,7 @@ class LoKrModel(LyCORISTuner): """ prefix: str = "lokr_" - - @staticmethod - def _create_new_module(config, adapter_name, target, **kwargs) -> LoKrLayer: - if isinstance(target, torch.nn.Conv2d): - new_module = Conv2d( - target.in_channels, - target.out_channels, - target.weight.size()[2:], - stride=target.stride, - padding=target.padding, - dilation=target.dilation, - groups=target.groups, - bias=target.bias is not None, - padding_mode=target.padding_mode, - device=target.weight.device, - dtype=target.weight.dtype, - adapter_name=adapter_name, - **kwargs, - ) - elif isinstance(target, torch.nn.Linear): - new_module = Linear( - target.in_features, - target.out_features, - bias=target.bias is not None, - device=target.weight.device, - dtype=target.weight.dtype, - adapter_name=adapter_name, - **kwargs, - ) - else: - raise ValueError( - "Target module not found, currently only adapters for nn.Linear and nn.Conv2d are supported" - ) - return new_module + layers_mapping: Dict[Type[torch.nn.Module], Type[LoKrLayer]] = { + torch.nn.Conv2d: Conv2d, + torch.nn.Linear: Linear, + } diff --git a/src/peft/tuners/lycoris_utils.py b/src/peft/tuners/lycoris_utils.py index 2ed85872b8..82991e7cdc 100644 --- a/src/peft/tuners/lycoris_utils.py +++ b/src/peft/tuners/lycoris_utils.py @@ -17,7 +17,7 @@ import warnings from dataclasses import dataclass, field from itertools import chain -from typing import Optional, Union +from typing import Dict, Optional, Type, Union import torch import torch.nn as nn @@ -81,6 +81,7 @@ class LyCORISTuner(BaseTuner): """ prefix: str + layers_mapping: Dict[Type[torch.nn.Module], Type[LyCORISLayer]] def __init__(self, model, config, adapter_name): super().__init__(model, config, adapter_name) @@ -124,9 +125,53 @@ def _create_and_replace( new_module = self._create_new_module(config, adapter_name, target, **kwargs) self._replace_module(parent, target_name, new_module, target) - @staticmethod - def _create_new_module(config: LyCORISConfig, adapter_name: str, target: nn.Module, **kwargs) -> LyCORISLayer: - ... 
+ @classmethod + def _create_new_module(cls, config: LyCORISConfig, adapter_name: str, target: nn.Module, **kwargs) -> LyCORISLayer: + # Find corresponding subtype of provided target module + new_module_cls = None + for subtype, target_cls in cls.layers_mapping.items(): + if isinstance(target, subtype): + new_module_cls = target_cls + break + + # We didn't find corresponding type, so adapter for this layer is not supported + if new_module_cls is None: + raise ValueError( + f"Target module not found, currently only adapters for {', '.join([x.__name__ for x in cls.modules_mapping.keys()])} are supported" + ) + + if isinstance(target, torch.nn.Conv2d): + new_module = new_module_cls( + target.in_channels, + target.out_channels, + target.weight.size()[2:], + stride=target.stride, + padding=target.padding, + dilation=target.dilation, + groups=target.groups, + bias=target.bias is not None, + padding_mode=target.padding_mode, + device=target.weight.device, + dtype=target.weight.dtype, + adapter_name=adapter_name, + **kwargs, + ) + elif isinstance(target, torch.nn.Linear): + new_module = new_module_cls( + target.in_features, + target.out_features, + bias=target.bias is not None, + device=target.weight.device, + dtype=target.weight.dtype, + adapter_name=adapter_name, + **kwargs, + ) + else: + raise ValueError( + "Target module not found, currently only adapters for nn.Linear and nn.Conv2d are supported" + ) + + return new_module def _mark_only_adapters_as_trainable(self) -> None: for n, p in self.model.named_parameters(): From 1fad986f2d9a2818b00a79d4a63e54904b87eb2a Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Tue, 10 Oct 2023 19:46:30 +0300 Subject: [PATCH 15/33] Refactored LoHaLayer and LoKrLayer a bit --- src/peft/tuners/loha/layer.py | 110 ++++++------------------------- src/peft/tuners/lokr/layer.py | 92 ++------------------------ src/peft/tuners/lycoris_utils.py | 101 +++++++++++++++++++++++++++- 3 files changed, 123 insertions(+), 180 deletions(-) diff --git a/src/peft/tuners/loha/layer.py b/src/peft/tuners/loha/layer.py index 0e5ee1af00..3fcd84a45e 100644 --- a/src/peft/tuners/loha/layer.py +++ b/src/peft/tuners/loha/layer.py @@ -14,8 +14,8 @@ # limitations under the License. import math -import warnings -from typing import Optional, Tuple, Union +from itertools import chain +from typing import Iterable, Optional, Tuple, Union import torch import torch.nn as nn @@ -33,31 +33,27 @@ def __init__(self): super(nn.Module, self).__init__() # LoHa info - self.r = {} - self.alpha = {} - self.scaling = {} self.hada_w1_a = nn.ParameterDict({}) self.hada_w1_b = nn.ParameterDict({}) self.hada_w2_a = nn.ParameterDict({}) self.hada_w2_b = nn.ParameterDict({}) self.hada_t1 = nn.ParameterDict({}) self.hada_t2 = nn.ParameterDict({}) - self.rank_dropout = {} - self.module_dropout = {} - - def _init_empty_weights(self, cls, *args, **kwargs) -> None: - # A helper method that allows to initialize the layer of the given class without spending time to initialize the - # model weights. The implementation is inspired by - # https://pytorch.org/docs/stable/generated/torch.nn.utils.skip_init.html but this function cannot be used - # directly. - # Instead of this approach, it would be possible to bypass the __init__ of the class but that runs the risk of - # omitting important logic inside that __init__. 
- kwargs = kwargs.copy() - final_device = kwargs.pop("device", "cpu") - cls.__init__(self, *args, device="meta", **kwargs) - self.to_empty(device=final_device) - - def create_loha_parameters(self, adapter_name: str, r: int, shape: Tuple[int, ...]): + + @property + def _available_adapters(self) -> Iterable[str]: + return set( + chain( + self.hada_w1_a.keys(), + self.hada_w1_b.keys(), + self.hada_w2_a.keys(), + self.hada_w2_b.keys(), + self.hada_t1.keys(), + self.hada_t2.keys(), + ) + ) + + def create_adapter_parameters(self, adapter_name: str, r: int, shape: Tuple[int, ...]): # https://github.com/KohakuBlueleaf/LyCORIS/blob/eb460098187f752a5d66406d3affade6f0a07ece/lycoris/modules/loha.py#L130C9-L143C75 if len(shape) == 4: self.hada_t1[adapter_name] = nn.Parameter(torch.empty(r, r, shape[2], shape[3])) @@ -74,7 +70,7 @@ def create_loha_parameters(self, adapter_name: str, r: int, shape: Tuple[int, .. self.hada_w2_a[adapter_name] = nn.Parameter(torch.empty(shape[0], r)) self.hada_w2_b[adapter_name] = nn.Parameter(torch.empty(r, shape[1])) - def reset_loha_parameters(self, adapter_name: str): + def reset_adapter_parameters(self, adapter_name: str): # Original implementation performs initialization with normal distribution # https://github.com/KohakuBlueleaf/LyCORIS/blob/3549fdef8f564761d68b695a08ef88b1122fdedc/lycoris/modules/loha.py#L158 @@ -131,11 +127,11 @@ def update_layer( raise NotImplementedError(f"LoHa is not implemented for {type(self).__name__} layer") # Create weights with provided shape - self.create_loha_parameters(adapter_name, r, shape) + self.create_adapter_parameters(adapter_name, r, shape) # Initialize weights if init_weights: - self.reset_loha_parameters(adapter_name) + self.reset_adapter_parameters(adapter_name) # Move new weights to device weight = getattr(self, "weight", None) @@ -183,72 +179,6 @@ def get_delta_weight(self, adapter_name: str) -> torch.Tensor: return weight - def merge(self) -> None: - if self.merged: - warnings.warn( - f"Already following adapters were merged {','.join(self.merged_adapters)}. " - f"You are now additionally merging {','.join(self.active_adapters)}." - ) - for active_adapter in self.active_adapters: - if active_adapter in self.hada_w1_a.keys(): - self.weight.data += self.get_delta_weight(active_adapter) - self.merged_adapters.append(active_adapter) - - def unmerge(self) -> None: - if not self.merged: - warnings.warn("Already unmerged. 
Nothing to do.") - return - while len(self.merged_adapters) > 0: - active_adapter = self.merged_adapters.pop() - if active_adapter in self.hada_w1_a.keys(): - self.weight.data -= self.get_delta_weight(active_adapter) - - def _op(self, x: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: - raise NotImplementedError - - def forward(self, x: torch.Tensor) -> torch.Tensor: - previous_dtype = x.dtype - - if self.disable_adapters: - if self.merged: - self.unmerge() - result = self._op(x, self.weight) - elif self.merged: - result = self._op(x, self.weight) - else: - # Get base weights - weight = self.weight.data - - # Execute all the adapters - for active_adapter in self.active_adapters: - if active_adapter not in self.hada_w1_a.keys(): - continue - - module_dropout = self.module_dropout[active_adapter] - - # Modify current execution weights - if (not self.training) or (self.training and torch.rand(1) > module_dropout): - weight = weight + self.get_delta_weight(active_adapter) - - # Perform actual operation - result = self._op(x, weight) - - result = result.to(previous_dtype) - return result - - def scale_layer(self, scale_factor: float) -> None: - if scale_factor != 1: - for active_adapter in self.active_adapters: - alpha = self.alpha[active_adapter] - r = self.r[active_adapter] - self.scaling[active_adapter] = (alpha / r) * scale_factor - - def unscale_layer(self) -> None: - for active_adapter in self.active_adapters: - alpha = self.alpha[active_adapter] - r = self.r[active_adapter] - self.scaling[active_adapter] = alpha / r - class Linear(LoHaLayer, nn.Linear): """LoHa implemented in Linear layer""" diff --git a/src/peft/tuners/lokr/layer.py b/src/peft/tuners/lokr/layer.py index b4995426ea..505944c971 100644 --- a/src/peft/tuners/lokr/layer.py +++ b/src/peft/tuners/lokr/layer.py @@ -14,7 +14,6 @@ # limitations under the License. import math -import warnings from itertools import chain from typing import Iterable, Optional, Tuple, Union @@ -42,9 +41,6 @@ def __init__(self): super(nn.Module, self).__init__() # LoKr info - self.r = {} - self.alpha = {} - self.scaling = {} self.lokr_w1 = nn.ParameterDict({}) self.lokr_w1_a = nn.ParameterDict({}) self.lokr_w1_b = nn.ParameterDict({}) @@ -52,20 +48,6 @@ def __init__(self): self.lokr_w2_a = nn.ParameterDict({}) self.lokr_w2_b = nn.ParameterDict({}) self.lokr_t2 = nn.ParameterDict({}) - self.rank_dropout = {} - self.module_dropout = {} - - def _init_empty_weights(self, cls, *args, **kwargs) -> None: - # A helper method that allows to initialize the layer of the given class without spending time to initialize the - # model weights. The implementation is inspired by - # https://pytorch.org/docs/stable/generated/torch.nn.utils.skip_init.html but this function cannot be used - # directly. - # Instead of this approach, it would be possible to bypass the __init__ of the class but that runs the risk of - # omitting important logic inside that __init__. 
- kwargs = kwargs.copy() - final_device = kwargs.pop("device", "cpu") - cls.__init__(self, *args, device="meta", **kwargs) - self.to_empty(device=final_device) @property def _available_adapters(self) -> Iterable[str]: @@ -81,7 +63,7 @@ def _available_adapters(self) -> Iterable[str]: ) ) - def create_lokr_parameters( + def create_adapter_parameters( self, adapter_name: str, r: int, @@ -115,7 +97,7 @@ def create_lokr_parameters( self.lokr_w2_a[adapter_name] = nn.Parameter(torch.empty(shape[0][1], r)) self.lokr_w2_b[adapter_name] = nn.Parameter(torch.empty(r, shape[1][1])) - def reset_lokr_parameters(self, adapter_name: str): + def reset_adapter_parameters(self, adapter_name: str): if adapter_name in self.lokr_w1: nn.init.zeros_(self.lokr_w1[adapter_name]) else: @@ -188,11 +170,11 @@ def update_layer( raise NotImplementedError(f"LoKr is not implemented for {type(self).__name__} layer") # Create weights with provided shape - self.create_lokr_parameters(adapter_name, r, shape, use_w1, use_w2, use_effective_conv2d) + self.create_adapter_parameters(adapter_name, r, shape, use_w1, use_w2, use_effective_conv2d) # Initialize weights if init_weights: - self.reset_lokr_parameters(adapter_name) + self.reset_adapter_parameters(adapter_name) # Move new weights to device weight = getattr(self, "weight", None) @@ -232,72 +214,6 @@ def get_delta_weight(self, adapter_name: str) -> torch.Tensor: return weight - def merge(self) -> None: - if self.merged: - warnings.warn( - f"Already following adapters were merged {','.join(self.merged_adapters)}. " - f"You are now additionally merging {','.join(self.active_adapters)}." - ) - for active_adapter in self.active_adapters: - if active_adapter in self._available_adapters: - self.weight.data += self.get_delta_weight(active_adapter) - self.merged_adapters.append(active_adapter) - - def unmerge(self) -> None: - if not self.merged: - warnings.warn("Already unmerged. 
Nothing to do.") - return - while len(self.merged_adapters) > 0: - active_adapter = self.merged_adapters.pop() - if active_adapter in self._available_adapters: - self.weight.data -= self.get_delta_weight(active_adapter) - - def _op(self, x: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: - raise NotImplementedError - - def forward(self, x: torch.Tensor) -> torch.Tensor: - previous_dtype = x.dtype - - if self.disable_adapters: - if self.merged: - self.unmerge() - result = self._op(x, self.weight) - elif self.merged: - result = self._op(x, self.weight) - else: - # Get base weights - weight = self.weight.data - - # Execute all the adapters - for active_adapter in self.active_adapters: - if active_adapter not in self._available_adapters: - continue - - module_dropout = self.module_dropout[active_adapter] - - # Modify current execution weights - if (not self.training) or (self.training and torch.rand(1) > module_dropout): - weight = weight + self.get_delta_weight(active_adapter) - - # Perform actual operation - result = self._op(x, weight) - - result = result.to(previous_dtype) - return result - - def scale_layer(self, scale_factor: float) -> None: - if scale_factor != 1: - for active_adapter in self.active_adapters: - alpha = self.alpha[active_adapter] - r = self.r[active_adapter] - self.scaling[active_adapter] = (alpha / r) * scale_factor - - def unscale_layer(self) -> None: - for active_adapter in self.active_adapters: - alpha = self.alpha[active_adapter] - r = self.r[active_adapter] - self.scaling[active_adapter] = alpha / r - class Linear(LoKrLayer, nn.Linear): """LoKr implemented in Linear layer""" diff --git a/src/peft/tuners/lycoris_utils.py b/src/peft/tuners/lycoris_utils.py index 82991e7cdc..5eff17f5fa 100644 --- a/src/peft/tuners/lycoris_utils.py +++ b/src/peft/tuners/lycoris_utils.py @@ -17,7 +17,7 @@ import warnings from dataclasses import dataclass, field from itertools import chain -from typing import Dict, Optional, Type, Union +from typing import Dict, Iterable, Optional, Type, Union import torch import torch.nn as nn @@ -57,20 +57,117 @@ class LyCORISConfig(PeftConfig): ) -class LyCORISLayer(BaseTunerLayer): +class LyCORISLayer(BaseTunerLayer, nn.Module): r""" A base layer for LyCORIS like adapters """ def __init__(self): + self.r = {} + self.alpha = {} + self.scaling = {} + self.rank_dropout = {} + self.module_dropout = {} + # Tuner info self._disable_adapters = False self.merged_adapters = [] + @property + def _available_adapters(self) -> Iterable[str]: + ... + @property def merged(self) -> bool: return bool(self.merged_adapters) + def _init_empty_weights(self, cls, *args, **kwargs) -> None: + # A helper method that allows to initialize the layer of the given class without spending time to initialize the + # model weights. The implementation is inspired by + # https://pytorch.org/docs/stable/generated/torch.nn.utils.skip_init.html but this function cannot be used + # directly. + # Instead of this approach, it would be possible to bypass the __init__ of the class but that runs the risk of + # omitting important logic inside that __init__. + kwargs = kwargs.copy() + final_device = kwargs.pop("device", "cpu") + cls.__init__(self, *args, device="meta", **kwargs) + self.to_empty(device=final_device) + + def _op(self, x: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: + raise NotImplementedError + + def create_adapter_parameters(self, adapter_name: str, r: int, **kwargs): + ... 
+ + def forward(self, x: torch.Tensor) -> torch.Tensor: + previous_dtype = x.dtype + + if self.disable_adapters: + if self.merged: + self.unmerge() + result = self._op(x, self.weight) + elif self.merged: + result = self._op(x, self.weight) + else: + # Get base weights + weight = self.weight.data + + # Execute all the adapters + for active_adapter in self.active_adapters: + if active_adapter not in self._available_adapters: + continue + + module_dropout = self.module_dropout[active_adapter] + + # Modify current execution weights + if (not self.training) or (self.training and torch.rand(1) > module_dropout): + weight = weight + self.get_delta_weight(active_adapter) + + # Perform actual operation + result = self._op(x, weight) + + result = result.to(previous_dtype) + return result + + def get_delta_weight(self, adapter_name: str) -> torch.Tensor: + ... + + def merge(self) -> None: + if self.merged: + warnings.warn( + f"Already following adapters were merged {','.join(self.merged_adapters)}. " + f"You are now additionally merging {','.join(self.active_adapters)}." + ) + for active_adapter in self.active_adapters: + if active_adapter in self._available_adapters: + self.weight.data += self.get_delta_weight(active_adapter) + self.merged_adapters.append(active_adapter) + + def reset_adapter_parameters(self, adapter_name: str): + ... + + def scale_layer(self, scale_factor: float) -> None: + if scale_factor != 1: + for active_adapter in self.active_adapters: + alpha = self.alpha[active_adapter] + r = self.r[active_adapter] + self.scaling[active_adapter] = (alpha / r) * scale_factor + + def unmerge(self) -> None: + if not self.merged: + warnings.warn("Already unmerged. Nothing to do.") + return + while len(self.merged_adapters) > 0: + active_adapter = self.merged_adapters.pop() + if active_adapter in self._available_adapters: + self.weight.data -= self.get_delta_weight(active_adapter) + + def unscale_layer(self) -> None: + for active_adapter in self.active_adapters: + alpha = self.alpha[active_adapter] + r = self.r[active_adapter] + self.scaling[active_adapter] = alpha / r + def update_layer(self, adapter_name: str, r: int, alpha: float, **kwargs): ... 
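
For orientation while reading the LoKr layer code earlier in this series: for a `Linear` layer, the adapter delta assembled by `get_delta_weight` is essentially a Kronecker product of a small `W1` block and an optionally rank-factorized `W2` block, with block sizes chosen by the `factorization` helper. The sketch below is illustrative only and is not part of the patches; shapes follow the parameter definitions in `create_adapter_parameters`, and the `alpha / r` scaling follows the convention visible in `unscale_layer`.

```python
import torch

# Linear(64 -> 128) adapted with r=8, alpha=8, factor=-1:
# factorization(128, -1) -> (8, 16), factorization(64, -1) -> (8, 8),
# so W1 is (8, 8), W2 is (16, 8) and kron(W1, W2) has shape (128, 64).
out1, out2, in1, in2, r, alpha = 8, 16, 8, 8, 8, 8

lokr_w1 = torch.randn(out1, in1)                 # full W1 (decompose_both=False)
lokr_w2_a = torch.randn(out2, r)                 # rank-factorized W2 = w2_a @ w2_b
lokr_w2_b = torch.randn(r, in2)

delta = torch.kron(lokr_w1, lokr_w2_a @ lokr_w2_b) * (alpha / r)
assert delta.shape == (out1 * out2, in1 * in2) == (128, 64)
```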
From 39e87ceb2849a054197e6838dc08fbc9a4895e39 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Tue, 10 Oct 2023 19:48:27 +0300 Subject: [PATCH 16/33] Removed unnecessary comments --- src/peft/tuners/loha/config.py | 1 - src/peft/tuners/lokr/config.py | 1 - 2 files changed, 2 deletions(-) diff --git a/src/peft/tuners/loha/config.py b/src/peft/tuners/loha/config.py index 00d24c374b..e1994a9843 100644 --- a/src/peft/tuners/loha/config.py +++ b/src/peft/tuners/loha/config.py @@ -16,7 +16,6 @@ from dataclasses import dataclass, field from typing import List, Optional, Union -# from peft.config import PeftConfig from peft.tuners.lycoris_utils import LyCORISConfig from peft.utils import PeftType diff --git a/src/peft/tuners/lokr/config.py b/src/peft/tuners/lokr/config.py index 258fc277e2..ce25ec793e 100644 --- a/src/peft/tuners/lokr/config.py +++ b/src/peft/tuners/lokr/config.py @@ -16,7 +16,6 @@ from dataclasses import dataclass, field from typing import List, Optional, Union -# from peft.config import PeftConfig from peft.tuners.lycoris_utils import LyCORISConfig from peft.utils import PeftType From 4171c64dfa3cd8aa6e2cf7bfb5934e4d613ee943 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Fri, 13 Oct 2023 12:11:22 +0300 Subject: [PATCH 17/33] Addressed comments on _available_adapters property --- src/peft/tuners/loha/layer.py | 16 +++------------- src/peft/tuners/lokr/layer.py | 25 +++++++++++-------------- src/peft/tuners/lycoris_utils.py | 4 ++-- 3 files changed, 16 insertions(+), 29 deletions(-) diff --git a/src/peft/tuners/loha/layer.py b/src/peft/tuners/loha/layer.py index 3fcd84a45e..df8e3f275e 100644 --- a/src/peft/tuners/loha/layer.py +++ b/src/peft/tuners/loha/layer.py @@ -14,8 +14,7 @@ # limitations under the License. import math -from itertools import chain -from typing import Iterable, Optional, Tuple, Union +from typing import Optional, Set, Tuple, Union import torch import torch.nn as nn @@ -41,17 +40,8 @@ def __init__(self): self.hada_t2 = nn.ParameterDict({}) @property - def _available_adapters(self) -> Iterable[str]: - return set( - chain( - self.hada_w1_a.keys(), - self.hada_w1_b.keys(), - self.hada_w2_a.keys(), - self.hada_w2_b.keys(), - self.hada_t1.keys(), - self.hada_t2.keys(), - ) - ) + def _available_adapters(self) -> Set[str]: + return {*self.hada_w1_a, *self.hada_w1_b, *self.hada_w2_a, *self.hada_w2_b, *self.hada_t1, *self.hada_t2} def create_adapter_parameters(self, adapter_name: str, r: int, shape: Tuple[int, ...]): # https://github.com/KohakuBlueleaf/LyCORIS/blob/eb460098187f752a5d66406d3affade6f0a07ece/lycoris/modules/loha.py#L130C9-L143C75 diff --git a/src/peft/tuners/lokr/layer.py b/src/peft/tuners/lokr/layer.py index 505944c971..275b9a31b8 100644 --- a/src/peft/tuners/lokr/layer.py +++ b/src/peft/tuners/lokr/layer.py @@ -14,8 +14,7 @@ # limitations under the License. 
import math -from itertools import chain -from typing import Iterable, Optional, Tuple, Union +from typing import Optional, Set, Tuple, Union import torch import torch.nn as nn @@ -50,18 +49,16 @@ def __init__(self): self.lokr_t2 = nn.ParameterDict({}) @property - def _available_adapters(self) -> Iterable[str]: - return set( - chain( - self.lokr_w1.keys(), - self.lokr_w1_a.keys(), - self.lokr_w1_b.keys(), - self.lokr_w2.keys(), - self.lokr_w2_a.keys(), - self.lokr_w2_b.keys(), - self.lokr_t2.keys(), - ) - ) + def _available_adapters(self) -> Set[str]: + return { + *self.lokr_w1, + *self.lokr_w1_a, + *self.lokr_w1_b, + *self.lokr_w2, + *self.lokr_w2_a, + *self.lokr_w2_b, + *self.lokr_t2, + } def create_adapter_parameters( self, diff --git a/src/peft/tuners/lycoris_utils.py b/src/peft/tuners/lycoris_utils.py index 5eff17f5fa..07cc7e9c5f 100644 --- a/src/peft/tuners/lycoris_utils.py +++ b/src/peft/tuners/lycoris_utils.py @@ -17,7 +17,7 @@ import warnings from dataclasses import dataclass, field from itertools import chain -from typing import Dict, Iterable, Optional, Type, Union +from typing import Dict, Optional, Set, Type, Union import torch import torch.nn as nn @@ -74,7 +74,7 @@ def __init__(self): self.merged_adapters = [] @property - def _available_adapters(self) -> Iterable[str]: + def _available_adapters(self) -> Set[str]: ... @property From b24bdbf07b78c88f1705b20266680102169337f4 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Fri, 13 Oct 2023 12:31:22 +0300 Subject: [PATCH 18/33] Replaced te with text_encoder --- examples/stable_diffusion/train_dreambooth.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/examples/stable_diffusion/train_dreambooth.py b/examples/stable_diffusion/train_dreambooth.py index 8364603a0b..6fc3a30fc6 100644 --- a/examples/stable_diffusion/train_dreambooth.py +++ b/examples/stable_diffusion/train_dreambooth.py @@ -120,10 +120,13 @@ def create_unet_adapter_config(args: argparse.Namespace) -> Union[LoraConfig, Lo decompose_factor=args.unet_decompose_factor, init_weights=True, ) + else: + raise ValueError(f"Unknown adapter type {args.adapter}") + return config -def create_te_adapter_config(args: argparse.Namespace) -> Union[LoraConfig, LoHaConfig, LoKrConfig]: +def create_text_encoder_adapter_config(args: argparse.Namespace) -> Union[LoraConfig, LoHaConfig, LoKrConfig]: if args.adapter == "full": raise ValueError("Cannot create text_encoder adapter config for full parameter") @@ -156,6 +159,9 @@ def create_te_adapter_config(args: argparse.Namespace) -> Union[LoraConfig, LoHa decompose_factor=args.te_decompose_factor, init_weights=True, ) + else: + raise ValueError(f"Unknown adapter type {args.adapter}") + return config @@ -898,7 +904,7 @@ def main(args): if not args.train_text_encoder: text_encoder.requires_grad_(False) elif args.train_text_encoder and args.adapter != "full": - config = create_te_adapter_config(args) + config = create_text_encoder_adapter_config(args) text_encoder = get_peft_model(text_encoder, config) text_encoder.print_trainable_parameters() print(text_encoder) From d8f2a83479bb79689264cb0b0760a67c0b288b2c Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Fri, 13 Oct 2023 11:40:06 +0200 Subject: [PATCH 19/33] Apply suggestions from code review Co-authored-by: Benjamin Bossan --- src/peft/tuners/lokr/config.py | 2 +- src/peft/tuners/lokr/layer.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/peft/tuners/lokr/config.py b/src/peft/tuners/lokr/config.py index 
ce25ec793e..0db0c48729 100644 --- a/src/peft/tuners/lokr/config.py +++ b/src/peft/tuners/lokr/config.py @@ -23,7 +23,7 @@ @dataclass class LoKrConfig(LyCORISConfig): """ - This is the configuration class to store the configuration of a [`LoKrModel`]. + Configuration class of [`LoKrModel`]. Args: r (`int`): LoKr rank. diff --git a/src/peft/tuners/lokr/layer.py b/src/peft/tuners/lokr/layer.py index 275b9a31b8..da7976b56e 100644 --- a/src/peft/tuners/lokr/layer.py +++ b/src/peft/tuners/lokr/layer.py @@ -310,9 +310,9 @@ def factorization(dimension: int, factor: int = -1) -> Tuple[int, int]: return a tuple of two value of input dimension decomposed by the number closest to factor second value is higher or equal than first value. - In LoRA with Kroneckor Product, first value is a value for weight scale. secon value is a value for weight. + In LoRA with Kroneckor Product, first value is a value for weight scale, second value is a value for weight. - Becuase of non-commutative property, A⊗B ≠ B⊗A. Meaning of two matrices is slightly different. + Because of non-commutative property, A⊗B ≠ B⊗A. Meaning of two matrices is slightly different. examples) factor -1 2 4 8 16 ... From 64655978f8b273fa5c11044c208ff0da55d5347e Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Fri, 13 Oct 2023 12:45:19 +0300 Subject: [PATCH 20/33] Changed exception type raised when creating adapter for unsupported layer --- src/peft/tuners/loha/layer.py | 2 +- src/peft/tuners/lokr/layer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/peft/tuners/loha/layer.py b/src/peft/tuners/loha/layer.py index df8e3f275e..66c00e3775 100644 --- a/src/peft/tuners/loha/layer.py +++ b/src/peft/tuners/loha/layer.py @@ -114,7 +114,7 @@ def update_layer( else: shape = (self.out_channels, self.in_channels * self.kernel_size[0] * self.kernel_size[1]) else: - raise NotImplementedError(f"LoHa is not implemented for {type(self).__name__} layer") + raise TypeError(f"LoHa is not implemented for {type(self).__name__} layer") # Create weights with provided shape self.create_adapter_parameters(adapter_name, r, shape) diff --git a/src/peft/tuners/lokr/layer.py b/src/peft/tuners/lokr/layer.py index da7976b56e..3ffd9a0761 100644 --- a/src/peft/tuners/lokr/layer.py +++ b/src/peft/tuners/lokr/layer.py @@ -164,7 +164,7 @@ def update_layer( use_w2 = r >= max(shape[0][1], shape[1][1]) / 2 use_effective_conv2d = use_effective_conv2d and self.kernel_size != (1, 1) else: - raise NotImplementedError(f"LoKr is not implemented for {type(self).__name__} layer") + raise TypeError(f"LoKr is not implemented for {type(self).__name__} layer") # Create weights with provided shape self.create_adapter_parameters(adapter_name, r, shape, use_w1, use_w2, use_effective_conv2d) From a63d249d598d7a8b631075ba42caf616d29064bb Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Fri, 13 Oct 2023 23:51:10 +0300 Subject: [PATCH 21/33] Added additional tests for use_effective_conv2d/decompose_both/decompose_factor --- tests/test_custom_models.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/test_custom_models.py b/tests/test_custom_models.py index 69cde6284d..eeae16d5c7 100644 --- a/tests/test_custom_models.py +++ b/tests/test_custom_models.py @@ -162,6 +162,31 @@ ), ("Conv2d 1 LOKR", "Conv2d", LoKrConfig, {"target_modules": ["conv2d"]}), ("Conv2d 2 LOKR", "Conv2d", LoKrConfig, {"target_modules": ["conv2d", "lin0"]}), + ("Conv2d 3 LOKR", "Conv2d", LoKrConfig, {"target_modules": ["conv2d"], 
"use_effective_conv2d": True}), + ("Conv2d 4 LOKR", "Conv2d", LoKrConfig, {"target_modules": ["conv2d", "lin0"], "use_effective_conv2d": True}), + ( + "Conv2d 5 LOKR", + "Conv2d", + LoKrConfig, + {"target_modules": ["conv2d", "lin0"], "use_effective_conv2d": True, "decompose_both": True}, + ), + ( + "Conv2d 6 LOKR", + "Conv2d", + LoKrConfig, + {"target_modules": ["conv2d", "lin0"], "use_effective_conv2d": True, "decompose_factor": 4}, + ), + ( + "Conv2d 7 LOKR", + "Conv2d", + LoKrConfig, + { + "target_modules": ["conv2d", "lin0"], + "use_effective_conv2d": True, + "decompose_both": True, + "decompose_factor": 4, + }, + ), ] MULTIPLE_ACTIVE_ADAPTERS_TEST_CASES = [ @@ -450,6 +475,10 @@ def test_only_params_are_updated(self, test_name, model_id, config_cls, config_k params_after = dict(model.named_parameters()) self.assertEqual(params_before.keys(), params_after.keys()) + if isinstance(model, ModelConv2D): + print(model) + self.assertFalse(True) + prefix = PREFIXES[config_cls] for name, param_before in params_before.items(): param_after = params_after[name] From 2b70fc013ada5d8f7259887443f398529a0c1840 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Sat, 14 Oct 2023 00:10:46 +0300 Subject: [PATCH 22/33] Removed classmethod --- src/peft/tuners/lycoris_utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/peft/tuners/lycoris_utils.py b/src/peft/tuners/lycoris_utils.py index 07cc7e9c5f..8d045e3cb3 100644 --- a/src/peft/tuners/lycoris_utils.py +++ b/src/peft/tuners/lycoris_utils.py @@ -281,8 +281,7 @@ def _prepare_adapter_config(peft_config, model_config): raise ValueError("Please specify `target_modules` in `peft_config`") return peft_config - @classmethod - def _replace_module(cls, parent, child_name, new_module, child): + def _replace_module(self, parent, child_name, new_module, child): setattr(parent, child_name, new_module) # It's not necessary to set requires_grad here, as that is handled by # _mark_only_adapters_as_trainable @@ -296,7 +295,7 @@ def _replace_module(cls, parent, child_name, new_module, child): # dispatch to correct device for name, module in new_module.named_modules(): - if cls.prefix in name: + if self.prefix in name: module.to(child.weight.device) def _set_adapter_layers(self, enabled=True): From 3033a75e4513d1b49df71faf0f17eb4b7a0ffef7 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Sat, 14 Oct 2023 01:05:03 +0300 Subject: [PATCH 23/33] Addressed conversion script review comments --- .../convert_sd_adapter_to_peft.py | 20 +++++++------------ 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/examples/stable_diffusion/convert_sd_adapter_to_peft.py b/examples/stable_diffusion/convert_sd_adapter_to_peft.py index e0bc2e7716..348eb4eb3d 100644 --- a/examples/stable_diffusion/convert_sd_adapter_to_peft.py +++ b/examples/stable_diffusion/convert_sd_adapter_to_peft.py @@ -3,7 +3,7 @@ import os from collections import Counter from dataclasses import dataclass -from functools import reduce +from operator import attrgetter from typing import Dict, List, Optional, Union import safetensors @@ -25,11 +25,6 @@ PREFIX_TEXT_ENCODER = "lora_te" -def get_module_by_name(module: Union[torch.Tensor, nn.Module], access_string: str): - names = access_string.split(sep=".") - return reduce(getattr, names, module) - - @dataclass class LoRAInfo: kohya_key: str @@ -97,12 +92,11 @@ class LoKrInfo: lokr_t2: Optional[torch.Tensor] = None def peft_state_dict(self) -> Dict[str, torch.Tensor]: - if (self.lokr_w1 is None and self.lokr_w1_a is None 
and self.lokr_w1_b is None) or ( - self.lokr_w2 is None and self.lokr_w2_a is None and self.lokr_w2_b is None - ): - raise ValueError( - "At least one of lokr_w1, lokr_w1_a, lokr_w1_b, lokr_w2, lokr_w2_a, lokr_w2_b is missing, they all must be provided" - ) + if (self.lokr_w1 is None) and ((self.lokr_w1_a is None) or (self.lokr_w1_b is None)): + raise ValueError("Either lokr_w1 or both lokr_w1_a and lokr_w1_b should be provided") + + if (self.lokr_w2 is None) and ((self.lokr_w2_a is None) or (self.lokr_w2_b is None)): + raise ValueError("Either lokr_w2 or both lokr_w2_a and lokr_w2_b should be provided") state_dict = {} @@ -397,7 +391,7 @@ def detect_adapter_type(keys: List[str]) -> PeftType: peft_key = models_keys[kohya_key] # Retrieve corresponding layer of model - layer = get_module_by_name(model, peft_key) + layer = attrgetter(peft_key)(model) # Create a corresponding adapter info if peft_key not in adapter_info[model_type]: From 299de88cfcefd6ea2aea4fb22bf4f0ec124af46e Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Sat, 14 Oct 2023 01:29:25 +0300 Subject: [PATCH 24/33] Replaced factorization docstring --- src/peft/tuners/lokr/layer.py | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/src/peft/tuners/lokr/layer.py b/src/peft/tuners/lokr/layer.py index 3ffd9a0761..c14d0406bc 100644 --- a/src/peft/tuners/lokr/layer.py +++ b/src/peft/tuners/lokr/layer.py @@ -306,20 +306,33 @@ def _op(self, input: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: def factorization(dimension: int, factor: int = -1) -> Tuple[int, int]: - """ - return a tuple of two value of input dimension decomposed by the number closest to factor second value is higher or - equal than first value. + """Factorizes the provided number into the product of two numbers + + Args: + dimension (`int`): The number that needs to be factorized. + factor (`int`, optional): + Factorization divider. The algorithm will try to output two numbers, one of each will be as close to the + factor as possible. If -1 is provided, the decomposition algorithm would try to search dividers near the + square root of the dimension. Defaults to -1. + + Returns: + Tuple[`int`, `int`]: A tuple of two numbers, whose product is equal to the provided number. The first number is + always less than or equal to the second. + + Example: + ```py + >>> factorization(256, factor=-1) + (16, 16) - In LoRA with Kroneckor Product, first value is a value for weight scale, second value is a value for weight. + >>> factorization(128, factor=-1) + (8, 16) - Because of non-commutative property, A⊗B ≠ B⊗A. Meaning of two matrices is slightly different. + >>> factorization(127, factor=-1) + (1, 127) - examples) factor - -1 2 4 8 16 ... 
- 127 -> 127, 1 127 -> 127, 1 127 -> 127, 1 127 -> 127, 1 127 -> 127, 1 128 -> 16, 8 128 -> 64, 2 128 -> 32, 4 128 -> - 16, 8 128 -> 16, 8 250 -> 125, 2 250 -> 125, 2 250 -> 125, 2 250 -> 125, 2 250 -> 125, 2 360 -> 45, 8 360 -> 180, 2 - 360 -> 90, 4 360 -> 45, 8 360 -> 45, 8 512 -> 32, 16 512 -> 256, 2 512 -> 128, 4 512 -> 64, 8 512 -> 32, 16 1024 -> - 32, 32 1024 -> 512, 2 1024 -> 256, 4 1024 -> 128, 8 1024 -> 64, 16 + >>> factorization(128, factor=4) + (4, 32) + ``` """ if factor > 0 and (dimension % factor) == 0: From d518728c136f1a02be13b2d4bd4a509b2868232c Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Sat, 14 Oct 2023 02:01:15 +0300 Subject: [PATCH 25/33] LyCORIS -> Lycoris --- src/peft/tuners/loha/config.py | 4 ++-- src/peft/tuners/loha/layer.py | 6 +++--- src/peft/tuners/lokr/config.py | 4 ++-- src/peft/tuners/lokr/layer.py | 6 +++--- src/peft/tuners/lycoris_utils.py | 18 +++++++++--------- 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/peft/tuners/loha/config.py b/src/peft/tuners/loha/config.py index e1994a9843..7ea73400fd 100644 --- a/src/peft/tuners/loha/config.py +++ b/src/peft/tuners/loha/config.py @@ -16,12 +16,12 @@ from dataclasses import dataclass, field from typing import List, Optional, Union -from peft.tuners.lycoris_utils import LyCORISConfig +from peft.tuners.lycoris_utils import LycorisConfig from peft.utils import PeftType @dataclass -class LoHaConfig(LyCORISConfig): +class LoHaConfig(LycorisConfig): """ This is the configuration class to store the configuration of a [`LoHaModel`]. diff --git a/src/peft/tuners/loha/layer.py b/src/peft/tuners/loha/layer.py index 66c00e3775..0b4dde56f7 100644 --- a/src/peft/tuners/loha/layer.py +++ b/src/peft/tuners/loha/layer.py @@ -20,15 +20,15 @@ import torch.nn as nn import torch.nn.functional as F -from peft.tuners.lycoris_utils import LyCORISLayer +from peft.tuners.lycoris_utils import LycorisLayer -class LoHaLayer(LyCORISLayer, nn.Module): +class LoHaLayer(LycorisLayer, nn.Module): # List all names of layers that may contain adapter weights adapter_layer_names = ["hada_w1_a", "hada_w1_b", "hada_w2_a", "hada_w2_b", "hada_t1", "hada_t2"] def __init__(self): - LyCORISLayer.__init__(self) + LycorisLayer.__init__(self) super(nn.Module, self).__init__() # LoHa info diff --git a/src/peft/tuners/lokr/config.py b/src/peft/tuners/lokr/config.py index 0db0c48729..d3cdf1b5af 100644 --- a/src/peft/tuners/lokr/config.py +++ b/src/peft/tuners/lokr/config.py @@ -16,12 +16,12 @@ from dataclasses import dataclass, field from typing import List, Optional, Union -from peft.tuners.lycoris_utils import LyCORISConfig +from peft.tuners.lycoris_utils import LycorisConfig from peft.utils import PeftType @dataclass -class LoKrConfig(LyCORISConfig): +class LoKrConfig(LycorisConfig): """ Configuration class of [`LoKrModel`]. 
diff --git a/src/peft/tuners/lokr/layer.py b/src/peft/tuners/lokr/layer.py index c14d0406bc..afe74f13a5 100644 --- a/src/peft/tuners/lokr/layer.py +++ b/src/peft/tuners/lokr/layer.py @@ -20,10 +20,10 @@ import torch.nn as nn import torch.nn.functional as F -from peft.tuners.lycoris_utils import LyCORISLayer +from peft.tuners.lycoris_utils import LycorisLayer -class LoKrLayer(LyCORISLayer, nn.Module): +class LoKrLayer(LycorisLayer, nn.Module): # List all names of layers that may contain adapter weights adapter_layer_names = [ "lokr_w1", @@ -36,7 +36,7 @@ class LoKrLayer(LyCORISLayer, nn.Module): ] def __init__(self): - LyCORISLayer.__init__(self) + LycorisLayer.__init__(self) super(nn.Module, self).__init__() # LoKr info diff --git a/src/peft/tuners/lycoris_utils.py b/src/peft/tuners/lycoris_utils.py index 8d045e3cb3..d9e74a3977 100644 --- a/src/peft/tuners/lycoris_utils.py +++ b/src/peft/tuners/lycoris_utils.py @@ -33,7 +33,7 @@ @dataclass -class LyCORISConfig(PeftConfig): +class LycorisConfig(PeftConfig): r""" A base config for LyCORIS like adapters """ @@ -57,7 +57,7 @@ class LyCORISConfig(PeftConfig): ) -class LyCORISLayer(BaseTunerLayer, nn.Module): +class LycorisLayer(BaseTunerLayer, nn.Module): r""" A base layer for LyCORIS like adapters """ @@ -178,7 +178,7 @@ class LyCORISTuner(BaseTuner): """ prefix: str - layers_mapping: Dict[Type[torch.nn.Module], Type[LyCORISLayer]] + layers_mapping: Dict[Type[torch.nn.Module], Type[LycorisLayer]] def __init__(self, model, config, adapter_name): super().__init__(model, config, adapter_name) @@ -196,9 +196,9 @@ def _check_target_module_exists(config, key): def _create_and_replace( self, - config: LyCORISConfig, + config: LycorisConfig, adapter_name: str, - target: Union[LyCORISLayer, nn.Module], + target: Union[LycorisLayer, nn.Module], target_name, parent, current_key, @@ -216,14 +216,14 @@ def _create_and_replace( kwargs["r"] = config.rank_pattern.get(target_name_key, config.r) kwargs["alpha"] = config.alpha_pattern.get(target_name_key, config.alpha) - if isinstance(target, LyCORISLayer): + if isinstance(target, LycorisLayer): target.update_layer(adapter_name, **kwargs) else: new_module = self._create_new_module(config, adapter_name, target, **kwargs) self._replace_module(parent, target_name, new_module, target) @classmethod - def _create_new_module(cls, config: LyCORISConfig, adapter_name: str, target: nn.Module, **kwargs) -> LyCORISLayer: + def _create_new_module(cls, config: LycorisConfig, adapter_name: str, target: nn.Module, **kwargs) -> LycorisLayer: # Find corresponding subtype of provided target module new_module_cls = None for subtype, target_cls in cls.layers_mapping.items(): @@ -315,7 +315,7 @@ def _unload_and_optionally_merge(self, merge=True, progressbar: bool = False): parent, target, target_name = _get_submodules(self.model, key) except AttributeError: continue - if isinstance(target, LyCORISLayer): + if isinstance(target, LycorisLayer): if isinstance(target, nn.Conv2d): new_module = torch.nn.Conv2d( target.in_channels, @@ -358,7 +358,7 @@ def merge_and_unload(self, progressbar: bool = False): def set_adapter(self, adapter_name): for module in self.model.modules(): - if isinstance(module, LyCORISLayer): + if isinstance(module, LycorisLayer): if module.merged: warnings.warn("Adapter cannot be set when the model is merged. 
Unmerging the model first.") module.unmerge() From 63aba4e7f52f6b37bacf51b444e356b27b176e48 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Wed, 25 Oct 2023 20:19:23 +0300 Subject: [PATCH 26/33] Updated README to include LoKr adapter --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 5b6a59c096..d4dfee5c38 100644 --- a/README.md +++ b/README.md @@ -135,7 +135,7 @@ Try out the 🤗 Gradio Space which should run seamlessly on a T4 instance: **NEW** ✨ Multi Adapter support and combining multiple LoRA adapters in a weighted combination ![peft lora dreambooth weighted adapter](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/peft/weighted_adapter_dreambooth_lora.png) -**NEW** ✨ Dreambooth training for Stable Diffusion using LoHa adapter [`examples/stable_diffusion/train_dreambooth_loha.py`](examples/stable_diffusion/train_dreambooth_loha.py) +**NEW** ✨ Dreambooth training for Stable Diffusion using LoHa and LoKr adapters [`examples/stable_diffusion/train_dreambooth.py`](examples/stable_diffusion/train_dreambooth.py) ### Parameter Efficient Tuning of LLMs for RLHF components such as Ranker and Policy - Here is an example in [trl](https://github.com/lvwerra/trl) library using PEFT+INT8 for tuning policy model: [gpt2-sentiment_peft.py](https://github.com/lvwerra/trl/blob/main/examples/sentiment/scripts/gpt2-sentiment_peft.py) and corresponding [Blog](https://huggingface.co/blog/trl-peft) @@ -274,9 +274,9 @@ An example is provided in `~examples/causal_language_modeling/peft_lora_clm_acce ### Text-to-Image Generation -| Model | LoRA | LoHa | Prefix Tuning | P-Tuning | Prompt Tuning | IA3 | -| --------- | ---- | ---- | ---- | ---- | ---- | ---- | -| Stable Diffusion | ✅ | ✅ | | | | +| Model | LoRA | LoHa | LoKr | Prefix Tuning | P-Tuning | Prompt Tuning | IA3 | +| --------- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | +| Stable Diffusion | ✅ | ✅ | ✅ | | | | ### Image Classification From 6700bafa6c561f65d5a4e8ec535896c2485e1c6d Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Wed, 25 Oct 2023 20:20:13 +0300 Subject: [PATCH 27/33] Addressed some code review comments --- src/peft/tuners/loha/config.py | 18 ---------------- src/peft/tuners/loha/layer.py | 12 +++++------ src/peft/tuners/loha/model.py | 4 ++-- src/peft/tuners/lokr/config.py | 18 ---------------- src/peft/tuners/lokr/layer.py | 12 ++++++----- src/peft/tuners/lokr/model.py | 4 ++-- src/peft/tuners/lycoris_utils.py | 35 ++++++++++++++++++++++---------- tests/test_custom_models.py | 4 ---- 8 files changed, 41 insertions(+), 66 deletions(-) diff --git a/src/peft/tuners/loha/config.py b/src/peft/tuners/loha/config.py index 7ea73400fd..7c0f0c81ef 100644 --- a/src/peft/tuners/loha/config.py +++ b/src/peft/tuners/loha/config.py @@ -92,24 +92,6 @@ class LoHaConfig(LycorisConfig): "help": "The layer pattern name, used only if `layers_to_transform` is different to None and if the layer pattern is not in the common layers pattern." }, ) - rank_pattern: Optional[dict] = field( - default_factory=dict, - metadata={ - "help": ( - "The mapping from layer names or regexp expression to ranks which are different from the default rank specified by `r`. " - "For example, `{model.decoder.layers.0.encoder_attn.k_proj: 8`}" - ) - }, - ) - alpha_pattern: Optional[dict] = field( - default_factory=dict, - metadata={ - "help": ( - "The mapping from layer names or regexp expression to alphas which are different from the default alpha specified by `alpha`. 
" - "For example, `{model.decoder.layers.0.encoder_attn.k_proj: 32`}" - ) - }, - ) modules_to_save: Optional[List[str]] = field( default=None, metadata={ diff --git a/src/peft/tuners/loha/layer.py b/src/peft/tuners/loha/layer.py index 91c49ba0a3..4cad9b9b18 100644 --- a/src/peft/tuners/loha/layer.py +++ b/src/peft/tuners/loha/layer.py @@ -89,13 +89,13 @@ def update_layer( """Internal function to create loha adapter Args: - shape (`Tuple[int, ...]`): Shape of weights to produce - adapter_name (`str`): Name for the adapter to add - r (`int`): Rank for the added adapter - alpha (`float`): Alpha for the added adapter - rank_dropout (`float`): The dropout probability for rank dimension during training + adapter_name (`str`): Name for the adapter to add. + r (`int`): Rank for the added adapter. + alpha (`float`): Alpha for the added adapter. + rank_dropout (`float`): The dropout probability for rank dimension during training. module_dropout (`float`): The dropout probability for disabling adapter during training. - init_weights (`bool`): Whether to initialize weights + init_weights (`bool`): Whether to initialize weights. + use_effective_conv2d (`bool`): Use parameter effective decomposition for Conv2d with ksize > 1. """ self.r[adapter_name] = r diff --git a/src/peft/tuners/loha/model.py b/src/peft/tuners/loha/model.py index f4c60ab020..92d5b887ef 100644 --- a/src/peft/tuners/loha/model.py +++ b/src/peft/tuners/loha/model.py @@ -17,11 +17,11 @@ import torch -from ..lycoris_utils import LyCORISTuner +from ..lycoris_utils import LycorisTuner from .layer import Conv2d, Linear, LoHaLayer -class LoHaModel(LyCORISTuner): +class LoHaModel(LycorisTuner): """ Creates Low-Rank Hadamard Product model from a pretrained model. The method is partially described in https://arxiv.org/abs/2108.06098 Current implementation heavily borrows from diff --git a/src/peft/tuners/lokr/config.py b/src/peft/tuners/lokr/config.py index d3cdf1b5af..d99b22aa76 100644 --- a/src/peft/tuners/lokr/config.py +++ b/src/peft/tuners/lokr/config.py @@ -99,24 +99,6 @@ class LoKrConfig(LycorisConfig): "help": "The layer pattern name, used only if `layers_to_transform` is different to None and if the layer pattern is not in the common layers pattern." }, ) - rank_pattern: Optional[dict] = field( - default_factory=dict, - metadata={ - "help": ( - "The mapping from layer names or regexp expression to ranks which are different from the default rank specified by `r`. " - "For example, `{model.decoder.layers.0.encoder_attn.k_proj: 8`}" - ) - }, - ) - alpha_pattern: Optional[dict] = field( - default_factory=dict, - metadata={ - "help": ( - "The mapping from layer names or regexp expression to alphas which are different from the default alpha specified by `alpha`. " - "For example, `{model.decoder.layers.0.encoder_attn.k_proj: 32`}" - ) - }, - ) modules_to_save: Optional[List[str]] = field( default=None, metadata={ diff --git a/src/peft/tuners/lokr/layer.py b/src/peft/tuners/lokr/layer.py index afe74f13a5..9b01ecf96f 100644 --- a/src/peft/tuners/lokr/layer.py +++ b/src/peft/tuners/lokr/layer.py @@ -126,13 +126,15 @@ def update_layer( """Internal function to create lokr adapter Args: - shape (`Tuple[int, ...]`): Shape of weights to produce - adapter_name (`str`): Name for the adapter to add - r (`int`): Rank for the added adapter - alpha (`float`): Alpha for the added adapter + adapter_name (`str`): Name for the adapter to add. + r (`int`): Rank for the added adapter. + alpha (`float`): Alpha for the added adapter. 
rank_dropout (`float`): The dropout probability for rank dimension during training module_dropout (`float`): The dropout probability for disabling adapter during training. - init_weights (`bool`): Whether to initialize weights + init_weights (`bool`): Whether to initialize adapter weights. + use_effective_conv2d (`bool`): Use parameter effective decomposition for Conv2d with ksize > 1. + decompose_both (`bool`): Perform rank decomposition of left kronecker product matrix. + decompose_factor (`int`): Kronecker product decomposition factor. """ self.r[adapter_name] = r diff --git a/src/peft/tuners/lokr/model.py b/src/peft/tuners/lokr/model.py index 778a7dcfe3..e08b7a7c48 100644 --- a/src/peft/tuners/lokr/model.py +++ b/src/peft/tuners/lokr/model.py @@ -17,11 +17,11 @@ import torch -from ..lycoris_utils import LyCORISTuner +from ..lycoris_utils import LycorisTuner from .layer import Conv2d, Linear, LoKrLayer -class LoKrModel(LyCORISTuner): +class LoKrModel(LycorisTuner): """ Creates Low-Rank Kronecker Product model from a pretrained model. The original method is partially described in https://arxiv.org/abs/2108.06098 and in https://arxiv.org/abs/2309.14859 Current implementation heavily borrows diff --git a/src/peft/tuners/lycoris_utils.py b/src/peft/tuners/lycoris_utils.py index d9e74a3977..e5194486b1 100644 --- a/src/peft/tuners/lycoris_utils.py +++ b/src/peft/tuners/lycoris_utils.py @@ -146,12 +146,21 @@ def merge(self) -> None: def reset_adapter_parameters(self, adapter_name: str): ... - def scale_layer(self, scale_factor: float) -> None: - if scale_factor != 1: - for active_adapter in self.active_adapters: - alpha = self.alpha[active_adapter] - r = self.r[active_adapter] - self.scaling[active_adapter] = (alpha / r) * scale_factor + def set_scale(self, adapter, scale): + if adapter not in self._available_adapters: + # Ignore the case where the adapter is not in the layer + return + self.scaling[adapter] = scale * self.alpha[adapter] / self.r[adapter] + + def scale_layer(self, scale: float) -> None: + if scale == 1: + return + + for active_adapter in self.active_adapters: + if active_adapter not in self._available_adapters: + continue + + self.scaling[active_adapter] *= scale def unmerge(self) -> None: if not self.merged: @@ -162,17 +171,21 @@ def unmerge(self) -> None: if active_adapter in self._available_adapters: self.weight.data -= self.get_delta_weight(active_adapter) - def unscale_layer(self) -> None: + def unscale_layer(self, scale=None) -> None: for active_adapter in self.active_adapters: - alpha = self.alpha[active_adapter] - r = self.r[active_adapter] - self.scaling[active_adapter] = alpha / r + if active_adapter not in self._available_adapters: + continue + + if scale is None: + self.scaling[active_adapter] = self.alpha[active_adapter] / self.r[active_adapter] + else: + self.scaling[active_adapter] /= scale def update_layer(self, adapter_name: str, r: int, alpha: float, **kwargs): ... 
-class LyCORISTuner(BaseTuner): +class LycorisTuner(BaseTuner): r""" A base tuner for LyCORIS like adapters """ diff --git a/tests/test_custom_models.py b/tests/test_custom_models.py index 5015593ea0..84064e65ce 100644 --- a/tests/test_custom_models.py +++ b/tests/test_custom_models.py @@ -489,10 +489,6 @@ def test_only_params_are_updated(self, test_name, model_id, config_cls, config_k params_after = dict(model.named_parameters()) self.assertEqual(params_before.keys(), params_after.keys()) - if isinstance(model, ModelConv2D): - print(model) - self.assertFalse(True) - prefix = PREFIXES[config_cls] for name, param_before in params_before.items(): param_after = params_after[name] From fa6b522e1319c9f49440aa57763ec120cc08bffe Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Wed, 25 Oct 2023 21:09:45 +0300 Subject: [PATCH 28/33] Addressed some code review comments --- src/peft/tuners/lycoris_utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/peft/tuners/lycoris_utils.py b/src/peft/tuners/lycoris_utils.py index e5194486b1..e748aaba35 100644 --- a/src/peft/tuners/lycoris_utils.py +++ b/src/peft/tuners/lycoris_utils.py @@ -15,6 +15,7 @@ import re import warnings +from abc import abstractmethod from dataclasses import dataclass, field from itertools import chain from typing import Dict, Optional, Set, Type, Union @@ -74,6 +75,7 @@ def __init__(self): self.merged_adapters = [] @property + @abstractmethod def _available_adapters(self) -> Set[str]: ... @@ -96,6 +98,7 @@ def _init_empty_weights(self, cls, *args, **kwargs) -> None: def _op(self, x: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: raise NotImplementedError + @abstractmethod def create_adapter_parameters(self, adapter_name: str, r: int, **kwargs): ... @@ -129,6 +132,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: result = result.to(previous_dtype) return result + @abstractmethod def get_delta_weight(self, adapter_name: str) -> torch.Tensor: ... @@ -143,6 +147,7 @@ def merge(self) -> None: self.weight.data += self.get_delta_weight(active_adapter) self.merged_adapters.append(active_adapter) + @abstractmethod def reset_adapter_parameters(self, adapter_name: str): ... @@ -181,6 +186,7 @@ def unscale_layer(self, scale=None) -> None: else: self.scaling[active_adapter] /= scale + @abstractmethod def update_layer(self, adapter_name: str, r: int, alpha: float, **kwargs): ... 
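
The scaling rework above replaces the old `scale_layer`/`unscale_layer` pair (which recomputed `alpha / r` directly) with multiplicative scaling plus an explicit `set_scale`, where `unscale_layer()` without an argument restores the `alpha / r` baseline. A quick illustrative round-trip with made-up numbers, assuming a single active adapter with `alpha=8` and `r=4` (not part of the patches):

```python
alpha, r = 8, 4
scaling = alpha / r   # 2.0 -- baseline, e.g. what set_scale(adapter, 1.0) stores

scaling *= 0.5        # scale_layer(0.5)    -> 1.0
scaling /= 0.5        # unscale_layer(0.5)  -> 2.0, undoes a known scale factor
scaling = alpha / r   # unscale_layer()     -> 2.0, resets to the baseline
```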
From e76182ff40e308491e4655b3d5685396b725d105 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Wed, 25 Oct 2023 21:33:39 +0300 Subject: [PATCH 29/33] Addressed some code review comments --- src/peft/tuners/ia3/layer.py | 4 ---- src/peft/tuners/lora/layer.py | 4 ---- src/peft/tuners/lycoris_utils.py | 4 ---- src/peft/tuners/tuners_utils.py | 7 +++++++ 4 files changed, 7 insertions(+), 12 deletions(-) diff --git a/src/peft/tuners/ia3/layer.py b/src/peft/tuners/ia3/layer.py index c35f3d875c..8f75e832b0 100644 --- a/src/peft/tuners/ia3/layer.py +++ b/src/peft/tuners/ia3/layer.py @@ -43,10 +43,6 @@ def __init__( self.out_features = out_features self.is_feedforward = is_feedforward - @property - def merged(self) -> bool: - return bool(self.merged_adapters) - def update_layer(self, adapter_name, init_ia3_weights): # Actual trainable parameters if self.is_feedforward: diff --git a/src/peft/tuners/lora/layer.py b/src/peft/tuners/lora/layer.py index df6083dd8e..0eb2efa2f2 100644 --- a/src/peft/tuners/lora/layer.py +++ b/src/peft/tuners/lora/layer.py @@ -46,10 +46,6 @@ def __init__(self, in_features: int, out_features: int, **kwargs): self.out_features = out_features self.kwargs = kwargs - @property - def merged(self) -> bool: - return bool(self.merged_adapters) - def _init_empty_weights(self, cls, *args, **kwargs) -> None: # A helper method that allows to initialize the layer of the given class without spending time to initialize the # model weights. The implementation is inspired by diff --git a/src/peft/tuners/lycoris_utils.py b/src/peft/tuners/lycoris_utils.py index e748aaba35..f78531bd49 100644 --- a/src/peft/tuners/lycoris_utils.py +++ b/src/peft/tuners/lycoris_utils.py @@ -79,10 +79,6 @@ def __init__(self): def _available_adapters(self) -> Set[str]: ... - @property - def merged(self) -> bool: - return bool(self.merged_adapters) - def _init_empty_weights(self, cls, *args, **kwargs) -> None: # A helper method that allows to initialize the layer of the given class without spending time to initialize the # model weights. 
The implementation is inspired by diff --git a/src/peft/tuners/tuners_utils.py b/src/peft/tuners/tuners_utils.py index 10f8754296..4f446fd015 100644 --- a/src/peft/tuners/tuners_utils.py +++ b/src/peft/tuners/tuners_utils.py @@ -281,12 +281,19 @@ class BaseTunerLayer(ABC): # the currently active adapter(s) _active_adapter: str | list[str] = "default" + # List all merged adapters + merged_adapters: list[str] = [] + def merge(self, *args) -> None: raise NotImplementedError def unmerge(self, *args) -> None: raise NotImplementedError + @property + def merged(self) -> bool: + return bool(self.merged_adapters) + @property def disable_adapters(self) -> bool: # use a property to ensure that disable_adapters is not set directly, instead use the enable_adapters method From 25077b236a30bba9af57d231fbd0ced03ec83275 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Fri, 27 Oct 2023 14:00:12 +0300 Subject: [PATCH 30/33] Updated check_target_modules docstring, increased test coverage --- src/peft/tuners/tuners_utils.py | 2 +- tests/test_custom_models.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/peft/tuners/tuners_utils.py b/src/peft/tuners/tuners_utils.py index 4f446fd015..9307e5fc12 100644 --- a/src/peft/tuners/tuners_utils.py +++ b/src/peft/tuners/tuners_utils.py @@ -356,7 +356,7 @@ def check_target_module_exists(config, key: str) -> bool | re.Match[str] | None: """A helper method to check if the passed module's key name matches any of the target modules in the adapter_config. Args: - config (`LoraConfig` | `LoHaConfig`): A config to match target modules from + config (`LoraConfig` | `LycorisConfig`): A config to match target modules from key (`str`): A key to search any matches in config Returns: diff --git a/tests/test_custom_models.py b/tests/test_custom_models.py index 84064e65ce..83574757ea 100644 --- a/tests/test_custom_models.py +++ b/tests/test_custom_models.py @@ -142,6 +142,7 @@ "module_dropout": 0.1, }, ), + ("Vanilla MLP 7 LOHA", "MLP", LoHaConfig, {"target_modules": "lin0", "rank_dropout": 0.5}), ("Conv2d 1 LOHA", "Conv2d", LoHaConfig, {"target_modules": ["conv2d"]}), ("Conv2d 2 LOHA", "Conv2d", LoHaConfig, {"target_modules": ["conv2d", "lin0"]}), ("Conv2d 3 LOHA", "Conv2d", LoHaConfig, {"target_modules": ["conv2d"], "use_effective_conv2d": True}), @@ -162,6 +163,8 @@ "module_dropout": 0.1, }, ), + ("Vanilla MLP 7 LOKR", "MLP", LoKrConfig, {"target_modules": "lin0", "rank_dropout": 0.5}), + ("Vanilla MLP 8 LOKR", "MLP", LoKrConfig, {"target_modules": "lin0", "decompose_both": True, "r": 1, "alpha": 1}), ("Conv2d 1 LOKR", "Conv2d", LoKrConfig, {"target_modules": ["conv2d"]}), ("Conv2d 2 LOKR", "Conv2d", LoKrConfig, {"target_modules": ["conv2d", "lin0"]}), ("Conv2d 3 LOKR", "Conv2d", LoKrConfig, {"target_modules": ["conv2d"], "use_effective_conv2d": True}), From 9f05024586fa55224f11dc55c7912100cecedb13 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Fri, 27 Oct 2023 17:00:59 +0300 Subject: [PATCH 31/33] Added delete_adapter method for LoKr and LoHa --- src/peft/tuners/lycoris_utils.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/peft/tuners/lycoris_utils.py b/src/peft/tuners/lycoris_utils.py index f78531bd49..9a1d63d969 100644 --- a/src/peft/tuners/lycoris_utils.py +++ b/src/peft/tuners/lycoris_utils.py @@ -378,3 +378,30 @@ def set_adapter(self, adapter_name): warnings.warn("Adapter cannot be set when the model is merged. 
Unmerging the model first.") module.unmerge() module.set_adapter(adapter_name) + + def delete_adapter(self, adapter_name: str): + """ + Deletes an existing adapter. + + Args: + adapter_name (`str`): Name of the adapter to be deleted. + """ + if adapter_name not in list(self.peft_config.keys()): + raise ValueError(f"Adapter {adapter_name} does not exist") + del self.peft_config[adapter_name] + + key_list = [key for key, _ in self.model.named_modules() if "lora" not in key] + for key in key_list: + _, target, _ = _get_submodules(self.model, key) + if isinstance(target, LycorisLayer): + for attr in target.adapter_layer_names: + if adapter_name in getattr(target, attr): + getattr(target, attr).pop(adapter_name) + if adapter_name in target.active_adapters: + resetting_active_adapter = ( + list(self.peft_config.keys())[0] if len(self.peft_config) > 0 else "default" + ) + warnings.warn( + f"Adapter {adapter_name} was active which is now deleted. Setting active adapter to {resetting_active_adapter}. " + ) + target.set_adapter(resetting_active_adapter) From f6e73352197d309382b964c28fa764d8caec2a2f Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Fri, 27 Oct 2023 17:22:17 +0300 Subject: [PATCH 32/33] Fixed typo in delete_adapter --- src/peft/tuners/lycoris_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/peft/tuners/lycoris_utils.py b/src/peft/tuners/lycoris_utils.py index 9a1d63d969..8d3fb7481b 100644 --- a/src/peft/tuners/lycoris_utils.py +++ b/src/peft/tuners/lycoris_utils.py @@ -390,7 +390,7 @@ def delete_adapter(self, adapter_name: str): raise ValueError(f"Adapter {adapter_name} does not exist") del self.peft_config[adapter_name] - key_list = [key for key, _ in self.model.named_modules() if "lora" not in key] + key_list = [key for key, _ in self.model.named_modules() if self.prefix not in key] for key in key_list: _, target, _ = _get_submodules(self.model, key) if isinstance(target, LycorisLayer): From 69ae74c8114df2f2fc698c870fcd59d27725b6d4 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Sun, 29 Oct 2023 11:11:37 +0300 Subject: [PATCH 33/33] Provide default value for --- src/peft/tuners/loha/layer.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/peft/tuners/loha/layer.py b/src/peft/tuners/loha/layer.py index 4cad9b9b18..26f57ac681 100644 --- a/src/peft/tuners/loha/layer.py +++ b/src/peft/tuners/loha/layer.py @@ -83,7 +83,7 @@ def update_layer( rank_dropout: float, module_dropout: float, init_weights: bool, - use_effective_conv2d: bool, + use_effective_conv2d: bool = False, **kwargs, ) -> None: """Internal function to create loha adapter @@ -95,7 +95,8 @@ def update_layer( rank_dropout (`float`): The dropout probability for rank dimension during training. module_dropout (`float`): The dropout probability for disabling adapter during training. init_weights (`bool`): Whether to initialize weights. - use_effective_conv2d (`bool`): Use parameter effective decomposition for Conv2d with ksize > 1. + use_effective_conv2d (`bool`, *optional*, defaults to `False`): + Use parameter effective decomposition for Conv2d with ksize > 1. """ self.r[adapter_name] = r
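
With the final patch in place, LoKr is wired into the standard PEFT workflow. A hypothetical smoke test putting the new config options together is sketched below; the toy module and every hyperparameter value are invented for illustration, only the config fields themselves come from this series.

```python
import torch.nn as nn

from peft import LoKrConfig, get_peft_model


class ToyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.lin0 = nn.Linear(64, 128)
        self.conv2d = nn.Conv2d(8, 16, kernel_size=3, padding=1)

    def forward(self, x):
        # (batch, 64) -> (batch, 128) -> (batch, 8, 4, 4) -> (batch, 16, 4, 4)
        return self.conv2d(self.lin0(x).reshape(-1, 8, 4, 4))


config = LoKrConfig(
    target_modules=["lin0", "conv2d"],
    r=4,
    alpha=4,
    rank_dropout=0.0,
    module_dropout=0.0,
    use_effective_conv2d=True,
    decompose_both=True,
    decompose_factor=4,
    init_weights=True,
)
model = get_peft_model(ToyModel(), config)
model.print_trainable_parameters()
```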