From 41a71e320a70be5ab175855922195eb34b4f280c Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Fri, 29 Sep 2023 16:24:53 +0300 Subject: [PATCH 01/33] Initial commit for LoKr implementation --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 13f4e88a04..ced6e17882 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ Supported methods: 5. AdaLoRA: [Adaptive Budget Allocation for Parameter-Efficient Fine-Tuning](https://arxiv.org/abs/2303.10512) 6. $(IA)^3$: [Few-Shot Parameter-Efficient Fine-Tuning is Better and Cheaper than In-Context Learning](https://arxiv.org/abs/2205.05638) 7. MultiTask Prompt Tuning: [Multitask Prompt Tuning Enables Parameter-Efficient Transfer Learning](https://arxiv.org/abs/2303.02861) +8. LoKr: [KronA: Parameter Efficient Tuning with Kronecker Adapter](https://arxiv.org/abs/2212.10650) based on [Navigating Text-To-Image Customization:From LyCORIS Fine-Tuning to Model Evaluation](https://arxiv.org/abs/2309.14859) implementation ## Getting started From 0f6089356dad288558b8ec38c875b330991492ae Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Tue, 3 Oct 2023 19:26:14 +0300 Subject: [PATCH 02/33] Added current implementation of LoKr --- src/peft/__init__.py | 2 + src/peft/mapping.py | 4 + src/peft/peft_model.py | 2 + src/peft/tuners/__init__.py | 1 + src/peft/tuners/lokr/__init__.py | 20 ++ src/peft/tuners/lokr/config.py | 130 +++++++++ src/peft/tuners/lokr/layer.py | 452 +++++++++++++++++++++++++++++++ src/peft/tuners/lokr/model.py | 285 +++++++++++++++++++ src/peft/utils/peft_types.py | 1 + src/peft/utils/save_and_load.py | 6 +- 10 files changed, 902 insertions(+), 1 deletion(-) create mode 100644 src/peft/tuners/lokr/__init__.py create mode 100644 src/peft/tuners/lokr/config.py create mode 100644 src/peft/tuners/lokr/layer.py create mode 100644 src/peft/tuners/lokr/model.py diff --git a/src/peft/__init__.py b/src/peft/__init__.py index a22fc87a08..53ba2bd568 100644 --- a/src/peft/__init__.py +++ b/src/peft/__init__.py @@ -51,6 +51,8 @@ LoraModel, LoHaConfig, LoHaModel, + LoKrConfig, + LoKrModel, IA3Config, IA3Model, AdaLoraConfig, diff --git a/src/peft/mapping.py b/src/peft/mapping.py index ecb12be37c..f69e89ec3e 100644 --- a/src/peft/mapping.py +++ b/src/peft/mapping.py @@ -37,6 +37,8 @@ IA3Model, LoHaConfig, LoHaModel, + LoKrConfig, + LoKrModel, LoraConfig, LoraModel, MultitaskPromptTuningConfig, @@ -67,6 +69,7 @@ "P_TUNING": PromptEncoderConfig, "LORA": LoraConfig, "LOHA": LoHaConfig, + "LOKR": LoKrConfig, "ADALORA": AdaLoraConfig, "IA3": IA3Config, "MULTITASK_PROMPT_TUNING": MultitaskPromptTuningConfig, @@ -75,6 +78,7 @@ PEFT_TYPE_TO_TUNER_MAPPING = { "LORA": LoraModel, "LOHA": LoHaModel, + "LOKR": LoKrModel, "ADALORA": AdaLoraModel, "IA3": IA3Model, } diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py index 126a536d7e..c2c6959632 100644 --- a/src/peft/peft_model.py +++ b/src/peft/peft_model.py @@ -40,6 +40,7 @@ AdaptionPromptModel, IA3Model, LoHaModel, + LoKrModel, LoraModel, MultitaskPromptEmbedding, PrefixEncoder, @@ -68,6 +69,7 @@ PEFT_TYPE_TO_MODEL_MAPPING = { PeftType.LORA: LoraModel, PeftType.LOHA: LoHaModel, + PeftType.LOKR: LoKrModel, PeftType.PROMPT_TUNING: PromptEmbedding, PeftType.P_TUNING: PromptEncoder, PeftType.PREFIX_TUNING: PrefixEncoder, diff --git a/src/peft/tuners/__init__.py b/src/peft/tuners/__init__.py index 20f0bb2b31..dd4c94b947 100644 --- a/src/peft/tuners/__init__.py +++ b/src/peft/tuners/__init__.py @@ -20,6 +20,7 @@ from .adaption_prompt import AdaptionPromptConfig, 
AdaptionPromptModel from .lora import LoraConfig, LoraModel from .loha import LoHaConfig, LoHaModel +from .lokr import LoKrConfig, LoKrModel from .ia3 import IA3Config, IA3Model from .adalora import AdaLoraConfig, AdaLoraModel from .p_tuning import PromptEncoder, PromptEncoderConfig, PromptEncoderReparameterizationType diff --git a/src/peft/tuners/lokr/__init__.py b/src/peft/tuners/lokr/__init__.py new file mode 100644 index 0000000000..bb138202fd --- /dev/null +++ b/src/peft/tuners/lokr/__init__.py @@ -0,0 +1,20 @@ +# coding=utf-8 +# Copyright 2023-present the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .config import LoKrConfig +from .model import LoKrModel + + +__all__ = ["LoKrConfig", "LoKrModel"] diff --git a/src/peft/tuners/lokr/config.py b/src/peft/tuners/lokr/config.py new file mode 100644 index 0000000000..eb36c5673f --- /dev/null +++ b/src/peft/tuners/lokr/config.py @@ -0,0 +1,130 @@ +# coding=utf-8 +# Copyright 2023-present the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass, field +from typing import List, Optional, Union + +from peft.config import PeftConfig +from peft.utils import PeftType + + +@dataclass +class LoKrConfig(PeftConfig): + """ + This is the configuration class to store the configuration of a [`LoKrModel`]. + + Args: + r (`int`): LoKr rank. + alpha (`int`): The alpha parameter for LoKr scaling. + rank_dropout (`int`): The dropout probability for rank dimension during training. + module_dropout (`int`): The dropout probability for disabling LoHa modules during training. + use_effective_conv2d (`bool`): + Use parameter effective decomposition for Conv2d with ksize > 1 ("Proposition 3" from FedPara paper). + decompose_both (`bool`): Perform rank decomposition of left kronecker product matrix. + decompose_factor (`int`): Kronecker product decomposition factor. + target_modules (`Union[List[str],str]`): The names of the modules to apply LoKr to. + init_weights (`bool`): Whether to perform initialization of LoKr weights. + layers_to_transform (`Union[List[int],int]`): + The layer indexes to transform, if this argument is specified, it will apply the LoHa transformations on + the layer indexes that are specified in this list. If a single integer is passed, it will apply the LoKr + transformations on the layer at this index. 
+ layers_pattern (`str`): + The layer pattern name, used only if `layers_to_transform` is different from `None` and if the layer + pattern is not in the common layers pattern. + rank_pattern (`dict`): + The mapping from layer names or regexp expression to ranks which are different from the default rank + specified by `r`. + alpha_pattern (`dict`): + The mapping from layer names or regexp expression to alphas which are different from the default alpha + specified by `alpha`. + modules_to_save (`List[str]`): The names of modules to be set as trainable except LoHa parameters. + """ + + r: int = field(default=8, metadata={"help": "LoHa rank"}) + alpha: int = field(default=8, metadata={"help": "LoHa alpha"}) + rank_dropout: float = field( + default=0.0, metadata={"help": "The dropout probability for rank dimension during training"} + ) + module_dropout: float = field( + default=0.0, metadata={"help": "The dropout probability for disabling LoHa modules during training"} + ) + use_effective_conv2d: bool = field( + default=False, + metadata={ + "help": 'Use parameter effective decomposition for Conv2d 3x3 with ksize > 1 ("Proposition 3" from FedPara paper)' + }, + ) + decompose_both: bool = field( + default=False, + metadata={"help": "Perform rank decomposition of left kronecker product matrix."}, + ) + decompose_factor: int = field(default=-1, metadata={"help": "Kronecker product decomposition factor."}) + target_modules: Optional[Union[List[str], str]] = field( + default=None, + metadata={ + "help": "List of module names or regex expression of the module names to replace with LoHa." + "For example, ['q', 'v'] or '.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$' " + }, + ) + init_weights: bool = field( + default=True, + metadata={ + "help": ( + "Whether to initialize the weights of the LoHa layers with their default initialization. Don't change " + "this setting, except if you know exactly what you're doing." + ), + }, + ) + layers_to_transform: Optional[Union[List[int], int]] = field( + default=None, + metadata={ + "help": "The layer indexes to transform, is this argument is specified, PEFT will transform only the layers indexes that are specified inside this list. If a single integer is passed, PEFT will transform only the layer at this index." + }, + ) + layers_pattern: Optional[str] = field( + default=None, + metadata={ + "help": "The layer pattern name, used only if `layers_to_transform` is different to None and if the layer pattern is not in the common layers pattern." + }, + ) + rank_pattern: Optional[dict] = field( + default_factory=dict, + metadata={ + "help": ( + "The mapping from layer names or regexp expression to ranks which are different from the default rank specified by `r`. " + "For example, `{model.decoder.layers.0.encoder_attn.k_proj: 8`}" + ) + }, + ) + alpha_pattern: Optional[dict] = field( + default_factory=dict, + metadata={ + "help": ( + "The mapping from layer names or regexp expression to alphas which are different from the default alpha specified by `alpha`. " + "For example, `{model.decoder.layers.0.encoder_attn.k_proj: 32`}" + ) + }, + ) + modules_to_save: Optional[List[str]] = field( + default=None, + metadata={ + "help": "List of modules apart from LoHA layers to be set as trainable and saved in the final checkpoint. " + "For example, in Sequence Classification or Token Classification tasks, " + "the final layer `classifier/score` are randomly initialized and as such need to be trainable and saved." 
+ }, + ) + + def __post_init__(self): + self.peft_type = PeftType.LOKR diff --git a/src/peft/tuners/lokr/layer.py b/src/peft/tuners/lokr/layer.py new file mode 100644 index 0000000000..e78fc7c32f --- /dev/null +++ b/src/peft/tuners/lokr/layer.py @@ -0,0 +1,452 @@ +# coding=utf-8 +# Copyright 2023-present the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +import warnings +from itertools import chain +from typing import Iterable, Optional, Tuple, Union + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from peft.tuners.tuners_utils import BaseTunerLayer + + +class LoKrLayer(BaseTunerLayer, nn.Module): + # List all names of layers that may contain adapter weights + adapter_layer_names = [ + "lokr_w1", + "lokr_w1_a", + "lokr_w1_b", + "lokr_w2", + "lokr_w2_a", + "lokr_w2_b", + "lokr_t2", + ] + + def __init__(self): + super(nn.Module, self).__init__() + + # LoKr info + self.r = {} + self.alpha = {} + self.scaling = {} + self.lokr_w1 = nn.ParameterDict({}) + self.lokr_w1_a = nn.ParameterDict({}) + self.lokr_w1_b = nn.ParameterDict({}) + self.lokr_w2 = nn.ParameterDict({}) + self.lokr_w2_a = nn.ParameterDict({}) + self.lokr_w2_b = nn.ParameterDict({}) + self.lokr_t2 = nn.ParameterDict({}) + self.rank_dropout = {} + self.module_dropout = {} + + # Tuner info + self.merged = False + self._disable_adapters = False + self.merged_adapters = [] + + def _init_empty_weights(self, cls, *args, **kwargs) -> None: + # A helper method that allows to initialize the layer of the given class without spending time to initialize the + # model weights. The implementation is inspired by + # https://pytorch.org/docs/stable/generated/torch.nn.utils.skip_init.html but this function cannot be used + # directly. + # Instead of this approach, it would be possible to bypass the __init__ of the class but that runs the risk of + # omitting important logic inside that __init__. 
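+ # Concretely: build the layer with its parameters on the "meta" device (no storage is allocated),
+ # then materialize uninitialized storage on the requested device via to_empty().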
+ kwargs = kwargs.copy() + final_device = kwargs.pop("device", "cpu") + cls.__init__(self, *args, device="meta", **kwargs) + self.to_empty(device=final_device) + + @property + def _available_adapters(self) -> Iterable[str]: + return set( + chain( + self.lokr_w1.keys(), + self.lokr_w1_a.keys(), + self.lokr_w1_b.keys(), + self.lokr_w2.keys(), + self.lokr_w2_a.keys(), + self.lokr_w2_b.keys(), + self.lokr_t2.keys(), + ) + ) + + def create_lokr_parameters( + self, + adapter_name: str, + r: int, + shape, + use_w1: bool, + use_w2: bool, + use_effective_conv2d: bool, + ): + if use_w1: + self.lokr_w1[adapter_name] = nn.Parameter(torch.empty(shape[0][0], shape[1][0])) + else: + self.lokr_w1_a[adapter_name] = nn.Parameter(torch.empty(shape[0][0], r)) + self.lokr_w1_b[adapter_name] = nn.Parameter(torch.empty(r, shape[1][0])) + + if len(shape) == 4: + # Conv2d + if use_w2: + self.lokr_w2[adapter_name] = nn.Parameter(torch.empty(shape[0][1], shape[1][1], *shape[2:])) + elif use_effective_conv2d: + self.lokr_t2[adapter_name] = nn.Parameter(torch.empty(r, r, shape[2], shape[3])) + self.lokr_w2_a[adapter_name] = nn.Parameter(torch.empty(r, shape[0][1])) # b, 1-mode + self.lokr_w2_b[adapter_name] = nn.Parameter(torch.empty(r, shape[1][1])) # d, 2-mode + else: + self.lokr_w2_a[adapter_name] = nn.Parameter(torch.empty(shape[0][1], r)) + self.lokr_w2_b[adapter_name] = nn.Parameter(torch.empty(r, shape[1][1] * shape[2] * shape[3])) + else: + # Linear + if use_w2: + self.lokr_w2[adapter_name] = nn.Parameter(torch.empty(shape[0][1], shape[1][1])) + else: + self.lokr_w2_a[adapter_name] = nn.Parameter(torch.empty(shape[0][1], r)) + self.lokr_w2_b[adapter_name] = nn.Parameter(torch.empty(r, shape[1][1])) + + def reset_lokr_parameters(self, adapter_name: str): + if adapter_name in self.lokr_w1: + nn.init.kaiming_uniform_(self.lokr_w1[adapter_name], a=math.sqrt(5)) + if adapter_name in self.lokr_w2: + nn.init.kaiming_uniform_(self.lokr_w2[adapter_name], a=math.sqrt(5)) + if adapter_name in self.lokr_w1_a: + nn.init.kaiming_uniform_(self.lokr_w1_a[adapter_name], a=math.sqrt(5)) + nn.init.zeros_(self.lokr_w1_b[adapter_name]) + if adapter_name in self.lokr_w2_a: + nn.init.kaiming_uniform_(self.lokr_w2_a[adapter_name], a=math.sqrt(5)) + nn.init.zeros_(self.lokr_w2_b[adapter_name]) + if adapter_name in self.lokr_t2: + nn.init.kaiming_uniform_(self.lokr_t2[adapter_name], a=math.sqrt(5)) + + def update_layer( + self, + adapter_name: str, + r: int, + alpha: float, + rank_dropout: float, + module_dropout: float, + init_weights: bool, + use_effective_conv2d: bool, + decompose_both: bool, + decompose_factor: int, + **kwargs, + ) -> None: + """Internal function to create lokr adapter + + Args: + shape (`Tuple[int, ...]`): Shape of weights to produce + adapter_name (`str`): Name for the adapter to add + r (`int`): Rank for the added adapter + alpha (`float`): Alpha for the added adapter + rank_dropout (`float`): The dropout probability for rank dimension during training + module_dropout (`float`): The dropout probability for disabling adapter during training. 
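+ use_effective_conv2d (`bool`): Use parameter effective decomposition for Conv2d with ksize > 1
+ decompose_both (`bool`): Perform rank decomposition of left kronecker product matrix
+ decompose_factor (`int`): Kronecker product decomposition factor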
+ init_weights (`bool`): Whether to initialize weights + """ + + self.r[adapter_name] = r + self.alpha[adapter_name] = alpha + self.scaling[adapter_name] = alpha / r + self.rank_dropout[adapter_name] = rank_dropout + self.module_dropout[adapter_name] = module_dropout + + # Determine shape of LoKr weights + if isinstance(self, nn.Linear): + in_dim, out_dim = self.in_features, self.out_features + + in_m, in_n = factorization(in_dim, decompose_factor) + out_l, out_k = factorization(out_dim, decompose_factor) + shape = ((out_l, out_k), (in_m, in_n)) # ((a, b), (c, d)), out_dim = a*c, in_dim = b*d + + use_w1 = not (decompose_both and r < max(shape[0][0], shape[1][0]) / 2) + use_w2 = not (r < max(shape[0][1], shape[1][1]) / 2) + use_effective_conv2d = False + elif isinstance(self, nn.Conv2d): + in_dim, out_dim = self.in_channels, self.out_channels + k_size = self.kernel_size + + in_m, in_n = factorization(in_dim, decompose_factor) + out_l, out_k = factorization(out_dim, decompose_factor) + shape = ((out_l, out_k), (in_m, in_n), *k_size) # ((a, b), (c, d), *k_size) + + use_w1 = not (decompose_both and r < max(shape[0][0], shape[1][0]) / 2) + use_w2 = r >= max(shape[0][1], shape[1][1]) / 2 + use_effective_conv2d = use_effective_conv2d and self.kernel_size != (1, 1) + else: + raise NotImplementedError(f"LoHa is not implemented for {type(self).__name__} layer") + + # Create weights with provided shape + self.create_lokr_parameters(adapter_name, r, shape, use_w1, use_w2, use_effective_conv2d) + + # Initialize weights + if init_weights: + self.reset_lokr_parameters(adapter_name) + + # Move new weights to device + weight = getattr(self, "weight", None) + if weight is not None: + # the layer is already completely initialized, this is an update + if weight.dtype.is_floating_point or weight.dtype.is_complex: + self.to(weight.device, dtype=weight.dtype) + else: + self.to(weight.device) + self.set_adapter(self.active_adapters) + + def get_delta_weight(self, adapter_name: str) -> torch.Tensor: + # https://github.com/KohakuBlueleaf/LyCORIS/blob/eb460098187f752a5d66406d3affade6f0a07ece/lycoris/modules/loha.py#L178 + if adapter_name in self.lokr_w1: + w1 = self.lokr_w1[adapter_name] + else: + w1 = self.lokr_w1_a[adapter_name] @ self.lokr_w1_b[adapter_name] + + if adapter_name in self.lokr_w2: + w2 = self.lokr_w2[adapter_name] + elif adapter_name in self.lokr_t2: + w2 = make_weight_cp(self.lokr_t2[adapter_name], self.lokr_w2_a[adapter_name], self.lokr_w2_b[adapter_name]) + else: + w2 = self.lokr_w2_a[adapter_name] @ self.lokr_w2_b[adapter_name] + + # Make weights with Kronecker product + weight = make_kron(w1, w2) + weight = weight.reshape(self.weight.shape) + + # Perform rank dropout during training - drop rows of addition weights + rank_dropout = self.rank_dropout[adapter_name] + if self.training and rank_dropout: + drop = (torch.rand(weight.size(0)) > rank_dropout).float() + drop = drop.view(-1, *[1] * len(weight.shape[1:])).to(weight.device) + drop /= drop.mean() + weight *= drop + + return weight + + def merge(self) -> None: + if self.merged: + warnings.warn( + f"Already following adapters were merged {','.join(self.merged_adapters)}. " + f"You are now additionally merging {','.join(self.active_adapters)}." 
+ ) + for active_adapter in self.active_adapters: + # if active_adapter in self.hada_w1_a.keys(): + if active_adapter in self._available_adapters: + self.weight.data += self.get_delta_weight(active_adapter) + self.merged_adapters.append(active_adapter) + self.merged = True + + def unmerge(self) -> None: + if not self.merged: + warnings.warn("Already unmerged. Nothing to do.") + return + while len(self.merged_adapters) > 0: + active_adapter = self.merged_adapters.pop() + # if active_adapter in self.hada_w1_a.keys(): + if active_adapter in self._available_adapters: + self.weight.data -= self.get_delta_weight(active_adapter) + self.merged = False + + def _op(self, x: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: + raise NotImplementedError + + def forward(self, x: torch.Tensor) -> torch.Tensor: + previous_dtype = x.dtype + + if self.disable_adapters: + if self.merged: + self.unmerge() + result = self._op(x, self.weight) + elif self.merged: + result = self._op(x, self.weight) + else: + # Get base weights + weight = self.weight.data + + # Execute all the adapters + for active_adapter in self.active_adapters: + # if active_adapter not in self.hada_w1_a.keys(): + if active_adapter not in self._available_adapters: + continue + + module_dropout = self.module_dropout[active_adapter] + + # Modify current execution weights + if (not self.training) or (self.training and torch.rand(1) > module_dropout): + weight = weight + self.get_delta_weight(active_adapter) + + # Perform actual operation + result = self._op(x, weight) + + result = result.to(previous_dtype) + return result + + def scale_layer(self, scale_factor: float) -> None: + if scale_factor != 1: + for active_adapter in self.active_adapters: + alpha = self.alpha[active_adapter] + r = self.r[active_adapter] + self.scaling[active_adapter] = (alpha / r) * scale_factor + + def unscale_layer(self) -> None: + for active_adapter in self.active_adapters: + alpha = self.alpha[active_adapter] + r = self.r[active_adapter] + self.scaling[active_adapter] = alpha / r + + +class Linear(LoKrLayer, nn.Linear): + """LoKr implemented in Linear layer""" + + def __init__( + self, + in_features: int, + out_features: int, + bias: bool = True, + device: Optional[Union[str, torch.device]] = None, + dtype: Optional[torch.dtype] = None, + adapter_name: str = "default", + r: int = 0, + alpha: float = 0.0, + rank_dropout: float = 0.0, + module_dropout: float = 0.0, + **kwargs, + ): + init_weights = kwargs.pop("init_weights", True) + self._init_empty_weights(nn.Linear, in_features, out_features, bias, device=device, dtype=dtype) + + LoKrLayer.__init__(self) + + # Create adapter and set it active + self.update_layer(adapter_name, r, alpha, rank_dropout, module_dropout, init_weights, **kwargs) + self.set_adapter(adapter_name) + + def _op(self, input: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: + return F.linear(input, weight, bias=self.bias) + + +class Conv2d(LoKrLayer, nn.Conv2d): + """LoKr implemented in Conv2d layer""" + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: Union[int, Tuple[int]], + stride: Union[int, Tuple[int]] = 1, + padding: Union[int, Tuple[int]] = 0, + dilation: int = 1, + groups: int = 1, + bias: bool = True, + padding_mode: str = "zeros", + device: Optional[Union[str, torch.device]] = None, + dtype: Optional[torch.dtype] = None, + adapter_name: str = "default", + r: int = 0, + alpha: float = 0.0, + rank_dropout: float = 0.0, + module_dropout: float = 0.0, + use_effective_conv2d: bool = False, + **kwargs, + ): 
+ init_weights = kwargs.pop("init_weights", True) + self._init_empty_weights( + nn.Conv2d, + in_channels, + out_channels, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=bias, + padding_mode=padding_mode, + device=device, + dtype=dtype, + ) + + LoKrLayer.__init__(self) + + # Create adapter and set it active + self.update_layer( + adapter_name, r, alpha, rank_dropout, module_dropout, init_weights, use_effective_conv2d, **kwargs + ) + self.set_adapter(adapter_name) + + def _op(self, input: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: + return F.conv2d( + input, + weight, + bias=self.bias, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + groups=self.groups, + ) + + +# Below code is a direct copy from https://github.com/KohakuBlueleaf/LyCORIS/blob/eb460098187f752a5d66406d3affade6f0a07ece/lycoris/modules/lokr.py#L11 + + +def factorization(dimension: int, factor: int = -1) -> Tuple[int, int]: + """ + return a tuple of two value of input dimension decomposed by the number closest to factor second value is higher or + equal than first value. + + In LoRA with Kroneckor Product, first value is a value for weight scale. secon value is a value for weight. + + Becuase of non-commutative property, A⊗B ≠ B⊗A. Meaning of two matrices is slightly different. + + examples) factor + -1 2 4 8 16 ... + 127 -> 127, 1 127 -> 127, 1 127 -> 127, 1 127 -> 127, 1 127 -> 127, 1 128 -> 16, 8 128 -> 64, 2 128 -> 32, 4 128 -> + 16, 8 128 -> 16, 8 250 -> 125, 2 250 -> 125, 2 250 -> 125, 2 250 -> 125, 2 250 -> 125, 2 360 -> 45, 8 360 -> 180, 2 + 360 -> 90, 4 360 -> 45, 8 360 -> 45, 8 512 -> 32, 16 512 -> 256, 2 512 -> 128, 4 512 -> 64, 8 512 -> 32, 16 1024 -> + 32, 32 1024 -> 512, 2 1024 -> 256, 4 1024 -> 128, 8 1024 -> 64, 16 + """ + + if factor > 0 and (dimension % factor) == 0: + m = factor + n = dimension // factor + return m, n + if factor == -1: + factor = dimension + m, n = 1, dimension + length = m + n + while m < n: + new_m = m + 1 + while dimension % new_m != 0: + new_m += 1 + new_n = dimension // new_m + if new_m + new_n > length or new_m > factor: + break + else: + m, n = new_m, new_n + if m > n: + n, m = m, n + return m, n + + +def make_weight_cp(t, wa, wb): + rebuild2 = torch.einsum("i j k l, i p, j r -> p r k l", t, wa, wb) # [c, d, k1, k2] + return rebuild2 + + +def make_kron(w1, w2, scale=1.0): + if len(w2.shape) == 4: + w1 = w1.unsqueeze(2).unsqueeze(2) + w2 = w2.contiguous() + rebuild = torch.kron(w1, w2) + + return rebuild * scale diff --git a/src/peft/tuners/lokr/model.py b/src/peft/tuners/lokr/model.py new file mode 100644 index 0000000000..3757ecac22 --- /dev/null +++ b/src/peft/tuners/lokr/model.py @@ -0,0 +1,285 @@ +# coding=utf-8 +# Copyright 2023-present the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding=utf-8 +# Copyright 2023-present the HuggingFace Inc. team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +import warnings +from itertools import chain +from typing import Union + +import torch +from torch import nn +from tqdm import tqdm + +from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer, check_target_module_exists +from peft.utils import ( + ModulesToSaveWrapper, + _get_submodules, +) + +from .layer import Conv2d, Linear, LoKrLayer + + +class LoKrModel(BaseTuner): + """ + Creates Low-Rank Kronecker Product model from a pretrained model. The original method is partially described in + https://arxiv.org/abs/2108.06098 and in https://arxiv.org/abs/2309.14859 Current implementation heavily borrows + from + https://github.com/KohakuBlueleaf/LyCORIS/blob/eb460098187f752a5d66406d3affade6f0a07ece/lycoris/modules/lokr.py + + Args: + model (`torch.nn.Module`): The model to which the adapter tuner layers will be attached. + config ([`LoKrConfig`]): The configuration of the LoKr model. + adapter_name (`str`): The name of the adapter, defaults to `"default"`. + + Returns: + `torch.nn.Module`: The LoKr model. + + Example: + ```py + >>> from diffusers import StableDiffusionPipeline + >>> from peft import LoKrModel, LoKrConfig + + >>> config_te = LoKrConfig( + ... r=8, + ... lora_alpha=32, + ... target_modules=["k_proj", "q_proj", "v_proj", "out_proj", "fc1", "fc2"], + ... rank_dropout=0.0, + ... module_dropout=0.0, + ... init_weights=True, + ... ) + >>> config_unet = LoKrConfig( + ... r=8, + ... lora_alpha=32, + ... target_modules=[ + ... "proj_in", + ... "proj_out", + ... "to_k", + ... "to_q", + ... "to_v", + ... "to_out.0", + ... "ff.net.0.proj", + ... "ff.net.2", + ... ], + ... rank_dropout=0.0, + ... module_dropout=0.0, + ... init_weights=True, + ... use_effective_conv2d=True, + ... ) + + >>> model = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5") + >>> model.text_encoder = LoKrModel(model.text_encoder, config_te, "default") + >>> model.unet = LoKrModel(model.unet, config_unet, "default") + ``` + + **Attributes**: + - **model** ([`~torch.nn.Module`]) -- The model to be adapted. + - **peft_config** ([`LoKrConfig`]): The configuration of the LoKr model. 
+ """ + + def __init__(self, model, config, adapter_name): + super().__init__(model, config, adapter_name) + + def __getattr__(self, name: str): + """Forward missing attributes to the wrapped module.""" + try: + return super().__getattr__(name) # defer to nn.Module's logic + except AttributeError: + return getattr(self.model, name) + + def _set_adapter_layers(self, enabled=True): + for module in self.model.modules(): + if isinstance(module, (BaseTunerLayer, ModulesToSaveWrapper)): + module.enable_adapters(enabled) + + def enable_adapter_layers(self): + self._set_adapter_layers(enabled=True) + + def disable_adapter_layers(self): + self._set_adapter_layers(enabled=False) + + def set_adapter(self, adapter_name): + for module in self.model.modules(): + if isinstance(module, LoKrLayer): + if module.merged: + warnings.warn("Adapter cannot be set when the model is merged. Unmerging the model first.") + module.unmerge() + module.set_adapter(adapter_name) + + @staticmethod + def _prepare_adapter_config(peft_config, model_config): + if peft_config.target_modules is None: + raise ValueError("Please specify `target_modules` in `peft_config`") + return peft_config + + @staticmethod + def _check_target_module_exists(lokr_config, key): + return check_target_module_exists(lokr_config, key) + + def _create_and_replace( + self, + lokr_config, + adapter_name: str, + target: Union[LoKrLayer, nn.Module], + target_name, + parent, + current_key, + **optional_kwargs, + ): + """ + A private method to create and replace the target module with the adapter module. + """ + + # Regexp matching - Find key which matches current target_name in patterns provided + pattern_keys = list(chain(lokr_config.rank_pattern.keys(), lokr_config.alpha_pattern.keys())) + target_name_key = next(filter(lambda key: re.match(f"(.*\.)?{key}$", current_key), pattern_keys), target_name) + + r = lokr_config.rank_pattern.get(target_name_key, lokr_config.r) + alpha = lokr_config.alpha_pattern.get(target_name_key, lokr_config.alpha) + + kwargs = { + "r": r, + "alpha": alpha, + "rank_dropout": lokr_config.rank_dropout, + "module_dropout": lokr_config.module_dropout, + "use_effective_conv2d": lokr_config.use_effective_conv2d, + "init_weights": lokr_config.init_weights, + "decompose_both": lokr_config.decompose_both, + "decompose_factor": lokr_config.decompose_factor, + } + + if isinstance(target, LoKrLayer): + target.update_layer(adapter_name, **kwargs) + else: + new_module = self._create_new_module(lokr_config, adapter_name, target, **kwargs) + self._replace_module(parent, target_name, new_module, target) + + @staticmethod + def _create_new_module(lokr_config, adapter_name, target, **kwargs) -> LoKrLayer: + if isinstance(target, torch.nn.Conv2d): + new_module = Conv2d( + target.in_channels, + target.out_channels, + target.weight.size()[2:], + stride=target.stride, + padding=target.padding, + dilation=target.dilation, + groups=target.groups, + bias=target.bias is not None, + padding_mode=target.padding_mode, + device=target.weight.device, + dtype=target.weight.dtype, + adapter_name=adapter_name, + **kwargs, + ) + elif isinstance(target, torch.nn.Linear): + new_module = Linear( + target.in_features, + target.out_features, + bias=target.bias is not None, + device=target.weight.device, + dtype=target.weight.dtype, + adapter_name=adapter_name, + **kwargs, + ) + else: + raise ValueError( + "Target module not found, currently only adapters for nn.Linear and nn.Conv2d are supported" + ) + return new_module + + @staticmethod + def _replace_module(parent, 
child_name, new_module, child): + setattr(parent, child_name, new_module) + # It's not necessary to set requires_grad here, as that is handled by + # _mark_only_adapters_as_trainable + new_module.weight = child.weight + if hasattr(child, "bias"): + new_module.bias = child.bias + + if getattr(child, "state", None) is not None: + new_module.state = child.state + new_module.to(child.weight.device) + + # dispatch to correct device + for name, module in new_module.named_modules(): + if "hada_" in name: + module.to(child.weight.device) + + def _mark_only_adapters_as_trainable(self) -> None: + for n, p in self.model.named_parameters(): + if "hada_" not in n: + p.requires_grad = False + + def merge_and_unload(self, progressbar: bool = False): + return self._unload_and_optionally_merge(progressbar=progressbar) + + def _unload_and_optionally_merge(self, merge=True, progressbar: bool = False): + if merge: + if getattr(self.model, "quantization_method", None) == "gptq": + raise ValueError("Cannot merge LOKR layers when the model is gptq quantized") + + key_list = [key for key, _ in self.model.named_modules() if "hada" not in key] + desc = "Unloading " + ("and merging " if merge else "") + "model" + for key in tqdm(key_list, disable=not progressbar, desc=desc): + try: + parent, target, target_name = _get_submodules(self.model, key) + except AttributeError: + continue + if isinstance(target, LoKrLayer): + if isinstance(target, nn.Conv2d): + new_module = torch.nn.Conv2d( + target.in_channels, + target.out_channels, + kernel_size=target.kernel_size, + stride=target.stride, + padding=target.padding, + dilation=target.dilation, + ) + elif isinstance(target, nn.Linear): + bias = target.bias is not None + new_module = torch.nn.Linear( + target.in_features, + target.out_features, + bias=bias, + device=target.weight.device, + ) + else: + raise ValueError( + "Cannot convert current module to torch module, currently only adapters for nn.Linear and nn.Conv2d are supported" + ) + if merge: + target.merge() + self._replace_module(parent, target_name, new_module, target) + + # save any additional trainable modules part of `modules_to_save` + if isinstance(target, ModulesToSaveWrapper): + setattr(parent, target_name, target.modules_to_save[target.active_adapter]) + + return self.model diff --git a/src/peft/utils/peft_types.py b/src/peft/utils/peft_types.py index d073be81c3..29c764a08f 100644 --- a/src/peft/utils/peft_types.py +++ b/src/peft/utils/peft_types.py @@ -29,6 +29,7 @@ class PeftType(str, enum.Enum): ADAPTION_PROMPT = "ADAPTION_PROMPT" IA3 = "IA3" LOHA = "LOHA" + LOKR = "LOKR" class TaskType(str, enum.Enum): diff --git a/src/peft/utils/save_and_load.py b/src/peft/utils/save_and_load.py index ff00541121..cd8088e93e 100644 --- a/src/peft/utils/save_and_load.py +++ b/src/peft/utils/save_and_load.py @@ -75,6 +75,9 @@ def get_peft_model_state_dict(model, state_dict=None, adapter_name="default", un elif config.peft_type == PeftType.LOHA: to_return = {k: state_dict[k] for k in state_dict if "hada_" in k} + elif config.peft_type == PeftType.LOKR: + to_return = {k: state_dict[k] for k in state_dict if "lokr_" in k} + elif config.peft_type == PeftType.ADAPTION_PROMPT: to_return = {k: state_dict[k] for k in state_dict if k.split(".")[-1].startswith("adaption_")} elif config.is_prompt_learning: @@ -123,13 +126,14 @@ def set_peft_model_state_dict(model, peft_model_state_dict, adapter_name="defaul else: state_dict = peft_model_state_dict - if config.peft_type in (PeftType.LORA, PeftType.LOHA, PeftType.ADALORA, 
PeftType.IA3): + if config.peft_type in (PeftType.LORA, PeftType.LOHA, PeftType.LOKR, PeftType.ADALORA, PeftType.IA3): peft_model_state_dict = {} parameter_prefix = { PeftType.IA3: "ia3_", PeftType.LORA: "lora_", PeftType.ADALORA: "lora_", PeftType.LOHA: "hada_", + PeftType.LOKR: "lokr_", }[config.peft_type] for k, v in state_dict.items(): if parameter_prefix in k: From e7d6e238e5efa8e433c9d6939e9c146ec4a9f221 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Tue, 3 Oct 2023 19:32:06 +0300 Subject: [PATCH 03/33] Fixed setting requires_grad for lokr modules --- src/peft/tuners/lokr/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/peft/tuners/lokr/model.py b/src/peft/tuners/lokr/model.py index 3757ecac22..458978176f 100644 --- a/src/peft/tuners/lokr/model.py +++ b/src/peft/tuners/lokr/model.py @@ -234,7 +234,7 @@ def _replace_module(parent, child_name, new_module, child): def _mark_only_adapters_as_trainable(self) -> None: for n, p in self.model.named_parameters(): - if "hada_" not in n: + if "lokr_" not in n: p.requires_grad = False def merge_and_unload(self, progressbar: bool = False): From bb45764b339dd004f522bbe5129f07762c65e0ea Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Wed, 4 Oct 2023 13:26:08 +0300 Subject: [PATCH 04/33] Updated initialization of LoKr adapter weights --- src/peft/tuners/lokr/layer.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/peft/tuners/lokr/layer.py b/src/peft/tuners/lokr/layer.py index e78fc7c32f..eb280d440b 100644 --- a/src/peft/tuners/lokr/layer.py +++ b/src/peft/tuners/lokr/layer.py @@ -121,15 +121,17 @@ def create_lokr_parameters( def reset_lokr_parameters(self, adapter_name: str): if adapter_name in self.lokr_w1: - nn.init.kaiming_uniform_(self.lokr_w1[adapter_name], a=math.sqrt(5)) + nn.init.zeros_(self.lokr_w1[adapter_name]) + else: + nn.init.zeros_(self.lokr_w1_a[adapter_name]) + nn.init.kaiming_uniform_(self.lokr_w1_b[adapter_name], a=math.sqrt(5)) + if adapter_name in self.lokr_w2: nn.init.kaiming_uniform_(self.lokr_w2[adapter_name], a=math.sqrt(5)) - if adapter_name in self.lokr_w1_a: - nn.init.kaiming_uniform_(self.lokr_w1_a[adapter_name], a=math.sqrt(5)) - nn.init.zeros_(self.lokr_w1_b[adapter_name]) - if adapter_name in self.lokr_w2_a: + else: nn.init.kaiming_uniform_(self.lokr_w2_a[adapter_name], a=math.sqrt(5)) - nn.init.zeros_(self.lokr_w2_b[adapter_name]) + nn.init.kaiming_uniform_(self.lokr_w2_b[adapter_name], a=math.sqrt(5)) + if adapter_name in self.lokr_t2: nn.init.kaiming_uniform_(self.lokr_t2[adapter_name], a=math.sqrt(5)) @@ -187,7 +189,7 @@ def update_layer( use_w2 = r >= max(shape[0][1], shape[1][1]) / 2 use_effective_conv2d = use_effective_conv2d and self.kernel_size != (1, 1) else: - raise NotImplementedError(f"LoHa is not implemented for {type(self).__name__} layer") + raise NotImplementedError(f"LoKr is not implemented for {type(self).__name__} layer") # Create weights with provided shape self.create_lokr_parameters(adapter_name, r, shape, use_w1, use_w2, use_effective_conv2d) @@ -207,7 +209,7 @@ def update_layer( self.set_adapter(self.active_adapters) def get_delta_weight(self, adapter_name: str) -> torch.Tensor: - # https://github.com/KohakuBlueleaf/LyCORIS/blob/eb460098187f752a5d66406d3affade6f0a07ece/lycoris/modules/loha.py#L178 + # https://github.com/KohakuBlueleaf/LyCORIS/blob/e4259b870d3354a9615a96be61cb5d07455c58ea/lycoris/modules/lokr.py#L224 if adapter_name in self.lokr_w1: w1 = self.lokr_w1[adapter_name] else: From 
0c33d8c3d1a1e9be6a7bce469504262816bbf5ca Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Wed, 4 Oct 2023 13:27:07 +0300 Subject: [PATCH 05/33] Updated docstrings for LoKr params --- src/peft/tuners/lokr/__init__.py | 3 ++- src/peft/tuners/lokr/config.py | 18 +++++++++--------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/peft/tuners/lokr/__init__.py b/src/peft/tuners/lokr/__init__.py index bb138202fd..b137f22c96 100644 --- a/src/peft/tuners/lokr/__init__.py +++ b/src/peft/tuners/lokr/__init__.py @@ -14,7 +14,8 @@ # limitations under the License. from .config import LoKrConfig +from .layer import Conv2d, Linear, LoKrLayer from .model import LoKrModel -__all__ = ["LoKrConfig", "LoKrModel"] +__all__ = ["LoKrConfig", "LoKrModel", "Conv2d", "Linear", "LoKrLayer"] diff --git a/src/peft/tuners/lokr/config.py b/src/peft/tuners/lokr/config.py index eb36c5673f..c2e3d0536c 100644 --- a/src/peft/tuners/lokr/config.py +++ b/src/peft/tuners/lokr/config.py @@ -29,7 +29,7 @@ class LoKrConfig(PeftConfig): r (`int`): LoKr rank. alpha (`int`): The alpha parameter for LoKr scaling. rank_dropout (`int`): The dropout probability for rank dimension during training. - module_dropout (`int`): The dropout probability for disabling LoHa modules during training. + module_dropout (`int`): The dropout probability for disabling LoKr modules during training. use_effective_conv2d (`bool`): Use parameter effective decomposition for Conv2d with ksize > 1 ("Proposition 3" from FedPara paper). decompose_both (`bool`): Perform rank decomposition of left kronecker product matrix. @@ -37,7 +37,7 @@ class LoKrConfig(PeftConfig): target_modules (`Union[List[str],str]`): The names of the modules to apply LoKr to. init_weights (`bool`): Whether to perform initialization of LoKr weights. layers_to_transform (`Union[List[int],int]`): - The layer indexes to transform, if this argument is specified, it will apply the LoHa transformations on + The layer indexes to transform, if this argument is specified, it will apply the LoKr transformations on the layer indexes that are specified in this list. If a single integer is passed, it will apply the LoKr transformations on the layer at this index. layers_pattern (`str`): @@ -49,16 +49,16 @@ class LoKrConfig(PeftConfig): alpha_pattern (`dict`): The mapping from layer names or regexp expression to alphas which are different from the default alpha specified by `alpha`. - modules_to_save (`List[str]`): The names of modules to be set as trainable except LoHa parameters. + modules_to_save (`List[str]`): The names of modules to be set as trainable except LoKr parameters. """ - r: int = field(default=8, metadata={"help": "LoHa rank"}) - alpha: int = field(default=8, metadata={"help": "LoHa alpha"}) + r: int = field(default=8, metadata={"help": "LoKr rank"}) + alpha: int = field(default=8, metadata={"help": "LoKr alpha"}) rank_dropout: float = field( default=0.0, metadata={"help": "The dropout probability for rank dimension during training"} ) module_dropout: float = field( - default=0.0, metadata={"help": "The dropout probability for disabling LoHa modules during training"} + default=0.0, metadata={"help": "The dropout probability for disabling LoKr modules during training"} ) use_effective_conv2d: bool = field( default=False, @@ -74,7 +74,7 @@ class LoKrConfig(PeftConfig): target_modules: Optional[Union[List[str], str]] = field( default=None, metadata={ - "help": "List of module names or regex expression of the module names to replace with LoHa." 
+ "help": "List of module names or regex expression of the module names to replace with LoKr." "For example, ['q', 'v'] or '.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$' " }, ) @@ -82,7 +82,7 @@ class LoKrConfig(PeftConfig): default=True, metadata={ "help": ( - "Whether to initialize the weights of the LoHa layers with their default initialization. Don't change " + "Whether to initialize the weights of the LoKr layers with their default initialization. Don't change " "this setting, except if you know exactly what you're doing." ), }, @@ -120,7 +120,7 @@ class LoKrConfig(PeftConfig): modules_to_save: Optional[List[str]] = field( default=None, metadata={ - "help": "List of modules apart from LoHA layers to be set as trainable and saved in the final checkpoint. " + "help": "List of modules apart from LoKr layers to be set as trainable and saved in the final checkpoint. " "For example, in Sequence Classification or Token Classification tasks, " "the final layer `classifier/score` are randomly initialized and as such need to be trainable and saved." }, From 84b890bb0c23216081ee208d36eef6b6a5ebe167 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Wed, 4 Oct 2023 15:36:52 +0300 Subject: [PATCH 06/33] Removed unneccessary comments --- src/peft/tuners/lokr/layer.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/peft/tuners/lokr/layer.py b/src/peft/tuners/lokr/layer.py index eb280d440b..92da95f80d 100644 --- a/src/peft/tuners/lokr/layer.py +++ b/src/peft/tuners/lokr/layer.py @@ -243,7 +243,6 @@ def merge(self) -> None: f"You are now additionally merging {','.join(self.active_adapters)}." ) for active_adapter in self.active_adapters: - # if active_adapter in self.hada_w1_a.keys(): if active_adapter in self._available_adapters: self.weight.data += self.get_delta_weight(active_adapter) self.merged_adapters.append(active_adapter) @@ -255,7 +254,6 @@ def unmerge(self) -> None: return while len(self.merged_adapters) > 0: active_adapter = self.merged_adapters.pop() - # if active_adapter in self.hada_w1_a.keys(): if active_adapter in self._available_adapters: self.weight.data -= self.get_delta_weight(active_adapter) self.merged = False @@ -278,7 +276,6 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: # Execute all the adapters for active_adapter in self.active_adapters: - # if active_adapter not in self.hada_w1_a.keys(): if active_adapter not in self._available_adapters: continue From fd4a7541c01c2068958e2a34a0b08929d00b784c Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Wed, 4 Oct 2023 15:38:09 +0300 Subject: [PATCH 07/33] Modified sd dreambooth script to be able to train LoRA, LoHa, LoKr adapters --- ...dreambooth_loha.py => train_dreambooth.py} | 274 ++++++++++++++---- 1 file changed, 216 insertions(+), 58 deletions(-) rename examples/stable_diffusion/{train_dreambooth_loha.py => train_dreambooth.py} (84%) diff --git a/examples/stable_diffusion/train_dreambooth_loha.py b/examples/stable_diffusion/train_dreambooth.py similarity index 84% rename from examples/stable_diffusion/train_dreambooth_loha.py rename to examples/stable_diffusion/train_dreambooth.py index 944a8394b6..8364603a0b 100644 --- a/examples/stable_diffusion/train_dreambooth_loha.py +++ b/examples/stable_diffusion/train_dreambooth.py @@ -8,7 +8,7 @@ import threading import warnings from pathlib import Path -from typing import Optional +from typing import Optional, Union import datasets import diffusers @@ -38,7 +38,7 @@ from tqdm.auto import tqdm from transformers import AutoTokenizer, PretrainedConfig 
-from peft import LoHaConfig, get_peft_model +from peft import LoHaConfig, LoKrConfig, LoraConfig, get_peft_model # Will error if the minimal version of diffusers is not installed. Remove at your own risks. @@ -85,6 +85,80 @@ def import_model_class_from_model_name_or_path(pretrained_model_name_or_path: st raise ValueError(f"{model_class} is not supported.") +def create_unet_adapter_config(args: argparse.Namespace) -> Union[LoraConfig, LoHaConfig, LoKrConfig]: + if args.adapter == "full": + raise ValueError("Cannot create unet adapter config for full parameter") + + if args.adapter == "lora": + config = LoraConfig( + r=args.unet_r, + lora_alpha=args.unet_alpha, + target_modules=UNET_TARGET_MODULES, + lora_dropout=args.unet_dropout, + bias=args.unet_bias, + init_lora_weights=True, + ) + elif args.adapter == "loha": + config = LoHaConfig( + r=args.unet_r, + alpha=args.unet_alpha, + target_modules=UNET_TARGET_MODULES, + rank_dropout=args.unet_rank_dropout, + module_dropout=args.unet_module_dropout, + use_effective_conv2d=args.unet_use_effective_conv2d, + init_weights=True, + ) + elif args.adapter == "lokr": + config = LoKrConfig( + r=args.unet_r, + alpha=args.unet_alpha, + target_modules=UNET_TARGET_MODULES, + rank_dropout=args.unet_rank_dropout, + module_dropout=args.unet_module_dropout, + use_effective_conv2d=args.unet_use_effective_conv2d, + decompose_both=args.unet_decompose_both, + decompose_factor=args.unet_decompose_factor, + init_weights=True, + ) + return config + + +def create_te_adapter_config(args: argparse.Namespace) -> Union[LoraConfig, LoHaConfig, LoKrConfig]: + if args.adapter == "full": + raise ValueError("Cannot create text_encoder adapter config for full parameter") + + if args.adapter == "lora": + config = LoraConfig( + r=args.te_r, + lora_alpha=args.te_alpha, + target_modules=TEXT_ENCODER_TARGET_MODULES, + lora_dropout=args.te_dropout, + bias=args.te_bias, + init_lora_weights=True, + ) + elif args.adapter == "loha": + config = LoHaConfig( + r=args.te_r, + alpha=args.te_alpha, + target_modules=TEXT_ENCODER_TARGET_MODULES, + rank_dropout=args.te_rank_dropout, + module_dropout=args.te_module_dropout, + init_weights=True, + ) + elif args.adapter == "lokr": + config = LoKrConfig( + r=args.te_r, + alpha=args.te_alpha, + target_modules=TEXT_ENCODER_TARGET_MODULES, + rank_dropout=args.te_rank_dropout, + module_dropout=args.te_module_dropout, + decompose_both=args.te_decompose_both, + decompose_factor=args.te_decompose_factor, + init_weights=True, + ) + return config + + def parse_args(input_args=None): parser = argparse.ArgumentParser(description="Simple example of a training script.") parser.add_argument( @@ -192,41 +266,6 @@ def parse_args(input_args=None): ) parser.add_argument("--train_text_encoder", action="store_true", help="Whether to train the text encoder") - # loha args - parser.add_argument("--use_loha", action="store_true", help="Whether to use LoHa for parameter efficient tuning") - parser.add_argument("--r", type=int, default=8, help="LoHa rank, only used if use_loha is True") - parser.add_argument("--alpha", type=int, default=32, help="LoHa alpha, only used if use_loha is True") - parser.add_argument("--rank_dropout", type=float, default=0.0, help="LoHa dropout for rank") - parser.add_argument("--module_dropout", type=float, default=0.0, help="LoHa dropout for disabling module at all") - parser.add_argument( - "--use_effective_conv2d", - action="store_true", - help="Use parameter effective decomposition for Conv2d 3x3 with ksize > 1", - ) - parser.add_argument( - 
"--loha_text_encoder_r", - type=int, - default=8, - help="LoHa rank for text encoder, only used if `use_loha` and `train_text_encoder` are True", - ) - parser.add_argument( - "--loha_text_encoder_alpha", - type=int, - default=32, - help="LoHa alpha for text encoder, only used if `use_loha` and `train_text_encoder` are True", - ) - parser.add_argument( - "--loha_text_encoder_rank_dropout", - type=float, - default=0.0, - help="LoHa dropout for text encoder for rank, only used if `use_loha` and `train_text_encoder` are True", - ) - parser.add_argument( - "--loha_text_encoder_module_dropout", - type=float, - default=0.0, - help="LoHa dropout for text encoder for modules, only used if `use_loha` and `train_text_encoder` are True", - ) parser.add_argument( "--train_batch_size", type=int, default=4, help="Batch size (per device) for the training dataloader." ) @@ -381,6 +420,132 @@ def parse_args(input_args=None): "--enable_xformers_memory_efficient_attention", action="store_true", help="Whether or not to use xformers." ) + # Adapter arguments + subparsers = parser.add_subparsers(dest="adapter") + + # Dummy subparser to train whole model + subparsers.add_parser("full", help="Train full model without adapters") + + # LoRA adapter + lora = subparsers.add_parser("lora", help="Use LoRA adapter") + lora.add_argument("--unet_r", type=int, default=8, help="LoRA rank for unet") + lora.add_argument("--unet_alpha", type=int, default=8, help="LoRA alpha for unet") + lora.add_argument("--unet_dropout", type=float, default=0.0, help="LoRA dropout probability for unet") + lora.add_argument( + "--unet_bias", + type=str, + default="none", + help="Bias type for LoRA. Can be 'none', 'all' or 'lora_only'", + ) + lora.add_argument( + "--te_r", type=int, default=8, help="LoRA rank for text_encoder, only used if `train_text_encoder` is True" + ) + lora.add_argument( + "--te_alpha", + type=int, + default=8, + help="LoRA alpha for text_encoder, only used if `train_text_encoder` is True", + ) + lora.add_argument( + "--te_dropout", + type=float, + default=0.0, + help="LoRA dropout probability for text_encoder, only used if `train_text_encoder` is True", + ) + lora.add_argument( + "--te_bias", + type=str, + default="none", + help="Bias type for LoRA. 
Can be 'none', 'all' or 'lora_only', only used if `train_text_encoder` is True", + ) + + # LoHa adapter + loha = subparsers.add_parser("loha", help="Use LoHa adapter") + loha.add_argument("--unet_r", type=int, default=8, help="LoHa rank for unet") + loha.add_argument("--unet_alpha", type=int, default=8, help="LoHa alpha for unet") + loha.add_argument("--unet_rank_dropout", type=float, default=0.0, help="LoHa rank_dropout probability for unet") + loha.add_argument( + "--unet_module_dropout", type=float, default=0.0, help="LoHa module_dropout probability for unet" + ) + loha.add_argument( + "--unet_use_effective_conv2d", + action="store_true", + help="Use parameter effective decomposition in unet for Conv2d 3x3 with ksize > 1", + ) + loha.add_argument( + "--te_r", type=int, default=8, help="LoHa rank for text_encoder, only used if `train_text_encoder` is True" + ) + loha.add_argument( + "--te_alpha", + type=int, + default=8, + help="LoHa alpha for text_encoder, only used if `train_text_encoder` is True", + ) + loha.add_argument( + "--te_rank_dropout", + type=float, + default=0.0, + help="LoHa rank_dropout probability for text_encoder, only used if `train_text_encoder` is True", + ) + loha.add_argument( + "--te_module_dropout", + type=float, + default=0.0, + help="LoHa module_dropout probability for text_encoder, only used if `train_text_encoder` is True", + ) + + # LoKr adapter + lokr = subparsers.add_parser("lokr", help="Use LoKr adapter") + lokr.add_argument("--unet_r", type=int, default=8, help="LoKr rank for unet") + lokr.add_argument("--unet_alpha", type=int, default=8, help="LoKr alpha for unet") + lokr.add_argument("--unet_rank_dropout", type=float, default=0.0, help="LoKr rank_dropout probability for unet") + lokr.add_argument( + "--unet_module_dropout", type=float, default=0.0, help="LoKr module_dropout probability for unet" + ) + lokr.add_argument( + "--unet_use_effective_conv2d", + action="store_true", + help="Use parameter effective decomposition in unet for Conv2d 3x3 with ksize > 1", + ) + lokr.add_argument( + "--unet_decompose_both", action="store_true", help="Decompose left matrix in kronecker product for unet" + ) + lokr.add_argument( + "--unet_decompose_factor", type=int, default=-1, help="Decompose factor in kronecker product for unet" + ) + lokr.add_argument( + "--te_r", type=int, default=8, help="LoKr rank for text_encoder, only used if `train_text_encoder` is True" + ) + lokr.add_argument( + "--te_alpha", + type=int, + default=8, + help="LoKr alpha for text_encoder, only used if `train_text_encoder` is True", + ) + lokr.add_argument( + "--te_rank_dropout", + type=float, + default=0.0, + help="LoKr rank_dropout probability for text_encoder, only used if `train_text_encoder` is True", + ) + lokr.add_argument( + "--te_module_dropout", + type=float, + default=0.0, + help="LoKr module_dropout probability for text_encoder, only used if `train_text_encoder` is True", + ) + lokr.add_argument( + "--te_decompose_both", + action="store_true", + help="Decompose left matrix in kronecker product for text_encoder, only used if `train_text_encoder` is True", + ) + lokr.add_argument( + "--te_decompose_factor", + type=int, + default=-1, + help="Decompose factor in kronecker product for text_encoder, only used if `train_text_encoder` is True", + ) + if input_args is not None: args = parser.parse_args(input_args) else: @@ -723,16 +888,8 @@ def main(args): args.pretrained_model_name_or_path, subfolder="unet", revision=args.revision ) - if args.use_loha: - config = LoHaConfig( - r=args.r, - 
alpha=args.alpha, - target_modules=UNET_TARGET_MODULES, - rank_dropout=args.rank_dropout, - module_dropout=args.module_dropout, - use_effective_conv2d=args.use_effective_conv2d, - init_weights=True, - ) + if args.adapter != "full": + config = create_unet_adapter_config(args) unet = get_peft_model(unet, config) unet.print_trainable_parameters() print(unet) @@ -740,15 +897,8 @@ def main(args): vae.requires_grad_(False) if not args.train_text_encoder: text_encoder.requires_grad_(False) - elif args.train_text_encoder and args.use_loha: - config = LoHaConfig( - r=args.loha_text_encoder_r, - alpha=args.loha_text_encoder_alpha, - target_modules=TEXT_ENCODER_TARGET_MODULES, - rank_dropout=args.loha_text_encoder_rank_dropout, - module_dropout=args.loha_text_encoder_module_dropout, - init_weights=True, - ) + elif args.train_text_encoder and args.adapter != "full": + config = create_te_adapter_config(args) text_encoder = get_peft_model(text_encoder, config) text_encoder.print_trainable_parameters() print(text_encoder) @@ -761,7 +911,7 @@ def main(args): if args.gradient_checkpointing: unet.enable_gradient_checkpointing() - if args.train_text_encoder and not args.use_loha: + if args.train_text_encoder and not args.adapter != "full": text_encoder.gradient_checkpointing_enable() # Enable TF32 for faster training on Ampere GPUs, @@ -1018,6 +1168,10 @@ def main(args): pipeline = pipeline.to(accelerator.device) pipeline.set_progress_bar_config(disable=True) + # Set evaliation mode + pipeline.unet.eval() + pipeline.text_encoder.eval() + # run inference if args.seed is not None: generator = torch.Generator(device=accelerator.device).manual_seed(args.seed) @@ -1044,6 +1198,10 @@ def main(args): } ) + # Set evaliation mode + pipeline.unet.train() + pipeline.text_encoder.train() + del pipeline torch.cuda.empty_cache() @@ -1071,7 +1229,7 @@ def main(args): # Create the pipeline using using the trained modules and save it. 
accelerator.wait_for_everyone() if accelerator.is_main_process: - if args.use_loha: + if args.adapter != "full": unwarpped_unet = accelerator.unwrap_model(unet) unwarpped_unet.save_pretrained( os.path.join(args.output_dir, "unet"), state_dict=accelerator.get_state_dict(unet) From ddfae52641143645c774be03418053660dab4d4d Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Wed, 4 Oct 2023 18:39:57 +0300 Subject: [PATCH 08/33] Updated conversion script to incorporate LoKr --- .../convert_sd_adapter_to_peft.py | 201 ++++++++++++++++-- 1 file changed, 187 insertions(+), 14 deletions(-) diff --git a/examples/stable_diffusion/convert_sd_adapter_to_peft.py b/examples/stable_diffusion/convert_sd_adapter_to_peft.py index d15537b348..e0bc2e7716 100644 --- a/examples/stable_diffusion/convert_sd_adapter_to_peft.py +++ b/examples/stable_diffusion/convert_sd_adapter_to_peft.py @@ -1,15 +1,19 @@ import argparse +import json import os from collections import Counter from dataclasses import dataclass +from functools import reduce from typing import Dict, List, Optional, Union import safetensors import torch +import torch.nn as nn from diffusers import UNet2DConditionModel from transformers import CLIPTextModel -from peft import LoHaConfig, LoraConfig, PeftType, get_peft_model, set_peft_model_state_dict +from peft import LoHaConfig, LoKrConfig, LoraConfig, PeftType, get_peft_model, set_peft_model_state_dict +from peft.tuners.lokr.layer import factorization # Default kohya_ss LoRA replacement modules @@ -21,6 +25,11 @@ PREFIX_TEXT_ENCODER = "lora_te" +def get_module_by_name(module: Union[torch.Tensor, nn.Module], access_string: str): + names = access_string.split(sep=".") + return reduce(getattr, names, module) + + @dataclass class LoRAInfo: kohya_key: str @@ -35,7 +44,7 @@ def peft_state_dict(self) -> Dict[str, torch.Tensor]: raise ValueError("At least one of lora_A or lora_B is None, they must both be provided") return { f"base_model.model{self.peft_key}.lora_A.weight": self.lora_A, - f"base_model.model.{self.peft_key}.lora_B.weight": self.lora_A, + f"base_model.model.{self.peft_key}.lora_B.weight": self.lora_B, } @@ -73,7 +82,49 @@ def peft_state_dict(self) -> Dict[str, torch.Tensor]: return state_dict -def construct_peft_loraconfig(info: Dict[str, LoRAInfo]) -> LoraConfig: +@dataclass +class LoKrInfo: + kohya_key: str + peft_key: str + alpha: Optional[float] = None + rank: Optional[int] = None + lokr_w1: Optional[torch.Tensor] = None + lokr_w1_a: Optional[torch.Tensor] = None + lokr_w1_b: Optional[torch.Tensor] = None + lokr_w2: Optional[torch.Tensor] = None + lokr_w2_a: Optional[torch.Tensor] = None + lokr_w2_b: Optional[torch.Tensor] = None + lokr_t2: Optional[torch.Tensor] = None + + def peft_state_dict(self) -> Dict[str, torch.Tensor]: + if (self.lokr_w1 is None and self.lokr_w1_a is None and self.lokr_w1_b is None) or ( + self.lokr_w2 is None and self.lokr_w2_a is None and self.lokr_w2_b is None + ): + raise ValueError( + "At least one of lokr_w1, lokr_w1_a, lokr_w1_b, lokr_w2, lokr_w2_a, lokr_w2_b is missing, they all must be provided" + ) + + state_dict = {} + + if self.lokr_w1 is not None: + state_dict[f"base_model.model.{self.peft_key}.lokr_w1"] = self.lokr_w1 + elif self.lokr_w1_a is not None: + state_dict[f"base_model.model.{self.peft_key}.lokr_w1_a"] = self.lokr_w1_a + state_dict[f"base_model.model.{self.peft_key}.lokr_w1_b"] = self.lokr_w1_b + + if self.lokr_w2 is not None: + state_dict[f"base_model.model.{self.peft_key}.lokr_w2"] = self.lokr_w2 + elif self.lokr_w2_a is not None: + 
state_dict[f"base_model.model.{self.peft_key}.lokr_w2_a"] = self.lokr_w2_a + state_dict[f"base_model.model.{self.peft_key}.lokr_w2_b"] = self.lokr_w2_b + + if self.lokr_t2 is not None: + state_dict[f"base_model.model.{self.peft_key}.lokr_t2"] = self.lokr_t2 + + return state_dict + + +def construct_peft_loraconfig(info: Dict[str, LoRAInfo], **kwargs) -> LoraConfig: """Constructs LoraConfig from data extracted from adapter checkpoint Args: @@ -91,8 +142,8 @@ def construct_peft_loraconfig(info: Dict[str, LoRAInfo]) -> LoraConfig: target_modules = sorted(info.keys()) # Determine most common rank and alpha - r = Counter(ranks.values()).most_common(1)[0] - lora_alpha = Counter(alphas.values()).most_common(1)[0] + r = int(Counter(ranks.values()).most_common(1)[0][0]) + lora_alpha = Counter(alphas.values()).most_common(1)[0][0] # Determine which modules have different rank and alpha rank_pattern = dict(sorted(filter(lambda x: x[1] != r, ranks.items()), key=lambda x: x[0])) @@ -112,7 +163,7 @@ def construct_peft_loraconfig(info: Dict[str, LoRAInfo]) -> LoraConfig: return config -def construct_peft_lohaconfig(info: Dict[str, LoHaInfo]) -> LoHaConfig: +def construct_peft_lohaconfig(info: Dict[str, LoHaInfo], **kwargs) -> LoHaConfig: """Constructs LoHaConfig from data extracted from adapter checkpoint Args: @@ -130,8 +181,8 @@ def construct_peft_lohaconfig(info: Dict[str, LoHaInfo]) -> LoHaConfig: target_modules = sorted(info.keys()) # Determine most common rank and alpha - r = Counter(ranks.values()).most_common(1)[0] - alpha = Counter(alphas.values()).most_common(1)[0] + r = int(Counter(ranks.values()).most_common(1)[0][0]) + alpha = Counter(alphas.values()).most_common(1)[0][0] # Determine which modules have different rank and alpha rank_pattern = dict(sorted(filter(lambda x: x[1] != r, ranks.items()), key=lambda x: x[0])) @@ -155,6 +206,77 @@ def construct_peft_lohaconfig(info: Dict[str, LoHaInfo]) -> LoHaConfig: return config +def construct_peft_lokrconfig(info: Dict[str, LoKrInfo], decompose_factor: int = -1, **kwargs) -> LoKrConfig: + """Constructs LoKrConfig from data extracted from adapter checkpoint + + Args: + info (Dict[str, LoKrInfo]): Information extracted from adapter checkpoint + + Returns: + LoKrConfig: config for constructing LoKr + """ + + # Unpack all ranks and alphas + ranks = {x[0]: x[1].rank for x in info.items()} + alphas = {x[0]: x[1].alpha or x[1].rank for x in info.items()} + + # Determine which modules needs to be transformed + target_modules = sorted(info.keys()) + + # Determine most common rank and alpha + r = int(Counter(ranks.values()).most_common(1)[0][0]) + alpha = Counter(alphas.values()).most_common(1)[0][0] + + # Determine which modules have different rank and alpha + rank_pattern = dict(sorted(filter(lambda x: x[1] != r, ranks.items()), key=lambda x: x[0])) + alpha_pattern = dict(sorted(filter(lambda x: x[1] != alpha, alphas.items()), key=lambda x: x[0])) + + # Determine whether any of modules have effective conv2d decomposition + use_effective_conv2d = any(((val.lokr_t2 is not None) for val in info.values())) + + # decompose_both should be enabled if any w1 matrix in any layer is decomposed into 2 + decompose_both = any((val.lokr_w1_a is not None and val.lokr_w1_b is not None) for val in info.values()) + + # Determining decompose factor is a bit tricky (but it is most often -1) + # Check that decompose_factor is equal to provided + for val in info.values(): + # Determine shape of first matrix + if val.lokr_w1 is not None: + w1_shape = tuple(val.lokr_w1.shape) + 
else: + w1_shape = (val.lokr_w1_a.shape[0], val.lokr_w1_b.shape[1]) + + # Determine shape of second matrix + if val.lokr_w2 is not None: + w2_shape = tuple(val.lokr_w2.shape[:2]) + elif val.lokr_t2 is not None: + w2_shape = (val.lokr_w2_a.shape[1], val.lokr_w2_b.shape[1]) + else: + # We may iterate over Conv2d layer, for which second item in shape is multiplied by ksize^2 + w2_shape = (val.lokr_w2_a.shape[0], val.lokr_w2_b.shape[1]) + + # We need to check, whether decompose_factor is really -1 or not + shape = (w1_shape[0], w2_shape[0]) + if factorization(shape[0] * shape[1], factor=-1) != shape: + raise ValueError("Cannot infer decompose_factor, probably it is not equal to -1") + + config = LoKrConfig( + r=r, + alpha=alpha, + target_modules=target_modules, + rank_dropout=0.0, + module_dropout=0.0, + init_weights=False, + rank_pattern=rank_pattern, + alpha_pattern=alpha_pattern, + use_effective_conv2d=use_effective_conv2d, + decompose_both=decompose_both, + decompose_factor=decompose_factor, + ) + + return config + + def combine_peft_state_dict(info: Dict[str, Union[LoRAInfo, LoHaInfo]]) -> Dict[str, torch.Tensor]: result = {} for key_info in info.values(): @@ -179,7 +301,7 @@ def detect_adapter_type(keys: List[str]) -> PeftType: elif any(x in key for x in ["lokr_w1", "lokr_w2", "lokr_t1", "lokr_t2"]): # LoKr may have the following keys: # lokr_w1, lokr_w2, lokr_w1_a, lokr_w1_b, lokr_w2_a, lokr_w2_b, lokr_t1, lokr_t2 - raise ValueError("Currently LoKr adapters are not implemented") + return PeftType.LOKR elif "diff" in key: raise ValueError("Currently full diff adapters are not implemented") else: @@ -221,22 +343,40 @@ def detect_adapter_type(keys: List[str]) -> PeftType: } ) - # Store conversion info (model_type -> peft_key -> LoRAInfo | LoHaInfo) - adapter_info: Dict[str, Dict[str, Union[LoRAInfo, LoHaInfo]]] = { + # Store conversion info (model_type -> peft_key -> LoRAInfo | LoHaInfo | LoKrInfo) + adapter_info: Dict[str, Dict[str, Union[LoRAInfo, LoHaInfo, LoKrInfo]]] = { "text_encoder": {}, "unet": {}, } + # Store decompose_factor for LoKr + decompose_factor = -1 + # Open adapter checkpoint with safetensors.safe_open(args.adapter_path, framework="pt", device="cpu") as f: # Extract information about adapter structure metadata = f.metadata() + # It may be difficult to determine rank for LoKr adapters + # If checkpoint was trained with large rank it may not be utilized during weights creation at all + # So we need to get it from checkpoint metadata (along with decompose_factor) + rank, conv_rank = None, None + if metadata is not None: + rank = metadata.get("ss_network_dim", None) + rank = int(rank) if rank else None + if "ss_network_args" in metadata: + network_args = json.loads(metadata["ss_network_args"]) + conv_rank = network_args.get("conv_dim", None) + conv_rank = int(conv_rank) if conv_rank else rank + decompose_factor = network_args.get("factor", -1) + decompose_factor = int(decompose_factor) + # Detect adapter type based on keys adapter_type = detect_adapter_type(f.keys()) adapter_info_cls = { PeftType.LORA: LoRAInfo, PeftType.LOHA: LoHaInfo, + PeftType.LOKR: LoKrInfo, }[adapter_type] # Iterate through available info and unpack all the values @@ -245,9 +385,9 @@ def detect_adapter_type(keys: List[str]) -> PeftType: # Find which model this key belongs to if kohya_key.startswith(PREFIX_TEXT_ENCODER): - model_type = "text_encoder" + model_type, model = "text_encoder", text_encoder elif kohya_key.startswith(PREFIX_UNET): - model_type = "unet" + model_type, model = "unet", unet else: 
raise ValueError(f"Cannot determine model for key: {key}") @@ -256,6 +396,9 @@ def detect_adapter_type(keys: List[str]) -> PeftType: raise ValueError(f"Cannot find corresponding key for diffusers/transformers model: {kohya_key}") peft_key = models_keys[kohya_key] + # Retrieve corresponding layer of model + layer = get_module_by_name(model, peft_key) + # Create a corresponding adapter info if peft_key not in adapter_info[model_type]: adapter_info[model_type][peft_key] = adapter_info_cls(kohya_key=kohya_key, peft_key=peft_key) @@ -285,6 +428,35 @@ def detect_adapter_type(keys: List[str]) -> PeftType: elif kohya_type == "hada_t2": adapter_info[model_type][peft_key].hada_t2 = tensor adapter_info[model_type][peft_key].rank = tensor.shape[0] + elif kohya_type == "lokr_t2": + adapter_info[model_type][peft_key].lokr_t2 = tensor + adapter_info[model_type][peft_key].rank = tensor.shape[0] + elif kohya_type == "lokr_w1": + adapter_info[model_type][peft_key].lokr_w1 = tensor + if isinstance(layer, nn.Linear) or ( + isinstance(layer, nn.Conv2d) and tuple(layer.weight.shape[2:]) == (1, 1) + ): + adapter_info[model_type][peft_key].rank = rank + elif isinstance(layer, nn.Conv2d): + adapter_info[model_type][peft_key].rank = conv_rank + elif kohya_type == "lokr_w2": + adapter_info[model_type][peft_key].lokr_w2 = tensor + if isinstance(layer, nn.Linear) or ( + isinstance(layer, nn.Conv2d) and tuple(layer.weight.shape[2:]) == (1, 1) + ): + adapter_info[model_type][peft_key].rank = rank + elif isinstance(layer, nn.Conv2d): + adapter_info[model_type][peft_key].rank = conv_rank + elif kohya_type == "lokr_w1_a": + adapter_info[model_type][peft_key].lokr_w1_a = tensor + adapter_info[model_type][peft_key].rank = tensor.shape[1] + elif kohya_type == "lokr_w1_b": + adapter_info[model_type][peft_key].lokr_w1_b = tensor + adapter_info[model_type][peft_key].rank = tensor.shape[0] + elif kohya_type == "lokr_w2_a": + adapter_info[model_type][peft_key].lokr_w2_a = tensor + elif kohya_type == "lokr_w2_b": + adapter_info[model_type][peft_key].lokr_w2_b = tensor else: raise ValueError(f"Unknown weight name in key: {key} - {kohya_type}") @@ -292,11 +464,12 @@ def detect_adapter_type(keys: List[str]) -> PeftType: construct_config_fn = { PeftType.LORA: construct_peft_loraconfig, PeftType.LOHA: construct_peft_lohaconfig, + PeftType.LOKR: construct_peft_lokrconfig, }[adapter_type] # Process each model sequentially for model, model_name in [(text_encoder, "text_encoder"), (unet, "unet")]: - config = construct_config_fn(adapter_info[model_name]) + config = construct_config_fn(adapter_info[model_name], decompose_factor=decompose_factor) model = get_peft_model(model, config) set_peft_model_state_dict(model, combine_peft_state_dict(adapter_info[model_name])) From 7526aa224f5fb06d1b5bfe5e94d14f83094f7f06 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Wed, 4 Oct 2023 18:57:06 +0300 Subject: [PATCH 09/33] Added simple tests for LoKr adapter --- tests/test_custom_models.py | 21 ++++++++++++++++++++- tests/test_stablediffusion.py | 4 +++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/tests/test_custom_models.py b/tests/test_custom_models.py index 65dab6c66f..1f0e089327 100644 --- a/tests/test_custom_models.py +++ b/tests/test_custom_models.py @@ -23,7 +23,7 @@ from torch import nn from transformers.pytorch_utils import Conv1D -from peft import AdaLoraConfig, IA3Config, LoHaConfig, LoraConfig, PeftModel, get_peft_model +from peft import AdaLoraConfig, IA3Config, LoHaConfig, LoKrConfig, LoraConfig, PeftModel, 
get_peft_model from peft.tuners.tuners_utils import BaseTunerLayer from .testing_common import PeftCommonTester @@ -73,6 +73,24 @@ ), ("Conv2d 1 LOHA", "Conv2d", LoHaConfig, {"target_modules": ["conv2d"]}), ("Conv2d 2 LOHA", "Conv2d", LoHaConfig, {"target_modules": ["conv2d", "lin0"]}), + # LoKr + ("Vanilla MLP 1 LOKR", "MLP", LoKrConfig, {"target_modules": "lin0"}), + ("Vanilla MLP 2 LOKR", "MLP", LoKrConfig, {"target_modules": ["lin0"]}), + ("Vanilla MLP 3 LOKR", "MLP", LoKrConfig, {"target_modules": ["lin1"]}), + ("Vanilla MLP 4 LOKR", "MLP", LoKrConfig, {"target_modules": ["lin0", "lin1"]}), + ("Vanilla MLP 5 LOKR", "MLP", LoKrConfig, {"target_modules": ["lin0"], "modules_to_save": ["lin1"]}), + ( + "Vanilla MLP 6 LOKR", + "MLP", + LoKrConfig, + { + "target_modules": ["lin0"], + "alpha": 4, + "module_dropout": 0.1, + }, + ), + ("Conv2d 1 LOKR", "Conv2d", LoKrConfig, {"target_modules": ["conv2d"]}), + ("Conv2d 2 LOKR", "Conv2d", LoKrConfig, {"target_modules": ["conv2d", "lin0"]}), ] MULTIPLE_ACTIVE_ADAPTERS_TEST_CASES = [ @@ -138,6 +156,7 @@ PREFIXES = { LoraConfig: "lora_", LoHaConfig: "hada_", + LoKrConfig: "lokr_", } diff --git a/tests/test_stablediffusion.py b/tests/test_stablediffusion.py index 5910287927..f14717326e 100644 --- a/tests/test_stablediffusion.py +++ b/tests/test_stablediffusion.py @@ -64,6 +64,7 @@ CLASSES_MAPPING = { "lora": (LoraConfig, CONFIG_TESTING_KWARGS[0]), "loha": (LoHaConfig, CONFIG_TESTING_KWARGS[1]), + "lokr": (LoHaConfig, CONFIG_TESTING_KWARGS[1]), } @@ -143,7 +144,7 @@ def test_merge_layers(self, test_name, model_id, config_cls, config_kwargs): "model_ids": PEFT_DIFFUSERS_SD_MODELS_TO_TEST, "lora_kwargs": {"init_lora_weights": [False]}, }, - filter_params_func=lambda tests: [x for x in tests if "loha" not in x[0]], + filter_params_func=lambda tests: [x for x in tests if all(s not in x[0] for s in ["loha", "lokr"])], ) ) def test_add_weighted_adapter_base_unchanged(self, test_name, model_id, config_cls, config_kwargs): @@ -172,6 +173,7 @@ def test_add_weighted_adapter_base_unchanged(self, test_name, model_id, config_c "model_ids": PEFT_DIFFUSERS_SD_MODELS_TO_TEST, "lora_kwargs": {"init_lora_weights": [False]}, "loha_kwargs": {"init_weights": [False]}, + "lokr_kwargs": {"init_weights": [False]}, }, ) ) From 8dc5e9881ce8ce41097efc8fbe86dbcb0630a9b2 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Mon, 9 Oct 2023 19:28:26 +0300 Subject: [PATCH 10/33] Modified 'merged' property --- src/peft/tuners/lokr/layer.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/peft/tuners/lokr/layer.py b/src/peft/tuners/lokr/layer.py index 92da95f80d..9a6cca0ec2 100644 --- a/src/peft/tuners/lokr/layer.py +++ b/src/peft/tuners/lokr/layer.py @@ -55,10 +55,13 @@ def __init__(self): self.module_dropout = {} # Tuner info - self.merged = False self._disable_adapters = False self.merged_adapters = [] + @property + def merged(self) -> bool: + return bool(self.merged_adapters) + def _init_empty_weights(self, cls, *args, **kwargs) -> None: # A helper method that allows to initialize the layer of the given class without spending time to initialize the # model weights. 
The implementation is inspired by @@ -246,7 +249,6 @@ def merge(self) -> None: if active_adapter in self._available_adapters: self.weight.data += self.get_delta_weight(active_adapter) self.merged_adapters.append(active_adapter) - self.merged = True def unmerge(self) -> None: if not self.merged: @@ -256,7 +258,6 @@ def unmerge(self) -> None: active_adapter = self.merged_adapters.pop() if active_adapter in self._available_adapters: self.weight.data -= self.get_delta_weight(active_adapter) - self.merged = False def _op(self, x: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: raise NotImplementedError From c1cef3835c2eee52b21497a91127688ee67350c0 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Mon, 9 Oct 2023 19:42:20 +0300 Subject: [PATCH 11/33] Removed duplicated comments --- src/peft/tuners/lokr/model.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/peft/tuners/lokr/model.py b/src/peft/tuners/lokr/model.py index 458978176f..0f93b2fba3 100644 --- a/src/peft/tuners/lokr/model.py +++ b/src/peft/tuners/lokr/model.py @@ -13,21 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -# coding=utf-8 -# Copyright 2023-present the HuggingFace Inc. team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- import re import warnings from itertools import chain From ad525e4b5a1b584fdd1d54ca6be981c175dbd8b7 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Tue, 10 Oct 2023 12:34:57 +0300 Subject: [PATCH 12/33] Replaced wrong keys for LoKr --- src/peft/tuners/lokr/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/peft/tuners/lokr/model.py b/src/peft/tuners/lokr/model.py index 0f93b2fba3..78fe71bde3 100644 --- a/src/peft/tuners/lokr/model.py +++ b/src/peft/tuners/lokr/model.py @@ -214,7 +214,7 @@ def _replace_module(parent, child_name, new_module, child): # dispatch to correct device for name, module in new_module.named_modules(): - if "hada_" in name: + if "lokr_" in name: module.to(child.weight.device) def _mark_only_adapters_as_trainable(self) -> None: @@ -230,7 +230,7 @@ def _unload_and_optionally_merge(self, merge=True, progressbar: bool = False): if getattr(self.model, "quantization_method", None) == "gptq": raise ValueError("Cannot merge LOKR layers when the model is gptq quantized") - key_list = [key for key, _ in self.model.named_modules() if "hada" not in key] + key_list = [key for key, _ in self.model.named_modules() if "lokr" not in key] desc = "Unloading " + ("and merging " if merge else "") + "model" for key in tqdm(key_list, disable=not progressbar, desc=desc): try: From ba458816b77f015363fc569eca9a213a16b9a0fb Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Tue, 10 Oct 2023 16:36:45 +0300 Subject: [PATCH 13/33] Refactored LoHaModel and LoKrModel --- src/peft/tuners/loha/config.py | 5 +- src/peft/tuners/loha/layer.py | 13 +- src/peft/tuners/loha/model.py | 163 +--------------------- src/peft/tuners/lokr/config.py | 5 +- src/peft/tuners/lokr/layer.py | 13 +- src/peft/tuners/lokr/model.py | 165 +---------------------- src/peft/tuners/lycoris_utils.py | 224 +++++++++++++++++++++++++++++++ 7 files changed, 244 insertions(+), 344 deletions(-) create mode 100644 src/peft/tuners/lycoris_utils.py diff --git a/src/peft/tuners/loha/config.py b/src/peft/tuners/loha/config.py index 9081883461..00d24c374b 100644 --- a/src/peft/tuners/loha/config.py +++ b/src/peft/tuners/loha/config.py @@ -16,12 +16,13 @@ from dataclasses import dataclass, field from typing import List, Optional, Union -from peft.config import PeftConfig +# from peft.config import PeftConfig +from peft.tuners.lycoris_utils import LyCORISConfig from peft.utils import PeftType @dataclass -class LoHaConfig(PeftConfig): +class LoHaConfig(LyCORISConfig): """ This is the configuration class to store the configuration of a [`LoHaModel`]. 
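For orientation, the refactored configs plug into `get_peft_model` exactly as before. Below is a minimal, self-contained sketch of applying the LoKr adapter introduced in this series to a toy model; the module names `lin0`/`lin1` and the toy model itself are placeholders for illustration, not taken from the patch.

import torch.nn as nn
from peft import LoKrConfig, get_peft_model

# Toy model whose submodule names match target_modules below (placeholders).
base_model = nn.Sequential()
base_model.add_module("lin0", nn.Linear(10, 20))
base_model.add_module("lin1", nn.Linear(20, 2))

config = LoKrConfig(
    r=8,
    alpha=8,
    target_modules=["lin0", "lin1"],
    rank_dropout=0.0,
    module_dropout=0.0,
    decompose_both=False,
    decompose_factor=-1,
    init_weights=True,
)
peft_model = get_peft_model(base_model, config)
peft_model.print_trainable_parameters()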
diff --git a/src/peft/tuners/loha/layer.py b/src/peft/tuners/loha/layer.py index ec9e5fc694..0e5ee1af00 100644 --- a/src/peft/tuners/loha/layer.py +++ b/src/peft/tuners/loha/layer.py @@ -21,14 +21,15 @@ import torch.nn as nn import torch.nn.functional as F -from peft.tuners.tuners_utils import BaseTunerLayer +from peft.tuners.lycoris_utils import LyCORISLayer -class LoHaLayer(BaseTunerLayer, nn.Module): +class LoHaLayer(LyCORISLayer, nn.Module): # List all names of layers that may contain adapter weights adapter_layer_names = ["hada_w1_a", "hada_w1_b", "hada_w2_a", "hada_w2_b", "hada_t1", "hada_t2"] def __init__(self): + LyCORISLayer.__init__(self) super(nn.Module, self).__init__() # LoHa info @@ -44,14 +45,6 @@ def __init__(self): self.rank_dropout = {} self.module_dropout = {} - # Tuner info - self._disable_adapters = False - self.merged_adapters = [] - - @property - def merged(self) -> bool: - return bool(self.merged_adapters) - def _init_empty_weights(self, cls, *args, **kwargs) -> None: # A helper method that allows to initialize the layer of the given class without spending time to initialize the # model weights. The implementation is inspired by diff --git a/src/peft/tuners/loha/model.py b/src/peft/tuners/loha/model.py index c9403b76eb..824649f1b3 100644 --- a/src/peft/tuners/loha/model.py +++ b/src/peft/tuners/loha/model.py @@ -13,25 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import re -import warnings -from itertools import chain -from typing import Union - import torch -from torch import nn -from tqdm import tqdm - -from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer, check_target_module_exists -from peft.utils import ( - ModulesToSaveWrapper, - _get_submodules, -) +from ..lycoris_utils import LyCORISTuner from .layer import Conv2d, Linear, LoHaLayer -class LoHaModel(BaseTuner): +class LoHaModel(LyCORISTuner): """ Creates Low-Rank Hadamard Product model from a pretrained model. The method is partially described in https://arxiv.org/abs/2108.06098 Current implementation heavily borrows from @@ -87,83 +75,10 @@ class LoHaModel(BaseTuner): - **peft_config** ([`LoHaConfig`]): The configuration of the LoHa model. """ - def __init__(self, model, config, adapter_name): - super().__init__(model, config, adapter_name) - - def __getattr__(self, name: str): - """Forward missing attributes to the wrapped module.""" - try: - return super().__getattr__(name) # defer to nn.Module's logic - except AttributeError: - return getattr(self.model, name) - - def _set_adapter_layers(self, enabled=True): - for module in self.model.modules(): - if isinstance(module, (BaseTunerLayer, ModulesToSaveWrapper)): - module.enable_adapters(enabled) - - def enable_adapter_layers(self): - self._set_adapter_layers(enabled=True) - - def disable_adapter_layers(self): - self._set_adapter_layers(enabled=False) - - def set_adapter(self, adapter_name): - for module in self.model.modules(): - if isinstance(module, LoHaLayer): - if module.merged: - warnings.warn("Adapter cannot be set when the model is merged. 
Unmerging the model first.") - module.unmerge() - module.set_adapter(adapter_name) + prefix: str = "hada_" @staticmethod - def _prepare_adapter_config(peft_config, model_config): - if peft_config.target_modules is None: - raise ValueError("Please specify `target_modules` in `peft_config`") - return peft_config - - @staticmethod - def _check_target_module_exists(loha_config, key): - return check_target_module_exists(loha_config, key) - - def _create_and_replace( - self, - loha_config, - adapter_name: str, - target: Union[LoHaLayer, nn.Module], - target_name, - parent, - current_key, - **optional_kwargs, - ): - """ - A private method to create and replace the target module with the adapter module. - """ - - # Regexp matching - Find key which matches current target_name in patterns provided - pattern_keys = list(chain(loha_config.rank_pattern.keys(), loha_config.alpha_pattern.keys())) - target_name_key = next(filter(lambda key: re.match(f"(.*\.)?{key}$", current_key), pattern_keys), target_name) - - r = loha_config.rank_pattern.get(target_name_key, loha_config.r) - alpha = loha_config.alpha_pattern.get(target_name_key, loha_config.alpha) - - kwargs = { - "r": r, - "alpha": alpha, - "rank_dropout": loha_config.rank_dropout, - "module_dropout": loha_config.module_dropout, - "use_effective_conv2d": loha_config.use_effective_conv2d, - "init_weights": loha_config.init_weights, - } - - if isinstance(target, LoHaLayer): - target.update_layer(adapter_name, **kwargs) - else: - new_module = self._create_new_module(loha_config, adapter_name, target, **kwargs) - self._replace_module(parent, target_name, new_module, target) - - @staticmethod - def _create_new_module(loha_config, adapter_name, target, **kwargs) -> LoHaLayer: + def _create_new_module(config, adapter_name, target, **kwargs) -> LoHaLayer: if isinstance(target, torch.nn.Conv2d): new_module = Conv2d( target.in_channels, @@ -195,73 +110,3 @@ def _create_new_module(loha_config, adapter_name, target, **kwargs) -> LoHaLayer "Target module not found, currently only adapters for nn.Linear and nn.Conv2d are supported" ) return new_module - - @staticmethod - def _replace_module(parent, child_name, new_module, child): - setattr(parent, child_name, new_module) - # It's not necessary to set requires_grad here, as that is handled by - # _mark_only_adapters_as_trainable - new_module.weight = child.weight - if hasattr(child, "bias"): - new_module.bias = child.bias - - if getattr(child, "state", None) is not None: - new_module.state = child.state - new_module.to(child.weight.device) - - # dispatch to correct device - for name, module in new_module.named_modules(): - if "hada_" in name: - module.to(child.weight.device) - - def _mark_only_adapters_as_trainable(self) -> None: - for n, p in self.model.named_parameters(): - if "hada_" not in n: - p.requires_grad = False - - def merge_and_unload(self, progressbar: bool = False): - return self._unload_and_optionally_merge(progressbar=progressbar) - - def _unload_and_optionally_merge(self, merge=True, progressbar: bool = False): - if merge: - if getattr(self.model, "quantization_method", None) == "gptq": - raise ValueError("Cannot merge LOHA layers when the model is gptq quantized") - - key_list = [key for key, _ in self.model.named_modules() if "hada" not in key] - desc = "Unloading " + ("and merging " if merge else "") + "model" - for key in tqdm(key_list, disable=not progressbar, desc=desc): - try: - parent, target, target_name = _get_submodules(self.model, key) - except AttributeError: - continue - if 
isinstance(target, LoHaLayer): - if isinstance(target, nn.Conv2d): - new_module = torch.nn.Conv2d( - target.in_channels, - target.out_channels, - kernel_size=target.kernel_size, - stride=target.stride, - padding=target.padding, - dilation=target.dilation, - ) - elif isinstance(target, nn.Linear): - bias = target.bias is not None - new_module = torch.nn.Linear( - target.in_features, - target.out_features, - bias=bias, - device=target.weight.device, - ) - else: - raise ValueError( - "Cannot convert current module to torch module, currently only adapters for nn.Linear and nn.Conv2d are supported" - ) - if merge: - target.merge() - self._replace_module(parent, target_name, new_module, target) - - # save any additional trainable modules part of `modules_to_save` - if isinstance(target, ModulesToSaveWrapper): - setattr(parent, target_name, target.modules_to_save[target.active_adapter]) - - return self.model diff --git a/src/peft/tuners/lokr/config.py b/src/peft/tuners/lokr/config.py index c2e3d0536c..258fc277e2 100644 --- a/src/peft/tuners/lokr/config.py +++ b/src/peft/tuners/lokr/config.py @@ -16,12 +16,13 @@ from dataclasses import dataclass, field from typing import List, Optional, Union -from peft.config import PeftConfig +# from peft.config import PeftConfig +from peft.tuners.lycoris_utils import LyCORISConfig from peft.utils import PeftType @dataclass -class LoKrConfig(PeftConfig): +class LoKrConfig(LyCORISConfig): """ This is the configuration class to store the configuration of a [`LoKrModel`]. diff --git a/src/peft/tuners/lokr/layer.py b/src/peft/tuners/lokr/layer.py index 9a6cca0ec2..b4995426ea 100644 --- a/src/peft/tuners/lokr/layer.py +++ b/src/peft/tuners/lokr/layer.py @@ -22,10 +22,10 @@ import torch.nn as nn import torch.nn.functional as F -from peft.tuners.tuners_utils import BaseTunerLayer +from peft.tuners.lycoris_utils import LyCORISLayer -class LoKrLayer(BaseTunerLayer, nn.Module): +class LoKrLayer(LyCORISLayer, nn.Module): # List all names of layers that may contain adapter weights adapter_layer_names = [ "lokr_w1", @@ -38,6 +38,7 @@ class LoKrLayer(BaseTunerLayer, nn.Module): ] def __init__(self): + LyCORISLayer.__init__(self) super(nn.Module, self).__init__() # LoKr info @@ -54,14 +55,6 @@ def __init__(self): self.rank_dropout = {} self.module_dropout = {} - # Tuner info - self._disable_adapters = False - self.merged_adapters = [] - - @property - def merged(self) -> bool: - return bool(self.merged_adapters) - def _init_empty_weights(self, cls, *args, **kwargs) -> None: # A helper method that allows to initialize the layer of the given class without spending time to initialize the # model weights. The implementation is inspired by diff --git a/src/peft/tuners/lokr/model.py b/src/peft/tuners/lokr/model.py index 78fe71bde3..7f329c1ccd 100644 --- a/src/peft/tuners/lokr/model.py +++ b/src/peft/tuners/lokr/model.py @@ -13,25 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import re -import warnings -from itertools import chain -from typing import Union - import torch -from torch import nn -from tqdm import tqdm - -from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer, check_target_module_exists -from peft.utils import ( - ModulesToSaveWrapper, - _get_submodules, -) +from ..lycoris_utils import LyCORISTuner from .layer import Conv2d, Linear, LoKrLayer -class LoKrModel(BaseTuner): +class LoKrModel(LyCORISTuner): """ Creates Low-Rank Kronecker Product model from a pretrained model. 
The original method is partially described in https://arxiv.org/abs/2108.06098 and in https://arxiv.org/abs/2309.14859 Current implementation heavily borrows @@ -88,85 +76,10 @@ class LoKrModel(BaseTuner): - **peft_config** ([`LoKrConfig`]): The configuration of the LoKr model. """ - def __init__(self, model, config, adapter_name): - super().__init__(model, config, adapter_name) - - def __getattr__(self, name: str): - """Forward missing attributes to the wrapped module.""" - try: - return super().__getattr__(name) # defer to nn.Module's logic - except AttributeError: - return getattr(self.model, name) - - def _set_adapter_layers(self, enabled=True): - for module in self.model.modules(): - if isinstance(module, (BaseTunerLayer, ModulesToSaveWrapper)): - module.enable_adapters(enabled) - - def enable_adapter_layers(self): - self._set_adapter_layers(enabled=True) - - def disable_adapter_layers(self): - self._set_adapter_layers(enabled=False) - - def set_adapter(self, adapter_name): - for module in self.model.modules(): - if isinstance(module, LoKrLayer): - if module.merged: - warnings.warn("Adapter cannot be set when the model is merged. Unmerging the model first.") - module.unmerge() - module.set_adapter(adapter_name) + prefix: str = "lokr_" @staticmethod - def _prepare_adapter_config(peft_config, model_config): - if peft_config.target_modules is None: - raise ValueError("Please specify `target_modules` in `peft_config`") - return peft_config - - @staticmethod - def _check_target_module_exists(lokr_config, key): - return check_target_module_exists(lokr_config, key) - - def _create_and_replace( - self, - lokr_config, - adapter_name: str, - target: Union[LoKrLayer, nn.Module], - target_name, - parent, - current_key, - **optional_kwargs, - ): - """ - A private method to create and replace the target module with the adapter module. 
- """ - - # Regexp matching - Find key which matches current target_name in patterns provided - pattern_keys = list(chain(lokr_config.rank_pattern.keys(), lokr_config.alpha_pattern.keys())) - target_name_key = next(filter(lambda key: re.match(f"(.*\.)?{key}$", current_key), pattern_keys), target_name) - - r = lokr_config.rank_pattern.get(target_name_key, lokr_config.r) - alpha = lokr_config.alpha_pattern.get(target_name_key, lokr_config.alpha) - - kwargs = { - "r": r, - "alpha": alpha, - "rank_dropout": lokr_config.rank_dropout, - "module_dropout": lokr_config.module_dropout, - "use_effective_conv2d": lokr_config.use_effective_conv2d, - "init_weights": lokr_config.init_weights, - "decompose_both": lokr_config.decompose_both, - "decompose_factor": lokr_config.decompose_factor, - } - - if isinstance(target, LoKrLayer): - target.update_layer(adapter_name, **kwargs) - else: - new_module = self._create_new_module(lokr_config, adapter_name, target, **kwargs) - self._replace_module(parent, target_name, new_module, target) - - @staticmethod - def _create_new_module(lokr_config, adapter_name, target, **kwargs) -> LoKrLayer: + def _create_new_module(config, adapter_name, target, **kwargs) -> LoKrLayer: if isinstance(target, torch.nn.Conv2d): new_module = Conv2d( target.in_channels, @@ -198,73 +111,3 @@ def _create_new_module(lokr_config, adapter_name, target, **kwargs) -> LoKrLayer "Target module not found, currently only adapters for nn.Linear and nn.Conv2d are supported" ) return new_module - - @staticmethod - def _replace_module(parent, child_name, new_module, child): - setattr(parent, child_name, new_module) - # It's not necessary to set requires_grad here, as that is handled by - # _mark_only_adapters_as_trainable - new_module.weight = child.weight - if hasattr(child, "bias"): - new_module.bias = child.bias - - if getattr(child, "state", None) is not None: - new_module.state = child.state - new_module.to(child.weight.device) - - # dispatch to correct device - for name, module in new_module.named_modules(): - if "lokr_" in name: - module.to(child.weight.device) - - def _mark_only_adapters_as_trainable(self) -> None: - for n, p in self.model.named_parameters(): - if "lokr_" not in n: - p.requires_grad = False - - def merge_and_unload(self, progressbar: bool = False): - return self._unload_and_optionally_merge(progressbar=progressbar) - - def _unload_and_optionally_merge(self, merge=True, progressbar: bool = False): - if merge: - if getattr(self.model, "quantization_method", None) == "gptq": - raise ValueError("Cannot merge LOKR layers when the model is gptq quantized") - - key_list = [key for key, _ in self.model.named_modules() if "lokr" not in key] - desc = "Unloading " + ("and merging " if merge else "") + "model" - for key in tqdm(key_list, disable=not progressbar, desc=desc): - try: - parent, target, target_name = _get_submodules(self.model, key) - except AttributeError: - continue - if isinstance(target, LoKrLayer): - if isinstance(target, nn.Conv2d): - new_module = torch.nn.Conv2d( - target.in_channels, - target.out_channels, - kernel_size=target.kernel_size, - stride=target.stride, - padding=target.padding, - dilation=target.dilation, - ) - elif isinstance(target, nn.Linear): - bias = target.bias is not None - new_module = torch.nn.Linear( - target.in_features, - target.out_features, - bias=bias, - device=target.weight.device, - ) - else: - raise ValueError( - "Cannot convert current module to torch module, currently only adapters for nn.Linear and nn.Conv2d are supported" - ) - if merge: 
- target.merge() - self._replace_module(parent, target_name, new_module, target) - - # save any additional trainable modules part of `modules_to_save` - if isinstance(target, ModulesToSaveWrapper): - setattr(parent, target_name, target.modules_to_save[target.active_adapter]) - - return self.model diff --git a/src/peft/tuners/lycoris_utils.py b/src/peft/tuners/lycoris_utils.py new file mode 100644 index 0000000000..2ed85872b8 --- /dev/null +++ b/src/peft/tuners/lycoris_utils.py @@ -0,0 +1,224 @@ +# coding=utf-8 +# Copyright 2023-present the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +import warnings +from dataclasses import dataclass, field +from itertools import chain +from typing import Optional, Union + +import torch +import torch.nn as nn +from tqdm import tqdm + +from peft.config import PeftConfig +from peft.utils import ( + ModulesToSaveWrapper, + _get_submodules, +) + +from .tuners_utils import BaseTuner, BaseTunerLayer, check_target_module_exists + + +@dataclass +class LyCORISConfig(PeftConfig): + r""" + A base config for LyCORIS like adapters + """ + rank_pattern: Optional[dict] = field( + default_factory=dict, + metadata={ + "help": ( + "The mapping from layer names or regexp expression to ranks which are different from the default rank specified by `r`. " + "For example, `{model.decoder.layers.0.encoder_attn.k_proj: 8`}" + ) + }, + ) + alpha_pattern: Optional[dict] = field( + default_factory=dict, + metadata={ + "help": ( + "The mapping from layer names or regexp expression to alphas which are different from the default alpha specified by `alpha`. " + "For example, `{model.decoder.layers.0.encoder_attn.k_proj: 32`}" + ) + }, + ) + + +class LyCORISLayer(BaseTunerLayer): + r""" + A base layer for LyCORIS like adapters + """ + + def __init__(self): + # Tuner info + self._disable_adapters = False + self.merged_adapters = [] + + @property + def merged(self) -> bool: + return bool(self.merged_adapters) + + def update_layer(self, adapter_name: str, r: int, alpha: float, **kwargs): + ... + + +class LyCORISTuner(BaseTuner): + r""" + A base tuner for LyCORIS like adapters + """ + + prefix: str + + def __init__(self, model, config, adapter_name): + super().__init__(model, config, adapter_name) + + def __getattr__(self, name: str): + """Forward missing attributes to the wrapped module.""" + try: + return super().__getattr__(name) # defer to nn.Module's logic + except AttributeError: + return getattr(self.model, name) + + @staticmethod + def _check_target_module_exists(config, key): + return check_target_module_exists(config, key) + + def _create_and_replace( + self, + config: LyCORISConfig, + adapter_name: str, + target: Union[LyCORISLayer, nn.Module], + target_name, + parent, + current_key, + **optional_kwargs, + ): + """ + A private method to create and replace the target module with the adapter module. 
+ """ + + # Regexp matching - Find key which matches current target_name in patterns provided + pattern_keys = list(chain(config.rank_pattern.keys(), config.alpha_pattern.keys())) + target_name_key = next(filter(lambda key: re.match(f"(.*\.)?{key}$", current_key), pattern_keys), target_name) + + kwargs = config.to_dict() + kwargs["r"] = config.rank_pattern.get(target_name_key, config.r) + kwargs["alpha"] = config.alpha_pattern.get(target_name_key, config.alpha) + + if isinstance(target, LyCORISLayer): + target.update_layer(adapter_name, **kwargs) + else: + new_module = self._create_new_module(config, adapter_name, target, **kwargs) + self._replace_module(parent, target_name, new_module, target) + + @staticmethod + def _create_new_module(config: LyCORISConfig, adapter_name: str, target: nn.Module, **kwargs) -> LyCORISLayer: + ... + + def _mark_only_adapters_as_trainable(self) -> None: + for n, p in self.model.named_parameters(): + if self.prefix not in n: + p.requires_grad = False + + @staticmethod + def _prepare_adapter_config(peft_config, model_config): + if peft_config.target_modules is None: + raise ValueError("Please specify `target_modules` in `peft_config`") + return peft_config + + @classmethod + def _replace_module(cls, parent, child_name, new_module, child): + setattr(parent, child_name, new_module) + # It's not necessary to set requires_grad here, as that is handled by + # _mark_only_adapters_as_trainable + new_module.weight = child.weight + if hasattr(child, "bias"): + new_module.bias = child.bias + + if getattr(child, "state", None) is not None: + new_module.state = child.state + new_module.to(child.weight.device) + + # dispatch to correct device + for name, module in new_module.named_modules(): + if cls.prefix in name: + module.to(child.weight.device) + + def _set_adapter_layers(self, enabled=True): + for module in self.model.modules(): + if isinstance(module, (BaseTunerLayer, ModulesToSaveWrapper)): + module.enable_adapters(enabled) + + def _unload_and_optionally_merge(self, merge=True, progressbar: bool = False): + if merge: + if getattr(self.model, "quantization_method", None) == "gptq": + raise ValueError("Cannot merge LOHA layers when the model is gptq quantized") + + key_list = [key for key, _ in self.model.named_modules() if "hada" not in key] + desc = "Unloading " + ("and merging " if merge else "") + "model" + for key in tqdm(key_list, disable=not progressbar, desc=desc): + try: + parent, target, target_name = _get_submodules(self.model, key) + except AttributeError: + continue + if isinstance(target, LyCORISLayer): + if isinstance(target, nn.Conv2d): + new_module = torch.nn.Conv2d( + target.in_channels, + target.out_channels, + kernel_size=target.kernel_size, + stride=target.stride, + padding=target.padding, + dilation=target.dilation, + ) + elif isinstance(target, nn.Linear): + bias = target.bias is not None + new_module = torch.nn.Linear( + target.in_features, + target.out_features, + bias=bias, + device=target.weight.device, + ) + else: + raise ValueError( + "Cannot convert current module to torch module, currently only adapters for nn.Linear and nn.Conv2d are supported" + ) + if merge: + target.merge() + self._replace_module(parent, target_name, new_module, target) + + # save any additional trainable modules part of `modules_to_save` + if isinstance(target, ModulesToSaveWrapper): + setattr(parent, target_name, target.modules_to_save[target.active_adapter]) + + return self.model + + def enable_adapter_layers(self): + self._set_adapter_layers(enabled=True) + + def 
disable_adapter_layers(self): + self._set_adapter_layers(enabled=False) + + def merge_and_unload(self, progressbar: bool = False): + return self._unload_and_optionally_merge(progressbar=progressbar) + + def set_adapter(self, adapter_name): + for module in self.model.modules(): + if isinstance(module, LyCORISLayer): + if module.merged: + warnings.warn("Adapter cannot be set when the model is merged. Unmerging the model first.") + module.unmerge() + module.set_adapter(adapter_name) From 2401bf10e070a60ffc533d0e260fcff5dd7dea90 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Tue, 10 Oct 2023 17:06:59 +0300 Subject: [PATCH 14/33] Refactored LoHaModel and LoKrModel again --- src/peft/tuners/loha/model.py | 40 ++++-------------------- src/peft/tuners/lokr/model.py | 40 ++++-------------------- src/peft/tuners/lycoris_utils.py | 53 +++++++++++++++++++++++++++++--- 3 files changed, 61 insertions(+), 72 deletions(-) diff --git a/src/peft/tuners/loha/model.py b/src/peft/tuners/loha/model.py index 824649f1b3..f4c60ab020 100644 --- a/src/peft/tuners/loha/model.py +++ b/src/peft/tuners/loha/model.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Dict, Type + import torch from ..lycoris_utils import LyCORISTuner @@ -76,37 +78,7 @@ class LoHaModel(LyCORISTuner): """ prefix: str = "hada_" - - @staticmethod - def _create_new_module(config, adapter_name, target, **kwargs) -> LoHaLayer: - if isinstance(target, torch.nn.Conv2d): - new_module = Conv2d( - target.in_channels, - target.out_channels, - target.weight.size()[2:], - stride=target.stride, - padding=target.padding, - dilation=target.dilation, - groups=target.groups, - bias=target.bias is not None, - padding_mode=target.padding_mode, - device=target.weight.device, - dtype=target.weight.dtype, - adapter_name=adapter_name, - **kwargs, - ) - elif isinstance(target, torch.nn.Linear): - new_module = Linear( - target.in_features, - target.out_features, - bias=target.bias is not None, - device=target.weight.device, - dtype=target.weight.dtype, - adapter_name=adapter_name, - **kwargs, - ) - else: - raise ValueError( - "Target module not found, currently only adapters for nn.Linear and nn.Conv2d are supported" - ) - return new_module + layers_mapping: Dict[Type[torch.nn.Module], Type[LoHaLayer]] = { + torch.nn.Conv2d: Conv2d, + torch.nn.Linear: Linear, + } diff --git a/src/peft/tuners/lokr/model.py b/src/peft/tuners/lokr/model.py index 7f329c1ccd..778a7dcfe3 100644 --- a/src/peft/tuners/lokr/model.py +++ b/src/peft/tuners/lokr/model.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from typing import Dict, Type + import torch from ..lycoris_utils import LyCORISTuner @@ -77,37 +79,7 @@ class LoKrModel(LyCORISTuner): """ prefix: str = "lokr_" - - @staticmethod - def _create_new_module(config, adapter_name, target, **kwargs) -> LoKrLayer: - if isinstance(target, torch.nn.Conv2d): - new_module = Conv2d( - target.in_channels, - target.out_channels, - target.weight.size()[2:], - stride=target.stride, - padding=target.padding, - dilation=target.dilation, - groups=target.groups, - bias=target.bias is not None, - padding_mode=target.padding_mode, - device=target.weight.device, - dtype=target.weight.dtype, - adapter_name=adapter_name, - **kwargs, - ) - elif isinstance(target, torch.nn.Linear): - new_module = Linear( - target.in_features, - target.out_features, - bias=target.bias is not None, - device=target.weight.device, - dtype=target.weight.dtype, - adapter_name=adapter_name, - **kwargs, - ) - else: - raise ValueError( - "Target module not found, currently only adapters for nn.Linear and nn.Conv2d are supported" - ) - return new_module + layers_mapping: Dict[Type[torch.nn.Module], Type[LoKrLayer]] = { + torch.nn.Conv2d: Conv2d, + torch.nn.Linear: Linear, + } diff --git a/src/peft/tuners/lycoris_utils.py b/src/peft/tuners/lycoris_utils.py index 2ed85872b8..82991e7cdc 100644 --- a/src/peft/tuners/lycoris_utils.py +++ b/src/peft/tuners/lycoris_utils.py @@ -17,7 +17,7 @@ import warnings from dataclasses import dataclass, field from itertools import chain -from typing import Optional, Union +from typing import Dict, Optional, Type, Union import torch import torch.nn as nn @@ -81,6 +81,7 @@ class LyCORISTuner(BaseTuner): """ prefix: str + layers_mapping: Dict[Type[torch.nn.Module], Type[LyCORISLayer]] def __init__(self, model, config, adapter_name): super().__init__(model, config, adapter_name) @@ -124,9 +125,53 @@ def _create_and_replace( new_module = self._create_new_module(config, adapter_name, target, **kwargs) self._replace_module(parent, target_name, new_module, target) - @staticmethod - def _create_new_module(config: LyCORISConfig, adapter_name: str, target: nn.Module, **kwargs) -> LyCORISLayer: - ... 
+ @classmethod + def _create_new_module(cls, config: LyCORISConfig, adapter_name: str, target: nn.Module, **kwargs) -> LyCORISLayer: + # Find corresponding subtype of provided target module + new_module_cls = None + for subtype, target_cls in cls.layers_mapping.items(): + if isinstance(target, subtype): + new_module_cls = target_cls + break + + # We didn't find corresponding type, so adapter for this layer is not supported + if new_module_cls is None: + raise ValueError( + f"Target module not found, currently only adapters for {', '.join([x.__name__ for x in cls.modules_mapping.keys()])} are supported" + ) + + if isinstance(target, torch.nn.Conv2d): + new_module = new_module_cls( + target.in_channels, + target.out_channels, + target.weight.size()[2:], + stride=target.stride, + padding=target.padding, + dilation=target.dilation, + groups=target.groups, + bias=target.bias is not None, + padding_mode=target.padding_mode, + device=target.weight.device, + dtype=target.weight.dtype, + adapter_name=adapter_name, + **kwargs, + ) + elif isinstance(target, torch.nn.Linear): + new_module = new_module_cls( + target.in_features, + target.out_features, + bias=target.bias is not None, + device=target.weight.device, + dtype=target.weight.dtype, + adapter_name=adapter_name, + **kwargs, + ) + else: + raise ValueError( + "Target module not found, currently only adapters for nn.Linear and nn.Conv2d are supported" + ) + + return new_module def _mark_only_adapters_as_trainable(self) -> None: for n, p in self.model.named_parameters(): From 1fad986f2d9a2818b00a79d4a63e54904b87eb2a Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Tue, 10 Oct 2023 19:46:30 +0300 Subject: [PATCH 15/33] Refactored LoHaLayer and LoKrLayer a bit --- src/peft/tuners/loha/layer.py | 110 ++++++------------------------- src/peft/tuners/lokr/layer.py | 92 ++------------------------ src/peft/tuners/lycoris_utils.py | 101 +++++++++++++++++++++++++++- 3 files changed, 123 insertions(+), 180 deletions(-) diff --git a/src/peft/tuners/loha/layer.py b/src/peft/tuners/loha/layer.py index 0e5ee1af00..3fcd84a45e 100644 --- a/src/peft/tuners/loha/layer.py +++ b/src/peft/tuners/loha/layer.py @@ -14,8 +14,8 @@ # limitations under the License. import math -import warnings -from typing import Optional, Tuple, Union +from itertools import chain +from typing import Iterable, Optional, Tuple, Union import torch import torch.nn as nn @@ -33,31 +33,27 @@ def __init__(self): super(nn.Module, self).__init__() # LoHa info - self.r = {} - self.alpha = {} - self.scaling = {} self.hada_w1_a = nn.ParameterDict({}) self.hada_w1_b = nn.ParameterDict({}) self.hada_w2_a = nn.ParameterDict({}) self.hada_w2_b = nn.ParameterDict({}) self.hada_t1 = nn.ParameterDict({}) self.hada_t2 = nn.ParameterDict({}) - self.rank_dropout = {} - self.module_dropout = {} - - def _init_empty_weights(self, cls, *args, **kwargs) -> None: - # A helper method that allows to initialize the layer of the given class without spending time to initialize the - # model weights. The implementation is inspired by - # https://pytorch.org/docs/stable/generated/torch.nn.utils.skip_init.html but this function cannot be used - # directly. - # Instead of this approach, it would be possible to bypass the __init__ of the class but that runs the risk of - # omitting important logic inside that __init__. 
- kwargs = kwargs.copy() - final_device = kwargs.pop("device", "cpu") - cls.__init__(self, *args, device="meta", **kwargs) - self.to_empty(device=final_device) - - def create_loha_parameters(self, adapter_name: str, r: int, shape: Tuple[int, ...]): + + @property + def _available_adapters(self) -> Iterable[str]: + return set( + chain( + self.hada_w1_a.keys(), + self.hada_w1_b.keys(), + self.hada_w2_a.keys(), + self.hada_w2_b.keys(), + self.hada_t1.keys(), + self.hada_t2.keys(), + ) + ) + + def create_adapter_parameters(self, adapter_name: str, r: int, shape: Tuple[int, ...]): # https://github.com/KohakuBlueleaf/LyCORIS/blob/eb460098187f752a5d66406d3affade6f0a07ece/lycoris/modules/loha.py#L130C9-L143C75 if len(shape) == 4: self.hada_t1[adapter_name] = nn.Parameter(torch.empty(r, r, shape[2], shape[3])) @@ -74,7 +70,7 @@ def create_loha_parameters(self, adapter_name: str, r: int, shape: Tuple[int, .. self.hada_w2_a[adapter_name] = nn.Parameter(torch.empty(shape[0], r)) self.hada_w2_b[adapter_name] = nn.Parameter(torch.empty(r, shape[1])) - def reset_loha_parameters(self, adapter_name: str): + def reset_adapter_parameters(self, adapter_name: str): # Original implementation performs initialization with normal distribution # https://github.com/KohakuBlueleaf/LyCORIS/blob/3549fdef8f564761d68b695a08ef88b1122fdedc/lycoris/modules/loha.py#L158 @@ -131,11 +127,11 @@ def update_layer( raise NotImplementedError(f"LoHa is not implemented for {type(self).__name__} layer") # Create weights with provided shape - self.create_loha_parameters(adapter_name, r, shape) + self.create_adapter_parameters(adapter_name, r, shape) # Initialize weights if init_weights: - self.reset_loha_parameters(adapter_name) + self.reset_adapter_parameters(adapter_name) # Move new weights to device weight = getattr(self, "weight", None) @@ -183,72 +179,6 @@ def get_delta_weight(self, adapter_name: str) -> torch.Tensor: return weight - def merge(self) -> None: - if self.merged: - warnings.warn( - f"Already following adapters were merged {','.join(self.merged_adapters)}. " - f"You are now additionally merging {','.join(self.active_adapters)}." - ) - for active_adapter in self.active_adapters: - if active_adapter in self.hada_w1_a.keys(): - self.weight.data += self.get_delta_weight(active_adapter) - self.merged_adapters.append(active_adapter) - - def unmerge(self) -> None: - if not self.merged: - warnings.warn("Already unmerged. 
Nothing to do.") - return - while len(self.merged_adapters) > 0: - active_adapter = self.merged_adapters.pop() - if active_adapter in self.hada_w1_a.keys(): - self.weight.data -= self.get_delta_weight(active_adapter) - - def _op(self, x: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: - raise NotImplementedError - - def forward(self, x: torch.Tensor) -> torch.Tensor: - previous_dtype = x.dtype - - if self.disable_adapters: - if self.merged: - self.unmerge() - result = self._op(x, self.weight) - elif self.merged: - result = self._op(x, self.weight) - else: - # Get base weights - weight = self.weight.data - - # Execute all the adapters - for active_adapter in self.active_adapters: - if active_adapter not in self.hada_w1_a.keys(): - continue - - module_dropout = self.module_dropout[active_adapter] - - # Modify current execution weights - if (not self.training) or (self.training and torch.rand(1) > module_dropout): - weight = weight + self.get_delta_weight(active_adapter) - - # Perform actual operation - result = self._op(x, weight) - - result = result.to(previous_dtype) - return result - - def scale_layer(self, scale_factor: float) -> None: - if scale_factor != 1: - for active_adapter in self.active_adapters: - alpha = self.alpha[active_adapter] - r = self.r[active_adapter] - self.scaling[active_adapter] = (alpha / r) * scale_factor - - def unscale_layer(self) -> None: - for active_adapter in self.active_adapters: - alpha = self.alpha[active_adapter] - r = self.r[active_adapter] - self.scaling[active_adapter] = alpha / r - class Linear(LoHaLayer, nn.Linear): """LoHa implemented in Linear layer""" diff --git a/src/peft/tuners/lokr/layer.py b/src/peft/tuners/lokr/layer.py index b4995426ea..505944c971 100644 --- a/src/peft/tuners/lokr/layer.py +++ b/src/peft/tuners/lokr/layer.py @@ -14,7 +14,6 @@ # limitations under the License. import math -import warnings from itertools import chain from typing import Iterable, Optional, Tuple, Union @@ -42,9 +41,6 @@ def __init__(self): super(nn.Module, self).__init__() # LoKr info - self.r = {} - self.alpha = {} - self.scaling = {} self.lokr_w1 = nn.ParameterDict({}) self.lokr_w1_a = nn.ParameterDict({}) self.lokr_w1_b = nn.ParameterDict({}) @@ -52,20 +48,6 @@ def __init__(self): self.lokr_w2_a = nn.ParameterDict({}) self.lokr_w2_b = nn.ParameterDict({}) self.lokr_t2 = nn.ParameterDict({}) - self.rank_dropout = {} - self.module_dropout = {} - - def _init_empty_weights(self, cls, *args, **kwargs) -> None: - # A helper method that allows to initialize the layer of the given class without spending time to initialize the - # model weights. The implementation is inspired by - # https://pytorch.org/docs/stable/generated/torch.nn.utils.skip_init.html but this function cannot be used - # directly. - # Instead of this approach, it would be possible to bypass the __init__ of the class but that runs the risk of - # omitting important logic inside that __init__. 
- kwargs = kwargs.copy() - final_device = kwargs.pop("device", "cpu") - cls.__init__(self, *args, device="meta", **kwargs) - self.to_empty(device=final_device) @property def _available_adapters(self) -> Iterable[str]: @@ -81,7 +63,7 @@ def _available_adapters(self) -> Iterable[str]: ) ) - def create_lokr_parameters( + def create_adapter_parameters( self, adapter_name: str, r: int, @@ -115,7 +97,7 @@ def create_lokr_parameters( self.lokr_w2_a[adapter_name] = nn.Parameter(torch.empty(shape[0][1], r)) self.lokr_w2_b[adapter_name] = nn.Parameter(torch.empty(r, shape[1][1])) - def reset_lokr_parameters(self, adapter_name: str): + def reset_adapter_parameters(self, adapter_name: str): if adapter_name in self.lokr_w1: nn.init.zeros_(self.lokr_w1[adapter_name]) else: @@ -188,11 +170,11 @@ def update_layer( raise NotImplementedError(f"LoKr is not implemented for {type(self).__name__} layer") # Create weights with provided shape - self.create_lokr_parameters(adapter_name, r, shape, use_w1, use_w2, use_effective_conv2d) + self.create_adapter_parameters(adapter_name, r, shape, use_w1, use_w2, use_effective_conv2d) # Initialize weights if init_weights: - self.reset_lokr_parameters(adapter_name) + self.reset_adapter_parameters(adapter_name) # Move new weights to device weight = getattr(self, "weight", None) @@ -232,72 +214,6 @@ def get_delta_weight(self, adapter_name: str) -> torch.Tensor: return weight - def merge(self) -> None: - if self.merged: - warnings.warn( - f"Already following adapters were merged {','.join(self.merged_adapters)}. " - f"You are now additionally merging {','.join(self.active_adapters)}." - ) - for active_adapter in self.active_adapters: - if active_adapter in self._available_adapters: - self.weight.data += self.get_delta_weight(active_adapter) - self.merged_adapters.append(active_adapter) - - def unmerge(self) -> None: - if not self.merged: - warnings.warn("Already unmerged. 
Nothing to do.") - return - while len(self.merged_adapters) > 0: - active_adapter = self.merged_adapters.pop() - if active_adapter in self._available_adapters: - self.weight.data -= self.get_delta_weight(active_adapter) - - def _op(self, x: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: - raise NotImplementedError - - def forward(self, x: torch.Tensor) -> torch.Tensor: - previous_dtype = x.dtype - - if self.disable_adapters: - if self.merged: - self.unmerge() - result = self._op(x, self.weight) - elif self.merged: - result = self._op(x, self.weight) - else: - # Get base weights - weight = self.weight.data - - # Execute all the adapters - for active_adapter in self.active_adapters: - if active_adapter not in self._available_adapters: - continue - - module_dropout = self.module_dropout[active_adapter] - - # Modify current execution weights - if (not self.training) or (self.training and torch.rand(1) > module_dropout): - weight = weight + self.get_delta_weight(active_adapter) - - # Perform actual operation - result = self._op(x, weight) - - result = result.to(previous_dtype) - return result - - def scale_layer(self, scale_factor: float) -> None: - if scale_factor != 1: - for active_adapter in self.active_adapters: - alpha = self.alpha[active_adapter] - r = self.r[active_adapter] - self.scaling[active_adapter] = (alpha / r) * scale_factor - - def unscale_layer(self) -> None: - for active_adapter in self.active_adapters: - alpha = self.alpha[active_adapter] - r = self.r[active_adapter] - self.scaling[active_adapter] = alpha / r - class Linear(LoKrLayer, nn.Linear): """LoKr implemented in Linear layer""" diff --git a/src/peft/tuners/lycoris_utils.py b/src/peft/tuners/lycoris_utils.py index 82991e7cdc..5eff17f5fa 100644 --- a/src/peft/tuners/lycoris_utils.py +++ b/src/peft/tuners/lycoris_utils.py @@ -17,7 +17,7 @@ import warnings from dataclasses import dataclass, field from itertools import chain -from typing import Dict, Optional, Type, Union +from typing import Dict, Iterable, Optional, Type, Union import torch import torch.nn as nn @@ -57,20 +57,117 @@ class LyCORISConfig(PeftConfig): ) -class LyCORISLayer(BaseTunerLayer): +class LyCORISLayer(BaseTunerLayer, nn.Module): r""" A base layer for LyCORIS like adapters """ def __init__(self): + self.r = {} + self.alpha = {} + self.scaling = {} + self.rank_dropout = {} + self.module_dropout = {} + # Tuner info self._disable_adapters = False self.merged_adapters = [] + @property + def _available_adapters(self) -> Iterable[str]: + ... + @property def merged(self) -> bool: return bool(self.merged_adapters) + def _init_empty_weights(self, cls, *args, **kwargs) -> None: + # A helper method that allows to initialize the layer of the given class without spending time to initialize the + # model weights. The implementation is inspired by + # https://pytorch.org/docs/stable/generated/torch.nn.utils.skip_init.html but this function cannot be used + # directly. + # Instead of this approach, it would be possible to bypass the __init__ of the class but that runs the risk of + # omitting important logic inside that __init__. + kwargs = kwargs.copy() + final_device = kwargs.pop("device", "cpu") + cls.__init__(self, *args, device="meta", **kwargs) + self.to_empty(device=final_device) + + def _op(self, x: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: + raise NotImplementedError + + def create_adapter_parameters(self, adapter_name: str, r: int, **kwargs): + ... 
+ + def forward(self, x: torch.Tensor) -> torch.Tensor: + previous_dtype = x.dtype + + if self.disable_adapters: + if self.merged: + self.unmerge() + result = self._op(x, self.weight) + elif self.merged: + result = self._op(x, self.weight) + else: + # Get base weights + weight = self.weight.data + + # Execute all the adapters + for active_adapter in self.active_adapters: + if active_adapter not in self._available_adapters: + continue + + module_dropout = self.module_dropout[active_adapter] + + # Modify current execution weights + if (not self.training) or (self.training and torch.rand(1) > module_dropout): + weight = weight + self.get_delta_weight(active_adapter) + + # Perform actual operation + result = self._op(x, weight) + + result = result.to(previous_dtype) + return result + + def get_delta_weight(self, adapter_name: str) -> torch.Tensor: + ... + + def merge(self) -> None: + if self.merged: + warnings.warn( + f"Already following adapters were merged {','.join(self.merged_adapters)}. " + f"You are now additionally merging {','.join(self.active_adapters)}." + ) + for active_adapter in self.active_adapters: + if active_adapter in self._available_adapters: + self.weight.data += self.get_delta_weight(active_adapter) + self.merged_adapters.append(active_adapter) + + def reset_adapter_parameters(self, adapter_name: str): + ... + + def scale_layer(self, scale_factor: float) -> None: + if scale_factor != 1: + for active_adapter in self.active_adapters: + alpha = self.alpha[active_adapter] + r = self.r[active_adapter] + self.scaling[active_adapter] = (alpha / r) * scale_factor + + def unmerge(self) -> None: + if not self.merged: + warnings.warn("Already unmerged. Nothing to do.") + return + while len(self.merged_adapters) > 0: + active_adapter = self.merged_adapters.pop() + if active_adapter in self._available_adapters: + self.weight.data -= self.get_delta_weight(active_adapter) + + def unscale_layer(self) -> None: + for active_adapter in self.active_adapters: + alpha = self.alpha[active_adapter] + r = self.r[active_adapter] + self.scaling[active_adapter] = alpha / r + def update_layer(self, adapter_name: str, r: int, alpha: float, **kwargs): ... 
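
For orientation while reading the LoKr layer code earlier in this series: for a `Linear` layer, the adapter delta assembled by `get_delta_weight` is essentially a Kronecker product of a small `W1` block and an optionally rank-factorized `W2` block, with block sizes chosen by the `factorization` helper. The sketch below is illustrative only and is not part of the patches; shapes follow the parameter definitions in `create_adapter_parameters`, and the `alpha / r` scaling follows the convention visible in `unscale_layer`.

```python
import torch

# Linear(64 -> 128) adapted with r=8, alpha=8, factor=-1:
# factorization(128, -1) -> (8, 16), factorization(64, -1) -> (8, 8),
# so W1 is (8, 8), W2 is (16, 8) and kron(W1, W2) has shape (128, 64).
out1, out2, in1, in2, r, alpha = 8, 16, 8, 8, 8, 8

lokr_w1 = torch.randn(out1, in1)                 # full W1 (decompose_both=False)
lokr_w2_a = torch.randn(out2, r)                 # rank-factorized W2 = w2_a @ w2_b
lokr_w2_b = torch.randn(r, in2)

delta = torch.kron(lokr_w1, lokr_w2_a @ lokr_w2_b) * (alpha / r)
assert delta.shape == (out1 * out2, in1 * in2) == (128, 64)
```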
From 39e87ceb2849a054197e6838dc08fbc9a4895e39 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Tue, 10 Oct 2023 19:48:27 +0300 Subject: [PATCH 16/33] Removed unnecessary comments --- src/peft/tuners/loha/config.py | 1 - src/peft/tuners/lokr/config.py | 1 - 2 files changed, 2 deletions(-) diff --git a/src/peft/tuners/loha/config.py b/src/peft/tuners/loha/config.py index 00d24c374b..e1994a9843 100644 --- a/src/peft/tuners/loha/config.py +++ b/src/peft/tuners/loha/config.py @@ -16,7 +16,6 @@ from dataclasses import dataclass, field from typing import List, Optional, Union -# from peft.config import PeftConfig from peft.tuners.lycoris_utils import LyCORISConfig from peft.utils import PeftType diff --git a/src/peft/tuners/lokr/config.py b/src/peft/tuners/lokr/config.py index 258fc277e2..ce25ec793e 100644 --- a/src/peft/tuners/lokr/config.py +++ b/src/peft/tuners/lokr/config.py @@ -16,7 +16,6 @@ from dataclasses import dataclass, field from typing import List, Optional, Union -# from peft.config import PeftConfig from peft.tuners.lycoris_utils import LyCORISConfig from peft.utils import PeftType From 4171c64dfa3cd8aa6e2cf7bfb5934e4d613ee943 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Fri, 13 Oct 2023 12:11:22 +0300 Subject: [PATCH 17/33] Addressed comments on _available_adapters property --- src/peft/tuners/loha/layer.py | 16 +++------------- src/peft/tuners/lokr/layer.py | 25 +++++++++++-------------- src/peft/tuners/lycoris_utils.py | 4 ++-- 3 files changed, 16 insertions(+), 29 deletions(-) diff --git a/src/peft/tuners/loha/layer.py b/src/peft/tuners/loha/layer.py index 3fcd84a45e..df8e3f275e 100644 --- a/src/peft/tuners/loha/layer.py +++ b/src/peft/tuners/loha/layer.py @@ -14,8 +14,7 @@ # limitations under the License. import math -from itertools import chain -from typing import Iterable, Optional, Tuple, Union +from typing import Optional, Set, Tuple, Union import torch import torch.nn as nn @@ -41,17 +40,8 @@ def __init__(self): self.hada_t2 = nn.ParameterDict({}) @property - def _available_adapters(self) -> Iterable[str]: - return set( - chain( - self.hada_w1_a.keys(), - self.hada_w1_b.keys(), - self.hada_w2_a.keys(), - self.hada_w2_b.keys(), - self.hada_t1.keys(), - self.hada_t2.keys(), - ) - ) + def _available_adapters(self) -> Set[str]: + return {*self.hada_w1_a, *self.hada_w1_b, *self.hada_w2_a, *self.hada_w2_b, *self.hada_t1, *self.hada_t2} def create_adapter_parameters(self, adapter_name: str, r: int, shape: Tuple[int, ...]): # https://github.com/KohakuBlueleaf/LyCORIS/blob/eb460098187f752a5d66406d3affade6f0a07ece/lycoris/modules/loha.py#L130C9-L143C75 diff --git a/src/peft/tuners/lokr/layer.py b/src/peft/tuners/lokr/layer.py index 505944c971..275b9a31b8 100644 --- a/src/peft/tuners/lokr/layer.py +++ b/src/peft/tuners/lokr/layer.py @@ -14,8 +14,7 @@ # limitations under the License. 
import math -from itertools import chain -from typing import Iterable, Optional, Tuple, Union +from typing import Optional, Set, Tuple, Union import torch import torch.nn as nn @@ -50,18 +49,16 @@ def __init__(self): self.lokr_t2 = nn.ParameterDict({}) @property - def _available_adapters(self) -> Iterable[str]: - return set( - chain( - self.lokr_w1.keys(), - self.lokr_w1_a.keys(), - self.lokr_w1_b.keys(), - self.lokr_w2.keys(), - self.lokr_w2_a.keys(), - self.lokr_w2_b.keys(), - self.lokr_t2.keys(), - ) - ) + def _available_adapters(self) -> Set[str]: + return { + *self.lokr_w1, + *self.lokr_w1_a, + *self.lokr_w1_b, + *self.lokr_w2, + *self.lokr_w2_a, + *self.lokr_w2_b, + *self.lokr_t2, + } def create_adapter_parameters( self, diff --git a/src/peft/tuners/lycoris_utils.py b/src/peft/tuners/lycoris_utils.py index 5eff17f5fa..07cc7e9c5f 100644 --- a/src/peft/tuners/lycoris_utils.py +++ b/src/peft/tuners/lycoris_utils.py @@ -17,7 +17,7 @@ import warnings from dataclasses import dataclass, field from itertools import chain -from typing import Dict, Iterable, Optional, Type, Union +from typing import Dict, Optional, Set, Type, Union import torch import torch.nn as nn @@ -74,7 +74,7 @@ def __init__(self): self.merged_adapters = [] @property - def _available_adapters(self) -> Iterable[str]: + def _available_adapters(self) -> Set[str]: ... @property From b24bdbf07b78c88f1705b20266680102169337f4 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Fri, 13 Oct 2023 12:31:22 +0300 Subject: [PATCH 18/33] Replaced te with text_encoder --- examples/stable_diffusion/train_dreambooth.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/examples/stable_diffusion/train_dreambooth.py b/examples/stable_diffusion/train_dreambooth.py index 8364603a0b..6fc3a30fc6 100644 --- a/examples/stable_diffusion/train_dreambooth.py +++ b/examples/stable_diffusion/train_dreambooth.py @@ -120,10 +120,13 @@ def create_unet_adapter_config(args: argparse.Namespace) -> Union[LoraConfig, Lo decompose_factor=args.unet_decompose_factor, init_weights=True, ) + else: + raise ValueError(f"Unknown adapter type {args.adapter}") + return config -def create_te_adapter_config(args: argparse.Namespace) -> Union[LoraConfig, LoHaConfig, LoKrConfig]: +def create_text_encoder_adapter_config(args: argparse.Namespace) -> Union[LoraConfig, LoHaConfig, LoKrConfig]: if args.adapter == "full": raise ValueError("Cannot create text_encoder adapter config for full parameter") @@ -156,6 +159,9 @@ def create_te_adapter_config(args: argparse.Namespace) -> Union[LoraConfig, LoHa decompose_factor=args.te_decompose_factor, init_weights=True, ) + else: + raise ValueError(f"Unknown adapter type {args.adapter}") + return config @@ -898,7 +904,7 @@ def main(args): if not args.train_text_encoder: text_encoder.requires_grad_(False) elif args.train_text_encoder and args.adapter != "full": - config = create_te_adapter_config(args) + config = create_text_encoder_adapter_config(args) text_encoder = get_peft_model(text_encoder, config) text_encoder.print_trainable_parameters() print(text_encoder) From d8f2a83479bb79689264cb0b0760a67c0b288b2c Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Fri, 13 Oct 2023 11:40:06 +0200 Subject: [PATCH 19/33] Apply suggestions from code review Co-authored-by: Benjamin Bossan --- src/peft/tuners/lokr/config.py | 2 +- src/peft/tuners/lokr/layer.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/peft/tuners/lokr/config.py b/src/peft/tuners/lokr/config.py index 
ce25ec793e..0db0c48729 100644 --- a/src/peft/tuners/lokr/config.py +++ b/src/peft/tuners/lokr/config.py @@ -23,7 +23,7 @@ @dataclass class LoKrConfig(LyCORISConfig): """ - This is the configuration class to store the configuration of a [`LoKrModel`]. + Configuration class of [`LoKrModel`]. Args: r (`int`): LoKr rank. diff --git a/src/peft/tuners/lokr/layer.py b/src/peft/tuners/lokr/layer.py index 275b9a31b8..da7976b56e 100644 --- a/src/peft/tuners/lokr/layer.py +++ b/src/peft/tuners/lokr/layer.py @@ -310,9 +310,9 @@ def factorization(dimension: int, factor: int = -1) -> Tuple[int, int]: return a tuple of two value of input dimension decomposed by the number closest to factor second value is higher or equal than first value. - In LoRA with Kroneckor Product, first value is a value for weight scale. secon value is a value for weight. + In LoRA with Kroneckor Product, first value is a value for weight scale, second value is a value for weight. - Becuase of non-commutative property, A⊗B ≠ B⊗A. Meaning of two matrices is slightly different. + Because of non-commutative property, A⊗B ≠ B⊗A. Meaning of two matrices is slightly different. examples) factor -1 2 4 8 16 ... From 64655978f8b273fa5c11044c208ff0da55d5347e Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Fri, 13 Oct 2023 12:45:19 +0300 Subject: [PATCH 20/33] Changed exception type raised when creating adapter for unsupported layer --- src/peft/tuners/loha/layer.py | 2 +- src/peft/tuners/lokr/layer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/peft/tuners/loha/layer.py b/src/peft/tuners/loha/layer.py index df8e3f275e..66c00e3775 100644 --- a/src/peft/tuners/loha/layer.py +++ b/src/peft/tuners/loha/layer.py @@ -114,7 +114,7 @@ def update_layer( else: shape = (self.out_channels, self.in_channels * self.kernel_size[0] * self.kernel_size[1]) else: - raise NotImplementedError(f"LoHa is not implemented for {type(self).__name__} layer") + raise TypeError(f"LoHa is not implemented for {type(self).__name__} layer") # Create weights with provided shape self.create_adapter_parameters(adapter_name, r, shape) diff --git a/src/peft/tuners/lokr/layer.py b/src/peft/tuners/lokr/layer.py index da7976b56e..3ffd9a0761 100644 --- a/src/peft/tuners/lokr/layer.py +++ b/src/peft/tuners/lokr/layer.py @@ -164,7 +164,7 @@ def update_layer( use_w2 = r >= max(shape[0][1], shape[1][1]) / 2 use_effective_conv2d = use_effective_conv2d and self.kernel_size != (1, 1) else: - raise NotImplementedError(f"LoKr is not implemented for {type(self).__name__} layer") + raise TypeError(f"LoKr is not implemented for {type(self).__name__} layer") # Create weights with provided shape self.create_adapter_parameters(adapter_name, r, shape, use_w1, use_w2, use_effective_conv2d) From a63d249d598d7a8b631075ba42caf616d29064bb Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Fri, 13 Oct 2023 23:51:10 +0300 Subject: [PATCH 21/33] Added additional tests for use_effective_conv2d/decompose_both/decompose_factor --- tests/test_custom_models.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/test_custom_models.py b/tests/test_custom_models.py index 69cde6284d..eeae16d5c7 100644 --- a/tests/test_custom_models.py +++ b/tests/test_custom_models.py @@ -162,6 +162,31 @@ ), ("Conv2d 1 LOKR", "Conv2d", LoKrConfig, {"target_modules": ["conv2d"]}), ("Conv2d 2 LOKR", "Conv2d", LoKrConfig, {"target_modules": ["conv2d", "lin0"]}), + ("Conv2d 3 LOKR", "Conv2d", LoKrConfig, {"target_modules": ["conv2d"], 
"use_effective_conv2d": True}), + ("Conv2d 4 LOKR", "Conv2d", LoKrConfig, {"target_modules": ["conv2d", "lin0"], "use_effective_conv2d": True}), + ( + "Conv2d 5 LOKR", + "Conv2d", + LoKrConfig, + {"target_modules": ["conv2d", "lin0"], "use_effective_conv2d": True, "decompose_both": True}, + ), + ( + "Conv2d 6 LOKR", + "Conv2d", + LoKrConfig, + {"target_modules": ["conv2d", "lin0"], "use_effective_conv2d": True, "decompose_factor": 4}, + ), + ( + "Conv2d 7 LOKR", + "Conv2d", + LoKrConfig, + { + "target_modules": ["conv2d", "lin0"], + "use_effective_conv2d": True, + "decompose_both": True, + "decompose_factor": 4, + }, + ), ] MULTIPLE_ACTIVE_ADAPTERS_TEST_CASES = [ @@ -450,6 +475,10 @@ def test_only_params_are_updated(self, test_name, model_id, config_cls, config_k params_after = dict(model.named_parameters()) self.assertEqual(params_before.keys(), params_after.keys()) + if isinstance(model, ModelConv2D): + print(model) + self.assertFalse(True) + prefix = PREFIXES[config_cls] for name, param_before in params_before.items(): param_after = params_after[name] From 2b70fc013ada5d8f7259887443f398529a0c1840 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Sat, 14 Oct 2023 00:10:46 +0300 Subject: [PATCH 22/33] Removed classmethod --- src/peft/tuners/lycoris_utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/peft/tuners/lycoris_utils.py b/src/peft/tuners/lycoris_utils.py index 07cc7e9c5f..8d045e3cb3 100644 --- a/src/peft/tuners/lycoris_utils.py +++ b/src/peft/tuners/lycoris_utils.py @@ -281,8 +281,7 @@ def _prepare_adapter_config(peft_config, model_config): raise ValueError("Please specify `target_modules` in `peft_config`") return peft_config - @classmethod - def _replace_module(cls, parent, child_name, new_module, child): + def _replace_module(self, parent, child_name, new_module, child): setattr(parent, child_name, new_module) # It's not necessary to set requires_grad here, as that is handled by # _mark_only_adapters_as_trainable @@ -296,7 +295,7 @@ def _replace_module(cls, parent, child_name, new_module, child): # dispatch to correct device for name, module in new_module.named_modules(): - if cls.prefix in name: + if self.prefix in name: module.to(child.weight.device) def _set_adapter_layers(self, enabled=True): From 3033a75e4513d1b49df71faf0f17eb4b7a0ffef7 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Sat, 14 Oct 2023 01:05:03 +0300 Subject: [PATCH 23/33] Addressed conversion script review comments --- .../convert_sd_adapter_to_peft.py | 20 +++++++------------ 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/examples/stable_diffusion/convert_sd_adapter_to_peft.py b/examples/stable_diffusion/convert_sd_adapter_to_peft.py index e0bc2e7716..348eb4eb3d 100644 --- a/examples/stable_diffusion/convert_sd_adapter_to_peft.py +++ b/examples/stable_diffusion/convert_sd_adapter_to_peft.py @@ -3,7 +3,7 @@ import os from collections import Counter from dataclasses import dataclass -from functools import reduce +from operator import attrgetter from typing import Dict, List, Optional, Union import safetensors @@ -25,11 +25,6 @@ PREFIX_TEXT_ENCODER = "lora_te" -def get_module_by_name(module: Union[torch.Tensor, nn.Module], access_string: str): - names = access_string.split(sep=".") - return reduce(getattr, names, module) - - @dataclass class LoRAInfo: kohya_key: str @@ -97,12 +92,11 @@ class LoKrInfo: lokr_t2: Optional[torch.Tensor] = None def peft_state_dict(self) -> Dict[str, torch.Tensor]: - if (self.lokr_w1 is None and self.lokr_w1_a is None 
and self.lokr_w1_b is None) or ( - self.lokr_w2 is None and self.lokr_w2_a is None and self.lokr_w2_b is None - ): - raise ValueError( - "At least one of lokr_w1, lokr_w1_a, lokr_w1_b, lokr_w2, lokr_w2_a, lokr_w2_b is missing, they all must be provided" - ) + if (self.lokr_w1 is None) and ((self.lokr_w1_a is None) or (self.lokr_w1_b is None)): + raise ValueError("Either lokr_w1 or both lokr_w1_a and lokr_w1_b should be provided") + + if (self.lokr_w2 is None) and ((self.lokr_w2_a is None) or (self.lokr_w2_b is None)): + raise ValueError("Either lokr_w2 or both lokr_w2_a and lokr_w2_b should be provided") state_dict = {} @@ -397,7 +391,7 @@ def detect_adapter_type(keys: List[str]) -> PeftType: peft_key = models_keys[kohya_key] # Retrieve corresponding layer of model - layer = get_module_by_name(model, peft_key) + layer = attrgetter(peft_key)(model) # Create a corresponding adapter info if peft_key not in adapter_info[model_type]: From 299de88cfcefd6ea2aea4fb22bf4f0ec124af46e Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Sat, 14 Oct 2023 01:29:25 +0300 Subject: [PATCH 24/33] Replaced factorization docstring --- src/peft/tuners/lokr/layer.py | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/src/peft/tuners/lokr/layer.py b/src/peft/tuners/lokr/layer.py index 3ffd9a0761..c14d0406bc 100644 --- a/src/peft/tuners/lokr/layer.py +++ b/src/peft/tuners/lokr/layer.py @@ -306,20 +306,33 @@ def _op(self, input: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: def factorization(dimension: int, factor: int = -1) -> Tuple[int, int]: - """ - return a tuple of two value of input dimension decomposed by the number closest to factor second value is higher or - equal than first value. + """Factorizes the provided number into the product of two numbers + + Args: + dimension (`int`): The number that needs to be factorized. + factor (`int`, optional): + Factorization divider. The algorithm will try to output two numbers, one of each will be as close to the + factor as possible. If -1 is provided, the decomposition algorithm would try to search dividers near the + square root of the dimension. Defaults to -1. + + Returns: + Tuple[`int`, `int`]: A tuple of two numbers, whose product is equal to the provided number. The first number is + always less than or equal to the second. + + Example: + ```py + >>> factorization(256, factor=-1) + (16, 16) - In LoRA with Kroneckor Product, first value is a value for weight scale, second value is a value for weight. + >>> factorization(128, factor=-1) + (8, 16) - Because of non-commutative property, A⊗B ≠ B⊗A. Meaning of two matrices is slightly different. + >>> factorization(127, factor=-1) + (1, 127) - examples) factor - -1 2 4 8 16 ... 
- 127 -> 127, 1 127 -> 127, 1 127 -> 127, 1 127 -> 127, 1 127 -> 127, 1 128 -> 16, 8 128 -> 64, 2 128 -> 32, 4 128 -> - 16, 8 128 -> 16, 8 250 -> 125, 2 250 -> 125, 2 250 -> 125, 2 250 -> 125, 2 250 -> 125, 2 360 -> 45, 8 360 -> 180, 2 - 360 -> 90, 4 360 -> 45, 8 360 -> 45, 8 512 -> 32, 16 512 -> 256, 2 512 -> 128, 4 512 -> 64, 8 512 -> 32, 16 1024 -> - 32, 32 1024 -> 512, 2 1024 -> 256, 4 1024 -> 128, 8 1024 -> 64, 16 + >>> factorization(128, factor=4) + (4, 32) + ``` """ if factor > 0 and (dimension % factor) == 0: From d518728c136f1a02be13b2d4bd4a509b2868232c Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Sat, 14 Oct 2023 02:01:15 +0300 Subject: [PATCH 25/33] LyCORIS -> Lycoris --- src/peft/tuners/loha/config.py | 4 ++-- src/peft/tuners/loha/layer.py | 6 +++--- src/peft/tuners/lokr/config.py | 4 ++-- src/peft/tuners/lokr/layer.py | 6 +++--- src/peft/tuners/lycoris_utils.py | 18 +++++++++--------- 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/peft/tuners/loha/config.py b/src/peft/tuners/loha/config.py index e1994a9843..7ea73400fd 100644 --- a/src/peft/tuners/loha/config.py +++ b/src/peft/tuners/loha/config.py @@ -16,12 +16,12 @@ from dataclasses import dataclass, field from typing import List, Optional, Union -from peft.tuners.lycoris_utils import LyCORISConfig +from peft.tuners.lycoris_utils import LycorisConfig from peft.utils import PeftType @dataclass -class LoHaConfig(LyCORISConfig): +class LoHaConfig(LycorisConfig): """ This is the configuration class to store the configuration of a [`LoHaModel`]. diff --git a/src/peft/tuners/loha/layer.py b/src/peft/tuners/loha/layer.py index 66c00e3775..0b4dde56f7 100644 --- a/src/peft/tuners/loha/layer.py +++ b/src/peft/tuners/loha/layer.py @@ -20,15 +20,15 @@ import torch.nn as nn import torch.nn.functional as F -from peft.tuners.lycoris_utils import LyCORISLayer +from peft.tuners.lycoris_utils import LycorisLayer -class LoHaLayer(LyCORISLayer, nn.Module): +class LoHaLayer(LycorisLayer, nn.Module): # List all names of layers that may contain adapter weights adapter_layer_names = ["hada_w1_a", "hada_w1_b", "hada_w2_a", "hada_w2_b", "hada_t1", "hada_t2"] def __init__(self): - LyCORISLayer.__init__(self) + LycorisLayer.__init__(self) super(nn.Module, self).__init__() # LoHa info diff --git a/src/peft/tuners/lokr/config.py b/src/peft/tuners/lokr/config.py index 0db0c48729..d3cdf1b5af 100644 --- a/src/peft/tuners/lokr/config.py +++ b/src/peft/tuners/lokr/config.py @@ -16,12 +16,12 @@ from dataclasses import dataclass, field from typing import List, Optional, Union -from peft.tuners.lycoris_utils import LyCORISConfig +from peft.tuners.lycoris_utils import LycorisConfig from peft.utils import PeftType @dataclass -class LoKrConfig(LyCORISConfig): +class LoKrConfig(LycorisConfig): """ Configuration class of [`LoKrModel`]. 
diff --git a/src/peft/tuners/lokr/layer.py b/src/peft/tuners/lokr/layer.py index c14d0406bc..afe74f13a5 100644 --- a/src/peft/tuners/lokr/layer.py +++ b/src/peft/tuners/lokr/layer.py @@ -20,10 +20,10 @@ import torch.nn as nn import torch.nn.functional as F -from peft.tuners.lycoris_utils import LyCORISLayer +from peft.tuners.lycoris_utils import LycorisLayer -class LoKrLayer(LyCORISLayer, nn.Module): +class LoKrLayer(LycorisLayer, nn.Module): # List all names of layers that may contain adapter weights adapter_layer_names = [ "lokr_w1", @@ -36,7 +36,7 @@ class LoKrLayer(LyCORISLayer, nn.Module): ] def __init__(self): - LyCORISLayer.__init__(self) + LycorisLayer.__init__(self) super(nn.Module, self).__init__() # LoKr info diff --git a/src/peft/tuners/lycoris_utils.py b/src/peft/tuners/lycoris_utils.py index 8d045e3cb3..d9e74a3977 100644 --- a/src/peft/tuners/lycoris_utils.py +++ b/src/peft/tuners/lycoris_utils.py @@ -33,7 +33,7 @@ @dataclass -class LyCORISConfig(PeftConfig): +class LycorisConfig(PeftConfig): r""" A base config for LyCORIS like adapters """ @@ -57,7 +57,7 @@ class LyCORISConfig(PeftConfig): ) -class LyCORISLayer(BaseTunerLayer, nn.Module): +class LycorisLayer(BaseTunerLayer, nn.Module): r""" A base layer for LyCORIS like adapters """ @@ -178,7 +178,7 @@ class LyCORISTuner(BaseTuner): """ prefix: str - layers_mapping: Dict[Type[torch.nn.Module], Type[LyCORISLayer]] + layers_mapping: Dict[Type[torch.nn.Module], Type[LycorisLayer]] def __init__(self, model, config, adapter_name): super().__init__(model, config, adapter_name) @@ -196,9 +196,9 @@ def _check_target_module_exists(config, key): def _create_and_replace( self, - config: LyCORISConfig, + config: LycorisConfig, adapter_name: str, - target: Union[LyCORISLayer, nn.Module], + target: Union[LycorisLayer, nn.Module], target_name, parent, current_key, @@ -216,14 +216,14 @@ def _create_and_replace( kwargs["r"] = config.rank_pattern.get(target_name_key, config.r) kwargs["alpha"] = config.alpha_pattern.get(target_name_key, config.alpha) - if isinstance(target, LyCORISLayer): + if isinstance(target, LycorisLayer): target.update_layer(adapter_name, **kwargs) else: new_module = self._create_new_module(config, adapter_name, target, **kwargs) self._replace_module(parent, target_name, new_module, target) @classmethod - def _create_new_module(cls, config: LyCORISConfig, adapter_name: str, target: nn.Module, **kwargs) -> LyCORISLayer: + def _create_new_module(cls, config: LycorisConfig, adapter_name: str, target: nn.Module, **kwargs) -> LycorisLayer: # Find corresponding subtype of provided target module new_module_cls = None for subtype, target_cls in cls.layers_mapping.items(): @@ -315,7 +315,7 @@ def _unload_and_optionally_merge(self, merge=True, progressbar: bool = False): parent, target, target_name = _get_submodules(self.model, key) except AttributeError: continue - if isinstance(target, LyCORISLayer): + if isinstance(target, LycorisLayer): if isinstance(target, nn.Conv2d): new_module = torch.nn.Conv2d( target.in_channels, @@ -358,7 +358,7 @@ def merge_and_unload(self, progressbar: bool = False): def set_adapter(self, adapter_name): for module in self.model.modules(): - if isinstance(module, LyCORISLayer): + if isinstance(module, LycorisLayer): if module.merged: warnings.warn("Adapter cannot be set when the model is merged. 
Unmerging the model first.") module.unmerge() From 63aba4e7f52f6b37bacf51b444e356b27b176e48 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Wed, 25 Oct 2023 20:19:23 +0300 Subject: [PATCH 26/33] Updated README to include LoKr adapter --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 5b6a59c096..d4dfee5c38 100644 --- a/README.md +++ b/README.md @@ -135,7 +135,7 @@ Try out the 🤗 Gradio Space which should run seamlessly on a T4 instance: **NEW** ✨ Multi Adapter support and combining multiple LoRA adapters in a weighted combination ![peft lora dreambooth weighted adapter](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/peft/weighted_adapter_dreambooth_lora.png) -**NEW** ✨ Dreambooth training for Stable Diffusion using LoHa adapter [`examples/stable_diffusion/train_dreambooth_loha.py`](examples/stable_diffusion/train_dreambooth_loha.py) +**NEW** ✨ Dreambooth training for Stable Diffusion using LoHa and LoKr adapters [`examples/stable_diffusion/train_dreambooth.py`](examples/stable_diffusion/train_dreambooth.py) ### Parameter Efficient Tuning of LLMs for RLHF components such as Ranker and Policy - Here is an example in [trl](https://github.com/lvwerra/trl) library using PEFT+INT8 for tuning policy model: [gpt2-sentiment_peft.py](https://github.com/lvwerra/trl/blob/main/examples/sentiment/scripts/gpt2-sentiment_peft.py) and corresponding [Blog](https://huggingface.co/blog/trl-peft) @@ -274,9 +274,9 @@ An example is provided in `~examples/causal_language_modeling/peft_lora_clm_acce ### Text-to-Image Generation -| Model | LoRA | LoHa | Prefix Tuning | P-Tuning | Prompt Tuning | IA3 | -| --------- | ---- | ---- | ---- | ---- | ---- | ---- | -| Stable Diffusion | ✅ | ✅ | | | | +| Model | LoRA | LoHa | LoKr | Prefix Tuning | P-Tuning | Prompt Tuning | IA3 | +| --------- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | +| Stable Diffusion | ✅ | ✅ | ✅ | | | | ### Image Classification From 6700bafa6c561f65d5a4e8ec535896c2485e1c6d Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Wed, 25 Oct 2023 20:20:13 +0300 Subject: [PATCH 27/33] Addressed some code review comments --- src/peft/tuners/loha/config.py | 18 ---------------- src/peft/tuners/loha/layer.py | 12 +++++------ src/peft/tuners/loha/model.py | 4 ++-- src/peft/tuners/lokr/config.py | 18 ---------------- src/peft/tuners/lokr/layer.py | 12 ++++++----- src/peft/tuners/lokr/model.py | 4 ++-- src/peft/tuners/lycoris_utils.py | 35 ++++++++++++++++++++++---------- tests/test_custom_models.py | 4 ---- 8 files changed, 41 insertions(+), 66 deletions(-) diff --git a/src/peft/tuners/loha/config.py b/src/peft/tuners/loha/config.py index 7ea73400fd..7c0f0c81ef 100644 --- a/src/peft/tuners/loha/config.py +++ b/src/peft/tuners/loha/config.py @@ -92,24 +92,6 @@ class LoHaConfig(LycorisConfig): "help": "The layer pattern name, used only if `layers_to_transform` is different to None and if the layer pattern is not in the common layers pattern." }, ) - rank_pattern: Optional[dict] = field( - default_factory=dict, - metadata={ - "help": ( - "The mapping from layer names or regexp expression to ranks which are different from the default rank specified by `r`. " - "For example, `{model.decoder.layers.0.encoder_attn.k_proj: 8`}" - ) - }, - ) - alpha_pattern: Optional[dict] = field( - default_factory=dict, - metadata={ - "help": ( - "The mapping from layer names or regexp expression to alphas which are different from the default alpha specified by `alpha`. 
" - "For example, `{model.decoder.layers.0.encoder_attn.k_proj: 32`}" - ) - }, - ) modules_to_save: Optional[List[str]] = field( default=None, metadata={ diff --git a/src/peft/tuners/loha/layer.py b/src/peft/tuners/loha/layer.py index 91c49ba0a3..4cad9b9b18 100644 --- a/src/peft/tuners/loha/layer.py +++ b/src/peft/tuners/loha/layer.py @@ -89,13 +89,13 @@ def update_layer( """Internal function to create loha adapter Args: - shape (`Tuple[int, ...]`): Shape of weights to produce - adapter_name (`str`): Name for the adapter to add - r (`int`): Rank for the added adapter - alpha (`float`): Alpha for the added adapter - rank_dropout (`float`): The dropout probability for rank dimension during training + adapter_name (`str`): Name for the adapter to add. + r (`int`): Rank for the added adapter. + alpha (`float`): Alpha for the added adapter. + rank_dropout (`float`): The dropout probability for rank dimension during training. module_dropout (`float`): The dropout probability for disabling adapter during training. - init_weights (`bool`): Whether to initialize weights + init_weights (`bool`): Whether to initialize weights. + use_effective_conv2d (`bool`): Use parameter effective decomposition for Conv2d with ksize > 1. """ self.r[adapter_name] = r diff --git a/src/peft/tuners/loha/model.py b/src/peft/tuners/loha/model.py index f4c60ab020..92d5b887ef 100644 --- a/src/peft/tuners/loha/model.py +++ b/src/peft/tuners/loha/model.py @@ -17,11 +17,11 @@ import torch -from ..lycoris_utils import LyCORISTuner +from ..lycoris_utils import LycorisTuner from .layer import Conv2d, Linear, LoHaLayer -class LoHaModel(LyCORISTuner): +class LoHaModel(LycorisTuner): """ Creates Low-Rank Hadamard Product model from a pretrained model. The method is partially described in https://arxiv.org/abs/2108.06098 Current implementation heavily borrows from diff --git a/src/peft/tuners/lokr/config.py b/src/peft/tuners/lokr/config.py index d3cdf1b5af..d99b22aa76 100644 --- a/src/peft/tuners/lokr/config.py +++ b/src/peft/tuners/lokr/config.py @@ -99,24 +99,6 @@ class LoKrConfig(LycorisConfig): "help": "The layer pattern name, used only if `layers_to_transform` is different to None and if the layer pattern is not in the common layers pattern." }, ) - rank_pattern: Optional[dict] = field( - default_factory=dict, - metadata={ - "help": ( - "The mapping from layer names or regexp expression to ranks which are different from the default rank specified by `r`. " - "For example, `{model.decoder.layers.0.encoder_attn.k_proj: 8`}" - ) - }, - ) - alpha_pattern: Optional[dict] = field( - default_factory=dict, - metadata={ - "help": ( - "The mapping from layer names or regexp expression to alphas which are different from the default alpha specified by `alpha`. " - "For example, `{model.decoder.layers.0.encoder_attn.k_proj: 32`}" - ) - }, - ) modules_to_save: Optional[List[str]] = field( default=None, metadata={ diff --git a/src/peft/tuners/lokr/layer.py b/src/peft/tuners/lokr/layer.py index afe74f13a5..9b01ecf96f 100644 --- a/src/peft/tuners/lokr/layer.py +++ b/src/peft/tuners/lokr/layer.py @@ -126,13 +126,15 @@ def update_layer( """Internal function to create lokr adapter Args: - shape (`Tuple[int, ...]`): Shape of weights to produce - adapter_name (`str`): Name for the adapter to add - r (`int`): Rank for the added adapter - alpha (`float`): Alpha for the added adapter + adapter_name (`str`): Name for the adapter to add. + r (`int`): Rank for the added adapter. + alpha (`float`): Alpha for the added adapter. 
rank_dropout (`float`): The dropout probability for rank dimension during training module_dropout (`float`): The dropout probability for disabling adapter during training. - init_weights (`bool`): Whether to initialize weights + init_weights (`bool`): Whether to initialize adapter weights. + use_effective_conv2d (`bool`): Use parameter effective decomposition for Conv2d with ksize > 1. + decompose_both (`bool`): Perform rank decomposition of left kronecker product matrix. + decompose_factor (`int`): Kronecker product decomposition factor. """ self.r[adapter_name] = r diff --git a/src/peft/tuners/lokr/model.py b/src/peft/tuners/lokr/model.py index 778a7dcfe3..e08b7a7c48 100644 --- a/src/peft/tuners/lokr/model.py +++ b/src/peft/tuners/lokr/model.py @@ -17,11 +17,11 @@ import torch -from ..lycoris_utils import LyCORISTuner +from ..lycoris_utils import LycorisTuner from .layer import Conv2d, Linear, LoKrLayer -class LoKrModel(LyCORISTuner): +class LoKrModel(LycorisTuner): """ Creates Low-Rank Kronecker Product model from a pretrained model. The original method is partially described in https://arxiv.org/abs/2108.06098 and in https://arxiv.org/abs/2309.14859 Current implementation heavily borrows diff --git a/src/peft/tuners/lycoris_utils.py b/src/peft/tuners/lycoris_utils.py index d9e74a3977..e5194486b1 100644 --- a/src/peft/tuners/lycoris_utils.py +++ b/src/peft/tuners/lycoris_utils.py @@ -146,12 +146,21 @@ def merge(self) -> None: def reset_adapter_parameters(self, adapter_name: str): ... - def scale_layer(self, scale_factor: float) -> None: - if scale_factor != 1: - for active_adapter in self.active_adapters: - alpha = self.alpha[active_adapter] - r = self.r[active_adapter] - self.scaling[active_adapter] = (alpha / r) * scale_factor + def set_scale(self, adapter, scale): + if adapter not in self._available_adapters: + # Ignore the case where the adapter is not in the layer + return + self.scaling[adapter] = scale * self.alpha[adapter] / self.r[adapter] + + def scale_layer(self, scale: float) -> None: + if scale == 1: + return + + for active_adapter in self.active_adapters: + if active_adapter not in self._available_adapters: + continue + + self.scaling[active_adapter] *= scale def unmerge(self) -> None: if not self.merged: @@ -162,17 +171,21 @@ def unmerge(self) -> None: if active_adapter in self._available_adapters: self.weight.data -= self.get_delta_weight(active_adapter) - def unscale_layer(self) -> None: + def unscale_layer(self, scale=None) -> None: for active_adapter in self.active_adapters: - alpha = self.alpha[active_adapter] - r = self.r[active_adapter] - self.scaling[active_adapter] = alpha / r + if active_adapter not in self._available_adapters: + continue + + if scale is None: + self.scaling[active_adapter] = self.alpha[active_adapter] / self.r[active_adapter] + else: + self.scaling[active_adapter] /= scale def update_layer(self, adapter_name: str, r: int, alpha: float, **kwargs): ... 
-class LyCORISTuner(BaseTuner): +class LycorisTuner(BaseTuner): r""" A base tuner for LyCORIS like adapters """ diff --git a/tests/test_custom_models.py b/tests/test_custom_models.py index 5015593ea0..84064e65ce 100644 --- a/tests/test_custom_models.py +++ b/tests/test_custom_models.py @@ -489,10 +489,6 @@ def test_only_params_are_updated(self, test_name, model_id, config_cls, config_k params_after = dict(model.named_parameters()) self.assertEqual(params_before.keys(), params_after.keys()) - if isinstance(model, ModelConv2D): - print(model) - self.assertFalse(True) - prefix = PREFIXES[config_cls] for name, param_before in params_before.items(): param_after = params_after[name] From fa6b522e1319c9f49440aa57763ec120cc08bffe Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Wed, 25 Oct 2023 21:09:45 +0300 Subject: [PATCH 28/33] Addressed some code review comments --- src/peft/tuners/lycoris_utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/peft/tuners/lycoris_utils.py b/src/peft/tuners/lycoris_utils.py index e5194486b1..e748aaba35 100644 --- a/src/peft/tuners/lycoris_utils.py +++ b/src/peft/tuners/lycoris_utils.py @@ -15,6 +15,7 @@ import re import warnings +from abc import abstractmethod from dataclasses import dataclass, field from itertools import chain from typing import Dict, Optional, Set, Type, Union @@ -74,6 +75,7 @@ def __init__(self): self.merged_adapters = [] @property + @abstractmethod def _available_adapters(self) -> Set[str]: ... @@ -96,6 +98,7 @@ def _init_empty_weights(self, cls, *args, **kwargs) -> None: def _op(self, x: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: raise NotImplementedError + @abstractmethod def create_adapter_parameters(self, adapter_name: str, r: int, **kwargs): ... @@ -129,6 +132,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: result = result.to(previous_dtype) return result + @abstractmethod def get_delta_weight(self, adapter_name: str) -> torch.Tensor: ... @@ -143,6 +147,7 @@ def merge(self) -> None: self.weight.data += self.get_delta_weight(active_adapter) self.merged_adapters.append(active_adapter) + @abstractmethod def reset_adapter_parameters(self, adapter_name: str): ... @@ -181,6 +186,7 @@ def unscale_layer(self, scale=None) -> None: else: self.scaling[active_adapter] /= scale + @abstractmethod def update_layer(self, adapter_name: str, r: int, alpha: float, **kwargs): ... 
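
The scaling rework above replaces the old `scale_layer`/`unscale_layer` pair (which recomputed `alpha / r` directly) with multiplicative scaling plus an explicit `set_scale`, where `unscale_layer()` without an argument restores the `alpha / r` baseline. A quick illustrative round-trip with made-up numbers, assuming a single active adapter with `alpha=8` and `r=4` (not part of the patches):

```python
alpha, r = 8, 4
scaling = alpha / r   # 2.0 -- baseline, e.g. what set_scale(adapter, 1.0) stores

scaling *= 0.5        # scale_layer(0.5)    -> 1.0
scaling /= 0.5        # unscale_layer(0.5)  -> 2.0, undoes a known scale factor
scaling = alpha / r   # unscale_layer()     -> 2.0, resets to the baseline
```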
From e76182ff40e308491e4655b3d5685396b725d105 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Wed, 25 Oct 2023 21:33:39 +0300 Subject: [PATCH 29/33] Addressed some code review comments --- src/peft/tuners/ia3/layer.py | 4 ---- src/peft/tuners/lora/layer.py | 4 ---- src/peft/tuners/lycoris_utils.py | 4 ---- src/peft/tuners/tuners_utils.py | 7 +++++++ 4 files changed, 7 insertions(+), 12 deletions(-) diff --git a/src/peft/tuners/ia3/layer.py b/src/peft/tuners/ia3/layer.py index c35f3d875c..8f75e832b0 100644 --- a/src/peft/tuners/ia3/layer.py +++ b/src/peft/tuners/ia3/layer.py @@ -43,10 +43,6 @@ def __init__( self.out_features = out_features self.is_feedforward = is_feedforward - @property - def merged(self) -> bool: - return bool(self.merged_adapters) - def update_layer(self, adapter_name, init_ia3_weights): # Actual trainable parameters if self.is_feedforward: diff --git a/src/peft/tuners/lora/layer.py b/src/peft/tuners/lora/layer.py index df6083dd8e..0eb2efa2f2 100644 --- a/src/peft/tuners/lora/layer.py +++ b/src/peft/tuners/lora/layer.py @@ -46,10 +46,6 @@ def __init__(self, in_features: int, out_features: int, **kwargs): self.out_features = out_features self.kwargs = kwargs - @property - def merged(self) -> bool: - return bool(self.merged_adapters) - def _init_empty_weights(self, cls, *args, **kwargs) -> None: # A helper method that allows to initialize the layer of the given class without spending time to initialize the # model weights. The implementation is inspired by diff --git a/src/peft/tuners/lycoris_utils.py b/src/peft/tuners/lycoris_utils.py index e748aaba35..f78531bd49 100644 --- a/src/peft/tuners/lycoris_utils.py +++ b/src/peft/tuners/lycoris_utils.py @@ -79,10 +79,6 @@ def __init__(self): def _available_adapters(self) -> Set[str]: ... - @property - def merged(self) -> bool: - return bool(self.merged_adapters) - def _init_empty_weights(self, cls, *args, **kwargs) -> None: # A helper method that allows to initialize the layer of the given class without spending time to initialize the # model weights. 
The implementation is inspired by diff --git a/src/peft/tuners/tuners_utils.py b/src/peft/tuners/tuners_utils.py index 10f8754296..4f446fd015 100644 --- a/src/peft/tuners/tuners_utils.py +++ b/src/peft/tuners/tuners_utils.py @@ -281,12 +281,19 @@ class BaseTunerLayer(ABC): # the currently active adapter(s) _active_adapter: str | list[str] = "default" + # List all merged adapters + merged_adapters: list[str] = [] + def merge(self, *args) -> None: raise NotImplementedError def unmerge(self, *args) -> None: raise NotImplementedError + @property + def merged(self) -> bool: + return bool(self.merged_adapters) + @property def disable_adapters(self) -> bool: # use a property to ensure that disable_adapters is not set directly, instead use the enable_adapters method From 25077b236a30bba9af57d231fbd0ced03ec83275 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Fri, 27 Oct 2023 14:00:12 +0300 Subject: [PATCH 30/33] Updated check_target_modules docstring, increased test coverage --- src/peft/tuners/tuners_utils.py | 2 +- tests/test_custom_models.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/peft/tuners/tuners_utils.py b/src/peft/tuners/tuners_utils.py index 4f446fd015..9307e5fc12 100644 --- a/src/peft/tuners/tuners_utils.py +++ b/src/peft/tuners/tuners_utils.py @@ -356,7 +356,7 @@ def check_target_module_exists(config, key: str) -> bool | re.Match[str] | None: """A helper method to check if the passed module's key name matches any of the target modules in the adapter_config. Args: - config (`LoraConfig` | `LoHaConfig`): A config to match target modules from + config (`LoraConfig` | `LycorisConfig`): A config to match target modules from key (`str`): A key to search any matches in config Returns: diff --git a/tests/test_custom_models.py b/tests/test_custom_models.py index 84064e65ce..83574757ea 100644 --- a/tests/test_custom_models.py +++ b/tests/test_custom_models.py @@ -142,6 +142,7 @@ "module_dropout": 0.1, }, ), + ("Vanilla MLP 7 LOHA", "MLP", LoHaConfig, {"target_modules": "lin0", "rank_dropout": 0.5}), ("Conv2d 1 LOHA", "Conv2d", LoHaConfig, {"target_modules": ["conv2d"]}), ("Conv2d 2 LOHA", "Conv2d", LoHaConfig, {"target_modules": ["conv2d", "lin0"]}), ("Conv2d 3 LOHA", "Conv2d", LoHaConfig, {"target_modules": ["conv2d"], "use_effective_conv2d": True}), @@ -162,6 +163,8 @@ "module_dropout": 0.1, }, ), + ("Vanilla MLP 7 LOKR", "MLP", LoKrConfig, {"target_modules": "lin0", "rank_dropout": 0.5}), + ("Vanilla MLP 8 LOKR", "MLP", LoKrConfig, {"target_modules": "lin0", "decompose_both": True, "r": 1, "alpha": 1}), ("Conv2d 1 LOKR", "Conv2d", LoKrConfig, {"target_modules": ["conv2d"]}), ("Conv2d 2 LOKR", "Conv2d", LoKrConfig, {"target_modules": ["conv2d", "lin0"]}), ("Conv2d 3 LOKR", "Conv2d", LoKrConfig, {"target_modules": ["conv2d"], "use_effective_conv2d": True}), From 9f05024586fa55224f11dc55c7912100cecedb13 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Fri, 27 Oct 2023 17:00:59 +0300 Subject: [PATCH 31/33] Added delete_adapter method for LoKr and LoHa --- src/peft/tuners/lycoris_utils.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/peft/tuners/lycoris_utils.py b/src/peft/tuners/lycoris_utils.py index f78531bd49..9a1d63d969 100644 --- a/src/peft/tuners/lycoris_utils.py +++ b/src/peft/tuners/lycoris_utils.py @@ -378,3 +378,30 @@ def set_adapter(self, adapter_name): warnings.warn("Adapter cannot be set when the model is merged. 
Unmerging the model first.") module.unmerge() module.set_adapter(adapter_name) + + def delete_adapter(self, adapter_name: str): + """ + Deletes an existing adapter. + + Args: + adapter_name (`str`): Name of the adapter to be deleted. + """ + if adapter_name not in list(self.peft_config.keys()): + raise ValueError(f"Adapter {adapter_name} does not exist") + del self.peft_config[adapter_name] + + key_list = [key for key, _ in self.model.named_modules() if "lora" not in key] + for key in key_list: + _, target, _ = _get_submodules(self.model, key) + if isinstance(target, LycorisLayer): + for attr in target.adapter_layer_names: + if adapter_name in getattr(target, attr): + getattr(target, attr).pop(adapter_name) + if adapter_name in target.active_adapters: + resetting_active_adapter = ( + list(self.peft_config.keys())[0] if len(self.peft_config) > 0 else "default" + ) + warnings.warn( + f"Adapter {adapter_name} was active which is now deleted. Setting active adapter to {resetting_active_adapter}. " + ) + target.set_adapter(resetting_active_adapter) From f6e73352197d309382b964c28fa764d8caec2a2f Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Fri, 27 Oct 2023 17:22:17 +0300 Subject: [PATCH 32/33] Fixed typo in delete_adapter --- src/peft/tuners/lycoris_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/peft/tuners/lycoris_utils.py b/src/peft/tuners/lycoris_utils.py index 9a1d63d969..8d3fb7481b 100644 --- a/src/peft/tuners/lycoris_utils.py +++ b/src/peft/tuners/lycoris_utils.py @@ -390,7 +390,7 @@ def delete_adapter(self, adapter_name: str): raise ValueError(f"Adapter {adapter_name} does not exist") del self.peft_config[adapter_name] - key_list = [key for key, _ in self.model.named_modules() if "lora" not in key] + key_list = [key for key, _ in self.model.named_modules() if self.prefix not in key] for key in key_list: _, target, _ = _get_submodules(self.model, key) if isinstance(target, LycorisLayer): From 69ae74c8114df2f2fc698c870fcd59d27725b6d4 Mon Sep 17 00:00:00 2001 From: Alexander Kovalchuk Date: Sun, 29 Oct 2023 11:11:37 +0300 Subject: [PATCH 33/33] Provide default value for --- src/peft/tuners/loha/layer.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/peft/tuners/loha/layer.py b/src/peft/tuners/loha/layer.py index 4cad9b9b18..26f57ac681 100644 --- a/src/peft/tuners/loha/layer.py +++ b/src/peft/tuners/loha/layer.py @@ -83,7 +83,7 @@ def update_layer( rank_dropout: float, module_dropout: float, init_weights: bool, - use_effective_conv2d: bool, + use_effective_conv2d: bool = False, **kwargs, ) -> None: """Internal function to create loha adapter @@ -95,7 +95,8 @@ def update_layer( rank_dropout (`float`): The dropout probability for rank dimension during training. module_dropout (`float`): The dropout probability for disabling adapter during training. init_weights (`bool`): Whether to initialize weights. - use_effective_conv2d (`bool`): Use parameter effective decomposition for Conv2d with ksize > 1. + use_effective_conv2d (`bool`, *optional*, defaults to `False`): + Use parameter effective decomposition for Conv2d with ksize > 1. """ self.r[adapter_name] = r
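
With the final patch in place, LoKr is wired into the standard PEFT workflow. A hypothetical smoke test putting the new config options together is sketched below; the toy module and every hyperparameter value are invented for illustration, only the config fields themselves come from this series.

```python
import torch.nn as nn

from peft import LoKrConfig, get_peft_model


class ToyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.lin0 = nn.Linear(64, 128)
        self.conv2d = nn.Conv2d(8, 16, kernel_size=3, padding=1)

    def forward(self, x):
        # (batch, 64) -> (batch, 128) -> (batch, 8, 4, 4) -> (batch, 16, 4, 4)
        return self.conv2d(self.lin0(x).reshape(-1, 8, 4, 4))


config = LoKrConfig(
    target_modules=["lin0", "conv2d"],
    r=4,
    alpha=4,
    rank_dropout=0.0,
    module_dropout=0.0,
    use_effective_conv2d=True,
    decompose_both=True,
    decompose_factor=4,
    init_weights=True,
)
model = get_peft_model(ToyModel(), config)
model.print_trainable_parameters()
```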