From 435e518a10fd42f63625f676d73d497e5a28a591 Mon Sep 17 00:00:00 2001 From: Zeju Date: Wed, 7 Aug 2024 13:30:01 +0200 Subject: [PATCH 1/9] fixing non-contiguous tensor when saving the model after merge_and_unload() --- src/peft/tuners/boft/layer.py | 12 ++++++------ src/peft/tuners/boft/model.py | 33 +++++++++++++++++++++++---------- 2 files changed, 29 insertions(+), 16 deletions(-) diff --git a/src/peft/tuners/boft/layer.py b/src/peft/tuners/boft/layer.py index 97a1baaa58..52c0b847f4 100644 --- a/src/peft/tuners/boft/layer.py +++ b/src/peft/tuners/boft/layer.py @@ -510,9 +510,9 @@ def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = N # because of the copy operation. orig_weight = base_layer.weight.data.clone() butterfly_oft_mat, boft_s = self.get_delta_weight(active_adapter) - orig_weight = torch.transpose(orig_weight, 0, 1) - orig_weight = torch.mm(butterfly_oft_mat, orig_weight) - orig_weight = torch.transpose(orig_weight, 0, 1) + orig_weight = torch.transpose(orig_weight, 0, 1).contiguous() + orig_weight = torch.mm(butterfly_oft_mat, orig_weight).contiguous() + orig_weight = torch.transpose(orig_weight, 0, 1).contiguous() orig_weight = orig_weight * boft_s if not torch.isfinite(orig_weight).all(): @@ -524,9 +524,9 @@ def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = N else: butterfly_oft_mat, boft_s = self.get_delta_weight(active_adapter) orig_weight = base_layer.weight.data.clone() - orig_weight = torch.transpose(orig_weight, 0, 1) - orig_weight = torch.mm(butterfly_oft_mat, orig_weight) - orig_weight = torch.transpose(orig_weight, 0, 1) + orig_weight = torch.transpose(orig_weight, 0, 1).contiguous() + orig_weight = torch.mm(butterfly_oft_mat, orig_weight).contiguous() + orig_weight = torch.transpose(orig_weight, 0, 1).contiguous() orig_weight = orig_weight * boft_s self.base_layer.weight.data = orig_weight diff --git a/src/peft/tuners/boft/model.py b/src/peft/tuners/boft/model.py index 0cb3a92915..e4664db1dc 100644 --- a/src/peft/tuners/boft/model.py +++ b/src/peft/tuners/boft/model.py @@ -24,7 +24,12 @@ from torch import nn from tqdm import tqdm -from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer, check_target_module_exists +from peft.tuners.tuners_utils import ( + BaseTuner, + BaseTunerLayer, + check_target_module_exists, + onload_layer, +) from peft.utils import ( TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING, ModulesToSaveWrapper, @@ -265,7 +270,9 @@ def _unload_and_optionally_merge( safe_merge: bool = False, adapter_names: Optional[List[str]] = None, ): - self._unloading_checks(adapter_names) + if merge: + self._check_merge_allowed() + key_list = [key for key, _ in self.model.named_modules() if self.prefix not in key] desc = "Unloading " + ("and merging " if merge else "") + "model" for key in tqdm(key_list, disable=not progressbar, desc=desc): @@ -273,14 +280,20 @@ def _unload_and_optionally_merge( parent, target, target_name = _get_submodules(self.model, key) except AttributeError: continue - - if hasattr(target, "base_layer"): - if merge: - target.merge(safe_merge=safe_merge, adapter_names=adapter_names) - self._replace_module(parent, target_name, target.get_base_layer(), target) - elif isinstance(target, ModulesToSaveWrapper): - # save any additional trainable modules part of `modules_to_save` - setattr(parent, target_name, target.modules_to_save[target.active_adapter]) + with onload_layer(target): + if hasattr(target, "base_layer"): + if merge: + target.merge(safe_merge=safe_merge, adapter_names=adapter_names) + self._replace_module(parent, target_name, target.get_base_layer(), target) + elif isinstance(target, ModulesToSaveWrapper): + # save any additional trainable modules part of `modules_to_save` + new_module = target.modules_to_save[target.active_adapter] + if hasattr(new_module, "base_layer"): + # check if the module is itself a tuner layer + if merge: + new_module.merge(safe_merge=safe_merge, adapter_names=adapter_names) + new_module = new_module.get_base_layer() + setattr(parent, target_name, new_module) return self.model From cac49e61e1827a34b30436d487db8c61fb88102d Mon Sep 17 00:00:00 2001 From: Zeju Date: Wed, 7 Aug 2024 16:02:53 +0200 Subject: [PATCH 2/9] fixing non contigous error and adding unit test in testing_common.py --- src/peft/tuners/boft/layer.py | 20 ++++++++++---------- tests/testing_common.py | 4 ++++ 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/peft/tuners/boft/layer.py b/src/peft/tuners/boft/layer.py index 52c0b847f4..604f83fcae 100644 --- a/src/peft/tuners/boft/layer.py +++ b/src/peft/tuners/boft/layer.py @@ -510,9 +510,9 @@ def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = N # because of the copy operation. orig_weight = base_layer.weight.data.clone() butterfly_oft_mat, boft_s = self.get_delta_weight(active_adapter) - orig_weight = torch.transpose(orig_weight, 0, 1).contiguous() - orig_weight = torch.mm(butterfly_oft_mat, orig_weight).contiguous() - orig_weight = torch.transpose(orig_weight, 0, 1).contiguous() + orig_weight = torch.transpose(orig_weight, 0, 1) + orig_weight = torch.mm(butterfly_oft_mat, orig_weight) + orig_weight = torch.transpose(orig_weight, 0, 1) orig_weight = orig_weight * boft_s if not torch.isfinite(orig_weight).all(): @@ -520,16 +520,16 @@ def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = N f"NaNs detected in the merged weights. The adapter {active_adapter} seems to be broken" ) - self.base_layer.weight.data = orig_weight + self.base_layer.weight.data = orig_weight.contiguous() else: butterfly_oft_mat, boft_s = self.get_delta_weight(active_adapter) orig_weight = base_layer.weight.data.clone() - orig_weight = torch.transpose(orig_weight, 0, 1).contiguous() - orig_weight = torch.mm(butterfly_oft_mat, orig_weight).contiguous() - orig_weight = torch.transpose(orig_weight, 0, 1).contiguous() + orig_weight = torch.transpose(orig_weight, 0, 1) + orig_weight = torch.mm(butterfly_oft_mat, orig_weight) + orig_weight = torch.transpose(orig_weight, 0, 1) orig_weight = orig_weight * boft_s - self.base_layer.weight.data = orig_weight + self.base_layer.weight.data = orig_weight.contiguous() self.merged_adapters.append(active_adapter) @@ -817,7 +817,7 @@ def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = N self.out_features, self.in_features, base_layer.kernel_size[0], base_layer.kernel_size[0] ) - self.base_layer.weight.data = orig_weight + self.base_layer.weight.data = orig_weight.contiguous() else: butterfly_oft_mat, boft_s = self.get_delta_weight(active_adapter) @@ -831,7 +831,7 @@ def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = N self.out_features, self.in_features, base_layer.kernel_size[0], base_layer.kernel_size[0] ) - self.base_layer.weight.data = orig_weight + self.base_layer.weight.data = orig_weight.contiguous() self.merged_adapters.append(active_adapter) diff --git a/tests/testing_common.py b/tests/testing_common.py index 9168b54b5a..4ed7befb76 100644 --- a/tests/testing_common.py +++ b/tests/testing_common.py @@ -763,6 +763,10 @@ def _test_safe_merge(self, model_id, config_cls, config_kwargs): # check that the logits are the same after unloading assert torch.allclose(logits_peft, logits_unloaded, atol=atol, rtol=rtol) + # serializing with safetensors works + from safetensors.torch import save_file + save_file(model_unloaded.state_dict(), os.path.join(tmp_dirname, "model.safetensors")) + def _test_mixed_adapter_batches(self, model_id, config_cls, config_kwargs): # Test for mixing different adapters in a single batch by passing the adapter_names argument if config_cls not in (LoraConfig,): From e3e2f79be5b7fd674d535e20fd45428c8739df05 Mon Sep 17 00:00:00 2001 From: Zeju Date: Wed, 7 Aug 2024 16:11:37 +0200 Subject: [PATCH 3/9] update oft / fixing non contigous when saving --- src/peft/tuners/oft/layer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/peft/tuners/oft/layer.py b/src/peft/tuners/oft/layer.py index 8f973bda8b..965f2e83ff 100644 --- a/src/peft/tuners/oft/layer.py +++ b/src/peft/tuners/oft/layer.py @@ -171,7 +171,7 @@ def merge(self, safe_merge: bool = False, adapter_names: Optional[List[str]] = N f"NaNs detected in the merged weights. The adapter {active_adapter} seems to be broken" ) - base_layer.weight.data = new_weights + base_layer.weight.data = new_weights.contiguous() self.merged_adapters.append(active_adapter) def unmerge(self) -> None: @@ -215,7 +215,7 @@ def unmerge(self) -> None: base_layer.kernel_size[1], ] ) - base_layer.weight.data = orig_weights + base_layer.weight.data = orig_weights.contiguous() def get_delta_weight(self, adapter_name: str) -> torch.Tensor: rank = self.r[adapter_name] From ac319b0f33aa52e5b8f0ea728c93dc3e20658c02 Mon Sep 17 00:00:00 2001 From: Zeju Date: Wed, 7 Aug 2024 16:18:25 +0200 Subject: [PATCH 4/9] update unit test in testing_common for save --- tests/testing_common.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/testing_common.py b/tests/testing_common.py index 4ed7befb76..c3c39eee73 100644 --- a/tests/testing_common.py +++ b/tests/testing_common.py @@ -763,9 +763,14 @@ def _test_safe_merge(self, model_id, config_cls, config_kwargs): # check that the logits are the same after unloading assert torch.allclose(logits_peft, logits_unloaded, atol=atol, rtol=rtol) - # serializing with safetensors works from safetensors.torch import save_file - save_file(model_unloaded.state_dict(), os.path.join(tmp_dirname, "model.safetensors")) + # serializing works without errors + with tempfile.TemporaryDirectory() as tmp_dirname: + # serializing with torch.save works + torch.save(model_unloaded.state_dict(), os.path.join(tmp_dirname, "model.bin")) + + # serializing with safetensors works + save_file(model_unloaded.state_dict(), os.path.join(tmp_dirname, "model.safetensors")) def _test_mixed_adapter_batches(self, model_id, config_cls, config_kwargs): # Test for mixing different adapters in a single batch by passing the adapter_names argument From 2758cf9cc9c77657f9c04972e97d43d8c5736217 Mon Sep 17 00:00:00 2001 From: Zeju Date: Wed, 7 Aug 2024 16:20:31 +0200 Subject: [PATCH 5/9] update unit test in testing_common for save --- tests/testing_common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testing_common.py b/tests/testing_common.py index c3c39eee73..2ed3a70450 100644 --- a/tests/testing_common.py +++ b/tests/testing_common.py @@ -52,6 +52,7 @@ from peft.utils import _get_submodules, infer_device from .testing_utils import get_state_dict +from safetensors.torch import save_file CONFIG_TESTING_KWARGS = ( @@ -763,8 +764,7 @@ def _test_safe_merge(self, model_id, config_cls, config_kwargs): # check that the logits are the same after unloading assert torch.allclose(logits_peft, logits_unloaded, atol=atol, rtol=rtol) - from safetensors.torch import save_file - # serializing works without errors + # Ensure that serializing with safetensors works, there was an error when weights were not contiguous with tempfile.TemporaryDirectory() as tmp_dirname: # serializing with torch.save works torch.save(model_unloaded.state_dict(), os.path.join(tmp_dirname, "model.bin")) From 746bc4c45f1c374594ee0c95dd6bcf351e520a49 Mon Sep 17 00:00:00 2001 From: Zeju Date: Wed, 7 Aug 2024 16:32:43 +0200 Subject: [PATCH 6/9] changing code after running make style --- examples/boft_controlnet/utils/dataset.py | 3 ++- src/peft/tuners/boft/model.py | 6 +++--- tests/testing_common.py | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/examples/boft_controlnet/utils/dataset.py b/examples/boft_controlnet/utils/dataset.py index 1de3c8cc36..de10d16a95 100644 --- a/examples/boft_controlnet/utils/dataset.py +++ b/examples/boft_controlnet/utils/dataset.py @@ -2,13 +2,14 @@ import numpy as np import torch -import wandb from datasets import load_dataset from diffusers import DDIMScheduler from PIL import Image from torchvision import transforms from utils.pipeline_controlnet import LightControlNetPipeline +import wandb + def image_grid(imgs, rows, cols): assert len(imgs) == rows * cols diff --git a/src/peft/tuners/boft/model.py b/src/peft/tuners/boft/model.py index e4664db1dc..11bd4c3ad2 100644 --- a/src/peft/tuners/boft/model.py +++ b/src/peft/tuners/boft/model.py @@ -25,8 +25,8 @@ from tqdm import tqdm from peft.tuners.tuners_utils import ( - BaseTuner, - BaseTunerLayer, + BaseTuner, + BaseTunerLayer, check_target_module_exists, onload_layer, ) @@ -272,7 +272,7 @@ def _unload_and_optionally_merge( ): if merge: self._check_merge_allowed() - + key_list = [key for key, _ in self.model.named_modules() if self.prefix not in key] desc = "Unloading " + ("and merging " if merge else "") + "model" for key in tqdm(key_list, disable=not progressbar, desc=desc): diff --git a/tests/testing_common.py b/tests/testing_common.py index 2ed3a70450..5a530337d5 100644 --- a/tests/testing_common.py +++ b/tests/testing_common.py @@ -26,6 +26,7 @@ import yaml from diffusers import StableDiffusionPipeline from packaging import version +from safetensors.torch import save_file from peft import ( AdaLoraConfig, @@ -52,7 +53,6 @@ from peft.utils import _get_submodules, infer_device from .testing_utils import get_state_dict -from safetensors.torch import save_file CONFIG_TESTING_KWARGS = ( From 4909102368a242c00d4555e16a54db8ac90a790c Mon Sep 17 00:00:00 2001 From: Zeju Date: Wed, 7 Aug 2024 16:46:57 +0200 Subject: [PATCH 7/9] changing code after running make style --- examples/int8_training/peft_adalora_whisper_large_training.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/int8_training/peft_adalora_whisper_large_training.py b/examples/int8_training/peft_adalora_whisper_large_training.py index 0c8d02a237..0c00c89c85 100644 --- a/examples/int8_training/peft_adalora_whisper_large_training.py +++ b/examples/int8_training/peft_adalora_whisper_large_training.py @@ -18,7 +18,6 @@ import numpy as np import torch import transformers -import wandb # accelerate imports from accelerate import Accelerator, dispatch_model @@ -39,6 +38,8 @@ ) from transformers.models.whisper.english_normalizer import BasicTextNormalizer +import wandb + # peft imports from peft import AdaLoraConfig, LoraConfig, PeftModel, get_peft_model From 38ae778d405d0a29583ebabbdda070bc55be2332 Mon Sep 17 00:00:00 2001 From: Zeju Date: Wed, 7 Aug 2024 17:04:30 +0200 Subject: [PATCH 8/9] changing code to pass make quality --- examples/boft_controlnet/utils/dataset.py | 3 +-- examples/int8_training/peft_adalora_whisper_large_training.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/examples/boft_controlnet/utils/dataset.py b/examples/boft_controlnet/utils/dataset.py index de10d16a95..1de3c8cc36 100644 --- a/examples/boft_controlnet/utils/dataset.py +++ b/examples/boft_controlnet/utils/dataset.py @@ -2,14 +2,13 @@ import numpy as np import torch +import wandb from datasets import load_dataset from diffusers import DDIMScheduler from PIL import Image from torchvision import transforms from utils.pipeline_controlnet import LightControlNetPipeline -import wandb - def image_grid(imgs, rows, cols): assert len(imgs) == rows * cols diff --git a/examples/int8_training/peft_adalora_whisper_large_training.py b/examples/int8_training/peft_adalora_whisper_large_training.py index 0c00c89c85..0c8d02a237 100644 --- a/examples/int8_training/peft_adalora_whisper_large_training.py +++ b/examples/int8_training/peft_adalora_whisper_large_training.py @@ -18,6 +18,7 @@ import numpy as np import torch import transformers +import wandb # accelerate imports from accelerate import Accelerator, dispatch_model @@ -38,8 +39,6 @@ ) from transformers.models.whisper.english_normalizer import BasicTextNormalizer -import wandb - # peft imports from peft import AdaLoraConfig, LoraConfig, PeftModel, get_peft_model From f72d42afed50de3ddd6a9e7d4cc85e6ad0a3bed3 Mon Sep 17 00:00:00 2001 From: Zeju Date: Wed, 7 Aug 2024 17:17:03 +0200 Subject: [PATCH 9/9] changing code to pass make quality --- tests/testing_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testing_common.py b/tests/testing_common.py index 5a530337d5..da58337bc2 100644 --- a/tests/testing_common.py +++ b/tests/testing_common.py @@ -764,7 +764,7 @@ def _test_safe_merge(self, model_id, config_cls, config_kwargs): # check that the logits are the same after unloading assert torch.allclose(logits_peft, logits_unloaded, atol=atol, rtol=rtol) - # Ensure that serializing with safetensors works, there was an error when weights were not contiguous + # Ensure that serializing with safetensors works, there was an error when weights were not contiguous with tempfile.TemporaryDirectory() as tmp_dirname: # serializing with torch.save works torch.save(model_unloaded.state_dict(), os.path.join(tmp_dirname, "model.bin"))