From 1780b3a1ebaf867090432ee8bce3529261c862cb Mon Sep 17 00:00:00 2001
From: Karl-Johan Alm
Date: Tue, 30 Jul 2024 13:32:31 +0900
Subject: [PATCH 1/4] fix: lora+: include lr in optimizer kwargs

---
 src/peft/optimizers/loraplus.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/peft/optimizers/loraplus.py b/src/peft/optimizers/loraplus.py
index 2bc7e44dc8..77e20d6707 100644
--- a/src/peft/optimizers/loraplus.py
+++ b/src/peft/optimizers/loraplus.py
@@ -30,7 +30,7 @@
 
 
 def create_loraplus_optimizer(
-    model: PeftModel, optimizer_cls: type[Optimizer], *, lr: float, loraplus_lr_ratio: float, **kwargs
+    model: PeftModel, optimizer_cls: type[Optimizer], *, loraplus_lr_ratio: float, **kwargs
 ) -> Optimizer:
     """
     Creates a LoraPlus optimizer.
@@ -82,6 +82,7 @@ def create_loraplus_optimizer(
         else:
             param_groups["groupA"][name] = param
 
+    lr = kwargs['lr']
     loraplus_weight_decay = kwargs.pop("loraplus_weight_decay", 0.0)
     loraplus_lr_embedding = kwargs.pop("loraplus_lr_embedding", 1e-6)
 

From de4f83adc3bd2f368ede602207eae22c7aa7a47e Mon Sep 17 00:00:00 2001
From: Karl-Johan Alm
Date: Tue, 30 Jul 2024 13:34:59 +0900
Subject: [PATCH 2/4] remove lr from args list as it is now in kwargs

---
 src/peft/optimizers/loraplus.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/peft/optimizers/loraplus.py b/src/peft/optimizers/loraplus.py
index 77e20d6707..3278d87fba 100644
--- a/src/peft/optimizers/loraplus.py
+++ b/src/peft/optimizers/loraplus.py
@@ -42,7 +42,6 @@ def create_loraplus_optimizer(
     Args:
         model (`torch.nn.Module`): The model to be optimized.
         optimizer_cls (`torch.optim.Optimizer`): The optimizer class to be used.
-        lr (`float`): The learning rate to be used for the optimizer.
         loraplus_lr_ratio (`float`):
             The ratio of learning ηB/ηA where ηA (lr) is passed in as the optimizer learning rate. Should be ≥1. Should
             be set in tandem with the optimizer learning rate (lr); should be larger when the task is more difficult
@@ -82,7 +81,7 @@ def create_loraplus_optimizer(
         else:
             param_groups["groupA"][name] = param
 
-    lr = kwargs['lr']
+    lr = kwargs["lr"]
     loraplus_weight_decay = kwargs.pop("loraplus_weight_decay", 0.0)
     loraplus_lr_embedding = kwargs.pop("loraplus_lr_embedding", 1e-6)
 

From a36a87258eb28278d493cdec160c6fdfabd17f49 Mon Sep 17 00:00:00 2001
From: Karl-Johan Alm
Date: Tue, 30 Jul 2024 19:04:24 +0900
Subject: [PATCH 3/4] put lr into kwargs instead

---
 src/peft/optimizers/loraplus.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/peft/optimizers/loraplus.py b/src/peft/optimizers/loraplus.py
index 3278d87fba..21d334a75c 100644
--- a/src/peft/optimizers/loraplus.py
+++ b/src/peft/optimizers/loraplus.py
@@ -30,7 +30,7 @@
 
 
 def create_loraplus_optimizer(
-    model: PeftModel, optimizer_cls: type[Optimizer], *, loraplus_lr_ratio: float, **kwargs
+    model: PeftModel, optimizer_cls: type[Optimizer], *, lr: float, loraplus_lr_ratio: float, **kwargs
 ) -> Optimizer:
     """
     Creates a LoraPlus optimizer.
@@ -81,7 +81,7 @@ def create_loraplus_optimizer(
         else:
             param_groups["groupA"][name] = param
 
-    lr = kwargs["lr"]
+    kwargs["lr"] = lr
     loraplus_weight_decay = kwargs.pop("loraplus_weight_decay", 0.0)
     loraplus_lr_embedding = kwargs.pop("loraplus_lr_embedding", 1e-6)
 

From 0a07313e2ba1196e05b82a40be7ade3ea29a94af Mon Sep 17 00:00:00 2001
From: Karl-Johan Alm
Date: Tue, 30 Jul 2024 19:05:18 +0900
Subject: [PATCH 4/4] restore comment

---
 src/peft/optimizers/loraplus.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/peft/optimizers/loraplus.py b/src/peft/optimizers/loraplus.py
index 21d334a75c..0c4bd7d1e0 100644
--- a/src/peft/optimizers/loraplus.py
+++ b/src/peft/optimizers/loraplus.py
@@ -42,6 +42,7 @@ def create_loraplus_optimizer(
     Args:
         model (`torch.nn.Module`): The model to be optimized.
         optimizer_cls (`torch.optim.Optimizer`): The optimizer class to be used.
+        lr (`float`): The learning rate to be used for the optimizer.
         loraplus_lr_ratio (`float`):
             The ratio of learning ηB/ηA where ηA (lr) is passed in as the optimizer learning rate. Should be ≥1. Should
             be set in tandem with the optimizer learning rate (lr); should be larger when the task is more difficult
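
For reference, a minimal usage sketch of the API after this series: `lr` remains a required keyword argument of `create_loraplus_optimizer`, and patch 3 simply places it into `kwargs["lr"]` before constructing the optimizer. The tiny base model, the target module name, and the hyperparameter values below are illustrative assumptions, not taken from the patches.

```python
# Usage sketch (not part of the patches). The base model, target_modules,
# and hyperparameter values are placeholders chosen for illustration.
import torch
import torch.nn as nn

from peft import LoraConfig, get_peft_model
from peft.optimizers import create_loraplus_optimizer

# Tiny stand-in model with a single Linear layer that LoRA can target.
base_model = nn.Sequential(nn.Linear(16, 16))
peft_model = get_peft_model(base_model, LoraConfig(r=8, target_modules=["0"]))

optimizer = create_loraplus_optimizer(
    model=peft_model,
    optimizer_cls=torch.optim.AdamW,
    lr=5e-5,                 # eta_A; after patch 3 it is forwarded via kwargs["lr"]
    loraplus_lr_ratio=16,    # eta_B / eta_A, so LoRA B matrices train at 16 * lr
    loraplus_weight_decay=0.0,  # optional extra, popped from kwargs inside the function
)
```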