[ModulesToSave] add correct hook management for modules to save (#755)

younesbelkada · BenjaminBossan · web-flow · commit e27e8834436b · 2023-07-27T10:29:32.000+02:00
* add correct hook management for modules to save

* forward contrib credits from finding the solution

* add nice GPU tests

* quality

---------

Co-authored-by: BenjaminBossan <BenjaminBossan@users.noreply.github.com>
diff --git a/src/peft/utils/other.py b/src/peft/utils/other.py
@@ -12,12 +12,14 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import copy
+import inspect
 import os
 import warnings
 
+import accelerate
 import torch
+from accelerate.hooks import add_hook_to_module, remove_hook_from_module
 
 
 # Add or edit model card to have `library_name: peft`
@@ -140,6 +142,26 @@ def __init__(self, module_to_save, adapter_name):
     def update(self, adapter_name):
         self.modules_to_save.update(torch.nn.ModuleDict({adapter_name: copy.deepcopy(self.original_module)}))
 
+        if hasattr(self.modules_to_save[adapter_name], "_hf_hook"):
+            old_hook = self.modules_to_save[adapter_name]._hf_hook
+            new_hook = self._create_new_hook(old_hook)
+            remove_hook_from_module(self.modules_to_save[adapter_name])
+            add_hook_to_module(self.modules_to_save[adapter_name], new_hook)
+
+    def _create_new_hook(self, old_hook):
+        r"""
+        Creates a new hook based on the old hook. Use it only if you know what you are doing!
+        """
+        old_hook_cls = getattr(accelerate.hooks, old_hook.__class__.__name__)
+        old_hook_attr = old_hook.__dict__
+        filtered_old_hook_attr = {}
+        old_hook_init_signature = inspect.signature(old_hook_cls.__init__)
+        for k in old_hook_attr.keys():
+            if k in old_hook_init_signature.parameters:
+                filtered_old_hook_attr[k] = old_hook_attr[k]
+        new_hook = old_hook_cls(**filtered_old_hook_attr)
+        return new_hook
+
     def forward(self, *args, **kwargs):
         if self.disable_adapters or (self.active_adapter not in self.modules_to_save):
             return self.original_module(*args, **kwargs)
diff --git a/tests/test_common_gpu.py b/tests/test_common_gpu.py
@@ -20,6 +20,7 @@
 from transformers import (
     AutoModelForCausalLM,
     AutoModelForSeq2SeqLM,
+    AutoModelForSequenceClassification,
     AutoTokenizer,
     BitsAndBytesConfig,
     LlamaForCausalLM,
@@ -316,3 +317,40 @@ def test_print_4bit_expected(self):
 
         self.assertEqual(trainable_params, EXPECTED_TRAINABLE_PARAMS)
         self.assertEqual(all_params, EXPECTED_ALL_PARAMS)
+
+    @require_torch_gpu
+    @pytest.mark.single_gpu_tests
+    @require_bitsandbytes
+    def test_modules_to_save_grad(self):
+        model_id = "bigscience/bloomz-560m"
+        load_in_4bit = True
+
+        model = AutoModelForSequenceClassification.from_pretrained(
+            model_id,
+            load_in_4bit=load_in_4bit,
+            torch_dtype=torch.float32,
+        )
+
+        model = prepare_model_for_kbit_training(model)
+
+        config = LoraConfig(
+            r=16,
+            lora_alpha=16,
+            lora_dropout=0.05,
+            bias="none",
+            task_type="SEQ_CLS",
+        )
+
+        peft_model = get_peft_model(model, config)
+
+        lm_head = peft_model.base_model.model.score
+        original_module = lm_head.original_module
+        modules_to_save = lm_head.modules_to_save.default
+
+        inputs = torch.randn((1024))
+        o1 = lm_head(inputs)
+        o1.mean().backward()
+
+        self.assertTrue(modules_to_save.weight.requires_grad is True)
+        self.assertTrue(original_module.weight.grad is None)
+        self.assertTrue(modules_to_save.weight.grad is not None)