fixing multiple LoRA in the same batch or vit #1990

Merged: 10 commits, Sep 17, 2024
src/peft/tuners/lora/model.py (2 additions, 1 deletion)

@@ -46,6 +46,7 @@
 )
 from peft.utils.merge_utils import dare_linear, dare_ties, magnitude_prune, task_arithmetic, ties
 
+from ...utils.other import ModulesToSaveWrapper
 from .aqlm import dispatch_aqlm
 from .awq import dispatch_awq
 from .config import LoraConfig
@@ -432,7 +433,7 @@ def _enable_peft_forward_hooks(self, *args, **kwargs):

         hook_handles = []
         for module in self.modules():
-            if isinstance(module, LoraLayer):
+            if isinstance(module, LoraLayer) or isinstance(module, ModulesToSaveWrapper):
                 pre_forward = partial(_adapter_names_pre_forward_hook, adapter_names=adapter_names)
                 handle = module.register_forward_pre_hook(pre_forward, with_kwargs=True)
                 hook_handles.append(handle)
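For context: the hook registered in this loop is what injects `adapter_names` into each wrapped module's forward call, and with this change it now fires for `ModulesToSaveWrapper` as well as for `LoraLayer`. A minimal sketch of such a pre-forward hook, assuming the `with_kwargs=True` hook signature (the actual `_adapter_names_pre_forward_hook` defined alongside this method may differ slightly):

def _adapter_names_pre_forward_hook(target, args, kwargs, adapter_names):
    # Forward pre-hook: inject the per-sample adapter names into the kwargs so the
    # wrapped layer can route each sample of the batch to its own adapter.
    kwargs["adapter_names"] = adapter_names
    return args, kwargs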
src/peft/utils/other.py (21 additions, 1 deletion)

@@ -261,7 +261,27 @@ def _create_new_hook(self, old_hook):
     def forward(self, *args, **kwargs):
         if self.disable_adapters or (self.active_adapter not in self.modules_to_save):
             return self.original_module(*args, **kwargs)
-        return self.modules_to_save[self.active_adapter](*args, **kwargs)
+        if "adapter_names" not in kwargs.keys():
+            return self.modules_to_save[self.active_adapter](*args, **kwargs)
+        # Batches requests with similar LoRAs into microbatches

Member:
Let's move this to a sub-method, similar to how we do this for LoRA:

def _mixed_batch_forward(

Also, with this added, I think it makes sense to have a similar method as in LoRA to check the arguments:

def _check_forward_args(self, x, *args, **kwargs):

Of course, we have to be careful not to be too restrictive here, given the other issue that you raised, and since the underlying module could be of any type.


Contributor Author:
Both functions have been added in the new commit; please take a look.
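For illustration only (this is not part of the diff above): one possible shape for the two methods the reviewer asks for, mirroring the `_check_forward_args` / `_mixed_batch_forward` split used by `LoraLayer`. The method names follow that precedent, but the bodies below are a hedged sketch and the actual commit may differ:

import torch


class ModulesToSaveWrapper(torch.nn.Module):  # sketch: only the methods under discussion are shown
    def _check_forward_args(self, x, *args, **kwargs):
        # Stay permissive: the wrapped module can be of any type, so only check what
        # holds generically, namely one adapter name per sample in the batch.
        adapter_names = kwargs.get("adapter_names", None)
        if adapter_names is not None and len(adapter_names) != len(x):
            raise ValueError(
                f"Length of adapter_names ({len(adapter_names)}) must match the batch size ({len(x)})."
            )

    def _mixed_batch_forward(self, x, *args, adapter_names, **kwargs):
        # Group samples that share an adapter and run one sub-batch forward per adapter.
        results = [None] * len(x)
        for adapter in set(adapter_names):
            indices = [i for i, name in enumerate(adapter_names) if name == adapter]
            output = self.modules_to_save[adapter](x[indices], *args, **kwargs)
            for res_idx, batch_idx in enumerate(indices):
                results[batch_idx] = output[res_idx]
        return torch.stack(results)

    def forward(self, x, *args, **kwargs):
        self._check_forward_args(x, *args, **kwargs)
        adapter_names = kwargs.pop("adapter_names", None)
        if self.disable_adapters or (self.active_adapter not in self.modules_to_save):
            return self.original_module(x, *args, **kwargs)
        if adapter_names is None:
            return self.modules_to_save[self.active_adapter](x, *args, **kwargs)
        return self._mixed_batch_forward(x, *args, adapter_names=adapter_names, **kwargs)

Keeping the argument check deliberately loose reflects the reviewer's caveat that the underlying module could be of any type.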

+        adapter_names = kwargs["adapter_names"]
+        kwargs = {}
+        batch = args[0]  # Get the batch dimension
+        unique_adapters = set(adapter_names)
+        sub_batch_indices_list = []
+        for adapter in unique_adapters:
+            sub_batch_indices_list.append(
+                [index for index, item in enumerate(adapter_names) if item == adapter]
+            )
+
+        results = [0 for i in range(len(batch))]
+        for i, active_adapter in enumerate(unique_adapters):
+            sub_batch = batch[sub_batch_indices_list[i]]

Member:
Hmm, here we assume that there is only one positional arg, since any other args would be dropped, right? Also, what if other args or kwargs need to be sliced? We don't really know that, so I think the best we can do is make a guess.

One suggestion that I have:

Check all args and kwargs to see whether they are tensors, and if a tensor has the same length (i.e. the batch size), slice it too. Otherwise, leave it as is. It's not perfect, but I'm not sure what else could be done. WDYT?


Contributor Author:
I changed the input signature in the new version to take x as the input, which avoids the problems you mentioned.
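For reference, the reviewer's heuristic could look roughly like the helper below (hypothetical; `_slice_batch_like` is not a peft function, and the PR ultimately switched to a single `x` argument instead):

import torch


def _slice_batch_like(value, indices, batch_size):
    # Guess-based slicing from the review discussion: slice tensors whose first
    # dimension matches the batch size; leave every other arg / kwarg untouched.
    if isinstance(value, torch.Tensor) and value.ndim > 0 and value.shape[0] == batch_size:
        return value[indices]
    return value


# Inside the per-adapter loop this could be applied as (sketch):
#   sub_args = [_slice_batch_like(a, indices, batch_size) for a in args]
#   sub_kwargs = {k: _slice_batch_like(v, indices, batch_size) for k, v in kwargs.items()}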

+            output = self.modules_to_save[active_adapter](*(sub_batch,), **kwargs)
+            for index, j in enumerate(sub_batch_indices_list[i]):
+                results[j] = output[index]
+        return torch.stack(results)


     def enable_adapters(self, enabled: bool):
         """Toggle the enabling and disabling of adapters
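Taken together, these changes let modules_to_save layers (for example a fine-tuned classification head on a ViT) take part in mixed-adapter batches. A usage sketch under assumed names; the checkpoint id, label count, target modules, and adapter names below are placeholders, not values taken from this PR:

import torch
from transformers import ViTForImageClassification
from peft import LoraConfig, get_peft_model

base = ViTForImageClassification.from_pretrained(
    "google/vit-base-patch16-224", num_labels=10, ignore_mismatched_sizes=True
)
config_1 = LoraConfig(target_modules=["query", "value"], modules_to_save=["classifier"])
config_2 = LoraConfig(target_modules=["query", "value"], modules_to_save=["classifier"])
model = get_peft_model(base, config_1, adapter_name="adapter_1")
model.add_adapter("adapter_2", config_2)

model.eval()
pixel_values = torch.randn(4, 3, 224, 224)  # dummy batch of 4 images
# One adapter name per sample; the forward hooks above route each sample accordingly.
adapter_names = ["adapter_1", "adapter_2", "adapter_2", "adapter_1"]
with torch.no_grad():
    logits = model(pixel_values, adapter_names=adapter_names).logits

Before this PR, the LoRA layers were already routed per sample, but every sample went through the classifier head of the currently active adapter; with the new hook on ModulesToSaveWrapper, the saved head is switched per sample as well.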