@@ -54,12 +54,12 @@ def __init__(
 
         # Create a list of CustomKVCache instances, one per layer
         self.kv_cache = torch.nn.ModuleList()
-        for _ in range(config.num_hidden_layers):
+        for layer in self.layers:
             layer_cache = CustomKVCache(
-                max_batch_size=self.max_batch_size,
-                max_context_length=self.max_cache_len,
-                n_heads=self.num_key_value_heads,
-                head_dim=self.head_dim,
+                max_batch_size=layer.max_batch_size,
+                max_context_length=layer.max_cache_len,
+                n_heads=layer.num_heads,
+                head_dim=layer.head_dim,
                 dtype=dtype,
             )
             self.kv_cache.append(layer_cache)
@@ -202,32 +202,29 @@ def __init__(
             layer_device_map=layer_device_map,
         )
 
-        # make sure layer_device_map is none
         assert layer_device_map is None
         assert device is None or device == "cpu", "Device must be None or 'cpu'"
 
         self.cache_position = None
-        # Create a list of cache instances, one per layer
-        # Use CustomKVCache for global layers and CustomRingKVCache for sliding window layers
+        # Create a list of cache instances, one per layer.
+        # Use CustomKVCache for global layers and CustomRingKVCache for sliding window layers.
         self.kv_cache = torch.nn.ModuleList()
-        for layer_idx in range(config.num_hidden_layers):
-            # newer version of transfomer has is_sliding defined
-            # for HybridCache
-            if self.is_sliding[layer_idx]:
+        for layer in self.layers:
+            if layer.is_sliding():
                 # This is a sliding window layer
                 layer_cache = CustomRingKVCache(
-                    max_batch_size=self.max_batch_size,
-                    max_context_length=self.sliding_window_len,
-                    n_heads=self.num_key_value_heads,
-                    head_dim=self.head_dim,
+                    max_batch_size=layer.max_batch_size,
+                    max_context_length=layer.max_cache_len,
+                    n_heads=layer.num_heads,
+                    head_dim=layer.head_dim,
                     dtype=dtype,
                 )
             else:
                 layer_cache = CustomKVCache(
-                    max_batch_size=self.max_batch_size,
-                    max_context_length=self.max_cache_len,
-                    n_heads=self.num_key_value_heads,
-                    head_dim=self.head_dim,
+                    max_batch_size=layer.max_batch_size,
+                    max_context_length=layer.max_cache_len,
+                    n_heads=layer.num_heads,
+                    head_dim=layer.head_dim,
                     dtype=dtype,
                 )
             self.kv_cache.append(layer_cache)
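For context only (this note and sketch are not part of the diff): the hybrid `__init__` now sizes each cache from the per-layer objects in `self.layers` and asks each layer `is_sliding()`, instead of indexing an `is_sliding` list built from `config.num_hidden_layers`. A minimal runnable sketch of that per-layer dispatch follows; `LayerSpec`, `GlobalKVCache`, and `RingKVCache` are hypothetical stand-ins for the real transformers layer objects and the ExecuTorch `CustomKVCache`/`CustomRingKVCache` classes.

```python
from dataclasses import dataclass
from typing import List

import torch


@dataclass
class LayerSpec:
    # Hypothetical per-layer descriptor exposing the fields the new loop reads.
    max_batch_size: int
    max_cache_len: int
    num_heads: int
    head_dim: int
    sliding_window: bool = False

    def is_sliding(self) -> bool:
        return self.sliding_window


class GlobalKVCache(torch.nn.Module):
    # Stand-in for CustomKVCache: statically sized K/V buffers for one layer.
    def __init__(self, max_batch_size, max_context_length, n_heads, head_dim, dtype):
        super().__init__()
        shape = (max_batch_size, n_heads, max_context_length, head_dim)
        self.register_buffer("k_cache", torch.zeros(shape, dtype=dtype))
        self.register_buffer("v_cache", torch.zeros(shape, dtype=dtype))


class RingKVCache(GlobalKVCache):
    # Stand-in for CustomRingKVCache: same storage, intended to be written
    # modulo max_context_length so a sliding window overwrites old entries.
    pass


def build_hybrid_kv_caches(layers: List[LayerSpec], dtype=torch.float32) -> torch.nn.ModuleList:
    # Same construction pattern as the new loop: pick the cache type per layer,
    # sized from the layer object rather than from the model config.
    caches = torch.nn.ModuleList()
    for layer in layers:
        cache_cls = RingKVCache if layer.is_sliding() else GlobalKVCache
        caches.append(
            cache_cls(
                max_batch_size=layer.max_batch_size,
                max_context_length=layer.max_cache_len,
                n_heads=layer.num_heads,
                head_dim=layer.head_dim,
                dtype=dtype,
            )
        )
    return caches
```

Note that the sliding branch now passes `layer.max_cache_len` where it previously passed `self.sliding_window_len`, presumably because the per-layer object already carries the window length for sliding layers.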
@@ -284,7 +281,7 @@ def get_seq_length(self, layer_idx: Optional[int] = 0) -> int:
 
         # For CustomRingKVCache, we need to handle the sequence length differently
         layer_cache = self.kv_cache[layer_idx]
-        if self.is_sliding[layer_idx]:
+        if self.layers[layer_idx].is_sliding():
             # CustomRingKVCache cache_position_manager which
             # maintains cache position for each slot in the kv cache
             # we return the max position + 1 to indicate max position
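Aside, not part of the diff: the comment above says that for ring caches the sequence length is taken as the maximum tracked position plus one. A tiny illustration of that arithmetic, using a made-up per-slot position tensor in place of whatever the real cache-position manager stores:

```python
import torch

# Hypothetical per-slot positions for a ring buffer of size 4 that has already
# wrapped: the slots currently hold tokens from absolute positions 4, 5, 2, 3.
cache_positions = torch.tensor([4, 5, 2, 3])

# "max position + 1" reports 6 tokens seen so far, even though only the most
# recent 4 are still resident in the sliding window.
seq_length = int(cache_positions.max().item()) + 1
assert seq_length == 6
```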
@@ -308,7 +305,7 @@ def get_layer_cache(self, layer_idx: int):
 
 def replace_with_et_custom_kv_cache(module, config, generation_config, cache_dtype):
     """
-    Replace all KV caches in the module with ETCustomStaticCache.
+    Replace all KV caches in the module with ETCustomStaticCache or ETCustomHybridCache.
     This modifies the model in place.
 
     Args:
@@ -342,18 +339,18 @@ def _replace_with_et_custom_kv_cache(module, config, generation_config, cache_dt
         if getattr(module, "replace_cache", None) is not None:
             static_cache = ETCustomStaticCache(
                 config=config,
-                max_batch_size=generation_config.cache_config.batch_size,
-                max_cache_len=generation_config.cache_config.max_cache_len,
-                device=generation_config.cache_config.device,
+                max_batch_size=generation_config.cache_config.get("batch_size"),
+                max_cache_len=generation_config.cache_config.get("max_cache_len"),
+                device=generation_config.cache_config.get("device"),
                 dtype=cache_dtype,
             )
             module.replace_cache(static_cache)
         else:
             module.static_cache = ETCustomStaticCache(
                 config=config,
-                max_batch_size=generation_config.cache_config.batch_size,
-                max_cache_len=generation_config.cache_config.max_cache_len,
-                device=generation_config.cache_config.device,
+                max_batch_size=generation_config.cache_config.get("batch_size"),
+                max_cache_len=generation_config.cache_config.get("max_cache_len"),
+                device=generation_config.cache_config.get("device"),
                 dtype=cache_dtype,
             )
             # Dont know why we need to this even though
@@ -370,25 +367,25 @@ def _replace_with_et_custom_kv_cache(module, config, generation_config, cache_dt
         if getattr(module, "replace_cache", None) is not None:
             hybrid_cache = ETCustomHybridCache(
                 config=config,
-                max_batch_size=generation_config.cache_config.batch_size,
-                max_cache_len=generation_config.cache_config.max_cache_len,
-                device=generation_config.cache_config.device,
+                max_batch_size=generation_config.cache_config.get("batch_size"),
+                max_cache_len=generation_config.cache_config.get("max_cache_len"),
+                device=generation_config.cache_config.get("device"),
                 dtype=cache_dtype,
             )
             module.replace_cache(hybrid_cache)
         else:
             module.cache = ETCustomHybridCache(
                 config=config,
-                max_batch_size=generation_config.cache_config.batch_size,
-                max_cache_len=generation_config.cache_config.max_cache_len,
-                device=generation_config.cache_config.device,
+                max_batch_size=generation_config.cache_config.get("batch_size"),
+                max_cache_len=generation_config.cache_config.get("max_cache_len"),
+                device=generation_config.cache_config.get("device"),
                 dtype=cache_dtype,
             )
             # Register cache attributes for each layer
             for i in range(len(module.cache.kv_cache)):
                 setattr(module, f"key_cache_{i}", module.cache.kv_cache[i].k_cache)
                 setattr(module, f"value_cache_{i}", module.cache.kv_cache[i].v_cache)
-                if module.cache.is_sliding[i]:
+                if module.cache.layers[i].is_sliding():
                     # Register cache_positions as buffer for sliding window layers
                     # This prevents it from being traced as a constant
                     module.register_buffer(
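Finally, a hedged usage sketch (not from this commit) of how the rewritten helper might be driven once `cache_config` is read with `.get(...)`: the dict below and the surrounding wiring are illustrative assumptions, not the project's actual export code; only the `replace_with_et_custom_kv_cache(module, config, generation_config, cache_dtype)` signature and the `batch_size`/`max_cache_len`/`device` keys are taken from the diff.

```python
import torch


class DummyGenerationConfig:
    # Illustrative stand-in for a generation config whose cache_config supports
    # dict-style .get("batch_size") / .get("max_cache_len") / .get("device"),
    # matching the lookups introduced in the hunks above.
    def __init__(self):
        self.cache_config = {
            "batch_size": 1,
            "max_cache_len": 1024,
            "device": None,  # the caches assert device is None or "cpu"
        }


def swap_in_et_caches(exportable_module, model_config, replace_with_et_custom_kv_cache):
    # The helper mutates the module in place (per its docstring), replacing the
    # HF StaticCache/HybridCache with ETCustomStaticCache/ETCustomHybridCache.
    generation_config = DummyGenerationConfig()
    replace_with_et_custom_kv_cache(
        exportable_module,
        model_config,
        generation_config,
        cache_dtype=torch.float32,
    )
    return exportable_module
```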