
Commit d89e18d

Fix sliding window, print loaded ops
1 parent 35fc918 · commit d89e18d

File tree: 2 files changed (+7 −3 lines)

optimum/executorch/attentions/custom_kv_cache.py

Lines changed: 3 additions & 3 deletions
@@ -210,7 +210,7 @@ def __init__(
         # Use CustomKVCache for global layers and CustomRingKVCache for sliding window layers.
         self.kv_cache = torch.nn.ModuleList()
         for layer in self.layers:
-            if layer.is_sliding():
+            if layer.is_sliding:
                 # This is a sliding window layer
                 layer_cache = CustomRingKVCache(
                     max_batch_size=layer.max_batch_size,
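
The change from `layer.is_sliding()` to `layer.is_sliding` (repeated in the two hunks below) indicates the cache layer objects expose sliding-window status as a plain boolean attribute, not a method, so the old call would fail at runtime. A minimal sketch of the failure mode, with illustrative stand-in classes (the real layer types live in transformers/optimum-executorch):

```python
# Illustrative stand-ins for the cache layer objects; these only model the
# attribute-vs-method bug, not the real classes.
class GlobalLayer:
    is_sliding = False  # plain attribute, not a callable

class SlidingLayer:
    is_sliding = True

for layer in [GlobalLayer(), SlidingLayer()]:
    # The pre-fix code did `layer.is_sliding()`, which raises
    # TypeError: 'bool' object is not callable.
    if layer.is_sliding:
        print("sliding window layer -> CustomRingKVCache")
    else:
        print("global layer -> CustomKVCache")
```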
@@ -281,7 +281,7 @@ def get_seq_length(self, layer_idx: Optional[int] = 0) -> int:
 
         # For CustomRingKVCache, we need to handle the sequence length differently
         layer_cache = self.kv_cache[layer_idx]
-        if self.layers[layer_idx].is_sliding():
+        if self.layers[layer_idx].is_sliding:
             # CustomRingKVCache cache_position_manager which
             # maintains cache position for each slot in the kv cache
             # we return the max position + 1 to indicate max position
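
The surrounding comment describes how a ring cache reports its length: each slot records the absolute position last written into it, and the effective sequence length is the maximum recorded position plus one. The real `cache_position_manager` is not shown in this diff, so the sketch below assumes a `cache_positions` tensor where unwritten slots are marked -1:

```python
import torch

def ring_seq_length(cache_positions: torch.Tensor) -> int:
    """Effective sequence length of a ring KV cache: max written position + 1."""
    # Assumption: unwritten slots hold -1, so an empty cache reports 0.
    return int(cache_positions.max().item()) + 1

# A ring of 8 slots after 11 tokens: positions 3..10 survive, 0..2 were evicted.
positions = torch.tensor([8, 9, 10, 3, 4, 5, 6, 7])
print(ring_seq_length(positions))  # 11
```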
@@ -385,7 +385,7 @@ def _replace_with_et_custom_kv_cache(module, config, generation_config, cache_dt
     for i in range(len(module.cache.kv_cache)):
         setattr(module, f"key_cache_{i}", module.cache.kv_cache[i].k_cache)
         setattr(module, f"value_cache_{i}", module.cache.kv_cache[i].v_cache)
-        if module.cache.layers[i].is_sliding():
+        if module.cache.layers[i].is_sliding:
             # Register cache_positions as buffer for sliding window layers
             # This prevents it from being traced as a constant
             module.register_buffer(
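
The comment on this branch explains why `register_buffer` matters: a tensor stored as a plain Python attribute gets baked into the exported graph as a frozen constant, while a registered buffer is exported as module state the runtime can update. A minimal sketch of the pattern, with an invented module and shape for illustration:

```python
import torch

class CacheHolder(torch.nn.Module):
    def __init__(self, cache_len: int = 128):
        super().__init__()
        # As a buffer, cache_positions survives export as mutable state; as a
        # plain attribute it would be traced as a constant and the ring
        # positions could never advance.
        self.register_buffer("cache_positions", torch.zeros(cache_len, dtype=torch.long))

    def forward(self, pos: torch.Tensor) -> torch.Tensor:
        return self.cache_positions.index_select(0, pos)
```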

optimum/executorch/modeling.py

Lines changed: 4 additions & 0 deletions
@@ -186,6 +186,10 @@ def _from_pretrained(
             subfolder=subfolder,
             local_files_only=local_files_only,
         )
+        from executorch.extension.pybindings.portable_lib import _get_operator_names
+        print("----------- LOADED OPS ----------")
+        print('\n'.join(_get_operator_names()))
+        print("---------------------------------")
         model = _load_for_executorch(model_cache_path)
         logging.info(
             f"Loaded model from {model_cache_path} ({os.path.getsize(model_cache_path) / (1024 * 1024):.2f} MB)"
