Commit f735b5f

Guang Yang committed
export cache_position dynamically
1 parent 22ea304 commit f735b5f

File tree: 2 files changed (+7, -4 lines)


optimum/exporters/executorch/integrations.py

Lines changed: 3 additions & 1 deletion
@@ -75,7 +75,9 @@ def export(
         )
 
         with torch.no_grad():
-            exported_program = exportable_module.export(example_input_ids, example_cache_position)
+            exported_program = exportable_module.export(
+                example_input_ids, example_cache_position, dynamic_shapes, strict
+            )
             # Apply RemoveTransposes pass to remove
             # any back-to-back transpose ops that are not needed
             # e.g. output of update_cache is transposed and
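
For context, here is a minimal, hypothetical sketch of how a wrapper like exportable_module.export can forward the new dynamic_shapes and strict arguments through to torch.export.export. The ToyDecoder module and export_module helper below are illustrative assumptions, not the repository's actual classes; only torch.export.export and its parameters are real PyTorch API.

import torch


class ToyDecoder(torch.nn.Module):
    # Hypothetical stand-in for a decoder that consumes input_ids and a cache_position tensor.
    def forward(self, input_ids, cache_position):
        # Trivial computation that touches the sequence dimension of both inputs.
        return input_ids.float() + cache_position.float()


def export_module(module, example_input_ids, example_cache_position, dynamic_shapes=None, strict=True):
    # Assumed wrapper mirroring the new call site: dynamic_shapes and strict are
    # passed through to torch.export.export instead of being fixed internally.
    with torch.no_grad():
        return torch.export.export(
            module,
            (example_input_ids, example_cache_position),
            dynamic_shapes=dynamic_shapes,
            strict=strict,
        )

Passing the shape spec in from the caller means each export recipe can decide for itself which dimensions become dynamic.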

optimum/exporters/executorch/recipes/xnnpack.py

Lines changed: 4 additions & 3 deletions
@@ -97,10 +97,11 @@ def _lower_to_executorch(
         return et_progs
 
     # Make the sequence length dim dynamic in order to leverage parallel prefill in ExecuTorch runtime.
-    seq_length = 7
+    seq_length = 3
     input_ids = torch.zeros((1, seq_length), dtype=torch.long)
-    cache_position = torch.tensor([0], dtype=torch.long)
-    dynamic_shapes = {"input_ids": {1: torch.export.Dim.DYNAMIC}, "cache_position": None}
+    cache_position = torch.tensor([0, 1, 2], dtype=torch.long).unsqueeze(0)  # llama runner expects cache_pos to be 2d
+    seq_len_dim = torch.export.Dim("seq_length_dim", max=128 - 1)
+    dynamic_shapes = {"input_ids": {1: seq_len_dim}, "cache_position": {1: seq_len_dim}}
     strict = parse(torch.__version__) != parse("2.7.0")  # Due to bug https://github.com/pytorch/pytorch/issues/150994
     exported_progs = model.export(
         input_ids=input_ids,
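
As a standalone sketch of what the new spec expresses: one named Dim shared by input_ids and a 2D cache_position, bounded by the static cache length (128 here simply matches the hard-coded max=128 - 1 in the diff). Previously cache_position was exported with a static single-element shape, which presumably prevented the exported graph from consuming a multi-token prefill; tying both sequence dimensions to the same Dim lets it accept any prompt length up to the cache bound.

import torch

max_cache_len = 128  # assumed static cache length; the diff hard-codes max=128 - 1
seq_length = 3       # small example prompt used to trace the export

input_ids = torch.zeros((1, seq_length), dtype=torch.long)
# 2D cache_position of shape (1, seq_length), matching what the llama runner expects.
cache_position = torch.arange(seq_length, dtype=torch.long).unsqueeze(0)

# One named Dim shared by both inputs keeps their sequence dimensions equal,
# and the upper bound keeps prefill lengths within the static cache.
seq_len_dim = torch.export.Dim("seq_length_dim", max=max_cache_len - 1)
dynamic_shapes = {
    "input_ids": {1: seq_len_dim},
    "cache_position": {1: seq_len_dim},
}

These example tensors and the dynamic_shapes dict are what the recipe then hands to model.export(...) above, together with the strict flag.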
