microsoft · justinchuby · Jan 24, 2025 · Jan 17, 2025 · Jan 17, 2025 · Jan 17, 2025
diff --git a/onnxscript/function_libs/torch_lib/ops/core.py b/onnxscript/function_libs/torch_lib/ops/core.py
@@ -4297,36 +4297,8 @@
     """index_put(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False) -> Tensor"""
 
     index = op.SequenceAt(indices, 0)  # assume indices only have 1 element
-    # FIXME: ORT ArgMax fails on INT64 input even though ONNX allows it
-    index_int = op.Cast(index, to=INT32.dtype)
-    # if all False, return op.Identity(self)
-    if op.ReduceSum(index_int) == 0:
-        result = self
-    else:
-        # change array([F,F,T,F,F]) to array([2])
-        index = op.ArgMax(index_int)  # assume index only have 1 True
-        # change array([2]) to array([2,2,2,2,2])
-        self_dim_1 = op.Shape(self, start=1, end=2)
-        index_dim_0 = op.Shape(index, start=0, end=1)
-        shape = op.Concat(self_dim_1, index_dim_0, axis=0)
-        new_ind = op.Expand(index, shape)
-        new_ind_t = op.Transpose(new_ind)
-
-        # values must have same rank with input(self)
-        if op.Size(op.Shape(values)) < op.Size(op.Shape(self)):  # type: ignore[operator]
-            values = op.Unsqueeze(values, op.Constant(value_ints=[0]))
-
-        if op.Cast(accumulate, to=BOOL.dtype):
-            zeros = op.Expand(op.Constant(value_float=0.0), op.Shape(self))
-            zeros = op.CastLike(zeros, values)
-            result = op.ScatterElements(zeros, new_ind_t, values)
-            # FIXME: type promotion
-            result = op.CastLike(result, self)
-            result = op.Add(result, self)
-        else:
-            result = op.ScatterElements(self, new_ind_t, values)
-
-    return result
+    # accumulate is not used here: it should always be False (True does not make sense). TODO: assert this.
+    return op.Where(index, values, self)
 
 
 def aten_index_reduce(

diff --git a/tests/function_libs/torch_lib/e2e_test.py b/tests/function_libs/torch_lib/e2e_test.py
@@ -0,0 +1,60 @@
+import unittest
+
+import onnx
+import torch
+
+from onnxscript._internal.version_utils import torch_older_than
+
+
+class TestEnd2End(unittest.TestCase):  # export a model with dynamo=True, run it in the ONNX reference evaluator, compare with eager PyTorch
+    @unittest.skipIf(torch_older_than("2.6"), reason="fails to export")
+    def test_adaptive_enc_mask(self):
+        def adaptive_enc_mask(x_len, chunk_start_idx, left_window=0, right_window=0):
+            # chunk_start_idx: first index of each chunk, e.g. [0, 18, 36, 48]; converted to an int64 tensor.
+            chunk_start_idx = torch.Tensor(chunk_start_idx).long()
+            # prepend one 0 (pad's default fill value), so it becomes [0, 0, 18, 36, 48]
+            start_pad = torch.nn.functional.pad(chunk_start_idx, (1, 0))
+            # append x_len to the end, so it becomes [0, 18, 36, 48, x_len]
+            end_pad = torch.nn.functional.pad(chunk_start_idx, (0, 1), value=x_len)
+            # seq_range: positions 0..x_len-1 as a column, size [x_len, 1]
+            seq_range = torch.arange(0, x_len).unsqueeze(-1)
+            # idx: column index of the [start_pad, end_pad) interval containing each position; size [x_len] when every position hits exactly one interval
+            idx = ((seq_range < end_pad) & (seq_range >= start_pad)).nonzero()[:, 1]
+            # NOTE: the per-position chunk end, end_pad[idx] (size [x_len]), is not needed here;
+            # only the window-shifted boundaries computed below are used.
+            # seq_range_expand: positions 0..x_len-1 repeated on every row, size [x_len, x_len]
+            seq_range_expand = torch.arange(0, x_len).unsqueeze(0).expand(x_len, -1)
+            idx_left = idx - left_window
+            idx_left[idx_left < 0] = 0  # clamp to the first interval; presumably the boolean-mask index_put this test targets — TODO confirm
+            boundary_left = start_pad[idx_left]
+            mask_left = seq_range_expand >= boundary_left.unsqueeze(-1)
+            idx_right = idx + right_window
+            idx_right[idx_right > len(chunk_start_idx)] = len(chunk_start_idx)  # clamp to the last valid index of end_pad
+            boundary_right = end_pad[idx_right]
+            mask_right = seq_range_expand < boundary_right.unsqueeze(-1)
+            return mask_left & mask_right  # [x_len, x_len] bool: row i is True for columns in [boundary_left[i], boundary_right[i])
+
+        class MyModule(torch.nn.Module):
+            def forward(self, X):
+                x_len = 10  # 368
+                chunk_start_idx = [4]
+                left_window = 18
+                result = adaptive_enc_mask(x_len, chunk_start_idx, left_window, right_window=0)
+                return X + torch.unsqueeze(result, -1)  # mask becomes [x_len, x_len, 1] and is broadcast-added to X
+
+        torch_model = MyModule()
+        torch_model.eval()
+        inputs = (torch.randn(1, 1, 368),)
+        expected = torch_model(*inputs)
+
+        program = torch.onnx.export(torch_model, inputs, dynamo=True)
+        # Debug aid: program.save(r"test_adaptive_enc_mask_not_optimized.onnx") would dump the un-optimized model.
+        program.optimize()
+        program.save(r"test_adaptive_enc_mask.onnx")  # NOTE(review): writes into the current working directory and is never cleaned up
+        ref = onnx.reference.ReferenceEvaluator(program.model_proto)
+        got = ref.run(None, {"x": inputs[0].numpy()})  # assumes the exported graph input is named "x" (forward's parameter is `X`) — TODO confirm
+        torch.testing.assert_close(expected, torch.tensor(got[0]))
+
+
+if __name__ == "__main__":  # allow running this test module directly as a script
+    unittest.main(verbosity=2)