# NOTE(review): the original patch also exports `scatter_add` from
# `paddle/__init__.py` and `paddle/tensor/__init__.py` (additions to the
# import lists and to `__all__`); keep those export hunks when applying.
def scatter_add(
    input: Tensor,
    dim: int,
    index: Tensor,
    src: Tensor,
) -> Tensor:
    """
    Scatter the values of ``src`` into ``input`` at the positions given by
    ``index``, accumulating with addition along axis ``dim``.

    This is a thin wrapper around :func:`put_along_axis` with
    ``reduce='add'``, ``include_self=True`` and ``broadcast=False``.

    Args:
        input (Tensor): The input tensor. Supported data types are bfloat16,
            float16, float32, float64, int32, int64, uint8.
        dim (int): The axis to scatter 1d slices along.
        index (Tensor): Indices to scatter along each 1d slice of ``input``.
            Its rank must match the rank of ``input``. Supported data types
            are int32 and int64.
        src (Tensor): The value element(s) to scatter. The data type must be
            the same as that of ``input``.

    Returns:
        Tensor, the scattered result, with the same dtype as ``input``.

    Examples:
        .. code-block:: python

            >>> import paddle

            >>> x = paddle.to_tensor([[10, 20, 30], [40, 50, 60]])
            >>> indices = paddle.zeros((2, 3)).astype("int32")
            >>> values = paddle.to_tensor([[1, 2, 3], [4, 5, 6]]).astype(x.dtype)
            >>> result = paddle.scatter_add(x, 0, indices, values)
            >>> print(result)
            Tensor(shape=[2, 3], dtype=int64, place=Place(cpu), stop_gradient=True,
                   [[15, 27, 39],
                    [40, 50, 60]])

    """
    # include_self=True: accumulate onto the existing values of `input`
    # instead of overwriting them; broadcast=False: index/src must already
    # match input's rank (no implicit broadcasting of the index tensor).
    return put_along_axis(
        input, index, src, dim, 'add', include_self=True, broadcast=False
    )
"""Unit tests for ``paddle.scatter_add`` (an additive wrapper over
``put_along_axis``)."""

import copy
import unittest

import numpy as np
from op_test import get_places
from utils import dygraph_guard

import paddle
from paddle.framework import core
from paddle.static import InputSpec


def scatter_add_net(x, axis=-1):
    """Net used by the dynamic-shape tests: scatter the constant -4.0 at
    constant index 2 everywhere along ``axis``."""
    index = paddle.full_like(x, fill_value=2, dtype='int64')
    value = paddle.full_like(x, fill_value=-4.0, dtype=x.dtype)
    return paddle.scatter_add(x, axis, index, value)


class TestScatterAddAPI(unittest.TestCase):
    def setUp(self):
        np.random.seed(0)
        self.shape = [10, 10]
        self.index_shape = [10, 10]
        self.index_np = np.random.randint(0, 10, (10, 10)).astype('int64')
        self.x_np = np.random.random(self.shape).astype(np.float32)
        self.place = get_places()
        self.axis = 0
        self.value_np = np.random.randint(0, 10, (10, 10)).astype(np.float32)
        self.value_shape = [10, 10]
        self.x_feed = copy.deepcopy(self.x_np)

    def _numpy_ref(self):
        """Reference result: scatter-add along axis 0 with plain numpy.
        Duplicate indices accumulate, matching scatter_add semantics."""
        target = copy.deepcopy(self.x_np)
        for i in range(10):
            for j in range(10):
                target[self.index_np[i, j], j] += self.value_np[i, j]
        return target

    def test_api_static(self):
        paddle.enable_static()

        def run(place):
            with paddle.static.program_guard(paddle.static.Program()):
                x = paddle.static.data('X', self.shape)
                index = paddle.static.data('Index', self.index_shape, "int64")
                value = paddle.static.data('Value', self.value_shape)
                out = paddle.scatter_add(x, self.axis, index, value)
                # Run on the place under test; the original always used
                # self.place[0], which made the loop over places pointless.
                exe = paddle.static.Executor(place)
                res = exe.run(
                    feed={
                        'X': self.x_feed,
                        'Value': self.value_np,
                        'Index': self.index_np,
                    },
                    fetch_list=[out],
                )
                out_ref = self._numpy_ref()
                for fetched in res:
                    np.testing.assert_allclose(fetched, out_ref, rtol=0.001)

        for place in self.place:
            run(place)

    def test_api_dygraph(self):
        def run(place):
            paddle.disable_static(place)
            out = paddle.scatter_add(
                paddle.to_tensor(self.x_np),
                self.axis,
                paddle.to_tensor(self.index_np),
                paddle.to_tensor(self.value_np),
            )
            np.testing.assert_allclose(
                out.numpy(), self._numpy_ref(), rtol=0.001
            )
            paddle.enable_static()

        for place in self.place:
            run(place)


@unittest.skipIf(
    not core.is_compiled_with_cuda(),
    "core is not compiled with CUDA",
)
class TestScatterAddAPILargeCase(unittest.TestCase):
    def setUp(self):
        np.random.seed(0)
        self.shape = [64, 102400]
        self.index_shape = [64, 102400]
        self.index_np = np.zeros(self.index_shape).astype('int64')
        self.x_np = np.random.random(self.shape).astype(np.float32)
        self.axis = 1
        self.value_np = np.ones(self.index_shape).astype(np.float32)
        self.x_feed = copy.deepcopy(self.x_np)
        self.place = [paddle.CUDAPlace(0)]

    def test_api_dygraph(self):
        def run(place):
            paddle.disable_static(place)
            out = paddle.scatter_add(
                paddle.to_tensor(self.x_np),
                self.axis,
                paddle.to_tensor(self.index_np),
                paddle.to_tensor(self.value_np),
            )
            # Vectorized reference for the axis=1 scatter. np.add.at is
            # unbuffered, so repeated indices accumulate exactly like the
            # original ~6.5M-iteration Python loop, just far faster.
            rows = np.arange(self.shape[0]).reshape(-1, 1)
            np.add.at(self.x_np, (rows, self.index_np), self.value_np)
            np.testing.assert_allclose(out.numpy(), self.x_np, rtol=0.001)
            paddle.enable_static()

        for place in self.place:
            run(place)


class TestScatterAddAPIOtherCase(unittest.TestCase):
    def setUp(self):
        np.random.seed(0)
        self.shape = [3, 5]
        self.index1_shape = [1, 4]
        self.index_np1 = np.array([[0, 1, 2, 0]]).astype('int64')
        self.index2_shape = [2, 3]
        self.index_np2 = np.array([[0, 1, 2], [0, 1, 4]]).astype('int64')
        self.x_np = np.zeros((3, 5)).astype(np.float32)
        self.value_shape = [2, 5]
        self.value = (
            np.arange(1, 11).reshape(self.value_shape).astype(np.float32)
        )
        self.place = get_places()

    def _ref_axis0(self):
        """Reference for scatter along axis 0 with the smaller index1."""
        out_ref = copy.deepcopy(self.x_np)
        for i in range(self.index1_shape[0]):
            for j in range(self.index1_shape[1]):
                out_ref[self.index_np1[i, j], j] += self.value[i, j]
        return out_ref

    def _ref_axis1(self):
        """Reference for scatter along axis 1 with index2."""
        out_ref = copy.deepcopy(self.x_np)
        for i in range(self.index2_shape[0]):
            for j in range(self.index2_shape[1]):
                out_ref[i, self.index_np2[i, j]] += self.value[i, j]
        return out_ref

    def test_api_dygraph(self):
        def run(place):
            paddle.disable_static(place)
            x_tensor = paddle.to_tensor(self.x_np)
            value_tensor = paddle.to_tensor(self.value)

            # Index smaller than x along the scatter axis (axis 0).
            out = paddle.scatter_add(
                x_tensor, 0, paddle.to_tensor(self.index_np1), value_tensor
            )
            np.testing.assert_allclose(out.numpy(), self._ref_axis0(), rtol=0.001)

            # Index smaller than x along the scatter axis (axis 1).
            out = paddle.scatter_add(
                x_tensor, 1, paddle.to_tensor(self.index_np2), value_tensor
            )
            np.testing.assert_allclose(out.numpy(), self._ref_axis1(), rtol=0.001)

            paddle.enable_static()

        for place in self.place:
            run(place)

    def test_api_static(self):
        paddle.enable_static()

        def run(place):
            with paddle.static.program_guard(paddle.static.Program()):
                x1 = paddle.static.data('X', self.shape)
                index1 = paddle.static.data('Index', self.index1_shape, "int64")
                # NOTE(review): src is passed as a literal tensor, so the
                # 'Value' entry in the feed dict below is unused — kept for
                # parity with the original test.
                value_tensor = paddle.to_tensor(self.value)
                out1 = paddle.scatter_add(x1, 0, index1, value_tensor)
                exe = paddle.static.Executor(place)
                res = exe.run(
                    feed={
                        'X': self.x_np,
                        'Value': self.value,
                        'Index': self.index_np1,
                    },
                    fetch_list=[out1],
                )
                for fetched in res:
                    np.testing.assert_allclose(
                        fetched, self._ref_axis0(), rtol=0.001
                    )

            with paddle.static.program_guard(paddle.static.Program()):
                x2 = paddle.static.data('X', self.shape)
                index2 = paddle.static.data('Index', self.index2_shape, "int64")
                value_tensor = paddle.to_tensor(self.value)
                out2 = paddle.scatter_add(x2, 1, index2, value_tensor)
                exe = paddle.static.Executor(place)
                res = exe.run(
                    feed={
                        'X': self.x_np,
                        'Value': self.value,
                        'Index': self.index_np2,
                    },
                    fetch_list=[out2],
                )
                for fetched in res:
                    np.testing.assert_allclose(
                        fetched, self._ref_axis1(), rtol=0.001
                    )

        for place in self.place:
            run(place)

    def test_error(self):
        # Using assertRaises (instead of try/except + assertIsInstance)
        # guarantees the test FAILS if no exception is raised at all.
        tensorx = paddle.to_tensor([[1, 2, 3], [4, 5, 6]]).astype("float32")
        indices = paddle.to_tensor([[1, 0, 1], [0, 1, 1]]).astype("int32")
        values = paddle.to_tensor([1])

        # src has fewer elements than indices.
        with self.assertRaises(ValueError):
            paddle.scatter_add(tensorx, 0, indices, values)

        indices = paddle.to_tensor([1]).astype("int32")
        values = paddle.to_tensor([[1, 2, 3], [4, 5, 6]])

        # indices has fewer elements than src.
        with self.assertRaises(ValueError):
            paddle.scatter_add(tensorx, 0, indices, values)

        # indices too large for input along the scatter axis.
        indices = paddle.to_tensor(
            [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]
        ).astype("int32")
        with self.assertRaises(RuntimeError):
            paddle.scatter_add(tensorx, 0, indices, values)

        # Elements of indices out of the valid range.
        indices = paddle.to_tensor([[3, 0, 4], [0, 5, 10]]).astype("int32")
        with self.assertRaises(RuntimeError):
            paddle.scatter_add(tensorx, 0, indices, values)

    def test_index_type_error(self):
        # A floating-point index tensor must be rejected.
        tensorx = paddle.to_tensor([[1, 2, 3], [4, 5, 6]]).astype("float32")
        indices = paddle.to_tensor([[1, 0, 1], [0, 1, 1]]).astype("float32")
        values = paddle.to_tensor([[1, 2, 3], [4, 5, 6]])
        with self.assertRaises(TypeError):
            paddle.scatter_add(tensorx, 0, indices, values)


class TestScatterAddAPIDynamicShape(unittest.TestCase):
    def setUp(self):
        np.random.seed(2024)
        self.net = scatter_add_net
        self.enable_cinn = False
        self.tol = 1e-6
        self.dtype = "float32"
        self.axis = -2
        self.input_specs = [
            InputSpec(
                shape=(-1, -1, -1, -1),
                dtype=self.dtype,
                stop_gradient=False,
            )
        ]
        self.arr = np.random.random([10, 10, 10, 10]).astype(self.dtype)

    def train(self, to_static):
        """Run the net (optionally under jit.to_static) and return the
        output together with the input gradient."""
        arr = paddle.to_tensor(self.arr, stop_gradient=False)
        if to_static:
            backend = "CINN" if self.enable_cinn else None
            net = paddle.jit.to_static(
                self.net,
                input_spec=self.input_specs,
                backend=backend,
                full_graph=True,
            )
            net.train()
        else:
            net = self.net

        res = net(arr, self.axis)
        res.backward()
        return res, arr.grad

    def test_dynamic_static(self):
        # Dygraph and to_static must agree on both outputs and gradients.
        with dygraph_guard():
            st_out, st_grads = self.train(to_static=True)
            dy_out, dy_grads = self.train(to_static=False)

            for ref, actual in zip(dy_out, st_out):
                np.testing.assert_allclose(
                    ref, actual, rtol=self.tol, atol=self.tol
                )

            for dr, d in zip(dy_grads, st_grads):
                np.testing.assert_allclose(dr, d, rtol=self.tol, atol=self.tol)


class TestScatterAddAPIDynamicShape1(TestScatterAddAPIDynamicShape):
    def setUp(self):
        super().setUp()
        # Re-seed so the array matches the original (non-inheriting) setUp.
        np.random.seed(2024)
        self.axis = 0
        self.arr = np.random.random([16, 16, 16, 16]).astype(self.dtype)


class TestScatterAddAPIDynamicShape2(TestScatterAddAPIDynamicShape):
    def setUp(self):
        super().setUp()
        np.random.seed(2024)
        self.axis = -1
        self.arr = np.random.random([20, 20, 20, 20]).astype(self.dtype)


class TestScatterAddAPIDynamicShape3(TestScatterAddAPIDynamicShape):
    def setUp(self):
        super().setUp()
        np.random.seed(2024)
        self.axis = 3
        self.arr = np.random.random([32, 32, 32, 32]).astype(self.dtype)


class TestScatterAddAPIDynamicShape_ZeroSize(TestScatterAddAPIDynamicShape):
    def setUp(self):
        super().setUp()
        np.random.seed(2024)
        self.axis = -2
        # Zero-sized leading dimension exercises the empty-input path.
        self.arr = np.random.random([0, 10, 10, 10]).astype(self.dtype)


if __name__ == "__main__":
    paddle.enable_static()
    unittest.main()