diff --git a/test/ir/inference/test_trt_convert_where.py b/test/ir/inference/test_trt_convert_where.py
index fafa1a137fa9b5..e4a27504aaf95e 100644
--- a/test/ir/inference/test_trt_convert_where.py
+++ b/test/ir/inference/test_trt_convert_where.py
@@ -12,9 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
 import unittest
 from functools import partial
-from typing import List
 
 import numpy as np
 from program_config import ProgramConfig, TensorConfig
@@ -110,7 +111,7 @@ def generate_input3(dims, batch):
 
     def sample_predictor_configs(
         self, program_config
-    ) -> (paddle_infer.Config, List[int], float):
+    ) -> tuple[paddle_infer.Config, list[int], float]:
         def generate_dynamic_shape(attrs):
             if self.dims == 1:
                 self.dynamic_shape.min_input_shape = {
diff --git a/test/ir/inference/test_trt_convert_yolo_box.py b/test/ir/inference/test_trt_convert_yolo_box.py
index 079db6e2039011..9537154d0685a7 100644
--- a/test/ir/inference/test_trt_convert_yolo_box.py
+++ b/test/ir/inference/test_trt_convert_yolo_box.py
@@ -18,7 +18,7 @@
 import unittest
 from functools import partial
 from itertools import product
-from typing import Any, Generator
+from typing import TYPE_CHECKING, Any
 
 import numpy as np
 from program_config import ProgramConfig, TensorConfig
@@ -26,6 +26,9 @@
 
 import paddle.inference as paddle_infer
 
+if TYPE_CHECKING:
+    from collections.abc import Generator
+
 
 class TrtConvertYoloBoxTest(TrtLayerAutoScanTest):
     def is_program_valid(self, program_config: ProgramConfig) -> bool:
diff --git a/test/ir/inference/test_trt_convert_yolo_box_head.py b/test/ir/inference/test_trt_convert_yolo_box_head.py
index df53e2fec654f7..f61b43821427ae 100644
--- a/test/ir/inference/test_trt_convert_yolo_box_head.py
+++ b/test/ir/inference/test_trt_convert_yolo_box_head.py
@@ -12,9 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
 import unittest
 from functools import partial
-from typing import Any, Dict, List
+from typing import Any
 
 import numpy as np
 from program_config import ProgramConfig, TensorConfig
@@ -25,7 +27,7 @@
 
 class TrtConvertYoloBoxHeadTest(TrtLayerAutoScanTest):
     def sample_program_configs(self):
-        def generate_input(attrs: List[Dict[str, Any]], batch, shape):
+        def generate_input(attrs: list[dict[str, Any]], batch, shape):
             gen_shape = shape.copy()
             gen_shape.insert(0, batch)
             return np.random.uniform(0, 1, gen_shape).astype("float32")
@@ -76,7 +78,7 @@ def generate_input(attrs: List[Dict[str, Any]], batch, shape):
 
     def sample_predictor_configs(
         self, program_config
-    ) -> (paddle_infer.Config, List[int], float):
+    ) -> tuple[paddle_infer.Config, list[int], float]:
         # for static_shape
         self.trt_param.precision = paddle_infer.PrecisionType.Float32
         program_config.set_input_type(np.float32)
diff --git a/test/ir/inference/test_trt_float64.py b/test/ir/inference/test_trt_float64.py
index 9e707f31ba5ff1..c65d7d578de865 100644
--- a/test/ir/inference/test_trt_float64.py
+++ b/test/ir/inference/test_trt_float64.py
@@ -12,9 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
 import unittest
 from functools import partial
-from typing import List
 
 import numpy as np
 from program_config import ProgramConfig, TensorConfig
@@ -76,7 +77,7 @@ def generate_input(shape, op_type):
 
     def sample_predictor_configs(
         self, program_config
-    ) -> (paddle_infer.Config, List[int], float):
+    ) -> tuple[paddle_infer.Config, list[int], float]:
         def generate_dynamic_shape(attrs):
             self.dynamic_shape.min_input_shape = {
                 "input_data1": [1, 4, 4, 4],
diff --git a/test/ir/inference/test_trt_int64.py b/test/ir/inference/test_trt_int64.py
index 2ea5aef705bcdb..a10faef5a73c7b 100644
--- a/test/ir/inference/test_trt_int64.py
+++ b/test/ir/inference/test_trt_int64.py
@@ -12,9 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
 import unittest
 from functools import partial
-from typing import Any, Dict, List
+from typing import Any
 
 import numpy as np
 from program_config import ProgramConfig, TensorConfig
@@ -61,7 +63,7 @@ def is_program_valid(self, program_config: ProgramConfig) -> bool:
         return True
 
     def sample_program_configs(self):
-        def generate_input1(attrs: List[Dict[str, Any]]):
+        def generate_input1(attrs: list[dict[str, Any]]):
             return (10 * np.random.random([6, 6, 64, 64])).astype(np.int64)
 
         for axes in [[0, 1], [1, 3], [2, 3]]:
@@ -108,7 +110,7 @@ def generate_input1(attrs: List[Dict[str, Any]]):
 
     def sample_predictor_configs(
         self, program_config
-    ) -> (paddle_infer.Config, List[int], float):
+    ) -> tuple[paddle_infer.Config, list[int], float]:
         def generate_dynamic_shape(attrs):
             self.dynamic_shape.min_input_shape = {"input_data": [1, 3, 32, 32]}
             self.dynamic_shape.max_input_shape = {"input_data": [8, 8, 64, 64]}
@@ -187,7 +189,7 @@ def generate_input(shape, op_type):
 
     def sample_predictor_configs(
         self, program_config
-    ) -> (paddle_infer.Config, List[int], float):
+    ) -> tuple[paddle_infer.Config, list[int], float]:
         def generate_dynamic_shape(attrs):
             if self.dims == 3:
                 self.dynamic_shape.min_input_shape = {
diff --git a/test/ir/inference/test_trt_ops_fp32_mix_precision.py b/test/ir/inference/test_trt_ops_fp32_mix_precision.py
index 188ef1e10668dc..b978491069b43c 100644
--- a/test/ir/inference/test_trt_ops_fp32_mix_precision.py
+++ b/test/ir/inference/test_trt_ops_fp32_mix_precision.py
@@ -12,9 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
 import unittest
 from functools import partial
-from typing import Any, Dict, List
+from typing import Any
 
 import numpy as np
 from program_config import ProgramConfig, TensorConfig
@@ -54,10 +56,10 @@ def generate_elementwise_weight(op_type):
             else:
                 return np.random.randn(33, 1).astype(np.float32)
 
-        def generate_input1(attrs: List[Dict[str, Any]], shape_input):
+        def generate_input1(attrs: list[dict[str, Any]], shape_input):
             return np.random.random(shape_input).astype(np.float32)
 
-        def generate_input2(attrs: List[Dict[str, Any]], shape_input):
+        def generate_input2(attrs: list[dict[str, Any]], shape_input):
             begin = attrs[0]["begin_norm_axis"]
             sum = 1
             for x in range(begin, len(shape_input)):
@@ -176,7 +178,7 @@ def generate_input2(attrs: List[Dict[str, Any]], shape_input):
 
     def sample_predictor_configs(
         self, program_config
-    ) -> (paddle_infer.Config, List[int], float):
+    ) -> tuple[paddle_infer.Config, list[int], float]:
         def generate_dynamic_shape(attrs):
             self.dynamic_shape.min_input_shape = {
                 "conv2d_input": [1, 3, 64, 64],
diff --git a/test/ir/pir/cinn/llama_test_model.py b/test/ir/pir/cinn/llama_test_model.py
index db52e073803943..4761aa6f649246 100644
--- a/test/ir/pir/cinn/llama_test_model.py
+++ b/test/ir/pir/cinn/llama_test_model.py
@@ -12,10 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
 import math
 import sys
 from os.path import dirname
-from typing import Optional, Tuple
 
 import paddle
 import paddle.nn.functional as F
@@ -313,13 +314,13 @@ def _init_rope(self):
     def forward(
         self,
         hidden_states,
-        position_ids: Optional[Tuple[paddle.Tensor]] = None,
-        past_key_value: Optional[Tuple[paddle.Tensor]] = None,
-        attention_mask: Optional[paddle.Tensor] = None,
+        position_ids: tuple[paddle.Tensor] | None = None,
+        past_key_value: tuple[paddle.Tensor] | None = None,
+        attention_mask: paddle.Tensor | None = None,
         output_attentions: bool = False,
         use_cache: bool = False,
-    ) -> Tuple[
-        paddle.Tensor, Optional[paddle.Tensor], Optional[Tuple[paddle.Tensor]]
+    ) -> tuple[
+        paddle.Tensor, paddle.Tensor | None, tuple[paddle.Tensor] | None
     ]:
         """Input shape: Batch x Time x Channel"""
         # [bs, seq_len, num_head * head_dim] -> [seq_len / n, bs, num_head * head_dim] (n is model parallelism)
@@ -398,12 +399,12 @@ def __init__(self, config):
     def forward(
         self,
         hidden_states: paddle.Tensor,
-        position_ids: Optional[Tuple[paddle.Tensor]] = None,
-        attention_mask: Optional[paddle.Tensor] = None,
-        output_attentions: Optional[bool] = False,
-        past_key_value: Optional[Tuple[paddle.Tensor]] = None,
-        use_cache: Optional[bool] = False,
-    ) -> Tuple[paddle.Tensor, Optional[Tuple[paddle.Tensor, paddle.Tensor]]]:
+        position_ids: tuple[paddle.Tensor] | None = None,
+        attention_mask: paddle.Tensor | None = None,
+        output_attentions: bool | None = False,
+        past_key_value: tuple[paddle.Tensor] | None = None,
+        use_cache: bool | None = False,
+    ) -> tuple[paddle.Tensor, tuple[paddle.Tensor, paddle.Tensor] | None]:
         """
         Args:
             hidden_states (`paddle.Tensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
diff --git a/test/legacy_test/auto_parallel_op_test.py b/test/legacy_test/auto_parallel_op_test.py
index 3e4c56047d62b8..cc93b1fa400b8f 100644
--- a/test/legacy_test/auto_parallel_op_test.py
+++ b/test/legacy_test/auto_parallel_op_test.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
 import os
 import pathlib
 import pickle
@@ -20,7 +22,7 @@
 import tempfile
 import uuid
 from collections import defaultdict
-from typing import Dict, List, Tuple, cast
+from typing import cast
 
 import numpy as np
 from prim_op_test import OpTestUtils, _as_list, convert_uint16_to_float, flatten
@@ -297,7 +299,7 @@ def run_subprocess(start_command, env, timeout):
         )
 
 
-def convert_input_placements_to_dims_map(placements: Dict, inputs: Dict):
+def convert_input_placements_to_dims_map(placements: dict, inputs: dict):
     all_dims_map = {}
     for name, item in inputs.items():
         if name not in placements:
@@ -322,7 +324,7 @@ def convert_input_placements_to_dims_map(placements: Dict, inputs: Dict):
 
 
 def convert_input_dims_map_to_placements(
-    dims_map: Dict, inputs: Dict, mesh_ndim: int
+    dims_map: dict, inputs: dict, mesh_ndim: int
 ):
     placements_map = {}
     for name, item in inputs.items():
@@ -348,7 +350,7 @@ def convert_input_dims_map_to_placements(
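-# TODO: This method has been implementd in
-# paddle/phi/core/distributed/auto_parallel/placement_types.h, bind it
-# python and it's logic.
+# TODO: This method has been implemented in
+# paddle/phi/core/distributed/auto_parallel/placement_types.h; bind it
+# to Python and reuse its logic.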
-def placements_to_dims_map(placements: List, tensor_ndim: int) -> Tuple[int]:
+def placements_to_dims_map(placements: list, tensor_ndim: int) -> tuple[int]:
     r = [-1] * tensor_ndim
     for i, placement in enumerate(placements):
         if placement.is_shard():
@@ -367,13 +369,13 @@ def placements_to_dims_map(placements: List, tensor_ndim: int) -> Tuple[int]:
 # paddle/phi/core/distributed/auto_parallel/placement_types.h, and bind it to
 # python
 def dims_map_to_placements(
-    dim_map: Tuple[int], mesh_ndim: int, sums: Tuple[int] = ()
-) -> Tuple[dist.Placement]:
+    dim_map: tuple[int], mesh_ndim: int, sums: tuple[int] = ()
+) -> tuple[dist.Placement]:
     """
     Construct a placements from dim_map list and pending sum.
 
     Args:
-        dim_map (Tuple[int]): a list of integer that represents sharding on each
+        dim_map (tuple[int]): a list of integer that represents sharding on each
             tensor dimension, see `dim_map` property doc for details
         mesh_ndim (int): the ndim of Process mesh.
         sums (Tuple[int]): a list of integer that represents the dist tensor have
@@ -383,7 +385,7 @@ def dims_map_to_placements(
         a placement sequence.
     """
     # by default replicate on device mesh dims
-    placements: List[dist.Placement] = [
+    placements: list[dist.Placement] = [
         dist.Replicate() for _ in range(mesh_ndim)
     ]
 
diff --git a/test/legacy_test/test_memory_efficient_attention.py b/test/legacy_test/test_memory_efficient_attention.py
index 6298a3100a930c..80526aa16cf8d2 100644
--- a/test/legacy_test/test_memory_efficient_attention.py
+++ b/test/legacy_test/test_memory_efficient_attention.py
@@ -12,12 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
 import logging
 import os
 import random
 import re
 import unittest
-from typing import List, Sequence, Tuple
+from typing import TYPE_CHECKING
 
 import numpy as np
 
@@ -29,6 +31,9 @@
     memory_efficient_attention,
 )
 
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+
 paddle.seed(2023)
 
 
@@ -90,11 +95,11 @@ def create_attn_bias(
 
 def _rand_seqlens(
     r: random.Random, bs: int, q_len: int, kv_len: int
-) -> Tuple[Sequence[int], Sequence[int]]:
+) -> tuple[Sequence[int], Sequence[int]]:
     q_len *= bs
     kv_len *= bs
-    seqlens_q: List[int] = []
-    seqlens_k: List[int] = []
+    seqlens_q: list[int] = []
+    seqlens_k: list[int] = []
 
     step_q = [max(1, q_len // 10), max(2, q_len // 2)]
     step_k = [max(1, kv_len // 10), max(2, kv_len // 2)]