From 066fe69ae8dff24fe0fe9fa74685359de137dbfd Mon Sep 17 00:00:00 2001
From: enkilee <jeffrey0122@163.com>
Date: Wed, 17 Jul 2024 16:53:24 +0800
Subject: [PATCH 1/3] Add type annotations to paddle.amp.debugging

---
 python/paddle/amp/debugging.py | 102 ++++++++++++++++++++++-----------
 1 file changed, 67 insertions(+), 35 deletions(-)

diff --git a/python/paddle/amp/debugging.py b/python/paddle/amp/debugging.py
index 7dffae644dc6e..c361c0288be0d 100644
--- a/python/paddle/amp/debugging.py
+++ b/python/paddle/amp/debugging.py
@@ -11,12 +11,21 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from __future__ import annotations
 
 import contextlib
 import random
 from enum import Enum
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Sequence,
+    TypeVar,
+)
 
 import numpy as np
+from typing_extensions import ParamSpec
 
 import paddle
 from paddle import _C_ops
@@ -24,6 +33,13 @@
 
 from ..framework import LayerHelper, in_dynamic_or_pir_mode
 
+if TYPE_CHECKING:
+    from typing import Generator
+
+    from paddle import Tensor
+
+_InputT = ParamSpec("_InputT")
+_RetT = TypeVar("_RetT")
 __all__ = [
     "DebugMode",
     "TensorCheckerConfig",
@@ -60,7 +76,9 @@ class DebugMode(Enum):
     # DUMP_ALL = 5
 
 
-def check_layer_numerics(func):
+def check_layer_numerics(
+    func: Callable[_InputT, _RetT]
+) -> Callable[_InputT, _RetT]:
     """
     This decorator is used to check the numerical values of the layer's input and output data.
 
@@ -110,7 +128,7 @@ def check_layer_numerics(func):
             >>> # RuntimeError: (PreconditionNotMet) There are NAN or INF (num_nan=0, num_inf=4, num_zero=0) in [device=gpu:0, op=divide, tensor=, dtype=fp32].
     """
 
-    def wrapper(self, *args, **kwargs):
+    def wrapper(self, *args: _InputT.args, **kwargs: _InputT.kwargs) -> _RetT:
         if args:
             # Set temp data and temp.gradient = False
             start_data = args[0]
@@ -133,7 +151,7 @@ def wrapper(self, *args, **kwargs):
     return wrapper
 
 
-def set_checked_op_list(checked_op_list):
+def set_checked_op_list(checked_op_list: Sequence[str]) -> None:
     # check checked_op_list
     if checked_op_list is not None:
         if isinstance(checked_op_list, (list, tuple)):
@@ -143,7 +161,7 @@ def set_checked_op_list(checked_op_list):
             raise ValueError("checked_op_list must be list or tuple")
 
 
-def set_skipped_op_list(skipped_op_list):
+def set_skipped_op_list(skipped_op_list: Sequence[str]) -> None:
     # check skipped_op_list
     if skipped_op_list is not None:
         if isinstance(skipped_op_list, (list, tuple)):
@@ -162,13 +180,13 @@ class TensorCheckerConfig:
 
         debug_mode(DebugMode, optional): A parameter that determines the type of debugging to be used. Default is DebugMode.CHECK_NAN_INF_AND_ABORT.
 
-        output_dir(string, optional): The path to store collected data. If this parameter is set to None, the data will be printed to the terminal. Default is None.
+        output_dir(string|None, optional): The path to store collected data. If this parameter is set to None, the data will be printed to the terminal. Default is None.
 
-        checked_op_list(list|tuple, optional): Specifies a list of operators that need to be checked during program execution, for example, checked_op_list=['elementwise_add', 'conv2d'], indicating that the output results of elementwise_add and conv2d should be checked for nan/inf during program execution. Default is None.
+        checked_op_list(list|tuple|None, optional): Specifies a list of operators that need to be checked during program execution, for example, checked_op_list=['elementwise_add', 'conv2d'], indicating that the output results of elementwise_add and conv2d should be checked for nan/inf during program execution. Default is None.
 
-        skipped_op_list(list|tuple, optional): Specifies a list of operators that do not need to be checked during program execution, for example, skipped_op_list=['elementwise_add', 'conv2d'], indicating that the output results of elementwise_add and conv2d should not be checked for nan/inf during program execution. None is None.
+        skipped_op_list(list|tuple|None, optional): Specifies a list of operators that do not need to be checked during program execution, for example, skipped_op_list=['elementwise_add', 'conv2d'], indicating that the output results of elementwise_add and conv2d should not be checked for nan/inf during program execution. Default is None.
 
-        debug_step(list|tuple, optional): A list or tuple used primarily for nan/inf checking during model training. For example, debug_step=[1,5] indicates that nan/inf checking should only be performed on model training iterations 1 to 5. Default is None.
+        debug_step(list|tuple|None, optional): A list or tuple used primarily for nan/inf checking during model training. For example, debug_step=[1,5] indicates that nan/inf checking should only be performed on model training iterations 1 to 5. Default is None.
 
         stack_height_limit(int, optional): An integer value specifying the maximum depth of the call stack. This feature supports printing the call stack at the error location. Currently, only enabling or disabling call stack printing is supported. If you want to print the corresponding C++ call stack when NaN is detected in GPU Kernel, set stack_height_limit to 1, otherwise set it to 0. Default is 1.
 
@@ -199,16 +217,27 @@ class TensorCheckerConfig:
 
     # For module debugging
     current_step_id = 0
+    enable: bool
+    debug_mode: DebugMode
+    output_dir: str | None
+    checked_op_list: Sequence[str] | None
+    skipped_op_list: Sequence[str] | None
+    debug_step: Sequence[int] | None
+    stack_height_limit: int
+    start_step: Sequence[int]
+    end_step: Sequence[int]
+    seed: int
+    initial_seed: int
 
     def __init__(
         self,
-        enable,
-        debug_mode=DebugMode.CHECK_NAN_INF_AND_ABORT,
-        output_dir=None,
-        checked_op_list=None,
-        skipped_op_list=None,
-        debug_step=None,
-        stack_height_limit=1,
+        enable: bool,
+        debug_mode: DebugMode = DebugMode.CHECK_NAN_INF_AND_ABORT,
+        output_dir: str | None = None,
+        checked_op_list: Sequence[str] | None = None,
+        skipped_op_list: Sequence[str] | None = None,
+        debug_step: Sequence[int] | None = None,
+        stack_height_limit: int = 1,
     ):
         self.enable = enable
         self.debug_mode = debug_mode
@@ -264,7 +293,7 @@ def __init__(
         if self.enable:
             self._set_seed(self.enable)
 
-    def _set_seed(self, flag):
+    def _set_seed(self, flag: int) -> None:
         if self.initial_seed != self.seed:
             self.seed = self.initial_seed
 
@@ -288,7 +317,7 @@ def _set_seed(self, flag):
                 flag,
             )
 
-    def _set_env(self, check_flag):
+    def _set_env(self, check_flag: int) -> None:
         paddle.set_flags({"FLAGS_check_nan_inf": check_flag})
         if check_flag:
             # set debug level
@@ -308,7 +337,7 @@ def _set_env(self, check_flag):
             else:
                 raise ValueError("stack_height_limit must be int")
 
-    def update_and_check_step_id(self):
+    def update_and_check_step_id(self) -> bool:
         if self.enable:
             if self.start_step is not None and self.end_step is not None:
                 if (
@@ -321,17 +350,20 @@ def update_and_check_step_id(self):
             return True
         return False
 
-    def start_check_nan_inf(self):
+    def start_check_nan_inf(self) -> None:
         if self.enable:
             self._set_env(self.enable)
 
-    def stop_check_nan_inf(self):
+    def stop_check_nan_inf(self) -> None:
         self._set_env(False)
 
 
 def check_numerics(
-    tensor, op_type, var_name, debug_mode=DebugMode.CHECK_NAN_INF_AND_ABORT
-):
+    tensor: Tensor,
+    op_type: str,
+    var_name: str,
+    debug_mode: DebugMode = DebugMode.CHECK_NAN_INF_AND_ABORT,
+) -> tuple[Tensor, Tensor]:
     """
     This function is used to debugging a tensor, finding the number of NaNs, Infs and zeros in the tensor.
 
@@ -397,12 +429,12 @@ def check_numerics(
     return stats, values
 
 
-def _get_operator_stats_flag():
+def _get_operator_stats_flag() -> Any:
     flags = paddle.get_flags(["FLAGS_low_precision_op_list"])
     return flags["FLAGS_low_precision_op_list"]
 
 
-def _print_operator_stats(op_count_dict):
+def _print_operator_stats(op_count_dict: dict[str, str | list[int]]) -> str:
     """
     Parse and print the stats of operators, mainly including the calls of
     dtypes such as different fp32, fp16, bf16 and others.
@@ -446,7 +478,7 @@ def _print_operator_stats(op_count_dict):
     print("<{:-^120}>\n".format(" op count: " + str(total_ops) + " "))
 
 
-def enable_operator_stats_collection():
+def enable_operator_stats_collection() -> None:
     """
     Enable to collect the number of operators for different data types.
     The statistical data are categorized according to four data types, namely
@@ -484,7 +516,7 @@ def enable_operator_stats_collection():
     paddle.set_flags({'FLAGS_low_precision_op_list': 1})
 
 
-def disable_operator_stats_collection():
+def disable_operator_stats_collection() -> None:
     """
     Disable the collection the number of operators for different data types.
     This function is used in pair with the corresponding enable function.
@@ -525,7 +557,7 @@ def disable_operator_stats_collection():
 
 
 @contextlib.contextmanager
-def collect_operator_stats():
+def collect_operator_stats() -> Generator[None, None, None]:
     """
     The context switcher to enable to collect the number of operators for
     different data types. The statistical data are categorized according
@@ -561,12 +593,12 @@ def collect_operator_stats():
 
 
 def compare_accuracy(
-    dump_path,
-    another_dump_path,
-    output_filename,
-    loss_scale=1,
-    dump_all_tensors=False,
-):
+    dump_path: str,
+    another_dump_path: str,
+    output_filename: str,
+    loss_scale: float = 1,
+    dump_all_tensors: bool = False,
+) -> None:
     r"""
     This is a precision comparison tool that can be used to compare log data of float16 and float32.
 
@@ -619,7 +651,7 @@ def compare_accuracy(
     )
 
 
-def enable_tensor_checker(checker_config):
+def enable_tensor_checker(checker_config: TensorCheckerConfig) -> None:
     """
     The enable_tensor_checker(checker_config) function enables model-level accuracy checking and is used in combination with disables_tensor_checker() to achieve model-level precision checking by checking the output Tensors of all operators within the specified range.
 
@@ -660,7 +692,7 @@ def enable_tensor_checker(checker_config):
         checker_config.stop_check_nan_inf()
 
 
-def disable_tensor_checker():
+def disable_tensor_checker() -> None:
     """
     disable_tensor_checker() is used to disable accuracy checking, and is used together with enable_tensor_checker(config) to achieve model-level precision checking by checking the output Tensors of all operators within the specified range.
 

From 2f5007882586b7ee0dc2a6f628a7321f88edea20 Mon Sep 17 00:00:00 2001
From: enkilee <jeffrey0122@163.com>
Date: Thu, 18 Jul 2024 10:19:59 +0800
Subject: [PATCH 2/3] Fix return annotation of _print_operator_stats (str -> None)

---
 python/paddle/amp/debugging.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/paddle/amp/debugging.py b/python/paddle/amp/debugging.py
index c361c0288be0d..6cd8ca3bbd7d7 100644
--- a/python/paddle/amp/debugging.py
+++ b/python/paddle/amp/debugging.py
@@ -434,7 +434,7 @@ def _get_operator_stats_flag() -> Any:
     return flags["FLAGS_low_precision_op_list"]
 
 
-def _print_operator_stats(op_count_dict: dict[str, str | list[int]]) -> str:
+def _print_operator_stats(op_count_dict: dict[str, str | list[int]]) -> None:
     """
     Parse and print the stats of operators, mainly including the calls of
     dtypes such as different fp32, fp16, bf16 and others.

From 70f900c7c36594d0740bda3bc653d3d8def078c8 Mon Sep 17 00:00:00 2001
From: SigureMo <sigure.qaq@gmail.com>
Date: Sat, 20 Jul 2024 22:50:42 +0800
Subject: [PATCH 3/3] Fix typing of op-list setters and TensorCheckerConfig step attributes

---
 python/paddle/amp/debugging.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/python/paddle/amp/debugging.py b/python/paddle/amp/debugging.py
index 6cd8ca3bbd7d7..7e093dfe731d3 100644
--- a/python/paddle/amp/debugging.py
+++ b/python/paddle/amp/debugging.py
@@ -151,7 +151,7 @@ def wrapper(self, *args: _InputT.args, **kwargs: _InputT.kwargs) -> _RetT:
     return wrapper
 
 
-def set_checked_op_list(checked_op_list: Sequence[str]) -> None:
+def set_checked_op_list(checked_op_list: Sequence[str] | None) -> None:
     # check checked_op_list
     if checked_op_list is not None:
         if isinstance(checked_op_list, (list, tuple)):
@@ -161,7 +161,7 @@ def set_checked_op_list(checked_op_list: Sequence[str]) -> None:
             raise ValueError("checked_op_list must be list or tuple")
 
 
-def set_skipped_op_list(skipped_op_list: Sequence[str]) -> None:
+def set_skipped_op_list(skipped_op_list: Sequence[str] | None) -> None:
     # check skipped_op_list
     if skipped_op_list is not None:
         if isinstance(skipped_op_list, (list, tuple)):
@@ -216,7 +216,8 @@ class TensorCheckerConfig:
     """
 
     # For module debugging
-    current_step_id = 0
+    current_step_id: int = 0
+
     enable: bool
     debug_mode: DebugMode
     output_dir: str | None
@@ -224,8 +225,8 @@ class TensorCheckerConfig:
     skipped_op_list: Sequence[str] | None
     debug_step: Sequence[int] | None
     stack_height_limit: int
-    start_step: Sequence[int]
-    end_step: Sequence[int]
+    start_step: int | None
+    end_step: int | None
     seed: int
     initial_seed: int