diff --git a/python/paddle/base/dataset.py b/python/paddle/base/dataset.py index fe00b751a4195..f1db49b373937 100644 --- a/python/paddle/base/dataset.py +++ b/python/paddle/base/dataset.py @@ -13,6 +13,8 @@ # limitations under the License. """This is definition of dataset class, which is high performance IO.""" +from __future__ import annotations + from google.protobuf import text_format import paddle @@ -41,7 +43,7 @@ def __init__(self): """Init.""" pass - def create_dataset(self, datafeed_class="QueueDataset"): + def create_dataset(self, datafeed_class="QueueDataset") -> DatasetBase: """ Create "QueueDataset" or "InMemoryDataset", or "FileInstantDataset", the default is "QueueDataset". diff --git a/python/paddle/base/executor.py b/python/paddle/base/executor.py index 77560d87aae0d..22144ed588ce3 100755 --- a/python/paddle/base/executor.py +++ b/python/paddle/base/executor.py @@ -12,12 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + import copy import logging import os import sys import warnings from functools import lru_cache +from typing import TYPE_CHECKING, Any, Literal, overload import numpy as np @@ -50,6 +53,17 @@ from .trainer_factory import FetchHandlerMonitor, TrainerFactory from .wrapped_decorator import signature_safe_contextmanager +if TYPE_CHECKING: + from collections.abc import Generator + + import numpy.typing as npt + + from paddle import Tensor + from paddle._typing import PlaceLike + from paddle._typing.device_like import _Place + from paddle.base.dataset import DatasetBase + from paddle.static import CompiledProgram + __all__ = [] g_scope = core.Scope() @@ -57,7 +71,7 @@ InferAnalysisConfig = core.AnalysisConfig -def global_scope(): +def global_scope() -> core.Scope: """ :api_attr: Static Graph @@ -79,7 +93,7 @@ def global_scope(): return g_scope -def _switch_scope(scope): +def _switch_scope(scope: core.Scope) -> core.Scope: global g_scope ex = g_scope g_scope = scope @@ -87,7 +101,7 @@ def _switch_scope(scope): @signature_safe_contextmanager -def scope_guard(scope): +def scope_guard(scope: core.Scope) -> Generator[None, None, None]: """ This function switches scope through python `with` statement. @@ -1260,7 +1274,9 @@ class Executor: """ - def __init__(self, place=None): + place: _Place + + def __init__(self, place: PlaceLike | None = None) -> None: if place is None: expected_place = framework._current_expected_place_() self.place = expected_place @@ -1301,7 +1317,7 @@ def _is_optimizer_op(self, op): op.all_attrs()[self.op_role_key] ) & int(core.op_proto_and_checker_maker.OpRole.Optimize) - def __del__(self): + def __del__(self) -> None: # NOTE(Ruibiao): The manually call of clear is required. Because in Python, executor_cache # may not immediately destructed after Executor instance deleted (so does not the _StandaloneExecutor), # that brings errors to mkl-dnn unit tests (see ClearMKLDNNCache in interpretercore.cc for why). @@ -1616,7 +1632,7 @@ def _update_feed(cls, program, feed): TODO(panyx0718): Why ParallelExecutor doesn't have close? ''' - def close(self): + def close(self) -> None: """ Close the executor. This interface is used for distributed training (PServers mode). This executor can not be used after calling the interface, because @@ -1643,7 +1659,7 @@ def close(self): del trainer_instance self._default_executor.close() - def flush(self): + def flush(self) -> None: """ flush all trainer param to root_scope """ @@ -1654,6 +1670,51 @@ def flush(self): del trainer_instance self.trainer_caches.clear() + @overload + def run( + self, + program: Program | CompiledProgram | None = ..., + feed: dict[str, npt.NDArray[Any]] | list[npt.NDArray[Any]] | None = ..., + fetch_list: list[str | Tensor] | None = ..., + feed_var_name: str = ..., + fetch_var_name: str = ..., + scope: core.Scope | None = ..., + return_numpy: Literal[True] = ..., + use_program_cache: bool = ..., + use_prune: bool = ..., + ) -> list[npt.NDArray[Any]]: + ... + + @overload + def run( + self, + program: Program | CompiledProgram | None = ..., + feed: dict[str, npt.NDArray[Any]] | list[npt.NDArray[Any]] | None = ..., + fetch_list: list[str | Tensor] | None = ..., + feed_var_name: str = ..., + fetch_var_name: str = ..., + scope: core.Scope | None = ..., + return_numpy: Literal[False] = ..., + use_program_cache: bool = ..., + use_prune: bool = ..., + ) -> list[Tensor]: + ... + + @overload + def run( + self, + program: Program | CompiledProgram | None = ..., + feed: dict[str, npt.NDArray[Any]] | list[npt.NDArray[Any]] | None = ..., + fetch_list: list[str | Tensor] | None = ..., + feed_var_name: str = ..., + fetch_var_name: str = ..., + scope: core.Scope | None = ..., + return_numpy: bool = ..., + use_program_cache: bool = ..., + use_prune: bool = ..., + ) -> list[Tensor] | list[npt.NDArray[Any]]: + ... + def run( self, program=None, @@ -1741,8 +1802,10 @@ def run( >>> exe.run(paddle.static.default_startup_program()) >>> x = numpy.random.random(size=(10, 1)).astype('float32') - >>> loss_val, array_val = exe.run(feed={'X': x}, - ... fetch_list=[loss.name, array.name]) + >>> loss_val, array_val = exe.run( + ... feed={'X': x}, + ... fetch_list=[loss.name, array.name] # type: ignore[union-attr] + ... ) >>> print(array_val) >>> # doctest: +SKIP("Random output") [array(0.16870381, dtype=float32)] @@ -1771,17 +1834,20 @@ def run( >>> exe.run(paddle.static.default_startup_program()) >>> build_strategy = paddle.static.BuildStrategy() >>> binary = paddle.static.CompiledProgram( - ... paddle.static.default_main_program(), build_strategy=build_strategy) + ... paddle.static.default_main_program(), + ... build_strategy=build_strategy + ... ) >>> batch_size = 6 >>> x = np.random.random(size=(batch_size, 1)).astype('float32') - >>> prediction, = exe.run(binary, - ... feed={'X': x}, - ... fetch_list=[prediction.name]) + >>> prediction, = exe.run( + ... binary, + ... feed={'X': x}, + ... fetch_list=[prediction.name] + ... ) >>> # If the user uses two GPU cards to run this python code, the printed result will be >>> # (6, class_dim). The first dimension value of the printed result is the batch_size. - >>> print("The prediction shape: {}".format( - ... np.array(prediction).shape)) + >>> print("The prediction shape: {}".format(np.array(prediction).shape)) The prediction shape: (6, 2) >>> print(prediction) @@ -2219,7 +2285,7 @@ def _adjust_pipeline_resource(self, pipeline_opt, dataset, pipeline_num): dataset.set_thread(pipeline_opt["concurrency_list"][0] * pipeline_num) return pipeline_num - def split_program_by_device(self, program): + def split_program_by_device(self, program: Program) -> list[int] | None: ops_list = [] type_list = [] pre = None @@ -3098,16 +3164,16 @@ def _run_pipeline( def infer_from_dataset( self, - program=None, - dataset=None, - scope=None, - thread=0, - debug=False, - fetch_list=None, - fetch_info=None, - print_period=100, - fetch_handler=None, - ): + program: Program | CompiledProgram | None = None, + dataset: DatasetBase | None = None, + scope: core.Scope | None = None, + thread: int = 0, + debug: bool = False, + fetch_list: list[Tensor] | None = None, + fetch_info: list[str] | None = None, + print_period: int = 100, + fetch_handler: FetchHandler | None = None, + ) -> None: """ Infer from a pre-defined Dataset. Dataset is defined in paddle.base.dataset. Given a program, either a program or compiled program, infer_from_dataset will @@ -3177,14 +3243,14 @@ def infer_from_dataset( def start_heter_trainer( self, - program=None, - scope=None, - debug=False, - fetch_list=None, - fetch_info=None, - print_period=100, - fetch_handler=None, - ): + program: Program | None = None, + scope: core.Scope | None = None, + debug: bool = False, + fetch_list: list[Tensor] | None = None, + fetch_info: list[str] | None = None, + print_period: int = 100, + fetch_handler: FetchHandler | None = None, + ) -> core.TrainerBase: scope, trainer = self._prepare_trainer( program=program, dataset=None, @@ -3221,16 +3287,16 @@ def start_heter_trainer( def train_from_dataset( self, - program=None, - dataset=None, - scope=None, - thread=0, - debug=False, - fetch_list=None, - fetch_info=None, - print_period=100, - fetch_handler=None, - ): + program: Program | CompiledProgram | None = None, + dataset: DatasetBase | None = None, + scope: core.Scope | None = None, + thread: int = 0, + debug: bool = False, + fetch_list: list[Tensor] | None = None, + fetch_info: list[str] | None = None, + print_period: int = 100, + fetch_handler: FetchHandler | None = None, + ) -> None: """ Train from a pre-defined Dataset. Dataset is defined in paddle.base.dataset. Given a program, either a program or compiled program, train_from_dataset will