forked from PaddlePaddle/PaddleScience
[Fea] Support python inference (PaddlePaddle#773)
* [Doc] Add pretrained model for laplace2d & refine comments (PaddlePaddle#639)
* update laplace2d pretrained model
* remove 'after finished training' comment in evaluate function
* update README.md
* add deploy module for aneurysm
* update code
* update aneurysm code
* update code
* update code
* update code
* update aneurysm document
* update export and inference document
* fix docstring
1 parent 7cabf19 · commit 8d756ce
Showing 13 changed files with 706 additions and 226 deletions.
@@ -0,0 +1,17 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
deploy module is designed for inference and deployment.
"""
@@ -0,0 +1,13 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
@@ -0,0 +1,219 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import platform
from os import path as osp
from typing import TYPE_CHECKING
from typing import Optional
from typing import Tuple

from paddle import inference as paddle_inference
from typing_extensions import Literal

from ppsci.utils import logger

if TYPE_CHECKING:
    import onnxruntime

class Predictor:
    """
    Initializes the inference engine with the given parameters.

    Args:
        pdmodel_path (Optional[str]): Path to the PaddlePaddle model file. Defaults to None.
        pdpiparams_path (Optional[str]): Path to the PaddlePaddle model parameters file. Defaults to None.
        device (Literal["gpu", "cpu", "npu", "xpu"], optional): Device to use for inference. Defaults to "cpu".
        engine (Literal["native", "tensorrt", "onnx", "mkldnn"], optional): Inference engine to use. Defaults to "native".
        precision (Literal["fp32", "fp16", "int8"], optional): Precision to use for inference. Defaults to "fp32".
        onnx_path (Optional[str], optional): Path to the ONNX model file. Defaults to None.
        ir_optim (bool, optional): Whether to use IR optimization. Defaults to True.
        min_subgraph_size (int, optional): Minimum subgraph size for IR optimization. Defaults to 15.
        gpu_mem (int, optional): Initial size of GPU memory pool(MB). Defaults to 500(MB).
        gpu_id (int, optional): GPU ID to use. Defaults to 0.
        max_batch_size (int, optional): Maximum batch size for inference. Defaults to 10.
        num_cpu_threads (int, optional): Number of CPU threads to use. Defaults to 10.
    """

    def __init__(
        self,
        pdmodel_path: Optional[str] = None,
        pdpiparams_path: Optional[str] = None,
        *,
        device: Literal["gpu", "cpu", "npu", "xpu"] = "cpu",
        engine: Literal["native", "tensorrt", "onnx", "mkldnn"] = "native",
        precision: Literal["fp32", "fp16", "int8"] = "fp32",
        onnx_path: Optional[str] = None,
        ir_optim: bool = True,
        min_subgraph_size: int = 15,
        gpu_mem: int = 500,
        gpu_id: int = 0,
        max_batch_size: int = 10,
        num_cpu_threads: int = 10,
    ):
        self.pdmodel_path = pdmodel_path
        self.pdpiparams_path = pdpiparams_path

        self._check_device(device)
        self.device = device
        self._check_engine(engine)
        self.engine = engine
        self._check_precision(precision)
        self.precision = precision

        self.onnx_path = onnx_path
        self.ir_optim = ir_optim
        self.min_subgraph_size = min_subgraph_size
        self.gpu_mem = gpu_mem
        self.gpu_id = gpu_id
        self.max_batch_size = max_batch_size
        self.num_cpu_threads = num_cpu_threads

        if self.engine == "onnx":
            self.predictor, self.config = self._create_onnx_predictor()
        else:
            self.predictor, self.config = self._create_paddle_predictor()

        logger.message(
            f"Inference with engine: {self.engine}, precision: {self.precision}, "
            f"device: {self.device}."
        )

    def predict(self, image):
        raise NotImplementedError

    def _create_paddle_predictor(
        self,
    ) -> Tuple[paddle_inference.Predictor, paddle_inference.Config]:
        if not osp.exists(self.pdmodel_path):
            raise FileNotFoundError(
                f"Given 'pdmodel_path': {self.pdmodel_path} does not exist. "
                "Please check if it is correct."
            )
        if not osp.exists(self.pdpiparams_path):
            raise FileNotFoundError(
                f"Given 'pdpiparams_path': {self.pdpiparams_path} does not exist. "
                "Please check if it is correct."
            )

        config = paddle_inference.Config(self.pdmodel_path, self.pdpiparams_path)
        if self.device == "gpu":
            config.enable_use_gpu(self.gpu_mem, self.gpu_id)
            if self.engine == "tensorrt":
                if self.precision == "fp16":
                    precision = paddle_inference.Config.Precision.Half
                elif self.precision == "int8":
                    precision = paddle_inference.Config.Precision.Int8
                else:
                    precision = paddle_inference.Config.Precision.Float32
                config.enable_tensorrt_engine(
                    workspace_size=1 << 30,
                    precision_mode=precision,
                    max_batch_size=self.max_batch_size,
                    min_subgraph_size=self.min_subgraph_size,
                    use_calib_mode=False,
                )
                # collect dynamic shape info into a file next to the model
                pdmodel_dir = osp.dirname(self.pdmodel_path)
                trt_shape_path = osp.join(pdmodel_dir, "trt_dynamic_shape.txt")

                if not osp.exists(trt_shape_path):
                    config.collect_shape_range_info(trt_shape_path)
                    logger.info(
                        f"Save collected dynamic shape info to: {trt_shape_path}"
                    )
                try:
                    config.enable_tuned_tensorrt_dynamic_shape(trt_shape_path, True)
                except Exception as e:
                    logger.warning(e)
                    logger.warning(
                        "TRT dynamic shape is disabled for your paddlepaddle < 2.3.0"
                    )

        elif self.device == "npu":
            config.enable_custom_device("npu")
        elif self.device == "xpu":
            config.enable_xpu(10 * 1024 * 1024)
        else:
            config.disable_gpu()
            if self.engine == "mkldnn":
                # 'set_mkldnn_cache_capacity' is not available on macOS
                if platform.system() != "Darwin":
                    ...
                    # cache 10 different shapes for mkldnn to avoid memory leak
                    # config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()

                if self.precision == "fp16":
                    config.enable_mkldnn_bfloat16()

            config.set_cpu_math_library_num_threads(self.num_cpu_threads)

        # enable memory optim
        config.enable_memory_optim()
        config.disable_glog_info()
        # enable zero copy
        config.switch_use_feed_fetch_ops(False)
        config.switch_ir_optim(self.ir_optim)

        predictor = paddle_inference.create_predictor(config)
        return predictor, config

    def _create_onnx_predictor(
        self,
    ) -> Tuple["onnxruntime.InferenceSession", "onnxruntime.SessionOptions"]:
        if not osp.exists(self.onnx_path):
            raise FileNotFoundError(
                f"Given 'onnx_path' {self.onnx_path} does not exist. "
                "Please check if it is correct."
            )

        try:
            import onnxruntime as ort
        except ModuleNotFoundError:
            raise ModuleNotFoundError(
                "Please install onnxruntime with `pip install onnxruntime`."
            )

        # set config for onnx predictor
        config = ort.SessionOptions()
        config.intra_op_num_threads = self.num_cpu_threads
        if self.ir_optim:
            config.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL

        # instantiate onnx predictor
        predictor = ort.InferenceSession(self.onnx_path, sess_options=config)
        return predictor, config

    def _check_device(self, device: str):
        if device not in ["gpu", "cpu", "npu", "xpu"]:
            raise ValueError(
                "Inference only supports 'gpu', 'cpu', 'npu' and 'xpu' devices, "
                f"but got {device}."
            )

    def _check_engine(self, engine: str):
        if engine not in ["native", "tensorrt", "onnx", "mkldnn"]:
            raise ValueError(
                "Inference only supports 'native', 'tensorrt', 'onnx' and 'mkldnn' "
                f"engines, but got {engine}."
            )

    def _check_precision(self, precision: str):
        if precision not in ["fp32", "fp16", "int8"]:
            raise ValueError(
                "Inference only supports 'fp32', 'fp16' and 'int8' "
                f"precision, but got {precision}."
            )
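For context, here is a minimal sketch of how this base `Predictor` might be constructed directly. It is not part of the commit; the model file paths are placeholders, and the module path follows the `from deploy.python_infer import base` import used in the next file.

```python
# Hypothetical construction of the base Predictor (paths are placeholders).
from deploy.python_infer.base import Predictor

predictor = Predictor(
    pdmodel_path="./inference/model.pdmodel",       # assumed exported model file
    pdpiparams_path="./inference/model.pdiparams",  # assumed exported params file
    device="cpu",
    engine="native",
    precision="fp32",
)
# The base class leaves `predict` as NotImplementedError; concrete subclasses
# such as PINNPredictor (next file) provide the batched inference loop.
```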
@@ -0,0 +1,120 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Dict
from typing import Union

import numpy as np
import paddle
from omegaconf import DictConfig

from deploy.python_infer import base
from ppsci.utils import logger
from ppsci.utils import misc

class PINNPredictor(base.Predictor):
    """General predictor for PINN-based models.

    Args:
        cfg (DictConfig): Running configuration.
    """

    def __init__(
        self,
        cfg: DictConfig,
    ):
        super().__init__(
            cfg.INFER.pdmodel_path,
            cfg.INFER.pdpiparams_path,
            device=cfg.INFER.device,
            engine=cfg.INFER.engine,
            precision=cfg.INFER.precision,
            onnx_path=cfg.INFER.onnx_path,
            ir_optim=cfg.INFER.ir_optim,
            min_subgraph_size=cfg.INFER.min_subgraph_size,
            gpu_mem=cfg.INFER.gpu_mem,
            gpu_id=cfg.INFER.gpu_id,
            max_batch_size=cfg.INFER.max_batch_size,
            num_cpu_threads=cfg.INFER.num_cpu_threads,
        )
        self.log_freq = cfg.log_freq

    def predict(
        self,
        input_dict: Dict[str, Union[np.ndarray, paddle.Tensor]],
        batch_size: int = 64,
    ) -> Dict[str, np.ndarray]:
        """
        Predicts the output of the model for the given input.

        Args:
            input_dict (Dict[str, Union[np.ndarray, paddle.Tensor]]):
                A dictionary containing the input data.
            batch_size (int, optional): The batch size to use for prediction.
                Defaults to 64.

        Returns:
            Dict[str, np.ndarray]: A dictionary containing the predicted output.
        """
        if batch_size > self.max_batch_size:
            logger.warning(
                f"batch_size({batch_size}) is larger than "
                f"max_batch_size({self.max_batch_size}), which may cause errors."
            )

        # prepare input handle(s)
        input_handles = {
            name: self.predictor.get_input_handle(name) for name in input_dict
        }
        # prepare output handle(s)
        output_handles = {
            name: self.predictor.get_output_handle(name)
            for name in self.predictor.get_output_names()
        }

        num_samples = len(next(iter(input_dict.values())))
        batch_num = (num_samples + (batch_size - 1)) // batch_size
        pred_dict = misc.Prettydefaultdict(list)

        # inference by batch
        for batch_id in range(1, batch_num + 1):
            if batch_id % self.log_freq == 0 or batch_id == batch_num:
                logger.info(f"Predicting batch {batch_id}/{batch_num}")

            # prepare batch input dict
            st = (batch_id - 1) * batch_size
            ed = min(num_samples, batch_id * batch_size)
            batch_input_dict = {key: input_dict[key][st:ed] for key in input_dict}

            # send batch input data to input handle(s)
            for name, handle in input_handles.items():
                handle.copy_from_cpu(batch_input_dict[name])

            # run predictor
            self.predictor.run()

            # receive batch output data from output handle(s)
            batch_output_dict = {
                name: output_handles[name].copy_to_cpu() for name in output_handles
            }

            # collect batch output data
            for key, batch_output in batch_output_dict.items():
                pred_dict[key].append(batch_output)

        # concatenate local predictions
        pred_dict = {key: np.concatenate(value) for key, value in pred_dict.items()}

        return pred_dict
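A rough usage sketch of the `PINNPredictor` above, assuming a Hydra/OmegaConf config exposing the `cfg.INFER.*` keys referenced in `__init__`. The module path `deploy.python_infer.pinn_predictor`, the model file paths, and the input names/shapes are all illustrative assumptions, not part of this commit.

```python
import numpy as np
from omegaconf import OmegaConf

from deploy.python_infer.pinn_predictor import PINNPredictor  # module path assumed

# hypothetical config mirroring the cfg.INFER fields read in __init__
cfg = OmegaConf.create(
    {
        "log_freq": 20,
        "INFER": {
            "pdmodel_path": "./inference/model.pdmodel",      # placeholder path
            "pdpiparams_path": "./inference/model.pdiparams",  # placeholder path
            "device": "cpu",
            "engine": "native",
            "precision": "fp32",
            "onnx_path": None,
            "ir_optim": True,
            "min_subgraph_size": 15,
            "gpu_mem": 500,
            "gpu_id": 0,
            "max_batch_size": 1024,
            "num_cpu_threads": 4,
        },
    }
)
predictor = PINNPredictor(cfg)

# dummy coordinates; real keys must match the exported model's input names
input_dict = {
    "x": np.random.rand(256, 1).astype("float32"),
    "y": np.random.rand(256, 1).astype("float32"),
    "z": np.random.rand(256, 1).astype("float32"),
}
output_dict = predictor.predict(input_dict, batch_size=64)
for name, value in output_dict.items():
    print(name, value.shape)
```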