-
Notifications
You must be signed in to change notification settings - Fork 185
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Fea] Support python inference (#773)
* [Doc] Add pretrained model for laplace2d & refine comments (#639) * update laplace2d pretrained model * remove 'after finished training' comment in evaluate function * update README.md * add deploy module for aneurysm * update code * update aneurysm code * update code * update code * update code * update aneurysm document * update export and inference document * fix docstring
- Loading branch information
1 parent
a1ed7a3
commit 988fd33
Showing
13 changed files
with
706 additions
and
226 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. | ||
|
||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
|
||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
""" | ||
deploy module is designed for inference and deployment. | ||
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. | ||
|
||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
|
||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,219 @@ | ||
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. | ||
|
||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
|
||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
from __future__ import annotations | ||
|
||
import platform | ||
from os import path as osp | ||
from typing import TYPE_CHECKING | ||
from typing import Optional | ||
from typing import Tuple | ||
|
||
from paddle import inference as paddle_inference | ||
from typing_extensions import Literal | ||
|
||
from ppsci.utils import logger | ||
|
||
if TYPE_CHECKING: | ||
import onnxruntime | ||
|
||
|
||
class Predictor:
    """Base class that builds an inference predictor for an exported model.

    Depending on ``engine``, the constructor creates either a Paddle Inference
    predictor (``native``, ``tensorrt``, ``mkldnn``) or an ONNX Runtime session
    (``onnx``) and stores it in ``self.predictor`` together with the
    configuration object used to build it in ``self.config``.

    Subclasses are expected to implement :meth:`predict`.

    Args:
        pdmodel_path (Optional[str]): Path to the PaddlePaddle model file.
            Required unless ``engine`` is "onnx". Defaults to None.
        pdpiparams_path (Optional[str]): Path to the PaddlePaddle model
            parameters file. Required unless ``engine`` is "onnx".
            Defaults to None.
        device (Literal["gpu", "cpu", "npu", "xpu"], optional): Device to use
            for inference. Defaults to "cpu".
        engine (Literal["native", "tensorrt", "onnx", "mkldnn"], optional):
            Inference engine to use. Defaults to "native".
        precision (Literal["fp32", "fp16", "int8"], optional): Precision to use
            for inference. Defaults to "fp32".
        onnx_path (Optional[str], optional): Path to the ONNX model file.
            Required when ``engine`` is "onnx". Defaults to None.
        ir_optim (bool, optional): Whether to use IR optimization.
            Defaults to True.
        min_subgraph_size (int, optional): Minimum subgraph size passed to the
            TensorRT engine. Defaults to 15.
        gpu_mem (int, optional): Initial size of GPU memory pool(MB).
            Defaults to 500(MB).
        gpu_id (int, optional): GPU ID to use. Defaults to 0.
        max_batch_size (int, optional): Maximum batch size passed to the
            TensorRT engine. Defaults to 10.
        num_cpu_threads (int, optional): Number of CPU threads to use.
            Defaults to 10.

    Raises:
        ValueError: If ``device``, ``engine`` or ``precision`` is not one of
            the supported values.
        FileNotFoundError: If the model file(s) required by the selected
            engine are not given or do not exist.
        ModuleNotFoundError: If ``engine`` is "onnx" and onnxruntime is not
            installed.
    """

    # Closed sets of accepted option values, shared by the '_check_*' helpers.
    _SUPPORTED_DEVICES = ("gpu", "cpu", "npu", "xpu")
    _SUPPORTED_ENGINES = ("native", "tensorrt", "onnx", "mkldnn")
    _SUPPORTED_PRECISIONS = ("fp32", "fp16", "int8")

    def __init__(
        self,
        pdmodel_path: Optional[str] = None,
        pdpiparams_path: Optional[str] = None,
        *,
        device: Literal["gpu", "cpu", "npu", "xpu"] = "cpu",
        engine: Literal["native", "tensorrt", "onnx", "mkldnn"] = "native",
        precision: Literal["fp32", "fp16", "int8"] = "fp32",
        onnx_path: Optional[str] = None,
        ir_optim: bool = True,
        min_subgraph_size: int = 15,
        gpu_mem: int = 500,
        gpu_id: int = 0,
        max_batch_size: int = 10,
        num_cpu_threads: int = 10,
    ):
        self.pdmodel_path = pdmodel_path
        self.pdpiparams_path = pdpiparams_path

        # validate each option before storing it
        self._check_device(device)
        self.device = device
        self._check_engine(engine)
        self.engine = engine
        self._check_precision(precision)
        self.precision = precision

        self.onnx_path = onnx_path
        self.ir_optim = ir_optim
        self.min_subgraph_size = min_subgraph_size
        self.gpu_mem = gpu_mem
        self.gpu_id = gpu_id
        self.max_batch_size = max_batch_size
        self.num_cpu_threads = num_cpu_threads

        if self.engine == "onnx":
            self.predictor, self.config = self._create_onnx_predictor()
        else:
            self.predictor, self.config = self._create_paddle_predictor()

        logger.message(
            f"Inference with engine: {self.engine}, precision: {self.precision}, "
            f"device: {self.device}."
        )

    def predict(self, image):
        """Run inference; must be implemented by subclasses."""
        raise NotImplementedError

    def _create_paddle_predictor(
        self,
    ) -> "Tuple[paddle_inference.Predictor, paddle_inference.Config]":
        """Build a Paddle Inference predictor from the stored options.

        Returns:
            Tuple[paddle_inference.Predictor, paddle_inference.Config]: The
                created predictor and the config it was created from.

        Raises:
            FileNotFoundError: If 'pdmodel_path' or 'pdpiparams_path' is None
                or does not exist.
        """
        # 'osp.exists(None)' raises TypeError, so treat a missing path like a
        # non-existent file and report it with the same error.
        if self.pdmodel_path is None or not osp.exists(self.pdmodel_path):
            raise FileNotFoundError(
                f"Given 'pdmodel_path': {self.pdmodel_path} does not exist. "
                "Please check if it is correct."
            )
        if self.pdpiparams_path is None or not osp.exists(self.pdpiparams_path):
            raise FileNotFoundError(
                f"Given 'pdpiparams_path': {self.pdpiparams_path} does not exist. "
                "Please check if it is correct."
            )

        config = paddle_inference.Config(self.pdmodel_path, self.pdpiparams_path)
        if self.device == "gpu":
            config.enable_use_gpu(self.gpu_mem, self.gpu_id)
            if self.engine == "tensorrt":
                if self.precision == "fp16":
                    precision = paddle_inference.Config.Precision.Half
                elif self.precision == "int8":
                    precision = paddle_inference.Config.Precision.Int8
                else:
                    precision = paddle_inference.Config.Precision.Float32
                config.enable_tensorrt_engine(
                    workspace_size=1 << 30,
                    precision_mode=precision,
                    max_batch_size=self.max_batch_size,
                    min_subgraph_size=self.min_subgraph_size,
                    use_calib_mode=False,
                )
                # collect shape
                pdmodel_dir = osp.dirname(self.pdmodel_path)
                trt_shape_path = osp.join(pdmodel_dir, "trt_dynamic_shape.txt")

                if not osp.exists(trt_shape_path):
                    config.collect_shape_range_info(trt_shape_path)
                    logger.info(
                        f"Save collected dynamic shape info to: {trt_shape_path}"
                    )
                # Best effort: a failure here only disables tuned dynamic
                # shape, it must not abort predictor creation.
                try:
                    config.enable_tuned_tensorrt_dynamic_shape(trt_shape_path, True)
                except Exception as e:
                    logger.warning(e)
                    logger.warning(
                        "TRT dynamic shape is disabled for your paddlepaddle < 2.3.0"
                    )

        elif self.device == "npu":
            config.enable_custom_device("npu")
        elif self.device == "xpu":
            config.enable_xpu(10 * 1024 * 1024)
        else:
            config.disable_gpu()
            if self.engine == "mkldnn":
                # 'set_mkldnn_cache_capacity' is not available on macOS
                if platform.system() != "Darwin":
                    # NOTE: limiting the cache is currently disabled, so this
                    # branch is intentionally a no-op.
                    # cache 10 different shapes for mkldnn to avoid memory leak
                    # config.set_mkldnn_cache_capacity(10)
                    ...
                config.enable_mkldnn()

                if self.precision == "fp16":
                    config.enable_mkldnn_bfloat16()

            config.set_cpu_math_library_num_threads(self.num_cpu_threads)

        # enable memory optim
        config.enable_memory_optim()
        config.disable_glog_info()
        # enable zero copy
        config.switch_use_feed_fetch_ops(False)
        config.switch_ir_optim(self.ir_optim)

        predictor = paddle_inference.create_predictor(config)
        return predictor, config

    def _create_onnx_predictor(
        self,
    ) -> Tuple["onnxruntime.InferenceSession", "onnxruntime.SessionOptions"]:
        """Build an ONNX Runtime session from the stored options.

        Returns:
            Tuple[onnxruntime.InferenceSession, onnxruntime.SessionOptions]:
                The created session and the options it was created with.

        Raises:
            FileNotFoundError: If 'onnx_path' is None or does not exist.
            ModuleNotFoundError: If onnxruntime is not installed.
        """
        # 'osp.exists(None)' raises TypeError, so handle a missing path here.
        if self.onnx_path is None or not osp.exists(self.onnx_path):
            raise FileNotFoundError(
                f"Given 'onnx_path' {self.onnx_path} does not exist. "
                "Please check if it is correct."
            )

        # onnxruntime is an optional dependency, so import it lazily
        try:
            import onnxruntime as ort
        except ModuleNotFoundError as e:
            raise ModuleNotFoundError(
                "Please install onnxruntime with `pip install onnxruntime`."
            ) from e

        # set config for onnx predictor
        config = ort.SessionOptions()
        config.intra_op_num_threads = self.num_cpu_threads
        if self.ir_optim:
            config.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL

        # instantiate onnx predictor
        predictor = ort.InferenceSession(self.onnx_path, sess_options=config)
        return predictor, config

    def _check_device(self, device: str):
        """Raise ValueError if ``device`` is not a supported device name."""
        if device not in self._SUPPORTED_DEVICES:
            raise ValueError(
                "Inference only supports 'gpu', 'cpu', 'npu' and 'xpu' devices, "
                f"but got {device}."
            )

    def _check_engine(self, engine: str):
        """Raise ValueError if ``engine`` is not a supported engine name."""
        if engine not in self._SUPPORTED_ENGINES:
            raise ValueError(
                "Inference only supports 'native', 'tensorrt', 'onnx' and 'mkldnn' "
                f"engines, but got {engine}."
            )

    def _check_precision(self, precision: str):
        """Raise ValueError if ``precision`` is not a supported precision name."""
        if precision not in self._SUPPORTED_PRECISIONS:
            raise ValueError(
                "Inference only supports 'fp32', 'fp16' and 'int8' "
                f"precision, but got {precision}."
            )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. | ||
|
||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
|
||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
from typing import Dict | ||
from typing import Union | ||
|
||
import numpy as np | ||
import paddle | ||
from omegaconf import DictConfig | ||
|
||
from deploy.python_infer import base | ||
from ppsci.utils import logger | ||
from ppsci.utils import misc | ||
|
||
|
||
class PINNPredictor(base.Predictor):
    """General predictor for PINN-based models.

    Args:
        cfg (DictConfig): Running configuration. The predictor options are read
            from ``cfg.INFER`` and the logging frequency from ``cfg.log_freq``.
    """

    def __init__(
        self,
        cfg: DictConfig,
    ):
        super().__init__(
            cfg.INFER.pdmodel_path,
            cfg.INFER.pdpiparams_path,
            device=cfg.INFER.device,
            engine=cfg.INFER.engine,
            precision=cfg.INFER.precision,
            onnx_path=cfg.INFER.onnx_path,
            ir_optim=cfg.INFER.ir_optim,
            min_subgraph_size=cfg.INFER.min_subgraph_size,
            gpu_mem=cfg.INFER.gpu_mem,
            gpu_id=cfg.INFER.gpu_id,
            max_batch_size=cfg.INFER.max_batch_size,
            num_cpu_threads=cfg.INFER.num_cpu_threads,
        )
        self.log_freq = cfg.log_freq

    def predict(
        self,
        input_dict: Dict[str, Union[np.ndarray, paddle.Tensor]],
        batch_size: int = 64,
    ) -> Dict[str, np.ndarray]:
        """
        Predicts the output of the model for the given input.

        The input is split along its first axis into batches of at most
        ``batch_size`` samples, each batch is run through the predictor, and
        the per-batch outputs are concatenated per output name.

        Args:
            input_dict (Dict[str, Union[np.ndarray, paddle.Tensor]]):
                A dictionary containing the input data, keyed by the model's
                input names. The number of samples is taken from the first
                entry.
            batch_size (int, optional): The batch size to use for prediction.
                Defaults to 64.

        Returns:
            Dict[str, np.ndarray]: A dictionary containing the predicted output.

        Raises:
            ValueError: If ``batch_size`` is not positive or ``input_dict`` is
                empty.
        """
        if batch_size <= 0:
            raise ValueError(f"batch_size should be positive, but got {batch_size}.")
        if not input_dict:
            raise ValueError("input_dict should contain at least one input.")

        if batch_size > self.max_batch_size:
            logger.warning(
                f"batch_size({batch_size}) is larger than "
                f"max_batch_size({self.max_batch_size}), which may occur error."
            )

        # With the onnx engine 'self.predictor' is an onnxruntime session,
        # which has no input/output handles and is fed through 'run' instead.
        use_onnx = self.engine == "onnx"
        if use_onnx:
            output_names = [node.name for node in self.predictor.get_outputs()]
        else:
            # prepare input handle(s)
            input_handles = {
                name: self.predictor.get_input_handle(name) for name in input_dict
            }
            # prepare output handle(s)
            output_handles = {
                name: self.predictor.get_output_handle(name)
                for name in self.predictor.get_output_names()
            }

        num_samples = len(next(iter(input_dict.values())))
        batch_num = (num_samples + (batch_size - 1)) // batch_size
        pred_dict = misc.Prettydefaultdict(list)

        # inference by batch
        for batch_id in range(1, batch_num + 1):
            # a non-positive log_freq only logs the last batch instead of
            # raising ZeroDivisionError
            periodic_log = self.log_freq > 0 and batch_id % self.log_freq == 0
            if periodic_log or batch_id == batch_num:
                logger.info(f"Predicting batch {batch_id}/{batch_num}")

            # prepare batch input dict
            st = (batch_id - 1) * batch_size
            ed = min(num_samples, batch_id * batch_size)
            batch_input_dict = {key: input_dict[key][st:ed] for key in input_dict}

            if use_onnx:
                # run predictor and pair the outputs with their names
                batch_outputs = self.predictor.run(output_names, batch_input_dict)
                batch_output_dict = dict(zip(output_names, batch_outputs))
            else:
                # send batch input data to input handle(s)
                for name, handle in input_handles.items():
                    handle.copy_from_cpu(batch_input_dict[name])

                # run predictor
                self.predictor.run()

                # receive batch output data from output handle(s)
                batch_output_dict = {
                    name: handle.copy_to_cpu()
                    for name, handle in output_handles.items()
                }

            # collect batch output data
            for key, batch_output in batch_output_dict.items():
                pred_dict[key].append(batch_output)

        # concatenate local predictions
        pred_dict = {key: np.concatenate(value) for key, value in pred_dict.items()}

        return pred_dict
Oops, something went wrong.