diff --git a/deploy/python_infer/base.py b/deploy/python_infer/base.py index f129f40ca..05f9b467e 100644 --- a/deploy/python_infer/base.py +++ b/deploy/python_infer/base.py @@ -163,7 +163,7 @@ def _create_paddle_predictor( # enable memory optim config.enable_memory_optim() - config.disable_glog_info() + # config.disable_glog_info() # enable zero copy config.switch_use_feed_fetch_ops(False) config.switch_ir_optim(self.ir_optim) diff --git a/examples/yinglong/conf/yinglong_12.yaml b/examples/yinglong/conf/yinglong_12.yaml new file mode 100644 index 000000000..e2e809448 --- /dev/null +++ b/examples/yinglong/conf/yinglong_12.yaml @@ -0,0 +1,55 @@ +hydra: + run: + # dynamic output directory according to running time and override name + # dir: outputs_yinglong/${now:%Y-%m-%d}/${now:%H-%M-%S}/${hydra.job.override_dirname} + dir: ./outputs_yinglong_12 + job: + name: ${mode} # name of logfile + chdir: false # keep current working direcotry unchaned + config: + override_dirname: + exclude_keys: + - TRAIN.checkpoint_path + - TRAIN.pretrained_model_path + - EVAL.pretrained_model_path + - INFER.pretrained_model_path + - mode + - output_dir + - log_freq + callbacks: + init_callback: + _target_: ppsci.utils.callbacks.InitCallback + sweep: + # output directory for multirun + dir: ${hydra.run.dir} + subdir: ./ + +# general settings +mode: train # running mode: train/eval +seed: 2023 +output_dir: ${hydra:run.dir} +log_freq: 20 + +# inference settings +INFER: + pretrained_model_path: null + export_path: ./inference/yinglong_12 + pdmodel_path: ${INFER.export_path}.pdmodel + pdpiparams_path: ${INFER.export_path}.pdiparams + onnx_path: ${INFER.export_path}.onnx + device: gpu + engine: native + precision: fp32 + ir_optim: false + min_subgraph_size: 30 + gpu_mem: 100 + gpu_id: 0 + max_batch_size: 1 + num_cpu_threads: 10 + batch_size: 1 + mean_path: ./hrrr_example_24vars/stat/mean_crop.npy + std_path: ./hrrr_example_24vars/stat/std_crop.npy + input_file: ./hrrr_example_24vars/valid/2022/01/01.h5 + init_time: 2022/01/01/00 + nwp_file: ./hrrr_example_24vars/nwp_convert/2022/01/01/00.h5 + num_timestamps: 22 diff --git a/examples/yinglong/conf/yinglong_24.yaml b/examples/yinglong/conf/yinglong_24.yaml new file mode 100644 index 000000000..1983c8c94 --- /dev/null +++ b/examples/yinglong/conf/yinglong_24.yaml @@ -0,0 +1,55 @@ +hydra: + run: + # dynamic output directory according to running time and override name + # dir: outputs_yinglong/${now:%Y-%m-%d}/${now:%H-%M-%S}/${hydra.job.override_dirname} + dir: ./outputs_yinglong_24 + job: + name: ${mode} # name of logfile + chdir: false # keep current working direcotry unchaned + config: + override_dirname: + exclude_keys: + - TRAIN.checkpoint_path + - TRAIN.pretrained_model_path + - EVAL.pretrained_model_path + - INFER.pretrained_model_path + - mode + - output_dir + - log_freq + callbacks: + init_callback: + _target_: ppsci.utils.callbacks.InitCallback + sweep: + # output directory for multirun + dir: ${hydra.run.dir} + subdir: ./ + +# general settings +mode: train # running mode: train/eval +seed: 2023 +output_dir: ${hydra:run.dir} +log_freq: 20 + +# inference settings +INFER: + pretrained_model_path: null + export_path: ./inference/yinglong_24 + pdmodel_path: ${INFER.export_path}.pdmodel + pdpiparams_path: ${INFER.export_path}.pdiparams + onnx_path: ${INFER.export_path}.onnx + device: gpu + engine: native + precision: fp32 + ir_optim: false + min_subgraph_size: 30 + gpu_mem: 100 + gpu_id: 0 + max_batch_size: 1 + num_cpu_threads: 10 + batch_size: 1 + mean_path: ./hrrr_example_69vars/stat/mean_crop.npy + std_path: ./hrrr_example_69vars/stat/std_crop.npy + input_file: ./hrrr_example_69vars/valid/2022/01/01.h5 + init_time: 2022/01/01/00 + nwp_file: ./hrrr_example_69vars/nwp_convert/2022/01/01/00.h5 + num_timestamps: 22 diff --git a/examples/yinglong/predict_12layers.py b/examples/yinglong/predict_12layers.py index 02c442371..cb9626695 100644 --- a/examples/yinglong/predict_12layers.py +++ b/examples/yinglong/predict_12layers.py @@ -12,74 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. -import argparse from os import path as osp import h5py +import hydra import numpy as np import paddle import pandas as pd +from omegaconf import DictConfig from packaging import version from examples.yinglong.plot import save_plot_weather_from_dict -from examples.yinglong.predictor import YingLong +from examples.yinglong.predictor import YingLongPredictor from ppsci.utils import logger -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument( - "--model_file", - type=str, - default="./yinglong_models/yinglong_12.pdmodel", - help="Model filename", - ) - parser.add_argument( - "--params_file", - type=str, - default="./yinglong_models/yinglong_12.pdiparams", - help="Parameter filename", - ) - parser.add_argument( - "--mean_path", - type=str, - default="./hrrr_example_24vars/stat/mean_crop.npy", - help="Mean filename", - ) - parser.add_argument( - "--std_path", - type=str, - default="./hrrr_example_24vars/stat/std_crop.npy", - help="Standard deviation filename", - ) - parser.add_argument( - "--input_file", - type=str, - default="./hrrr_example_24vars/valid/2022/01/01.h5", - help="Input filename", - ) - parser.add_argument( - "--init_time", type=str, default="2022/01/01/00", help="Init time" - ) - parser.add_argument( - "--nwp_file", - type=str, - default="./hrrr_example_24vars/nwp_convert/2022/01/01/00.h5", - help="NWP filename", - ) - parser.add_argument( - "--num_timestamps", type=int, default=22, help="Number of timestamps" - ) - parser.add_argument( - "--output_path", type=str, default="output", help="Output file path" - ) - - return parser.parse_args() - - -def main(): - args = parse_args() - logger.init_logger("ppsci", osp.join(args.output_path, "predict.log"), "info") +def inference(cfg: DictConfig): # log paddlepaddle's version if version.Version(paddle.__version__) != version.Version("0.0.0"): paddle_version = paddle.__version__ @@ -93,19 +41,20 @@ def main(): logger.info(f"Using paddlepaddle {paddle_version}") - num_timestamps = args.num_timestamps + num_timestamps = cfg.INFER.num_timestamps # create predictor - predictor = YingLong( - args.model_file, args.params_file, args.mean_path, args.std_path - ) + # predictor = YingLong( + # args.model_file, args.params_file, args.mean_path, args.std_path + # ) + predictor = YingLongPredictor(cfg) # load data # HRRR Crop use 24 atmospheric variableļ¼Œtheir index in the dataset is from 0 to 23. # The variable name is 'z50', 'z500', 'z850', 'z1000', 't50', 't500', 't850', 'z1000', # 's50', 's500', 's850', 's1000', 'u50', 'u500', 'u850', 'u1000', 'v50', 'v500', # 'v850', 'v1000', 'mslp', 'u10', 'v10', 't2m'. - input_file = h5py.File(args.input_file, "r")["fields"] - nwp_file = h5py.File(args.nwp_file, "r")["fields"] + input_file = h5py.File(cfg.INFER.input_file, "r")["fields"] + nwp_file = h5py.File(cfg.INFER.nwp_file, "r")["fields"] # input_data.shape: (1, 24, 440, 408) input_data = input_file[0:1] @@ -115,18 +64,18 @@ def main(): ground_truth = input_file[1 : num_timestamps + 1] # create time stamps - cur_time = pd.to_datetime(args.init_time, format="%Y/%m/%d/%H") + cur_time = pd.to_datetime(cfg.INFER.init_time, format="%Y/%m/%d/%H") time_stamps = [[cur_time]] for _ in range(num_timestamps): cur_time += pd.Timedelta(hours=1) time_stamps.append([cur_time]) # run predictor - pred_data = predictor(input_data, time_stamps, nwp_data) + pred_data = predictor.predict(input_data, time_stamps, nwp_data) pred_data = pred_data.squeeze(axis=1) # (num_timestamps, 24, 440, 408) # save predict data - save_path = osp.join(args.output_path, "result.npy") + save_path = osp.join(cfg.output_dir, "result.npy") np.save(save_path, pred_data) logger.info(f"Save output to {save_path}") @@ -139,15 +88,15 @@ def main(): data_dict = {} visu_keys = [] for i in range(num_timestamps): - visu_key = f"Init time: {args.init_time}h\n Ground truth: {i+1}h" + visu_key = f"Init time: {cfg.INFER.init_time}h\n Ground truth: {i+1}h" visu_keys.append(visu_key) data_dict[visu_key] = ground_truth_wind[i] - visu_key = f"Init time: {args.init_time}h\n YingLong-12 Layers: {i+1}h" + visu_key = f"Init time: {cfg.INFER.init_time}h\n YingLong-12 Layers: {i+1}h" visu_keys.append(visu_key) data_dict[visu_key] = pred_wind[i] save_plot_weather_from_dict( - foldername=args.output_path, + foldername=cfg.output_dir, data_dict=data_dict, visu_keys=visu_keys, xticks=np.linspace(0, 407, 7), @@ -159,7 +108,15 @@ def main(): colorbar_label="m/s", num_timestamps=12, # only plot 12 timestamps ) - logger.info(f"Save plot to {args.output_path}") + logger.info(f"Save plot to {cfg.output_dir}") + + +@hydra.main(version_base=None, config_path="./conf", config_name="yinglong_12.yaml") +def main(cfg: DictConfig): + if cfg.mode == "infer": + inference(cfg) + else: + raise ValueError(f"cfg.mode should in ['infer'], but got '{cfg.mode}'") if __name__ == "__main__": diff --git a/examples/yinglong/predict_24layers.py b/examples/yinglong/predict_24layers.py index dbde77c31..ee55c539a 100644 --- a/examples/yinglong/predict_24layers.py +++ b/examples/yinglong/predict_24layers.py @@ -12,74 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. -import argparse from os import path as osp import h5py +import hydra import numpy as np import paddle import pandas as pd +from omegaconf import DictConfig from packaging import version from examples.yinglong.plot import save_plot_weather_from_dict -from examples.yinglong.predictor import YingLong +from examples.yinglong.predictor import YingLongPredictor from ppsci.utils import logger -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument( - "--model_file", - type=str, - default="./yinglong_models/yinglong_24.pdmodel", - help="Model filename", - ) - parser.add_argument( - "--params_file", - type=str, - default="./yinglong_models/yinglong_24.pdiparams", - help="Parameter filename", - ) - parser.add_argument( - "--mean_path", - type=str, - default="./hrrr_example_69vars/stat/mean_crop.npy", - help="Mean filename", - ) - parser.add_argument( - "--std_path", - type=str, - default="./hrrr_example_69vars/stat/std_crop.npy", - help="Standard deviation filename", - ) - parser.add_argument( - "--input_file", - type=str, - default="./hrrr_example_69vars/valid/2022/01/01.h5", - help="Input filename", - ) - parser.add_argument( - "--init_time", type=str, default="2022/01/01/00", help="Init time" - ) - parser.add_argument( - "--nwp_file", - type=str, - default="./hrrr_example_69vars/nwp_convert/2022/01/01/00.h5", - help="NWP filename", - ) - parser.add_argument( - "--num_timestamps", type=int, default=22, help="Number of timestamps" - ) - parser.add_argument( - "--output_path", type=str, default="output_24layers", help="Output file path" - ) - - return parser.parse_args() - - -def main(): - args = parse_args() - logger.init_logger("ppsci", osp.join(args.output_path, "predict.log"), "info") +def inference(cfg: DictConfig): # log paddlepaddle's version if version.Version(paddle.__version__) != version.Version("0.0.0"): paddle_version = paddle.__version__ @@ -93,11 +41,9 @@ def main(): logger.info(f"Using paddlepaddle {paddle_version}") - num_timestamps = args.num_timestamps + num_timestamps = cfg.INFER.num_timestamps # create predictor - predictor = YingLong( - args.model_file, args.params_file, args.mean_path, args.std_path - ) + predictor = YingLongPredictor(cfg) # load data # HRRR Crop use 69 atmospheric variableļ¼Œtheir index in the dataset is from 0 to 68. @@ -109,8 +55,8 @@ def main(): # "u700", "u850", "u925", "u1000", "v50", "v100", "v150", "v200", "v250", "v300", # "v400", "v500", "v600", "v700", "v850", "v925", "v1000", "mslp", "u10", "v10", # "t2m", - input_file = h5py.File(args.input_file, "r")["fields"] - nwp_file = h5py.File(args.nwp_file, "r")["fields"] + input_file = h5py.File(cfg.INFER.input_file, "r")["fields"] + nwp_file = h5py.File(cfg.INFER.nwp_file, "r")["fields"] # input_data.shape: (1, 69, 440, 408) input_data = input_file[0:1] @@ -120,18 +66,18 @@ def main(): ground_truth = input_file[1 : num_timestamps + 1] # create time stamps - cur_time = pd.to_datetime(args.init_time, format="%Y/%m/%d/%H") + cur_time = pd.to_datetime(cfg.INFER.init_time, format="%Y/%m/%d/%H") time_stamps = [[cur_time]] for _ in range(num_timestamps): cur_time += pd.Timedelta(hours=1) time_stamps.append([cur_time]) # run predictor - pred_data = predictor(input_data, time_stamps, nwp_data) + pred_data = predictor.predict(input_data, time_stamps, nwp_data) pred_data = pred_data.squeeze(axis=1) # (num_timestamps, 69, 440, 408) # save predict data - save_path = osp.join(args.output_path, "result.npy") + save_path = osp.join(cfg.output_dir, "result.npy") np.save(save_path, pred_data) logger.info(f"Save output to {save_path}") @@ -144,15 +90,15 @@ def main(): data_dict = {} visu_keys = [] for i in range(num_timestamps): - visu_key = f"Init time: {args.init_time}h\n Ground truth: {i+1}h" + visu_key = f"Init time: {cfg.INFER.init_time}h\n Ground truth: {i+1}h" visu_keys.append(visu_key) data_dict[visu_key] = ground_truth_wind[i] - visu_key = f"Init time: {args.init_time}h\n YingLong-24 Layers: {i+1}h" + visu_key = f"Init time: {cfg.INFER.init_time}h\n YingLong-24 Layers: {i+1}h" visu_keys.append(visu_key) data_dict[visu_key] = pred_wind[i] save_plot_weather_from_dict( - foldername=args.output_path, + foldername=cfg.output_dir, data_dict=data_dict, visu_keys=visu_keys, xticks=np.linspace(0, 407, 7), @@ -164,7 +110,15 @@ def main(): colorbar_label="m/s", num_timestamps=12, # only plot 12 timestamps ) - logger.info(f"Save plot to {args.output_path}") + logger.info(f"Save plot to {cfg.output_dir}") + + +@hydra.main(version_base=None, config_path="./conf", config_name="yinglong_24.yaml") +def main(cfg: DictConfig): + if cfg.mode == "infer": + inference(cfg) + else: + raise ValueError(f"cfg.mode should in ['infer'], but got '{cfg.mode}'") if __name__ == "__main__": diff --git a/examples/yinglong/predictor.py b/examples/yinglong/predictor.py index 93b435608..a768ea30a 100644 --- a/examples/yinglong/predictor.py +++ b/examples/yinglong/predictor.py @@ -13,26 +13,44 @@ # limitations under the License. +from typing import List +from typing import Tuple + import numpy as np -import paddle.inference as paddle_infer import pandas as pd +from omegaconf import DictConfig +from deploy.python_infer import base from examples.yinglong.timefeatures import time_features +from ppsci.utils import logger -class YingLong: - def __init__( - self, model_file: str, params_file: str, mean_path: str, std_path: str - ): - self.model_file = model_file - self.params_file = params_file +class YingLongPredictor(base.Predictor): + """General predictor for YingLong model. - config = paddle_infer.Config(model_file, params_file) - config.switch_ir_optim(False) - config.enable_use_gpu(100, 0) - config.enable_memory_optim() + Args: + cfg (DictConfig): Running configuration. + """ - self.predictor = paddle_infer.create_predictor(config) + def __init__( + self, + cfg: DictConfig, + ): + super().__init__( + cfg.INFER.pdmodel_path, + cfg.INFER.pdpiparams_path, + device=cfg.INFER.device, + engine=cfg.INFER.engine, + precision=cfg.INFER.precision, + onnx_path=cfg.INFER.onnx_path, + ir_optim=cfg.INFER.ir_optim, + min_subgraph_size=cfg.INFER.min_subgraph_size, + gpu_mem=cfg.INFER.gpu_mem, + gpu_id=cfg.INFER.gpu_id, + max_batch_size=cfg.INFER.max_batch_size, + num_cpu_threads=cfg.INFER.num_cpu_threads, + ) + self.log_freq = cfg.log_freq # get input names and data handles self.input_names = self.predictor.get_input_names() @@ -45,10 +63,15 @@ def __init__( self.output_handle = self.predictor.get_output_handle(self.output_names[0]) # load mean and std data - self.mean = np.load(mean_path).reshape(-1, 1, 1).astype(np.float32) - self.std = np.load(std_path).reshape(-1, 1, 1).astype(np.float32) - - def _preprocess_data(self, input_data, time_stamps, nwp_data): + self.mean = np.load(cfg.INFER.mean_path).reshape(-1, 1, 1).astype("float32") + self.std = np.load(cfg.INFER.std_path).reshape(-1, 1, 1).astype("float32") + + def _preprocess_data( + self, + input_data: np.ndarray, + time_stamps: List[List[pd.Timestamp]], + nwp_data: np.ndarray, + ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: # normalize data input_data = (input_data - self.mean) / self.std nwp_data = (nwp_data - self.mean) / self.std @@ -62,28 +85,76 @@ def _preprocess_data(self, input_data, time_stamps, nwp_data): time_stamps = np.asarray(time_stamps) return input_data, time_stamps, nwp_data - def _postprocess_data(self, data): + def _postprocess_data(self, data: np.ndarray): # denormalize data data = data * self.std + self.mean return data - def __call__(self, input_data, time_stamp, nwp_data): - # preprocess data - input_data, time_stamps, nwp_data = self._preprocess_data( - input_data, time_stamp, nwp_data - ) + def predict( + self, + input_data: np.ndarray, + time_stamps: List[List[pd.Timestamp]], + nwp_data: np.ndarray, + batch_size: int = 1, + ) -> np.ndarray: + """Predicts the output of the yinglong model for the given input. + + Args: + input_data (np.ndarray): Input data of shape (N, T, H, W). + time_stamps (List[List[pd.Timestamp]]): Timestamps data. + nwp_data (np.ndarray): NWP data. + batch_size (int, optional): Batch size, now only support 1. Defaults to 1. + + Returns: + np.ndarray: Prediction. + """ + if batch_size != 1: + raise ValueError( + f"YingLongPredictor only support batch_size=1, but got {batch_size}" + ) + + # prepare input handle(s) + input_handles = { + self.input_names[0]: self.input_data_handle, + self.input_names[1]: self.time_stamps_handle, + self.input_names[2]: self.nwp_data_handle, + } + # prepare output handle(s) + output_handles = {self.output_names[0]: self.output_handle} + + num_samples = len(input_data) + if num_samples != 1: + raise ValueError( + f"YingLongPredictor only support num_samples=1, but got {num_samples}" + ) + + batch_num = 1 + + # inference by batch + for batch_id in range(1, batch_num + 1): + if batch_id % self.log_freq == 0 or batch_id == batch_num: + logger.info(f"Predicting batch {batch_id}/{batch_num}") + + # preprocess data + input_data, time_stamps, nwp_data = self._preprocess_data( + input_data, time_stamps, nwp_data + ) + # prepare batch input dict + batch_input_dict = { + self.input_names[0]: input_data, + self.input_names[1]: time_stamps, + self.input_names[2]: nwp_data, + } - # set input data - self.input_data_handle.copy_from_cpu(input_data) - self.time_stamps_handle.copy_from_cpu(time_stamps) - self.nwp_data_handle.copy_from_cpu(nwp_data) + # send batch input data to input handle(s) + for name, handle in input_handles.items(): + handle.copy_from_cpu(batch_input_dict[name]) - # run predictor - self.predictor.run() + # run predictor + self.predictor.run() - # get predict data - pred_data = self.output_handle.copy_to_cpu() + # receive batch output data from output handle(s) + pred = output_handles[self.output_names[0]].copy_to_cpu() + pred = self._postprocess_data(pred) - # postprocess data - pred_data = self._postprocess_data(pred_data) - return pred_data + return pred