diff --git a/inference/benchmarks/sam_h/README.md b/inference/benchmarks/sam_h/README.md
index 02d6cf352..3ab043f48 100644
--- a/inference/benchmarks/sam_h/README.md
+++ b/inference/benchmarks/sam_h/README.md
@@ -36,6 +36,24 @@
 - TensorRT 8.6.1
 
+#### 2.2 Kunlunxin R200
+
+- ##### Hardware environment
+  - Machine and accelerator model: R200
+
+- ##### Software environment
+  - OS version: Ubuntu 20.04
+  - OS kernel version: 5.15.0-56-generic
+  - Accelerator driver version: 4.0
+  - Docker version: 20.10.21
+  - Dependency versions:
+    - pytorch: 1.13.0+cpu
+    - onnx: 1.14.0
+
+- Inference toolkit
+
+  - XTCL 2.0.0.67
+
 ### 3. Results
 
 * Metric list
diff --git a/inference/benchmarks/sam_h/pytorch/forward.py b/inference/benchmarks/sam_h/pytorch/forward.py
index df61177fa..9ff355c68 100644
--- a/inference/benchmarks/sam_h/pytorch/forward.py
+++ b/inference/benchmarks/sam_h/pytorch/forward.py
@@ -84,7 +84,6 @@ def engine_forward(model, dataloader, evaluator, config):
     for step, (x, y, osize, dsize) in enumerate(dataloader):
         if config.fp16:
             x = x.to(torch.float16)
-            y = y.to(torch.float16)
 
         torch_sync(config)
         core_time_start = time.time()
@@ -101,7 +100,7 @@ def engine_forward(model, dataloader, evaluator, config):
             torch_sync(config)
             core_time += time.time() - core_time_start
 
-            pred = pred[0]
+            pred = pred[1]
             pred = pred.reshape(config.batch_size, 1, 3, 256, 256).float()
             pred = pred.cpu()
diff --git a/inference/configs/bertLarge/vendor_config/kunlunxin_configurations.yaml b/inference/configs/bertLarge/vendor_config/kunlunxin_configurations.yaml
index c29b9c46b..7cb3e921a 100644
--- a/inference/configs/bertLarge/vendor_config/kunlunxin_configurations.yaml
+++ b/inference/configs/bertLarge/vendor_config/kunlunxin_configurations.yaml
@@ -1,3 +1,4 @@
 compiler: xtcl
 no_validation: true
+vm_enable: false
 exist_onnx_path: onnxs/bertLarge/bertLarge_bs32_pytorch_fp16False.onnx
diff --git a/inference/configs/sam_h/vendor_config/kunlunxin_configurations.yaml b/inference/configs/sam_h/vendor_config/kunlunxin_configurations.yaml
new file mode 100644
index 000000000..81b04fceb
--- /dev/null
+++ b/inference/configs/sam_h/vendor_config/kunlunxin_configurations.yaml
@@ -0,0 +1,10 @@
+compiler: xtcl
+no_validation: true
+build_config:
+  FuseWithoutPattern:
+    - FuseConv2dTransposeBiasAdd
+  pattern_match:
+    - fuse_attention_sam
+disabled_pass:
+  - xgraph_layout_opt
+exist_onnx_path: onnxs/sam_h_bs4_pytorch_fp16True.onnx
diff --git a/inference/inference_engine/kunlunxin/xtcl.py b/inference/inference_engine/kunlunxin/xtcl.py
index 5e38a7e41..eb31dfe06 100755
--- a/inference/inference_engine/kunlunxin/xtcl.py
+++ b/inference/inference_engine/kunlunxin/xtcl.py
@@ -1,33 +1,28 @@
+import os
+import time
+
 import onnx
+import torch
 import tvm
 import tvm.relay as relay
-from tvm.contrib.download import download_testdata
-from tvm.relay import param_dict
 from tvm.contrib import graph_executor, xpu_config
+from tvm.relay.xpu.patterns import custom_fuse_patterns  # imported for its side effect of registering custom fuse patterns (e.g. fuse_attention_sam)
 from tvm.runtime.vm import VirtualMachine
-import torch
-import os
-import subprocess
-from loguru import logger
-import numpy as np
-import time
 
-USE_VM_COMPILE = False
 
 class InferModel:
 
-    def __init__(self, config , onnx_path, model):
+    def __init__(self, config, onnx_path, model):
         self.input_names = []
+        self.vm_enable = True  # default; build_engine overrides it from the vendor config
         self.engine = self.build_engine(config, onnx_path)
 
     def build_engine(self, config, onnx_path):
         onnx_model = onnx.load(onnx_path)
         shape_dict = {}
-        for input in onnx_model.graph.input:
-            input_shape = input.type.tensor_type.shape.dim
-            input_shape = [a.dim_value for a in input_shape]
-            #input_shape[0] = config.batch_size
-            input_name = input.name  #'inputs:0'
+        for inp in onnx_model.graph.input:
+            input_name, input_shape, _, _ = relay.frontend.onnx.get_info(inp)
+            input_shape[0] = config.batch_size
             self.input_names.append(input_name)
             shape_dict[input_name] = input_shape
@@ -35,56 +30,51 @@ def build_engine(self, config, onnx_path):
         target_host = f'llvm -acc=xpu{os.environ.get("XPUSIM_DEVICE_MODEL", "KUNLUN1")[-1]}'
         ctx = tvm.device("xpu", 0)
 
-        build_config = {
-        }
+        build_config = config.build_config if 'build_config' in config._fields else {}
+        disabled_pass = config.disabled_pass if 'disabled_pass' in config._fields else []
+        self.vm_enable = config.vm_enable if 'vm_enable' in config._fields else True
+        if "pattern_match" in build_config:
+            build_config["XPUFuzzyMatch"] = xpu_config.XPUGraphMatchConfig(
+                pattern_match=build_config["pattern_match"]).value()
+            del build_config["pattern_match"]
 
         #os.environ["XTCL_BUILD_DEBUG"] = '1'
         if config.resnet50_fuse:
             os.environ["XTCL_FUSE_RES50V15"] = '1'
 
         if config.fp16 == True:
             os.environ["XTCL_USE_NEW_ALTER_PASS"] = '1'
-            input_fp16 = {name: "float16" for name in self.input_names}
             build_config["XPUOutDtypeConfig"] = xpu_config.XPUOutDtypeConfig(
-                default_precision="float16",
-                config_last_node=True,
-                config_map={
-                },
-                config_var_dtype_map=input_fp16,
-            ).value()
+                default_precision="float16",
+                config_last_node=True,
+                config_map={},
+            ).value()
         else:
             ## fp32
             os.environ["XTCL_USE_NEW_ALTER_PASS"] = '1'
             os.environ['XTCL_USE_FP16'] = '1'
             os.environ['XTCL_QUANTIZE_WEIGHT'] = '1'
 
-        with tvm.transform.PassContext(opt_level=3, config=build_config):
-            if USE_VM_COMPILE:
-                vm_exec = relay.backend.vm.compile(mod,
-                                                   target=target_host,
-                                                   target_host=target_host,
-                                                   params=params)
-
+        with tvm.transform.PassContext(opt_level=3, config=build_config, disabled_pass=disabled_pass):
+            if self.vm_enable:
+                vm_exec = relay.backend.vm.compile(mod, target=target_host, target_host=target_host, params=params)
                 vm = VirtualMachine(vm_exec, ctx)
                 return vm
             else:
                 graph, lib, params = relay.build(mod,
-                                                 target="xpu -libs=xdnn -split-device-funcs -device-type=xpu2",
-                                                 params=params)
+                    target="xpu -libs=xdnn -split-device-funcs -device-type=xpu2",
+                    params=params)
                 m = graph_executor.create(graph, lib, ctx)
                 m.set_input(**params)
                 return m
 
     def __call__(self, model_inputs: list):
         for index, input_name in enumerate(self.input_names):
-            if USE_VM_COMPILE:
-                self.engine.set_one_input("main",input_name, model_inputs[index].numpy())
+            if self.vm_enable:
+                self.engine.set_one_input("main", input_name, model_inputs[index].numpy())
             else:
-                self.engine.set_input(input_name, model_inputs[index].numpy())
+                self.engine.set_input(input_name, tvm.nd.array(model_inputs[index]))
         self.engine.run()
         foo_time_start = time.time()
         output_list = [self.engine.get_output(i) for i in range(self.engine.get_num_outputs())]
         # d2h
-        output_list = [torch.from_numpy(output.asnumpy()) for output in output_list]
+        output_list = [torch.from_numpy(output.numpy()) for output in output_list]
         foo_time = time.time() - foo_time_start
 
         return output_list, foo_time
-
-
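
For context, the rewritten input loop in `build_engine` delegates shape extraction to `relay.frontend.onnx.get_info`, a helper from TVM's ONNX frontend that returns `(name, shape, dtype, shape_name)` for an input proto. The same shape collection can be sketched in plain `onnx` for environments without XTCL; the helper name `onnx_input_shapes` below is illustrative, not part of the repo:

```python
import onnx


def onnx_input_shapes(onnx_path: str, batch_size: int) -> dict:
    """Collect {input_name: shape} from an ONNX graph, pinning the batch dim.

    Plain-onnx equivalent of the get_info()-based loop in build_engine();
    get_info() additionally returns the dtype and a symbolic shape name,
    which the engine discards.
    """
    shape_dict = {}
    for inp in onnx.load(onnx_path).graph.input:
        dims = [d.dim_value for d in inp.type.tensor_type.shape.dim]
        dims[0] = batch_size  # dim_value is 0 for symbolic axes, so pin the batch dim
        shape_dict[inp.name] = dims
    return shape_dict
```

Pinning `input_shape[0]` to `config.batch_size` (previously commented out) is what lets the same exported ONNX serve different benchmark batch sizes.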
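
The three new vendor-config keys (`vm_enable`, `build_config`, `disabled_pass`) are all optional: `build_engine` probes `config._fields` and falls back to the previous behavior when a key is absent, so existing kunlunxin configs keep working unchanged. A minimal sketch of that fallback pattern, assuming a namedtuple-like config object (the `Config` stand-in below is hypothetical; the real object is whatever the harness builds from the merged YAMLs, and anything exposing `_fields` satisfies the probes):

```python
from collections import namedtuple

# Hypothetical stand-in for the harness's merged-config object.
Config = namedtuple("Config", ["compiler", "no_validation", "vm_enable"])
cfg = Config(compiler="xtcl", no_validation=True, vm_enable=False)  # bertLarge-style config

# Same graceful-fallback pattern as build_engine(): a missing key selects
# the pre-patch default, so no other vendor config needs editing.
vm_enable = cfg.vm_enable if "vm_enable" in cfg._fields else True
build_config = cfg.build_config if "build_config" in cfg._fields else {}
disabled_pass = cfg.disabled_pass if "disabled_pass" in cfg._fields else []

print(vm_enable, build_config, disabled_pass)  # False {} []
```

With `vm_enable: false`, bertLarge drops back to the `graph_executor` path in `__call__`, while sam_h keeps the VirtualMachine path and additionally routes its `pattern_match` list into `XPUGraphMatchConfig` and disables the `xgraph_layout_opt` pass.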