kunlunxin sam_h (#244)
liquanfeng authored Sep 25, 2023
1 parent 6b0ae6c commit 98d85df
Showing 5 changed files with 60 additions and 42 deletions.
18 changes: 18 additions & 0 deletions inference/benchmarks/sam_h/README.md
@@ -36,6 +36,24 @@
 
 - TensorRT 8.6.1
 
+#### 2.2 Kunlunxin R200
+
+- ##### Hardware environment
+  - Machine and accelerator card model: R200
+
+- ##### Software environment
+  - OS version: Ubuntu 20.04
+  - OS kernel version: 5.15.0-56-generic
+  - Accelerator card driver version: 4.0
+  - Docker version: 20.10.21
+  - Dependency versions:
+    - pytorch: 1.13.0+cpu
+    - onnx: 1.14.0
+
+- Inference toolkit
+
+  - XTCL 2.0.0.67
+
 ### 3. Results
 
 * Metric list
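The dependency versions added above can be sanity-checked inside the container with a short snippet; the version strings are taken directly from the list above, everything else is illustrative:

    import torch
    import onnx

    # Versions from the software-environment list above.
    assert torch.__version__.startswith("1.13.0"), torch.__version__
    assert onnx.__version__ == "1.14.0", onnx.__version__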
3 changes: 1 addition & 2 deletions inference/benchmarks/sam_h/pytorch/forward.py
@@ -84,7 +84,6 @@ def engine_forward(model, dataloader, evaluator, config):
     for step, (x, y, osize, dsize) in enumerate(dataloader):
         if config.fp16:
             x = x.to(torch.float16)
-            y = y.to(torch.float16)
         torch_sync(config)
         core_time_start = time.time()
 

@@ -101,7 +100,7 @@ def engine_forward(model, dataloader, evaluator, config):
         torch_sync(config)
         core_time += time.time() - core_time_start
 
-        pred = pred[0]
+        pred = pred[1]
         pred = pred.reshape(config.batch_size, 1, 3, 256, 256).float()
         pred = pred.cpu()
 
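The index change above suggests the mask tensor sits at a different position in the XTCL engine's output list than it does on other backends. A hedged sketch of a more defensive selection (the helper name and the shape check are illustrative, not from this repo):

    # Hypothetical helper: pick the mask output by its expected element count
    # rather than a hard-coded index, since output ordering can differ per backend.
    def select_mask_output(outputs, batch_size):
        expected = batch_size * 1 * 3 * 256 * 256  # shape used in forward.py above
        for out in outputs:
            if out.numel() == expected:
                return out
        raise ValueError("no output matches the expected mask shape")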
@@ -1,3 +1,4 @@
 compiler: xtcl
 no_validation: true
+vm_enable: false
 exist_onnx_path: onnxs/bertLarge/bertLarge_bs32_pytorch_fp16False.onnx
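Setting vm_enable: false routes bertLarge through the graph-executor branch of xtcl.py (shown below) instead of the Relay VM. A minimal sketch of the lookup, assuming a plain PyYAML load and a hypothetical filename:

    import yaml

    with open("kunlunxin_configurations.yaml") as f:  # hypothetical filename
        cfg = yaml.safe_load(f)

    # Mirrors the config._fields probing in xtcl.py: a missing vm_enable means True.
    vm_enable = cfg.get("vm_enable", True)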
@@ -0,0 +1,10 @@
+compiler: xtcl
+no_validation: true
+build_config:
+  FuseWithoutPattern:
+    - FuseConv2dTransposeBiasAdd
+  pattern_match:
+    - fuse_attention_sam
+disabled_pass:
+  - xgraph_layout_opt
+exist_onnx_path: onnxs/sam_h_bs4_pytorch_fp16True.onnx
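The engine below consumes these keys roughly as follows: pattern_match is rewritten into an XPUFuzzyMatch entry and disabled_pass is handed to TVM's PassContext. A condensed sketch of that logic, where cfg stands in for the parsed YAML dict above:

    # Condensed from build_engine in xtcl.py below; cfg is the parsed YAML above.
    build_config = dict(cfg.get("build_config", {}))
    disabled_pass = cfg.get("disabled_pass", [])
    if "pattern_match" in build_config:
        build_config["XPUFuzzyMatch"] = xpu_config.XPUGraphMatchConfig(
            pattern_match=build_config.pop("pattern_match")).value()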
70 changes: 30 additions & 40 deletions inference/inference_engine/kunlunxin/xtcl.py
@@ -1,90 +1,80 @@
+import os
+import time
+
 import onnx
+import torch
 import tvm
 import tvm.relay as relay
-from tvm.contrib.download import download_testdata
-from tvm.relay import param_dict
 from tvm.contrib import graph_executor, xpu_config
 from tvm.relay.xpu.patterns import custom_fuse_patterns
 from tvm.runtime.vm import VirtualMachine
-import torch
-import os
-import subprocess
 from loguru import logger
 import numpy as np
-import time
 
-USE_VM_COMPILE = False
 
 class InferModel:
 
-    def __init__(self, config , onnx_path, model):
+    def __init__(self, config, onnx_path, model):
         self.input_names = []
+        self.vm_enable = True
         self.engine = self.build_engine(config, onnx_path)
 
     def build_engine(self, config, onnx_path):
         onnx_model = onnx.load(onnx_path)
         shape_dict = {}
-        for input in onnx_model.graph.input:
-            input_shape = input.type.tensor_type.shape.dim
-            input_shape = [a.dim_value for a in input_shape]
-            #input_shape[0] = config.batch_size
-            input_name = input.name #'inputs:0'
+        for inp in onnx_model.graph.input:
+            input_name, input_shape, _, _ = relay.frontend.onnx.get_info(inp)
+            input_shape[0] = config.batch_size
             self.input_names.append(input_name)
             shape_dict[input_name] = input_shape
 
         mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)
 
         target_host = f'llvm -acc=xpu{os.environ.get("XPUSIM_DEVICE_MODEL", "KUNLUN1")[-1]}'
         ctx = tvm.device("xpu", 0)
-        build_config = {
-        }
+        build_config = config.build_config if 'build_config' in config._fields else {}
+        disabled_pass = config.disabled_pass if 'disabled_pass' in config._fields else []
+        self.vm_enable = config.vm_enable if 'vm_enable' in config._fields else True
+        if "pattern_match" in build_config:
+            build_config["XPUFuzzyMatch"] = xpu_config.XPUGraphMatchConfig(
+                pattern_match=build_config["pattern_match"]).value()
+            del build_config["pattern_match"]
         #os.environ["XTCL_BUILD_DEBUG"] = '1'
         if config.resnet50_fuse:
             os.environ["XTCL_FUSE_RES50V15"] = '1'
         if config.fp16 == True:
             os.environ["XTCL_USE_NEW_ALTER_PASS"] = '1'
-            input_fp16 = { name:"float16" for name in self.input_names}
             build_config["XPUOutDtypeConfig"] = xpu_config.XPUOutDtypeConfig(
-                default_precision="float16",
-                config_last_node=True,
-                config_map={
-                },
-                config_var_dtype_map=input_fp16,
-            ).value()
+                default_precision="float16",
+                config_last_node=True,
+                config_map={},
+            ).value()
         else: ## fp32
             os.environ["XTCL_USE_NEW_ALTER_PASS"] = '1'
             os.environ['XTCL_USE_FP16'] = '1'
             os.environ['XTCL_QUANTIZE_WEIGHT'] = '1'
 
-        with tvm.transform.PassContext(opt_level=3, config=build_config):
-            if USE_VM_COMPILE:
-                vm_exec = relay.backend.vm.compile(mod,
-                                                   target=target_host,
-                                                   target_host=target_host,
-                                                   params=params)
+        with tvm.transform.PassContext(opt_level=3, config=build_config, disabled_pass=disabled_pass):
+            if self.vm_enable:
+                vm_exec = relay.backend.vm.compile(mod, target=target_host, target_host=target_host, params=params)
                 vm = VirtualMachine(vm_exec, ctx)
                 return vm
             else:
                 graph, lib, params = relay.build(mod,
-                                                 target="xpu -libs=xdnn -split-device-funcs -device-type=xpu2",
-                                                 params=params)
+                        target="xpu -libs=xdnn -split-device-funcs -device-type=xpu2",
+                        params=params)
                 m = graph_executor.create(graph, lib, ctx)
                 m.set_input(**params)
                 return m
 
     def __call__(self, model_inputs: list):
         for index, input_name in enumerate(self.input_names):
-            if USE_VM_COMPILE:
-                self.engine.set_one_input("main",input_name, model_inputs[index].numpy())
+            if self.vm_enable:
+                self.engine.set_one_input("main", input_name, model_inputs[index].numpy())
             else:
-                self.engine.set_input(input_name, model_inputs[index].numpy())
+                self.engine.set_input(input_name, tvm.nd.array(model_inputs[index]))
         self.engine.run()
         foo_time_start = time.time()
         output_list = [self.engine.get_output(i) for i in range(self.engine.get_num_outputs())]
         # d2h
-        output_list = [torch.from_numpy(output.asnumpy()) for output in output_list]
+        output_list = [torch.from_numpy(output.numpy()) for output in output_list]
         foo_time = time.time() - foo_time_start
         return output_list, foo_time
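For context, a minimal usage sketch of InferModel: the batch size, input shape, and ONNX path are assumptions based on the sam_h config above, and the real harness builds its config object elsewhere:

    from collections import namedtuple
    import torch

    # The config is assumed namedtuple-like, since build_engine probes config._fields;
    # build_config / disabled_pass / vm_enable fall back to defaults when absent
    # (vm_enable defaults to True, i.e. the Relay VM path).
    Cfg = namedtuple("Cfg", ["batch_size", "fp16", "resnet50_fuse"])
    config = Cfg(batch_size=4, fp16=True, resnet50_fuse=False)

    model = InferModel(config, "onnxs/sam_h_bs4_pytorch_fp16True.onnx", None)
    x = torch.randn(config.batch_size, 3, 1024, 1024).to(torch.float16)  # assumed SAM input
    outputs, foo_time = model([x])  # torch tensors plus device-to-host copy time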


