From 493624444b3e7a33131768041c6fa1245735cca1 Mon Sep 17 00:00:00 2001 From: stezpy Date: Mon, 7 Aug 2023 15:12:55 +0800 Subject: [PATCH 1/8] add ixrt --- .../iluvatar_configurations.yaml | 7 + .../iluvatar/iluvatar_analysis.py | 14 + .../iluvatar/iluvatar_monitor.py | 256 ++++++++++++++++++ .../docker_images/iluvatar/pytorch/Dockerfile | 61 +++++ .../iluvatar/pytorch/packages/README.md | 7 + .../iluvatar/pytorch/pytorch_install.sh | 25 ++ .../iluvatar/pytorch/sdk_installers/README.md | 5 + inference/inference_engine/iluvatar/ixrt.py | 130 +++++++++ 8 files changed, 505 insertions(+) create mode 100644 inference/configs/resnet50/vendor_config/iluvatar_configurations.yaml create mode 100644 inference/docker_images/iluvatar/iluvatar_analysis.py create mode 100644 inference/docker_images/iluvatar/iluvatar_monitor.py create mode 100644 inference/docker_images/iluvatar/pytorch/Dockerfile create mode 100644 inference/docker_images/iluvatar/pytorch/packages/README.md create mode 100644 inference/docker_images/iluvatar/pytorch/pytorch_install.sh create mode 100644 inference/docker_images/iluvatar/pytorch/sdk_installers/README.md create mode 100644 inference/inference_engine/iluvatar/ixrt.py diff --git a/inference/configs/resnet50/vendor_config/iluvatar_configurations.yaml b/inference/configs/resnet50/vendor_config/iluvatar_configurations.yaml new file mode 100644 index 000000000..4e63bd183 --- /dev/null +++ b/inference/configs/resnet50/vendor_config/iluvatar_configurations.yaml @@ -0,0 +1,7 @@ +ixrt_tmp_path: iluvatar_tmp/resnet50-fp16.engine +has_dynamic_axis: false +repeat: 1 +image_size: 224 +batch_size: 128 +exist_onnx_path: onnxs/resnet50.onnx +# exist_compiler_path: resnet50-fp16.engine \ No newline at end of file diff --git a/inference/docker_images/iluvatar/iluvatar_analysis.py b/inference/docker_images/iluvatar/iluvatar_analysis.py new file mode 100644 index 000000000..26132d19d --- /dev/null +++ b/inference/docker_images/iluvatar/iluvatar_analysis.py @@ -0,0 +1,14 @@ +def analysis_log(logpath): + logfile = open(logpath) + + max_usage = 0.0 + max_mem = 0.0 + for line in logfile.readlines(): + if "MiB" in line: + usage = line.split(" ")[2] + usage = float(usage[:-3]) + max_usage = max(max_usage, usage) + max_mem = line.split(" ")[3] + max_mem = float(max_mem[:-3]) + + return round(max_usage / 1024.0, 2), round(max_mem / 1024.0, 2) diff --git a/inference/docker_images/iluvatar/iluvatar_monitor.py b/inference/docker_images/iluvatar/iluvatar_monitor.py new file mode 100644 index 000000000..ace3d8888 --- /dev/null +++ b/inference/docker_images/iluvatar/iluvatar_monitor.py @@ -0,0 +1,256 @@ +# !/usr/bin/env python3 +# encoding: utf-8 +''' +Usage: python3 sys-monitor.py -o operation -l [log_path] + -o, --operation start|stop|restart|status + -l, --log log path , ./logs/ default +''' + +import os +import sys +import time +import signal +import atexit +import argparse +import datetime +from multiprocessing import Process +import subprocess +import schedule + + +class Daemon: + ''' + daemon subprocess class. + usage: subclass this daemon and override the run() method. + sys-monitor.pid: in the /tmp/, auto del when unexpected exit. + verbose: debug mode, disabled default. + ''' + + def __init__(self, + pid_file, + log_file, + err_file, + gpu_log, + log_path, + rate=5, + stdin=os.devnull, + stdout=os.devnull, + stderr=os.devnull, + home_dir='.', + umask=0o22, + verbose=0): + self.stdin = stdin + self.stdout = stdout + self.stderr = stderr + self.home_dir = home_dir + self.verbose = verbose + self.pidfile = pid_file + self.logfile = log_file + self.errfile = err_file + self.gpufile = gpu_log + self.logpath = log_path + self.rate = rate + self.umask = umask + self.verbose = verbose + self.daemon_alive = True + + def get_pid(self): + try: + with open(self.pidfile, 'r') as pf: + pid = int(pf.read().strip()) + except IOError: + pid = None + except SystemExit: + pid = None + return pid + + def del_pid(self): + if os.path.exists(self.pidfile): + os.remove(self.pidfile) + + def run(self): + ''' + NOTE: override the method in subclass + ''' + + def gpu_mon(file): + TIMESTAMP = datetime.datetime.now().strftime('%Y-%m-%d-%H:%M:%S') + cmd = "ixsmi |grep 'Default'|awk '{print $3,$5,$9,$11,$13}'" + process = subprocess.Popen(cmd, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + encoding='utf-8') + try: + out = process.communicate(timeout=10) + except subprocess.TimeoutExpired: + process.kill() + out = process.communicate() + + if process.returncode != 0: + result = "error" + result = TIMESTAMP + "\n" + out[0] + "\n" + with open(file, 'a') as f: + f.write(result) + + def timer_gpu_mon(): + gpu_process = Process(target=gpu_mon, args=(self.gpufile, )) + gpu_process.start() + + schedule.every(self.rate).seconds.do(timer_gpu_mon) + while True: + schedule.run_pending() + time.sleep(5) + + def daemonize(self): + if self.verbose >= 1: + print('daemon process starting ...') + try: + pid = os.fork() + if pid > 0: + sys.exit(0) + except OSError as e: + sys.stderr.write('fork #1 failed: %d (%s)\n' % + (e.errno, e.strerror)) + sys.exit(1) + os.chdir(self.home_dir) + os.setsid() + os.umask(self.umask) + try: + pid = os.fork() + if pid > 0: + sys.exit(0) + except OSError as e: + sys.stderr.write('fork #2 failed: %d (%s)\n' % + (e.errno, e.strerror)) + sys.exit(1) + sys.stdout.flush() + sys.stderr.flush() + si = open(self.stdin, 'r') + so = open(self.stdout, 'a+') + if self.stderr: + se = open(self.stderr, 'a+') + else: + se = so + os.dup2(si.fileno(), sys.stdin.fileno()) + os.dup2(so.fileno(), sys.stdout.fileno()) + os.dup2(se.fileno(), sys.stderr.fileno()) + atexit.register(self.del_pid) + pid = str(os.getpid()) + with open(self.pidfile, 'w+') as f: + f.write('%s\n' % pid) + + def start(self): + if not os.path.exists(self.logpath): + os.makedirs(self.logpath) + elif os.path.exists(self.gpufile): + os.remove(self.gpufile) + if self.verbose >= 1: + print('ready to start ......') + # check for a pid file to see if the daemon already runs + pid = self.get_pid() + if pid: + msg = 'pid file %s already exists, is it already running?\n' + sys.stderr.write(msg % self.pidfile) + sys.exit(1) + # start the daemon + self.daemonize() + self.run() + + def stop(self): + if self.verbose >= 1: + print('stopping ...') + pid = self.get_pid() + if not pid: + msg = 'pid file [%s] does not exist. Not running?\n' % self.pidfile + sys.stderr.write(msg) + if os.path.exists(self.pidfile): + os.remove(self.pidfile) + return + # try to kill the daemon process + try: + i = 0 + while 1: + os.kill(pid, signal.SIGTERM) + time.sleep(1) + i = i + 1 + if i % 10 == 0: + os.kill(pid, signal.SIGHUP) + except OSError as err: + err = str(err) + if err.find('No such process') > 0: + if os.path.exists(self.pidfile): + os.remove(self.pidfile) + else: + print(str(err)) + sys.exit(1) + if self.verbose >= 1: + print('Stopped!') + + def restart(self): + self.stop() + self.start() + + def status(self): + pid = self.get_pid() + if pid: + if os.path.exists('/proc/%d' % pid): + return pid + return False + + +def parse_args(): + ''' Check script input parameter. ''' + parse = argparse.ArgumentParser(description='Sys monitor script') + parse.add_argument('-o', + type=str, + metavar='[operation]', + required=True, + help='start|stop|restart|status') + parse.add_argument('-l', + type=str, + metavar='[log_path]', + required=False, + default='./logs/', + help='log path') + args = parse.parse_args() + return args + + +def main(): + sample_rate1 = 5 + args = parse_args() + operation = args.o + log_path = args.l + pid_fn = str('/tmp/gpu_monitor.pid') + log_fn = str(log_path + '/iluvatar_monitor.log') + err_fn = str(log_path + '/iluvatar_monitor.err') + # result for gpu + gpu_fn = str(log_path + '/iluvatar_monitor.log') + + subdaemon = Daemon(pid_fn, + log_fn, + err_fn, + gpu_fn, + log_path, + verbose=1, + rate=sample_rate1) + if operation == 'start': + subdaemon.start() + elif operation == 'stop': + subdaemon.stop() + elif operation == 'restart': + subdaemon.restart() + elif operation == 'status': + pid = subdaemon.status() + if pid: + print('process [%s] is running ......' % pid) + else: + print('daemon process [%s] stopped' % pid) + else: + print("invalid argument!") + sys.exit(1) + + +if __name__ == '__main__': + main() diff --git a/inference/docker_images/iluvatar/pytorch/Dockerfile b/inference/docker_images/iluvatar/pytorch/Dockerfile new file mode 100644 index 000000000..3e72721cf --- /dev/null +++ b/inference/docker_images/iluvatar/pytorch/Dockerfile @@ -0,0 +1,61 @@ +FROM ubuntu:20.04 + +RUN /bin/bash -c "source /root/.bashrc" + +ENV DEBIAN_FRONTEND=noninteractive +ENV PATH /root/miniconda/bin:$PATH + +RUN sed -i 's#http://archive.ubuntu.com/#http://mirrors.tuna.tsinghua.edu.cn/#' /etc/apt/sources.list +RUN apt-get update -y +RUN apt-get install -y --fix-missing \ + apt-utils \ + sudo \ + openssh-server \ + vim \ + git \ + curl \ + wget \ + tree \ + perl \ + kmod \ + make \ + pciutils \ + build-essential \ + python3.8-dev \ + python3-pip \ + libjpeg-dev \ + zlib1g-dev \ + unzip \ + cmake \ + bzip2 \ + cabextract \ + iputils-ping \ + pbzip2 \ + pv \ + numactl \ + ninja-build \ + libgl1-mesa-dev + + +# Configure anaconda +RUN wget https://repo.anaconda.com/miniconda/Miniconda3-py38_4.10.3-Linux-x86_64.sh && \ + bash ./Miniconda3-py38_4.10.3-Linux-x86_64.sh -b -p /root/miniconda && \ + /root/miniconda/bin/conda clean -tipsy && \ + ln -s /root/miniconda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \ + echo ". /root/miniconda/etc/profile.d/conda.sh" >> ~/.bashrc && \ + echo "conda activate base" >> ~/.bashrc && \ + conda config --set always_yes yes --set changeps1 no && \ + echo 'LD_LIBRARY_PATH="/usr/local/corex/lib:${LD_LIBRARY_PATH}"' >> ~/.bashrc && \ + echo 'PATH="/usr/local/corex/bin:${PATH}"' >> ~/.bashrc + + +RUN /bin/bash -c "apt-get install -y linux-headers-`uname -r`" + +RUN /bin/bash -c "pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple" + +ENV LD_LIBRARY_PATH="/usr/local/corex/lib:${LD_LIBRARY_PATH}" +ENV PATH="/usr/local/corex/bin:${PATH}" +ENV NVCC_ARGUMENTS="-U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -ftemplate-depth=1024" + +RUN pip install loguru +RUN pip install pyyaml \ No newline at end of file diff --git a/inference/docker_images/iluvatar/pytorch/packages/README.md b/inference/docker_images/iluvatar/pytorch/packages/README.md new file mode 100644 index 000000000..dca100acb --- /dev/null +++ b/inference/docker_images/iluvatar/pytorch/packages/README.md @@ -0,0 +1,7 @@ +# 以下软件包需联系天数智芯获取 + +ixrt-0.4.0+corex.3.2.0-cp38-cp38-linux_x86_64.whl + +torch-1.13.1+corex.3.2.0-cp38-cp38-linux_x86_64.whl + +torchvision-0.14.1+corex.3.2.0-cp38-cp38-linux_x86_64.whl \ No newline at end of file diff --git a/inference/docker_images/iluvatar/pytorch/pytorch_install.sh b/inference/docker_images/iluvatar/pytorch/pytorch_install.sh new file mode 100644 index 000000000..859591930 --- /dev/null +++ b/inference/docker_images/iluvatar/pytorch/pytorch_install.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +SDK_DIR="/workspace/docker_image/sdk_installers" +PKG_DIR="/workspace/docker_image/packages" + +search_cuda_results=`find ${SDK_DIR} -name "partial_install_cuda_header.tar.gz"` +for installer in $search_cuda_results; do + echo "Install ${installer}" + tar zxvf ${installer} + sh "$(echo $(basename ${installer}) | cut -d . -f1)/install-cuda-header.sh" -- --silent --toolkit + rm -rf "$(echo $(basename ${installer}) | cut -d . -f1)" +done + +search_sdk_results=`find ${SDK_DIR} -name "corex*.run"` +for installer in $search_sdk_results; do + echo "Install ${installer}" + sh "${installer}" -- --silent --driver --toolkit +done + +search_packages_results=`find ${PKG_DIR} -name "*.whl"` +for pkg in $search_packages_results; do + echo "Install ${pkg}" + pip3 install "${pkg}" +done + diff --git a/inference/docker_images/iluvatar/pytorch/sdk_installers/README.md b/inference/docker_images/iluvatar/pytorch/sdk_installers/README.md new file mode 100644 index 000000000..fc9bf8738 --- /dev/null +++ b/inference/docker_images/iluvatar/pytorch/sdk_installers/README.md @@ -0,0 +1,5 @@ +# 以下软件包需联系天数智芯获取 + +corex-installer-linux64-3.2.0-20230718_x86_64_10.2.run + +partial_install_cuda_header.tar.gz \ No newline at end of file diff --git a/inference/inference_engine/iluvatar/ixrt.py b/inference/inference_engine/iluvatar/ixrt.py new file mode 100644 index 000000000..625749235 --- /dev/null +++ b/inference/inference_engine/iluvatar/ixrt.py @@ -0,0 +1,130 @@ +from ixrt import IxRT, RuntimeConfig, RuntimeContext +import torch +import os +import subprocess +from loguru import logger +import numpy as np +import time + + +class InferModel: + + class HostDeviceMem(object): + + def __init__(self, host_mem, device_mem): + self.host = host_mem + self.device = device_mem + + def __str__(self): + return "Host:\n" + str(self.host) + "\nDevice:\n" + str( + self.device) + + def __repr__(self): + return self.__str__() + + def __init__(self, config, onnx_path, model): + self.engine = self.build_engine(config, onnx_path) + self.outputs = self.allocate_buffers(self.engine) + + def config_init_engine(self, config, onnx_path): + quant_file = None + + runtime_config = RuntimeConfig() + + input_shapes = [config.batch_size, 3, config.image_size, config.image_size] + runtime_config.input_shapes = [("input", input_shapes)] + runtime_config.device_idx = 0 + + precision = "float16" + if precision=="int8": + assert quant_file, "Quant file must provided for int8 inferencing." + + runtime_config.runtime_context = RuntimeContext( + precision, + "nhwc", + use_gpu=True, + pipeline_sync=True, + input_types={"input": "float32"}, + output_types={"output": "float32"}, + input_device="gpu", + output_device="gpu", + ) + + runtime = IxRT.from_onnx(onnx_path, quant_file, runtime_config) + return runtime + + def build_engine(self, config, onnx_path): + if config.exist_compiler_path is None: + output_path = config.log_dir + "/" + config.ixrt_tmp_path + + dir_output_path = os.path.dirname(output_path) + os.makedirs(dir_output_path, exist_ok=True) + + time.sleep(10) + + runtime = self.config_init_engine(config, onnx_path) + print(f"Build Engine File: {output_path}") + runtime.BuildEngine() + runtime.SerializeEngine(output_path) + print("Build Engine done!") + else: + output_path = config.exist_compiler_path + print(f"Use existing engine: {output_path}") + + runtime = IxRT() + runtime.LoadEngine(output_path, config.batch_size) + return runtime + + def allocate_buffers(self, engine): + output_map = engine.GetOutputShape() + output_io_buffers = [] + output_types = {} + config = engine.GetConfig() + for key, val in config.runtime_context.output_types.items(): + output_types[key] = str(val) + for name, shape in output_map.items(): + # 1. apply memory buffer for output of the shape + if output_types[name] =="float32": + buffer = np.zeros(shape.dims, dtype=np.float32) + elif output_types[name] =="int32": + buffer = np.zeros(shape.dims, dtype=np.int32) + elif output_types[name] =="float16": + buffer = np.zeros(shape.dims, dtype=np.float16) + else: + raise RuntimeError("need to add a {} datatype of output".format(output_types[name])) + buffer = torch.tensor(buffer).cuda() + # 2. put the buffer to a list + output_io_buffers.append([name, buffer, shape]) + + engine.BindIOBuffers(output_io_buffers) + return output_io_buffers + + def __call__(self, model_inputs: list): + batch_size = np.unique(np.array([i.size(dim=0) for i in model_inputs])) + batch_size = batch_size[0] + input_map = self.engine.GetInputShape() + input_io_buffers = [] + + for i, model_input in enumerate(model_inputs): + model_input = torch.tensor(model_input.numpy(), dtype=torch.float32).cuda() + if not model_input.is_contiguous(): + model_input = model_input.contiguous() + name, shape = list(input_map.items())[0] + _shape, _padding = shape.dims, shape.padding + _shape = [i + j for i, j in zip(_shape, _padding)] + _shape = [_shape[0], *_shape[2:], _shape[1]] + input_io_buffers.append([name, model_input, shape]) + + self.engine.BindIOBuffers(self.outputs) + self.engine.LoadInput(input_io_buffers) + + # torch.cuda.synchronize() + self.engine.Execute() + # torch.cuda.synchronize() + + gpu_io_buffers = [] + for buffer in self.outputs: + # gpu_io_buffers.append([buffer[0], buffer[1], buffer[2]]) + gpu_io_buffers.append(buffer[1].cpu()) + + return gpu_io_buffers, 0 From b8aade6a4b89edcb63ed06b3f304379f48acadbf Mon Sep 17 00:00:00 2001 From: stezpy Date: Mon, 7 Aug 2023 15:16:54 +0800 Subject: [PATCH 2/8] add torch sync --- inference/tools/torch_sync.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/inference/tools/torch_sync.py b/inference/tools/torch_sync.py index 6e5e8b09a..9fb8ac210 100644 --- a/inference/tools/torch_sync.py +++ b/inference/tools/torch_sync.py @@ -4,3 +4,5 @@ def torch_sync(config): if config.vendor == "nvidia": torch.cuda.synchronize() + elif config.vendor == "iluvatar": + torch.cuda.synchronize() \ No newline at end of file From 790edf515a01095b37b2b1730db192d4759b55c8 Mon Sep 17 00:00:00 2001 From: stezpy Date: Mon, 7 Aug 2023 16:06:27 +0800 Subject: [PATCH 3/8] customized input & output --- .../resnet50/vendor_config/iluvatar_configurations.yaml | 4 +++- inference/inference_engine/iluvatar/ixrt.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/inference/configs/resnet50/vendor_config/iluvatar_configurations.yaml b/inference/configs/resnet50/vendor_config/iluvatar_configurations.yaml index 4e63bd183..84d9aae89 100644 --- a/inference/configs/resnet50/vendor_config/iluvatar_configurations.yaml +++ b/inference/configs/resnet50/vendor_config/iluvatar_configurations.yaml @@ -4,4 +4,6 @@ repeat: 1 image_size: 224 batch_size: 128 exist_onnx_path: onnxs/resnet50.onnx -# exist_compiler_path: resnet50-fp16.engine \ No newline at end of file +# exist_compiler_path: resnet50-fp16.engine +output_types: {"output":"float32"} +input_types: {"input": "float32"} \ No newline at end of file diff --git a/inference/inference_engine/iluvatar/ixrt.py b/inference/inference_engine/iluvatar/ixrt.py index 625749235..85b835062 100644 --- a/inference/inference_engine/iluvatar/ixrt.py +++ b/inference/inference_engine/iluvatar/ixrt.py @@ -44,8 +44,8 @@ def config_init_engine(self, config, onnx_path): "nhwc", use_gpu=True, pipeline_sync=True, - input_types={"input": "float32"}, - output_types={"output": "float32"}, + input_types=config.input_types, + output_types=config.output_types, input_device="gpu", output_device="gpu", ) From e63b174da7ea81d405c66962ac4bf4dac067fbca Mon Sep 17 00:00:00 2001 From: stezpy Date: Thu, 10 Aug 2023 16:59:35 +0800 Subject: [PATCH 4/8] merge latest --- inference/benchmarks/resnet50/README.md | 21 +++++++++++++- .../iluvatar_configurations.yaml | 3 +- .../iluvatar/iluvatar_analysis.py | 3 +- .../iluvatar/pytorch/packages/README.md | 2 ++ .../iluvatar/pytorch/sdk_installers/README.md | 2 ++ inference/inference_engine/iluvatar/ixrt.py | 29 +++++++++---------- 6 files changed, 41 insertions(+), 19 deletions(-) diff --git a/inference/benchmarks/resnet50/README.md b/inference/benchmarks/resnet50/README.md index 024b7f417..c92390391 100644 --- a/inference/benchmarks/resnet50/README.md +++ b/inference/benchmarks/resnet50/README.md @@ -60,6 +60,25 @@ find ./val -name "*JPEG" | wc -l - TensorRT 8.5.1.7 - torch_tensorrt 1.3.0 +#### 2.3 Nvidia A100 + +- ##### 硬件环境 + - 机器、加速卡型号: MR-V100 + +- ##### 软件环境 + - OS版本:Ubuntu 18.04 + - OS kernel版本: 5.15.0-78-generic + - 加速卡驱动版本:3.2.0 + - Docker 版本:24.0.4 + - 训练框架版本:torch-1.13.1+corex.3.2.0 + - 依赖软件版本: + - cuda: 10.2 + +- 推理工具包 + + - TensorRT 8.5.1.7 + - torch_tensorrt 1.3.0 + ### 3. 运行情况 * 指标列表 @@ -84,4 +103,4 @@ find ./val -name "*JPEG" | wc -l | tensorrt | fp16 | 256 |613.4 | 1358.9 | 4469.4 | 1391.4 | 12698.7 | 16.8% | 76.2/76.2 | 19.7/40.0 | | tensorrt | fp32 | 256 | 474.4 | 1487.3 | 2653.2 | 1560.3 | 6091.6 | 16.1% | 76.2/76.2 | 28.86/40.0 | | torchtrt | fp16 | 256 | 716.4 | 1370.4 | 4282.6 | 1320.0 | 4723.0 | 6.3% | 76.2/76.2 | 9.42/40.0 | - +| ixrt | fp16 | 256 | 200.3 | | | 276.8 | 1914.3 | 8.2% | 76.2/76.2 | 4.3/32.0 | diff --git a/inference/configs/resnet50/vendor_config/iluvatar_configurations.yaml b/inference/configs/resnet50/vendor_config/iluvatar_configurations.yaml index 84d9aae89..c721ede09 100644 --- a/inference/configs/resnet50/vendor_config/iluvatar_configurations.yaml +++ b/inference/configs/resnet50/vendor_config/iluvatar_configurations.yaml @@ -2,8 +2,7 @@ ixrt_tmp_path: iluvatar_tmp/resnet50-fp16.engine has_dynamic_axis: false repeat: 1 image_size: 224 -batch_size: 128 -exist_onnx_path: onnxs/resnet50.onnx +exist_onnx_path: onnxs/resnet50_bs256_pytorch_fp16False.onnx # exist_compiler_path: resnet50-fp16.engine output_types: {"output":"float32"} input_types: {"input": "float32"} \ No newline at end of file diff --git a/inference/docker_images/iluvatar/iluvatar_analysis.py b/inference/docker_images/iluvatar/iluvatar_analysis.py index 26132d19d..77e9ac0a3 100644 --- a/inference/docker_images/iluvatar/iluvatar_analysis.py +++ b/inference/docker_images/iluvatar/iluvatar_analysis.py @@ -11,4 +11,5 @@ def analysis_log(logpath): max_mem = line.split(" ")[3] max_mem = float(max_mem[:-3]) - return round(max_usage / 1024.0, 2), round(max_mem / 1024.0, 2) + return round(max_usage / 1024.0, + 2), round(max_mem / 1024.0, 2), eval("24e12"), eval("96e12") diff --git a/inference/docker_images/iluvatar/pytorch/packages/README.md b/inference/docker_images/iluvatar/pytorch/packages/README.md index dca100acb..88a18b3dc 100644 --- a/inference/docker_images/iluvatar/pytorch/packages/README.md +++ b/inference/docker_images/iluvatar/pytorch/packages/README.md @@ -1,5 +1,7 @@ # 以下软件包需联系天数智芯获取 +>联系邮箱: contact-us@iluvatar.com + ixrt-0.4.0+corex.3.2.0-cp38-cp38-linux_x86_64.whl torch-1.13.1+corex.3.2.0-cp38-cp38-linux_x86_64.whl diff --git a/inference/docker_images/iluvatar/pytorch/sdk_installers/README.md b/inference/docker_images/iluvatar/pytorch/sdk_installers/README.md index fc9bf8738..73564e7c8 100644 --- a/inference/docker_images/iluvatar/pytorch/sdk_installers/README.md +++ b/inference/docker_images/iluvatar/pytorch/sdk_installers/README.md @@ -1,5 +1,7 @@ # 以下软件包需联系天数智芯获取 +>联系邮箱: contact-us@iluvatar.com + corex-installer-linux64-3.2.0-20230718_x86_64_10.2.run partial_install_cuda_header.tar.gz \ No newline at end of file diff --git a/inference/inference_engine/iluvatar/ixrt.py b/inference/inference_engine/iluvatar/ixrt.py index 85b835062..620cc32f3 100644 --- a/inference/inference_engine/iluvatar/ixrt.py +++ b/inference/inference_engine/iluvatar/ixrt.py @@ -16,13 +16,17 @@ def __init__(self, host_mem, device_mem): self.device = device_mem def __str__(self): - return "Host:\n" + str(self.host) + "\nDevice:\n" + str( - self.device) + return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device) def __repr__(self): return self.__str__() def __init__(self, config, onnx_path, model): + self.str_to_numpy_dict = { + "int32": np.int32, + "float16": np.float16, + "float32": np.float32, + } self.engine = self.build_engine(config, onnx_path) self.outputs = self.allocate_buffers(self.engine) @@ -31,12 +35,12 @@ def config_init_engine(self, config, onnx_path): runtime_config = RuntimeConfig() - input_shapes = [config.batch_size, 3, config.image_size, config.image_size] + input_shapes = [config.batch_size, 3, config.image_size, config.image_size] runtime_config.input_shapes = [("input", input_shapes)] runtime_config.device_idx = 0 precision = "float16" - if precision=="int8": + if precision == "int8": assert quant_file, "Quant file must provided for int8 inferencing." runtime_config.runtime_context = RuntimeContext( @@ -77,25 +81,20 @@ def build_engine(self, config, onnx_path): def allocate_buffers(self, engine): output_map = engine.GetOutputShape() - output_io_buffers = [] + output_io_buffers = [] output_types = {} config = engine.GetConfig() for key, val in config.runtime_context.output_types.items(): output_types[key] = str(val) for name, shape in output_map.items(): # 1. apply memory buffer for output of the shape - if output_types[name] =="float32": - buffer = np.zeros(shape.dims, dtype=np.float32) - elif output_types[name] =="int32": - buffer = np.zeros(shape.dims, dtype=np.int32) - elif output_types[name] =="float16": - buffer = np.zeros(shape.dims, dtype=np.float16) - else: - raise RuntimeError("need to add a {} datatype of output".format(output_types[name])) + buffer = np.zeros( + shape.dims, dtype=self.str_to_numpy_dict[output_types[name]] + ) buffer = torch.tensor(buffer).cuda() # 2. put the buffer to a list output_io_buffers.append([name, buffer, shape]) - + engine.BindIOBuffers(output_io_buffers) return output_io_buffers @@ -125,6 +124,6 @@ def __call__(self, model_inputs: list): gpu_io_buffers = [] for buffer in self.outputs: # gpu_io_buffers.append([buffer[0], buffer[1], buffer[2]]) - gpu_io_buffers.append(buffer[1].cpu()) + gpu_io_buffers.append(buffer[1]) return gpu_io_buffers, 0 From 69e2abc7f30b7512039357b3d8154df04283ad96 Mon Sep 17 00:00:00 2001 From: stezpy Date: Thu, 10 Aug 2023 17:09:18 +0800 Subject: [PATCH 5/8] update --- inference/benchmarks/resnet50/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inference/benchmarks/resnet50/README.md b/inference/benchmarks/resnet50/README.md index c92390391..d6dcbe3b5 100644 --- a/inference/benchmarks/resnet50/README.md +++ b/inference/benchmarks/resnet50/README.md @@ -103,4 +103,4 @@ find ./val -name "*JPEG" | wc -l | tensorrt | fp16 | 256 |613.4 | 1358.9 | 4469.4 | 1391.4 | 12698.7 | 16.8% | 76.2/76.2 | 19.7/40.0 | | tensorrt | fp32 | 256 | 474.4 | 1487.3 | 2653.2 | 1560.3 | 6091.6 | 16.1% | 76.2/76.2 | 28.86/40.0 | | torchtrt | fp16 | 256 | 716.4 | 1370.4 | 4282.6 | 1320.0 | 4723.0 | 6.3% | 76.2/76.2 | 9.42/40.0 | -| ixrt | fp16 | 256 | 200.3 | | | 276.8 | 1914.3 | 8.2% | 76.2/76.2 | 4.3/32.0 | +| ixrt | fp16 | 256 | 275.6 | | | 276.8 | 1914.3 | 8.2% | 76.2/76.2 | 4.3/32.0 | From 640b53d17e48e4c33b731ac39f85a045a886be4c Mon Sep 17 00:00:00 2001 From: stezpy Date: Thu, 10 Aug 2023 17:28:21 +0800 Subject: [PATCH 6/8] update readme --- inference/benchmarks/resnet50/README.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/inference/benchmarks/resnet50/README.md b/inference/benchmarks/resnet50/README.md index d6dcbe3b5..931a44810 100644 --- a/inference/benchmarks/resnet50/README.md +++ b/inference/benchmarks/resnet50/README.md @@ -60,7 +60,7 @@ find ./val -name "*JPEG" | wc -l - TensorRT 8.5.1.7 - torch_tensorrt 1.3.0 -#### 2.3 Nvidia A100 +#### 2.2 MR-V100 - ##### 硬件环境 - 机器、加速卡型号: MR-V100 @@ -76,8 +76,7 @@ find ./val -name "*JPEG" | wc -l - 推理工具包 - - TensorRT 8.5.1.7 - - torch_tensorrt 1.3.0 + - ixrt-0.4.0+corex.3.2.0 ### 3. 运行情况 @@ -103,4 +102,4 @@ find ./val -name "*JPEG" | wc -l | tensorrt | fp16 | 256 |613.4 | 1358.9 | 4469.4 | 1391.4 | 12698.7 | 16.8% | 76.2/76.2 | 19.7/40.0 | | tensorrt | fp32 | 256 | 474.4 | 1487.3 | 2653.2 | 1560.3 | 6091.6 | 16.1% | 76.2/76.2 | 28.86/40.0 | | torchtrt | fp16 | 256 | 716.4 | 1370.4 | 4282.6 | 1320.0 | 4723.0 | 6.3% | 76.2/76.2 | 9.42/40.0 | -| ixrt | fp16 | 256 | 275.6 | | | 276.8 | 1914.3 | 8.2% | 76.2/76.2 | 4.3/32.0 | +| ixrt | fp16 | 256 | 275.6 | | | 276.8 | 1914.3 | 8.2% | 76.2 | 4.3/32.0 | From 6515344117d8f7bd1cc1b03fb6c5d06dddb96737 Mon Sep 17 00:00:00 2001 From: stezpy Date: Thu, 17 Aug 2023 20:41:43 +0800 Subject: [PATCH 7/8] update readme --- inference/benchmarks/resnet50/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inference/benchmarks/resnet50/README.md b/inference/benchmarks/resnet50/README.md index a6566c073..42fc5149c 100644 --- a/inference/benchmarks/resnet50/README.md +++ b/inference/benchmarks/resnet50/README.md @@ -115,10 +115,10 @@ find ./val -name "*JPEG" | wc -l * 指标值 | 推理工具 | precision | bs | e2e_time | p_val_whole | p_val_core | p_infer_whole | \*p_infer_core | \*MFU | acc | mem | -| ----------- | --------- | ---- | ---- | -------- | ----------- | ---------- | ------------- | ------------ | ----------- | ----------- | ---------- | +| ----------- | --------- | ---- | ---- | -------- | ----------- | ---------- | ------------- | ------------ | ----------- | ----------- | | tensorrt | fp16 | 256 |613.4 | 1358.9 | 4469.4 | 1391.4 | 12698.7 | 16.8% | 76.2/76.2 | 19.7/40.0 | | tensorrt | fp32 | 256 | 474.4 | 1487.3 | 2653.2 | 1560.3 | 6091.6 | 16.1% | 76.2/76.2 | 28.86/40.0 | | torchtrt | fp16 | 256 | 716.4 | 1370.4 | 4282.6 | 1320.0 | 4723.0 | 6.3% | 76.2/76.2 | 9.42/40.0 | -| ixrt | fp16 | 256 | 275.6 | | | 276.8 | 1914.3 | 8.2% | 76.2 | 4.3/32.0 | +| ixrt | fp16 | 256 | 136.4 | / | / | 1146.6 | 2679.9 | 11.5% | 76.2 | 4.3/32.0 | | kunlunxin_xtcl | fp32 | 128 | 311.215 | / | / | 837.507 | 1234.727 | / | 76.2/76.2 | / | From dd16af4edc29e7253d7d9cd9750b3eb6810bccb3 Mon Sep 17 00:00:00 2001 From: stezpy Date: Fri, 18 Aug 2023 09:51:58 +0800 Subject: [PATCH 8/8] update --- inference/benchmarks/resnet50/README.md | 35 +++++++++++++------------ 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/inference/benchmarks/resnet50/README.md b/inference/benchmarks/resnet50/README.md index 42fc5149c..aaf3c14fa 100644 --- a/inference/benchmarks/resnet50/README.md +++ b/inference/benchmarks/resnet50/README.md @@ -60,23 +60,6 @@ find ./val -name "*JPEG" | wc -l - TensorRT 8.5.1.7 - torch_tensorrt 1.3.0 -#### 2.2 MR-V100 - -- ##### 硬件环境 - - 机器、加速卡型号: MR-V100 - -- ##### 软件环境 - - OS版本:Ubuntu 18.04 - - OS kernel版本: 5.15.0-78-generic - - 加速卡驱动版本:3.2.0 - - Docker 版本:24.0.4 - - 训练框架版本:torch-1.13.1+corex.3.2.0 - - 依赖软件版本: - - cuda: 10.2 - -- 推理工具包 - - - ixrt-0.4.0+corex.3.2.0 #### 2.2 昆仑芯R200 - ##### 硬件环境 @@ -95,6 +78,24 @@ find ./val -name "*JPEG" | wc -l - XTCL 2.1 +#### 2.3 天数智芯 MR-V100 + +- ##### 硬件环境 + - 机器、加速卡型号: MR-V100 + +- ##### 软件环境 + - OS版本:Ubuntu 18.04 + - OS kernel版本: 5.15.0-78-generic + - 加速卡驱动版本:3.2.0 + - Docker 版本:24.0.4 + - 训练框架版本:torch-1.13.1+corex.3.2.0 + - 依赖软件版本: + - cuda: 10.2 + +- 推理工具包 + + - IXRT: ixrt-0.4.0+corex.3.2.0 + ### 3. 运行情况 * 指标列表