From 493624444b3e7a33131768041c6fa1245735cca1 Mon Sep 17 00:00:00 2001
From: stezpy <stezpy@gmail.com>
Date: Mon, 7 Aug 2023 15:12:55 +0800
Subject: [PATCH 1/8] add ixrt

---
 .../iluvatar_configurations.yaml              |   7 +
 .../iluvatar/iluvatar_analysis.py             |  14 +
 .../iluvatar/iluvatar_monitor.py              | 256 ++++++++++++++++++
 .../docker_images/iluvatar/pytorch/Dockerfile |  61 +++++
 .../iluvatar/pytorch/packages/README.md       |   7 +
 .../iluvatar/pytorch/pytorch_install.sh       |  25 ++
 .../iluvatar/pytorch/sdk_installers/README.md |   5 +
 inference/inference_engine/iluvatar/ixrt.py   | 130 +++++++++
 8 files changed, 505 insertions(+)
 create mode 100644 inference/configs/resnet50/vendor_config/iluvatar_configurations.yaml
 create mode 100644 inference/docker_images/iluvatar/iluvatar_analysis.py
 create mode 100644 inference/docker_images/iluvatar/iluvatar_monitor.py
 create mode 100644 inference/docker_images/iluvatar/pytorch/Dockerfile
 create mode 100644 inference/docker_images/iluvatar/pytorch/packages/README.md
 create mode 100644 inference/docker_images/iluvatar/pytorch/pytorch_install.sh
 create mode 100644 inference/docker_images/iluvatar/pytorch/sdk_installers/README.md
 create mode 100644 inference/inference_engine/iluvatar/ixrt.py

diff --git a/inference/configs/resnet50/vendor_config/iluvatar_configurations.yaml b/inference/configs/resnet50/vendor_config/iluvatar_configurations.yaml
new file mode 100644
index 000000000..4e63bd183
--- /dev/null
+++ b/inference/configs/resnet50/vendor_config/iluvatar_configurations.yaml
@@ -0,0 +1,7 @@
+ixrt_tmp_path: iluvatar_tmp/resnet50-fp16.engine
+has_dynamic_axis: false
+repeat: 1
+image_size: 224
+batch_size: 128
+exist_onnx_path: onnxs/resnet50.onnx
+# exist_compiler_path: resnet50-fp16.engine
\ No newline at end of file
diff --git a/inference/docker_images/iluvatar/iluvatar_analysis.py b/inference/docker_images/iluvatar/iluvatar_analysis.py
new file mode 100644
index 000000000..26132d19d
--- /dev/null
+++ b/inference/docker_images/iluvatar/iluvatar_analysis.py
@@ -0,0 +1,14 @@
+def analysis_log(logpath):  # returns (max 3rd field, last 4th field) of "MiB" lines, each / 1024
+    with open(logpath) as logfile:  # context manager: the handle used to be left open
+        lines = logfile.readlines()
+    max_usage = 0.0
+    max_mem = 0.0  # overwritten by every matching line, i.e. not a running maximum
+    for line in lines:
+        if "MiB" in line:  # lines without a "MiB" field are skipped
+            usage = line.split(" ")[2]
+            usage = float(usage[:-3])  # strip the 3-character "MiB" suffix
+            max_usage = max(max_usage, usage)
+            max_mem = line.split(" ")[3]
+            max_mem = float(max_mem[:-3])
+
+    return round(max_usage / 1024.0, 2), round(max_mem / 1024.0, 2)
diff --git a/inference/docker_images/iluvatar/iluvatar_monitor.py b/inference/docker_images/iluvatar/iluvatar_monitor.py
new file mode 100644
index 000000000..ace3d8888
--- /dev/null
+++ b/inference/docker_images/iluvatar/iluvatar_monitor.py
@@ -0,0 +1,256 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+'''
+Usage:  python3 iluvatar_monitor.py -o operation -l [log_path]
+            -o      start|stop|restart|status
+            -l      log path , ./logs/ default
+'''
+
+import os
+import sys
+import time
+import signal
+import atexit
+import argparse
+import datetime
+from multiprocessing import Process
+import subprocess
+import schedule
+
+
+class Daemon:
+    '''
+    daemon subprocess class.
+    usage: subclass this daemon and override the run() method.
+    pid file: path passed as pid_file; removed by an atexit hook when the daemon exits.
+    verbose: debug mode, disabled default.
+    '''
+
+    def __init__(self,
+                 pid_file,
+                 log_file,
+                 err_file,
+                 gpu_log,
+                 log_path,
+                 rate=5,
+                 stdin=os.devnull,
+                 stdout=os.devnull,
+                 stderr=os.devnull,
+                 home_dir='.',
+                 umask=0o22,
+                 verbose=0):
+        # Plain attribute storage only; nothing is opened or forked here.
+        self.pidfile = pid_file
+        self.logfile = log_file
+        self.errfile = err_file
+        self.gpufile = gpu_log
+        self.logpath = log_path
+        self.rate = rate
+        self.stdin = stdin
+        self.stdout = stdout
+        self.stderr = stderr
+        self.home_dir = home_dir
+        self.umask = umask
+        self.verbose = verbose
+        self.daemon_alive = True
+
+    def get_pid(self):
+        '''Return the pid stored in the pid file, or None if unreadable.'''
+        try:
+            with open(self.pidfile, 'r') as pf:
+                return int(pf.read().strip())
+        except (IOError, ValueError):
+            # Missing file, or empty/corrupt content (e.g. a half-written
+            # pid file): either way there is no usable pid to report.
+            return None
+
+    def del_pid(self):  # remove the pid file if it exists; daemonize() registers this with atexit
+        if os.path.exists(self.pidfile):
+            os.remove(self.pidfile)
+
+    def run(self):
+        '''
+        Default monitor loop (subclasses may override): every `self.rate`
+        seconds, polled in 5 s steps, append an `ixsmi` sample to `self.gpufile`.
+        '''
+        def gpu_mon(file):
+            TIMESTAMP = datetime.datetime.now().strftime('%Y-%m-%d-%H:%M:%S')
+            cmd = "ixsmi |grep 'Default'|awk '{print $3,$5,$9,$11,$13}'"
+            process = subprocess.Popen(cmd,
+                                       shell=True,
+                                       stdout=subprocess.PIPE,
+                                       stderr=subprocess.STDOUT,
+                                       encoding='utf-8')
+            try:
+                out = process.communicate(timeout=10)
+            except subprocess.TimeoutExpired:
+                process.kill()
+                out = process.communicate()
+
+            # "error" used to be assigned and then unconditionally overwritten,
+            # so a failed or timed-out command was never marked in the log.
+            sample = out[0] if process.returncode == 0 else "error"
+            with open(file, 'a') as f:
+                f.write(TIMESTAMP + "\n" + sample + "\n")
+
+        def timer_gpu_mon():
+            gpu_process = Process(target=gpu_mon, args=(self.gpufile, ))
+            gpu_process.start()
+
+        schedule.every(self.rate).seconds.do(timer_gpu_mon)
+        while True:
+            schedule.run_pending()
+            time.sleep(5)
+
+    def daemonize(self):  # detach via two forks, redirect the std streams, write the pid file
+        if self.verbose >= 1:
+            print('daemon process starting ...')
+        try:
+            pid = os.fork()
+            if pid > 0:
+                sys.exit(0)  # first parent exits; the child carries on
+        except OSError as e:
+            sys.stderr.write('fork #1 failed: %d (%s)\n' %
+                             (e.errno, e.strerror))
+            sys.exit(1)
+        os.chdir(self.home_dir)
+        os.setsid()  # the child becomes leader of a new session
+        os.umask(self.umask)
+        try:
+            pid = os.fork()
+            if pid > 0:
+                sys.exit(0)  # second parent exits; the grandchild is the daemon
+        except OSError as e:
+            sys.stderr.write('fork #2 failed: %d (%s)\n' %
+                             (e.errno, e.strerror))
+            sys.exit(1)
+        sys.stdout.flush()  # flush pending output before the descriptors are replaced
+        sys.stderr.flush()
+        si = open(self.stdin, 'r')
+        so = open(self.stdout, 'a+')
+        if self.stderr:
+            se = open(self.stderr, 'a+')
+        else:
+            se = so  # no separate stderr target: share the stdout file
+        os.dup2(si.fileno(), sys.stdin.fileno())  # re-point fds 0/1/2 at the configured files
+        os.dup2(so.fileno(), sys.stdout.fileno())
+        os.dup2(se.fileno(), sys.stderr.fileno())
+        atexit.register(self.del_pid)  # delete the pid file when the interpreter exits
+        pid = str(os.getpid())
+        with open(self.pidfile, 'w+') as f:
+            f.write('%s\n' % pid)  # get_pid() parses this value back with int()
+
+    def start(self):
+        '''Daemonize and run the monitor unless a pid file says one exists.'''
+        if self.verbose >= 1:
+            print('ready to start ......')
+        # Check the pid file first: deleting the log of a daemon that is
+        # already running would silently discard its samples.
+        if self.get_pid():
+            msg = 'pid file %s already exists, is it already running?\n'
+            sys.stderr.write(msg % self.pidfile)
+            sys.exit(1)
+        if not os.path.exists(self.logpath):
+            os.makedirs(self.logpath)
+        elif os.path.exists(self.gpufile):
+            os.remove(self.gpufile)  # every run starts with a fresh GPU log
+        self.daemonize()
+        self.run()
+
+    def stop(self):
+        '''Signal the daemon until it exits, then remove its pid file.'''
+        if self.verbose >= 1:
+            print('stopping ...')
+        pid = self.get_pid()
+        if not pid:
+            msg = 'pid file [%s] does not exist. Not running?\n' % self.pidfile
+            sys.stderr.write(msg)
+            self.del_pid()
+            return
+        # Keep sending SIGTERM once per second (plus a SIGHUP on every tenth
+        # attempt) until os.kill reports that the process is gone.
+        try:
+            attempts = 0
+            while True:
+                os.kill(pid, signal.SIGTERM)
+                time.sleep(1)
+                attempts += 1
+                if attempts % 10 == 0:
+                    os.kill(pid, signal.SIGHUP)
+        except ProcessLookupError:
+            # ESRCH: detected via the exception type instead of matching the
+            # locale-dependent 'No such process' message text.
+            self.del_pid()
+            if self.verbose >= 1:
+                print('Stopped!')
+        except OSError as err:
+            print(str(err))
+            sys.exit(1)
+
+    def restart(self):  # stop() the daemon recorded in the pid file (if any), then start() again
+        self.stop()
+        self.start()
+
+    def status(self):
+        '''Return the daemon pid if /proc has an entry for it, else False.'''
+        pid = self.get_pid()
+        if pid and os.path.exists('/proc/%d' % pid):
+            return pid
+        return False
+
+
+def parse_args():
+    '''Parse the command line: -o <operation> (required), -l <log dir>.'''
+    parser = argparse.ArgumentParser(description='Sys monitor script')
+    parser.add_argument('-o',
+                        required=True,
+                        type=str,
+                        metavar='[operation]',
+                        help='start|stop|restart|status')
+    # Directory that receives the monitor output files.
+    parser.add_argument('-l',
+                        required=False,
+                        type=str,
+                        metavar='[log_path]',
+                        default='./logs/',
+                        help='log path')
+    return parser.parse_args()
+
+
+def main():
+    '''Build the monitor Daemon from the CLI arguments and run the operation.'''
+    args = parse_args()
+    log_path = args.l
+    sample_rate1 = 5
+    pid_fn = '/tmp/gpu_monitor.pid'
+    log_fn = log_path + '/iluvatar_monitor.log'
+    err_fn = log_path + '/iluvatar_monitor.err'
+    # result for gpu (same path as log_fn)
+    gpu_fn = log_path + '/iluvatar_monitor.log'
+
+    subdaemon = Daemon(pid_fn, log_fn, err_fn, gpu_fn, log_path,
+                       verbose=1, rate=sample_rate1)
+
+    # 'status' only reports; it is handled apart from the state-changing actions.
+    if args.o == 'status':
+        pid = subdaemon.status()
+        if pid:
+            print('process [%s] is running ......' % pid)
+        else:
+            print('daemon process [%s] stopped' % pid)
+        return
+
+    actions = {
+        'start': subdaemon.start,
+        'stop': subdaemon.stop,
+        'restart': subdaemon.restart,
+    }
+    action = actions.get(args.o)
+    if action is None:
+        print("invalid argument!")
+        sys.exit(1)
+    action()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/inference/docker_images/iluvatar/pytorch/Dockerfile b/inference/docker_images/iluvatar/pytorch/Dockerfile
new file mode 100644
index 000000000..3e72721cf
--- /dev/null
+++ b/inference/docker_images/iluvatar/pytorch/Dockerfile
@@ -0,0 +1,61 @@
+FROM ubuntu:20.04
+
+# NOTE: "apt-get update" and "apt-get install" are chained in one RUN below so a cached, stale package index is never reused.
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV PATH=/root/miniconda/bin:$PATH
+
+RUN sed -i 's#http://archive.ubuntu.com/#http://mirrors.tuna.tsinghua.edu.cn/#' /etc/apt/sources.list
+RUN apt-get update -y && \
+    apt-get install -y --fix-missing \
+     apt-utils \
+     sudo \
+     openssh-server \
+     vim \
+     git \
+     curl \
+     wget \
+     tree \
+     perl \
+     kmod \
+     make \
+     pciutils \
+     build-essential \
+     python3.8-dev \
+     python3-pip \
+     libjpeg-dev \
+     zlib1g-dev \
+     unzip \
+     cmake \
+     bzip2 \
+     cabextract \
+     iputils-ping \
+     pbzip2 \
+     pv \
+     numactl \
+     ninja-build \
+     libgl1-mesa-dev
+
+
+# Configure anaconda
+RUN wget https://repo.anaconda.com/miniconda/Miniconda3-py38_4.10.3-Linux-x86_64.sh && \
+    bash ./Miniconda3-py38_4.10.3-Linux-x86_64.sh -b -p /root/miniconda && \
+    /root/miniconda/bin/conda clean -tipsy && \
+    ln -s /root/miniconda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
+    echo ". /root/miniconda/etc/profile.d/conda.sh" >> ~/.bashrc && \
+    echo "conda activate base" >> ~/.bashrc && \
+    conda config --set always_yes yes --set changeps1 no && \
+    echo 'LD_LIBRARY_PATH="/usr/local/corex/lib:${LD_LIBRARY_PATH}"' >> ~/.bashrc && \
+    echo 'PATH="/usr/local/corex/bin:${PATH}"' >> ~/.bashrc
+
+
+RUN /bin/bash -c "apt-get install -y linux-headers-`uname -r`"
+
+RUN /bin/bash -c "pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple"
+
+ENV LD_LIBRARY_PATH="/usr/local/corex/lib:${LD_LIBRARY_PATH}"
+ENV PATH="/usr/local/corex/bin:${PATH}"
+ENV NVCC_ARGUMENTS="-U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -ftemplate-depth=1024"
+
+RUN pip install loguru
+RUN pip install pyyaml
\ No newline at end of file
diff --git a/inference/docker_images/iluvatar/pytorch/packages/README.md b/inference/docker_images/iluvatar/pytorch/packages/README.md
new file mode 100644
index 000000000..dca100acb
--- /dev/null
+++ b/inference/docker_images/iluvatar/pytorch/packages/README.md
@@ -0,0 +1,7 @@
+# 以下软件包需联系天数智芯获取
+
+ixrt-0.4.0+corex.3.2.0-cp38-cp38-linux_x86_64.whl
+
+torch-1.13.1+corex.3.2.0-cp38-cp38-linux_x86_64.whl
+
+torchvision-0.14.1+corex.3.2.0-cp38-cp38-linux_x86_64.whl
\ No newline at end of file
diff --git a/inference/docker_images/iluvatar/pytorch/pytorch_install.sh b/inference/docker_images/iluvatar/pytorch/pytorch_install.sh
new file mode 100644
index 000000000..859591930
--- /dev/null
+++ b/inference/docker_images/iluvatar/pytorch/pytorch_install.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Installs the SDK installers and Python wheels staged under /workspace/docker_image.
+SDK_DIR="/workspace/docker_image/sdk_installers"
+PKG_DIR="/workspace/docker_image/packages"
+
+# 1. CUDA header bundle: unpack into the current directory, run its installer, remove the unpacked tree.
+for installer in $(find "${SDK_DIR}" -name "partial_install_cuda_header.tar.gz"); do
+    echo "Install ${installer}"
+    tar zxvf "${installer}"
+    sh "$(basename "${installer}" .tar.gz)/install-cuda-header.sh" -- --silent --toolkit
+    rm -rf "$(basename "${installer}" .tar.gz)"
+done
+
+# 2. corex*.run installers, run silently with the driver and toolkit components.
+for installer in $(find "${SDK_DIR}" -name "corex*.run"); do
+    echo "Install ${installer}"
+    sh "${installer}" -- --silent --driver --toolkit
+done
+
+# 3. Every Python wheel found under PKG_DIR.
+for pkg in $(find "${PKG_DIR}" -name "*.whl"); do
+    echo "Install ${pkg}"
+    pip3 install "${pkg}"
+done
+
diff --git a/inference/docker_images/iluvatar/pytorch/sdk_installers/README.md b/inference/docker_images/iluvatar/pytorch/sdk_installers/README.md
new file mode 100644
index 000000000..fc9bf8738
--- /dev/null
+++ b/inference/docker_images/iluvatar/pytorch/sdk_installers/README.md
@@ -0,0 +1,5 @@
+# 以下软件包需联系天数智芯获取
+
+corex-installer-linux64-3.2.0-20230718_x86_64_10.2.run
+
+partial_install_cuda_header.tar.gz
\ No newline at end of file
diff --git a/inference/inference_engine/iluvatar/ixrt.py b/inference/inference_engine/iluvatar/ixrt.py
new file mode 100644
index 000000000..625749235
--- /dev/null
+++ b/inference/inference_engine/iluvatar/ixrt.py
@@ -0,0 +1,130 @@
+from ixrt import IxRT, RuntimeConfig, RuntimeContext
+import torch
+import os
+import subprocess
+from loguru import logger
+import numpy as np
+import time
+
+
+class InferModel:
+
+    class HostDeviceMem(object):
+
+        def __init__(self, host_mem, device_mem):
+            self.host = host_mem
+            self.device = device_mem
+
+        def __str__(self):
+            return "Host:\n" + str(self.host) + "\nDevice:\n" + str(
+                self.device)
+
+        def __repr__(self):
+            return self.__str__()
+
+    def __init__(self, config, onnx_path, model):
+        self.engine = self.build_engine(config, onnx_path)
+        self.outputs = self.allocate_buffers(self.engine)
+
+    def config_init_engine(self, config, onnx_path):
+        quant_file = None
+
+        runtime_config = RuntimeConfig()
+
+        input_shapes = [config.batch_size, 3, config.image_size, config.image_size]    
+        runtime_config.input_shapes = [("input", input_shapes)]
+        runtime_config.device_idx = 0
+
+        precision = "float16"
+        if precision=="int8":
+            assert quant_file, "Quant file must provided for int8 inferencing."
+
+        runtime_config.runtime_context = RuntimeContext(
+            precision,
+            "nhwc",
+            use_gpu=True,
+            pipeline_sync=True,
+            input_types={"input": "float32"},
+            output_types={"output": "float32"},
+            input_device="gpu",
+            output_device="gpu",
+        )
+
+        runtime = IxRT.from_onnx(onnx_path, quant_file, runtime_config)
+        return runtime
+
+    def build_engine(self, config, onnx_path):
+        """Return an IxRT runtime with the engine for `config` loaded.
+
+        Builds and serializes the engine from `onnx_path` unless
+        `config.exist_compiler_path` already names one.
+        """
+        engine_path = config.exist_compiler_path
+        if engine_path is not None:
+            print(f"Use existing engine: {engine_path}")
+        else:
+            engine_path = config.log_dir + "/" + config.ixrt_tmp_path
+            os.makedirs(os.path.dirname(engine_path), exist_ok=True)
+            time.sleep(10)
+            builder = self.config_init_engine(config, onnx_path)
+            print(f"Build Engine File: {engine_path}")
+            builder.BuildEngine()
+            builder.SerializeEngine(engine_path)
+            print("Build Engine done!")
+        runtime = IxRT()
+        runtime.LoadEngine(engine_path, config.batch_size)
+        return runtime
+
+    def allocate_buffers(self, engine):
+        output_map = engine.GetOutputShape()
+        output_io_buffers = []   
+        output_types = {}
+        config = engine.GetConfig()
+        for key, val in config.runtime_context.output_types.items():
+            output_types[key] = str(val)
+        for name, shape in output_map.items():
+            # 1. apply memory buffer for output of the shape
+            if output_types[name] =="float32":
+                buffer = np.zeros(shape.dims, dtype=np.float32)
+            elif output_types[name] =="int32":
+                buffer = np.zeros(shape.dims, dtype=np.int32)
+            elif output_types[name] =="float16":
+                buffer = np.zeros(shape.dims, dtype=np.float16)
+            else:
+                raise RuntimeError("need to add a {} datatype of output".format(output_types[name]))
+            buffer = torch.tensor(buffer).cuda()
+            # 2. put the buffer to a list
+            output_io_buffers.append([name, buffer, shape])
+        
+        engine.BindIOBuffers(output_io_buffers)
+        return output_io_buffers
+
+    def __call__(self, model_inputs: list):
+        """Run the engine on `model_inputs` and return (output tensors, 0).
+
+        Each input is copied to a float32 CUDA tensor and bound to the first
+        entry of the engine's input map.
+        """
+        # Loop-invariant, so resolved once. The batch-size and padded-shape
+        # values that used to be computed here were never read.
+        name, shape = list(self.engine.GetInputShape().items())[0]
+        input_io_buffers = []
+        for model_input in model_inputs:
+            model_input = torch.tensor(model_input.numpy(), dtype=torch.float32).cuda()
+            if not model_input.is_contiguous():
+                model_input = model_input.contiguous()
+            input_io_buffers.append([name, model_input, shape])
+
+        self.engine.BindIOBuffers(self.outputs)
+        self.engine.LoadInput(input_io_buffers)
+
+        # torch.cuda.synchronize()
+        self.engine.Execute()
+        # torch.cuda.synchronize()
+
+        gpu_io_buffers = []
+        for buffer in self.outputs:
+            # gpu_io_buffers.append([buffer[0], buffer[1], buffer[2]])
+            gpu_io_buffers.append(buffer[1].cpu())
+
+        return gpu_io_buffers, 0

From b8aade6a4b89edcb63ed06b3f304379f48acadbf Mon Sep 17 00:00:00 2001
From: stezpy <stezpy@gmail.com>
Date: Mon, 7 Aug 2023 15:16:54 +0800
Subject: [PATCH 2/8] add torch sync

---
 inference/tools/torch_sync.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/inference/tools/torch_sync.py b/inference/tools/torch_sync.py
index 6e5e8b09a..9fb8ac210 100644
--- a/inference/tools/torch_sync.py
+++ b/inference/tools/torch_sync.py
@@ -4,3 +4,5 @@
 def torch_sync(config):
     if config.vendor == "nvidia":
         torch.cuda.synchronize()
+    elif config.vendor == "iluvatar":
+        torch.cuda.synchronize()
\ No newline at end of file

From 790edf515a01095b37b2b1730db192d4759b55c8 Mon Sep 17 00:00:00 2001
From: stezpy <stezpy@gmail.com>
Date: Mon, 7 Aug 2023 16:06:27 +0800
Subject: [PATCH 3/8] customized input & output

---
 .../resnet50/vendor_config/iluvatar_configurations.yaml       | 4 +++-
 inference/inference_engine/iluvatar/ixrt.py                   | 4 ++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/inference/configs/resnet50/vendor_config/iluvatar_configurations.yaml b/inference/configs/resnet50/vendor_config/iluvatar_configurations.yaml
index 4e63bd183..84d9aae89 100644
--- a/inference/configs/resnet50/vendor_config/iluvatar_configurations.yaml
+++ b/inference/configs/resnet50/vendor_config/iluvatar_configurations.yaml
@@ -4,4 +4,6 @@ repeat: 1
 image_size: 224
 batch_size: 128
 exist_onnx_path: onnxs/resnet50.onnx
-# exist_compiler_path: resnet50-fp16.engine
\ No newline at end of file
+# exist_compiler_path: resnet50-fp16.engine
+output_types: {"output":"float32"}
+input_types: {"input": "float32"}
\ No newline at end of file
diff --git a/inference/inference_engine/iluvatar/ixrt.py b/inference/inference_engine/iluvatar/ixrt.py
index 625749235..85b835062 100644
--- a/inference/inference_engine/iluvatar/ixrt.py
+++ b/inference/inference_engine/iluvatar/ixrt.py
@@ -44,8 +44,8 @@ def config_init_engine(self, config, onnx_path):
             "nhwc",
             use_gpu=True,
             pipeline_sync=True,
-            input_types={"input": "float32"},
-            output_types={"output": "float32"},
+            input_types=config.input_types,
+            output_types=config.output_types,
             input_device="gpu",
             output_device="gpu",
         )

From e63b174da7ea81d405c66962ac4bf4dac067fbca Mon Sep 17 00:00:00 2001
From: stezpy <stezpy@gmail.com>
Date: Thu, 10 Aug 2023 16:59:35 +0800
Subject: [PATCH 4/8] merge latest

---
 inference/benchmarks/resnet50/README.md       | 21 +++++++++++++-
 .../iluvatar_configurations.yaml              |  3 +-
 .../iluvatar/iluvatar_analysis.py             |  3 +-
 .../iluvatar/pytorch/packages/README.md       |  2 ++
 .../iluvatar/pytorch/sdk_installers/README.md |  2 ++
 inference/inference_engine/iluvatar/ixrt.py   | 29 +++++++++----------
 6 files changed, 41 insertions(+), 19 deletions(-)

diff --git a/inference/benchmarks/resnet50/README.md b/inference/benchmarks/resnet50/README.md
index 024b7f417..c92390391 100644
--- a/inference/benchmarks/resnet50/README.md
+++ b/inference/benchmarks/resnet50/README.md
@@ -60,6 +60,25 @@ find ./val -name "*JPEG" | wc -l
    - TensorRT 8.5.1.7
    - torch_tensorrt 1.3.0
 
+#### 2.3 Nvidia A100
+
+- ##### 硬件环境
+    - 机器、加速卡型号: MR-V100
+    
+- ##### 软件环境
+   - OS版本：Ubuntu 18.04
+   - OS kernel版本: 5.15.0-78-generic
+   - 加速卡驱动版本：3.2.0
+   - Docker 版本：24.0.4
+   - 训练框架版本：torch-1.13.1+corex.3.2.0
+   - 依赖软件版本：
+     - cuda: 10.2
+   
+- 推理工具包
+
+   - TensorRT 8.5.1.7
+   - torch_tensorrt 1.3.0
+
 ### 3. 运行情况
 
 * 指标列表
@@ -84,4 +103,4 @@ find ./val -name "*JPEG" | wc -l
 | tensorrt | fp16      | 256  |613.4 | 1358.9   | 4469.4 | 1391.4   | 12698.7 | 16.8% | 76.2/76.2 | 19.7/40.0 |
 | tensorrt | fp32   | 256  | 474.4    | 1487.3      | 2653.2     | 1560.3        | 6091.6  | 16.1% | 76.2/76.2 | 28.86/40.0 |
 | torchtrt | fp16     | 256  | 716.4 | 1370.4 | 4282.6 | 1320.0 | 4723.0 | 6.3% | 76.2/76.2 | 9.42/40.0 |
-
+| ixrt | fp16     | 256  | 200.3 |  |  | 276.8 | 1914.3 | 8.2% | 76.2/76.2 | 4.3/32.0 |
diff --git a/inference/configs/resnet50/vendor_config/iluvatar_configurations.yaml b/inference/configs/resnet50/vendor_config/iluvatar_configurations.yaml
index 84d9aae89..c721ede09 100644
--- a/inference/configs/resnet50/vendor_config/iluvatar_configurations.yaml
+++ b/inference/configs/resnet50/vendor_config/iluvatar_configurations.yaml
@@ -2,8 +2,7 @@ ixrt_tmp_path: iluvatar_tmp/resnet50-fp16.engine
 has_dynamic_axis: false
 repeat: 1
 image_size: 224
-batch_size: 128
-exist_onnx_path: onnxs/resnet50.onnx
+exist_onnx_path: onnxs/resnet50_bs256_pytorch_fp16False.onnx
 # exist_compiler_path: resnet50-fp16.engine
 output_types: {"output":"float32"}
 input_types: {"input": "float32"}
\ No newline at end of file
diff --git a/inference/docker_images/iluvatar/iluvatar_analysis.py b/inference/docker_images/iluvatar/iluvatar_analysis.py
index 26132d19d..77e9ac0a3 100644
--- a/inference/docker_images/iluvatar/iluvatar_analysis.py
+++ b/inference/docker_images/iluvatar/iluvatar_analysis.py
@@ -11,4 +11,5 @@ def analysis_log(logpath):
             max_mem = line.split(" ")[3]
             max_mem = float(max_mem[:-3])
 
-    return round(max_usage / 1024.0, 2), round(max_mem / 1024.0, 2)
+    return round(max_usage / 1024.0,
+                 2), round(max_mem / 1024.0, 2), eval("24e12"), eval("96e12")
diff --git a/inference/docker_images/iluvatar/pytorch/packages/README.md b/inference/docker_images/iluvatar/pytorch/packages/README.md
index dca100acb..88a18b3dc 100644
--- a/inference/docker_images/iluvatar/pytorch/packages/README.md
+++ b/inference/docker_images/iluvatar/pytorch/packages/README.md
@@ -1,5 +1,7 @@
 # 以下软件包需联系天数智芯获取
 
+>联系邮箱: contact-us@iluvatar.com
+
 ixrt-0.4.0+corex.3.2.0-cp38-cp38-linux_x86_64.whl
 
 torch-1.13.1+corex.3.2.0-cp38-cp38-linux_x86_64.whl
diff --git a/inference/docker_images/iluvatar/pytorch/sdk_installers/README.md b/inference/docker_images/iluvatar/pytorch/sdk_installers/README.md
index fc9bf8738..73564e7c8 100644
--- a/inference/docker_images/iluvatar/pytorch/sdk_installers/README.md
+++ b/inference/docker_images/iluvatar/pytorch/sdk_installers/README.md
@@ -1,5 +1,7 @@
 # 以下软件包需联系天数智芯获取
 
+>联系邮箱: contact-us@iluvatar.com
+
 corex-installer-linux64-3.2.0-20230718_x86_64_10.2.run
 
 partial_install_cuda_header.tar.gz
\ No newline at end of file
diff --git a/inference/inference_engine/iluvatar/ixrt.py b/inference/inference_engine/iluvatar/ixrt.py
index 85b835062..620cc32f3 100644
--- a/inference/inference_engine/iluvatar/ixrt.py
+++ b/inference/inference_engine/iluvatar/ixrt.py
@@ -16,13 +16,17 @@ def __init__(self, host_mem, device_mem):
             self.device = device_mem
 
         def __str__(self):
-            return "Host:\n" + str(self.host) + "\nDevice:\n" + str(
-                self.device)
+            return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)
 
         def __repr__(self):
             return self.__str__()
 
     def __init__(self, config, onnx_path, model):
+        self.str_to_numpy_dict = {
+            "int32": np.int32,
+            "float16": np.float16,
+            "float32": np.float32,
+        }
         self.engine = self.build_engine(config, onnx_path)
         self.outputs = self.allocate_buffers(self.engine)
 
@@ -31,12 +35,12 @@ def config_init_engine(self, config, onnx_path):
 
         runtime_config = RuntimeConfig()
 
-        input_shapes = [config.batch_size, 3, config.image_size, config.image_size]    
+        input_shapes = [config.batch_size, 3, config.image_size, config.image_size]
         runtime_config.input_shapes = [("input", input_shapes)]
         runtime_config.device_idx = 0
 
         precision = "float16"
-        if precision=="int8":
+        if precision == "int8":
             assert quant_file, "Quant file must provided for int8 inferencing."
 
         runtime_config.runtime_context = RuntimeContext(
@@ -77,25 +81,20 @@ def build_engine(self, config, onnx_path):
 
     def allocate_buffers(self, engine):
         output_map = engine.GetOutputShape()
-        output_io_buffers = []   
+        output_io_buffers = []
         output_types = {}
         config = engine.GetConfig()
         for key, val in config.runtime_context.output_types.items():
             output_types[key] = str(val)
         for name, shape in output_map.items():
             # 1. apply memory buffer for output of the shape
-            if output_types[name] =="float32":
-                buffer = np.zeros(shape.dims, dtype=np.float32)
-            elif output_types[name] =="int32":
-                buffer = np.zeros(shape.dims, dtype=np.int32)
-            elif output_types[name] =="float16":
-                buffer = np.zeros(shape.dims, dtype=np.float16)
-            else:
-                raise RuntimeError("need to add a {} datatype of output".format(output_types[name]))
+            buffer = np.zeros(
+                shape.dims, dtype=self.str_to_numpy_dict[output_types[name]]
+            )
             buffer = torch.tensor(buffer).cuda()
             # 2. put the buffer to a list
             output_io_buffers.append([name, buffer, shape])
-        
+
         engine.BindIOBuffers(output_io_buffers)
         return output_io_buffers
 
@@ -125,6 +124,6 @@ def __call__(self, model_inputs: list):
         gpu_io_buffers = []
         for buffer in self.outputs:
             # gpu_io_buffers.append([buffer[0], buffer[1], buffer[2]])
-            gpu_io_buffers.append(buffer[1].cpu())
+            gpu_io_buffers.append(buffer[1])
 
         return gpu_io_buffers, 0

From 69e2abc7f30b7512039357b3d8154df04283ad96 Mon Sep 17 00:00:00 2001
From: stezpy <stezpy@gmail.com>
Date: Thu, 10 Aug 2023 17:09:18 +0800
Subject: [PATCH 5/8] update

---
 inference/benchmarks/resnet50/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/inference/benchmarks/resnet50/README.md b/inference/benchmarks/resnet50/README.md
index c92390391..d6dcbe3b5 100644
--- a/inference/benchmarks/resnet50/README.md
+++ b/inference/benchmarks/resnet50/README.md
@@ -103,4 +103,4 @@ find ./val -name "*JPEG" | wc -l
 | tensorrt | fp16      | 256  |613.4 | 1358.9   | 4469.4 | 1391.4   | 12698.7 | 16.8% | 76.2/76.2 | 19.7/40.0 |
 | tensorrt | fp32   | 256  | 474.4    | 1487.3      | 2653.2     | 1560.3        | 6091.6  | 16.1% | 76.2/76.2 | 28.86/40.0 |
 | torchtrt | fp16     | 256  | 716.4 | 1370.4 | 4282.6 | 1320.0 | 4723.0 | 6.3% | 76.2/76.2 | 9.42/40.0 |
-| ixrt | fp16     | 256  | 200.3 |  |  | 276.8 | 1914.3 | 8.2% | 76.2/76.2 | 4.3/32.0 |
+| ixrt | fp16     | 256  | 275.6 |  |  | 276.8 | 1914.3 | 8.2% | 76.2/76.2 | 4.3/32.0 |

From 640b53d17e48e4c33b731ac39f85a045a886be4c Mon Sep 17 00:00:00 2001
From: stezpy <stezpy@gmail.com>
Date: Thu, 10 Aug 2023 17:28:21 +0800
Subject: [PATCH 6/8] update readme

---
 inference/benchmarks/resnet50/README.md | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/inference/benchmarks/resnet50/README.md b/inference/benchmarks/resnet50/README.md
index d6dcbe3b5..931a44810 100644
--- a/inference/benchmarks/resnet50/README.md
+++ b/inference/benchmarks/resnet50/README.md
@@ -60,7 +60,7 @@ find ./val -name "*JPEG" | wc -l
    - TensorRT 8.5.1.7
    - torch_tensorrt 1.3.0
 
-#### 2.3 Nvidia A100
+#### 2.2 MR-V100
 
 - ##### 硬件环境
     - 机器、加速卡型号: MR-V100
@@ -76,8 +76,7 @@ find ./val -name "*JPEG" | wc -l
    
 - 推理工具包
 
-   - TensorRT 8.5.1.7
-   - torch_tensorrt 1.3.0
+   - ixrt-0.4.0+corex.3.2.0
 
 ### 3. 运行情况
 
@@ -103,4 +102,4 @@ find ./val -name "*JPEG" | wc -l
 | tensorrt | fp16      | 256  |613.4 | 1358.9   | 4469.4 | 1391.4   | 12698.7 | 16.8% | 76.2/76.2 | 19.7/40.0 |
 | tensorrt | fp32   | 256  | 474.4    | 1487.3      | 2653.2     | 1560.3        | 6091.6  | 16.1% | 76.2/76.2 | 28.86/40.0 |
 | torchtrt | fp16     | 256  | 716.4 | 1370.4 | 4282.6 | 1320.0 | 4723.0 | 6.3% | 76.2/76.2 | 9.42/40.0 |
-| ixrt | fp16     | 256  | 275.6 |  |  | 276.8 | 1914.3 | 8.2% | 76.2/76.2 | 4.3/32.0 |
+| ixrt | fp16     | 256  | 275.6 |  |  | 276.8 | 1914.3 | 8.2% | 76.2 | 4.3/32.0 |

From 6515344117d8f7bd1cc1b03fb6c5d06dddb96737 Mon Sep 17 00:00:00 2001
From: stezpy <peiyuan.zhang@iluvatar.com>
Date: Thu, 17 Aug 2023 20:41:43 +0800
Subject: [PATCH 7/8] update readme

---
 inference/benchmarks/resnet50/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/inference/benchmarks/resnet50/README.md b/inference/benchmarks/resnet50/README.md
index a6566c073..42fc5149c 100644
--- a/inference/benchmarks/resnet50/README.md
+++ b/inference/benchmarks/resnet50/README.md
@@ -115,10 +115,10 @@ find ./val -name "*JPEG" | wc -l
 * 指标值
 
 | 推理工具  | precision | bs   | e2e_time | p_val_whole | p_val_core | p_infer_whole | \*p_infer_core | \*MFU     | acc         | mem        |
-| ----------- | --------- | ---- | ---- | -------- | ----------- | ---------- | ------------- | ------------ | ----------- | ----------- | ---------- |
+| ----------- | --------- | ---- | ---- | -------- | ----------- | ---------- | ------------- | ------------ | ----------- | ----------- |
 | tensorrt | fp16      | 256  |613.4 | 1358.9   | 4469.4 | 1391.4   | 12698.7 | 16.8% | 76.2/76.2 | 19.7/40.0 |
 | tensorrt | fp32   | 256  | 474.4    | 1487.3      | 2653.2     | 1560.3        | 6091.6  | 16.1% | 76.2/76.2 | 28.86/40.0 |
 | torchtrt | fp16     | 256  | 716.4 | 1370.4 | 4282.6 | 1320.0 | 4723.0 | 6.3% | 76.2/76.2 | 9.42/40.0 |
-| ixrt | fp16     | 256  | 275.6 |  |  | 276.8 | 1914.3 | 8.2% | 76.2 | 4.3/32.0 |
+| ixrt     | fp16     | 256  | 136.4 | /      | /      | 1146.6 | 2679.9 | 11.5% | 76.2 | 4.3/32.0 |
 | kunlunxin_xtcl | fp32   | 128  | 311.215    | /      | /     |  837.507    | 1234.727  | / | 76.2/76.2 | / |
 

From dd16af4edc29e7253d7d9cd9750b3eb6810bccb3 Mon Sep 17 00:00:00 2001
From: stezpy <peiyuan.zhang@iluvatar.com>
Date: Fri, 18 Aug 2023 09:51:58 +0800
Subject: [PATCH 8/8] update

---
 inference/benchmarks/resnet50/README.md | 35 +++++++++++++------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/inference/benchmarks/resnet50/README.md b/inference/benchmarks/resnet50/README.md
index 42fc5149c..aaf3c14fa 100644
--- a/inference/benchmarks/resnet50/README.md
+++ b/inference/benchmarks/resnet50/README.md
@@ -60,23 +60,6 @@ find ./val -name "*JPEG" | wc -l
    - TensorRT 8.5.1.7
    - torch_tensorrt 1.3.0
 
-#### 2.2 MR-V100
-
-- ##### 硬件环境
-    - 机器、加速卡型号: MR-V100
-    
-- ##### 软件环境
-   - OS版本：Ubuntu 18.04
-   - OS kernel版本: 5.15.0-78-generic
-   - 加速卡驱动版本：3.2.0
-   - Docker 版本：24.0.4
-   - 训练框架版本：torch-1.13.1+corex.3.2.0
-   - 依赖软件版本：
-     - cuda: 10.2
-   
-- 推理工具包
-
-   - ixrt-0.4.0+corex.3.2.0
 #### 2.2 昆仑芯R200
 
 - ##### 硬件环境
@@ -95,6 +78,24 @@ find ./val -name "*JPEG" | wc -l
    
    - XTCL 2.1
 
+#### 2.3 天数智芯 MR-V100
+
+- ##### 硬件环境
+    - 机器、加速卡型号: MR-V100
+    
+- ##### 软件环境
+   - OS版本：Ubuntu 18.04
+   - OS kernel版本: 5.15.0-78-generic
+   - 加速卡驱动版本：3.2.0
+   - Docker 版本：24.0.4
+   - 训练框架版本：torch-1.13.1+corex.3.2.0
+   - 依赖软件版本：
+     - cuda: 10.2
+   
+- 推理工具包
+
+   - IXRT: ixrt-0.4.0+corex.3.2.0
+
 ### 3. 运行情况
 
 * 指标列表