Kunlunxin inference (#192)
* kunlunxin inference

* change docker version

* xtcl support fp16 onnx

* add kunlun monitor

* kunlunxin sync and remove d2h time

---------

Co-authored-by: zhaoyixuan02 <zhaoyixuan02@baidu.com>
Co-authored-by: zhoujiamin01 <zhoujiamin01@baidu.com>
3 people authored Aug 16, 2023
1 parent dc86b14 commit 9e4076b
Showing 7 changed files with 453 additions and 0 deletions.
19 changes: 19 additions & 0 deletions inference/benchmarks/resnet50/README.md
@@ -60,6 +60,24 @@ find ./val -name "*JPEG" | wc -l
- TensorRT 8.5.1.7
- torch_tensorrt 1.3.0

#### 2.2 Kunlunxin R200

- ##### Hardware environment
  - Machine / accelerator model: R200

- ##### Software environment
  - OS version: Ubuntu 20.04
  - OS kernel version: 5.15.0-56-generic
  - Accelerator driver version: 4.0
  - Docker version: 20.10.21
  - Dependency versions (see the version check below):
    - pytorch: 1.13.0+cpu
    - onnx: 1.14.0

- Inference toolkit

  - XTCL 2.1
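
A minimal sanity check, assuming it is run inside the Kunlunxin inference container, that the installed CPU-only PyTorch and ONNX builds match the versions listed above:

```python
# Verify the dependency versions listed above (expected: torch 1.13.0+cpu, onnx 1.14.0).
import torch
import onnx

print(torch.__version__)
print(onnx.__version__)
```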

### 3. Results

* Metric list
@@ -84,4 +102,5 @@ find ./val -name "*JPEG" | wc -l
| tensorrt | fp16 | 256 | 613.4 | 1358.9 | 4469.4 | 1391.4 | 12698.7 | 16.8% | 76.2/76.2 | 19.7/40.0 |
| tensorrt | fp32 | 256 | 474.4 | 1487.3 | 2653.2 | 1560.3 | 6091.6 | 16.1% | 76.2/76.2 | 28.86/40.0 |
| torchtrt | fp16 | 256 | 716.4 | 1370.4 | 4282.6 | 1320.0 | 4723.0 | 6.3% | 76.2/76.2 | 9.42/40.0 |
| kunlunxin_xtcl | fp32 | 128 | 311.215 | / | / | 837.507 | 1234.727 | / | 76.2/76.2 | / |

@@ -0,0 +1,4 @@
fp16: false
compiler: xtcl
no_validation: true
exist_onnx_path: onnxs/resnet50_bs256_pytorch_fp16False.onnx
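
This vendor config is plain YAML. Below is a minimal sketch of reading it with PyYAML (installed in the Kunlunxin Dockerfile later in this commit); the file name is a placeholder, and the actual FlagPerf loader may merge these keys differently:

```python
import yaml

# Placeholder file name; the real path of this vendor config in the repo may differ.
with open("kunlunxin_configurations.yaml") as f:
    cfg = yaml.safe_load(f)

assert cfg["compiler"] == "xtcl"       # compile the model with XTCL
assert cfg["fp16"] is False            # fp32 run
assert cfg["no_validation"] is True    # skip accuracy validation
print(cfg["exist_onnx_path"])          # reuse the pre-exported ONNX file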
23 changes: 23 additions & 0 deletions inference/docker_images/kunlunxin/kunlunxin_analysis.py
@@ -0,0 +1,23 @@
def analysis_log(logpath):
    max_usage = 0.0  # peak "mem" value seen in the log (MiB)
    max_mem = 0.0    # peak "w_mem" value seen in the log (MiB)
    with open(logpath) as logfile:
        for line in logfile.readlines():
            # monitor samples look like: " xpu_smi <temp> <power> <mem> <w_mem> <use_rate>"
            if "xpu_smi" in line:
                line = line[:-1]
                usage = float(line.split(" ")[4])
                max_usage = max(max_usage, usage)
                mem = float(line.split(" ")[5])
                max_mem = max(max_mem, mem)

    # MiB -> GiB, plus two fixed peak-compute constants
    return round(max_usage / 1024.0, 2), round(max_mem / 1024.0, 2), 32e12, 128e12


if __name__ == "__main__":
    max1, max2, max3, max4 = analysis_log(
        "/home/zhoujiamin01/workspace/zjm_flag/FlagPerf/inference/result/run20230809192313/resnet50:pytorch_1.13/127.0.0.1_noderank0/kunlunxin_monitor.log"
    )
    print(max1, max2, max3, max4)
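
For reference, kunlunxin_monitor.py (below) appends one timestamp line plus one " xpu_smi ..." sample line every few seconds, which is what analysis_log() parses. A minimal usage sketch follows; the sample values in the comment and the log path are made up:

```python
# A monitor sample looks roughly like (values invented):
#   2023-08-09-19:23:13
#    xpu_smi 45 75 9650 32768 16
# i.e. " xpu_smi <temp> <power> <mem> <w_mem> <use_rate>", so split(" ")[4] is the
# "mem" field and split(" ")[5] the "w_mem" field, both in MiB.
from kunlunxin_analysis import analysis_log

mem_gib, w_mem_gib, peak1, peak2 = analysis_log("kunlunxin_monitor.log")  # hypothetical path
print(mem_gib, w_mem_gib)  # peak "mem" and "w_mem" values, converted to GiB
```
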
256 changes: 256 additions & 0 deletions inference/docker_images/kunlunxin/kunlunxin_monitor.py
@@ -0,0 +1,256 @@
#!/usr/bin/env python3
# encoding: utf-8
'''
Usage: python3 kunlunxin_monitor.py -o operation -l [log_path]
    -o, --operation  start|stop|restart|status
    -l, --log        log path, ./logs/ by default
'''

import os
import sys
import time
import signal
import atexit
import argparse
import datetime
from multiprocessing import Process
import subprocess
import schedule


class Daemon:
'''
    daemon subprocess class.
    usage: subclass this daemon and override the run() method.
    pid file: /tmp/xpu_monitor.pid, removed automatically on exit.
    verbose: debug mode, disabled by default.
'''

def __init__(self,
pid_file,
log_file,
err_file,
gpu_log,
log_path,
rate=5,
stdin=os.devnull,
stdout=os.devnull,
stderr=os.devnull,
home_dir='.',
umask=0o22,
verbose=0):
self.stdin = stdin
self.stdout = stdout
self.stderr = stderr
self.home_dir = home_dir
self.verbose = verbose
self.pidfile = pid_file
self.logfile = log_file
self.errfile = err_file
self.gpufile = gpu_log
self.logpath = log_path
self.rate = rate
self.umask = umask
self.verbose = verbose
self.daemon_alive = True

def get_pid(self):
try:
with open(self.pidfile, 'r') as pf:
pid = int(pf.read().strip())
except IOError:
pid = None
except SystemExit:
pid = None
return pid

def del_pid(self):
if os.path.exists(self.pidfile):
os.remove(self.pidfile)

def run(self):
'''
NOTE: override the method in subclass
'''

def gpu_mon(file):
TIMESTAMP = datetime.datetime.now().strftime('%Y-%m-%d-%H:%M:%S')
cmd = "xpu_smi |grep '/dev/xpu0'|awk '{print $29,$27,$22,$24,$14}'" ## temp power mem w_mem use_rate
process = subprocess.Popen(cmd,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
encoding='utf-8')
try:
out = process.communicate(timeout=10)
except subprocess.TimeoutExpired:
process.kill()
out = process.communicate()

            # On failure, record an error line that analysis_log() will skip
            # (it only parses lines containing "xpu_smi").
            if process.returncode != 0:
                result = TIMESTAMP + "\n monitor error: " + out[0] + "\n"
            else:
                result = TIMESTAMP + "\n xpu_smi " + out[0] + "\n"
with open(file, 'a') as f:
f.write(result)

def timer_gpu_mon():
gpu_process = Process(target=gpu_mon, args=(self.gpufile, ))
gpu_process.start()

schedule.every(self.rate).seconds.do(timer_gpu_mon)
while True:
schedule.run_pending()
time.sleep(5)

def daemonize(self):
if self.verbose >= 1:
print('daemon process starting ...')
try:
pid = os.fork()
if pid > 0:
sys.exit(0)
except OSError as e:
sys.stderr.write('fork #1 failed: %d (%s)\n' %
(e.errno, e.strerror))
sys.exit(1)
os.chdir(self.home_dir)
os.setsid()
os.umask(self.umask)
try:
pid = os.fork()
if pid > 0:
sys.exit(0)
except OSError as e:
sys.stderr.write('fork #2 failed: %d (%s)\n' %
(e.errno, e.strerror))
sys.exit(1)
sys.stdout.flush()
sys.stderr.flush()
si = open(self.stdin, 'r')
so = open(self.stdout, 'a+')
if self.stderr:
se = open(self.stderr, 'a+')
else:
se = so
os.dup2(si.fileno(), sys.stdin.fileno())
os.dup2(so.fileno(), sys.stdout.fileno())
os.dup2(se.fileno(), sys.stderr.fileno())
atexit.register(self.del_pid)
pid = str(os.getpid())
with open(self.pidfile, 'w+') as f:
f.write('%s\n' % pid)

def start(self):
if not os.path.exists(self.logpath):
os.makedirs(self.logpath)
elif os.path.exists(self.gpufile):
os.remove(self.gpufile)
if self.verbose >= 1:
print('ready to start ......')
# check for a pid file to see if the daemon already runs
pid = self.get_pid()
if pid:
msg = 'pid file %s already exists, is it already running?\n'
sys.stderr.write(msg % self.pidfile)
sys.exit(1)
# start the daemon
self.daemonize()
self.run()

def stop(self):
if self.verbose >= 1:
print('stopping ...')
pid = self.get_pid()
if not pid:
msg = 'pid file [%s] does not exist. Not running?\n' % self.pidfile
sys.stderr.write(msg)
if os.path.exists(self.pidfile):
os.remove(self.pidfile)
return
# try to kill the daemon process
try:
i = 0
while 1:
os.kill(pid, signal.SIGTERM)
time.sleep(1)
i = i + 1
if i % 10 == 0:
os.kill(pid, signal.SIGHUP)
except OSError as err:
err = str(err)
if err.find('No such process') > 0:
if os.path.exists(self.pidfile):
os.remove(self.pidfile)
else:
print(str(err))
sys.exit(1)
if self.verbose >= 1:
print('Stopped!')

def restart(self):
self.stop()
self.start()

def status(self):
pid = self.get_pid()
if pid:
if os.path.exists('/proc/%d' % pid):
return pid
return False


def parse_args():
''' Check script input parameter. '''
parse = argparse.ArgumentParser(description='Sys monitor script')
parse.add_argument('-o',
type=str,
metavar='[operation]',
required=True,
help='start|stop|restart|status')
parse.add_argument('-l',
type=str,
metavar='[log_path]',
required=False,
default='./logs/',
help='log path')
args = parse.parse_args()
return args


def main():
sample_rate1 = 5
args = parse_args()
operation = args.o
log_path = args.l
pid_fn = str('/tmp/xpu_monitor.pid')
log_fn = str(log_path + '/kunlunxin_monitor.log')
err_fn = str(log_path + '/kunlunxin_monitor.err')
# result for gpu
gpu_fn = str(log_path + '/kunlunxin_monitor.log')

subdaemon = Daemon(pid_fn,
log_fn,
err_fn,
gpu_fn,
log_path,
verbose=1,
rate=sample_rate1)
if operation == 'start':
subdaemon.start()
elif operation == 'stop':
subdaemon.stop()
elif operation == 'restart':
subdaemon.restart()
elif operation == 'status':
pid = subdaemon.status()
if pid:
print('process [%s] is running ......' % pid)
else:
print('daemon process [%s] stopped' % pid)
else:
print("invalid argument!")
sys.exit(1)


if __name__ == '__main__':
main()
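
A minimal sketch of driving this monitor from another process, following the usage string at the top of the script; the ./logs/ directory is just the argparse default, and FlagPerf's runner may pass its own per-run path:

```python
import subprocess

# Start sampling xpu_smi (every 5 seconds by default) into ./logs/kunlunxin_monitor.log.
subprocess.run(["python3", "kunlunxin_monitor.py", "-o", "start", "-l", "./logs/"], check=True)

# ... run the inference workload here ...

# Stop the daemon; the resulting log is summarized by kunlunxin_analysis.py.
subprocess.run(["python3", "kunlunxin_monitor.py", "-o", "stop", "-l", "./logs/"], check=True)
```
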
77 changes: 77 additions & 0 deletions inference/docker_images/kunlunxin/pytorch_1.13/Dockerfile
@@ -0,0 +1,77 @@
FROM ubuntu:18.04

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
bash \
build-essential \
ca-certificates \
wget \
git \
locales \
locales-all \
python3.8-dev \
lsb-release && \
rm -rf /var/lib/apt/lists/*


# Set timezone
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
RUN echo 'Asia/Shanghai' >/etc/timezone

# Install miniconda
# Manually invoke bash on miniconda script per https://github.com/conda/conda/issues/10431
# RUN curl -fsSL -v -o ~/miniconda.sh -O "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh" && \
RUN wget -O ~/miniconda.sh "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh" && \
chmod +x ~/miniconda.sh && \
bash ~/miniconda.sh -b -p /root/miniconda && \
rm ~/miniconda.sh && \
/root/miniconda/bin/conda config --set show_channel_urls yes && \
/root/miniconda/bin/conda create --name python38 python=3.8 -y && \
/root/miniconda/bin/conda clean -ya

# Python dependencies: hyperparameter, typing_extensions, numpy, requests, onnx, etc.
RUN /root/miniconda/envs/python38/bin/pip install \
--no-cache-dir \
-i https://pypi.tuna.tsinghua.edu.cn/simple \
hyperparameter \
typing_extensions \
numpy \
requests \
onnx \
onnxruntime \
attrs \
regex \
decorator \
loguru \
schedule \
munch \
pyyaml \
tqdm \
scipy

RUN /root/miniconda/envs/python38/bin/pip install torch==1.13.0+cpu torchvision==0.14.0+cpu torchaudio==0.13.0 --extra-index-url https://download.pytorch.org/whl/cpu

RUN cd /root && wget https://baidu-kunlun-public.su.bcebos.com/XTCL/XTCL-2.1/XTCL-ubuntu_x86_64.tar.gz && tar -xzf XTCL-ubuntu_x86_64.tar.gz

RUN cd /root && wget https://klx-sdk-release-public.su.bcebos.com/xre/release/4.0.18.1/xre-ubuntu_2004_x86_64.tar.gz && tar -xzf xre-ubuntu_2004_x86_64.tar.gz

ENV LD_LIBRARY_PATH=/root/XTCL-ubuntu_x86_64/3rdparty/lib:/root/XTCL-ubuntu_x86_64/runtime/shlib:/root/XTCL-ubuntu_x86_64/shlib
ENV KERNEL_INCLUDE=/root/XTCL-ubuntu_x86_64/xpu/kernels
ENV XTCL_L3_SIZE=67104768
ENV XPU_PADDLE_L3_SIZE=67104768
ENV RT_LIBRARY_PATH=/root/XTCL-ubuntu_x86_64/runtime/shlib
ENV THIRDPARTY_LIB_DIR=/root/XTCL-ubuntu_x86_64/3rdparty/lib
ENV XTCL_INSTALL_DIR=/root/XTCL-ubuntu_x86_64
ENV XTCL_QUANTIZE_WEIGHT=1
ENV XTCL_USE_FP16=1
ENV PYTHONPATH=/root/XTCL-ubuntu_x86_64/python:/root/XTCL-ubuntu_x86_64/python/tvm:/root/XTCL-ubuntu_x86_64/python/topi
ENV CLANG_PATH=/root/XTCL-ubuntu_x86_64
ENV KERNEL_SEARCH_PATH=/root/XTCL-ubuntu_x86_64/xpu/kernels
ENV XPUSIM_DEVICE_MODEL=KUNLUN2
ENV XTCL_AUTO_ALLOC_L3=1
ENV TVM_DIR=/root/XTCL-ubuntu_x86_64



ENV PATH /root/xre-ubuntu_2004_x86_64/bin:$PATH
ENV PATH /root/miniconda/envs/python38/bin:$PATH
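
A minimal sketch, run inside a container built from this image, to spot-check that the XTCL environment configured above is visible to Python; only variable names taken from the ENV lines are used:

```python
import os

# Print a few of the XTCL-related settings defined in the Dockerfile above.
for var in ("XTCL_INSTALL_DIR", "XTCL_USE_FP16", "XPUSIM_DEVICE_MODEL", "LD_LIBRARY_PATH"):
    print(var, "=", os.environ.get(var))
```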
