add caffe2paddle tests #1014

Merged
merged 1 commit into from Jun 5, 2024
4,402 changes: 4,402 additions & 0 deletions test_benchmark/Caffe/3class/3class.prototxt

Large diffs are not rendered by default.

41 changes: 41 additions & 0 deletions test_benchmark/Caffe/3class/benchmark_infer.py
@@ -0,0 +1,41 @@
import os
import argparse
import pickle
import numpy as np
import sys
sys.path.append('../tools/')

from predict import BenchmarkPipeline


def parse_args():
    parser = argparse.ArgumentParser(description='Model inference')
    parser.add_argument(
        '--batch_size',
        dest='batch_size',
        help='Mini batch size of one gpu or cpu.',
        type=int,
        default=1)

    def str2bool(v):
        return v.lower() in ("true", "t", "1")

    parser.add_argument("--use_gpu", type=str2bool, default=True)
    parser.add_argument(
        "--enable_trt", type=str2bool, default=True, help="enable trt")
    parser.add_argument("--cpu_threads", type=int, default=1)
    parser.add_argument("--enable_mkldnn", type=str2bool, default=True)
    return parser.parse_args()


def main(args):
    data = np.load("input.npy")
    caffe_result = np.load("output.npy")
    benchmark_pipeline = BenchmarkPipeline(
        model_dir="pd_model_dygraph/inference_model/",
        model_name='3class',
        use_gpu=args.use_gpu,
        enable_trt=args.enable_trt,
        cpu_threads=args.cpu_threads,
        enable_mkldnn=args.enable_mkldnn)
    benchmark_pipeline.run_benchmark(
        data=data, caffe_result=caffe_result, warmup=1, repeats=1)
    benchmark_pipeline.analysis_operators(
        model_dir="pd_model_dygraph/inference_model/")
    benchmark_pipeline.report()


if __name__ == '__main__':
    args = parse_args()
    main(args)
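
Note: BenchmarkPipeline is imported from test_benchmark/Caffe/tools/predict.py, which is not part of this diff. As a rough sketch of what the comparison amounts to (the tolerance and variable names below are assumptions, not taken from tools/predict.py), the converted Paddle model can be checked against the saved Caffe reference like this:

# Sketch only: compare the converted Paddle model against the Caffe reference.
# The atol value is an assumption; tools/predict.py may use a different check.
import numpy as np
from paddle.inference import Config, create_predictor

data = np.load("input.npy")            # input fed to the original Caffe model
caffe_result = np.load("output.npy")   # reference output from Caffe

config = Config("pd_model_dygraph/inference_model/model.pdmodel",
                "pd_model_dygraph/inference_model/model.pdiparams")
config.disable_gpu()
predictor = create_predictor(config)

input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
input_handle.copy_from_cpu(data.astype("float32"))
predictor.run()
output_handle = predictor.get_output_handle(predictor.get_output_names()[0])
paddle_result = output_handle.copy_to_cpu()

print("max abs diff:", np.abs(paddle_result - caffe_result).max())
assert np.allclose(paddle_result, caffe_result, atol=1e-05)
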
175 changes: 175 additions & 0 deletions test_benchmark/Caffe/3class/deploy_infer.py
@@ -0,0 +1,175 @@
import os
import time

import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.inference import Config
from paddle.inference import create_predictor


def get_current_memory_mb():
    """
    Obtain the memory usage of the CPU and GPU while the program is running.
    Note that this function itself is time-consuming.
    """
    import pynvml
    import psutil
    import GPUtil
    gpu_id = int(os.environ.get('CUDA_VISIBLE_DEVICES', 0))

    pid = os.getpid()
    p = psutil.Process(pid)
    info = p.memory_full_info()
    cpu_mem = info.uss / 1024. / 1024.
    gpu_mem = 0
    gpu_percent = 0
    gpu_mem_util = 0
    gpus = GPUtil.getGPUs()
    if gpu_id is not None and len(gpus) > 0:
        gpu_percent = gpus[gpu_id].load
        pynvml.nvmlInit()
        handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_id)
        meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
        gpu_mem = meminfo.used / 1024. / 1024.
        gpu_mem_util = gpus[gpu_id].memoryUtil * 100
    # gpu_mem_util stays 0 on CPU-only machines where no GPU is visible
    return round(cpu_mem, 4), round(gpu_mem, 4), round(gpu_mem_util, 4)

class Predictor(object):
    """
    Args:
        model_dir (str): root path of model.pdiparams, model.pdmodel
        use_gpu (bool): whether to use gpu
        batch_size (int): batch size used in inference
        threshold (float): threshold to reserve the result for output.
    """

    def __init__(self,
                 model_dir,
                 use_gpu=False,
                 batch_size=1,
                 cpu_threads=1,
                 enable_mkldnn=False):
        self.predictor, self.config = load_predictor(
            model_dir,
            batch_size=batch_size,
            use_gpu=use_gpu,
            cpu_threads=cpu_threads,
            enable_mkldnn=enable_mkldnn)
        self.inference_time = 0.0

    def predict(self, warmup=0, repeats=1):
        results = None
        # feed a random tensor of the expected shape into every input
        input_names = self.predictor.get_input_names()
        for i in range(len(input_names)):
            input_tensor = self.predictor.get_input_handle(input_names[i])
            data = np.random.rand(8, 3, 224, 224).astype("float32")
            input_tensor.copy_from_cpu(data)
        # warmup runs are excluded from the timing below
        for i in range(warmup):
            self.predictor.run()
            output_names = self.predictor.get_output_names()
            output_tensor = self.predictor.get_output_handle(output_names[0])
            results = output_tensor.copy_to_cpu()
        start_time = time.time()
        for i in range(repeats):
            self.predictor.run()
            output_names = self.predictor.get_output_names()
            output_tensor = self.predictor.get_output_handle(output_names[0])
            results = output_tensor.copy_to_cpu()
        end_time = time.time()
        self.inference_time = (end_time - start_time) / repeats
        return results

def load_predictor(model_dir,
                   batch_size=1,
                   use_gpu=False,
                   cpu_threads=1,
                   enable_mkldnn=False):
    """Set AnalysisConfig and generate an AnalysisPredictor.
    Args:
        model_dir (str): root path of model.pdmodel and model.pdiparams
        use_gpu (bool): whether to use gpu
    Returns:
        predictor (PaddlePredictor): AnalysisPredictor
    """
    config = Config(
        os.path.join(model_dir, 'model.pdmodel'),
        os.path.join(model_dir, 'model.pdiparams'))
    if use_gpu:
        # initial GPU memory (MB), device ID
        config.enable_use_gpu(200, 0)
        # optimize graph and fuse ops
        config.switch_ir_optim(True)
    else:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(cpu_threads)
        if enable_mkldnn:
            try:
                # cache 10 different shapes for mkldnn to avoid memory leaks
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()
            except Exception as e:
                print(
                    "The current environment does not support `mkldnn`, so it is disabled."
                )

    # disable glog output during prediction
    config.disable_glog_info()
    # enable memory optimization
    config.enable_memory_optim()
    # disable feed/fetch ops, required by zero_copy_run
    config.switch_use_feed_fetch_ops(False)
    predictor = create_predictor(config)
    return predictor, config

def main():
    predictor = Predictor(
        "pd_model_dygraph/inference_model/",
        use_gpu=True,
        cpu_threads=1,
        enable_mkldnn=False)
    predictor.predict(warmup=10, repeats=10)
    cm, gm, gu = get_current_memory_mb()
    cost_time = predictor.inference_time

    # record the change against the previous run
    if os.path.exists('result_mem.txt'):
        with open('result_mem.txt', 'r') as f1:
            lines = f1.readlines()
        inference_time_pre = lines[0].strip().split(',')[0].split(':')[1]
        cpu_mem_pre = lines[1].strip().split(',')[0].split(':')[1]
        gpu_mem_pre = lines[2].strip().split(',')[0].split(':')[1]
        gpu_percent_pre = lines[3].strip().split(',')[0].split(':')[1]

        inference_time_change = cost_time - float(inference_time_pre)
        cpu_mem_change = cm - float(cpu_mem_pre)
        gpu_mem_change = gm - float(gpu_mem_pre)
        gpu_percent_change = gu - float(gpu_percent_pre)
        # fail loudly if memory usage grew too much compared with the previous run
        if cpu_mem_change >= 1000 or gpu_mem_change >= 1000:
            raise AssertionError(
                'Memory change is too big! Please check the model!')
        with open('result_mem.txt', 'w') as f2:
            f2.write("inference_time:" + str(cost_time) + ",change:" +
                     str(inference_time_change) + "\n")
            f2.write("cpu_mem:" + str(cm) + ",change:" +
                     str(cpu_mem_change) + "\n")
            f2.write("gpu_mem:" + str(gm) + ",change:" +
                     str(gpu_mem_change) + "\n")
            f2.write("gpu_percent:" + str(gu) + ",change:" +
                     str(gpu_percent_change) + "\n")
    else:
        with open('result_mem.txt', 'w') as f1:
            f1.write("inference_time:" + str(cost_time) + ",change:0" + "\n")
            f1.write("cpu_mem:" + str(cm) + ",change:0" + "\n")
            f1.write("gpu_mem:" + str(gm) + ",change:0" + "\n")
            f1.write("gpu_percent:" + str(gu) + ",change:0" + "\n")

    print_info = {
        'inference_time': cost_time,
        'cpu_mem': cm,
        'gpu_mem': gm,
        'gpu_percent': gu
    }
    return print_info


if __name__ == '__main__':
    paddle.enable_static()
    print_info = main()
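
For reference, deploy_infer.py writes result_mem.txt as one "metric:value,change:delta" line per metric and diffs it against the previous run. A minimal, hypothetical parser for that format (not part of this PR) would look like:

# Hypothetical helper, not part of this PR: parse result_mem.txt, which
# deploy_infer.py writes as lines of the form "metric:value,change:delta".
def read_result_mem(path="result_mem.txt"):
    metrics = {}
    with open(path) as f:
        for line in f:
            metric_part, change_part = line.strip().split(',', 1)
            name, value = metric_part.split(':', 1)
            metrics[name] = (float(value), float(change_part.split(':', 1)[1]))
    return metrics

# e.g. {'inference_time': (0.012, 0.0), 'cpu_mem': (512.3, 0.0), ...}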

34 changes: 34 additions & 0 deletions test_benchmark/Caffe/3class/pd_infer.py
@@ -0,0 +1,34 @@
from __future__ import print_function
import paddle
import paddle.fluid as fluid
import sys
import os
import numpy as np
import pickle

f = open("result.txt", "w")
f.write("======3class: \n")

try:
    # a random input of the model's expected shape is enough for this smoke test
    input_data = np.random.rand(8, 3, 224, 224).astype("float32")

    # test static graph loading
    paddle.enable_static()
    exe = paddle.static.Executor(paddle.CPUPlace())
    [prog, inputs, outputs] = fluid.io.load_inference_model(
        dirname="pd_model_dygraph/inference_model/",
        executor=exe,
        model_filename="model.pdmodel",
        params_filename="model.pdiparams")

    # test dygraph
    paddle.disable_static()
    from pd_model_dygraph.x2paddle_code import main
    input_data = paddle.to_tensor(input_data)
    result = main(input_data)
    f.write("Dygraph Successed\n")

except Exception:
    f.write("!!!!!Failed\n")

f.close()

8 changes: 8 additions & 0 deletions test_benchmark/Caffe/3class/run_benchmark.sh
@@ -0,0 +1,8 @@
# convert the Caffe model to a Paddle model
x2paddle -f caffe -p 3class.prototxt -w 3class.caffemodel -s pd_model_dygraph -df True
# run the inference scripts
python pd_infer.py
python benchmark_infer.py --use_gpu True --enable_trt True
python benchmark_infer.py --use_gpu True --enable_trt False
python benchmark_infer.py --use_gpu False --enable_mkldnn True
python benchmark_infer.py --use_gpu False --enable_mkldnn False
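
The benchmark scripts assume input.npy and output.npy already exist next to them; they are not generated anywhere in this diff. One possible way to produce them with the original Caffe model (a sketch assuming pycaffe is installed and the prototxt's input blob is named "data") is:

# Sketch only, not part of this PR: dump the Caffe reference input/output.
# Assumes pycaffe is available and the input blob is named "data".
import caffe
import numpy as np

net = caffe.Net('3class.prototxt', '3class.caffemodel', caffe.TEST)
data = np.random.rand(8, 3, 224, 224).astype("float32")
net.blobs['data'].reshape(*data.shape)
net.blobs['data'].data[...] = data
output = net.forward()                      # dict: output blob name -> ndarray
np.save('input.npy', data)
np.save('output.npy', list(output.values())[0])
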
4 changes: 4 additions & 0 deletions test_benchmark/Caffe/3class/run_convert.sh
@@ -0,0 +1,4 @@
# convert the Caffe model to a Paddle model
x2paddle -f caffe -p 3class.prototxt -w 3class.caffemodel -s pd_model_dygraph -df True
# run the inference script
python pd_infer.py