Skip to content
This repository has been archived by the owner on Jan 24, 2024. It is now read-only.

Update model.py #34

Closed
wants to merge 2 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 79 additions & 56 deletions resnet50/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import cProfile
import pstats
import StringIO
from itertools import *

import paddle
import paddle.fluid as fluid
Expand All @@ -21,6 +22,8 @@

from continuous_evaluation import tracking_kpis

fluid.default_startup_program().random_seed = 91


def parse_args():
parser = argparse.ArgumentParser('Convolution model benchmark.')
Expand Down Expand Up @@ -69,8 +72,8 @@ def parse_args():
help='The device type.')
parser.add_argument(
"--gpu_id",
type=int,
default=3,
type=str,
default='0,1,2,3',
help="The GPU Card Id. (default: %(default)d)")
parser.add_argument(
'--data_set',
Expand Down Expand Up @@ -202,57 +205,68 @@ def run_benchmark(model, args):
else:
dshape = [224, 224, 3]

input = fluid.layers.data(name='data', shape=dshape, dtype='float32')
# Input data
image = fluid.layers.data(name='image', shape=dshape, dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
predict = model(input, class_dim)

#Train program
predict = model(image, class_dim)
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost)

# Evaluator
batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
batch_acc = fluid.layers.accuracy(
input=predict, label=label, total=batch_size_tensor)

inference_program = fluid.default_main_program().clone()
with fluid.program_guard(inference_program):
inference_program = fluid.io.get_inference_program(
target_vars=[batch_acc, batch_size_tensor])

# Optimization to minimize loss
optimizer = fluid.optimizer.Momentum(learning_rate=0.01, momentum=0.9)
opts = optimizer.minimize(avg_cost)

fluid.memory_optimize(fluid.default_main_program())

# Reader
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.cifar.train10()
if args.data_set == 'cifar10' else paddle.dataset.flowers.train(),
buf_size=5120),
batch_size=args.batch_size)

test_reader = paddle.batch(
paddle.dataset.cifar.test10()
if args.data_set == 'cifar10' else paddle.dataset.flowers.test(),
batch_size=args.batch_size)

# Register test program
test_program = fluid.default_main_program().clone()
with fluid.program_guard(test_program):
test_program = fluid.io.get_inference_program(
target_vars=[batch_acc])

# Initialize executor
place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())

def test(exe):
test_accuracy = fluid.average.WeightedAverage()
# Define parallel exe
train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name)
test_exe = fluid.ParallelExecutor(
use_cuda=True, main_program=test_program, share_vars_from=train_exe)

feeder = fluid.DataFeeder(place=place, feed_list=[image, label])

def test(test_exe):
test_accuracy = []
for batch_id, data in enumerate(test_reader()):
img_data = np.array(map(lambda x: x[0].reshape(dshape),
data)).astype("float32")
y_data = np.array(map(lambda x: x[1], data)).astype("int64")
y_data = y_data.reshape([-1, 1])

acc, weight = exe.run(inference_program,
feed={"data": img_data,
"label": y_data},
fetch_list=[batch_acc, batch_size_tensor])
test_accuracy.add(value=acc, weight=weight)
acc, = test_exe.run(
fetch_list=[batch_acc.name],
feed=feeder.feed(data)
)
acc_avg = np.mean(np.array(acc))
test_accuracy.append(acc_avg)

return np.array(test_accuracy).mean()

return test_accuracy.eval()

place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
accuracy = fluid.average.WeightedAverage()
if args.use_fake_data:
data = train_reader().next()
image = np.array(map(lambda x: x[0].reshape(dshape), data)).astype(
Expand All @@ -262,7 +276,6 @@ def test(exe):

im_num = 0
total_train_time = 0.0
total_iters = 0

train_acc_kpi = None
for kpi in tracking_kpis:
Expand All @@ -275,7 +288,7 @@ def test(exe):

for pass_id in range(args.pass_num):
every_pass_loss = []
accuracy.reset()
every_pass_acc = []
iter = 0
pass_duration = 0.0
for batch_id, data in enumerate(train_reader()):
Expand All @@ -287,32 +300,35 @@ def test(exe):
data)).astype('float32')
label = np.array(map(lambda x: x[1], data)).astype('int64')
label = label.reshape([-1, 1])
loss, acc, weight = exe.run(
fluid.default_main_program(),
feed={'data': image,
'label': label},
fetch_list=[avg_cost, batch_acc, batch_size_tensor])
accuracy.add(value=acc, weight=weight)
loss, acc, _ = train_exe.run(
fetch_list=[avg_cost.name, batch_acc.name, batch_size_tensor.name],
feed=feeder.feed(data)
)

loss_avg, acc_avg = np.mean(np.array(loss)), np.mean(np.array(acc))
print("Pass: %d, Iter: %d, loss: %s, acc: %s" % \
(pass_id, batch_id, loss_avg, acc_avg))

if iter >= args.skip_batch_num or pass_id != 0:
batch_duration = time.time() - batch_start
pass_duration += batch_duration
im_num += label.shape[0]
every_pass_loss.append(loss)
# print("Pass: %d, Iter: %d, loss: %s, acc: %s" %
# (pass_id, iter, str(loss), str(acc)))
every_pass_loss.append(loss_avg)
every_pass_acc.append(acc_avg)
iter += 1
total_iters += 1

total_train_time += pass_duration
pass_train_acc = accuracy.eval()
pass_test_acc = test(exe)
# Begin test
pass_test_acc = test(test_exe)
print(
"Pass:%d, Loss:%f, Train Accuray:%f, Test Accuray:%f, Handle Images Duration: %f\n"
% (pass_id, np.mean(every_pass_loss), pass_train_acc,
"Pass:%d, Loss:%f, Train Accuray:%f, Test Accuray:%f,\
Handle Images Duration: %f\n"
% (pass_id, np.mean(every_pass_loss), np.mean(every_pass_acc),
pass_test_acc, pass_duration))
if pass_id == args.pass_num - 1 and args.data_set == 'cifar10':
train_acc_kpi.add_record(np.array(pass_train_acc, dtype='float32'))
train_acc_kpi.persist()
examples_per_sec = 0
if total_train_time > 0.0 and iter != args.skip_batch_num:
examples_per_sec = im_num / total_train_time
sec_per_batch = total_train_time / \
Expand All @@ -339,16 +355,23 @@ def collect_gpu_memory_data(alive):
collect the GPU memory data
"""
global is_alive
status, output = commands.getstatusoutput('rm -rf memory.txt')
status, output = commands.getstatusoutput('rm -rf memory.*')
if status == 0:
print('del memory.txt')
command = "nvidia-smi --id=%s --query-compute-apps=used_memory --format=csv -lms 1 > memory.txt" % args.gpu_id
p = subprocess.Popen(command, shell=True)
if p.pid < 0:
print('Get GPU memory data error')
print('del memory')
pid_list = []
for gpu_id in args.gpu_id.split(','):
command = "nvidia-smi --id=%s --query-compute-apps=used_memory --format=csv\
-lms 1000 > memory_%s.txt" % (gpu_id, gpu_id)
p = subprocess.Popen(command, shell=True)
if p.pid < 0:
print('Get GPU memory data error')
else:
pid_list.append(p)

while (is_alive):
time.sleep(1)
p.kill()
for p in pid_list:
p.kill()


def save_gpu_data(mem_list):
Expand All @@ -371,11 +394,11 @@ def save_gpu_data(mem_list):
is_alive = True
if args.data_format == 'NHWC':
raise ValueError('Only support NCHW data_format now.')
if args.device == 'GPU':
collect_memory_thread = threading.Thread(
target=collect_gpu_memory_data, args=(is_alive, ))
collect_memory_thread.setDaemon(True)
collect_memory_thread.start()
#if args.device == 'GPU':
# collect_memory_thread = threading.Thread(
# target=collect_gpu_memory_data, args=(is_alive, ))
# collect_memory_thread.setDaemon(True)
# collect_memory_thread.start()
if args.use_nvprof and args.device == 'GPU':
with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
run_benchmark(model_map[args.model], args)
Expand Down