From f4d2d4aa488d52845afd286f83833b06e3db7cc7 Mon Sep 17 00:00:00 2001 From: guochaorong <32069604+guochaorong@users.noreply.github.com> Date: Tue, 5 Jun 2018 11:06:23 +0800 Subject: [PATCH 1/2] Update model.py --- resnet50/model.py | 47 +++++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/resnet50/model.py b/resnet50/model.py index f4e7beea..6cb1fefa 100644 --- a/resnet50/model.py +++ b/resnet50/model.py @@ -21,7 +21,7 @@ from continuous_evaluation import tracking_kpis - +fluid.default_startup_program().random_seed = 91 def parse_args(): parser = argparse.ArgumentParser('Convolution model benchmark.') parser.add_argument( @@ -202,9 +202,9 @@ def run_benchmark(model, args): else: dshape = [224, 224, 3] - input = fluid.layers.data(name='data', shape=dshape, dtype='float32') + image = fluid.layers.data(name='image', shape=dshape, dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') - predict = model(input, class_dim) + predict = model(image, class_dim) cost = fluid.layers.cross_entropy(input=predict, label=label) avg_cost = fluid.layers.mean(x=cost) @@ -212,9 +212,9 @@ def run_benchmark(model, args): batch_acc = fluid.layers.accuracy( input=predict, label=label, total=batch_size_tensor) - inference_program = fluid.default_main_program().clone() - with fluid.program_guard(inference_program): - inference_program = fluid.io.get_inference_program( + test_program = fluid.default_main_program().clone() + with fluid.program_guard(test_program): + test_program = fluid.io.get_inference_program( target_vars=[batch_acc, batch_size_tensor]) optimizer = fluid.optimizer.Momentum(learning_rate=0.01, momentum=0.9) @@ -232,6 +232,14 @@ def run_benchmark(model, args): paddle.dataset.cifar.test10() if args.data_set == 'cifar10' else paddle.dataset.flowers.test(), batch_size=args.batch_size) + + place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0) + exe = fluid.Executor(place) + #exe.run(fluid.default_startup_program()) + + train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name) + test_exe = fluid.ParallelExecutor( + use_cuda=True, main_program=test_program, share_vars_from=train_exe) def test(exe): test_accuracy = fluid.average.WeightedAverage() @@ -241,7 +249,7 @@ def test(exe): y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = y_data.reshape([-1, 1]) - acc, weight = exe.run(inference_program, + acc, weight = test_exe.run(inference_program, feed={"data": img_data, "label": y_data}, fetch_list=[batch_acc, batch_size_tensor]) @@ -249,16 +257,13 @@ def test(exe): return test_accuracy.eval() - place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0) - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) accuracy = fluid.average.WeightedAverage() - if args.use_fake_data: - data = train_reader().next() - image = np.array(map(lambda x: x[0].reshape(dshape), data)).astype( - 'float32') - label = np.array(map(lambda x: x[1], data)).astype('int64') - label = label.reshape([-1, 1]) + #if args.use_fake_data: + # data = train_reader().next() + # image = np.array(map(lambda x: x[0].reshape(dshape), data)).astype( + # 'float32') + # label = np.array(map(lambda x: x[1], data)).astype('int64') + # label = label.reshape([-1, 1]) im_num = 0 total_train_time = 0.0 @@ -272,7 +277,7 @@ def test(exe): for kpi in tracking_kpis: if kpi.name == '%s_%s_train_speed' % (args.data_set, args.batch_size): train_speed_kpi = kpi - + feeder = fluid.DataFeeder(place=place, feed_list=[image, label]) for pass_id in range(args.pass_num): every_pass_loss = [] accuracy.reset() @@ -287,11 +292,9 @@ def test(exe): data)).astype('float32') label = np.array(map(lambda x: x[1], data)).astype('int64') label = label.reshape([-1, 1]) - loss, acc, weight = exe.run( - fluid.default_main_program(), - feed={'data': image, - 'label': label}, - fetch_list=[avg_cost, batch_acc, batch_size_tensor]) + loss, acc, weight = train_exe.run( + fetch_list=[avg_cost.name, batch_acc.name, batch_size_tensor.name], + feed=feeder.feed(data)) accuracy.add(value=acc, weight=weight) if iter >= args.skip_batch_num or pass_id != 0: batch_duration = time.time() - batch_start From a2d1273cc7b8083e21cf79e5580e175b00c3295c Mon Sep 17 00:00:00 2001 From: guochaorong <32069604+guochaorong@users.noreply.github.com> Date: Wed, 6 Jun 2018 11:08:05 +0800 Subject: [PATCH 2/2] Update model.py --- resnet50/model.py | 138 ++++++++++++++++++++++++++-------------------- 1 file changed, 79 insertions(+), 59 deletions(-) diff --git a/resnet50/model.py b/resnet50/model.py index 6cb1fefa..919f2fed 100644 --- a/resnet50/model.py +++ b/resnet50/model.py @@ -13,6 +13,7 @@ import cProfile import pstats import StringIO +from itertools import * import paddle import paddle.fluid as fluid @@ -22,6 +23,8 @@ from continuous_evaluation import tracking_kpis fluid.default_startup_program().random_seed = 91 + + def parse_args(): parser = argparse.ArgumentParser('Convolution model benchmark.') parser.add_argument( @@ -69,8 +72,8 @@ def parse_args(): help='The device type.') parser.add_argument( "--gpu_id", - type=int, - default=3, + type=str, + default='0,1,2,3', help="The GPU Card Id. (default: %(default)d)") parser.add_argument( '--data_set', @@ -202,72 +205,77 @@ def run_benchmark(model, args): else: dshape = [224, 224, 3] + # Input data image = fluid.layers.data(name='image', shape=dshape, dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') + + #Train program predict = model(image, class_dim) cost = fluid.layers.cross_entropy(input=predict, label=label) avg_cost = fluid.layers.mean(x=cost) + # Evaluator batch_size_tensor = fluid.layers.create_tensor(dtype='int64') batch_acc = fluid.layers.accuracy( input=predict, label=label, total=batch_size_tensor) - test_program = fluid.default_main_program().clone() - with fluid.program_guard(test_program): - test_program = fluid.io.get_inference_program( - target_vars=[batch_acc, batch_size_tensor]) - + # Optimization to minimize lost optimizer = fluid.optimizer.Momentum(learning_rate=0.01, momentum=0.9) opts = optimizer.minimize(avg_cost) - fluid.memory_optimize(fluid.default_main_program()) + # Reader train_reader = paddle.batch( - paddle.reader.shuffle( paddle.dataset.cifar.train10() if args.data_set == 'cifar10' else paddle.dataset.flowers.train(), - buf_size=5120), batch_size=args.batch_size) + test_reader = paddle.batch( paddle.dataset.cifar.test10() if args.data_set == 'cifar10' else paddle.dataset.flowers.test(), batch_size=args.batch_size) + # Register test program + test_program = fluid.default_main_program().clone() + with fluid.program_guard(test_program): + test_program = fluid.io.get_inference_program( + target_vars=[batch_acc]) + + # Initialize executor place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0) exe = fluid.Executor(place) - #exe.run(fluid.default_startup_program()) + exe.run(fluid.default_startup_program()) + # Define parallel exe train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name) test_exe = fluid.ParallelExecutor( use_cuda=True, main_program=test_program, share_vars_from=train_exe) + + feeder = fluid.DataFeeder(place=place, feed_list=[image, label]) - def test(exe): - test_accuracy = fluid.average.WeightedAverage() + def test(test_exe): + test_accuracy = [] for batch_id, data in enumerate(test_reader()): - img_data = np.array(map(lambda x: x[0].reshape(dshape), - data)).astype("float32") - y_data = np.array(map(lambda x: x[1], data)).astype("int64") - y_data = y_data.reshape([-1, 1]) - - acc, weight = test_exe.run(inference_program, - feed={"data": img_data, - "label": y_data}, - fetch_list=[batch_acc, batch_size_tensor]) - test_accuracy.add(value=acc, weight=weight) - - return test_accuracy.eval() - - accuracy = fluid.average.WeightedAverage() - #if args.use_fake_data: - # data = train_reader().next() - # image = np.array(map(lambda x: x[0].reshape(dshape), data)).astype( - # 'float32') - # label = np.array(map(lambda x: x[1], data)).astype('int64') - # label = label.reshape([-1, 1]) + + acc, = test_exe.run( + fetch_list=[batch_acc.name], + feed=feeder.feed(data) + ) + acc_avg = np.mean(np.array(acc)) + test_accuracy.append(acc_avg) + + return np.array(test_accuracy).mean() + + + if args.use_fake_data: + data = train_reader().next() + image = np.array(map(lambda x: x[0].reshape(dshape), data)).astype( + 'float32') + label = np.array(map(lambda x: x[1], data)).astype('int64') + label = label.reshape([-1, 1]) im_num = 0 total_train_time = 0.0 - total_iters = 0 train_acc_kpi = None for kpi in tracking_kpis: @@ -277,10 +285,10 @@ def test(exe): for kpi in tracking_kpis: if kpi.name == '%s_%s_train_speed' % (args.data_set, args.batch_size): train_speed_kpi = kpi - feeder = fluid.DataFeeder(place=place, feed_list=[image, label]) + for pass_id in range(args.pass_num): every_pass_loss = [] - accuracy.reset() + every_pass_acc = [] iter = 0 pass_duration = 0.0 for batch_id, data in enumerate(train_reader()): @@ -292,30 +300,35 @@ def test(exe): data)).astype('float32') label = np.array(map(lambda x: x[1], data)).astype('int64') label = label.reshape([-1, 1]) - loss, acc, weight = train_exe.run( + loss, acc, _ = train_exe.run( fetch_list=[avg_cost.name, batch_acc.name, batch_size_tensor.name], - feed=feeder.feed(data)) - accuracy.add(value=acc, weight=weight) + feed=feeder.feed(data) + ) + + loss_avg, acc_avg = np.mean(np.array(loss)), np.mean(np.array(acc)) + print("Pass: %d, Iter: %d, loss: %s, acc: %s" % \ + (pass_id, batch_id, loss_avg, acc_avg)) + if iter >= args.skip_batch_num or pass_id != 0: batch_duration = time.time() - batch_start pass_duration += batch_duration im_num += label.shape[0] - every_pass_loss.append(loss) - # print("Pass: %d, Iter: %d, loss: %s, acc: %s" % - # (pass_id, iter, str(loss), str(acc))) + every_pass_loss.append(loss_avg) + every_pass_acc.append(acc_avg) iter += 1 - total_iters += 1 total_train_time += pass_duration - pass_train_acc = accuracy.eval() - pass_test_acc = test(exe) + # Begin test + pass_test_acc = test(test_exe) print( - "Pass:%d, Loss:%f, Train Accuray:%f, Test Accuray:%f, Handle Images Duration: %f\n" - % (pass_id, np.mean(every_pass_loss), pass_train_acc, + "Pass:%d, Loss:%f, Train Accuray:%f, Test Accuray:%f,\ + Handle Images Duration: %f\n" + % (pass_id, np.mean(every_pass_loss), np.mean(every_pass_acc), pass_test_acc, pass_duration)) if pass_id == args.pass_num - 1 and args.data_set == 'cifar10': train_acc_kpi.add_record(np.array(pass_train_acc, dtype='float32')) train_acc_kpi.persist() + examples_per_sec = 0 if total_train_time > 0.0 and iter != args.skip_batch_num: examples_per_sec = im_num / total_train_time sec_per_batch = total_train_time / \ @@ -342,16 +355,23 @@ def collect_gpu_memory_data(alive): collect the GPU memory data """ global is_alive - status, output = commands.getstatusoutput('rm -rf memory.txt') + status, output = commands.getstatusoutput('rm -rf memory.*') if status == 0: - print('del memory.txt') - command = "nvidia-smi --id=%s --query-compute-apps=used_memory --format=csv -lms 1 > memory.txt" % args.gpu_id - p = subprocess.Popen(command, shell=True) - if p.pid < 0: - print('Get GPU memory data error') + print('del memory') + pid_list = [] + for gpu_id in args.gpu_id.split(','): + command = "nvidia-smi --id=%s --query-compute-apps=used_memory --format=csv\ + -lms 1000 > memory_%s.txt" % (gpu_id, gpu_id) + p = subprocess.Popen(command, shell=True) + if p.pid < 0: + print('Get GPU memory data error') + else: + pid_list.append(p) + while (is_alive): time.sleep(1) - p.kill() + for p in pid_list: + p.kill() def save_gpu_data(mem_list): @@ -374,11 +394,11 @@ def save_gpu_data(mem_list): is_alive = True if args.data_format == 'NHWC': raise ValueError('Only support NCHW data_format now.') - if args.device == 'GPU': - collect_memory_thread = threading.Thread( - target=collect_gpu_memory_data, args=(is_alive, )) - collect_memory_thread.setDaemon(True) - collect_memory_thread.start() + #if args.device == 'GPU': + # collect_memory_thread = threading.Thread( + # target=collect_gpu_memory_data, args=(is_alive, )) + # collect_memory_thread.setDaemon(True) + # collect_memory_thread.start() if args.use_nvprof and args.device == 'GPU': with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof: run_benchmark(model_map[args.model], args)