[MXNET-1291] solve pylint errors in examples with issue no.12205 (apache#13815)

* Unify the style here

Unify the style here and remove the testing 'print' code segment.

* Unify the comment description

Change the comment description from "multi-layer perceptron" to "Get multi-layer perceptron"

* Unify the style of comments

Unify the style of comments as suggested by @sandeep-krishnamurthy

* git pull the latest code from master of incubator-mxnet

* Complete rebase

* Solve pylint [C0304] Final newline missing

Solve example/deep-embedded-clustering/solver.py(150): [C0304] Final newline missing
cchung100m authored and drivanov committed Mar 4, 2019
1 parent ff2c180 commit 8ff7202
Showing 14 changed files with 396 additions and 253 deletions.
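
The per-file diffs below repeat a small set of mechanical pylint fixes: explicit imports in place of 'from module import *', module- and function-level docstrings, comparisons written with the constant on the right-hand side, Python-3-safe range() instead of xrange(), and a single trailing newline per file. As a quick orientation, here is a minimal sketch of that style; the module and the count_matches function are invented for illustration and are not part of the changed files.

"""Toy module written in the lint-clean style the diffs below converge on (module docstring: C0111)."""
from __future__ import print_function
# Explicit imports instead of a wildcard import such as `from utils import *` (W0401).
from os.path import dirname, join


def count_matches(values, step):
    """Count entries whose index is a multiple of `step` (function docstring: C0111)."""
    total = 0
    for i, value in enumerate(values):
        if i % step == 0:  # constant on the right, not `0 == i % step`
            total += value
    return total


if __name__ == '__main__':
    print(count_matches(list(range(10)), 3))     # 0 + 3 + 6 + 9 = 18
    print(join(dirname(__file__), 'mnist.npz'))  # the explicit imports in use
# The file ends with exactly one newline (C0304 final-newline-missing).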
24 changes: 15 additions & 9 deletions example/bayesian-methods/algos.py
@@ -14,13 +14,13 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""Create implementation of algorithms of HMC, stepHMC, SGD, SGLD and DistilledSGLD"""
from __future__ import print_function
import time
import numpy
import mxnet as mx
import mxnet.ndarray as nd
import time
import logging
from utils import *
from utils import copy_param, get_executor, sample_test_regression, sample_test_acc


def calc_potential(exe, params, label_name, noise_precision, prior_precision):
@@ -35,6 +35,7 @@ def calc_potential(exe, params, label_name, noise_precision, prior_precision):


def calc_grad(exe, exe_grads, params, X, Y, label_name=None, outgrad_f=None):
"""Calculate gradient"""
exe.copy_params_from(params)
exe.arg_dict['data'][:] = X
if outgrad_f is None:
@@ -48,8 +49,8 @@ def calc_grad(exe, exe_grads, params, X, Y, label_name=None, outgrad_f=None):
v.wait_to_read()


def step_HMC(exe, exe_params, exe_grads, label_key, noise_precision, prior_precision, L=10,
eps=1E-6):
def step_HMC(exe, exe_params, exe_grads, label_key, noise_precision, prior_precision, L=10, eps=1E-6):
"""Generate the implementation of step HMC"""
init_params = {k: v.copyto(v.context) for k, v in exe_params.items()}
end_params = {k: v.copyto(v.context) for k, v in exe_params.items()}
init_momentums = {k: mx.random.normal(0, 1, v.shape) for k, v in init_params.items()}
@@ -102,6 +103,7 @@ def step_HMC(exe, exe_params, exe_grads, label_key, noise_precision, prior_preci
def HMC(sym, data_inputs, X, Y, X_test, Y_test, sample_num,
initializer=None, noise_precision=1 / 9.0, prior_precision=0.1,
learning_rate=1E-6, L=10, dev=mx.gpu()):
"""Generate the implementation of HMC"""
label_key = list(set(data_inputs.keys()) - set(['data']))[0]
exe, exe_params, exe_grads, _ = get_executor(sym, dev, data_inputs, initializer)
exe.arg_dict['data'][:] = X
@@ -134,6 +136,7 @@ def SGD(sym, data_inputs, X, Y, X_test, Y_test, total_iter_num,
out_grad_f=None,
initializer=None,
minibatch_size=100, dev=mx.gpu()):
"""Generate the implementation of SGD"""
if out_grad_f is None:
label_key = list(set(data_inputs.keys()) - set(['data']))[0]
exe, params, params_grad, _ = get_executor(sym, dev, data_inputs, initializer)
@@ -173,6 +176,7 @@ def SGLD(sym, X, Y, X_test, Y_test, total_iter_num,
initializer=None,
minibatch_size=100, thin_interval=100, burn_in_iter_num=1000, task='classification',
dev=mx.gpu()):
"""Generate the implementation of SGLD"""
if out_grad_f is None:
label_key = list(set(data_inputs.keys()) - set(['data']))[0]
exe, params, params_grad, _ = get_executor(sym, dev, data_inputs, initializer)
@@ -200,7 +204,7 @@ def SGLD(sym, X, Y, X_test, Y_test, total_iter_num,
if i < burn_in_iter_num:
continue
else:
if 0 == (i - burn_in_iter_num) % thin_interval:
if (i - burn_in_iter_num) % thin_interval == 0:
if optimizer.lr_scheduler is not None:
lr = optimizer.lr_scheduler(optimizer.num_update)
else:
@@ -238,6 +242,7 @@ def DistilledSGLD(teacher_sym, student_sym,
minibatch_size=100,
task='classification',
dev=mx.gpu()):
"""Generate the implementation of DistilledSGLD"""
teacher_exe, teacher_params, teacher_params_grad, _ = \
get_executor(teacher_sym, dev, teacher_data_inputs, teacher_initializer)
student_exe, student_params, student_params_grad, _ = \
@@ -323,13 +328,14 @@ def DistilledSGLD(teacher_sym, student_sym,
sample_test_acc(teacher_exe, X=X, Y=Y, label_num=10,
minibatch_size=minibatch_size)
print("Student: Test ACC %d/%d=%f, Train ACC %d/%d=%f" % (test_correct, test_total,
test_acc, train_correct, train_total, train_acc))
test_acc, train_correct,
train_total, train_acc))
print("Teacher: Test ACC %d/%d=%f, Train ACC %d/%d=%f" \
% (teacher_test_correct, teacher_test_total, teacher_test_acc,
teacher_train_correct, teacher_train_total, teacher_train_acc))
else:
print("Current Iter Num: %d" % (i + 1), "Time Spent: %f" % (end - start), "MSE:",
sample_test_regression(exe=student_exe, X=X_test, Y=Y_test,
sample_test_regression(exe=student_exe, X=X_test, Y=Y_test,
minibatch_size=minibatch_size,
save_path='regression_DSGLD.txt'))
start = time.time()
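
One hunk above rewrites the test 0 == (i - burn_in_iter_num) % thin_interval with the constant on the right. For readers new to the SGLD loop, here is a small self-contained sketch of the burn-in-plus-thinning pattern that condition implements; draw_sample and collect_samples are stand-ins invented for this sketch, not functions from algos.py, which updates network parameters rather than drawing from random.gauss.

import random


def draw_sample():
    """Stand-in for one SGLD parameter update."""
    return random.gauss(0.0, 1.0)


def collect_samples(total_iter_num, burn_in_iter_num=1000, thin_interval=100):
    """Discard the warm-up draws, then keep every thin_interval-th draw."""
    kept = []
    for i in range(total_iter_num):
        sample = draw_sample()
        if i < burn_in_iter_num:
            continue  # still burning in
        if (i - burn_in_iter_num) % thin_interval == 0:  # same test as the SGLD loop above
            kept.append(sample)
    return kept


print(len(collect_samples(50000)))  # 490 thinned samples with these defaults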
101 changes: 57 additions & 44 deletions example/bayesian-methods/bdk_demo.py
@@ -14,21 +14,21 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""Run Stochastic Gradient Langevin Dynamics (SGLD) and Bayesian Dark Knowledge (BDK)"""
from __future__ import print_function
import mxnet as mx
import mxnet.ndarray as nd
import argparse
import time
import numpy
import logging
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
import argparse
from algos import *
from data_loader import *
from utils import *
import mxnet as mx
import mxnet.ndarray as nd
from algos import HMC, SGD, SGLD, DistilledSGLD
from data_loader import load_mnist, load_toy, load_synthetic
from utils import BiasXavier, SGLDScheduler


class CrossEntropySoftmax(mx.operator.NumpyOp):
"""Calculate CrossEntropy softmax function"""
def __init__(self):
super(CrossEntropySoftmax, self).__init__(False)

@@ -58,6 +58,7 @@ def backward(self, out_grad, in_data, out_data, in_grad):


class LogSoftmax(mx.operator.NumpyOp):
"""Generate helper functions to evaluate softmax loss function"""
def __init__(self):
super(LogSoftmax, self).__init__(False)

@@ -103,6 +104,7 @@ def regression_student_grad(student_outputs, teacher_pred, teacher_noise_precisi


def get_mnist_sym(output_op=None, num_hidden=400):
"""Get symbol of mnist"""
net = mx.symbol.Variable('data')
net = mx.symbol.FullyConnected(data=net, name='mnist_fc1', num_hidden=num_hidden)
net = mx.symbol.Activation(data=net, name='mnist_relu1', act_type="relu")
@@ -117,6 +119,7 @@ def get_mnist_sym(output_op=None, num_hidden=400):


def synthetic_grad(X, theta, sigma1, sigma2, sigmax, rescale_grad=1.0, grad=None):
"""Get synthetic gradient value"""
if grad is None:
grad = nd.empty(theta.shape, theta.context)
theta1 = theta.asnumpy()[0]
@@ -128,17 +131,16 @@ def synthetic_grad(X, theta, sigma1, sigma2, sigmax, rescale_grad=1.0, grad=None
-(X - theta1 - theta2) ** 2 / (2 * vx))
grad_npy = numpy.zeros(theta.shape)
grad_npy[0] = -rescale_grad * ((numpy.exp(-(X - theta1) ** 2 / (2 * vx)) * (X - theta1) / vx
+ numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx)) * (
X - theta1 - theta2) / vx) / denominator).sum() \
+ theta1 / v1
grad_npy[1] = -rescale_grad * ((numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx)) * (
X - theta1 - theta2) / vx) / denominator).sum() \
+ theta2 / v2
+ numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx)) *
(X - theta1 - theta2) / vx) / denominator).sum() + theta1 / v1
grad_npy[1] = -rescale_grad * ((numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx)) *
(X - theta1 - theta2) / vx) / denominator).sum() + theta2 / v2
grad[:] = grad_npy
return grad


def get_toy_sym(teacher=True, teacher_noise_precision=None):
"""Get toy symbol"""
if teacher:
net = mx.symbol.Variable('data')
net = mx.symbol.FullyConnected(data=net, name='teacher_fc1', num_hidden=100)
@@ -160,8 +162,9 @@ def dev(gpu_id=None):
return mx.gpu(gpu_id) if gpu_id else mx.cpu()


def run_mnist_SGD(training_num=50000, gpu_id=None):
X, Y, X_test, Y_test = load_mnist(training_num)

def run_mnist_SGD(num_training=50000, gpu_id=None):
X, Y, X_test, Y_test = load_mnist(num_training)
minibatch_size = 100
net = get_mnist_sym()
data_shape = (minibatch_size,) + X.shape[1::]
@@ -175,8 +178,8 @@ def run_mnist_SGD(training_num=50000, gpu_id=None):
lr=5E-6, prior_precision=1.0, minibatch_size=100)


def run_mnist_SGLD(training_num=50000, gpu_id=None):
X, Y, X_test, Y_test = load_mnist(training_num)
def run_mnist_SGLD(num_training=50000, gpu_id=None):
X, Y, X_test, Y_test = load_mnist(num_training)
minibatch_size = 100
net = get_mnist_sym()
data_shape = (minibatch_size,) + X.shape[1::]
@@ -191,10 +194,11 @@ def run_mnist_SGLD(training_num=50000, gpu_id=None):
thin_interval=100, burn_in_iter_num=1000)


def run_mnist_DistilledSGLD(training_num=50000, gpu_id=None):
X, Y, X_test, Y_test = load_mnist(training_num)
def run_mnist_DistilledSGLD(num_training=50000, gpu_id=None):
"""Run DistilledSGLD on mnist dataset"""
X, Y, X_test, Y_test = load_mnist(num_training)
minibatch_size = 100
if training_num >= 10000:
if num_training >= 10000:
num_hidden = 800
total_iter_num = 1000000
teacher_learning_rate = 1E-6
@@ -235,6 +239,7 @@ def run_mnist_DistilledSGLD(training_num=50000, gpu_id=None):


def run_toy_SGLD(gpu_id=None):
"""Run SGLD on toy dataset"""
X, Y, X_test, Y_test = load_toy()
minibatch_size = 1
teacher_noise_precision = 1.0 / 9.0
@@ -243,20 +248,26 @@ def run_toy_SGLD(gpu_id=None):
data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev(gpu_id))}
initializer = mx.init.Uniform(0.07)
exe, params, _ = \
SGLD(sym=net, data_inputs=data_inputs,
X=X, Y=Y, X_test=X_test, Y_test=Y_test, total_iter_num=50000,
initializer=initializer,
learning_rate=1E-4,
# lr_scheduler=mx.lr_scheduler.FactorScheduler(100000, 0.5),
prior_precision=0.1,
burn_in_iter_num=1000,
thin_interval=10,
task='regression',
minibatch_size=minibatch_size, dev=dev(gpu_id))


def run_toy_DistilledSGLD(gpu_id=None):
exe, params, _ = SGLD(sym=net,
data_inputs=data_inputs,
X=X,
Y=Y,
X_test=X_test,
Y_test=Y_test,
total_iter_num=50000,
initializer=initializer,
learning_rate=1E-4,
# lr_scheduler=mx.lr_scheduler.FactorScheduler(100000, 0.5),
prior_precision=0.1,
burn_in_iter_num=1000,
thin_interval=10,
task='regression',
minibatch_size=minibatch_size,
dev=dev(gpu_id)) # disable=unbalanced-tuple-unpacking


def run_toy_DistilledSGLD(gpu_id):
"""Run DistilledSGLD on toy dataset"""
X, Y, X_test, Y_test = load_toy()
minibatch_size = 1
teacher_noise_precision = 1.0
@@ -288,6 +299,7 @@ def run_toy_DistilledSGLD(gpu_id=None):


def run_toy_HMC(gpu_id=None):
"""Run HMC on toy dataset"""
X, Y, X_test, Y_test = load_toy()
minibatch_size = Y.shape[0]
noise_precision = 1 / 9.0
@@ -302,6 +314,7 @@ def run_toy_HMC(gpu_id=None):


def run_synthetic_SGLD():
"""Run synthetic SGLD"""
theta1 = 0
theta2 = 1
sigma1 = numpy.sqrt(10)
@@ -322,14 +335,14 @@ def run_synthetic_SGLD():
grad = nd.empty((2,), mx.cpu())
samples = numpy.zeros((2, total_iter_num))
start = time.time()
for i in xrange(total_iter_num):
for i in range(total_iter_num):
if (i + 1) % 100000 == 0:
end = time.time()
print("Iter:%d, Time spent: %f" % (i + 1, end - start))
start = time.time()
ind = numpy.random.randint(0, X.shape[0])
synthetic_grad(X[ind], theta, sigma1, sigma2, sigmax, rescale_grad=
X.shape[0] / float(minibatch_size), grad=grad)
synthetic_grad(X[ind], theta, sigma1, sigma2, sigmax,
rescale_grad=X.shape[0] / float(minibatch_size), grad=grad)
updater('theta', grad, theta)
samples[:, i] = theta.asnumpy()
plt.hist2d(samples[0, :], samples[1, :], (200, 200), cmap=plt.cm.jet)
@@ -354,18 +367,18 @@ def run_synthetic_SGLD():
args = parser.parse_args()
training_num = args.training
if args.dataset == 1:
if 0 == args.algorithm:
if args.algorithm == 0:
run_mnist_SGD(training_num, gpu_id=args.gpu)
elif 1 == args.algorithm:
elif args.algorithm == 1:
run_mnist_SGLD(training_num, gpu_id=args.gpu)
else:
run_mnist_DistilledSGLD(training_num, gpu_id=args.gpu)
elif args.dataset == 0:
if 1 == args.algorithm:
if args.algorithm == 1:
run_toy_SGLD(gpu_id=args.gpu)
elif 2 == args.algorithm:
elif args.algorithm == 2:
run_toy_DistilledSGLD(gpu_id=args.gpu)
elif 3 == args.algorithm:
elif args.algorithm == 3:
run_toy_HMC(gpu_id=args.gpu)
else:
run_synthetic_SGLD()
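
The synthetic_grad hunk above only re-wraps long lines, but the quantity the function computes reads more easily as math. Assuming the usual two-parameter Gaussian-mixture toy posterior from the SGLD literature (parameters theta1, theta2 with prior variances v1, v2 and observation variance vx; the equal mixture weights cancel in the ratio), the code evaluates the gradient of the negative log-posterior, rescaled by s = rescale_grad = X.shape[0] / minibatch_size:

\frac{\partial U}{\partial \theta_1}
  = -s \sum_{x \in \text{batch}}
    \frac{e^{-(x-\theta_1)^2/(2 v_x)}\,\frac{x-\theta_1}{v_x}
          + e^{-(x-\theta_1-\theta_2)^2/(2 v_x)}\,\frac{x-\theta_1-\theta_2}{v_x}}
         {e^{-(x-\theta_1)^2/(2 v_x)} + e^{-(x-\theta_1-\theta_2)^2/(2 v_x)}}
    + \frac{\theta_1}{v_1}

\frac{\partial U}{\partial \theta_2}
  = -s \sum_{x \in \text{batch}}
    \frac{e^{-(x-\theta_1-\theta_2)^2/(2 v_x)}\,\frac{x-\theta_1-\theta_2}{v_x}}
         {e^{-(x-\theta_1)^2/(2 v_x)} + e^{-(x-\theta_1-\theta_2)^2/(2 v_x)}}
    + \frac{\theta_2}{v_2}

The shared denominator is the denominator array in the code, and the two numerators are exactly the expressions assigned to grad_npy[0] and grad_npy[1].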
5 changes: 3 additions & 2 deletions example/bayesian-methods/data_loader.py
@@ -14,14 +14,15 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""Create helper functions to load mnist dataset and toy dataset"""
from __future__ import print_function
import numpy
import os
import ssl
import numpy


def load_mnist(training_num=50000):
"""Load mnist dataset"""
data_path = os.path.join(os.path.dirname(os.path.realpath('__file__')), 'mnist.npz')
if not os.path.isfile(data_path):
from six.moves import urllib
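
For completeness, a minimal usage sketch of the loader shown above, matching the signature and four-array return value in the diff; it assumes the script is run from the example directory so data_loader is importable, and that load_mnist downloads mnist.npz on first use as the hunk indicates.

from __future__ import print_function
from data_loader import load_mnist

X, Y, X_test, Y_test = load_mnist(50000)  # 50000 training examples, the default used in bdk_demo.py
print(X.shape, Y.shape, X_test.shape, Y_test.shape)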
(Diffs for the remaining 11 changed files are not shown here.)
