diff --git a/benchmark/python/sparse/sparse_end2end.py b/benchmark/python/sparse/sparse_end2end.py index d032f9d6c38e..fc949b649767 100644 --- a/benchmark/python/sparse/sparse_end2end.py +++ b/benchmark/python/sparse/sparse_end2end.py @@ -225,7 +225,7 @@ def row_sparse_pull(kv, key, data, slices, weight_array, priority): learning_rate=0.1, rescale_grad=1.0/batch_size/num_worker) mod.init_optimizer(optimizer=sgd, kvstore=kv) # use accuracy as the metric - metric = mx.metric.create('acc') + metric = mx.gluon.metric.create('acc') index = mod._exec_group.param_names.index('w') # weight_array bound to executors of the contexts diff --git a/example/adversary/adversary_generation.ipynb b/example/adversary/adversary_generation.ipynb index 76c5f4cff569..0dda371a8f41 100644 --- a/example/adversary/adversary_generation.ipynb +++ b/example/adversary/adversary_generation.ipynb @@ -168,7 +168,7 @@ "epoch = 3\n", "for e in range(epoch):\n", " train_loss = 0.\n", - " acc = mx.metric.Accuracy()\n", + " acc = mx.gluon.metric.Accuracy()\n", " for i, (data, label) in enumerate(train_data):\n", " data = data.as_in_context(ctx)\n", " label = label.as_in_context(ctx)\n", @@ -223,7 +223,7 @@ " l = loss(output, label)\n", "l.backward()\n", "\n", - "acc = mx.metric.Accuracy()\n", + "acc = mx.gluon.metric.Accuracy()\n", "acc.update(label, output)\n", "\n", "print(\"Validation batch accuracy {}\".format(acc.get()[1]))" @@ -256,7 +256,7 @@ "\n", "output = net(data_perturbated) \n", "\n", - "acc = mx.metric.Accuracy()\n", + "acc = mx.gluon.metric.Accuracy()\n", "acc.update(label, output)\n", "\n", "print(\"Validation batch accuracy after perturbation {}\".format(acc.get()[1]))" diff --git a/example/autoencoder/variational_autoencoder/VAE_example.ipynb b/example/autoencoder/variational_autoencoder/VAE_example.ipynb index 964e13725c69..7de336611b38 100755 --- a/example/autoencoder/variational_autoencoder/VAE_example.ipynb +++ b/example/autoencoder/variational_autoencoder/VAE_example.ipynb @@ -610,7 +610,7 @@ ], "source": [ "# calculate the ELBO which is minus the loss for test set\n", - "metric = mx.metric.Loss()\n", + "metric = mx.gluon.metric.Loss()\n", "model.score(nd_iter_test, metric)" ] }, diff --git a/example/caffe/caffe_net.py b/example/caffe/caffe_net.py index 803efda9b68e..d748298a2965 100644 --- a/example/caffe/caffe_net.py +++ b/example/caffe/caffe_net.py @@ -140,6 +140,6 @@ def parse_args(): # train if use_caffe_loss: - train_model.fit(args, net, get_iterator(data_shape, use_caffe_data), mx.metric.Caffe()) + train_model.fit(args, net, get_iterator(data_shape, use_caffe_data), mx.gluon.metric.Caffe()) else: train_model.fit(args, net, get_iterator(data_shape, use_caffe_data)) diff --git a/example/caffe/train_model.py b/example/caffe/train_model.py index d7dfd5d7a31e..96e81e06add4 100644 --- a/example/caffe/train_model.py +++ b/example/caffe/train_model.py @@ -93,7 +93,7 @@ def fit(args, network, data_loader, eval_metrics=None, batch_end_callback=None): eval_metrics = ['accuracy'] # TopKAccuracy only allows top_k > 1 for top_k in [5, 10, 20]: - eval_metrics.append(mx.metric.create('top_k_accuracy', top_k=top_k)) + eval_metrics.append(mx.gluon.metric.create('top_k_accuracy', top_k=top_k)) if batch_end_callback is not None: if not isinstance(batch_end_callback, list): diff --git a/example/capsnet/capsulenet.py b/example/capsnet/capsulenet.py index 4d455dbc504c..2e38d85fbdea 100644 --- a/example/capsnet/capsulenet.py +++ b/example/capsnet/capsulenet.py @@ -122,7 +122,7 @@ def to4d(img): return 
img.reshape(img.shape[0], 1, 28, 28).astype(np.float32)/255 -class LossMetric(mx.metric.EvalMetric): +class LossMetric(mx.gluon.metric.EvalMetric): """Evaluate the loss function""" def __init__(self, batch_size, num_gpus): super(LossMetric, self).__init__('LossMetric') diff --git a/example/ctc/lstm_ocr_train.py b/example/ctc/lstm_ocr_train.py index 49d9531920ae..e774ff73ab08 100644 --- a/example/ctc/lstm_ocr_train.py +++ b/example/ctc/lstm_ocr_train.py @@ -103,7 +103,7 @@ def main(): module.fit(train_data=data_train, eval_data=data_val, # use metrics.accuracy or metrics.accuracy_lcs - eval_metric=mx.metric.np(metrics.accuracy, allow_extra_outputs=True), + eval_metric=mx.gluon.metric.np(metrics.accuracy, allow_extra_outputs=True), optimizer='sgd', optimizer_params={'learning_rate': hp.learning_rate, 'momentum': hp.momentum, diff --git a/example/deep-embedded-clustering/autoencoder.py b/example/deep-embedded-clustering/autoencoder.py index c75634475e3a..d6c15ae19df1 100644 --- a/example/deep-embedded-clustering/autoencoder.py +++ b/example/deep-embedded-clustering/autoencoder.py @@ -165,7 +165,7 @@ def l2_norm(label, pred): return np.mean(np.square(label-pred))/2.0 solver = Solver(optimizer, momentum=0.9, wd=decay, learning_rate=l_rate, lr_scheduler=lr_scheduler) - solver.set_metric(mx.metric.CustomMetric(l2_norm)) + solver.set_metric(mx.gluon.metric.CustomMetric(l2_norm)) solver.set_monitor(Monitor(print_every)) data_iter = mx.io.NDArrayIter({'data': X}, batch_size=batch_size, shuffle=True, last_batch_handle='roll_over') @@ -188,7 +188,7 @@ def l2_norm(label, pred): return np.mean(np.square(label-pred))/2.0 solver = Solver(optimizer, momentum=0.9, wd=decay, learning_rate=l_rate, lr_scheduler=lr_scheduler) - solver.set_metric(mx.metric.CustomMetric(l2_norm)) + solver.set_metric(mx.gluon.metric.CustomMetric(l2_norm)) solver.set_monitor(Monitor(print_every)) data_iter = mx.io.NDArrayIter({'data': X}, batch_size=batch_size, shuffle=True, last_batch_handle='roll_over') diff --git a/example/deep-embedded-clustering/dec.py b/example/deep-embedded-clustering/dec.py index 8fb3891e3e99..f67792f0fe37 100644 --- a/example/deep-embedded-clustering/dec.py +++ b/example/deep-embedded-clustering/dec.py @@ -122,7 +122,7 @@ def cluster(self, X, y=None, update_interval=None): def ce(label, pred): return np.sum(label*np.log(label/(pred+0.000001)))/label.shape[0] - solver.set_metric(mx.metric.CustomMetric(ce)) + solver.set_metric(mx.gluon.metric.CustomMetric(ce)) label_buff = np.zeros((X.shape[0], self.num_centers)) train_iter = mx.io.NDArrayIter({'data': X}, {'label': label_buff}, batch_size=batch_size, diff --git a/example/distributed_training-horovod/gluon_mnist.py b/example/distributed_training-horovod/gluon_mnist.py index 7b39f5776a42..c2e6f0bdc533 100644 --- a/example/distributed_training-horovod/gluon_mnist.py +++ b/example/distributed_training-horovod/gluon_mnist.py @@ -104,7 +104,7 @@ def conv_nets(): # Function to evaluate accuracy for a model def evaluate(model, data_iter, context): data_iter.reset() - metric = mx.metric.Accuracy() + metric = mx.gluon.metric.Accuracy() for _, batch in enumerate(data_iter): data = batch.data[0].as_in_context(context) label = batch.label[0].as_in_context(context) @@ -149,7 +149,7 @@ def evaluate(model, data_iter, context): # Create loss function and train metric loss_fn = gluon.loss.SoftmaxCrossEntropyLoss() -metric = mx.metric.Accuracy() +metric = mx.gluon.metric.Accuracy() # Train model for epoch in range(args.epochs): diff --git 
a/example/distributed_training-horovod/module_mnist.py b/example/distributed_training-horovod/module_mnist.py index 4fcb02a46996..74f6bc9daf21 100644 --- a/example/distributed_training-horovod/module_mnist.py +++ b/example/distributed_training-horovod/module_mnist.py @@ -157,7 +157,7 @@ def conv_net(): num_epoch=args.epochs) # train for at most 10 dataset passes # Step 7: evaluate model accuracy -acc = mx.metric.Accuracy() +acc = mx.gluon.metric.Accuracy() model.score(val_iter, acc) if hvd.rank() == 0: diff --git a/example/distributed_training-horovod/resnet50_imagenet.py b/example/distributed_training-horovod/resnet50_imagenet.py index 5e5169e98ece..ae8a56100929 100644 --- a/example/distributed_training-horovod/resnet50_imagenet.py +++ b/example/distributed_training-horovod/resnet50_imagenet.py @@ -286,8 +286,8 @@ def evaluate(epoch): return val_data.reset() - acc_top1 = mx.metric.Accuracy() - acc_top5 = mx.metric.TopKAccuracy(5) + acc_top1 = mx.gluon.metric.Accuracy() + acc_top5 = mx.gluon.metric.TopKAccuracy(5) for _, batch in enumerate(val_data): data, label = batch_fn(batch, context) output = net(data.astype(args.dtype, copy=False)) @@ -321,7 +321,7 @@ def evaluate(epoch): # Create loss function and train metric loss_fn = gluon.loss.SoftmaxCrossEntropyLoss() - metric = mx.metric.Accuracy() + metric = mx.gluon.metric.Accuracy() # Train model for epoch in range(args.num_epochs): @@ -450,8 +450,8 @@ def train_module(): # Evaluate performance if not using synthetic data if args.use_rec: - acc_top1 = mx.metric.Accuracy() - acc_top5 = mx.metric.TopKAccuracy(5) + acc_top1 = mx.gluon.metric.Accuracy() + acc_top5 = mx.gluon.metric.TopKAccuracy(5) res = mod.score(val_data, [acc_top1, acc_top5]) for name, val in res: logging.info('Epoch[%d] Rank[%d] Validation-%s=%f', diff --git a/example/distributed_training/cifar10_dist.py b/example/distributed_training/cifar10_dist.py index d3ba515776f6..8c5fb3639ef9 100644 --- a/example/distributed_training/cifar10_dist.py +++ b/example/distributed_training/cifar10_dist.py @@ -121,7 +121,7 @@ def evaluate_accuracy(data_iterator, network): ---------- tuple of array element """ - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() # Iterate through data and label for i, (data, label) in enumerate(data_iterator): diff --git a/example/distributed_training/cifar10_kvstore_hvd.py b/example/distributed_training/cifar10_kvstore_hvd.py index e6780e5db85e..ff679864f7c3 100644 --- a/example/distributed_training/cifar10_kvstore_hvd.py +++ b/example/distributed_training/cifar10_kvstore_hvd.py @@ -123,7 +123,7 @@ def evaluate(data_iterator, network, context): ---------- tuple of array element """ - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() # Iterate through data and label for i, (data, label) in enumerate(data_iterator): @@ -208,7 +208,7 @@ def __len__(self): optimizer_params={'learning_rate': args.lr}, kvstore=store) -train_metric = mx.metric.Accuracy() +train_metric = mx.gluon.metric.Accuracy() # Run as many epochs as required for epoch in range(args.epochs): diff --git a/example/fcn-xs/solver.py b/example/fcn-xs/solver.py index e99b31a13055..ab8964f80898 100644 --- a/example/fcn-xs/solver.py +++ b/example/fcn-xs/solver.py @@ -23,7 +23,7 @@ from collections import namedtuple from mxnet import optimizer as opt from mxnet.optimizer import get_updater -from mxnet import metric +from mxnet.gluon import metric # Parameter to pass to batch_end_callback BatchEndParam = namedtuple('BatchEndParams', ['epoch', 'nbatch', 'eval_metric']) diff --git 
a/example/gluon/audio/urban_sounds/train.py b/example/gluon/audio/urban_sounds/train.py index c88f9fb55187..8a55c5b5bc67 100644 --- a/example/gluon/audio/urban_sounds/train.py +++ b/example/gluon/audio/urban_sounds/train.py @@ -28,7 +28,7 @@ def evaluate_accuracy(data_iterator, net): """Function to evaluate accuracy of any data iterator passed to it as an argument""" - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() for data, label in data_iterator: output = net(data) predictions = nd.argmax(output, axis=1) diff --git a/example/gluon/dc_gan/dcgan.py b/example/gluon/dc_gan/dcgan.py index 93af13ababf3..1b1fa75c1c2a 100644 --- a/example/gluon/dc_gan/dcgan.py +++ b/example/gluon/dc_gan/dcgan.py @@ -259,7 +259,7 @@ def main(): real_label = mx.nd.ones((opt.batch_size,), ctx=ctx) fake_label = mx.nd.zeros((opt.batch_size,), ctx=ctx) - metric = mx.metric.Accuracy() + metric = mx.gluon.metric.Accuracy() print('Training... ') stamp = datetime.now().strftime('%Y_%m_%d-%H_%M') diff --git a/example/gluon/image_classification.py b/example/gluon/image_classification.py index 44a2afea3681..de31b06655eb 100644 --- a/example/gluon/image_classification.py +++ b/example/gluon/image_classification.py @@ -27,7 +27,7 @@ from mxnet.gluon.model_zoo import vision as models from mxnet import autograd as ag from mxnet.test_utils import get_mnist_iterator -from mxnet.metric import Accuracy, TopKAccuracy, CompositeEvalMetric +from mxnet.gluon.metric import Accuracy, TopKAccuracy, CompositeEvalMetric import numpy as np from data import (get_cifar10_iterator, get_imagenet_iterator, diff --git a/example/gluon/mnist/mnist.py b/example/gluon/mnist/mnist.py index 6aea3abc5041..4c1cc16bb7df 100644 --- a/example/gluon/mnist/mnist.py +++ b/example/gluon/mnist/mnist.py @@ -70,7 +70,7 @@ def transformer(data, label): # train def test(ctx): - metric = mx.metric.Accuracy() + metric = mx.gluon.metric.Accuracy() for data, label in val_data: data = data.as_in_context(ctx) label = label.as_in_context(ctx) @@ -86,7 +86,7 @@ def train(epochs, ctx): # Trainer is for updating parameters with gradient. 
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': opt.lr, 'momentum': opt.momentum}) - metric = mx.metric.Accuracy() + metric = mx.gluon.metric.Accuracy() loss = gluon.loss.SoftmaxCrossEntropyLoss() for epoch in range(epochs): diff --git a/example/gluon/sn_gan/train.py b/example/gluon/sn_gan/train.py index 46e44791cebd..fc4e87d632fe 100644 --- a/example/gluon/sn_gan/train.py +++ b/example/gluon/sn_gan/train.py @@ -102,7 +102,7 @@ def facc(label, pred): g_net.collect_params().zero_grad() d_net.collect_params().zero_grad() # define evaluation metric -metric = mx.metric.CustomMetric(facc) +metric = mx.gluon.metric.CustomMetric(facc) # initialize labels real_label = nd.ones(BATCH_SIZE, CTX) fake_label = nd.zeros(BATCH_SIZE, CTX) diff --git a/example/gluon/super_resolution/super_resolution.py b/example/gluon/super_resolution/super_resolution.py index 4a3e8d92aa39..52bfc2241f82 100644 --- a/example/gluon/super_resolution/super_resolution.py +++ b/example/gluon/super_resolution/super_resolution.py @@ -156,7 +156,7 @@ def hybrid_forward(self, F, x): return x net = SuperResolutionNet(upscale_factor) -metric = mx.metric.MSE() +metric = mx.gluon.metric.MSE() def test(ctx): val_data.reset() diff --git a/example/gluon/tree_lstm/main.py b/example/gluon/tree_lstm/main.py index 53af3fa019e9..41e4f4f13ed8 100644 --- a/example/gluon/tree_lstm/main.py +++ b/example/gluon/tree_lstm/main.py @@ -96,7 +96,7 @@ net = SimilarityTreeLSTM(sim_hidden_size, rnn_hidden_size, vocab.size, vocab.embed.shape[1], num_classes) # use pearson correlation and mean-square error for evaluation -metric = mx.metric.create(['pearsonr', 'mse']) +metric = mx.gluon.metric.create(['pearsonr', 'mse']) def to_target(x): target = np.zeros((1, num_classes)) diff --git a/example/image-classification/common/fit.py b/example/image-classification/common/fit.py index 38ca296cf986..8662db3baba4 100644 --- a/example/image-classification/common/fit.py +++ b/example/image-classification/common/fit.py @@ -290,7 +290,7 @@ def fit(args, network, data_loader, **kwargs): # evaluation metrices eval_metrics = ['accuracy'] if args.top_k > 0: - eval_metrics.append(mx.metric.create( + eval_metrics.append(mx.gluon.metric.create( 'top_k_accuracy', top_k=args.top_k)) supported_loss = ['ce', 'nll_loss'] @@ -306,7 +306,7 @@ def fit(args, network, data_loader, **kwargs): logging.warning(loss_type + ' is not an valid loss type, only cross-entropy or ' \ 'negative likelihood loss is supported!') else: - eval_metrics.append(mx.metric.create(loss_type)) + eval_metrics.append(mx.gluon.metric.create(loss_type)) else: logging.warning("The output is not softmax_output, loss argument will be skipped!") diff --git a/example/image-classification/score.py b/example/image-classification/score.py index f40e649f1f42..dbad44ef6981 100644 --- a/example/image-classification/score.py +++ b/example/image-classification/score.py @@ -97,8 +97,8 @@ def score(model, data_val, metrics, gpus, batch_size, rgb_mean=None, mean_img=No logger = logging.getLogger() logger.setLevel(logging.DEBUG) - metrics = [mx.metric.create('acc'), - mx.metric.create('top_k_accuracy', top_k = 5)] + metrics = [mx.gluon.metric.create('acc'), + mx.gluon.metric.create('top_k_accuracy', top_k = 5)] (speed,) = score(metrics = metrics, **vars(args)) logging.info('Finished with %f images per second', speed) diff --git a/example/image-classification/test_score.py b/example/image-classification/test_score.py index 58c5c66a7f1f..1a82bcff5ba3 100644 --- a/example/image-classification/test_score.py +++ 
b/example/image-classification/test_score.py @@ -43,7 +43,7 @@ def test_imagenet1k_resnet(imagenet_val_5k_settings): models = ['imagenet1k-resnet-50', 'imagenet1k-resnet-152'] accs = [.77, .78] for (m, g) in zip(models, accs): - acc = mx.metric.create('acc') + acc = mx.gluon.metric.create('acc') (speed,) = score(model=m, data_val=imagenet_val_5k, rgb_mean='0,0,0', metrics=acc, **kwargs) r = acc.get()[1] @@ -52,7 +52,7 @@ def test_imagenet1k_resnet(imagenet_val_5k_settings): def test_imagenet1k_inception_bn(imagenet_val_5k_settings): imagenet_val_5k, kwargs = imagenet_val_5k_settings - acc = mx.metric.create('acc') + acc = mx.gluon.metric.create('acc') m = 'imagenet1k-inception-bn' g = 0.75 (speed,) = score(model=m, diff --git a/example/kaggle-ndsb2/Train.py b/example/kaggle-ndsb2/Train.py index 51e308a2e21c..c3ab165d11da 100644 --- a/example/kaggle-ndsb2/Train.py +++ b/example/kaggle-ndsb2/Train.py @@ -111,7 +111,7 @@ def encode_csv(label_csv, systole_csv, diastole_csv): wd = 0.00001, momentum = 0.9) -systole_model.fit(X=data_train, eval_metric = mx.metric.np(CRPS)) +systole_model.fit(X=data_train, eval_metric = mx.gluon.metric.np(CRPS)) # # Predict systole @@ -139,7 +139,7 @@ def encode_csv(label_csv, systole_csv, diastole_csv): wd = 0.00001, momentum = 0.9) -diastole_model.fit(X=data_train, eval_metric = mx.metric.np(CRPS)) +diastole_model.fit(X=data_train, eval_metric = mx.gluon.metric.np(CRPS)) # # Predict diastole diff --git a/example/model-parallel/matrix_factorization/train.py b/example/model-parallel/matrix_factorization/train.py index 591dab3a6534..fea2c153f853 100644 --- a/example/model-parallel/matrix_factorization/train.py +++ b/example/model-parallel/matrix_factorization/train.py @@ -94,7 +94,7 @@ 'rescale_grad': 1.0/batch_size} # use MSE as the metric - metric = mx.metric.create(['MSE']) + metric = mx.gluon.metric.create(['MSE']) speedometer = mx.callback.Speedometer(batch_size, print_every) diff --git a/example/module/mnist_mlp.py b/example/module/mnist_mlp.py index 7d63a584aec9..f6d5bf306bd8 100644 --- a/example/module/mnist_mlp.py +++ b/example/module/mnist_mlp.py @@ -55,7 +55,7 @@ mod.init_params() mod.init_optimizer(optimizer_params={'learning_rate':0.01, 'momentum': 0.9}) -metric = mx.metric.create('acc') +metric = mx.gluon.metric.create('acc') for i_epoch in range(n_epoch): for i_iter, batch in enumerate(train_dataiter): diff --git a/example/multi-task/multi-task-learning.ipynb b/example/multi-task/multi-task-learning.ipynb index 048d6d9862b8..e615559441f6 100644 --- a/example/multi-task/multi-task-learning.ipynb +++ b/example/multi-task/multi-task-learning.ipynb @@ -267,8 +267,8 @@ "outputs": [], "source": [ "def evaluate_accuracy(net, data_iterator):\n", - " acc_digits = mx.metric.Accuracy(name='digits')\n", - " acc_odd_even = mx.metric.Accuracy(name='odd_even')\n", + " acc_digits = mx.gluon.metric.Accuracy(name='digits')\n", + " acc_odd_even = mx.gluon.metric.Accuracy(name='odd_even')\n", " \n", " for i, (data, label_digit, label_odd_even) in enumerate(data_iterator):\n", " data = data.as_in_context(ctx)\n", @@ -335,8 +335,8 @@ "source": [ "for e in range(epochs):\n", " # Accuracies for each task\n", - " acc_digits = mx.metric.Accuracy(name='digits')\n", - " acc_odd_even = mx.metric.Accuracy(name='odd_even')\n", + " acc_digits = mx.gluon.metric.Accuracy(name='digits')\n", + " acc_odd_even = mx.gluon.metric.Accuracy(name='odd_even')\n", " # Accumulative losses\n", " l_digits_ = 0.\n", " l_odd_even_ = 0. 
\n", diff --git a/example/multivariate_time_series/src/metrics.py b/example/multivariate_time_series/src/metrics.py index 4818591068f8..6dd8e765f0ed 100644 --- a/example/multivariate_time_series/src/metrics.py +++ b/example/multivariate_time_series/src/metrics.py @@ -46,10 +46,10 @@ def get_custom_metrics(): """ :return: mxnet metric object """ - _rse = mx.metric.create(rse) - _rae = mx.metric.create(rae) - _corr = mx.metric.create(corr) - return mx.metric.create([_rae, _rse, _corr]) + _rse = mx.gluon.metric.create(rse) + _rae = mx.gluon.metric.create(rae) + _corr = mx.gluon.metric.create(corr) + return mx.gluon.metric.create([_rae, _rse, _corr]) def evaluate(pred, label): return {"RAE":rae(label, pred), "RSE":rse(label,pred),"CORR": corr(label,pred)} \ No newline at end of file diff --git a/example/named_entity_recognition/src/metrics.py b/example/named_entity_recognition/src/metrics.py index a1d270af6863..d04904c7763e 100644 --- a/example/named_entity_recognition/src/metrics.py +++ b/example/named_entity_recognition/src/metrics.py @@ -79,9 +79,9 @@ def entity_f1(label, pred): return classifer_metrics(label, pred)[2] def composite_classifier_metrics(): - metric1 = mx.metric.CustomMetric(feval=entity_precision, name='entity precision') - metric2 = mx.metric.CustomMetric(feval=entity_recall, name='entity recall') - metric3 = mx.metric.CustomMetric(feval=entity_f1, name='entity f1 score') - metric4 = mx.metric.Accuracy() + metric1 = mx.gluon.metric.CustomMetric(feval=entity_precision, name='entity precision') + metric2 = mx.gluon.metric.CustomMetric(feval=entity_recall, name='entity recall') + metric3 = mx.gluon.metric.CustomMetric(feval=entity_f1, name='entity f1 score') + metric4 = mx.gluon.metric.Accuracy() - return mx.metric.CompositeEvalMetric([metric4, metric1, metric2, metric3]) + return mx.gluon.metric.CompositeEvalMetric([metric4, metric1, metric2, metric3]) diff --git a/example/nce-loss/nce.py b/example/nce-loss/nce.py index e59220a026a8..6764e9c20852 100644 --- a/example/nce-loss/nce.py +++ b/example/nce-loss/nce.py @@ -62,7 +62,7 @@ def nce_loss_subwords( label=label_weight) -class NceAccuracy(mx.metric.EvalMetric): +class NceAccuracy(mx.gluon.metric.EvalMetric): def __init__(self): super(NceAccuracy, self).__init__('nce-accuracy') @@ -75,7 +75,7 @@ def update(self, labels, preds): self.num_inst += 1 -class NceAuc(mx.metric.EvalMetric): +class NceAuc(mx.gluon.metric.EvalMetric): def __init__(self): super(NceAuc, self).__init__('nce-auc') @@ -105,7 +105,7 @@ def update(self, labels, preds): self.num_inst += 1 -class NceLSTMAuc(mx.metric.EvalMetric): +class NceLSTMAuc(mx.gluon.metric.EvalMetric): def __init__(self): super(NceLSTMAuc, self).__init__('nce-lstm-auc') diff --git a/example/neural_collaborative_filtering/train.py b/example/neural_collaborative_filtering/train.py index c68f271a6f0d..f99b16fd5b0e 100644 --- a/example/neural_collaborative_filtering/train.py +++ b/example/neural_collaborative_filtering/train.py @@ -124,7 +124,7 @@ def cross_entropy(label, pred, eps=1e-12): mod.init_params() mod.init_optimizer(optimizer='adam', optimizer_params=[('learning_rate', learning_rate), ('beta1',beta1), ('beta2',beta2), ('epsilon',eps)]) - metric = mx.metric.create(cross_entropy) + metric = mx.gluon.metric.create(cross_entropy) speedometer = mx.callback.Speedometer(batch_size, log_interval) best_hr, best_ndcg, best_iter = -1, -1, -1 logging.info('Training started ...') diff --git a/example/quantization/imagenet_inference.py b/example/quantization/imagenet_inference.py index 
4d690d37d00c..2f41fec2a9a3 100644 --- a/example/quantization/imagenet_inference.py +++ b/example/quantization/imagenet_inference.py @@ -70,8 +70,8 @@ def advance_data_iter(data_iter, n): def score(sym, arg_params, aux_params, data, devs, label_name, max_num_examples, logger=None): - metrics = [mx.metric.create('acc'), - mx.metric.create('top_k_accuracy', top_k=5)] + metrics = [mx.gluon.metric.create('acc'), + mx.gluon.metric.create('top_k_accuracy', top_k=5)] if not isinstance(metrics, list): metrics = [metrics, ] mod = mx.mod.Module(symbol=sym, context=devs, label_names=[label_name, ]) diff --git a/example/rcnn/symnet/metric.py b/example/rcnn/symnet/metric.py index fa8d7919e919..6509ba436d75 100644 --- a/example/rcnn/symnet/metric.py +++ b/example/rcnn/symnet/metric.py @@ -25,7 +25,7 @@ def get_names(): return pred, label -class RPNAccMetric(mx.metric.EvalMetric): +class RPNAccMetric(mx.gluon.metric.EvalMetric): def __init__(self): super(RPNAccMetric, self).__init__('RPNAcc') self.pred, self.label = get_names() @@ -49,7 +49,7 @@ def update(self, labels, preds): self.num_inst += len(pred_label.flat) -class RCNNAccMetric(mx.metric.EvalMetric): +class RCNNAccMetric(mx.gluon.metric.EvalMetric): def __init__(self): super(RCNNAccMetric, self).__init__('RCNNAcc') self.pred, self.label = get_names() @@ -66,7 +66,7 @@ def update(self, labels, preds): self.num_inst += len(pred_label.flat) -class RPNLogLossMetric(mx.metric.EvalMetric): +class RPNLogLossMetric(mx.gluon.metric.EvalMetric): def __init__(self): super(RPNLogLossMetric, self).__init__('RPNLogLoss') self.pred, self.label = get_names() @@ -93,7 +93,7 @@ def update(self, labels, preds): self.num_inst += label.shape[0] -class RCNNLogLossMetric(mx.metric.EvalMetric): +class RCNNLogLossMetric(mx.gluon.metric.EvalMetric): def __init__(self): super(RCNNLogLossMetric, self).__init__('RCNNLogLoss') self.pred, self.label = get_names() @@ -114,7 +114,7 @@ def update(self, labels, preds): self.num_inst += label.shape[0] -class RPNL1LossMetric(mx.metric.EvalMetric): +class RPNL1LossMetric(mx.gluon.metric.EvalMetric): def __init__(self): super(RPNL1LossMetric, self).__init__('RPNL1Loss') self.pred, self.label = get_names() @@ -130,7 +130,7 @@ def update(self, labels, preds): self.num_inst += num_inst -class RCNNL1LossMetric(mx.metric.EvalMetric): +class RCNNL1LossMetric(mx.gluon.metric.EvalMetric): def __init__(self): super(RCNNL1LossMetric, self).__init__('RCNNL1Loss') self.pred, self.label = get_names() diff --git a/example/rcnn/train.py b/example/rcnn/train.py index 7b1f2f7f31a5..4d89ac6e2cdd 100644 --- a/example/rcnn/train.py +++ b/example/rcnn/train.py @@ -85,7 +85,7 @@ def train_net(sym, roidb, args): eval_metric = RCNNAccMetric() cls_metric = RCNNLogLossMetric() bbox_metric = RCNNL1LossMetric() - eval_metrics = mx.metric.CompositeEvalMetric() + eval_metrics = mx.gluon.metric.CompositeEvalMetric() for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric]: eval_metrics.add(child_metric) diff --git a/example/rnn/bucketing/cudnn_rnn_bucketing.py b/example/rnn/bucketing/cudnn_rnn_bucketing.py index 38275ae3dfb8..8f77172087ef 100644 --- a/example/rnn/bucketing/cudnn_rnn_bucketing.py +++ b/example/rnn/bucketing/cudnn_rnn_bucketing.py @@ -156,7 +156,7 @@ def sym_gen(seq_len): model.fit( train_data = data_train, eval_data = data_val, - eval_metric = mx.metric.Perplexity(invalid_label), + eval_metric = mx.gluon.metric.Perplexity(invalid_label), kvstore = args.kv_store, optimizer = args.optimizer, 
optimizer_params = opt_params, @@ -244,14 +244,14 @@ def sym_gen(seq_len): if args.dtype == "float32": model.set_params(arg_params, aux_params) - model.score(data_val, mx.metric.Perplexity(invalid_label), + model.score(data_val, mx.gluon.metric.Perplexity(invalid_label), batch_end_callback=mx.callback.Speedometer(args.batch_size, 5)) else: assert args.dtype == "float16", "Only float32 and float16 are supported currently" model = amp.convert_bucketing_module(model, target_dtype="float16") model.bind(data_val.provide_data, data_val.provide_label, for_training=False) - model.score(data_val, mx.metric.Perplexity(invalid_label), + model.score(data_val, mx.gluon.metric.Perplexity(invalid_label), batch_end_callback=mx.callback.Speedometer(args.batch_size, 5)) if __name__ == '__main__': diff --git a/example/rnn/bucketing/lstm_bucketing.py b/example/rnn/bucketing/lstm_bucketing.py index 7f150104f458..281aa8988ab0 100644 --- a/example/rnn/bucketing/lstm_bucketing.py +++ b/example/rnn/bucketing/lstm_bucketing.py @@ -115,7 +115,7 @@ def sym_gen(seq_len): model.fit( train_data = data_train, eval_data = data_val, - eval_metric = mx.metric.Perplexity(invalid_label), + eval_metric = mx.gluon.metric.Perplexity(invalid_label), kvstore = args.kv_store, optimizer = args.optimizer, optimizer_params = { 'learning_rate': args.lr, diff --git a/example/rnn/old/char-rnn.ipynb b/example/rnn/old/char-rnn.ipynb index 1ec56cd9aa8c..4fd32d932512 100644 --- a/example/rnn/old/char-rnn.ipynb +++ b/example/rnn/old/char-rnn.ipynb @@ -347,7 +347,7 @@ "source": [ "# Fit it\n", "model.fit(X=data_train,\n", - " eval_metric = mx.metric.np(Perplexity),\n", + " eval_metric = mx.gluon.metric.np(Perplexity),\n", " batch_end_callback=mx.callback.Speedometer(batch_size, 50),\n", " epoch_end_callback=mx.callback.do_checkpoint(\"obama\"))" ] diff --git a/example/rnn/old/gru_bucketing.py b/example/rnn/old/gru_bucketing.py index b9f651a90dc0..47c13ec0db43 100644 --- a/example/rnn/old/gru_bucketing.py +++ b/example/rnn/old/gru_bucketing.py @@ -88,6 +88,6 @@ def sym_gen(seq_len): logging.basicConfig(level=logging.DEBUG, format=head) model.fit(X=data_train, eval_data=data_val, - eval_metric = mx.metric.np(Perplexity), + eval_metric = mx.gluon.metric.np(Perplexity), batch_end_callback=mx.callback.Speedometer(batch_size, 50),) diff --git a/example/rnn/old/lstm_bucketing.py b/example/rnn/old/lstm_bucketing.py index 0fe4116250a2..2bea6cc3898f 100644 --- a/example/rnn/old/lstm_bucketing.py +++ b/example/rnn/old/lstm_bucketing.py @@ -90,6 +90,6 @@ def sym_gen(seq_len): logging.basicConfig(level=logging.DEBUG, format=head) model.fit(X=data_train, eval_data=data_val, kvstore='device', - eval_metric = mx.metric.np(Perplexity), + eval_metric = mx.gluon.metric.np(Perplexity), batch_end_callback=mx.callback.Speedometer(batch_size, 50),) diff --git a/example/rnn/old/rnn_cell_demo.py b/example/rnn/old/rnn_cell_demo.py index c5772fa3a5b7..64a8ee0fe72b 100644 --- a/example/rnn/old/rnn_cell_demo.py +++ b/example/rnn/old/rnn_cell_demo.py @@ -144,7 +144,7 @@ def sym_gen(seq_len): logging.basicConfig(level=logging.DEBUG, format=head) mod.fit(data_train, eval_data=data_val, num_epoch=num_epoch, - eval_metric=mx.metric.np(Perplexity), + eval_metric=mx.gluon.metric.np(Perplexity), batch_end_callback=mx.callback.Speedometer(batch_size, 50), initializer=mx.init.Xavier(factor_type="in", magnitude=2.34), optimizer='sgd', diff --git a/example/sparse/factorization_machine/metric.py b/example/sparse/factorization_machine/metric.py index a8c52c781c0f..8c80f0092203 100644 
--- a/example/sparse/factorization_machine/metric.py +++ b/example/sparse/factorization_machine/metric.py @@ -19,9 +19,9 @@ import numpy as np from operator import itemgetter -@mx.metric.register -@mx.metric.alias('log_loss') -class LogLossMetric(mx.metric.EvalMetric): +@mx.gluon.metric.register +@mx.gluon.metric.alias('log_loss') +class LogLossMetric(mx.gluon.metric.EvalMetric): """Computes the negative log-likelihood loss. The negative log-likelihoodd loss over a batch of sample size :math:`N` is given by @@ -51,7 +51,7 @@ class LogLossMetric(mx.metric.EvalMetric): -------- >>> predicts = [mx.nd.array([[0.3], [0], [0.4]])] >>> labels = [mx.nd.array([0, 1, 1])] - >>> log_loss= mx.metric.NegativeLogLikelihood() + >>> log_loss= mx.gluon.metric.NegativeLogLikelihood() >>> log_loss.update(labels, predicts) >>> print(log_loss.get()) ('log-loss', 0.57159948348999023) @@ -74,7 +74,7 @@ def update(self, labels, preds): preds : list of `NDArray` Predicted values. """ - mx.metric.check_label_shapes(labels, preds) + mx.gluon.metric.check_label_shapes(labels, preds) for label, pred in zip(labels, preds): label = label.asnumpy() @@ -88,16 +88,16 @@ def update(self, labels, preds): self.sum_metric += (-np.log(prob + self.eps)).sum() self.num_inst += num_examples -@mx.metric.register -@mx.metric.alias('auc') -class AUCMetric(mx.metric.EvalMetric): +@mx.gluon.metric.register +@mx.gluon.metric.alias('auc') +class AUCMetric(mx.gluon.metric.EvalMetric): def __init__(self, eps=1e-12): super(AUCMetric, self).__init__( 'auc') self.eps = eps def update(self, labels, preds): - mx.metric.check_label_shapes(labels, preds) + mx.gluon.metric.check_label_shapes(labels, preds) label_weight = labels[0].asnumpy() preds = preds[0].asnumpy() tmp = [] diff --git a/example/sparse/factorization_machine/train.py b/example/sparse/factorization_machine/train.py index b30f9cc81acf..1e2ab0e2f0ff 100644 --- a/example/sparse/factorization_machine/train.py +++ b/example/sparse/factorization_machine/train.py @@ -110,7 +110,7 @@ def all_row_ids(data_batch): mod.init_optimizer(optimizer='adam', kvstore=kv, optimizer_params=optimizer_params) # metrics - metric = mx.metric.create(['log_loss', 'auc']) + metric = mx.gluon.metric.create(['log_loss', 'auc']) speedometer = mx.callback.Speedometer(batch_size, log_interval) logging.info('Training started ...') diff --git a/example/sparse/linear_classification/train.py b/example/sparse/linear_classification/train.py index 0a8acfd87bef..77eb2c09de28 100644 --- a/example/sparse/linear_classification/train.py +++ b/example/sparse/linear_classification/train.py @@ -100,7 +100,7 @@ def all_row_ids(data_batch): optim = mx.optimizer.create(optimizer, learning_rate=0.01, rescale_grad=1.0/batch_size/num_worker) mod.init_optimizer(optimizer=optim, kvstore=kv) # use accuracy as the metric - metric = mx.metric.create(['nll_loss']) + metric = mx.gluon.metric.create(['nll_loss']) # get the sparse weight parameter speedometer = mx.callback.Speedometer(batch_size, 100) diff --git a/example/sparse/matrix_factorization/train.py b/example/sparse/matrix_factorization/train.py index 44bab2c416ba..d9dccce89459 100644 --- a/example/sparse/matrix_factorization/train.py +++ b/example/sparse/matrix_factorization/train.py @@ -101,7 +101,7 @@ def all_row_ids(data_batch): rescale_grad=1.0/batch_size) mod.init_optimizer(optimizer=optim, kvstore='device') # use MSE as the metric - metric = mx.metric.create(['MSE']) + metric = mx.gluon.metric.create(['MSE']) speedometer = mx.callback.Speedometer(batch_size, log_interval) 
logging.info('Training started ...') for epoch in range(num_epoch): diff --git a/example/sparse/wide_deep/inference.py b/example/sparse/wide_deep/inference.py index e14396e50c15..c615020200e2 100644 --- a/example/sparse/wide_deep/inference.py +++ b/example/sparse/wide_deep/inference.py @@ -93,7 +93,7 @@ else: logging.info('Inference started ...') # use accuracy as the metric - metric = mx.metric.create(['acc']) + metric = mx.gluon.metric.create(['acc']) accuracy_avg = 0.0 for batch in data_iter: nbatch += 1 diff --git a/example/sparse/wide_deep/train.py b/example/sparse/wide_deep/train.py index eea70301660d..c8c2b157865a 100644 --- a/example/sparse/wide_deep/train.py +++ b/example/sparse/wide_deep/train.py @@ -83,7 +83,7 @@ optim = mx.optimizer.create(optimizer, learning_rate=lr, rescale_grad=1.0/batch_size) mod.init_optimizer(optimizer=optim) # use accuracy as the metric - metric = mx.metric.create(['acc']) + metric = mx.gluon.metric.create(['acc']) # get the sparse weight parameter speedometer = mx.callback.Speedometer(batch_size, log_interval) diff --git a/example/speech_recognition/stt_metric.py b/example/speech_recognition/stt_metric.py index 26609627ea58..1eb77aa301cb 100644 --- a/example/speech_recognition/stt_metric.py +++ b/example/speech_recognition/stt_metric.py @@ -35,7 +35,7 @@ def check_label_shapes(labels, preds, shape=0): "predictions {}".format(label_shape, pred_shape)) -class STTMetric(mx.metric.EvalMetric): +class STTMetric(mx.gluon.metric.EvalMetric): def __init__(self, batch_size, num_gpu, is_epoch_end=False, is_logging=True): super(STTMetric, self).__init__('STTMetric') diff --git a/example/ssd/evaluate/eval_metric.py b/example/ssd/evaluate/eval_metric.py index 1deb381fb859..b038d3afb376 100644 --- a/example/ssd/evaluate/eval_metric.py +++ b/example/ssd/evaluate/eval_metric.py @@ -18,7 +18,7 @@ import mxnet as mx import numpy as np -class MApMetric(mx.metric.EvalMetric): +class MApMetric(mx.gluon.metric.EvalMetric): """ Calculate mean AP for object detection task diff --git a/example/ssd/train/metric.py b/example/ssd/train/metric.py index eeb9796bf4a8..a99c8762de16 100644 --- a/example/ssd/train/metric.py +++ b/example/ssd/train/metric.py @@ -19,7 +19,7 @@ import numpy as np -class MultiBoxMetric(mx.metric.EvalMetric): +class MultiBoxMetric(mx.gluon.metric.EvalMetric): """Calculate metrics for Multibox training """ def __init__(self, eps=1e-8): super(MultiBoxMetric, self).__init__('MultiBox') @@ -39,17 +39,6 @@ def reset(self): self.num_inst = [0] * self.num self.sum_metric = [0.0] * self.num - def reset_local(self): - """ - override reset behavior - """ - if getattr(self, 'num', None) is None: - self.num_inst = 0 - self.sum_metric = 0.0 - else: - self.num_inst = [0] * self.num - self.sum_metric = [0.0] * self.num - def update(self, labels, preds): """ Implementation of updating metrics diff --git a/example/svm_mnist/svm_mnist.py b/example/svm_mnist/svm_mnist.py index e166cb6ac707..9ceae6d4588b 100644 --- a/example/svm_mnist/svm_mnist.py +++ b/example/svm_mnist/svm_mnist.py @@ -113,8 +113,8 @@ 'momentum': 0.9, # Momentum for SGD with momentum 'wd': 0.00001, # Weight decay for regularization }) - results[output.name] = mod.score(test_iter, mx.metric.Accuracy())[0][1]*100 - print('Accuracy for %s:'%output.name, mod.score(test_iter, mx.metric.Accuracy())[0][1]*100, '%\n') + results[output.name] = mod.score(test_iter, mx.gluon.metric.Accuracy())[0][1]*100 + print('Accuracy for %s:'%output.name, mod.score(test_iter, mx.gluon.metric.Accuracy())[0][1]*100, '%\n') for key, 
value in results.items(): print(key, value, "%s") diff --git a/example/svrg_module/api_usage_example/example_api_train.py b/example/svrg_module/api_usage_example/example_api_train.py index f6cd1b2e592c..cc9987fe3edb 100644 --- a/example/svrg_module/api_usage_example/example_api_train.py +++ b/example/svrg_module/api_usage_example/example_api_train.py @@ -40,7 +40,7 @@ def test_svrg_intermediate_level_api(args): mod.init_params(initializer=mx.init.Uniform(0.01), allow_missing=False, force_init=False, allow_extra=False) kv = mx.kv.create("local") mod.init_optimizer(kvstore=kv, optimizer='sgd', optimizer_params=(('learning_rate', 0.025),)) - metrics = mx.metric.create("mse") + metrics = mx.gluon.metric.create("mse") for e in range(num_epoch): metrics.reset() if e % mod.update_freq == 0: diff --git a/example/svrg_module/api_usage_example/example_inference.py b/example/svrg_module/api_usage_example/example_inference.py index 312f9796074d..7e5b7a40abe2 100644 --- a/example/svrg_module/api_usage_example/example_inference.py +++ b/example/svrg_module/api_usage_example/example_inference.py @@ -42,7 +42,7 @@ def get_validation_score(args): mod.bind(data_shapes=train_iter.provide_data, label_shapes=train_iter.provide_label) mod.init_params(initializer=mx.init.Uniform(0.01), allow_missing=False, force_init=False, allow_extra=False) mod.init_optimizer(kvstore='local', optimizer='sgd', optimizer_params=(('learning_rate', 0.025),)) - metrics = mx.metric.create("mse") + metrics = mx.gluon.metric.create("mse") for e in range(epoch): metrics.reset() if e % mod.update_freq == 0: diff --git a/example/svrg_module/benchmarks/svrg_benchmark.ipynb b/example/svrg_module/benchmarks/svrg_benchmark.ipynb index 54ae81281db3..66f52d70be5f 100644 --- a/example/svrg_module/benchmarks/svrg_benchmark.ipynb +++ b/example/svrg_module/benchmarks/svrg_benchmark.ipynb @@ -127,7 +127,7 @@ " mod.bind(data_shapes=di.provide_data, label_shapes=di.provide_label)\n", " mod.init_params(initializer=mx.init.Zero(), allow_missing=False, force_init=False, allow_extra=False)\n", " mod.init_optimizer(kvstore='local', optimizer='sgd', optimizer_params=optimizer_params)\n", - " metrics = mx.metric.create(\"mse\")\n", + " metrics = mx.gluon.metric.create(\"mse\")\n", " \n", " results = {}\n", " for e in range(num_epoch):\n", @@ -170,7 +170,7 @@ " mod.bind(data_shapes=di.provide_data, label_shapes=di.provide_label)\n", " mod.init_params(initializer=mx.init.Zero(), allow_missing=False, force_init=False, allow_extra=False)\n", " mod.init_optimizer(kvstore='local', optimizer='sgd', optimizer_params=optimizer_params)\n", - " metrics = mx.metric.create(\"mse\")\n", + " metrics = mx.gluon.metric.create(\"mse\")\n", " \n", " results = {}\n", " for e in range(num_epoch):\n", diff --git a/example/svrg_module/linear_regression/common.py b/example/svrg_module/linear_regression/common.py index 14a144f40ce2..edf4f729f3e6 100644 --- a/example/svrg_module/linear_regression/common.py +++ b/example/svrg_module/linear_regression/common.py @@ -39,7 +39,7 @@ def create_lin_reg_network(train_features, train_labels, feature_dim, batch_size def create_metrics(metrics): - metric = mx.metric.create(metrics) + metric = mx.gluon.metric.create(metrics) return metric diff --git a/example/vae-gan/vaegan_mxnet.py b/example/vae-gan/vaegan_mxnet.py index 38e7e2ecc92f..1881f383c18b 100644 --- a/example/vae-gan/vaegan_mxnet.py +++ b/example/vae-gan/vaegan_mxnet.py @@ -424,10 +424,10 @@ def kldivergence(label, pred): KLLoss = KLLoss / nElements return KLLoss - mG = 
mx.metric.CustomMetric(fentropy) - mD = mx.metric.CustomMetric(fentropy) - mE = mx.metric.CustomMetric(kldivergence) - mACC = mx.metric.CustomMetric(facc) + mG = mx.gluon.metric.CustomMetric(fentropy) + mD = mx.gluon.metric.CustomMetric(fentropy) + mE = mx.gluon.metric.CustomMetric(kldivergence) + mACC = mx.gluon.metric.CustomMetric(facc) print('Training...') stamp = datetime.now().strftime('%Y_%m_%d-%H_%M') diff --git a/python/mxnet/__init__.py b/python/mxnet/__init__.py index 49f10aace531..284788fa2276 100644 --- a/python/mxnet/__init__.py +++ b/python/mxnet/__init__.py @@ -51,7 +51,6 @@ from . import random from . import optimizer from . import model -from . import metric from . import notebook from . import initializer # use mx.init as short for mx.initializer diff --git a/python/mxnet/callback.py b/python/mxnet/callback.py index 4be509270fd3..bd515707eace 100644 --- a/python/mxnet/callback.py +++ b/python/mxnet/callback.py @@ -112,7 +112,7 @@ def _callback(param): logging.info('Iter[%d] Batch[%d] Train-%s=%f', param.epoch, param.nbatch, name, value) if auto_reset: - param.eval_metric.reset_local() + param.eval_metric.reset() return _callback @@ -163,7 +163,7 @@ def __call__(self, param): if param.eval_metric is not None: name_value = param.eval_metric.get_name_value() if self.auto_reset: - param.eval_metric.reset_local() + param.eval_metric.reset() msg = 'Epoch[%d] Batch [%d-%d]\tSpeed: %.2f samples/sec' msg += '\t%s=%f'*len(name_value) logging.info(msg, param.epoch, count-self.frequent, count, speed, *sum(name_value, ())) diff --git a/python/mxnet/contrib/svrg_optimization/svrg_module.py b/python/mxnet/contrib/svrg_optimization/svrg_module.py index eecb87cf25bb..fc5a6c224809 100644 --- a/python/mxnet/contrib/svrg_optimization/svrg_module.py +++ b/python/mxnet/contrib/svrg_optimization/svrg_module.py @@ -478,8 +478,8 @@ def fit(self, train_data, eval_data=None, eval_metric='acc', if validation_metric is None: validation_metric = eval_metric - if not isinstance(eval_metric, mx.metric.EvalMetric): - eval_metric = mx.metric.create(eval_metric) + if not isinstance(eval_metric, mx.gluon.metric.EvalMetric): + eval_metric = mx.gluon.metric.create(eval_metric) ################################################################################ # training loop diff --git a/python/mxnet/gluon/__init__.py b/python/mxnet/gluon/__init__.py index 288937cf4a03..514087049edb 100644 --- a/python/mxnet/gluon/__init__.py +++ b/python/mxnet/gluon/__init__.py @@ -19,6 +19,8 @@ # pylint: disable=wildcard-import """Neural network module.""" +from . import metric + from .parameter import * from .block import * diff --git a/python/mxnet/gluon/block.py b/python/mxnet/gluon/block.py index 6706abcb40de..27428e3191b8 100644 --- a/python/mxnet/gluon/block.py +++ b/python/mxnet/gluon/block.py @@ -28,7 +28,8 @@ import numpy as np from ..base import mx_real_t, MXNetError, NDArrayHandle, py_str -from .. import symbol, ndarray, initializer, np_symbol, autograd, _deferred_compute as dc +from .. import symbol, ndarray, initializer, autograd, _deferred_compute as dc +from ..symbol.numpy import _symbol as np_symbol from ..symbol import Symbol from ..ndarray import NDArray from .. 
import name as _name diff --git a/python/mxnet/gluon/contrib/data/text.py b/python/mxnet/gluon/contrib/data/text.py index 0536ac585484..916b41880d45 100644 --- a/python/mxnet/gluon/contrib/data/text.py +++ b/python/mxnet/gluon/contrib/data/text.py @@ -29,7 +29,7 @@ from ...data import dataset from ...utils import download, check_sha1, _get_repo_file_url from ....contrib import text -from .... import nd, base +from .... import ndarray as nd, base class _LanguageModelDataset(dataset._DownloadedDataset): # pylint: disable=abstract-method def __init__(self, root, namespace, vocabulary): diff --git a/python/mxnet/gluon/contrib/data/vision/dataloader.py b/python/mxnet/gluon/contrib/data/vision/dataloader.py index 0c71d90453d8..3213398b2214 100644 --- a/python/mxnet/gluon/contrib/data/vision/dataloader.py +++ b/python/mxnet/gluon/contrib/data/vision/dataloader.py @@ -21,9 +21,9 @@ import logging import numpy as np -from ..... import nd +from ..... import ndarray as nd from .....util import is_np_array -from ..... import np as _mx_np # pylint: disable=reimported +from ..... import numpy as _mx_np # pylint: disable=reimported from ....nn import HybridSequential, Sequential, HybridBlock, Block from ....data.vision import transforms from ....data import DataLoader diff --git a/python/mxnet/gluon/contrib/data/vision/transforms/bbox/bbox.py b/python/mxnet/gluon/contrib/data/vision/transforms/bbox/bbox.py index 1629c212957f..65a18aaf80cd 100644 --- a/python/mxnet/gluon/contrib/data/vision/transforms/bbox/bbox.py +++ b/python/mxnet/gluon/contrib/data/vision/transforms/bbox/bbox.py @@ -23,7 +23,7 @@ from .......base import numeric_types from ......block import Block from .......util import is_np_array -from ....... import nd, npx, np +from ....... import ndarray as nd, numpy_extension as npx, numpy as np from .utils import _check_bbox_shape, bbox_crop, bbox_translate from .utils import bbox_resize, bbox_random_crop_with_constraints diff --git a/python/mxnet/gluon/contrib/estimator/estimator.py b/python/mxnet/gluon/contrib/estimator/estimator.py index ed8a53d7c3a6..c47e02b7213f 100644 --- a/python/mxnet/gluon/contrib/estimator/estimator.py +++ b/python/mxnet/gluon/contrib/estimator/estimator.py @@ -33,7 +33,7 @@ from ...trainer import Trainer from ...utils import split_and_load from ....context import Context, cpu, gpu, num_gpus -from ....metric import Loss as metric_loss +from ...metric import Loss as metric_loss from .batch_processor import BatchProcessor __all__ = ['Estimator'] diff --git a/python/mxnet/gluon/contrib/estimator/event_handler.py b/python/mxnet/gluon/contrib/estimator/event_handler.py index 338c7f00e05e..5709a803a610 100644 --- a/python/mxnet/gluon/contrib/estimator/event_handler.py +++ b/python/mxnet/gluon/contrib/estimator/event_handler.py @@ -25,8 +25,8 @@ import numpy as np -from ....metric import CompositeEvalMetric, EvalMetric -from ....metric import Loss as metric_loss +from ...metric import CompositeEvalMetric, EvalMetric +from ...metric import Loss as metric_loss from .utils import _check_metrics __all__ = ['TrainBegin', 'TrainEnd', 'EpochBegin', 'EpochEnd', 'BatchBegin', 'BatchEnd', diff --git a/python/mxnet/gluon/contrib/estimator/utils.py b/python/mxnet/gluon/contrib/estimator/utils.py index d9126a2f6763..dc0c4bf8f081 100644 --- a/python/mxnet/gluon/contrib/estimator/utils.py +++ b/python/mxnet/gluon/contrib/estimator/utils.py @@ -20,7 +20,7 @@ """Gluon Estimator Utility Functions""" from ...loss import SoftmaxCrossEntropyLoss -from ....metric import Accuracy, EvalMetric, 
CompositeEvalMetric +from ...metric import Accuracy, EvalMetric, CompositeEvalMetric def _check_metrics(metrics): if isinstance(metrics, CompositeEvalMetric): @@ -31,7 +31,7 @@ def _check_metrics(metrics): metrics = metrics or [] if not all([isinstance(metric, EvalMetric) for metric in metrics]): raise ValueError("metrics must be a Metric or a list of Metric, " - "refer to mxnet.metric.EvalMetric: {}".format(metrics)) + "refer to mxnet.gluon.metric.EvalMetric: {}".format(metrics)) return metrics def _check_handler_metric_ref(handler, known_metrics): diff --git a/python/mxnet/gluon/contrib/nn/basic_layers.py b/python/mxnet/gluon/contrib/nn/basic_layers.py index bc7c3ce19e09..5df1a1e83660 100644 --- a/python/mxnet/gluon/contrib/nn/basic_layers.py +++ b/python/mxnet/gluon/contrib/nn/basic_layers.py @@ -24,7 +24,7 @@ 'PixelShuffle3D'] import warnings -from .... import nd, context +from .... import ndarray as nd, context from ...block import HybridBlock, Block from ...nn import Sequential, HybridSequential, BatchNorm diff --git a/python/mxnet/gluon/data/dataloader.py b/python/mxnet/gluon/data/dataloader.py index d991bc769ac9..c51981678367 100644 --- a/python/mxnet/gluon/data/dataloader.py +++ b/python/mxnet/gluon/data/dataloader.py @@ -39,7 +39,7 @@ from . import sampler as _sampler from . import batchify as _batchify -from ... import nd, context +from ... import ndarray as nd, context from ...util import is_np_shape, is_np_array, set_np from ... import numpy as _mx_np # pylint: disable=reimported diff --git a/python/mxnet/gluon/data/vision/datasets.py b/python/mxnet/gluon/data/vision/datasets.py index 9912a139ffa7..14fd0d03d1c5 100644 --- a/python/mxnet/gluon/data/vision/datasets.py +++ b/python/mxnet/gluon/data/vision/datasets.py @@ -30,7 +30,7 @@ from .. import dataset from ...utils import download, check_sha1, _get_repo_file_url -from .... import nd, image, recordio, base +from .... import ndarray as nd, image, recordio, base from .... import numpy as _mx_np # pylint: disable=reimported from ....util import is_np_array, default_array from ....base import numeric_types diff --git a/python/mxnet/metric.py b/python/mxnet/gluon/metric.py similarity index 66% rename from python/mxnet/metric.py rename to python/mxnet/gluon/metric.py index eb8f99a66d48..5b081ceac4d8 100644 --- a/python/mxnet/metric.py +++ b/python/mxnet/gluon/metric.py @@ -22,11 +22,12 @@ import math from collections import OrderedDict -import numpy +from .. import numpy +from ..util import use_np -from .base import numeric_types, string_types -from . import ndarray -from . import registry +from ..base import numeric_types, string_types +from .. import ndarray +from .. import registry def check_label_shapes(labels, preds, wrap=False, shape=False): @@ -89,7 +90,6 @@ def __init__(self, name, output_names=None, self.name = str(name) self.output_names = output_names self.label_names = label_names - self._has_global_stats = kwargs.pop("has_global_stats", False) self._kwargs = kwargs self.reset() @@ -148,13 +148,6 @@ def reset(self): """Resets the internal evaluation result to initial state.""" self.num_inst = 0 self.sum_metric = 0.0 - self.global_num_inst = 0 - self.global_sum_metric = 0.0 - - def reset_local(self): - """Resets the local portion of the internal evaluation results to initial state.""" - self.num_inst = 0 - self.sum_metric = 0.0 def get(self): """Gets the current evaluation result. 
@@ -169,25 +162,13 @@ def get(self): if self.num_inst == 0: return (self.name, float('nan')) else: - return (self.name, self.sum_metric / self.num_inst) - - def get_global(self): - """Gets the current global evaluation result. - - Returns - ------- - names : list of str - Name of the metrics. - values : list of float - Value of the evaluations. - """ - if self._has_global_stats: - if self.global_num_inst == 0: - return (self.name, float('nan')) - else: - return (self.name, self.global_sum_metric / self.global_num_inst) - else: - return self.get() + res = self.sum_metric / self.num_inst + if isinstance(res, numpy.ndarray) and len(res.shape) == 0: + # currently calling ' c = mxnet.numpy.array([1,2,3]).sum() ' would get + # ' array(6.) ', a ndarray with shape () + # In this case, returning a 'float' in .get() is more explicit. + res = res.item() + return (self.name, res) def get_name_value(self): """Returns zipped name and value pairs. @@ -204,24 +185,6 @@ def get_name_value(self): value = [value] return list(zip(name, value)) - def get_global_name_value(self): - """Returns zipped name and value pairs for global results. - - Returns - ------- - list of tuples - A (name, value) tuple list. - """ - if self._has_global_stats: - name, value = self.get_global() - if not isinstance(name, list): - name = [name] - if not isinstance(value, list): - value = [value] - return list(zip(name, value)) - else: - return self.get_name_value() - # pylint: disable=invalid-name register = registry.get_register_func(EvalMetric, 'metric') alias = registry.get_alias_func(EvalMetric, 'metric') @@ -256,9 +219,9 @@ def create(metric, *args, **kwargs): >>> def custom_metric(label, pred): ... return np.mean(np.abs(label - pred)) ... - >>> metric1 = mx.metric.create('acc') - >>> metric2 = mx.metric.create(custom_metric) - >>> metric3 = mx.metric.create([metric1, metric2, 'rmse']) + >>> metric1 = mx.gluon.metric.create('acc') + >>> metric2 = mx.gluon.metric.create(custom_metric) + >>> metric3 = mx.gluon.metric.create([metric1, metric2, 'rmse']) """ if callable(metric): return CustomMetric(metric, *args, **kwargs) @@ -293,9 +256,9 @@ class CompositeEvalMetric(EvalMetric): -------- >>> predicts = [mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]])] >>> labels = [mx.nd.array([0, 1, 1])] - >>> eval_metrics_1 = mx.metric.Accuracy() - >>> eval_metrics_2 = mx.metric.F1() - >>> eval_metrics = mx.metric.CompositeEvalMetric() + >>> eval_metrics_1 = mx.gluon.metric.Accuracy() + >>> eval_metrics_2 = mx.gluon.metric.F1() + >>> eval_metrics = mx.gluon.metric.CompositeEvalMetric() >>> for child_metric in [eval_metrics_1, eval_metrics_2]: >>> eval_metrics.add(child_metric) >>> eval_metrics.update(labels = labels, preds = predicts) @@ -306,8 +269,7 @@ class CompositeEvalMetric(EvalMetric): def __init__(self, metrics=None, name='composite', output_names=None, label_names=None): super(CompositeEvalMetric, self).__init__( - name, output_names=output_names, label_names=label_names, - has_global_stats=True) + name, output_names=output_names, label_names=label_names) if metrics is None: metrics = [] self.metrics = [create(i) for i in metrics] @@ -369,14 +331,6 @@ def reset(self): except AttributeError: pass - def reset_local(self): - """Resets the local portion of the internal evaluation results to initial state.""" - try: - for metric in self.metrics: - metric.reset_local() - except AttributeError: - pass - def get(self): """Returns the current evaluation result. 
@@ -399,28 +353,6 @@ def get(self): values.extend(value) return (names, values) - def get_global(self): - """Returns the current evaluation result. - - Returns - ------- - names : list of str - Name of the metrics. - values : list of float - Value of the evaluations. - """ - names = [] - values = [] - for metric in self.metrics: - name, value = metric.get_global() - if isinstance(name, string_types): - name = [name] - if isinstance(value, numeric_types): - value = [value] - names.extend(name) - values.extend(value) - return (names, values) - def get_config(self): config = super(CompositeEvalMetric, self).get_config() config.update({'metrics': [i.get_config() for i in self.metrics]}) @@ -434,6 +366,7 @@ def get_config(self): @register @alias('acc') +@use_np class Accuracy(EvalMetric): """Computes accuracy classification score. @@ -460,7 +393,7 @@ class Accuracy(EvalMetric): -------- >>> predicts = [mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]])] >>> labels = [mx.nd.array([0, 1, 1])] - >>> acc = mx.metric.Accuracy() + >>> acc = mx.gluon.metric.Accuracy() >>> acc.update(preds = predicts, labels = labels) >>> print acc.get() ('accuracy', 0.6666666666666666) @@ -469,8 +402,7 @@ def __init__(self, axis=1, name='accuracy', output_names=None, label_names=None): super(Accuracy, self).__init__( name, axis=axis, - output_names=output_names, label_names=label_names, - has_global_stats=True) + output_names=output_names, label_names=label_names) self.axis = axis def update(self, labels, preds): @@ -488,25 +420,26 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred_label in zip(labels, preds): + pred_label = pred_label.as_np_ndarray().as_in_ctx(label.ctx) + label = label.as_np_ndarray() if pred_label.shape != label.shape: - pred_label = ndarray.argmax(pred_label, axis=self.axis) - pred_label = pred_label.asnumpy().astype('int32') - label = label.asnumpy().astype('int32') + pred_label = pred_label.argmax(axis=self.axis) + pred_label = pred_label.astype('int32') + label = label.astype('int32') # flatten before checking shapes to avoid shape miss match - label = label.flat - pred_label = pred_label.flat + label = label.reshape(-1) + pred_label = pred_label.reshape(-1) check_label_shapes(label, pred_label) - num_correct = (pred_label == label).sum() + num_correct = (pred_label == label).sum().astype('float64') self.sum_metric += num_correct - self.global_sum_metric += num_correct self.num_inst += len(pred_label) - self.global_num_inst += len(pred_label) @register @alias('top_k_accuracy', 'top_k_acc') +@use_np class TopKAccuracy(EvalMetric): """Computes top k predictions accuracy. @@ -535,7 +468,7 @@ class TopKAccuracy(EvalMetric): >>> top_k = 3 >>> labels = [mx.nd.array([2, 6, 9, 2, 3, 4, 7, 8, 9, 6])] >>> predicts = [mx.nd.array(np.random.rand(10, 10))] - >>> acc = mx.metric.TopKAccuracy(top_k=top_k) + >>> acc = mx.gluon.metric.TopKAccuracy(top_k=top_k) >>> acc.update(labels, predicts) >>> print acc.get() ('top_k_accuracy', 0.3) @@ -545,8 +478,7 @@ def __init__(self, top_k=1, name='top_k_accuracy', output_names=None, label_names=None): super(TopKAccuracy, self).__init__( name, top_k=top_k, - output_names=output_names, label_names=label_names, - has_global_stats=True) + output_names=output_names, label_names=label_names) self.top_k = top_k assert(self.top_k > 1), 'Please use Accuracy if top_k is no more than 1' self.name += '_%d' % self.top_k @@ -570,43 +502,89 @@ def update(self, labels, preds): # we do not care about the order of top k elements. 
It is # much faster, which is important since that computation is # single-threaded due to Python GIL. - pred_label = numpy.argpartition(pred_label.asnumpy().astype('float32'), -self.top_k) - label = label.asnumpy().astype('int32') + pred_label = pred_label.as_np_ndarray().as_in_ctx(label.ctx).astype('float32') + pred_label = numpy.argpartition(pred_label, -self.top_k) + label = label.as_np_ndarray().astype('int32') check_label_shapes(label, pred_label) num_samples = pred_label.shape[0] num_dims = len(pred_label.shape) if num_dims == 1: - self.sum_metric += (pred_label.flat == label.flat).sum() + num_correct = (pred_label.reshape(-1) == label.reshape(-1)).sum() + self.sum_metric += num_correct.astype('float64') elif num_dims == 2: num_classes = pred_label.shape[1] top_k = min(num_classes, self.top_k) for j in range(top_k): - num_correct = (pred_label[:, num_classes - 1 - j].flat == label.flat).sum() - self.sum_metric += num_correct - self.global_sum_metric += num_correct + num_correct = (pred_label[:, num_classes - 1 - j].reshape(-1) == label.reshape(-1)).sum() + self.sum_metric += num_correct.astype('float64') self.num_inst += num_samples - self.global_num_inst += num_samples -class _BinaryClassificationMetrics(object): +def predict_with_threshold(pred, threshold=0.5): + """Do thresholding of predictions in binary and multilabel cases. + + Parameters + ---------- + preds : ndarray + predictions in shape of (batch_size, ...) or (batch_size, ..., num_categories) + + threshold : float or ndarray + threshold(s) in shape of float or (num_categories) + """ + if isinstance(threshold, float): + return pred > threshold + elif isinstance(threshold, (numpy.ndarray, ndarray.ndarray.NDArray)): + num_classes = pred.shape[-1] + assert threshold.shape[-1] == num_classes, \ + "shape mismatch: %s vs. %s"%(pred.shape[-1], threshold.shape[-1]) + return pred > threshold + else: + raise ValueError("{} is a wrong type for threshold!".format(type(threshold))) + + +def one_hot(idx, num): + return (numpy.arange(num).astype(idx) == idx[:, None]).astype('int32') + + +@use_np +class _ClassificationMetrics(object): """Private container class for classification metric statistics. True/false positive and true/false negative counts are sufficient statistics for various classification metrics. This class provides the machinery to track those statistics across mini-batches of (label, prediction) pairs. + + Parameters + ---------- + class_type : str, default "binary" + "binary": f1 for binary classification. + "multiclass": f1 for multiclassification problem. + "multilabel": f1 for multilabel classification. + beta : float, default 1 + weight of precision in harmonic mean. + threshold : float, default 0.5 + threshold for deciding whether the predictions are positive or negative.
+ """ - def __init__(self): - self.true_positives = 0 - self.false_negatives = 0 - self.false_positives = 0 - self.true_negatives = 0 - self.global_true_positives = 0 - self.global_false_negatives = 0 - self.global_false_positives = 0 - self.global_true_negatives = 0 - - def update_binary_stats(self, label, pred): + def __init__(self, class_type="binary", threshold=0.5, beta=1): + self.class_type = class_type + self.threshold = threshold + self.beta = beta + self.reset_stats() + + def _set(self, num, ctx): + if self.num_classes is None: + self.num_classes = num + self.true_positives = numpy.zeros(num, dtype='float64').as_in_ctx(ctx) + self.false_negatives = numpy.zeros(num, dtype='float64').as_in_ctx(ctx) + self.false_positives = numpy.zeros(num, dtype='float64').as_in_ctx(ctx) + self.true_negatives = numpy.zeros(num, dtype='float64').as_in_ctx(ctx) + else: + assert self.num_classes == num, \ + "Input number of classes has changed from {} to {}".format(self.num_classes, num) + + def update_stats(self, label, pred): """Update various binary classification counts for a single (label, pred) pair. Parameters @@ -617,92 +595,107 @@ def update_binary_stats(self, label, pred): pred : `NDArray` Predicted values. """ - pred = pred.asnumpy() - label = label.asnumpy().astype('int32') - pred_label = numpy.argmax(pred, axis=1) - - check_label_shapes(label, pred) - if len(numpy.unique(label)) > 2: - raise ValueError("%s currently only supports binary classification." - % self.__class__.__name__) + pred = pred.as_np_ndarray().as_in_ctx(label.ctx) + label = label.as_np_ndarray().astype('int32') + if self.class_type == "binary": + self._set(1, label.ctx) + if label.max() > 1: + raise ValueError("Wrong label for binary classification.") + if pred.shape == label.shape: + pass + elif pred.shape[-1] > 2: + raise ValueError("The shape of prediction {} is wrong for binary classification.".format(pred.shape)) + elif pred.shape[-1] == 2: + pred = pred.reshape(-1, 2)[:, 1] + pred_label = predict_with_threshold(pred, self.threshold).reshape(-1) + label = label.reshape(-1) + + elif self.class_type == "multiclass": + num = pred.shape[-1] + self._set(num, label.ctx) + assert label.max() < num, "pred contains fewer classes than label!" + pred_label = one_hot(pred.argmax(axis=-1).reshape(-1), num) + label = one_hot(label.reshape(-1), num) + + elif self.class_type == "multilabel": + num = pred.shape[-1] + self._set(num, label.ctx) + assert pred.shape == label.shape, \ + "The shape of label should be same as that of prediction for multilabel classification." + pred_label = predict_with_threshold(pred, self.threshold).reshape(-1, num) + label = label.reshape(-1, num) + else: + raise ValueError( + "Wrong class_type {}! 
Only supports ['binary', 'multiclass', 'multilabel']".format(self.class_type)) + + check_label_shapes(label, pred_label) + pred_true = (pred_label == 1) - pred_false = 1 - pred_true + pred_false = (pred_label == 0) label_true = (label == 1) - label_false = 1 - label_true + label_false = (label == 0) - true_pos = (pred_true * label_true).sum() - false_pos = (pred_true * label_false).sum() - false_neg = (pred_false * label_true).sum() - true_neg = (pred_false * label_false).sum() + true_pos = (pred_true * label_true).sum(0) + false_pos = (pred_true * label_false).sum(0) + false_neg = (pred_false * label_true).sum(0) + true_neg = (pred_false * label_false).sum(0) self.true_positives += true_pos - self.global_true_positives += true_pos self.false_positives += false_pos - self.global_false_positives += false_pos self.false_negatives += false_neg - self.global_false_negatives += false_neg self.true_negatives += true_neg - self.global_true_negatives += true_neg @property def precision(self): - if self.true_positives + self.false_positives > 0: - return float(self.true_positives) / (self.true_positives + self.false_positives) + if self.num_classes is not None: + return self.true_positives / numpy.maximum(self.true_positives + self.false_positives, 1e-12) else: return 0. @property - def global_precision(self): - if self.global_true_positives + self.global_false_positives > 0: - return float(self.global_true_positives) / (self.global_true_positives + self.global_false_positives) + def micro_precision(self): + if self.num_classes is not None: + return self.true_positives.sum() / \ + numpy.maximum(self.true_positives.sum() + self.false_positives.sum(), 1e-12) else: return 0. @property def recall(self): - if self.true_positives + self.false_negatives > 0: - return float(self.true_positives) / (self.true_positives + self.false_negatives) + if self.num_classes is not None: + return self.true_positives / numpy.maximum(self.true_positives + self.false_negatives, 1e-12) else: return 0. @property - def global_recall(self): - if self.global_true_positives + self.global_false_negatives > 0: - return float(self.global_true_positives) / (self.global_true_positives + self.global_false_negatives) + def micro_recall(self): + if self.num_classes is not None: + return self.true_positives.sum() / \ + numpy.maximum(self.true_positives.sum() + self.false_negatives.sum(), 1e-12) else: return 0. @property def fscore(self): - if self.precision + self.recall > 0: - return 2 * self.precision * self.recall / (self.precision + self.recall) - else: - return 0. + return (1 + self.beta ** 2) * self.precision * self.recall / \ + numpy.maximum(self.beta ** 2 * self.precision + self.recall, 1e-12) @property - def global_fscore(self): - if self.global_precision + self.global_recall > 0: - return 2 * self.global_precision * self.global_recall / (self.global_precision + self.global_recall) + def micro_fscore(self): + if self.micro_precision + self.micro_recall > 0: + return (1 + self.beta ** 2) * self.micro_precision * self.micro_recall / \ + (self.beta ** 2 * self.micro_precision + self.micro_recall) else: return 0. - def matthewscc(self, use_global=False): + def binary_matthewscc(self): """Calculate the Matthew's Correlation Coefficent""" - if use_global: - if not self.global_total_examples: - return 0. 
- - true_pos = float(self.global_true_positives) - false_pos = float(self.global_false_positives) - false_neg = float(self.global_false_negatives) - true_neg = float(self.global_true_negatives) - else: - if not self.total_examples: - return 0. + if not self.total_examples: + return 0. - true_pos = float(self.true_positives) - false_pos = float(self.false_positives) - false_neg = float(self.false_negatives) - true_neg = float(self.true_negatives) + true_pos = float(self.true_positives) + false_pos = float(self.false_positives) + false_neg = float(self.false_negatives) + true_neg = float(self.true_negatives) terms = [(true_pos + false_pos), (true_pos + false_neg), @@ -715,32 +708,21 @@ def matthewscc(self, use_global=False): @property def total_examples(self): - return self.false_negatives + self.false_positives + \ - self.true_negatives + self.true_positives - - @property - def global_total_examples(self): - return self.global_false_negatives + self.global_false_positives + \ - self.global_true_negatives + self.global_true_positives - - def local_reset_stats(self): - self.false_positives = 0 - self.false_negatives = 0 - self.true_positives = 0 - self.true_negatives = 0 + if self.num_classes is None: + return 0 + return int(self.false_negatives[0] + self.false_positives[0] + \ + self.true_negatives[0] + self.true_positives[0]) def reset_stats(self): - self.false_positives = 0 - self.false_negatives = 0 - self.true_positives = 0 - self.true_negatives = 0 - self.global_false_positives = 0 - self.global_false_negatives = 0 - self.global_true_positives = 0 - self.global_true_negatives = 0 + self.num_classes = None + self.true_positives = None + self.false_negatives = None + self.false_positives = None + self.true_negatives = None @register +@use_np class F1(EvalMetric): """Computes the F1 score of a binary classification problem. @@ -768,28 +750,34 @@ class F1(EvalMetric): label_names : list of str, or None Name of labels that should be used when updating with update_dict. By default include all labels. - average : str, default 'macro' + class_type : str, default "binary" + "binary": f1 for binary classification. + "multiclass": f1 for multiclassification problem. + "multilabel": f1 for multilabel classification. + threshold : float, default 0.5 + threshold for postive confidence value. + average : str, default 'micro' Strategy to be used for aggregating across mini-batches. - "macro": average the F1 scores for each batch. - "micro": compute a single F1 score across all batches. + "macro": Calculate metrics for each label and return unweighted mean of f1. + "micro": Calculate metrics globally by counting the total TP, FN and FP. + None: Return f1 scores for each class (numpy.ndarray) . 
Examples -------- >>> predicts = [mx.nd.array([[0.3, 0.7], [0., 1.], [0.4, 0.6]])] >>> labels = [mx.nd.array([0., 1., 1.])] - >>> f1 = mx.metric.F1() + >>> f1 = mx.gluon.metric.F1() >>> f1.update(preds = predicts, labels = labels) >>> print f1.get() ('f1', 0.8) """ def __init__(self, name='f1', - output_names=None, label_names=None, average="macro"): + output_names=None, label_names=None, class_type="binary", threshold=0.5, average="micro"): self.average = average - self.metrics = _BinaryClassificationMetrics() + self.metrics = _ClassificationMetrics(class_type=class_type, threshold=threshold) EvalMetric.__init__(self, name=name, - output_names=output_names, label_names=label_names, - has_global_stats=True) + output_names=output_names, label_names=label_names) def update(self, labels, preds): """Updates the internal evaluation result. @@ -805,36 +793,149 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): - self.metrics.update_binary_stats(label, pred) + self.metrics.update_stats(label, pred) - if self.average == "macro": - self.sum_metric += self.metrics.fscore - self.global_sum_metric += self.metrics.global_fscore - self.num_inst += 1 - self.global_num_inst += 1 - self.metrics.reset_stats() + if self.average == "micro": + self.sum_metric = self.metrics.micro_fscore * self.metrics.total_examples + elif self.average == "macro": + self.sum_metric = self.metrics.fscore.mean() * self.metrics.total_examples else: self.sum_metric = self.metrics.fscore * self.metrics.total_examples - self.global_sum_metric = self.metrics.global_fscore * self.metrics.global_total_examples - self.num_inst = self.metrics.total_examples - self.global_num_inst = self.metrics.global_total_examples + self.num_inst = self.metrics.total_examples def reset(self): """Resets the internal evaluation result to initial state.""" self.sum_metric = 0. self.num_inst = 0 - self.global_num_inst = 0 - self.global_sum_metric = 0.0 self.metrics.reset_stats() - def reset_local(self): - """Resets the internal evaluation result to initial state.""" - self.sum_metric = 0. - self.num_inst = 0 - self.metrics.local_reset_stats() + +@register +@use_np +class Fbeta(F1): + """Computes the Fbeta score of a binary classification problem. + + The Fbeta score is equivalent to harmonic mean of the precision and recall, + where the best value is 1.0 and the worst value is 0.0. The formula for Fbeta score is:: + + Fbeta = (1 + beta ** 2) * (precision * recall) / (beta ** 2 * precision + recall) + + The formula for precision and recall is:: + + precision = true_positives / (true_positives + false_positives) + recall = true_positives / (true_positives + false_negatives) + + .. note:: + + This Fbeta score only supports binary classification. + + Parameters + ---------- + name : str + Name of this metric instance for display. + output_names : list of str, or None + Name of predictions that should be used when updating with update_dict. + By default include all predictions. + label_names : list of str, or None + Name of labels that should be used when updating with update_dict. + By default include all labels. + class_type : str, default "binary" + "binary": f1 for binary classification. + "multiclass": f1 for multiclassification problem. + "multilabel": f1 for multilabel classification. + beta : float, default 1 + weight of precision in harmonic mean. + threshold : float, default 0.5 + threshold for postive confidence value. 
+ average : str, default 'micro' + Strategy to be used for aggregating across mini-batches. + "macro": Calculate metrics for each label and return unweighted mean of f1. + "micro": Calculate metrics globally by counting the total TP, FN and FP. + None: Return f1 scores for each class. + + Examples + -------- + >>> predicts = [mx.nd.array([[0.3, 0.7], [0., 1.], [0.4, 0.6]])] + >>> labels = [mx.nd.array([0., 1., 1.])] + >>> fbeta = mx.gluon.metric.Fbeta(beta=2) + >>> fbeta.update(preds = predicts, labels = labels) + >>> print fbeta.get() + ('fbeta', 0.9090909090909091) + """ + + def __init__(self, name='fbeta', + output_names=None, label_names=None, class_type="binary", beta=1, threshold=0.5, average="micro"): + super(Fbeta, self).__init__( + name=name, output_names=output_names, label_names=label_names, + class_type=class_type, threshold=threshold, average=average) + self.metrics = _ClassificationMetrics(class_type=class_type, threshold=threshold, beta=beta) + + +@register +@use_np +class BinaryAccuracy(EvalMetric): + """Computes the accuracy of a binary or multilabel classification problem. + + Parameters + ---------- + name : str + Name of this metric instance for display. + output_names : list of str, or None + Name of predictions that should be used when updating with update_dict. + By default include all predictions. + label_names : list of str, or None + Name of labels that should be used when updating with update_dict. + By default include all labels. + threshold : float or ndarray, default 0.5 + threshold for deciding whether the predictions are positive or negative. + + Examples + -------- + >>> predicts = [mx.nd.array([0.7, 1, 0.55])] + >>> labels = [mx.nd.array([0., 1., 0.])] + >>> bacc = mx.gluon.metric.BinaryAccuracy(threshold=0.6) + >>> bacc.update(preds = predicts, labels = labels) + >>> print bacc.get() + ('binary_accuracy', 0.6666666666666666) + """ + + def __init__(self, name='binary_accuracy', + output_names=None, label_names=None, threshold=0.5): + self.threshold = threshold + EvalMetric.__init__(self, name=name, + output_names=output_names, label_names=label_names) + + def update(self, labels, preds): + """Updates the internal evaluation result. + + Parameters + ---------- + labels : list of `NDArray` + Each label denotes positive/negative for each class. + + preds : list of `NDArray` + Each prediction value is a confidence value of being positive for each class. + """ + labels, preds = check_label_shapes(labels, preds, True) + + for label, pred_label in zip(labels, preds): + pred_label = predict_with_threshold(pred_label, self.threshold) + + pred_label = pred_label.as_np_ndarray().astype('int32').as_in_ctx(label.ctx) + label = label.as_np_ndarray().astype('int32') + # flatten before checking shapes to avoid shape miss match + label = label.reshape(-1) + pred_label = pred_label.reshape(-1) + + check_label_shapes(label, pred_label) + + num_correct = (pred_label == label).sum().astype('float64') + self.sum_metric += num_correct + self.num_inst += len(pred_label) @register +@use_np class MCC(EvalMetric): """Computes the Matthews Correlation Coefficient of a binary classification problem. @@ -865,10 +966,6 @@ class MCC(EvalMetric): label_names : list of str, or None Name of labels that should be used when updating with update_dict. By default include all labels. - average : str, default 'macro' - Strategy to be used for aggregating across mini-batches. - "macro": average the MCC for each batch. - "micro": compute a single MCC across all batches. 
Examples -------- @@ -887,9 +984,9 @@ class MCC(EvalMetric): [0.]*(false_positives + true_negatives) + [1.]*(false_negatives + true_positives) )] - >>> f1 = mx.metric.F1() + >>> f1 = mx.gluon.metric.F1() >>> f1.update(preds = predicts, labels = labels) - >>> mcc = mx.metric.MCC() + >>> mcc = mx.gluon.metric.MCC() >>> mcc.update(preds = predicts, labels = labels) >>> print f1.get() ('f1', 0.95233560306652054) @@ -898,12 +995,10 @@ class MCC(EvalMetric): """ def __init__(self, name='mcc', - output_names=None, label_names=None, average="macro"): - self._average = average - self._metrics = _BinaryClassificationMetrics() + output_names=None, label_names=None): + self._metrics = _ClassificationMetrics() EvalMetric.__init__(self, name=name, - output_names=output_names, label_names=label_names, - has_global_stats=True) + output_names=output_names, label_names=label_names) def update(self, labels, preds): """Updates the internal evaluation result. @@ -919,72 +1014,35 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): - self._metrics.update_binary_stats(label, pred) + self._metrics.update_stats(label, pred) - if self._average == "macro": - self.sum_metric += self._metrics.matthewscc() - self.global_sum_metric += self._metrics.matthewscc(use_global=True) - self.num_inst += 1 - self.global_num_inst += 1 - self._metrics.reset_stats() - else: - self.sum_metric = self._metrics.matthewscc() * self._metrics.total_examples - self.global_sum_metric = self._metrics.matthewscc(use_global=True) * \ - self._metrics.global_total_examples - self.num_inst = self._metrics.total_examples - self.global_num_inst = self._metrics.global_total_examples + self.sum_metric = self._metrics.binary_matthewscc() * self._metrics.total_examples + self.num_inst = self._metrics.total_examples def reset(self): """Resets the internal evaluation result to initial state.""" self.sum_metric = 0. self.num_inst = 0. - self.global_sum_metric = 0. - self.global_num_inst = 0. self._metrics.reset_stats() - def reset_local(self): - """Resets the internal evaluation result to initial state.""" - self.sum_metric = 0. - self.num_inst = 0. - self._metrics.local_reset_stats() +#################### +# REGRESSION METRICS +#################### -@register -class Perplexity(EvalMetric): - """Computes perplexity. - Perplexity is a measurement of how well a probability distribution - or model predicts a sample. A low perplexity indicates the model - is good at predicting the sample. +@register +@use_np +class MAE(EvalMetric): + """Computes Mean Absolute Error (MAE) loss. - The perplexity of a model q is defined as + The mean absolute error is given by .. math:: - b^{\\big(-\\frac{1}{N} \\sum_{i=1}^N \\log_b q(x_i) \\big)} - = \\exp \\big(-\\frac{1}{N} \\sum_{i=1}^N \\log q(x_i)\\big) - - where we let `b = e`. - - :math:`q(x_i)` is the predicted value of its ground truth - label on sample :math:`x_i`. - - For example, we have three samples :math:`x_1, x_2, x_3` and their labels - are :math:`[0, 1, 1]`. - Suppose our model predicts :math:`q(x_1) = p(y_1 = 0 | x_1) = 0.3` - and :math:`q(x_2) = 1.0`, - :math:`q(x_3) = 0.6`. The perplexity of model q is - :math:`exp\\big(-(\\log 0.3 + \\log 1.0 + \\log 0.6) / 3\\big) = 1.77109762852`. + \\frac{\\sum_i^n |y_i - \\hat{y}_i|}{n} Parameters ---------- - ignore_label : int or None - Index of invalid label to ignore when - counting. By default, sets to -1. - If set to `None`, it will include all entries. 
- axis : int (default -1) - The axis from prediction that was used to - compute softmax. By default use the last - axis. name : str Name of this metric instance for display. output_names : list of str, or None @@ -996,21 +1054,18 @@ class Perplexity(EvalMetric): Examples -------- - >>> predicts = [mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]])] - >>> labels = [mx.nd.array([0, 1, 1])] - >>> perp = mx.metric.Perplexity(ignore_label=None) - >>> perp.update(labels, predicts) - >>> print perp.get() - ('Perplexity', 1.7710976285155853) + >>> predicts = [mx.nd.array([3, -0.5, 2, 7])] + >>> labels = [mx.nd.array([2.5, 0.0, 2, 8])] + >>> mean_absolute_error = mx.gluon.metric.MAE() + >>> mean_absolute_error.update(labels = labels, preds = predicts) + >>> print mean_absolute_error.get() + ('mae', 0.5) """ - def __init__(self, ignore_label, axis=-1, name='perplexity', + + def __init__(self, name='mae', output_names=None, label_names=None): - super(Perplexity, self).__init__( - name, ignore_label=ignore_label, - output_names=output_names, label_names=label_names, - has_global_stats=True) - self.ignore_label = ignore_label - self.axis = axis + super(MAE, self).__init__( + name, output_names=output_names, label_names=label_names) def update(self, labels, preds): """Updates the internal evaluation result. @@ -1023,64 +1078,28 @@ def update(self, labels, preds): preds : list of `NDArray` Predicted values. """ - assert len(labels) == len(preds) - loss = 0. - num = 0 - for label, pred in zip(labels, preds): - assert label.size == pred.size/pred.shape[-1], \ - "shape mismatch: %s vs. %s"%(label.shape, pred.shape) - label = label.as_in_context(pred.context).reshape((label.size,)) - pred = ndarray.pick(pred, label.astype(dtype='int32'), axis=self.axis) - if self.ignore_label is not None: - ignore = (label == self.ignore_label).astype(pred.dtype) - num -= ndarray.sum(ignore).asscalar() - pred = pred*(1-ignore) + ignore - loss -= ndarray.sum(ndarray.log(ndarray.maximum(1e-10, pred))).asscalar() - num += pred.size - self.sum_metric += loss - self.global_sum_metric += loss - self.num_inst += num - self.global_num_inst += num - - def get(self): - """Returns the current evaluation result. - - Returns - ------- - Tuple of (str, float) - Representing name of the metric and evaluation result. - """ - if self.num_inst == 0: - return (self.name, float('nan')) - else: - return (self.name, math.exp(self.sum_metric/self.num_inst)) + labels, preds = check_label_shapes(labels, preds, True) - def get_global(self): - """Returns the current global evaluation result. + for label, pred in zip(labels, preds): + label = label.as_np_ndarray() + pred = pred.as_np_ndarray().as_in_ctx(label.ctx) - Returns - ------- - Tuple of (str, float) - Representing name of the metric and evaluation result. - """ - if self.global_num_inst == 0: - return (self.name, float('nan')) - else: - return (self.name, math.exp(self.global_sum_metric/self.global_num_inst)) + num_inst = label.shape[0] + mae = numpy.abs(label - pred).reshape(num_inst, -1).mean(axis=-1).sum() -#################### -# REGRESSION METRICS -#################### + self.sum_metric += mae + self.num_inst += num_inst @register -class MAE(EvalMetric): - """Computes Mean Absolute Error (MAE) loss. +@use_np +class MSE(EvalMetric): + """Computes Mean Squared Error (MSE) loss. - The mean absolute error is given by + The mean squared error is given by .. 
math:: - \\frac{\\sum_i^n |y_i - \\hat{y}_i|}{n} + \\frac{\\sum_i^n (y_i - \\hat{y}_i)^2}{n} Parameters ---------- @@ -1095,19 +1114,17 @@ class MAE(EvalMetric): Examples -------- - >>> predicts = [mx.nd.array(np.array([3, -0.5, 2, 7]).reshape(4,1))] - >>> labels = [mx.nd.array(np.array([2.5, 0.0, 2, 8]).reshape(4,1))] - >>> mean_absolute_error = mx.metric.MAE() - >>> mean_absolute_error.update(labels = labels, preds = predicts) - >>> print mean_absolute_error.get() - ('mae', 0.5) + >>> predicts = [mx.nd.array([3, -0.5, 2, 7])] + >>> labels = [mx.nd.array([2.5, 0.0, 2, 8])] + >>> mean_squared_error = mx.gluon.metric.MSE() + >>> mean_squared_error.update(labels = labels, preds = predicts) + >>> print mean_squared_error.get() + ('mse', 0.375) """ - - def __init__(self, name='mae', + def __init__(self, name='mse', output_names=None, label_names=None): - super(MAE, self).__init__( - name, output_names=output_names, label_names=label_names, - has_global_stats=True) + super(MSE, self).__init__( + name, output_names=output_names, label_names=label_names) def update(self, labels, preds): """Updates the internal evaluation result. @@ -1123,29 +1140,25 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): - label = label.asnumpy() - pred = pred.asnumpy() + label = label.as_np_ndarray() + pred = pred.as_np_ndarray().as_in_ctx(label.ctx) - if len(label.shape) == 1: - label = label.reshape(label.shape[0], 1) - if len(pred.shape) == 1: - pred = pred.reshape(pred.shape[0], 1) + num_inst = label.shape[0] + mse = ((label - pred)**2.0).reshape(num_inst, -1).mean(axis=-1).sum() - mae = numpy.abs(label - pred).mean() - self.sum_metric += mae - self.global_sum_metric += mae - self.num_inst += 1 # numpy.prod(label.shape) - self.global_num_inst += 1 # numpy.prod(label.shape) + self.sum_metric += mse + self.num_inst += num_inst @register -class MSE(EvalMetric): - """Computes Mean Squared Error (MSE) loss. +@use_np +class RMSE(MSE): + """Computes Root Mean Squred Error (RMSE) loss. - The mean squared error is given by + The root mean squared error is given by .. math:: - \\frac{\\sum_i^n (y_i - \\hat{y}_i)^2}{n} + \\sqrt{\\frac{\\sum_i^n (y_i - \\hat{y}_i)^2}{n}} Parameters ---------- @@ -1160,18 +1173,62 @@ class MSE(EvalMetric): Examples -------- - >>> predicts = [mx.nd.array(np.array([3, -0.5, 2, 7]).reshape(4,1))] - >>> labels = [mx.nd.array(np.array([2.5, 0.0, 2, 8]).reshape(4,1))] - >>> mean_squared_error = mx.metric.MSE() - >>> mean_squared_error.update(labels = labels, preds = predicts) - >>> print mean_squared_error.get() - ('mse', 0.375) + >>> predicts = [mx.nd.array([3, -0.5, 2, 7])] + >>> labels = [mx.nd.array([2.5, 0.0, 2, 8])] + >>> root_mean_squared_error = mx.gluon.metric.RMSE() + >>> root_mean_squared_error.update(labels = labels, preds = predicts) + >>> print root_mean_squared_error.get() + ('rmse', 0.612372457981) """ - def __init__(self, name='mse', + def __init__(self, name='rmse', output_names=None, label_names=None): - super(MSE, self).__init__( - name, output_names=output_names, label_names=label_names, - has_global_stats=True) + super(RMSE, self).__init__( + name, output_names=output_names, label_names=label_names) + + def get(self): + if self.num_inst == 0: + return (self.name, float('nan')) + else: + return (self.name, math.sqrt(self.sum_metric / self.num_inst)) + + +@register +@use_np +class MeanPairwiseDistance(EvalMetric): + """Computes Mean Pairwise Distance. + + The mean pairwise distance is given by + + .. 
math:: + \\frac{\\sum_i^n \\lVert y_i - \\hat{y}_i \\rVert_p}{n} + + Parameters + ---------- + name : str + Name of this metric instance for display. + output_names : list of str, or None + Name of predictions that should be used when updating with update_dict. + By default include all predictions. + label_names : list of str, or None + Name of labels that should be used when updating with update_dict. + By default include all labels. + p : float, default 2 + calculating distance using the p-norm + + Examples + -------- + >>> predicts = [mx.nd.array([[1., 2.], [3., 4.]])] + >>> labels = [mx.nd.array([[1., 0.], [4., 2.]])] + >>> mpd = mx.gluon.metric.MeanPairwiseDistance() + >>> mpd.update(labels = labels, preds = predicts) + >>> print mpd.get() + ('mpd', 2.1180338859558105) + """ + def __init__(self, name='mpd', + output_names=None, label_names=None, p=2): + super(MeanPairwiseDistance, self).__init__( + name, output_names=output_names, label_names=label_names) + self.p = p def update(self, labels, preds): """Updates the internal evaluation result. @@ -1187,29 +1244,30 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): - label = label.asnumpy() - pred = pred.asnumpy() + label = label.as_np_ndarray() + pred = pred.as_np_ndarray().as_in_ctx(label.ctx) - if len(label.shape) == 1: - label = label.reshape(label.shape[0], 1) - if len(pred.shape) == 1: - pred = pred.reshape(pred.shape[0], 1) + label = label.reshape(label.shape[0], -1) + pred = pred.reshape(pred.shape[0], -1) - mse = ((label - pred)**2.0).mean() - self.sum_metric += mse - self.global_sum_metric += mse - self.num_inst += 1 # numpy.prod(label.shape) - self.global_num_inst += 1 # numpy.prod(label.shape) + dis = (((label - pred) ** self.p).sum(axis=-1)) ** (1./self.p) + dis = dis.sum() + num_inst = label.shape[0] + + self.sum_metric += dis + self.num_inst += num_inst @register -class RMSE(EvalMetric): - """Computes Root Mean Squred Error (RMSE) loss. +@use_np +class MeanCosineSimilarity(EvalMetric): + """Computes Mean Cosine Similarity. - The root mean squared error is given by + The mean cosine similarity is given by .. math:: - \\sqrt{\\frac{\\sum_i^n (y_i - \\hat{y}_i)^2}{n}} + cos\\_sim(label, pred) = \\frac{label \\cdot pred}{\\max(\\lVert label \\rVert \\cdot \\lVert pred \\rVert, eps)} + (computed along the last dimension of label and pred.) Parameters ---------- @@ -1221,21 +1279,23 @@ class RMSE(EvalMetric): label_names : list of str, or None Name of labels that should be used when updating with update_dict. By default include all labels. + eps : float, default 1e-8 + small value to avoid division by zero.
Examples -------- - >>> predicts = [mx.nd.array(np.array([3, -0.5, 2, 7]).reshape(4,1))] - >>> labels = [mx.nd.array(np.array([2.5, 0.0, 2, 8]).reshape(4,1))] - >>> root_mean_squared_error = mx.metric.RMSE() - >>> root_mean_squared_error.update(labels = labels, preds = predicts) - >>> print root_mean_squared_error.get() - ('rmse', 0.612372457981) + >>> predicts = [mx.nd.array([[1., 0.], [1., 1.]])] + >>> labels = [mx.nd.array([[3., 4.], [2., 2.]])] + >>> mcs = mx.gluon.metric.MeanCosineSimilarity() + >>> mcs.update(labels = labels, preds = predicts) + >>> print mcs.get() + ('cos_sim', 0.8) """ - def __init__(self, name='rmse', - output_names=None, label_names=None): - super(RMSE, self).__init__( - name, output_names=output_names, label_names=label_names, - has_global_stats=True) + def __init__(self, name='cos_sim', + output_names=None, label_names=None, eps=1e-8): + super(MeanCosineSimilarity, self).__init__( + name, output_names=output_names, label_names=label_names) + self.eps = eps def update(self, labels, preds): """Updates the internal evaluation result. @@ -1251,23 +1311,27 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): - label = label.asnumpy() - pred = pred.asnumpy() + label = label.as_np_ndarray() + pred = pred.as_np_ndarray().as_in_ctx(label.ctx) if len(label.shape) == 1: - label = label.reshape(label.shape[0], 1) + label = label.reshape(1, label.shape[0]) if len(pred.shape) == 1: - pred = pred.reshape(pred.shape[0], 1) + pred = pred.reshape(1, pred.shape[0]) - rmse = numpy.sqrt(((label - pred)**2.0).mean()) - self.sum_metric += rmse - self.global_sum_metric += rmse - self.num_inst += 1 - self.global_num_inst += 1 + sim = (label * pred).sum(axis=-1) + n_p = numpy.linalg.norm(pred, axis=-1) + n_l = numpy.linalg.norm(label, axis=-1) + sim = sim / numpy.maximum(n_l * n_p, self.eps) + sim = sim.sum() + num_inst = len(label.reshape(-1, label.shape[-1])) # numpy.prod(label.shape[:-1]) is not supported + self.sum_metric += sim + self.num_inst += num_inst @register @alias('ce') +@use_np class CrossEntropy(EvalMetric): """Computes Cross Entropy loss. @@ -1282,9 +1346,15 @@ class :math:`k`. Parameters ---------- - eps : float - Cross Entropy loss is undefined for predicted value is 0 or 1, - so predicted values are added with the small constant. + eps : float, default 1e-12 + Use small constant for the case that predicted value is 0. + ignore_label : int or None, default None + Index of invalid label to ignore when + counting. By default, sets to -1. + If set to `None`, it will include all entries. + axis : int (default -1) + The axis from prediction that was used to + compute softmax. By default use the last axis. name : str Name of this metric instance for display. output_names : list of str, or None @@ -1298,17 +1368,17 @@ class :math:`k`. 
-------- >>> predicts = [mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]])] >>> labels = [mx.nd.array([0, 1, 1])] - >>> ce = mx.metric.CrossEntropy() + >>> ce = mx.gluon.metric.CrossEntropy() >>> ce.update(labels, predicts) >>> print ce.get() ('cross-entropy', 0.57159948348999023) """ - def __init__(self, eps=1e-12, name='cross-entropy', + def __init__(self, eps=1e-12, ignore_label=None, axis=-1, name='cross-entropy', output_names=None, label_names=None): super(CrossEntropy, self).__init__( - name, eps=eps, - output_names=output_names, label_names=label_names, - has_global_stats=True) + name, output_names=output_names, label_names=label_names) + self.ignore_label = ignore_label + self.axis = axis self.eps = eps def update(self, labels, preds): @@ -1324,22 +1394,97 @@ def update(self, labels, preds): """ labels, preds = check_label_shapes(labels, preds, True) + loss = 0. + num = 0 for label, pred in zip(labels, preds): - label = label.asnumpy() - pred = pred.asnumpy() + assert label.size == pred.size/pred.shape[-1], \ + "shape mismatch: %s vs. %s"%(label.shape, pred.shape) + label = label.reshape((label.size,)) + pred = ndarray.pick(pred.as_in_context(label.ctx), label.astype(dtype='int32'), axis=self.axis) + label = label.as_np_ndarray() + pred = pred.as_np_ndarray() + if self.ignore_label is not None: + ignore = (label == self.ignore_label).astype(pred.dtype) + num -= ignore.sum() + pred = pred * (1 - ignore) + ignore + loss -= numpy.log(numpy.maximum(self.eps, pred)).sum() + num += pred.size + self.sum_metric += loss + self.num_inst += num - label = label.ravel() - assert label.shape[0] == pred.shape[0] - prob = pred[numpy.arange(label.shape[0]), numpy.int64(label)] - cross_entropy = (-numpy.log(prob + self.eps)).sum() - self.sum_metric += cross_entropy - self.global_sum_metric += cross_entropy - self.num_inst += label.shape[0] - self.global_num_inst += label.shape[0] +@register +@use_np +class Perplexity(CrossEntropy): + """Computes perplexity. + + Perplexity is a measurement of how well a probability distribution + or model predicts a sample. A low perplexity indicates the model + is good at predicting the sample. + + The perplexity of a model q is defined as + + .. math:: + b^{\\big(-\\frac{1}{N} \\sum_{i=1}^N \\log_b q(x_i) \\big)} + = \\exp \\big(-\\frac{1}{N} \\sum_{i=1}^N \\log q(x_i)\\big) + + where we let `b = e`. + + :math:`q(x_i)` is the predicted value of its ground truth + label on sample :math:`x_i`. + + For example, we have three samples :math:`x_1, x_2, x_3` and their labels + are :math:`[0, 1, 1]`. + Suppose our model predicts :math:`q(x_1) = p(y_1 = 0 | x_1) = 0.3` + and :math:`q(x_2) = 1.0`, + :math:`q(x_3) = 0.6`. The perplexity of model q is + :math:`exp\\big(-(\\log 0.3 + \\log 1.0 + \\log 0.6) / 3\\big) = 1.77109762852`. + + Parameters + ---------- + eps : float, default 1e-12 + Use small constant for the case that predicted value is 0. + ignore_label : int or None, default None + Index of invalid label to ignore when + counting. By default, sets to -1. + If set to `None`, it will include all entries. + axis : int (default -1) + The axis from prediction that was used to + compute softmax. By default use the last axis. + name : str + Name of this metric instance for display. + output_names : list of str, or None + Name of predictions that should be used when updating with update_dict. + By default include all predictions. + label_names : list of str, or None + Name of labels that should be used when updating with update_dict. + By default include all labels. 
+ + Examples + -------- + >>> predicts = [mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]])] + >>> labels = [mx.nd.array([0, 1, 1])] + >>> perp = mx.gluon.metric.Perplexity(ignore_label=None) + >>> perp.update(labels, predicts) + >>> print perp.get() + ('Perplexity', 1.7710976285155853) + """ + def __init__(self, eps=1e-12, ignore_label=None, axis=-1, name='perplexity', + output_names=None, label_names=None): + super(Perplexity, self).__init__( + name=name, eps=eps, ignore_label=ignore_label, axis=axis, + output_names=output_names, label_names=label_names) + + def get(self): + if self.num_inst == 0: + return (self.name, float('nan')) + else: + return (self.name, math.exp(self.sum_metric/self.num_inst)) + @register @alias('nll_loss') +@use_np class NegativeLogLikelihood(EvalMetric): """Computes the negative log-likelihood loss. @@ -1370,7 +1515,7 @@ class NegativeLogLikelihood(EvalMetric): -------- >>> predicts = [mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]])] >>> labels = [mx.nd.array([0, 1, 1])] - >>> nll_loss = mx.metric.NegativeLogLikelihood() + >>> nll_loss = mx.gluon.metric.NegativeLogLikelihood() >>> nll_loss.update(labels, predicts) >>> print nll_loss.get() ('nll-loss', 0.57159948348999023) @@ -1379,8 +1524,7 @@ def __init__(self, eps=1e-12, name='nll-loss', output_names=None, label_names=None): super(NegativeLogLikelihood, self).__init__( name, eps=eps, - output_names=output_names, label_names=label_names, - has_global_stats=True) + output_names=output_names, label_names=label_names) self.eps = eps def update(self, labels, preds): @@ -1397,21 +1541,21 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): - label = label.asnumpy() - pred = pred.asnumpy() + label = label.as_np_ndarray() + pred = pred.as_np_ndarray().as_in_ctx(label.ctx) - label = label.ravel() + label = label.reshape(-1) num_examples = pred.shape[0] assert label.shape[0] == num_examples, (label.shape[0], num_examples) prob = pred[numpy.arange(num_examples, dtype=numpy.int64), numpy.int64(label)] nll = (-numpy.log(prob + self.eps)).sum() self.sum_metric += nll - self.global_sum_metric += nll self.num_inst += num_examples - self.global_num_inst += num_examples + @register @alias('pearsonr') +@use_np class PearsonCorrelation(EvalMetric): """Computes Pearson correlation. @@ -1430,30 +1574,23 @@ class PearsonCorrelation(EvalMetric): label_names : list of str, or None Name of labels that should be used when updating with update_dict. By default include all labels. - average : str, default 'macro' - Strategy to be used for aggregating across mini-batches. - "macro": average the pearsonr scores for each batch. - "micro": compute a single pearsonr score across all batches. 
Examples -------- >>> predicts = [mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]])] >>> labels = [mx.nd.array([[1, 0], [0, 1], [0, 1]])] - >>> pr = mx.metric.PearsonCorrelation() + >>> pr = mx.gluon.metric.PearsonCorrelation() >>> pr.update(labels, predicts) >>> print pr.get() ('pearsonr', 0.42163704544016178) """ def __init__(self, name='pearsonr', - output_names=None, label_names=None, average='macro'): - self.average = average + output_names=None, label_names=None): super(PearsonCorrelation, self).__init__( - name, output_names=output_names, label_names=label_names, - has_global_stats=True) - if self.average == 'micro': - self.reset_micro() + name, output_names=output_names, label_names=label_names) + self.reset() - def reset_micro(self): + def reset(self): self._sse_p = 0 self._mean_p = 0 self._sse_l = 0 @@ -1462,13 +1599,8 @@ def reset_micro(self): self._label_nums = 0 self._conv = 0 - def reset(self): self.num_inst = 0 self.sum_metric = 0.0 - self.global_num_inst = 0 - self.global_sum_metric = 0.0 - if self.average == 'micro': - self.reset_micro() def update_variance(self, new_values, *aggregate): #Welford's online algorithm for variance update @@ -1496,34 +1628,26 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for label, pred in zip(labels, preds): check_label_shapes(label, pred, False, True) - label = label.asnumpy().ravel().astype(numpy.float64) - pred = pred.asnumpy().ravel().astype(numpy.float64) - if self.average == 'macro': - pearson_corr = numpy.corrcoef(pred, label)[0, 1] - self.sum_metric += pearson_corr - self.global_sum_metric += pearson_corr - self.num_inst += 1 - self.global_num_inst += 1 - else: - self.global_num_inst += 1 - self.num_inst += 1 - self._label_nums, self._mean_l, self._sse_l = \ - self.update_variance(label, self._label_nums, self._mean_l, self._sse_l) - self.update_cov(label, pred) - self._pred_nums, self._mean_p, self._sse_p = \ - self.update_variance(pred, self._pred_nums, self._mean_p, self._sse_p) + label = label.as_np_ndarray().reshape(-1).astype(numpy.float64) + pred = pred.as_np_ndarray().as_in_ctx(label.ctx).reshape(-1).astype(numpy.float64) + + self.num_inst += 1 + self._label_nums, self._mean_l, self._sse_l = \ + self.update_variance(label, self._label_nums, self._mean_l, self._sse_l) + self.update_cov(label, pred) + self._pred_nums, self._mean_p, self._sse_p = \ + self.update_variance(pred, self._pred_nums, self._mean_p, self._sse_p) def get(self): if self.num_inst == 0: return (self.name, float('nan')) - if self.average == 'macro': - return (self.name, self.sum_metric / self.num_inst) - else: - n = self._label_nums - pearsonr = self._conv / ((n-1) * numpy.sqrt(self._sse_p / (n - 1)) * numpy.sqrt(self._sse_l / (n - 1))) - return (self.name, pearsonr) + + n = self._label_nums + pearsonr = self._conv / ((n-1) * numpy.sqrt(self._sse_p / (n - 1)) * numpy.sqrt(self._sse_l / (n - 1))) + return (self.name, float(pearsonr)) @register +@use_np class PCC(EvalMetric): """PCC is a multiclass equivalent for the Matthews correlation coefficient derived from a discrete solution to the Pearson correlation coefficient. 
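The PearsonCorrelation rewrite above replaces per-batch averaging with a single streaming estimate built from Welford-style updates. As a reference, here is a small self-contained NumPy sketch of the batch-merge step that an update_variance-like helper performs; the helper name and exact code are illustrative, not part of this patch:

>>> import numpy as np
>>> def merge_batch(values, n, mean, sse):
...     # fold one batch into the running (count, mean, sum of squared deviations)
...     m = len(values)
...     batch_mean = values.mean()
...     batch_sse = ((values - batch_mean) ** 2).sum()
...     delta = batch_mean - mean
...     total = n + m
...     mean = mean + delta * m / total
...     sse = sse + batch_sse + delta ** 2 * n * m / total
...     return total, mean, sse
>>> n, mean, sse = merge_batch(np.array([1., 2., 3., 4.]), 0, 0., 0.)
>>> bool(np.isclose(sse / (n - 1), np.var([1., 2., 3., 4.], ddof=1)))
True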
@@ -1567,9 +1691,9 @@ class PCC(EvalMetric): [0]*(false_positives + true_negatives) + [1]*(false_negatives + true_positives) )] - >>> f1 = mx.metric.F1() + >>> f1 = mx.gluon.metric.F1() >>> f1.update(preds = predicts, labels = labels) - >>> pcc = mx.metric.PCC() + >>> pcc = mx.gluon.metric.PCC() >>> pcc.update(preds = predicts, labels = labels) >>> print f1.get() ('f1', 0.95233560306652054) @@ -1577,18 +1701,14 @@ class PCC(EvalMetric): ('pcc', 0.01917751877733392) """ def __init__(self, name='pcc', - output_names=None, label_names=None, - has_global_stats=True): + output_names=None, label_names=None): self.k = 2 super(PCC, self).__init__( - name=name, output_names=output_names, label_names=label_names, - has_global_stats=has_global_stats) + name=name, output_names=output_names, label_names=label_names) def _grow(self, inc): self.lcm = numpy.pad( self.lcm, ((0, inc), (0, inc)), 'constant', constant_values=(0)) - self.gcm = numpy.pad( - self.gcm, ((0, inc), (0, inc)), 'constant', constant_values=(0)) self.k += inc def _calc_mcc(self, cmat): @@ -1599,7 +1719,8 @@ def _calc_mcc(self, cmat): cov_yy = numpy.sum(y * (n - y)) if cov_xx == 0 or cov_yy == 0: return float('nan') - i = cmat.diagonal() + # i = cmat.diagonal() # mxnet.numpy.ndarray.diagonal() is currently not available. + i = cmat[numpy.arange(self.k), numpy.arange(self.k)] cov_xy = numpy.sum(i * n - x * y) return cov_xy / (cov_xx * cov_yy) ** 0.5 @@ -1618,42 +1739,29 @@ def update(self, labels, preds): # update the confusion matrix for label, pred in zip(labels, preds): - label = label.astype('int32', copy=False).asnumpy() - pred = pred.asnumpy() + label = label.astype('int32', copy=False).as_np_ndarray() + pred = pred.as_np_ndarray().as_in_ctx(label.ctx) if pred.shape != label.shape: - pred = pred.argmax(axis=1) + pred = pred.argmax(axis=1).astype(label, copy=False) else: pred = pred.astype('int32', copy=False) - n = max(pred.max(), label.max()) + n = int(max(pred.max(), label.max())) if n >= self.k: self._grow(n + 1 - self.k) - bcm = numpy.zeros((self.k, self.k)) + bcm = numpy.zeros((self.k, self.k), dtype='float64') for i, j in zip(pred, label): bcm[i, j] += 1 self.lcm += bcm - self.gcm += bcm - self.num_inst += 1 - self.global_num_inst += 1 @property def sum_metric(self): return self._calc_mcc(self.lcm) * self.num_inst - @property - def global_sum_metric(self): - return self._calc_mcc(self.gcm) * self.global_num_inst - def reset(self): """Resets the internal evaluation result to initial state.""" - self.global_num_inst = 0. - self.gcm = numpy.zeros((self.k, self.k)) - self.reset_local() - - def reset_local(self): - """Resets the local portion of the internal evaluation results to initial state.""" self.num_inst = 0. - self.lcm = numpy.zeros((self.k, self.k)) + self.lcm = numpy.zeros((self.k, self.k), dtype='float64') @register @@ -1674,8 +1782,7 @@ class Loss(EvalMetric): def __init__(self, name='loss', output_names=None, label_names=None): super(Loss, self).__init__( - name, output_names=output_names, label_names=label_names, - has_global_stats=True) + name, output_names=output_names, label_names=label_names) def update(self, _, preds): @@ -1685,9 +1792,7 @@ def update(self, _, preds): for pred in preds: loss = ndarray.sum(pred).asscalar() self.sum_metric += loss - self.global_sum_metric += loss self.num_inst += pred.size - self.global_num_inst += pred.size @register @@ -1709,6 +1814,7 @@ def __init__(self, name='caffe', @register +@use_np class CustomMetric(EvalMetric): """Computes a customized evaluation metric. 
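The _calc_mcc helper shown above generalizes the Matthews correlation coefficient to a k-class confusion matrix accumulated in self.lcm. A standalone NumPy sketch of the same computation, useful for spot-checking the PCC changes (the function name and example matrix are illustrative only):

>>> import numpy as np
>>> def multiclass_mcc(cmat):
...     # cmat[i, j] counts samples predicted as class i whose true class is j
...     n = cmat.sum()
...     x = cmat.sum(axis=1)  # per-class prediction totals
...     y = cmat.sum(axis=0)  # per-class label totals
...     cov_xx = np.sum(x * (n - x))
...     cov_yy = np.sum(y * (n - y))
...     if cov_xx == 0 or cov_yy == 0:
...         return float('nan')
...     cov_xy = np.sum(cmat.diagonal() * n - x * y)
...     return cov_xy / (cov_xx * cov_yy) ** 0.5
>>> cm = np.array([[2., 1.], [0., 3.]])
>>> print(round(float(multiclass_mcc(cm)), 4))
0.7071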
@@ -1739,7 +1845,7 @@ class CustomMetric(EvalMetric): >>> predicts = [mx.nd.array(np.array([3, -0.5, 2, 7]).reshape(4,1))] >>> labels = [mx.nd.array(np.array([2.5, 0.0, 2, 8]).reshape(4,1))] >>> feval = lambda x, y : (x + y).mean() - >>> eval_metrics = mx.metric.CustomMetric(feval=feval) + >>> eval_metrics = mx.gluon.metric.CustomMetric(feval=feval) >>> eval_metrics.update(labels, predicts) >>> print eval_metrics.get() ('custom()', 6.0) @@ -1753,8 +1859,7 @@ def __init__(self, feval, name=None, allow_extra_outputs=False, super(CustomMetric, self).__init__( name, feval=feval, allow_extra_outputs=allow_extra_outputs, - output_names=output_names, label_names=label_names, - has_global_stats=True) + output_names=output_names, label_names=label_names) self._feval = feval self._allow_extra_outputs = allow_extra_outputs @@ -1773,21 +1878,17 @@ def update(self, labels, preds): labels, preds = check_label_shapes(labels, preds, True) for pred, label in zip(preds, labels): - label = label.asnumpy() - pred = pred.asnumpy() + label = label.as_np_ndarray() + pred = pred.as_np_ndarray().as_in_ctx(label.ctx) reval = self._feval(label, pred) if isinstance(reval, tuple): (sum_metric, num_inst) = reval self.sum_metric += sum_metric - self.global_sum_metric += sum_metric self.num_inst += num_inst - self.global_num_inst += num_inst else: self.sum_metric += reval - self.global_sum_metric += reval self.num_inst += 1 - self.global_num_inst += 1 def get_config(self): raise NotImplementedError("CustomMetric cannot be serialized") @@ -1819,7 +1920,7 @@ def np(numpy_feval, name=None, allow_extra_outputs=False): >>> def custom_metric(label, pred): ... return np.mean(np.abs(label-pred)) ... - >>> metric = mx.metric.np(custom_metric) + >>> metric = mx.gluon.metric.np(custom_metric) """ def feval(label, pred): """Internal eval function.""" diff --git a/python/mxnet/gluon/nn/basic_layers.py b/python/mxnet/gluon/nn/basic_layers.py index 2011b5bf36b2..016f523be4d0 100644 --- a/python/mxnet/gluon/nn/basic_layers.py +++ b/python/mxnet/gluon/nn/basic_layers.py @@ -27,7 +27,7 @@ from .activations import Activation from ..block import Block, HybridBlock from ..utils import _indent -from ... import nd, sym +from ... import ndarray as nd, symbol as sym from ...util import is_np_array diff --git a/python/mxnet/model.py b/python/mxnet/model.py index fa247624975d..bd80ec01738b 100644 --- a/python/mxnet/model.py +++ b/python/mxnet/model.py @@ -30,7 +30,7 @@ from . import ndarray as nd from . import symbol as sym from . import optimizer as opt -from . import metric +from .gluon import metric from . import kvstore as kvs from .context import Context, cpu from .initializer import Uniform diff --git a/python/mxnet/module/base_module.py b/python/mxnet/module/base_module.py index 053a00b3abba..92fb7f188bfb 100644 --- a/python/mxnet/module/base_module.py +++ b/python/mxnet/module/base_module.py @@ -24,7 +24,7 @@ import warnings import numpy as np -from .. import metric +from ..gluon import metric from .. import ndarray from ..context import cpu @@ -231,7 +231,7 @@ def score(self, eval_data, eval_metric, num_batch=None, batch_end_callback=None, -------- >>> # An example of using score for prediction. 
>>> # Evaluate accuracy on val_dataiter - >>> metric = mx.metric.Accuracy() + >>> metric = mx.gluon.metric.Accuracy() >>> mod.score(val_dataiter, metric) >>> mod.score(val_dataiter, ['mse', 'acc']) """ @@ -543,7 +543,7 @@ def fit(self, train_data, eval_data=None, eval_metric='acc', monitor.toc_print() if end_of_batch: - eval_name_vals = eval_metric.get_global_name_value() + eval_name_vals = eval_metric.get_name_value() if batch_end_callback is not None: batch_end_params = BatchEndParam(epoch=epoch, nbatch=nbatch, diff --git a/tests/nightly/estimator/test_estimator_cnn.py b/tests/nightly/estimator/test_estimator_cnn.py index 0d113cdf4984..466c01019575 100644 --- a/tests/nightly/estimator/test_estimator_cnn.py +++ b/tests/nightly/estimator/test_estimator_cnn.py @@ -116,7 +116,7 @@ def test_estimator_cpu(): # Define estimator est = estimator.Estimator(net=net, loss=loss, - train_metrics=mx.metric.Accuracy(), + train_metrics=mx.gluon.metric.Accuracy(), trainer=trainer, context=context) # Call fit() @@ -140,7 +140,7 @@ def test_estimator_gpu(): train_data, test_data = load_data_mnist(batch_size, resize=224) loss = gluon.loss.SoftmaxCrossEntropyLoss() net.hybridize() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) # Define estimator est = estimator.Estimator(net=net, diff --git a/tests/nightly/estimator/test_sentiment_rnn.py b/tests/nightly/estimator/test_sentiment_rnn.py index 367c69b88a0b..7d3561db3789 100644 --- a/tests/nightly/estimator/test_sentiment_rnn.py +++ b/tests/nightly/estimator/test_sentiment_rnn.py @@ -190,11 +190,11 @@ def run(net, train_dataloader, test_dataloader, num_epochs, ctx, lr): trainer = mx.gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': lr}) # Define loss and evaluation metrics loss = gluon.loss.SoftmaxCrossEntropyLoss() - metrics = mx.metric.CompositeEvalMetric() - acc = mx.metric.Accuracy() - nested_metrics = mx.metric.CompositeEvalMetric() - metrics.add([acc, mx.metric.Loss()]) - nested_metrics.add([metrics, mx.metric.Accuracy()]) + metrics = mx.gluon.metric.CompositeEvalMetric() + acc = mx.gluon.metric.Accuracy() + nested_metrics = mx.gluon.metric.CompositeEvalMetric() + metrics.add([acc, mx.gluon.metric.Loss()]) + nested_metrics.add([metrics, mx.gluon.metric.Accuracy()]) # Define estimator est = estimator.Estimator(net=net, loss=loss, train_metrics=nested_metrics, diff --git a/tests/nightly/test_optimizer.py b/tests/nightly/test_optimizer.py index 0a87368d991e..9c2fcb8a62cf 100644 --- a/tests/nightly/test_optimizer.py +++ b/tests/nightly/test_optimizer.py @@ -83,7 +83,7 @@ def test_lars(): num_epoch=num_epochs) # predict accuracy for lenet - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() lenet_model.score(test_iter, acc) accuracy = acc.get()[1] assert accuracy > 0.98, "LeNet-5 training accuracy on MNIST was too low" diff --git a/tests/nightly/test_tlocal_racecondition.py b/tests/nightly/test_tlocal_racecondition.py index d43c45937c05..986e1f464bfb 100644 --- a/tests/nightly/test_tlocal_racecondition.py +++ b/tests/nightly/test_tlocal_racecondition.py @@ -91,7 +91,7 @@ def infer_type(self, in_type): def create_operator(self, ctx, shapes, dtypes): return MyCustom() -class MyMetric(mx.metric.EvalMetric): +class MyMetric(mx.gluon.metric.EvalMetric): def __init__(self): super(MyMetric, self).__init__("MyMetric") self.name = ['empty'] diff --git a/tests/python/gpu/test_contrib_amp.py b/tests/python/gpu/test_contrib_amp.py index 
b7aeeb7ccf37..0716635d6343 100644 --- a/tests/python/gpu/test_contrib_amp.py +++ b/tests/python/gpu/test_contrib_amp.py @@ -334,7 +334,7 @@ def check_amp_convert_bucketing_module(): data_val = mx.rnn.BucketSentenceIter(val_sent, batch_size, buckets=buckets, invalid_label=invalid_label) result_model.bind(data_val.provide_data, data_val.provide_label, for_training=False) - result_model.score(data_val, mx.metric.Perplexity(invalid_label), + result_model.score(data_val, mx.gluon.metric.Perplexity(invalid_label), batch_end_callback=mx.callback.Speedometer(batch_size, 1)) # AMP conversion with cast_optional_params set to true @@ -342,7 +342,7 @@ def check_amp_convert_bucketing_module(): ''' result_model = amp.convert_bucketing_module(model, cast_optional_params=True) result_model.bind(data_val.provide_data, data_val.provide_label, for_training=False) - result_model.score(data_val, mx.metric.Perplexity(invalid_label), + result_model.score(data_val, mx.gluon.metric.Perplexity(invalid_label), batch_end_callback=mx.callback.Speedometer(batch_size, 1)) ''' diff --git a/tests/python/tensorrt/lenet5_train.py b/tests/python/tensorrt/lenet5_train.py index 441729fe0d56..5603180e1347 100644 --- a/tests/python/tensorrt/lenet5_train.py +++ b/tests/python/tensorrt/lenet5_train.py @@ -75,7 +75,7 @@ def train_lenet5(num_epochs, batch_size, train_iter, val_iter, test_iter): num_epoch=num_epochs) # predict accuracy for lenet - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() lenet_model.score(test_iter, acc) accuracy = acc.get()[1] assert accuracy > 0.95, "LeNet-5 training accuracy on MNIST was too low" diff --git a/tests/python/tensorrt/test_cvnets.py b/tests/python/tensorrt/test_cvnets.py index 99312d76dc7a..cd090c5e2f5c 100644 --- a/tests/python/tensorrt/test_cvnets.py +++ b/tests/python/tensorrt/test_cvnets.py @@ -16,7 +16,6 @@ # under the License. 
import gc -import gluoncv import mxnet as mx import numpy as np @@ -29,7 +28,12 @@ def get_classif_model(model_name, use_tensorrt, ctx=mx.gpu(0), batch_size=128): mx.contrib.tensorrt.set_use_fp16(False) h, w = 32, 32 - net = gluoncv.model_zoo.get_model(model_name, pretrained=True) + model_url = "https://raw.githubusercontent.com/dmlc/web-data/221ce5b7c6d5b0777a1e3471f7f03ff98da90a0a/gluoncv/models" + param_file = "{}-0000.params".format(model_name) + symbol_file = "{}-symbol.json".format(model_name) + mx.test_utils.download("{}/{}".format(model_url, param_file), fname=param_file, overwrite=True) + mx.test_utils.download("{}/{}".format(model_url, symbol_file), fname=symbol_file, overwrite=True) + net = gluon.SymbolBlock.imports(symbol_file, ['data'], param_file) net.hybridize() net.forward(mx.nd.zeros((batch_size, 3, h, w))) net.export(model_name) @@ -130,10 +134,7 @@ def test_tensorrt_on_cifar_resnets(batch_size=32, tolerance=0.1, num_workers=1): 'cifar_resnet20_v2', 'cifar_resnet56_v2', 'cifar_resnet110_v2', - 'cifar_wideresnet16_10', - 'cifar_wideresnet28_10', - 'cifar_wideresnet40_8', - 'cifar_resnext29_16x64d' + 'cifar_wideresnet16_10' ] num_models = len(models) diff --git a/tests/python/train/test_autograd.py b/tests/python/train/test_autograd.py index 02a3601eb362..f0fdc5ea2576 100644 --- a/tests/python/train/test_autograd.py +++ b/tests/python/train/test_autograd.py @@ -55,7 +55,7 @@ def get_net(): batch_size=batch_size, shuffle=True, flat=True, silent=False) def score(net, ctx_list): - metric = mx.metric.Accuracy() + metric = gluon.metric.Accuracy() val_data.reset() for batch in val_data: datas = gluon.utils.split_and_load(batch.data[0], ctx_list, batch_axis=0) @@ -69,7 +69,7 @@ def score(net, ctx_list): def train(net, epoch, ctx_list): net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx_list) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.5}) - metric = mx.metric.Accuracy() + metric = gluon.metric.Accuracy() loss = gluon.loss.SoftmaxCrossEntropyLoss() for i in range(epoch): diff --git a/tests/python/train/test_bucketing.py b/tests/python/train/test_bucketing.py index a233e46e0992..f4b8f417a2cc 100644 --- a/tests/python/train/test_bucketing.py +++ b/tests/python/train/test_bucketing.py @@ -98,7 +98,7 @@ def sym_gen(seq_len): model.fit( train_data=data_train, eval_data=data_val, - eval_metric=mx.metric.Perplexity(invalid_label), # Use Perplexity for multiclass classification. + eval_metric=mx.gluon.metric.Perplexity(invalid_label), # Use Perplexity for multiclass classification. kvstore='device', optimizer='sgd', optimizer_params={'learning_rate': 0.01, @@ -114,7 +114,7 @@ def sym_gen(seq_len): def test_bucket_module(): # This test forecasts random sequence of words to check bucketing. # We cannot guarantee the accuracy of such an impossible task, and comments out the following line. - # assert model.score(data_val, mx.metric.MSE())[0][1] < 350, "High mean square error." + # assert model.score(data_val, mx.gluon.metric.MSE())[0][1] < 350, "High mean square error." 
model = train_model() diff --git a/tests/python/train/test_mlp.py b/tests/python/train/test_mlp.py index 80885b33f955..24947cc9c476 100644 --- a/tests/python/train/test_mlp.py +++ b/tests/python/train/test_mlp.py @@ -37,8 +37,9 @@ def test_mlp(tmpdir): def accuracy(label, pred): py = np.argmax(pred, axis=1) - return np.sum(py == label) / float(label.size) - + return np.sum(py == label.astype(py)) / float(label.size) + # currently mxnet.numpy (which is used in gluon.metric) does not support "==" between different types + num_epoch = 4 prefix = './mlp' @@ -65,7 +66,7 @@ def accuracy(label, pred): softmax, X=train_dataiter, eval_data=val_dataiter, - eval_metric=mx.metric.np(accuracy), + eval_metric=mx.gluon.metric.np(accuracy), epoch_end_callback=mx.callback.do_checkpoint(prefix), ctx=[mx.cpu(i) for i in range(2)], num_epoch=num_epoch, diff --git a/tests/python/train/test_sparse_fm.py b/tests/python/train/test_sparse_fm.py index 76a2705fe4e5..0d52ab555b56 100644 --- a/tests/python/train/test_sparse_fm.py +++ b/tests/python/train/test_sparse_fm.py @@ -108,7 +108,7 @@ def fm(factor_size, feature_dim, init): else: raise AssertionError("Unsupported optimizer type '" + optimizer + "' specified") # use accuracy as the metric - metric = mx.metric.create('MSE') + metric = mx.gluon.metric.create('MSE') # train 'num_epochs' epoch for epoch in range(num_epochs): train_iter.reset() diff --git a/tests/python/unittest/test_contrib_svrg_module.py b/tests/python/unittest/test_contrib_svrg_module.py index e9509f743f73..8c25742bd74c 100644 --- a/tests/python/unittest/test_contrib_svrg_module.py +++ b/tests/python/unittest/test_contrib_svrg_module.py @@ -240,7 +240,7 @@ def create_module_with_sgd(): num_epoch = 10 # Use metric MSE - metrics = mx.metric.create("mse") + metrics = mx.gluon.metric.create("mse") # Train with SVRGModule for e in range(num_epoch): @@ -297,7 +297,7 @@ def test_accumulate_kvstore(): def test_fit(): di, mod = setup() num_epoch = 100 - metric = mx.metric.create("mse") + metric = mx.gluon.metric.create("mse") mod.fit(di, eval_metric=metric, optimizer='sgd', optimizer_params=(('learning_rate', 0.025),), num_epoch=num_epoch, kvstore='local') diff --git a/tests/python/unittest/test_gluon_batch_processor.py b/tests/python/unittest/test_gluon_batch_processor.py index 952ed1c4a0da..bff80813bb12 100644 --- a/tests/python/unittest/test_gluon_batch_processor.py +++ b/tests/python/unittest/test_gluon_batch_processor.py @@ -52,7 +52,7 @@ def test_batch_processor_fit(): num_epochs = 1 ctx = mx.cpu() loss = gluon.loss.L2Loss() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() net.initialize(ctx=ctx) processor = BatchProcessor() trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) @@ -83,7 +83,7 @@ def test_batch_processor_validation(): num_epochs = 1 ctx = mx.cpu() loss = gluon.loss.L2Loss() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() val_loss = gluon.loss.L1Loss() net.initialize(ctx=ctx) processor = BatchProcessor() diff --git a/tests/python/unittest/test_gluon_estimator.py b/tests/python/unittest/test_gluon_estimator.py index e33aa74b3ca7..360d25544f7e 100644 --- a/tests/python/unittest/test_gluon_estimator.py +++ b/tests/python/unittest/test_gluon_estimator.py @@ -58,7 +58,7 @@ def test_fit(): num_epochs = 1 ctx = mx.cpu() loss = gluon.loss.L2Loss() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() net.initialize(ctx=ctx) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) est = Estimator(net=net, @@
-87,7 +87,7 @@ def test_validation(): num_epochs = 1 ctx = mx.cpu() loss = gluon.loss.L2Loss() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() val_loss = gluon.loss.L1Loss() net.initialize(ctx=ctx) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) @@ -126,7 +126,7 @@ def test_initializer(): ctx = mx.cpu() loss = gluon.loss.L2Loss() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() # no initializer est = Estimator(net=net, loss=loss, @@ -166,7 +166,7 @@ def test_trainer(): ctx = mx.cpu() loss = gluon.loss.L2Loss() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() net.initialize(ctx=ctx) # input no trainer with warnings.catch_warnings(record=True) as w: @@ -206,7 +206,7 @@ def test_metric(): est.fit(train_data=train_data, epochs=num_epochs) # input list of metrics - metrics = [mx.metric.Accuracy(), mx.metric.Accuracy()] + metrics = [mx.gluon.metric.Accuracy(), mx.gluon.metric.Accuracy()] est = Estimator(net=net, loss=loss, train_metrics=metrics, @@ -227,14 +227,14 @@ def test_metric(): loss=loss, trainer=trainer, context=ctx) - assert isinstance(est.train_metrics[0], mx.metric.Accuracy) + assert isinstance(est.train_metrics[0], mx.gluon.metric.Accuracy) def test_loss(): ''' test with invalid loss ''' net = _get_test_network() ctx = mx.cpu() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() net.initialize(ctx=ctx) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) # input invalid loss @@ -250,7 +250,7 @@ def test_context(): ''' test with no context, list of context, invalid context ''' net = _get_test_network() loss = gluon.loss.L2Loss() - metrics = mx.metric.Accuracy() + metrics = mx.gluon.metric.Accuracy() # input no context est = Estimator(net=net, loss=loss, @@ -332,7 +332,7 @@ def test_default_handlers(): net.initialize(ctx=ctx) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) - train_acc = mx.metric.RMSE() + train_acc = mx.gluon.metric.RMSE() loss = gluon.loss.L2Loss() est = Estimator(net=net, @@ -359,7 +359,7 @@ def test_default_handlers(): # handler with mixed metrics, some handler use metrics prepared by estimator # some handler use metrics user prepared - logging = LoggingHandler(metrics=[mx.metric.RMSE("val acc")]) + logging = LoggingHandler(metrics=[mx.gluon.metric.RMSE("val acc")]) with pytest.raises(ValueError): est.fit(train_data=train_data, epochs=num_epochs, event_handlers=[logging]) @@ -383,7 +383,7 @@ def test_val_net(): ctx = mx.cpu() loss = gluon.loss.L2Loss() val_loss = gluon.loss.L2Loss() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() net.initialize(ctx=ctx) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) est = Estimator(net=net, @@ -448,7 +448,7 @@ def test_val_handlers(): net.initialize(ctx=ctx) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) - train_acc = mx.metric.RMSE() + train_acc = mx.gluon.metric.RMSE() loss = gluon.loss.L2Loss() est = Estimator(net=net, diff --git a/tests/python/unittest/test_gluon_event_handler.py b/tests/python/unittest/test_gluon_event_handler.py index a07282cd46dd..4cadc9466ed1 100644 --- a/tests/python/unittest/test_gluon_event_handler.py +++ b/tests/python/unittest/test_gluon_event_handler.py @@ -84,7 +84,7 @@ def test_checkpoint_handler(): net = _get_test_network() ce_loss = loss.SoftmaxCrossEntropyLoss() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() est = estimator.Estimator(net, loss=ce_loss, 
train_metrics=acc) checkpoint_handler = event_handler.CheckpointHandler(model_dir=tmpdir, model_prefix=model_prefix, @@ -130,7 +130,7 @@ def test_resume_checkpoint(): net = _get_test_network() ce_loss = loss.SoftmaxCrossEntropyLoss() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() est = estimator.Estimator(net, loss=ce_loss, train_metrics=acc) checkpoint_handler = event_handler.CheckpointHandler(model_dir=tmpdir, model_prefix=model_prefix, @@ -155,7 +155,7 @@ def test_early_stopping(): net = _get_test_network() ce_loss = loss.SoftmaxCrossEntropyLoss() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() est = estimator.Estimator(net, loss=ce_loss, train_metrics=acc) early_stopping = event_handler.EarlyStoppingHandler(monitor=acc, patience=0, @@ -179,7 +179,7 @@ def test_logging(): net = _get_test_network() ce_loss = loss.SoftmaxCrossEntropyLoss() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() est = estimator.Estimator(net, loss=ce_loss, train_metrics=acc) est.logger.addHandler(logging.FileHandler(output_dir)) @@ -226,7 +226,7 @@ def epoch_end(self, estimator, *args, **kwargs): test_data = _get_test_data() net = _get_test_network() ce_loss = loss.SoftmaxCrossEntropyLoss() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() est = estimator.Estimator(net, loss=ce_loss, train_metrics=acc) custom_handler = CustomStopHandler(3, 2) est.fit(test_data, event_handlers=[custom_handler], epochs=3) @@ -249,7 +249,7 @@ def test_logging_interval(): dataloader = _get_test_data(in_size=data_size) num_epochs = 1 ce_loss = loss.SoftmaxCrossEntropyLoss() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() logging = LoggingHandler(metrics=[acc], log_interval=log_interval) est = estimator.Estimator(net=net, loss=ce_loss, @@ -273,7 +273,7 @@ def test_logging_interval(): ''' test case #2: log interval is 5 ''' old_stdout = sys.stdout sys.stdout = mystdout = StringIO() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() log_interval = 5 logging = LoggingHandler(metrics=[acc], log_interval=log_interval) est = estimator.Estimator(net=net, @@ -299,7 +299,7 @@ def test_validation_handler_batch_axis(): test_data = _get_test_data() net = _get_test_network() ce_loss = loss.SoftmaxCrossEntropyLoss() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() est = estimator.Estimator(net, loss=ce_loss, train_metrics=acc) est.fit(test_data, epochs=3) @@ -315,7 +315,7 @@ def test_validation_handler(): net = _get_test_network() ce_loss = loss.SoftmaxCrossEntropyLoss() - acc = mx.metric.Accuracy() + acc = mx.gluon.metric.Accuracy() est = estimator.Estimator(net, loss=ce_loss, train_metrics=acc) val_handler = ValidationHandler(val_data=test_data, eval_fn=est.evaluate, diff --git a/tests/python/unittest/test_loss.py b/tests/python/unittest/test_loss.py index e779fd672701..8ba5e5ee0610 100644 --- a/tests/python/unittest/test_loss.py +++ b/tests/python/unittest/test_loss.py @@ -79,9 +79,9 @@ def test_ce_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - eval_metric=mx.metric.Loss(), optimizer='adam', + eval_metric=mx.gluon.metric.Loss(), optimizer='adam', initializer=mx.init.Xavier(magnitude=2)) - assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 + assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.05 # tracked at: https://github.com/apache/incubator-mxnet/issues/11691 
@with_seed() @@ -97,9 +97,9 @@ def test_bce_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - eval_metric=mx.metric.Loss(), optimizer='adam', + eval_metric=mx.gluon.metric.Loss(), optimizer='adam', initializer=mx.init.Xavier(magnitude=2)) - assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.01 + assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.01 # Test against npy data = mx.random.uniform(-5, 5, shape=(10,)) label = mx.random.uniform(0, 1, shape=(10,)) @@ -142,8 +142,8 @@ def test_kl_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - eval_metric=mx.metric.Loss(), optimizer='adam') - assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 + eval_metric=mx.gluon.metric.Loss(), optimizer='adam') + assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.05 @with_seed() @@ -159,9 +159,9 @@ def test_l2_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.metric.Loss(), + initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.gluon.metric.Loss(), optimizer='adam') - assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 + assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.05 @with_seed() @@ -177,9 +177,9 @@ def test_l1_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.metric.Loss(), + initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.gluon.metric.Loss(), optimizer='adam') - assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.1 + assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.1 @with_seed() @@ -222,9 +222,9 @@ def test_ctc_loss_train(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.metric.Loss(), + initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.gluon.metric.Loss(), optimizer='adam') - assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 10 + assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 10 @with_seed() @@ -243,12 +243,12 @@ def test_sample_weight_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label', 'w')) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - eval_metric=mx.metric.Loss(), optimizer='adam') + eval_metric=mx.gluon.metric.Loss(), optimizer='adam') data_iter = mx.io.NDArrayIter(data[10:], {'label': label, 'w': weight}, batch_size=10) - score = mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] + score = mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] assert score > 1 data_iter = mx.io.NDArrayIter(data[:10], {'label': label, 'w': weight}, batch_size=10) - score = mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] + score = 
mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] assert score < 0.05 @@ -266,13 +266,13 @@ def test_saveload(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=100, optimizer_params={'learning_rate': 1.}, - eval_metric=mx.metric.Loss()) + eval_metric=mx.gluon.metric.Loss()) mod.save_checkpoint('test', 100, save_optimizer_states=True) mod = mx.mod.Module.load('test', 100, load_optimizer_states=True, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=100, optimizer_params={'learning_rate': 1.}, - eval_metric=mx.metric.Loss()) - assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 + eval_metric=mx.gluon.metric.Loss()) + assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.05 @with_seed() def test_huber_loss(): @@ -287,9 +287,9 @@ def test_huber_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.metric.Loss(), + initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.gluon.metric.Loss(), optimizer='adam') - assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 + assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.05 @with_seed() @@ -305,9 +305,9 @@ def test_hinge_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.metric.Loss(), + initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.gluon.metric.Loss(), optimizer='adam') - assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.06 + assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.06 @with_seed() @@ -323,9 +323,9 @@ def test_squared_hinge_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.metric.Loss(), + initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.gluon.metric.Loss(), optimizer='adam') - assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 + assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.05 @with_seed() @@ -344,9 +344,9 @@ def test_triplet_loss(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('pos','neg')) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.metric.Loss(), + initializer=mx.init.Xavier(magnitude=2), eval_metric=mx.gluon.metric.Loss(), optimizer='adam') - assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 + assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.05 @with_seed() def test_sdml_loss(): @@ -453,9 +453,9 @@ def test_poisson_nllloss_mod(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label',)) mod.fit(data_iter, num_epoch=20, optimizer_params={'learning_rate': 0.01}, - initializer=mx.init.Normal(sigma=0.1), eval_metric=mx.metric.Loss(), + initializer=mx.init.Normal(sigma=0.1), eval_metric=mx.gluon.metric.Loss(), optimizer='adam') - assert 
mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.05 + assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.05 @with_seed() def test_bce_loss_with_pos_weight(): @@ -474,9 +474,9 @@ def test_bce_loss_with_pos_weight(): loss = mx.sym.make_loss(loss) mod = mx.mod.Module(loss, data_names=('data',), label_names=('label', 'pos_w')) mod.fit(data_iter, num_epoch=200, optimizer_params={'learning_rate': 0.01}, - eval_metric=mx.metric.Loss(), optimizer='adam', + eval_metric=mx.gluon.metric.Loss(), optimizer='adam', initializer=mx.init.Xavier(magnitude=2)) - assert mod.score(data_iter, eval_metric=mx.metric.Loss())[0][1] < 0.01 + assert mod.score(data_iter, eval_metric=mx.gluon.metric.Loss())[0][1] < 0.01 # Test against npy data = mx.nd.random.uniform(-5, 5, shape=(N, 5)) label = mx.nd.array(np.random.randint(2, size=(N, 5)), dtype='float32') diff --git a/tests/python/unittest/test_metric.py b/tests/python/unittest/test_metric.py index d1e1c5a35fb3..c2e4783de411 100644 --- a/tests/python/unittest/test_metric.py +++ b/tests/python/unittest/test_metric.py @@ -16,6 +16,7 @@ # under the License. import mxnet as mx +from mxnet.test_utils import use_np import numpy as np import scipy from scipy.stats import pearsonr @@ -25,9 +26,9 @@ from copy import deepcopy def check_metric(metric, *args, **kwargs): - metric = mx.metric.create(metric, *args, **kwargs) + metric = mx.gluon.metric.create(metric, *args, **kwargs) str_metric = json.dumps(metric.get_config()) - metric2 = mx.metric.create(str_metric) + metric2 = mx.gluon.metric.create(str_metric) assert metric.get_config() == metric2.get_config() @@ -35,93 +36,16 @@ def test_metrics(): check_metric('acc', axis=0) check_metric('f1') check_metric('mcc') - check_metric('perplexity', -1) + check_metric('perplexity', axis=-1) check_metric('pearsonr') check_metric('pcc') check_metric('nll_loss') check_metric('loss') - composite = mx.metric.create(['acc', 'f1']) + composite = mx.gluon.metric.create(['acc', 'f1']) check_metric(composite) -def _check_global_metric(metric, *args, **kwargs): - def _create_pred_label(): - if use_same_shape: - pred = mx.nd.random.uniform(0, 1, shape=shape) - label = mx.nd.random.uniform(0, 1, shape=shape) - else: - # Make a random prediction - idx = np.random.rand(*shape).argsort(1) - pred = mx.nd.array(1 - 0.1 * idx) - # Label is half 1 and half 0 - # Setting all 0s or all 1s would make either - # MCC or F1 metrics always produce 0 - label = mx.nd.ones(shape[0]) - label[:shape[0] // 2] = 0 - return pred, label - - def _compare_metric_result(m1, m2): - # Compare names - assert m1[0] == m2[0] - # Compare values - if isinstance(m1[1], (list, tuple)): - assert len(m1[1]) == len(m2[1]) - for r1, r2 in zip(m1[1], m2[1]): - assert r1 == r2 or \ - (math.isnan(r1) and - math.isnan(r2)) - else: - assert m1[1] == m2[1] or \ - (math.isnan(m1[1]) and - math.isnan(m2[1])) - - shape = kwargs.pop('shape', (10,10)) - use_same_shape = kwargs.pop('use_same_shape', False) - m1 = mx.metric.create(metric, *args, **kwargs) - m2 = deepcopy(m1) - # check that global stats are not reset when calling - # reset_local() - for i in range(10): - pred, label = _create_pred_label() - m1.update([label], [pred]) - m1.reset_local() - m2.update([label], [pred]) - assert m1.get_global() == m2.get() - - # check that reset_local() properly resets the local state - m1.reset_local() - m2.reset() - pred, label = _create_pred_label() - m1.update([label], [pred]) - m1.reset_local() - pred, label = _create_pred_label() - m1.update([label], [pred]) - 
m2.update([label], [pred]) - _compare_metric_result(m1.get(), m2.get()) - -@with_seed() -def test_global_metric(): - _check_global_metric('acc') - _check_global_metric('TopKAccuracy', top_k=3) - _check_global_metric('f1', shape=(10,2)) - _check_global_metric('f1', shape=(10,2), average='micro') - _check_global_metric('mcc', shape=(10,2)) - _check_global_metric('mcc', shape=(10,2), average='micro') - _check_global_metric('perplexity', -1) - _check_global_metric('pearsonr', use_same_shape=True) - _check_global_metric('pcc', shape=(10,2)) - _check_global_metric('nll_loss') - _check_global_metric('loss') - _check_global_metric('ce') - _check_global_metric('mae', use_same_shape=True) - _check_global_metric('mse', use_same_shape=True) - _check_global_metric('rmse', use_same_shape=True) - def custom_metric(label, pred): - return np.mean(np.abs(label-pred)) - _check_global_metric(custom_metric, use_same_shape=True) - _check_global_metric(['acc', 'f1'], shape=(10,2)) - def test_nll_loss(): - metric = mx.metric.create('nll_loss') + metric = mx.gluon.metric.create('nll_loss') pred = mx.nd.array([[0.2, 0.3, 0.5], [0.6, 0.1, 0.3]]) label = mx.nd.array([2, 1]) metric.update([label], [pred]) @@ -132,36 +56,36 @@ def test_nll_loss(): def test_acc(): pred = mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]]) label = mx.nd.array([0, 1, 1]) - metric = mx.metric.create('acc') + metric = mx.gluon.metric.create('acc') metric.update([label], [pred]) _, acc = metric.get() expected_acc = (np.argmax(pred, axis=1) == label).sum().asscalar() / label.size - assert acc == expected_acc + np.testing.assert_almost_equal(acc, expected_acc) def test_acc_2d_label(): # label maybe provided in 2d arrays in custom data iterator pred = mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6], [0.8, 0.2], [0.3, 0.5], [0.6, 0.4]]) label = mx.nd.array([[0, 1, 1], [1, 0, 1]]) - metric = mx.metric.create('acc') + metric = mx.gluon.metric.create('acc') metric.update([label], [pred]) _, acc = metric.get() expected_acc = (np.argmax(pred, axis=1).asnumpy() == label.asnumpy().ravel()).sum() / \ float(label.asnumpy().ravel().size) - assert acc == expected_acc + np.testing.assert_almost_equal(acc, expected_acc) def test_loss_update(): pred = mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]]) - metric1 = mx.metric.create('loss') - metric2 = mx.metric.create('loss') + metric1 = mx.gluon.metric.create('loss') + metric2 = mx.gluon.metric.create('loss') metric1.update(None, [pred]) metric2.update(None, pred) _, acc1 = metric1.get() _, acc2 = metric2.get() assert acc1 == acc2 -def test_f1(): - microF1 = mx.metric.create("f1", average="micro") - macroF1 = mx.metric.F1(average="macro") +def test_binary_f1(): + microF1 = mx.gluon.metric.create("f1", average="micro") + macroF1 = mx.gluon.metric.F1(average="macro") assert np.isnan(macroF1.get()[1]) assert np.isnan(microF1.get()[1]) @@ -191,7 +115,7 @@ def test_f1(): microF1.update([label11, label12], [pred11, pred12]) macroF1.update([label11, label12], [pred11, pred12]) assert microF1.num_inst == 4 - assert macroF1.num_inst == 1 + assert macroF1.num_inst == 4 # f1 = 2 * tp / (2 * tp + fp + fn) fscore1 = 2. * (1) / (2 * 1 + 1 + 0) np.testing.assert_almost_equal(microF1.get()[1], fscore1) @@ -200,29 +124,96 @@ def test_f1(): microF1.update([label21, label22], [pred21, pred22]) macroF1.update([label21, label22], [pred21, pred22]) assert microF1.num_inst == 6 - assert macroF1.num_inst == 2 + assert macroF1.num_inst == 6 fscore2 = 2. * (1) / (2 * 1 + 0 + 0) fscore_total = 2. 
* (1 + 1) / (2 * (1 + 1) + (1 + 0) + (0 + 0)) np.testing.assert_almost_equal(microF1.get()[1], fscore_total) - np.testing.assert_almost_equal(macroF1.get()[1], (fscore1 + fscore2) / 2.) + np.testing.assert_almost_equal(macroF1.get()[1], fscore_total) + +def test_multiclass_f1(): + microF1 = mx.gluon.metric.create("f1", class_type="multiclass", average="micro") + macroF1 = mx.gluon.metric.F1(class_type="multiclass", average="macro") + + assert np.isnan(macroF1.get()[1]) + assert np.isnan(microF1.get()[1]) + # check the case where one class's F1 is zero + pred = mx.nd.array([[0.9, 0.1], + [0.8, 0.2]]) + label = mx.nd.array([0, 0]) + macroF1.update([label], [pred]) + microF1.update([label], [pred]) + assert macroF1.get()[1] == 0.5 # one class is 1.0, the other is 0. (divided by 0) + assert microF1.get()[1] == 1.0 # globally f1 is 1.0 + macroF1.reset() + microF1.reset() + + # test case from sklearn; here pred holds probability distributions instead of predicted labels + pred11 = mx.nd.array([[1, 0, 0], [0, 1, 0]]) + label11 = mx.nd.array([0, 2]) + pred12 = mx.nd.array([[0, 0, 1], [1, 0, 0], [0, 1, 0], [0, 0, 1]]) + label12 = mx.nd.array([1, 0, 0, 1]) + + microF1.update([label11, label12], [pred11, pred12]) + macroF1.update([label11, label12], [pred11, pred12]) + assert microF1.num_inst == 6 + assert macroF1.num_inst == 6 + + # from sklearn.metrics import f1_score + # overall_pred = [0, 1, 2, 0, 1, 2] + # overall_label = [0, 2, 1, 0, 0, 1] + fmacro = 0.26666666666666666 #f1_score(overall_label, overall_pred, average="macro") + fmicro = 0.3333333333333333 #f1_score(overall_label, overall_pred, average="micro") + np.testing.assert_almost_equal(microF1.get()[1], fmicro) + np.testing.assert_almost_equal(macroF1.get()[1], fmacro) + +def test_multilabel_f1(): + microF1 = mx.gluon.metric.create("f1", class_type="multilabel", average="micro") + macroF1 = mx.gluon.metric.F1(class_type="multilabel", average="macro") + + assert np.isnan(macroF1.get()[1]) + assert np.isnan(microF1.get()[1]) + + # check the case where one class's F1 is zero + pred = mx.nd.array([[0.9, 0.1], + [0.8, 0.2]]) + label = mx.nd.array([[1, 1], [1, 1]]) + macroF1.update([label], [pred]) + microF1.update([label], [pred]) + assert macroF1.get()[1] == 0.5 # one class is 1.0, the other is 0.
(divided by 0) + np.testing.assert_almost_equal(microF1.get()[1], 2.0 / 3) + macroF1.reset() + microF1.reset() + + pred11 = mx.nd.array([[0.9, 0.4, 0.3], [0.2, 0.7, 0.8]]) + label11 = mx.nd.array([[1, 0, 1], [0, 0, 1]]) + pred12 = mx.nd.array([[0.6, 0.6, 0.7]]) + label12 = mx.nd.array([[0, 1, 1]]) + + microF1.update([label11, label12], [pred11, pred12]) + macroF1.update([label11, label12], [pred11, pred12]) + assert microF1.num_inst == 3 + assert macroF1.num_inst == 3 + #from sklearn.metrics import f1_score + #overall_pred = [[1, 0, 0], [0, 1, 1], [1, 1, 1]] + #overall_label = [[1, 0, 1], [0, 0, 1], [0, 1, 1]] + fmacro = 0.7111111111111111 #f1_score(overall_label, overall_pred, average="macro") + fmicro = 0.7272727272727272 #f1_score(overall_label, overall_pred, average="micro") + np.testing.assert_almost_equal(microF1.get()[1], fmicro) + np.testing.assert_almost_equal(macroF1.get()[1], fmacro) + def test_mcc(): - microMCC = mx.metric.create("mcc", average="micro") - macroMCC = mx.metric.MCC(average="macro") + microMCC = mx.gluon.metric.create("mcc") assert np.isnan(microMCC.get()[1]) - assert np.isnan(macroMCC.get()[1]) - + # check divide by zero pred = mx.nd.array([[0.9, 0.1], [0.8, 0.2]]) label = mx.nd.array([0, 0]) microMCC.update([label], [pred]) - macroMCC.update([label], [pred]) assert microMCC.get()[1] == 0.0 - assert macroMCC.get()[1] == 0.0 microMCC.reset() - macroMCC.reset() pred11 = mx.nd.array([[0.1, 0.9], [0.5, 0.5]]) @@ -235,51 +226,40 @@ def test_mcc(): pred22 = mx.nd.array([[0.2, 0.8]]) label22 = mx.nd.array([1]) microMCC.update([label11, label12], [pred11, pred12]) - macroMCC.update([label11, label12], [pred11, pred12]) assert microMCC.num_inst == 4 - assert macroMCC.num_inst == 1 tp1 = 1; fp1 = 0; fn1 = 1; tn1=2 mcc1 = (tp1*tn1 - fp1*fn1) / np.sqrt((tp1+fp1)*(tp1+fn1)*(tn1+fp1)*(tn1+fn1)) np.testing.assert_almost_equal(microMCC.get()[1], mcc1) - np.testing.assert_almost_equal(macroMCC.get()[1], mcc1) microMCC.update([label21, label22], [pred21, pred22]) - macroMCC.update([label21, label22], [pred21, pred22]) assert microMCC.num_inst == 6 - assert macroMCC.num_inst == 2 tp2 = 1; fp2 = 0; fn2 = 0; tn2=1 mcc2 = (tp2*tn2 - fp2*fn2) / np.sqrt((tp2+fp2)*(tp2+fn2)*(tn2+fp2)*(tn2+fn2)) tpT = tp1+tp2; fpT = fp1+fp2; fnT = fn1+fn2; tnT = tn1+tn2; mccT = (tpT*tnT - fpT*fnT) / np.sqrt((tpT+fpT)*(tpT+fnT)*(tnT+fpT)*(tnT+fnT)) np.testing.assert_almost_equal(microMCC.get()[1], mccT) - np.testing.assert_almost_equal(macroMCC.get()[1], .5*(mcc1+mcc2)) def test_perplexity(): pred = mx.nd.array([[0.8, 0.2], [0.2, 0.8], [0, 1.]]) label = mx.nd.array([0, 1, 1]) p = pred.asnumpy()[np.arange(label.size), label.asnumpy().astype('int32')] perplexity_expected = np.exp(-np.log(p).sum()/label.size) - metric = mx.metric.create('perplexity', -1) + metric = mx.gluon.metric.create('perplexity', axis=-1) metric.update([label], [pred]) _, perplexity = metric.get() - assert perplexity == perplexity_expected + np.testing.assert_almost_equal(perplexity, perplexity_expected) def test_pearsonr(): pred1 = mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]]) label1 = mx.nd.array([[1, 0], [0, 1], [0, 1]]) pearsonr_expected_np = np.corrcoef(pred1.asnumpy().ravel(), label1.asnumpy().ravel())[0, 1] pearsonr_expected_scipy, _ = pearsonr(pred1.asnumpy().ravel(), label1.asnumpy().ravel()) - macro_pr = mx.metric.create('pearsonr', average='macro') - micro_pr = mx.metric.create('pearsonr', average='micro') + micro_pr = mx.gluon.metric.create('pearsonr') - assert np.isnan(macro_pr.get()[1]) assert np.isnan(micro_pr.get()[1]) - 
macro_pr.update([label1], [pred1]) micro_pr.update([label1], [pred1]) - np.testing.assert_almost_equal(macro_pr.get()[1], pearsonr_expected_np) - np.testing.assert_almost_equal(macro_pr.get()[1], pearsonr_expected_scipy) np.testing.assert_almost_equal(micro_pr.get()[1], pearsonr_expected_np) np.testing.assert_almost_equal(micro_pr.get()[1], pearsonr_expected_scipy) @@ -292,11 +272,7 @@ def test_pearsonr(): pearsonr_expected_np = np.corrcoef(pred12.asnumpy().ravel(), label12.asnumpy().ravel())[0, 1] pearsonr_expected_scipy, _ = pearsonr(pred12.asnumpy().ravel(), label12.asnumpy().ravel()) - macro_pr.reset() micro_pr.update([label2], [pred2]) - macro_pr.update([label12], [pred12]) - np.testing.assert_almost_equal(macro_pr.get()[1], pearsonr_expected_np) - np.testing.assert_almost_equal(macro_pr.get()[1], pearsonr_expected_scipy) np.testing.assert_almost_equal(micro_pr.get()[1], pearsonr_expected_np) np.testing.assert_almost_equal(micro_pr.get()[1], pearsonr_expected_scipy) @@ -317,18 +293,18 @@ def test_pcc(): [ 7, 3 ], [ 2, 5 ], ]) - met_pcc = mx.metric.create('pcc') + met_pcc = mx.gluon.metric.create('pcc') met_pcc.update(labels, preds) _, pcc = met_pcc.get() # pcc should agree with mcc for binary classification - met_mcc = mx.metric.create('mcc') + met_mcc = mx.gluon.metric.create('mcc') met_mcc.update(labels, preds) _, mcc = met_mcc.get() np.testing.assert_almost_equal(pcc, mcc) # pcc should agree with Pearson for binary classification - met_pear = mx.metric.create('pearsonr') + met_pear = mx.gluon.metric.create('pearsonr') met_pear.update(labels, [p.argmax(axis=1) for p in preds]) _, pear = met_pear.get() np.testing.assert_almost_equal(pcc, pear) @@ -377,7 +353,7 @@ def test_pcc(): # * order # * batch size # * update frequency - labels = [ [ i ] for i in labels[0] ] + labels = [ [ i.reshape(-1) ] for i in labels[0] ] labels.reverse() preds = [ [ i.reshape((1, -1)) ] for i in preds[0] ] preds.reverse() @@ -391,19 +367,20 @@ def test_single_array_input(): pred = mx.nd.array([[1,2,3,4]]) label = pred + 0.1 - mse = mx.metric.create('mse') + mse = mx.gluon.metric.create('mse') mse.update(label, pred) _, mse_res = mse.get() np.testing.assert_almost_equal(mse_res, 0.01) - mae = mx.metric.create('mae') + mae = mx.gluon.metric.create('mae') mae.update(label, pred) mae.get() _, mae_res = mae.get() np.testing.assert_almost_equal(mae_res, 0.1) - rmse = mx.metric.create('rmse') + rmse = mx.gluon.metric.create('rmse') rmse.update(label, pred) rmse.get() _, rmse_res = rmse.get() np.testing.assert_almost_equal(rmse_res, 0.1) + diff --git a/tests/python/unittest/test_metric_perf.py b/tests/python/unittest/test_metric_perf.py index fc0f8da5d451..3c9abf6e3cc0 100644 --- a/tests/python/unittest/test_metric_perf.py +++ b/tests/python/unittest/test_metric_perf.py @@ -66,7 +66,7 @@ def data(self): def run_metric(name, data_gen_cls, i, n, c, pred_ctx, label_ctx, **kwargs): """ Helper function for running one metric benchmark """ - metric = mx.metric.create(name, **kwargs) + metric = mx.gluon.metric.create(name, **kwargs) data_gen = data_gen_cls(n, c, pred_ctx, label_ctx) try: label, pred = data_gen.data() @@ -105,7 +105,7 @@ def test_metric_performance(): output_dims = [128, 1024, 8192] ctxs = [mx.cpu(), mx.gpu()] - print("\nmx.metric benchmarks", file=sys.stderr) + print("\nmx.gluon.metric benchmarks", file=sys.stderr) print( "{:15}{:10}{:12}{:12}{:15}{:15}{}".format( 'Metric', 'Data-Ctx', 'Label-Ctx', 'Data Size', 'Batch Size', 'Output Dim', 'Elapsed Time'), diff --git a/tests/python/unittest/test_module.py 
b/tests/python/unittest/test_module.py index 65d86f62baf4..7941eec5004d 100644 --- a/tests/python/unittest/test_module.py +++ b/tests/python/unittest/test_module.py @@ -275,7 +275,7 @@ def sym_gen(seq_len): mod2.fit( train_data=data_train, eval_data=data_val, - eval_metric=mx.metric.Perplexity(invalid_label), # Use Perplexity for multiclass classification. + eval_metric=mx.gluon.metric.Perplexity(invalid_label), # Use Perplexity for multiclass classification. kvstore='device', optimizer='sgd', optimizer_params={'learning_rate': 0.01, @@ -711,7 +711,7 @@ def fm(factor_size, feature_dim, init): expected_accuracy = 0.02 # use accuracy as the metric - metric = mx.metric.create('MSE') + metric = mx.gluon.metric.create('MSE') # train 'num_epochs' epoch for epoch in range(num_epochs): train_iter.reset() diff --git a/tools/caffe_converter/test_converter.py b/tools/caffe_converter/test_converter.py index 49f8bdb167c2..880de1be449f 100644 --- a/tools/caffe_converter/test_converter.py +++ b/tools/caffe_converter/test_converter.py @@ -40,7 +40,7 @@ def test_imagenet_model_performance(model_name, val_data, gpus, batch_size): meta_info = get_model_meta_info(model_name) [model_name, mean] = convert_caffe_model(model_name, meta_info) sym, arg_params, aux_params = mx.model.load_checkpoint(model_name, 0) - acc = [mx.metric.create('acc'), mx.metric.create('top_k_accuracy', top_k=5)] + acc = [mx.gluon.metric.create('acc'), mx.gluon.metric.create('top_k_accuracy', top_k=5)] if isinstance(mean, str): mean_args = {'mean_img':mean} else: