From f0a757bc189e77e11a8d19af9cee9e2c9b8d2149 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Wed, 5 Sep 2018 17:30:32 +0000 Subject: [PATCH 01/15] Support integer type in ctc_loss --- src/operator/contrib/ctc_loss-inl.h | 123 ++++++++++++++-------------- 1 file changed, 62 insertions(+), 61 deletions(-) diff --git a/src/operator/contrib/ctc_loss-inl.h b/src/operator/contrib/ctc_loss-inl.h index 9380be47451f..70e6894320dc 100644 --- a/src/operator/contrib/ctc_loss-inl.h +++ b/src/operator/contrib/ctc_loss-inl.h @@ -256,68 +256,69 @@ class CTCLossOp : public Operator { exceed_cudnn_limit = false; Stream *s = ctx.get_stream(); - Tensor data = - in_data[ctc_loss::kData].get(s); - Tensor labels = - in_data[ctc_loss::kLabel].get(s); - - Tensor costs = - out_data[ctc_loss::kOut].get(s); - Tensor grad = - out_data[ctc_loss::kGrad].get(s); - - int max_seq_len = data.size(0); - int batch_size = data.size(1); - int alphabet_size = data.size(2); - - // data_lengths - std::vector data_lengths(batch_size, max_seq_len); - if (param_.use_data_lengths) { - int kInputLength = 2; - IndexTensorToVector(in_data[kInputLength].get(s), &data_lengths); - } - - // label_lengths - std::vector packed_labels; - std::vector label_lengths(batch_size); - - if (param_.use_label_lengths) { - int kLabelLength = 2+param_.use_data_lengths; - exceed_cudnn_limit = PackLabelByLength(labels, in_data[kLabelLength].get(s), - &packed_labels, &label_lengths); - } else { - exceed_cudnn_limit = LabelTensorToPackedVector(labels, param_.blank_label == 0?0:-1, - &packed_labels, &label_lengths); - } - -// CUDNN is disabled due to lack of support for input lengths -/* #if defined(__CUDACC__) && MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 7 */ -/* if (!exceed_cudnn_limit) { */ -/* cudnn_forward(ctx, s, data, costs, grad, */ -/* &data_lengths, &label_lengths, &packed_labels, */ -/* max_seq_len, batch_size, alphabet_size, */ -/* req[ctc_loss::kGrad] != mxnet::kNullOp); */ -/* } else { */ -/* baidu_forward(ctx, s, data, costs, grad, */ -/* &data_lengths, &label_lengths, &packed_labels, */ -/* batch_size, alphabet_size, req[ctc_loss::kGrad] != mxnet::kNullOp); */ -/* } */ -/* #else */ - - baidu_forward(ctx, s, data, costs, grad, - &data_lengths, &label_lengths, &packed_labels, - batch_size, alphabet_size, req[ctc_loss::kGrad] != mxnet::kNullOp); - - if (param_.use_data_lengths) { - // baidu warp CTC implementation sometimes includes undefined gradients - // for data outside of length mask. Setting to 0 to make it consistent - // with CPU implementation. 
- int kInputLength = 2; - mxnet_op::SequenceMask(grad, in_data[kInputLength].get(s), - static_cast(0)); - } + MSHADOW_TYPE_SWITCH(in_data[0].type_flag_, DType, { + Tensor data = + in_data[ctc_loss::kData].get(s); + Tensor labels = + in_data[ctc_loss::kLabel].get(s); + + Tensor costs = + out_data[ctc_loss::kOut].get(s); + Tensor grad = + out_data[ctc_loss::kGrad].get(s); + + int max_seq_len = data.size(0); + int batch_size = data.size(1); + int alphabet_size = data.size(2); + + // data_lengths + std::vector data_lengths(batch_size, max_seq_len); + if (param_.use_data_lengths) { + int kInputLength = 2; + IndexTensorToVector(in_data[kInputLength].get(s), &data_lengths); + } + + // label_lengths + std::vector packed_labels; + std::vector label_lengths(batch_size); + + if (param_.use_label_lengths) { + int kLabelLength = 2 + param_.use_data_lengths; + exceed_cudnn_limit = PackLabelByLength(labels, in_data[kLabelLength].get(s), + &packed_labels, &label_lengths); + } else { + exceed_cudnn_limit = LabelTensorToPackedVector(labels, param_.blank_label == 0?0:-1, + &packed_labels, &label_lengths); + } + + // CUDNN is disabled due to lack of support for input lengths + /* #if defined(__CUDACC__) && MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 7 */ + /* if (!exceed_cudnn_limit) { */ + /* cudnn_forward(ctx, s, data, costs, grad, */ + /* &data_lengths, &label_lengths, &packed_labels, */ + /* max_seq_len, batch_size, alphabet_size, */ + /* req[ctc_loss::kGrad] != mxnet::kNullOp); */ + /* } else { */ + /* baidu_forward(ctx, s, data, costs, grad, */ + /* &data_lengths, &label_lengths, &packed_labels, */ + /* batch_size, alphabet_size, req[ctc_loss::kGrad] != mxnet::kNullOp); */ + /* } */ + /* #else */ + + baidu_forward(ctx, s, data, costs, grad, + &data_lengths, &label_lengths, &packed_labels, + batch_size, alphabet_size, req[ctc_loss::kGrad] != mxnet::kNullOp); + + if (param_.use_data_lengths) { + // baidu warp CTC implementation sometimes includes undefined gradients + // for data outside of length mask. Setting to 0 to make it consistent + // with CPU implementation. 
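The comment just above, together with the SequenceMask call that follows it, zeroes gradient entries past each sample's valid length when use_data_lengths is set, so the warp-ctc result matches the CPU path. A minimal NumPy sketch of that zeroing (the array names, shapes and values here are illustrative, not taken from the operator):

import numpy as np

# grad is laid out (max_seq_len, batch_size, alphabet_size), as in the operator above.
max_seq_len, batch_size, alphabet_size = 4, 2, 3
grad = np.random.uniform(size=(max_seq_len, batch_size, alphabet_size)).astype(np.float32)
data_lengths = np.array([4, 2])  # per-sample valid lengths

# Zero every time step at or beyond a sample's length, which is what
# SequenceMask(grad, lengths, 0) is expected to do along the time axis.
for b, length in enumerate(data_lengths):
    grad[length:, b, :] = 0.0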
+ int kInputLength = 2; + mxnet_op::SequenceMask(grad, in_data[kInputLength].get(s), + static_cast(0)); + } + }); } - virtual void Backward(const OpContext &ctx, const std::vector &out_grad, const std::vector &in_data, From 7af727465843c32d50ed1f572fcf1ff7a13bbc5b Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Wed, 5 Sep 2018 11:38:09 -0700 Subject: [PATCH 02/15] Support any data type in ctc_loss operator --- src/operator/contrib/ctc_loss-inl.h | 182 ++++++++++++++-------------- 1 file changed, 92 insertions(+), 90 deletions(-) diff --git a/src/operator/contrib/ctc_loss-inl.h b/src/operator/contrib/ctc_loss-inl.h index 70e6894320dc..85718896e848 100644 --- a/src/operator/contrib/ctc_loss-inl.h +++ b/src/operator/contrib/ctc_loss-inl.h @@ -50,11 +50,10 @@ namespace mxnet { namespace op { - namespace ctc_loss { -enum CTCLossOpInputs { kData, kLabel }; -enum CTCLossOpOutputs { kOut, kGrad }; -enum CTCLossOpForwardResource { kTempSpace }; + enum CTCLossOpInputs { kData, kLabel }; + enum CTCLossOpOutputs { kOut, kGrad }; + enum CTCLossOpForwardResource { kTempSpace }; } template @@ -256,69 +255,70 @@ class CTCLossOp : public Operator { exceed_cudnn_limit = false; Stream *s = ctx.get_stream(); - MSHADOW_TYPE_SWITCH(in_data[0].type_flag_, DType, { - Tensor data = - in_data[ctc_loss::kData].get(s); - Tensor labels = - in_data[ctc_loss::kLabel].get(s); - - Tensor costs = - out_data[ctc_loss::kOut].get(s); - Tensor grad = - out_data[ctc_loss::kGrad].get(s); - - int max_seq_len = data.size(0); - int batch_size = data.size(1); - int alphabet_size = data.size(2); - - // data_lengths - std::vector data_lengths(batch_size, max_seq_len); - if (param_.use_data_lengths) { - int kInputLength = 2; - IndexTensorToVector(in_data[kInputLength].get(s), &data_lengths); - } - - // label_lengths - std::vector packed_labels; - std::vector label_lengths(batch_size); - - if (param_.use_label_lengths) { - int kLabelLength = 2 + param_.use_data_lengths; - exceed_cudnn_limit = PackLabelByLength(labels, in_data[kLabelLength].get(s), - &packed_labels, &label_lengths); - } else { - exceed_cudnn_limit = LabelTensorToPackedVector(labels, param_.blank_label == 0?0:-1, - &packed_labels, &label_lengths); - } - - // CUDNN is disabled due to lack of support for input lengths - /* #if defined(__CUDACC__) && MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 7 */ - /* if (!exceed_cudnn_limit) { */ - /* cudnn_forward(ctx, s, data, costs, grad, */ - /* &data_lengths, &label_lengths, &packed_labels, */ - /* max_seq_len, batch_size, alphabet_size, */ - /* req[ctc_loss::kGrad] != mxnet::kNullOp); */ - /* } else { */ - /* baidu_forward(ctx, s, data, costs, grad, */ - /* &data_lengths, &label_lengths, &packed_labels, */ - /* batch_size, alphabet_size, req[ctc_loss::kGrad] != mxnet::kNullOp); */ - /* } */ - /* #else */ - - baidu_forward(ctx, s, data, costs, grad, - &data_lengths, &label_lengths, &packed_labels, - batch_size, alphabet_size, req[ctc_loss::kGrad] != mxnet::kNullOp); - - if (param_.use_data_lengths) { - // baidu warp CTC implementation sometimes includes undefined gradients - // for data outside of length mask. Setting to 0 to make it consistent - // with CPU implementation. 
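Both label paths visible in this hunk (PackLabelByLength when explicit label lengths are supplied, LabelTensorToPackedVector otherwise) hand warp-ctc the same two things: a flat vector of all labels concatenated and a per-sample length vector; with blank_label == 0 the padding value stripped from each row is 0, otherwise -1. A small sketch of that packing, assuming 0-padding and illustrative label values:

import numpy as np

labels = np.array([[2, 3, 1],
                   [2, 0, 0]])            # second row is padded with 0
packed_labels, label_lengths = [], []
for row in labels:
    valid = row[row != 0]                 # drop the padding value
    packed_labels.extend(int(v) for v in valid)
    label_lengths.append(len(valid))
# packed_labels == [2, 3, 1, 2], label_lengths == [3, 1]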
- int kInputLength = 2; - mxnet_op::SequenceMask(grad, in_data[kInputLength].get(s), - static_cast(0)); - } - }); + MSHADOW_TYPE_SWITCH(in_data[0].type_flag_, DType, { + Tensor data = + in_data[ctc_loss::kData].get(s); + Tensor labels = + in_data[ctc_loss::kLabel].get(s); + + Tensor costs = + out_data[ctc_loss::kOut].get(s); + Tensor grad = + out_data[ctc_loss::kGrad].get(s); + + int max_seq_len = data.size(0); + int batch_size = data.size(1); + int alphabet_size = data.size(2); + + // data_lengths + std::vector data_lengths(batch_size, max_seq_len); + if (param_.use_data_lengths) { + int kInputLength = 2; + IndexTensorToVector(in_data[kInputLength].get(s), &data_lengths); + } + + // label_lengths + std::vector packed_labels; + std::vector label_lengths(batch_size); + + if (param_.use_label_lengths) { + int kLabelLength = 2 + param_.use_data_lengths; + exceed_cudnn_limit = PackLabelByLength(labels, in_data[kLabelLength].get(s), + &packed_labels, &label_lengths); + } else { + exceed_cudnn_limit = LabelTensorToPackedVector(labels, param_.blank_label == 0?0:-1, + &packed_labels, &label_lengths); + } + + // CUDNN is disabled due to lack of support for input lengths + /* #if defined(__CUDACC__) && MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 7 */ + /* if (!exceed_cudnn_limit) { */ + /* cudnn_forward(ctx, s, data, costs, grad, */ + /* &data_lengths, &label_lengths, &packed_labels, */ + /* max_seq_len, batch_size, alphabet_size, */ + /* req[ctc_loss::kGrad] != mxnet::kNullOp); */ + /* } else { */ + /* baidu_forward(ctx, s, data, costs, grad, */ + /* &data_lengths, &label_lengths, &packed_labels, */ + /* batch_size, alphabet_size, req[ctc_loss::kGrad] != mxnet::kNullOp); */ + /* } */ + /* #else */ + + baidu_forward(ctx, s, data, costs, grad, + &data_lengths, &label_lengths, &packed_labels, + batch_size, alphabet_size, req[ctc_loss::kGrad] != mxnet::kNullOp); + + if (param_.use_data_lengths) { + // baidu warp CTC implementation sometimes includes undefined gradients + // for data outside of length mask. Setting to 0 to make it consistent + // with CPU implementation. 
+ int kInputLength = 2; + mxnet_op::SequenceMask(grad, in_data[kInputLength].get(s), + static_cast(0)); + } + }); } + virtual void Backward(const OpContext &ctx, const std::vector &out_grad, const std::vector &in_data, @@ -331,16 +331,18 @@ class CTCLossOp : public Operator { Stream *s = ctx.get_stream(); - Tensor data_grad = - in_grad[ctc_loss::kData].get(s); - Tensor output_grad = - out_grad[ctc_loss::kOut].get(s); + MSHADOW_TYPE_SWITCH(in_data[0].type_flag_, DType, { + Tensor data_grad = + in_grad[ctc_loss::kData].get(s); + Tensor output_grad = + out_grad[ctc_loss::kOut].get(s); - Tensor data_grad_computed = - out_data[ctc_loss::kGrad].get(s); + Tensor data_grad_computed = + out_data[ctc_loss::kGrad].get(s); - Assign(data_grad, req[ctc_loss::kData], - mshadow::expr::broadcast<1>(output_grad, data_grad.shape_) * data_grad_computed); + Assign(data_grad, req[ctc_loss::kData], + mshadow::expr::broadcast<1>(output_grad, data_grad.shape_) * data_grad_computed); + }); } private: @@ -434,29 +436,29 @@ class CTCLossOp : public Operator { } } #endif // __CUDACC__ && CUDNN - - inline virtual void baidu_forward(const OpContext &ctx, - mshadow::Stream* s, - mshadow::Tensor data, - mshadow::Tensor costs, - mshadow::Tensor grad, - std::vector* data_lengths, - std::vector* label_lengths, - std::vector* packed_labels, - int batch_size, - int alphabet_size, - bool req_grad) { + template + inline void baidu_forward(const OpContext &ctx, + mshadow::Stream* s, + mshadow::Tensor data, + mshadow::Tensor costs, + mshadow::Tensor grad, + std::vector* data_lengths, + std::vector* label_lengths, + std::vector* packed_labels, + int batch_size, + int alphabet_size, + bool req_grad) { using namespace mshadow; // allocate temporary workspace size_t size_bytes; bool gpu = data.kDevCPU ? 
false : true; - get_workspace_size(label_lengths, data_lengths, alphabet_size, + get_workspace_size(label_lengths, data_lengths, alphabet_size, batch_size, gpu, &size_bytes); // round-up so there are enough elems in memory - int num_tmp_elems = (size_bytes + sizeof(real_t) - 1) / sizeof(real_t); - Tensor workspace = - ctx.requested[ctc_loss::kTempSpace].get_space_typed( + int num_tmp_elems = (size_bytes + sizeof(DType) - 1) / sizeof(DType); + Tensor workspace = + ctx.requested[ctc_loss::kTempSpace].get_space_typed( Shape1(num_tmp_elems), s); compute_ctc_cost(data, costs.dptr_, grad.dptr_, packed_labels->data(), From 5e99e7ee6f8e0911269c74f6a4ad17696e69194e Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Wed, 5 Sep 2018 15:47:05 -0700 Subject: [PATCH 03/15] Enable integer type in labels and fix lint errors --- src/operator/contrib/ctc_loss-inl.h | 145 +++++++++++++++------------- 1 file changed, 80 insertions(+), 65 deletions(-) diff --git a/src/operator/contrib/ctc_loss-inl.h b/src/operator/contrib/ctc_loss-inl.h index 85718896e848..3e4a52766c4f 100644 --- a/src/operator/contrib/ctc_loss-inl.h +++ b/src/operator/contrib/ctc_loss-inl.h @@ -255,68 +255,71 @@ class CTCLossOp : public Operator { exceed_cudnn_limit = false; Stream *s = ctx.get_stream(); - MSHADOW_TYPE_SWITCH(in_data[0].type_flag_, DType, { - Tensor data = - in_data[ctc_loss::kData].get(s); - Tensor labels = - in_data[ctc_loss::kLabel].get(s); - - Tensor costs = - out_data[ctc_loss::kOut].get(s); - Tensor grad = - out_data[ctc_loss::kGrad].get(s); - - int max_seq_len = data.size(0); - int batch_size = data.size(1); - int alphabet_size = data.size(2); - - // data_lengths - std::vector data_lengths(batch_size, max_seq_len); - if (param_.use_data_lengths) { - int kInputLength = 2; - IndexTensorToVector(in_data[kInputLength].get(s), &data_lengths); - } - - // label_lengths - std::vector packed_labels; - std::vector label_lengths(batch_size); - - if (param_.use_label_lengths) { - int kLabelLength = 2 + param_.use_data_lengths; - exceed_cudnn_limit = PackLabelByLength(labels, in_data[kLabelLength].get(s), - &packed_labels, &label_lengths); - } else { - exceed_cudnn_limit = LabelTensorToPackedVector(labels, param_.blank_label == 0?0:-1, - &packed_labels, &label_lengths); - } - - // CUDNN is disabled due to lack of support for input lengths - /* #if defined(__CUDACC__) && MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 7 */ - /* if (!exceed_cudnn_limit) { */ - /* cudnn_forward(ctx, s, data, costs, grad, */ - /* &data_lengths, &label_lengths, &packed_labels, */ - /* max_seq_len, batch_size, alphabet_size, */ - /* req[ctc_loss::kGrad] != mxnet::kNullOp); */ - /* } else { */ - /* baidu_forward(ctx, s, data, costs, grad, */ - /* &data_lengths, &label_lengths, &packed_labels, */ - /* batch_size, alphabet_size, req[ctc_loss::kGrad] != mxnet::kNullOp); */ - /* } */ - /* #else */ - - baidu_forward(ctx, s, data, costs, grad, - &data_lengths, &label_lengths, &packed_labels, - batch_size, alphabet_size, req[ctc_loss::kGrad] != mxnet::kNullOp); - - if (param_.use_data_lengths) { - // baidu warp CTC implementation sometimes includes undefined gradients - // for data outside of length mask. Setting to 0 to make it consistent - // with CPU implementation. 
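The workspace sizing in the templated baidu_forward above rounds size_bytes up to a whole number of DType elements with (size_bytes + sizeof(DType) - 1) / sizeof(DType), so the requested buffer is never smaller than what get_workspace_size asked for. A tiny sketch of that ceil-division (the values are illustrative):

def num_tmp_elems(size_bytes, elem_size):
    # Round up: allocate enough whole elements to cover size_bytes.
    return (size_bytes + elem_size - 1) // elem_size

assert num_tmp_elems(10, 4) == 3   # 3 * 4 = 12 bytes >= 10
assert num_tmp_elems(12, 4) == 3   # exact fit adds nothing extra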
- int kInputLength = 2; - mxnet_op::SequenceMask(grad, in_data[kInputLength].get(s), - static_cast(0)); - } - }); + MSHADOW_REAL_TYPE_SWITCH(in_data[ctc_loss::kData].type_flag_, DType, { + MSHADOW_TYPE_SWITCH(in_data[ctc_loss::kLabel].type_flag_, LType, { + Tensor data = + in_data[ctc_loss::kData].get(s); + Tensor labels = + in_data[ctc_loss::kLabel].get(s); + + Tensor costs = + out_data[ctc_loss::kOut].get(s); + Tensor grad = + out_data[ctc_loss::kGrad].get(s); + + int max_seq_len = data.size(0); + int batch_size = data.size(1); + int alphabet_size = data.size(2); + + // data_lengths + std::vector data_lengths(batch_size, max_seq_len); + if (param_.use_data_lengths) { + int kInputLength = 2; + IndexTensorToVector(in_data[kInputLength].get(s), &data_lengths); + } + + // label_lengths + std::vector packed_labels; + std::vector label_lengths(batch_size); + + if (param_.use_label_lengths) { + int kLabelLength = 2 + param_.use_data_lengths; + exceed_cudnn_limit = + PackLabelByLength(labels, in_data[kLabelLength].get(s), + &packed_labels, &label_lengths); + } else { + exceed_cudnn_limit = LabelTensorToPackedVector(labels, param_.blank_label == 0?0:-1, + &packed_labels, &label_lengths); + } + + // CUDNN is disabled due to lack of support for input lengths + /* #if defined(__CUDACC__) && MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 7 */ + /* if (!exceed_cudnn_limit) { */ + /* cudnn_forward(ctx, s, data, costs, grad, */ + /* &data_lengths, &label_lengths, &packed_labels, */ + /* max_seq_len, batch_size, alphabet_size, */ + /* req[ctc_loss::kGrad] != mxnet::kNullOp); */ + /* } else { */ + /* baidu_forward(ctx, s, data, costs, grad, */ + /* &data_lengths, &label_lengths, &packed_labels, */ + /* batch_size, alphabet_size, req[ctc_loss::kGrad] != mxnet::kNullOp);*/ + /* } */ + /* #else */ + + baidu_forward(ctx, s, data, costs, grad, + &data_lengths, &label_lengths, &packed_labels, + batch_size, alphabet_size, req[ctc_loss::kGrad] != mxnet::kNullOp); + + if (param_.use_data_lengths) { + // baidu warp CTC implementation sometimes includes undefined gradients + // for data outside of length mask. Setting to 0 to make it consistent + // with CPU implementation. 
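The nested MSHADOW_REAL_TYPE_SWITCH (on the data dtype) and MSHADOW_TYPE_SWITCH (on the label dtype) added earlier in this hunk expand the enclosed block once per supported element type and run the instantiation matching the tensor's runtime type_flag_, which is what lets labels arrive as int32 while the data stays a real type. A rough Python analogue of that dispatch; the flag-to-dtype mapping below follows mshadow's usual ordering and is an assumption, not part of this patch:

import numpy as np

DTYPE_BY_FLAG = {0: np.float32, 1: np.float64, 2: np.float16, 3: np.uint8, 4: np.int32}

def type_switch(type_flag, body):
    # Pick the concrete element type from the runtime flag and run the same body with it.
    return body(DTYPE_BY_FLAG[type_flag])

# e.g. a label tensor whose runtime flag says int32:
packed = type_switch(4, lambda LType: np.asarray([2, 3, 1, 2], dtype=LType))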
+ int kInputLength = 2; + mxnet_op::SequenceMask(grad, in_data[kInputLength].get(s), + static_cast(0)); + } + }); + }); } virtual void Backward(const OpContext &ctx, @@ -331,7 +334,7 @@ class CTCLossOp : public Operator { Stream *s = ctx.get_stream(); - MSHADOW_TYPE_SWITCH(in_data[0].type_flag_, DType, { + MSHADOW_REAL_TYPE_SWITCH(in_data[ctc_loss::kData].type_flag_, DType, { Tensor data_grad = in_grad[ctc_loss::kData].get(s); Tensor output_grad = @@ -348,7 +351,6 @@ class CTCLossOp : public Operator { private: CTCLossParam param_; bool exceed_cudnn_limit; - #if defined(__CUDACC__) && MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 7 cudnnDataType_t dtype_; cudnnCTCLossDescriptor_t ctc_desc_; @@ -436,6 +438,7 @@ class CTCLossOp : public Operator { } } #endif // __CUDACC__ && CUDNN + template inline void baidu_forward(const OpContext &ctx, mshadow::Stream* s, @@ -537,11 +540,23 @@ class CTCLossProp : public OperatorProperty { TShape oshape(1); oshape[0] = dshape[1]; // batch size out_shape->clear(); - out_shape->push_back(oshape); + out_shape->push_back(oshape); // forward output out_shape->push_back(dshape); // grad output return true; } + bool InferType(std::vector *in_type, + std::vector *out_type, + std::vector *aux_type) const override { + CHECK_LE(in_type->size(), this->ListArguments().size()); + int dtype = (*in_type)[ctc_loss::kData]; + CHECK_NE(dtype, -1) << "Input data must have specified type"; + + out_type->clear(); + out_type->push_back(dtype); // forward output + out_type->push_back(dtype); // grad output + return true; + } OperatorProperty *Copy() const override { auto ptr = new CTCLossProp(); ptr->param_ = param_; From eb3096436c3326bfa6486e33a7b0d1837d7552dc Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Wed, 5 Sep 2018 23:24:43 +0000 Subject: [PATCH 04/15] Fix compilation error in GPU --- src/operator/contrib/ctc_loss-inl.h | 47 ++++++++++++++--------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/src/operator/contrib/ctc_loss-inl.h b/src/operator/contrib/ctc_loss-inl.h index 3e4a52766c4f..1f9c8ef06e4b 100644 --- a/src/operator/contrib/ctc_loss-inl.h +++ b/src/operator/contrib/ctc_loss-inl.h @@ -255,17 +255,16 @@ class CTCLossOp : public Operator { exceed_cudnn_limit = false; Stream *s = ctx.get_stream(); - MSHADOW_REAL_TYPE_SWITCH(in_data[ctc_loss::kData].type_flag_, DType, { MSHADOW_TYPE_SWITCH(in_data[ctc_loss::kLabel].type_flag_, LType, { - Tensor data = - in_data[ctc_loss::kData].get(s); + Tensor data = + in_data[ctc_loss::kData].get(s); Tensor labels = in_data[ctc_loss::kLabel].get(s); - Tensor costs = - out_data[ctc_loss::kOut].get(s); - Tensor grad = - out_data[ctc_loss::kGrad].get(s); + Tensor costs = + out_data[ctc_loss::kOut].get(s); + Tensor grad = + out_data[ctc_loss::kGrad].get(s); int max_seq_len = data.size(0); int batch_size = data.size(1); @@ -275,7 +274,7 @@ class CTCLossOp : public Operator { std::vector data_lengths(batch_size, max_seq_len); if (param_.use_data_lengths) { int kInputLength = 2; - IndexTensorToVector(in_data[kInputLength].get(s), &data_lengths); + IndexTensorToVector(in_data[kInputLength].get(s), &data_lengths); } // label_lengths @@ -315,11 +314,10 @@ class CTCLossOp : public Operator { // for data outside of length mask. Setting to 0 to make it consistent // with CPU implementation. 
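The InferType override added in the previous patch pins both outputs (the per-sample loss and the saved gradient) to the data tensor's dtype while leaving the label dtype unconstrained. Assuming an MXNet build that ships the contrib CTC operator, the user-visible consequence can be checked roughly like this (shapes and label values are illustrative):

import numpy as np
import mxnet as mx

data = mx.nd.array(np.random.uniform(size=(5, 2, 6)), dtype=np.float32)  # (seq_len, batch, alphabet)
label = mx.nd.array([[1, 2, 0], [3, 1, 0]], dtype=np.int32)              # 0 pads, blank_label == 0
loss = mx.nd.contrib.ctc_loss(data=data, label=label)
assert loss.dtype == np.float32   # loss follows the data dtype, not the label dtype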
int kInputLength = 2; - mxnet_op::SequenceMask(grad, in_data[kInputLength].get(s), - static_cast(0)); + mxnet_op::SequenceMask(grad, in_data[kInputLength].get(s), + static_cast(0)); } }); - }); } virtual void Backward(const OpContext &ctx, @@ -335,13 +333,13 @@ class CTCLossOp : public Operator { Stream *s = ctx.get_stream(); MSHADOW_REAL_TYPE_SWITCH(in_data[ctc_loss::kData].type_flag_, DType, { - Tensor data_grad = - in_grad[ctc_loss::kData].get(s); - Tensor output_grad = - out_grad[ctc_loss::kOut].get(s); + Tensor data_grad = + in_grad[ctc_loss::kData].get(s); + Tensor output_grad = + out_grad[ctc_loss::kOut].get(s); - Tensor data_grad_computed = - out_data[ctc_loss::kGrad].get(s); + Tensor data_grad_computed = + out_data[ctc_loss::kGrad].get(s); Assign(data_grad, req[ctc_loss::kData], mshadow::expr::broadcast<1>(output_grad, data_grad.shape_) * data_grad_computed); @@ -439,12 +437,11 @@ class CTCLossOp : public Operator { } #endif // __CUDACC__ && CUDNN - template inline void baidu_forward(const OpContext &ctx, mshadow::Stream* s, - mshadow::Tensor data, - mshadow::Tensor costs, - mshadow::Tensor grad, + mshadow::Tensor data, + mshadow::Tensor costs, + mshadow::Tensor grad, std::vector* data_lengths, std::vector* label_lengths, std::vector* packed_labels, @@ -455,13 +452,13 @@ class CTCLossOp : public Operator { // allocate temporary workspace size_t size_bytes; bool gpu = data.kDevCPU ? false : true; - get_workspace_size(label_lengths, data_lengths, alphabet_size, + get_workspace_size(label_lengths, data_lengths, alphabet_size, batch_size, gpu, &size_bytes); // round-up so there are enough elems in memory - int num_tmp_elems = (size_bytes + sizeof(DType) - 1) / sizeof(DType); - Tensor workspace = - ctx.requested[ctc_loss::kTempSpace].get_space_typed( + int num_tmp_elems = (size_bytes + sizeof(real_t) - 1) / sizeof(real_t); + Tensor workspace = + ctx.requested[ctc_loss::kTempSpace].get_space_typed( Shape1(num_tmp_elems), s); compute_ctc_cost(data, costs.dptr_, grad.dptr_, packed_labels->data(), From 774c61bc52ba7482065d5ca8d1b50eb469271610 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Wed, 5 Sep 2018 20:43:09 -0700 Subject: [PATCH 05/15] Add unit tests --- src/operator/contrib/ctc_loss-inl.h | 144 +++++++++--------- .../python/unittest/test_contrib_operator.py | 42 +++++ 2 files changed, 113 insertions(+), 73 deletions(-) diff --git a/src/operator/contrib/ctc_loss-inl.h b/src/operator/contrib/ctc_loss-inl.h index 1f9c8ef06e4b..321d8a437fb6 100644 --- a/src/operator/contrib/ctc_loss-inl.h +++ b/src/operator/contrib/ctc_loss-inl.h @@ -255,69 +255,69 @@ class CTCLossOp : public Operator { exceed_cudnn_limit = false; Stream *s = ctx.get_stream(); - MSHADOW_TYPE_SWITCH(in_data[ctc_loss::kLabel].type_flag_, LType, { - Tensor data = - in_data[ctc_loss::kData].get(s); - Tensor labels = - in_data[ctc_loss::kLabel].get(s); - - Tensor costs = - out_data[ctc_loss::kOut].get(s); - Tensor grad = - out_data[ctc_loss::kGrad].get(s); - - int max_seq_len = data.size(0); - int batch_size = data.size(1); - int alphabet_size = data.size(2); - - // data_lengths - std::vector data_lengths(batch_size, max_seq_len); - if (param_.use_data_lengths) { - int kInputLength = 2; - IndexTensorToVector(in_data[kInputLength].get(s), &data_lengths); - } - - // label_lengths - std::vector packed_labels; - std::vector label_lengths(batch_size); - - if (param_.use_label_lengths) { - int kLabelLength = 2 + param_.use_data_lengths; - exceed_cudnn_limit = - PackLabelByLength(labels, in_data[kLabelLength].get(s), - 
&packed_labels, &label_lengths); - } else { - exceed_cudnn_limit = LabelTensorToPackedVector(labels, param_.blank_label == 0?0:-1, - &packed_labels, &label_lengths); - } - - // CUDNN is disabled due to lack of support for input lengths - /* #if defined(__CUDACC__) && MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 7 */ - /* if (!exceed_cudnn_limit) { */ - /* cudnn_forward(ctx, s, data, costs, grad, */ - /* &data_lengths, &label_lengths, &packed_labels, */ - /* max_seq_len, batch_size, alphabet_size, */ - /* req[ctc_loss::kGrad] != mxnet::kNullOp); */ - /* } else { */ - /* baidu_forward(ctx, s, data, costs, grad, */ - /* &data_lengths, &label_lengths, &packed_labels, */ - /* batch_size, alphabet_size, req[ctc_loss::kGrad] != mxnet::kNullOp);*/ - /* } */ - /* #else */ - - baidu_forward(ctx, s, data, costs, grad, - &data_lengths, &label_lengths, &packed_labels, - batch_size, alphabet_size, req[ctc_loss::kGrad] != mxnet::kNullOp); - - if (param_.use_data_lengths) { - // baidu warp CTC implementation sometimes includes undefined gradients - // for data outside of length mask. Setting to 0 to make it consistent - // with CPU implementation. - int kInputLength = 2; - mxnet_op::SequenceMask(grad, in_data[kInputLength].get(s), - static_cast(0)); - } - }); + MSHADOW_TYPE_SWITCH(in_data[ctc_loss::kLabel].type_flag_, DType, { + Tensor data = + in_data[ctc_loss::kData].get(s); + Tensor labels = + in_data[ctc_loss::kLabel].get(s); + + Tensor costs = + out_data[ctc_loss::kOut].get(s); + Tensor grad = + out_data[ctc_loss::kGrad].get(s); + + int max_seq_len = data.size(0); + int batch_size = data.size(1); + int alphabet_size = data.size(2); + + // data_lengths + std::vector data_lengths(batch_size, max_seq_len); + if (param_.use_data_lengths) { + int kInputLength = 2; + IndexTensorToVector(in_data[kInputLength].get(s), &data_lengths); + } + + // label_lengths + std::vector packed_labels; + std::vector label_lengths(batch_size); + + if (param_.use_label_lengths) { + int kLabelLength = 2 + param_.use_data_lengths; + exceed_cudnn_limit = + PackLabelByLength(labels, in_data[kLabelLength].get(s), + &packed_labels, &label_lengths); + } else { + exceed_cudnn_limit = LabelTensorToPackedVector(labels, param_.blank_label == 0?0:-1, + &packed_labels, &label_lengths); + } + + // CUDNN is disabled due to lack of support for input lengths + /* #if defined(__CUDACC__) && MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 7 */ + /* if (!exceed_cudnn_limit) { */ + /* cudnn_forward(ctx, s, data, costs, grad, */ + /* &data_lengths, &label_lengths, &packed_labels, */ + /* max_seq_len, batch_size, alphabet_size, */ + /* req[ctc_loss::kGrad] != mxnet::kNullOp); */ + /* } else { */ + /* baidu_forward(ctx, s, data, costs, grad, */ + /* &data_lengths, &label_lengths, &packed_labels, */ + /* batch_size, alphabet_size, req[ctc_loss::kGrad] != mxnet::kNullOp);*/ + /* } */ + /* #else */ + + baidu_forward(ctx, s, data, costs, grad, + &data_lengths, &label_lengths, &packed_labels, + batch_size, alphabet_size, req[ctc_loss::kGrad] != mxnet::kNullOp); + + if (param_.use_data_lengths) { + // baidu warp CTC implementation sometimes includes undefined gradients + // for data outside of length mask. Setting to 0 to make it consistent + // with CPU implementation. 
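The Python tests added later in this patch exercise these paths end to end. As a standalone illustration of what the series is after — integer-typed labels producing the same loss as the default float labels — something like the following should hold (the sizes and the use of mx.nd.contrib.ctc_loss mirror the new tests; the equality check itself is an assumption about the expected behaviour, not part of the patch):

import numpy as np
import mxnet as mx

x = np.random.uniform(size=(5, 10, 6))        # (seq_len, batch, alphabet)
y = np.random.randint(1, 6, size=(10, 3))     # keep 0 free for blank/padding

loss_f = mx.nd.contrib.ctc_loss(data=mx.nd.array(x), label=mx.nd.array(y))
loss_i = mx.nd.contrib.ctc_loss(data=mx.nd.array(x), label=mx.nd.array(y, dtype=np.int32))
assert np.allclose(loss_f.asnumpy(), loss_i.asnumpy())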
+ int kInputLength = 2; + mxnet_op::SequenceMask(grad, in_data[kInputLength].get(s), + static_cast(0)); + } + }); } virtual void Backward(const OpContext &ctx, @@ -332,18 +332,16 @@ class CTCLossOp : public Operator { Stream *s = ctx.get_stream(); - MSHADOW_REAL_TYPE_SWITCH(in_data[ctc_loss::kData].type_flag_, DType, { - Tensor data_grad = - in_grad[ctc_loss::kData].get(s); - Tensor output_grad = - out_grad[ctc_loss::kOut].get(s); + Tensor data_grad = + in_grad[ctc_loss::kData].get(s); + Tensor output_grad = + out_grad[ctc_loss::kOut].get(s); - Tensor data_grad_computed = - out_data[ctc_loss::kGrad].get(s); + Tensor data_grad_computed = + out_data[ctc_loss::kGrad].get(s); - Assign(data_grad, req[ctc_loss::kData], - mshadow::expr::broadcast<1>(output_grad, data_grad.shape_) * data_grad_computed); - }); + Assign(data_grad, req[ctc_loss::kData], + mshadow::expr::broadcast<1>(output_grad, data_grad.shape_) * data_grad_computed); } private: diff --git a/tests/python/unittest/test_contrib_operator.py b/tests/python/unittest/test_contrib_operator.py index fc6c1be9c3a1..03764546b2c8 100644 --- a/tests/python/unittest/test_contrib_operator.py +++ b/tests/python/unittest/test_contrib_operator.py @@ -244,6 +244,48 @@ def assert_match(inputs, x, y, threshold, is_ascend=False): assert_match([[0.5, 0.6], [0.1, 0.2], [0.3, 0.4]], [1, -1, 0], [2, 0], 1e-12, False) assert_match([[0.5, 0.6], [0.1, 0.2], [0.3, 0.4]], [-1, 0, 1], [1, 2], 100, True) +def test_ctc_loss_op(): + batch_size = 10 + seq_len = 5 + label_len = 3 + num_classes = 6 + np.random.seed(1) + x = np.random.uniform(size=(seq_len, batch_size, num_classes)) + y = np.random.randint(0, num_classes, size=(batch_size, label_len)) + + def test_cpu(x, y): + data = mx.nd.array(x, ctx=mx.cpu(0)) + label = mx.nd.array(y, ctx=mx.cpu(0)) + loss = mx.nd.contrib.ctc_loss(data=data, label=label) + loss = mx.nd.make_loss(loss) + expected_output = [9.604521, 7.096151, 4.906869, 5.5237527, 5.9895644, 5.584548, + 5.528411, 5.765914, 6.740701, 5.2625823] + assert np.isclose(loss.asnumpy(), expected_output).all() + + def test_gpu(x, y): + data = mx.nd.array(x, ctx=mx.gpu(0)) + label = mx.nd.array(y, ctx=mx.gpu(0)) + loss = mx.nd.contrib.ctc_loss(data=data, label=label) + loss = mx.nd.make_loss(loss) + expected_output = [9.604521, 7.096151, 4.906869, 5.5237527, 5.9895644, 5.584548, + 5.528411, 5.765914, 6.740701, 5.2625823] + assert np.isclose(loss.asnumpy(), expected_output).all() + + def test_integer_label(x, y): + data = mx.nd.array(x, ctx=mx.cpu(0)) + label = mx.nd.array(y, ctx=mx.cpu(0), dtype=np.int32) + loss = mx.nd.contrib.ctc_loss(data=data, label=label) + loss = mx.nd.make_loss(loss) + expected_output = [9.604521, 7.096151, 4.906869, 5.5237527, 5.9895644, 5.584548, + 5.528411, 5.765914, 6.740701, 5.2625823] + assert np.isclose(loss.asnumpy(), expected_output).all() + + test_cpu(x, y) + if default_context().device_type == 'gpu': + test_gpu(x, y) + test_integer_label(x, y) + + if __name__ == '__main__': import nose nose.runmodule() From 59f48f2ec2551f16e7871fecd6f12072dac4ea3d Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Thu, 6 Sep 2018 10:42:14 -0700 Subject: [PATCH 06/15] Undo indentation --- src/operator/contrib/ctc_loss-inl.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/operator/contrib/ctc_loss-inl.h b/src/operator/contrib/ctc_loss-inl.h index 321d8a437fb6..011b56038dd9 100644 --- a/src/operator/contrib/ctc_loss-inl.h +++ b/src/operator/contrib/ctc_loss-inl.h @@ -51,9 +51,9 @@ namespace mxnet { namespace op 
{ namespace ctc_loss { - enum CTCLossOpInputs { kData, kLabel }; - enum CTCLossOpOutputs { kOut, kGrad }; - enum CTCLossOpForwardResource { kTempSpace }; +enum CTCLossOpInputs { kData, kLabel }; +enum CTCLossOpOutputs { kOut, kGrad }; +enum CTCLossOpForwardResource { kTempSpace }; } template @@ -287,7 +287,7 @@ class CTCLossOp : public Operator { PackLabelByLength(labels, in_data[kLabelLength].get(s), &packed_labels, &label_lengths); } else { - exceed_cudnn_limit = LabelTensorToPackedVector(labels, param_.blank_label == 0?0:-1, + exceed_cudnn_limit = LabelTensorToPackedVector(labels, param_.blank_label == 0 ? 0 : -1, &packed_labels, &label_lengths); } @@ -333,15 +333,15 @@ class CTCLossOp : public Operator { Stream *s = ctx.get_stream(); Tensor data_grad = - in_grad[ctc_loss::kData].get(s); + in_grad[ctc_loss::kData].get(s); Tensor output_grad = - out_grad[ctc_loss::kOut].get(s); + out_grad[ctc_loss::kOut].get(s); Tensor data_grad_computed = - out_data[ctc_loss::kGrad].get(s); + out_data[ctc_loss::kGrad].get(s); Assign(data_grad, req[ctc_loss::kData], - mshadow::expr::broadcast<1>(output_grad, data_grad.shape_) * data_grad_computed); + mshadow::expr::broadcast<1>(output_grad, data_grad.shape_) * data_grad_computed); } private: @@ -462,7 +462,7 @@ class CTCLossOp : public Operator { compute_ctc_cost(data, costs.dptr_, grad.dptr_, packed_labels->data(), label_lengths->data(), data_lengths->data(), workspace.dptr_, req_grad, - param_.blank_label == 0?0:(alphabet_size-1)); + param_.blank_label == 0 ? 0 : (alphabet_size-1)); } }; // class CTCLossOp @@ -552,6 +552,7 @@ class CTCLossProp : public OperatorProperty { out_type->push_back(dtype); // grad output return true; } + OperatorProperty *Copy() const override { auto ptr = new CTCLossProp(); ptr->param_ = param_; From 1b3d1413a00be886183cd8ed538ef3850b934661 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Thu, 6 Sep 2018 10:46:49 -0700 Subject: [PATCH 07/15] Undo blank line --- src/operator/contrib/ctc_loss-inl.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/operator/contrib/ctc_loss-inl.h b/src/operator/contrib/ctc_loss-inl.h index 011b56038dd9..a3aeb0c1836a 100644 --- a/src/operator/contrib/ctc_loss-inl.h +++ b/src/operator/contrib/ctc_loss-inl.h @@ -50,6 +50,7 @@ namespace mxnet { namespace op { + namespace ctc_loss { enum CTCLossOpInputs { kData, kLabel }; enum CTCLossOpOutputs { kOut, kGrad }; @@ -228,6 +229,7 @@ class CTCLossOp : public Operator { explicit CTCLossOp(CTCLossParam p) { this->param_ = p; exceed_cudnn_limit = false; + #if defined(__CUDACC__) && MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 7 CUDNN_CALL(cudnnCreateCTCLossDescriptor(&ctc_desc_)); CUDNN_CALL(cudnnSetCTCLossDescriptor(ctc_desc_, CUDNN_DATA_FLOAT)); From 299b1e70e979c846c7af52fccbd07f32a525e8c3 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Thu, 6 Sep 2018 10:48:04 -0700 Subject: [PATCH 08/15] Undo blank line --- src/operator/contrib/ctc_loss-inl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/operator/contrib/ctc_loss-inl.h b/src/operator/contrib/ctc_loss-inl.h index a3aeb0c1836a..c8a8b2637401 100644 --- a/src/operator/contrib/ctc_loss-inl.h +++ b/src/operator/contrib/ctc_loss-inl.h @@ -229,7 +229,6 @@ class CTCLossOp : public Operator { explicit CTCLossOp(CTCLossParam p) { this->param_ = p; exceed_cudnn_limit = false; - #if defined(__CUDACC__) && MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 7 CUDNN_CALL(cudnnCreateCTCLossDescriptor(&ctc_desc_)); CUDNN_CALL(cudnnSetCTCLossDescriptor(ctc_desc_, CUDNN_DATA_FLOAT)); @@ -349,6 +348,7 @@ class 
CTCLossOp : public Operator { private: CTCLossParam param_; bool exceed_cudnn_limit; + #if defined(__CUDACC__) && MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 7 cudnnDataType_t dtype_; cudnnCTCLossDescriptor_t ctc_desc_; From ec5cc3c7bc48e4956c4cd06a32d52fdce35fb8bf Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Thu, 6 Sep 2018 18:07:15 +0000 Subject: [PATCH 09/15] Add unit test for large number of classes --- tests/python/unittest/test_contrib_operator.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/tests/python/unittest/test_contrib_operator.py b/tests/python/unittest/test_contrib_operator.py index 03764546b2c8..0b96d3734bcd 100644 --- a/tests/python/unittest/test_contrib_operator.py +++ b/tests/python/unittest/test_contrib_operator.py @@ -280,10 +280,26 @@ def test_integer_label(x, y): 5.528411, 5.765914, 6.740701, 5.2625823] assert np.isclose(loss.asnumpy(), expected_output).all() + def test_large_classes(): + batch_size = 1024 + seq_len = 35 + label_len = 10 + num_classes = 6000 + x = np.random.uniform(size=(seq_len, batch_size, num_classes)) + y = np.random.randint(0, num_classes, size=(batch_size, label_len)) + + data = mx.nd.array(x, ctx=mx.gpu(0)) + label = mx.nd.array(y, ctx=mx.gpu(0)) + loss = mx.nd.contrib.ctc_loss(data=data, label=label) + loss = mx.nd.make_loss(loss) + expected_output_sum = 282733.95318603516 + assert np.isclose(sum(loss.asnumpy(), expected_output_sum)) + test_cpu(x, y) + test_integer_label(x, y) if default_context().device_type == 'gpu': test_gpu(x, y) - test_integer_label(x, y) + test_large_classes() if __name__ == '__main__': From c8b7cd4e31e0f8d207b7e80fe5487718f88d7b2c Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Thu, 6 Sep 2018 12:07:37 -0700 Subject: [PATCH 10/15] move unit tests to test_operator.py per reviewer advice --- .../python/unittest/test_contrib_operator.py | 57 ------------------- tests/python/unittest/test_operator.py | 22 +++++++ 2 files changed, 22 insertions(+), 57 deletions(-) diff --git a/tests/python/unittest/test_contrib_operator.py b/tests/python/unittest/test_contrib_operator.py index 0b96d3734bcd..76efe305bceb 100644 --- a/tests/python/unittest/test_contrib_operator.py +++ b/tests/python/unittest/test_contrib_operator.py @@ -244,63 +244,6 @@ def assert_match(inputs, x, y, threshold, is_ascend=False): assert_match([[0.5, 0.6], [0.1, 0.2], [0.3, 0.4]], [1, -1, 0], [2, 0], 1e-12, False) assert_match([[0.5, 0.6], [0.1, 0.2], [0.3, 0.4]], [-1, 0, 1], [1, 2], 100, True) -def test_ctc_loss_op(): - batch_size = 10 - seq_len = 5 - label_len = 3 - num_classes = 6 - np.random.seed(1) - x = np.random.uniform(size=(seq_len, batch_size, num_classes)) - y = np.random.randint(0, num_classes, size=(batch_size, label_len)) - - def test_cpu(x, y): - data = mx.nd.array(x, ctx=mx.cpu(0)) - label = mx.nd.array(y, ctx=mx.cpu(0)) - loss = mx.nd.contrib.ctc_loss(data=data, label=label) - loss = mx.nd.make_loss(loss) - expected_output = [9.604521, 7.096151, 4.906869, 5.5237527, 5.9895644, 5.584548, - 5.528411, 5.765914, 6.740701, 5.2625823] - assert np.isclose(loss.asnumpy(), expected_output).all() - - def test_gpu(x, y): - data = mx.nd.array(x, ctx=mx.gpu(0)) - label = mx.nd.array(y, ctx=mx.gpu(0)) - loss = mx.nd.contrib.ctc_loss(data=data, label=label) - loss = mx.nd.make_loss(loss) - expected_output = [9.604521, 7.096151, 4.906869, 5.5237527, 5.9895644, 5.584548, - 5.528411, 5.765914, 6.740701, 5.2625823] - assert np.isclose(loss.asnumpy(), expected_output).all() - - def test_integer_label(x, y): - data = mx.nd.array(x, 
ctx=mx.cpu(0)) - label = mx.nd.array(y, ctx=mx.cpu(0), dtype=np.int32) - loss = mx.nd.contrib.ctc_loss(data=data, label=label) - loss = mx.nd.make_loss(loss) - expected_output = [9.604521, 7.096151, 4.906869, 5.5237527, 5.9895644, 5.584548, - 5.528411, 5.765914, 6.740701, 5.2625823] - assert np.isclose(loss.asnumpy(), expected_output).all() - - def test_large_classes(): - batch_size = 1024 - seq_len = 35 - label_len = 10 - num_classes = 6000 - x = np.random.uniform(size=(seq_len, batch_size, num_classes)) - y = np.random.randint(0, num_classes, size=(batch_size, label_len)) - - data = mx.nd.array(x, ctx=mx.gpu(0)) - label = mx.nd.array(y, ctx=mx.gpu(0)) - loss = mx.nd.contrib.ctc_loss(data=data, label=label) - loss = mx.nd.make_loss(loss) - expected_output_sum = 282733.95318603516 - assert np.isclose(sum(loss.asnumpy(), expected_output_sum)) - - test_cpu(x, y) - test_integer_label(x, y) - if default_context().device_type == 'gpu': - test_gpu(x, y) - test_large_classes() - if __name__ == '__main__': import nose diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index 9842a69e18d4..b1ec6ed1d5a9 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -4516,6 +4516,28 @@ def test_ctc_loss(): true_loss = np.array([7.3557, 5.4091], dtype=np.float32) # from Torch check_ctc_loss(acts2, labels2, true_loss) + # Test 3: check use integer type as label + labels3 = np.array([[2, 3, 1], [2, 0, 0]], dtype=np.int32) + true_loss = np.array([7.3557, 5.4091], dtype=np.float32) # from Torch + check_ctc_loss(acts2, labels3, true_loss) + +@with_seed(1) +def test_ctc_loss_with_large_classes(): + if not (default_context() == mx.gpu()): + return + batch_size = 1024 + seq_len = 35 + label_len = 10 + num_classes = 6000 + x = np.random.uniform(size=(seq_len, batch_size, num_classes)) + y = np.random.randint(0, num_classes, size=(batch_size, label_len)) + + data = mx.nd.array(x, ctx=mx.gpu(0)) + label = mx.nd.array(y, ctx=mx.gpu(0)) + loss = mx.nd.contrib.ctc_loss(data=data, label=label) + loss = mx.nd.make_loss(loss) + expected_output_sum = 282733.95318603516 + assert np.isclose(sum(loss.asnumpy(), expected_output_sum)) @with_seed() def test_ctc_loss_grad(): From 973daca7a67a9d7bb991c0928fd42c62a020a3ee Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Thu, 6 Sep 2018 12:11:28 -0700 Subject: [PATCH 11/15] update unit test --- tests/python/unittest/test_operator.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index b1ec6ed1d5a9..c0963b2414d2 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -4523,8 +4523,9 @@ def test_ctc_loss(): @with_seed(1) def test_ctc_loss_with_large_classes(): - if not (default_context() == mx.gpu()): + if not (default_context().device_type == 'gpu'): return + batch_size = 1024 seq_len = 35 label_len = 10 From 217069ee59da06fd02fe082949b1d293a76eff64 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Thu, 6 Sep 2018 19:28:41 +0000 Subject: [PATCH 12/15] update unit test --- tests/python/unittest/test_operator.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index c0963b2414d2..1789407d94c1 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -4523,9 +4523,7 @@ def test_ctc_loss(): @with_seed(1) def 
test_ctc_loss_with_large_classes(): - if not (default_context().device_type == 'gpu'): - return - + ctx = default_context() batch_size = 1024 seq_len = 35 label_len = 10 @@ -4533,12 +4531,12 @@ def test_ctc_loss_with_large_classes(): x = np.random.uniform(size=(seq_len, batch_size, num_classes)) y = np.random.randint(0, num_classes, size=(batch_size, label_len)) - data = mx.nd.array(x, ctx=mx.gpu(0)) - label = mx.nd.array(y, ctx=mx.gpu(0)) + data = mx.nd.array(x, ctx=ctx) + label = mx.nd.array(y, ctx=ctx) loss = mx.nd.contrib.ctc_loss(data=data, label=label) loss = mx.nd.make_loss(loss) expected_output_sum = 282733.95318603516 - assert np.isclose(sum(loss.asnumpy(), expected_output_sum)) + assert np.isclose(sum(loss.asnumpy()), expected_output_sum) @with_seed() def test_ctc_loss_grad(): From 4574c7c93d5e103727bd5039c20c415dda0f2bd3 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Thu, 6 Sep 2018 15:31:11 -0700 Subject: [PATCH 13/15] update unit test using random seed --- tests/python/unittest/test_operator.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index 1789407d94c1..8b947cb85207 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -4521,22 +4521,20 @@ def test_ctc_loss(): true_loss = np.array([7.3557, 5.4091], dtype=np.float32) # from Torch check_ctc_loss(acts2, labels3, true_loss) -@with_seed(1) +@with_seed() def test_ctc_loss_with_large_classes(): ctx = default_context() - batch_size = 1024 - seq_len = 35 - label_len = 10 + m = 1024 + n = 35 + l = 10 num_classes = 6000 - x = np.random.uniform(size=(seq_len, batch_size, num_classes)) - y = np.random.randint(0, num_classes, size=(batch_size, label_len)) + x = np.random.uniform(size=(n, m, num_classes)) + y = np.random.randint(0, num_classes, size=(m, l)) data = mx.nd.array(x, ctx=ctx) label = mx.nd.array(y, ctx=ctx) loss = mx.nd.contrib.ctc_loss(data=data, label=label) - loss = mx.nd.make_loss(loss) - expected_output_sum = 282733.95318603516 - assert np.isclose(sum(loss.asnumpy()), expected_output_sum) + assert loss.asnumpy().shape[0] == m @with_seed() def test_ctc_loss_grad(): From fa61a0a2651925494bddda079f4fc6bef0d0cb6b Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Fri, 7 Sep 2018 17:13:19 -0700 Subject: [PATCH 14/15] Update unit test --- tests/python/unittest/test_operator.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index 8b947cb85207..43dafb0b7b27 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -4524,17 +4524,22 @@ def test_ctc_loss(): @with_seed() def test_ctc_loss_with_large_classes(): ctx = default_context() - m = 1024 - n = 35 - l = 10 num_classes = 6000 - x = np.random.uniform(size=(n, m, num_classes)) - y = np.random.randint(0, num_classes, size=(m, l)) - - data = mx.nd.array(x, ctx=ctx) - label = mx.nd.array(y, ctx=ctx) - loss = mx.nd.contrib.ctc_loss(data=data, label=label) - assert loss.asnumpy().shape[0] == m + seq_len = 8 + batch_size = 2 + data = np.empty((num_classes, 0)) + for i in range(seq_len * batch_size) : + row = np.roll(np.arange(num_classes), i).reshape(num_classes, 1) + data = np.append(data, row/13, axis=1) + data = data.reshape(seq_len, batch_size, num_classes) + label = np.array([ + [100, 200, 300, 400, 500, 0, 0, 0], + [1000, 2000, 3000, 4000, 0, 5000, 0, 0]], 
dtype=np.int32) + nd_data = mx.nd.array(data) + nd_label = mx.nd.array(label) + loss = mx.nd.contrib.ctc_loss(data=nd_data, label=nd_label) + expected_loss = np.array([688.02826, 145.34462]) + assert_almost_equal(loss.asnumpy(), expected_loss) @with_seed() def test_ctc_loss_grad(): From 3fbb3f545e2d7d4d16c0bc62886ebd977204a4f6 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Tue, 11 Sep 2018 10:27:28 -0700 Subject: [PATCH 15/15] Fix unit test difference Python2 and Python3 --- tests/python/unittest/test_operator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index 43dafb0b7b27..4ec4bf1b384f 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -4529,7 +4529,7 @@ def test_ctc_loss_with_large_classes(): batch_size = 2 data = np.empty((num_classes, 0)) for i in range(seq_len * batch_size) : - row = np.roll(np.arange(num_classes), i).reshape(num_classes, 1) + row = np.roll(np.arange(num_classes, dtype=np.float32), i).reshape(num_classes, 1) data = np.append(data, row/13, axis=1) data = data.reshape(seq_len, batch_size, num_classes) label = np.array([