[MXNET-807] Support integer label type in ctc_loss operator #12468
Changes from 5 commits
@@ -50,11 +50,10 @@

namespace mxnet {
namespace op {

namespace ctc_loss {
enum CTCLossOpInputs { kData, kLabel };
enum CTCLossOpOutputs { kOut, kGrad };
enum CTCLossOpForwardResource { kTempSpace };
enum CTCLossOpInputs { kData, kLabel };
enum CTCLossOpOutputs { kOut, kGrad };
enum CTCLossOpForwardResource { kTempSpace };
}

template <typename T>

@@ -256,66 +255,69 @@ class CTCLossOp : public Operator {
exceed_cudnn_limit = false;
Stream<xpu> *s = ctx.get_stream<xpu>();

Tensor<xpu, 3, real_t> data =
MSHADOW_TYPE_SWITCH(in_data[ctc_loss::kLabel].type_flag_, DType, {
Tensor<xpu, 3, real_t> data =
in_data[ctc_loss::kData].get<xpu, 3, real_t>(s);
Tensor<xpu, 2, real_t> labels =
in_data[ctc_loss::kLabel].get<xpu, 2, real_t>(s);
Tensor<xpu, 2, DType> labels =
in_data[ctc_loss::kLabel].get<xpu, 2, DType>(s);

Tensor<xpu, 1, real_t> costs =
Tensor<xpu, 1, real_t> costs =
out_data[ctc_loss::kOut].get<xpu, 1, real_t>(s);
Tensor<xpu, 3, real_t> grad =
Tensor<xpu, 3, real_t> grad =
out_data[ctc_loss::kGrad].get<xpu, 3, real_t>(s);

int max_seq_len = data.size(0);
int batch_size = data.size(1);
int alphabet_size = data.size(2);

// data_lengths
std::vector<int> data_lengths(batch_size, max_seq_len);
if (param_.use_data_lengths) {
int kInputLength = 2;
IndexTensorToVector(in_data[kInputLength].get<xpu, 1, real_t>(s), &data_lengths);
}

// label_lengths
std::vector<int> packed_labels;
std::vector<int> label_lengths(batch_size);

if (param_.use_label_lengths) {
int kLabelLength = 2+param_.use_data_lengths;
exceed_cudnn_limit = PackLabelByLength(labels, in_data[kLabelLength].get<xpu, 1, real_t>(s),
&packed_labels, &label_lengths);
} else {
exceed_cudnn_limit = LabelTensorToPackedVector(labels, param_.blank_label == 0?0:-1,
&packed_labels, &label_lengths);
}

// CUDNN is disabled due to lack of support for input lengths
/* #if defined(__CUDACC__) && MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 7 */
/* if (!exceed_cudnn_limit) { */
/* cudnn_forward(ctx, s, data, costs, grad, */
/* &data_lengths, &label_lengths, &packed_labels, */
/* max_seq_len, batch_size, alphabet_size, */
/* req[ctc_loss::kGrad] != mxnet::kNullOp); */
/* } else { */
/* baidu_forward(ctx, s, data, costs, grad, */
/* &data_lengths, &label_lengths, &packed_labels, */
/* batch_size, alphabet_size, req[ctc_loss::kGrad] != mxnet::kNullOp); */
/* } */
/* #else */

baidu_forward(ctx, s, data, costs, grad,
&data_lengths, &label_lengths, &packed_labels,
batch_size, alphabet_size, req[ctc_loss::kGrad] != mxnet::kNullOp);

if (param_.use_data_lengths) {
// baidu warp CTC implementation sometimes includes undefined gradients
// for data outside of length mask. Setting to 0 to make it consistent
// with CPU implementation.
int kInputLength = 2;
mxnet_op::SequenceMask(grad, in_data[kInputLength].get<xpu, 1, real_t>(s),
static_cast<real_t>(0));
}
int max_seq_len = data.size(0);
int batch_size = data.size(1);
int alphabet_size = data.size(2);

// data_lengths
std::vector<int> data_lengths(batch_size, max_seq_len);
if (param_.use_data_lengths) {
int kInputLength = 2;
IndexTensorToVector(in_data[kInputLength].get<xpu, 1, real_t>(s), &data_lengths);
}

// label_lengths
std::vector<int> packed_labels;
std::vector<int> label_lengths(batch_size);

if (param_.use_label_lengths) {
int kLabelLength = 2 + param_.use_data_lengths;
exceed_cudnn_limit =
PackLabelByLength(labels, in_data[kLabelLength].get<xpu, 1, DType>(s),
&packed_labels, &label_lengths);
} else {
exceed_cudnn_limit = LabelTensorToPackedVector(labels, param_.blank_label == 0?0:-1,
&packed_labels, &label_lengths);
}

Review comment: Some formatting issues with whitespaces and indentation.
Reply: Sorry, what was exactly the issue? The make lint seems to pass.
Reply: Sorry, should be

// CUDNN is disabled due to lack of support for input lengths
/* #if defined(__CUDACC__) && MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 7 */
/* if (!exceed_cudnn_limit) { */
/* cudnn_forward(ctx, s, data, costs, grad, */
/* &data_lengths, &label_lengths, &packed_labels, */
/* max_seq_len, batch_size, alphabet_size, */
/* req[ctc_loss::kGrad] != mxnet::kNullOp); */
/* } else { */
/* baidu_forward(ctx, s, data, costs, grad, */
/* &data_lengths, &label_lengths, &packed_labels, */
/* batch_size, alphabet_size, req[ctc_loss::kGrad] != mxnet::kNullOp);*/
/* } */
/* #else */

baidu_forward(ctx, s, data, costs, grad,
&data_lengths, &label_lengths, &packed_labels,
batch_size, alphabet_size, req[ctc_loss::kGrad] != mxnet::kNullOp);

if (param_.use_data_lengths) {
// baidu warp CTC implementation sometimes includes undefined gradients
// for data outside of length mask. Setting to 0 to make it consistent
// with CPU implementation.
int kInputLength = 2;
mxnet_op::SequenceMask(grad, in_data[kInputLength].get<xpu, 1, real_t>(s),
static_cast<real_t>(0));
}
});
}

virtual void Backward(const OpContext &ctx,

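The hunk above wraps the whole forward pass in MSHADOW_TYPE_SWITCH on the label input's type flag, so the label tensor is read with its own DType instead of being forced to real_t. From the Python side this means the label array handed to the contrib ctc_loss operator no longer has to be float32. A minimal usage sketch under that assumption (shapes and values are illustrative, not taken from this PR):

```python
import numpy as np
import mxnet as mx

# Illustrative shapes: activations are (seq_len, batch_size, alphabet_size),
# labels are (batch_size, max_label_len).
data = mx.nd.array(np.random.uniform(size=(20, 4, 11)))

# With integer label support, the label NDArray can be int32 instead of float32.
label = mx.nd.array(np.random.randint(1, 11, size=(4, 5)), dtype=np.int32)

loss = mx.nd.contrib.ctc_loss(data=data, label=label)
print(loss.shape)  # one CTC loss value per batch element: (4,)
```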
@@ -331,21 +333,20 @@ class CTCLossOp : public Operator {
Stream<xpu> *s = ctx.get_stream<xpu>();

Tensor<xpu, 3, real_t> data_grad =
in_grad[ctc_loss::kData].get<xpu, 3, real_t>(s);
in_grad[ctc_loss::kData].get<xpu, 3, real_t>(s);
Tensor<xpu, 1, real_t> output_grad =
out_grad[ctc_loss::kOut].get<xpu, 1, real_t>(s);
out_grad[ctc_loss::kOut].get<xpu, 1, real_t>(s);

Tensor<xpu, 3, real_t> data_grad_computed =
out_data[ctc_loss::kGrad].get<xpu, 3, real_t>(s);
out_data[ctc_loss::kGrad].get<xpu, 3, real_t>(s);

Review comment: Same here.
Reply: Removed. However, this 4-space indentation seems to be a violation of the Google C++ style guide. Are we ignoring it in the lint?

Assign(data_grad, req[ctc_loss::kData],
mshadow::expr::broadcast<1>(output_grad, data_grad.shape_) * data_grad_computed);
mshadow::expr::broadcast<1>(output_grad, data_grad.shape_) * data_grad_computed);
}

private:
CTCLossParam param_;
bool exceed_cudnn_limit;

#if defined(__CUDACC__) && MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 7
cudnnDataType_t dtype_;
cudnnCTCLossDescriptor_t ctc_desc_;

@@ -434,17 +435,17 @@ class CTCLossOp : public Operator {
}
#endif // __CUDACC__ && CUDNN

inline virtual void baidu_forward(const OpContext &ctx,
mshadow::Stream<xpu>* s,
mshadow::Tensor<xpu, 3, real_t> data,
mshadow::Tensor<xpu, 1, real_t> costs,
mshadow::Tensor<xpu, 3, real_t> grad,
std::vector<int>* data_lengths,
std::vector<int>* label_lengths,
std::vector<int>* packed_labels,
int batch_size,
int alphabet_size,
bool req_grad) {
inline void baidu_forward(const OpContext &ctx,
mshadow::Stream<xpu>* s,
mshadow::Tensor<xpu, 3, real_t> data,
mshadow::Tensor<xpu, 1, real_t> costs,
mshadow::Tensor<xpu, 3, real_t> grad,
std::vector<int>* data_lengths,
std::vector<int>* label_lengths,
std::vector<int>* packed_labels,
int batch_size,
int alphabet_size,
bool req_grad) {
using namespace mshadow;
// allocate temporary workspace
size_t size_bytes;

@@ -534,11 +535,23 @@ class CTCLossProp : public OperatorProperty {
TShape oshape(1);
oshape[0] = dshape[1]; // batch size
out_shape->clear();
out_shape->push_back(oshape);
out_shape->push_back(oshape); // forward output
out_shape->push_back(dshape); // grad output
return true;
}

bool InferType(std::vector<int> *in_type,
std::vector<int> *out_type,
std::vector<int> *aux_type) const override {
CHECK_LE(in_type->size(), this->ListArguments().size());
int dtype = (*in_type)[ctc_loss::kData];
CHECK_NE(dtype, -1) << "Input data must have specified type";

out_type->clear();
out_type->push_back(dtype); // forward output
out_type->push_back(dtype); // grad output
return true;
}
OperatorProperty *Copy() const override {
auto ptr = new CTCLossProp();
ptr->param_ = param_;
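The new InferType above propagates the data input's dtype to both registered outputs (the loss and the stored gradient) while leaving the label's dtype untouched, which is what lets an integer label pass type inference. A sketch of how this could be observed through the symbolic API; the exact call pattern here is an assumption for illustration, not code from the PR:

```python
import numpy as np
import mxnet as mx

data = mx.sym.Variable('data')
label = mx.sym.Variable('label')
out = mx.sym.contrib.ctc_loss(data=data, label=label)

# Infer types given float32 activations and int32 labels.
arg_types, out_types, _ = out.infer_type(data=np.float32, label=np.int32)
print(arg_types)  # label keeps its integer dtype, e.g. [numpy.float32, numpy.int32]
print(out_types)  # outputs follow the data dtype (float32 here)
```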
@@ -244,6 +244,48 @@ def assert_match(inputs, x, y, threshold, is_ascend=False):
assert_match([[0.5, 0.6], [0.1, 0.2], [0.3, 0.4]], [1, -1, 0], [2, 0], 1e-12, False)
assert_match([[0.5, 0.6], [0.1, 0.2], [0.3, 0.4]], [-1, 0, 1], [1, 2], 100, True)

Review comment: CTC loss tests can be found at https://github.com/apache/incubator-mxnet/blob/master/tests/python/unittest/test_operator.py#L4500, and integration tests at https://github.com/apache/incubator-mxnet/blob/master/tests/python/unittest/test_loss.py#L186. Test cases are from hand-calculated examples.
Reply: Feel free to add test cases for large labels there.
Reply: Thanks for the reference. Moving the unit test there.

def test_ctc_loss_op():
    batch_size = 10
    seq_len = 5
    label_len = 3
    num_classes = 6
    np.random.seed(1)
    x = np.random.uniform(size=(seq_len, batch_size, num_classes))
    y = np.random.randint(0, num_classes, size=(batch_size, label_len))

    def test_cpu(x, y):
        data = mx.nd.array(x, ctx=mx.cpu(0))
        label = mx.nd.array(y, ctx=mx.cpu(0))
        loss = mx.nd.contrib.ctc_loss(data=data, label=label)
        loss = mx.nd.make_loss(loss)
        expected_output = [9.604521, 7.096151, 4.906869, 5.5237527, 5.9895644, 5.584548,
                           5.528411, 5.765914, 6.740701, 5.2625823]
        assert np.isclose(loss.asnumpy(), expected_output).all()

Review comment: This testing strategy (i.e. comparing the output from random input and labels with a fixed seed against recorded output) is not meaningful and does not guarantee anything. It merely increases the line coverage.
Reply: Did not notice the unit test in test_operator.py. I have removed this one.

    def test_gpu(x, y):
        data = mx.nd.array(x, ctx=mx.gpu(0))
        label = mx.nd.array(y, ctx=mx.gpu(0))
        loss = mx.nd.contrib.ctc_loss(data=data, label=label)
        loss = mx.nd.make_loss(loss)
        expected_output = [9.604521, 7.096151, 4.906869, 5.5237527, 5.9895644, 5.584548,
                           5.528411, 5.765914, 6.740701, 5.2625823]
        assert np.isclose(loss.asnumpy(), expected_output).all()

    def test_integer_label(x, y):
        data = mx.nd.array(x, ctx=mx.cpu(0))
        label = mx.nd.array(y, ctx=mx.cpu(0), dtype=np.int32)
        loss = mx.nd.contrib.ctc_loss(data=data, label=label)
        loss = mx.nd.make_loss(loss)
        expected_output = [9.604521, 7.096151, 4.906869, 5.5237527, 5.9895644, 5.584548,
                           5.528411, 5.765914, 6.740701, 5.2625823]
        assert np.isclose(loss.asnumpy(), expected_output).all()

    test_cpu(x, y)
    if default_context().device_type == 'gpu':
        test_gpu(x, y)
    test_integer_label(x, y)
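Per the review comments above, a more robust check than comparing against recorded outputs is to assert that integer labels give the same loss as the equivalent float32 labels. A hedged sketch of such a consistency test (function name and values are illustrative, not part of the PR):

```python
import numpy as np
import mxnet as mx

def check_ctc_loss_integer_labels(data_np, label_np):
    # The same labels, passed once as float32 and once as int32, should yield
    # identical CTC loss values if integer label support is wired up correctly.
    data = mx.nd.array(data_np)
    loss_float = mx.nd.contrib.ctc_loss(data=data, label=mx.nd.array(label_np, dtype=np.float32))
    loss_int = mx.nd.contrib.ctc_loss(data=data, label=mx.nd.array(label_np, dtype=np.int32))
    assert np.allclose(loss_float.asnumpy(), loss_int.asnumpy())

# Example call with random activations and labels (values illustrative):
check_ctc_loss_integer_labels(
    np.random.uniform(size=(5, 10, 6)),
    np.random.randint(1, 6, size=(10, 3)))
```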


if __name__ == '__main__':
    import nose
    nose.runmodule()

Review comment: Which case does this part fall into? How were we able to check it in without breaking the master build if it's either of the cases?
Reply: Sorry, overlooked these lines. I have removed them.