Remove copy of generator in Multinomial #1611

Merged
merged 2 commits on Aug 14, 2019
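This PR removes the `std::default_random_engine generator_copy = generator;` line (and the surrounding per-batch lambda) from `MultinomialCompute`, so the op now draws from the shared engine passed in by `Multinomial::Compute` rather than a throwaway copy. Because copying duplicates the engine state, every call to `Compute` previously produced an identical sample sequence; the updated test runs the session twice and expects different output on the second call. As a minimal standalone sketch of the underlying C++ behavior (not ORT code; names chosen for illustration):

```cpp
#include <iostream>
#include <random>

int main() {
  std::default_random_engine engine{1618};
  std::uniform_real_distribution<double> dist(0.0, 1.0);

  // Sampling through a copy never advances the original engine,
  // so each call that starts by copying sees the same sequence.
  auto sample_from_copy = [&]() {
    std::default_random_engine copy = engine;  // state duplicated here
    return dist(copy);
  };
  std::cout << sample_from_copy() << ' ' << sample_from_copy() << '\n';  // same value twice

  // Sampling through the shared engine advances its state,
  // so consecutive calls produce different values.
  auto sample_shared = [&]() { return dist(engine); };
  std::cout << sample_shared() << ' ' << sample_shared() << '\n';  // two different values
}
```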
119 changes: 44 additions & 75 deletions onnxruntime/core/providers/cpu/generator/random.cc
@@ -76,8 +76,6 @@ void GenerateData(std::default_random_engine& generator, TDistribution distribut
static Status RandomNormalCompute(float mean, float scale, std::default_random_engine& generator, TensorProto::DataType dtype, Tensor& Y);
static Status RandomUniformCompute(float high, float low, std::default_random_engine& generator, TensorProto::DataType dtype, Tensor& Y);

// Leaving in case we need to change to this approach
//static Status CreateOutputTensorFromTensorValues(OpKernelContext* ctx, const Tensor& X,Tensor** Y);
static Status CreateOutputTensorFromTensorShape(OpKernelContext* ctx, const Tensor& X, Tensor** Y);
static TensorProto::DataType InferDataType(const Tensor& tensor);

@@ -168,53 +166,48 @@ static Status MultinomialCompute(OpKernelContext* ctx,
Eigen::array<int64_t, 2> Y_dims = {{batch_size, num_samples}};
Matrix<OutputType> output = Matrix<OutputType>(Y.template MutableData<OutputType>(), Y_dims);

// TODO (perf optimization) - the idea behind making this a lambda is so that we can parallelize across batches.
// When we do that this lambda will act as one task given to a thread
auto DoWork = [ctx, num_samples, num_classes, &generator, &logits, &output](int64_t start_row,
int64_t limit_row) {
std::default_random_engine generator_copy = generator;
// BEGIN create temporary tensor
AllocatorPtr alloc;
ctx->GetTempSpaceAllocator(&alloc);
auto cdf_data = static_cast<double*>(alloc->Alloc(sizeof(double) * num_classes));
BufferUniquePtr cdf_buffer(cdf_data, BufferDeleter(alloc));
Eigen::array<int64_t, 1> cdf_dims = {{num_classes}};
auto cdf = EigenVector<double>(cdf_data, cdf_dims);
// END create temporary tensor

std::uniform_real_distribution<double> dist(0.0, 1.0); // TODO: should this be initialized per batch?
for (int64_t b = start_row; b < limit_row; ++b) {
const float* logits_row = &(logits(b, 0));
// Takes an along-class maximum (for numerical stability).
float maxx = std::numeric_limits<float>::lowest();
for (int64_t j = 0; j < num_classes; ++j) {
if (Eigen::numext::isfinite(logits_row[j])) {
maxx = std::max(maxx, logits_row[j]);
}
// BEGIN create temporary tensor
AllocatorPtr alloc;
ORT_RETURN_IF_ERROR(ctx->GetTempSpaceAllocator(&alloc));
auto cdf_data = static_cast<double*>(alloc->Alloc(sizeof(double) * num_classes));
BufferUniquePtr cdf_buffer(cdf_data, BufferDeleter(alloc));
Eigen::array<int64_t, 1> cdf_dims = {{num_classes}};
auto cdf = EigenVector<double>(cdf_data, cdf_dims);
// END create temporary tensor

std::uniform_real_distribution<double> dist(0.0, 1.0); // TODO: should this be initialized per batch?

for (int64_t b = 0; b < batch_size; ++b) {
const float* logits_row = &(logits(b, 0));
// Takes an along-class maximum (for numerical stability).
float maxx = std::numeric_limits<float>::lowest();
for (int64_t j = 0; j < num_classes; ++j) {
if (Eigen::numext::isfinite(logits_row[j])) {
maxx = std::max(maxx, logits_row[j]);
}
const auto max_logit = static_cast<double>(maxx);

// Precompute cumulative probability distribution across classes.
// Note: This isn't normalized.
cdf = (logits.chip<0>(b).cast<double>() - max_logit).exp();
double running_total = 0;
for (int64_t j = 0; j < num_classes; ++j) {
if (Eigen::numext::isfinite(logits_row[j])) {
running_total += cdf(j);
}
cdf(j) = running_total;
}
// Generate each sample.
const double* cdf_begin = cdf.data();
const double* cdf_end = cdf.data() + num_classes;
for (int64_t j = 0; j < num_samples; ++j) {
const double to_find = dist(generator_copy) * running_total;
auto found_iter = std::upper_bound(cdf_begin, cdf_end, to_find);
output(b, j) = static_cast<OutputType>(std::distance(cdf_begin, found_iter));
}
const auto max_logit = static_cast<double>(maxx);

// Precompute cumulative probability distribution across classes.
// Note: This isn't normalized.
cdf = (logits.chip<0>(b).cast<double>() - max_logit).exp();
double running_total = 0;
for (int64_t j = 0; j < num_classes; ++j) {
if (Eigen::numext::isfinite(logits_row[j])) {
running_total += cdf(j);
}
cdf(j) = running_total;
}
// Generate each sample.
const double* cdf_begin = cdf.data();
const double* cdf_end = cdf.data() + num_classes;
for (int64_t j = 0; j < num_samples; ++j) {
const double to_find = dist(generator) * running_total;
auto found_iter = std::upper_bound(cdf_begin, cdf_end, to_find);
output(b, j) = static_cast<OutputType>(std::distance(cdf_begin, found_iter));
}
};
DoWork(0, batch_size);
}

return Status::OK();
}
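For context on the loop above: each batch row builds an unnormalized cumulative sum of `exp(logit - max_logit)` (the max is subtracted for numerical stability), then maps a uniform draw scaled by the running total back to a class index with `std::upper_bound` (inverse-transform sampling). A compact standalone sketch of the same technique, with assumed names rather than the ORT types:

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <random>
#include <vector>

// Draws num_samples class indices from one row of unnormalized logits.
std::vector<int64_t> SampleMultinomialRow(const std::vector<float>& logits,
                                          int64_t num_samples,
                                          std::default_random_engine& engine) {
  // Subtract the max logit before exponentiating for numerical stability.
  const double max_logit = *std::max_element(logits.begin(), logits.end());

  // Unnormalized cumulative distribution over classes.
  std::vector<double> cdf(logits.size());
  double running_total = 0.0;
  for (size_t j = 0; j < logits.size(); ++j) {
    running_total += std::exp(static_cast<double>(logits[j]) - max_logit);
    cdf[j] = running_total;
  }

  std::uniform_real_distribution<double> dist(0.0, 1.0);
  std::vector<int64_t> samples(static_cast<size_t>(num_samples));
  for (auto& sample : samples) {
    // Scaling the draw by the total avoids normalizing the CDF.
    const double to_find = dist(engine) * running_total;
    const auto it = std::upper_bound(cdf.begin(), cdf.end(), to_find);
    sample = std::distance(cdf.begin(), it);
  }
  return samples;
}
```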

@@ -262,32 +255,6 @@ Status Multinomial::Compute(OpKernelContext* ctx) const {
return status;
}

/*
alternative interpretation of the spec is that the input tensor contains the dimensions as ints.
Keeping this temporarily in case we go back to that.

// read shape information from input tensor and create output tensor with it
static Status CreateOutputTensorFromTensorValues(OpKernelContext* ctx, const Tensor& X, Tensor** Y) {
const TensorShape& shape = X.Shape();
auto size = shape.Size();
auto num_dims = shape.NumDimensions();

if (num_dims != 1) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Expected 1 dimension tensor with shape information. Dimensions=", num_dims);
}

std::vector<int64_t> dims;
dims.reserve(shape.Size());

auto data = gsl::make_span(tensor.template Data<int64_t>(), shape.Size());
dims.insert(dims.cbegin(), data.cbegin(), data.cend());

*Y = ctx->Output(0, TensorShape(dims));

return Status::OK();
}
*/

// create output tensor using shape of input tensor
static Status CreateOutputTensorFromTensorShape(OpKernelContext* ctx, const Tensor& X, Tensor** Y) {
const TensorShape& shape = X.Shape();
@@ -363,9 +330,11 @@ static Status RandomUniformCompute(float low, float high,

template <typename T, typename TDistribution>
void GenerateData(std::default_random_engine& generator, TDistribution distribution, Tensor& tensor) {
auto out = gsl::make_span(tensor.template MutableData<T>(), tensor.Shape().Size());

std::for_each(out.begin(), out.end(), [&generator, &distribution](T& value) { value = distribution(generator); });
T* out = tensor.MutableData<T>();
for (int64_t i = 0, end = tensor.Shape().Size(); i < end; ++i) {
*out = distribution(generator);
++out;
}
}

} // namespace onnxruntime
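The `GenerateData` hunk above swaps the `gsl::make_span` + `std::for_each` form for an explicit pointer loop; both fill the output buffer by drawing repeatedly from the distribution with the shared engine. A minimal sketch of the same fill pattern outside ORT (assumed names, illustrative only):

```cpp
#include <random>
#include <vector>

template <typename T, typename TDistribution>
void FillWithRandom(std::default_random_engine& engine, TDistribution distribution,
                    std::vector<T>& out) {
  // Each draw advances the shared engine, so successive fills differ.
  for (auto& value : out) {
    value = distribution(engine);
  }
}

// Example usage:
//   std::default_random_engine engine{42};
//   std::vector<float> buffer(16);
//   FillWithRandom(engine, std::normal_distribution<float>(0.f, 1.f), buffer);
```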
51 changes: 33 additions & 18 deletions onnxruntime/test/providers/cpu/generator/random_test.cc
@@ -246,7 +246,7 @@ TEST(Random, MultinomialGoodCase) {
const std::vector<int64_t> output_dims{batch_size, num_samples};
#ifdef _WIN32
const std::vector<int64_t> expected_output{2, 0, 0, 2, 2, 2, 0, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 0};
#elif defined(__MACH__) || defined (__ANDROID__)
#elif defined(__MACH__) || defined(__ANDROID__)
const std::vector<int64_t> expected_output{1, 1, 2, 2, 0, 2, 2, 2, 0, 2, 1, 1, 2, 0, 2, 2, 0, 2, 1, 1};
#else
const std::vector<int64_t> expected_output{2, 0, 0, 1, 0, 1, 2, 0, 1, 0, 0, 1, 1, 0, 1, 0, 2, 0, 2, 0};
@@ -257,31 +257,46 @@
}

TEST(Random, MultinomialDefaultDType) {
OpTester test("Multinomial");
auto run_test = [](int num_run_calls, const std::vector<int32_t>& expected_output) {
OpTester test("Multinomial");
const int64_t num_samples = 10;
const int batch_size = 2;
const float seed = 1618.f;

const std::vector<int64_t> input_dims{2, 3};
std::vector<float> input(TensorShape(input_dims).Size());
std::fill(input.begin(), input.end(), -10.f);
test.AddInput<float>("X", input_dims, input);

test.AddAttribute("sample_size", num_samples);
test.AddAttribute("seed", seed);

const int64_t num_samples = 10;
const int batch_size = 2;
const float seed = 1618.f;
const std::vector<int64_t> output_dims{batch_size, num_samples};
test.AddOutput<int32_t>("Y", output_dims, expected_output);

const std::vector<int64_t> input_dims{2, 3};
std::vector<float> input(TensorShape(input_dims).Size());
std::fill(input.begin(), input.end(), -10.f);
test.AddInput<float>("X", input_dims, input);
// test.Run() re-loads the model each time, so we need to do multiple calls to InferenceSession::Run inside of it
// to test that the second call to Compute produces different data
test.SetNumRunCalls(num_run_calls);

test.AddAttribute("sample_size", num_samples);
test.AddAttribute("seed", seed);
test.Run();
};

const std::vector<int64_t> output_dims{batch_size, num_samples};
#ifdef _WIN32
const std::vector<int32_t> expected_output{2, 0, 0, 2, 2, 2, 0, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 0};
#elif defined(__MACH__) || defined (__ANDROID__)
const std::vector<int32_t> expected_output{1, 1, 2, 2, 0, 2, 2, 2, 0, 2, 1, 1, 2, 0, 2, 2, 0, 2, 1, 1};
const std::vector<int32_t> expected_output_1{2, 0, 0, 2, 2, 2, 0, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 0};
const std::vector<int32_t> expected_output_2{0, 0, 1, 0, 2, 2, 2, 0, 2, 1, 2, 1, 0, 2, 0, 2, 2, 1, 2, 1};
#elif defined(__MACH__) || defined(__ANDROID__)
const std::vector<int32_t> expected_output_1{1, 1, 2, 2, 0, 2, 2, 2, 0, 2, 1, 1, 2, 0, 2, 2, 0, 2, 1, 1};
const std::vector<int32_t> expected_output_2{1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 2, 0, 1, 1, 0, 2, 2, 2, 1};
#else
const std::vector<int32_t> expected_output{2, 0, 0, 1, 0, 1, 2, 0, 1, 0, 0, 1, 1, 0, 1, 0, 2, 0, 2, 0};
const std::vector<int32_t> expected_output_1{2, 0, 0, 1, 0, 1, 2, 0, 1, 0, 0, 1, 1, 0, 1, 0, 2, 0, 2, 0};
const std::vector<int32_t> expected_output_2{2, 2, 1, 1, 0, 2, 2, 1, 1, 2, 0, 0, 0, 2, 0, 1, 1, 1, 0, 0};
#endif
test.AddOutput<int32_t>("Y", output_dims, expected_output);

test.Run();
// Test output from a single call to Multinomial::Compute
run_test(1, expected_output_1);

// Test output from 2 calls to Multinomial::Compute
run_test(2, expected_output_2);
}

TEST(Random, MultinomialInvalidDtype) {
62 changes: 36 additions & 26 deletions onnxruntime/test/providers/provider_test_utils.cc
@@ -30,7 +30,7 @@ void Check(const OpTester::Data& expected_data, const Tensor& output_tensor, con
auto size = output_tensor.Shape().Size();

for (int i = 0; i < size; ++i) {
EXPECT_EQ(expected[i], output[i]) << "provider_type: " << provider_type;
EXPECT_EQ(expected[i], output[i]) << "i:" << i << ", provider_type: " << provider_type;
}
}

@@ -51,19 +51,21 @@ void Check<double>(const OpTester::Data& expected_data, const Tensor& output_ten

for (int i = 0; i < size; ++i) {
if (std::isinf(expected[i])) { // Test infinity for equality
EXPECT_EQ(expected[i], output[i]);
EXPECT_EQ(expected[i], output[i]) << "i:" << i;
} else if (std::isnan(expected[i])) {
EXPECT_TRUE(std::isnan(output[i])) << "Expected output " << i << " to be NaN";
} else {
if (!has_abs_err && !has_rel_err) {
// the default for existing tests
EXPECT_NEAR(expected[i], output[i], threshold) << "provider_type: " << provider_type;
EXPECT_NEAR(expected[i], output[i], threshold) << "i:" << i << ", provider_type: " << provider_type;
} else {
if (has_abs_err) {
EXPECT_NEAR(expected[i], output[i], expected_data.absolute_error_.value()) << "provider_type: " << provider_type;
EXPECT_NEAR(expected[i], output[i], expected_data.absolute_error_.value())
<< "i:" << i << ", provider_type: " << provider_type;
}
if (has_rel_err) {
EXPECT_NEAR(expected[i], output[i], expected_data.relative_error_.value() * std::abs(expected[i])) << "provider_type: " << provider_type;
EXPECT_NEAR(expected[i], output[i], expected_data.relative_error_.value() * std::abs(expected[i]))
<< "i:" << i << ", provider_type: " << provider_type;
}
}
}
@@ -87,19 +89,21 @@ void Check<float>(const OpTester::Data& expected_data, const Tensor& output_tens

for (int i = 0; i < size; ++i) {
if (std::isinf(expected[i])) { // Test infinity for equality
EXPECT_EQ(expected[i], output[i]);
EXPECT_EQ(expected[i], output[i]) << "i:" << i;
} else if (std::isnan(expected[i])) {
EXPECT_TRUE(std::isnan(output[i])) << "Expected output " << i << " to be NaN";
} else {
if (!has_abs_err && !has_rel_err) {
// the default for existing tests
EXPECT_NEAR(expected[i], output[i], threshold) << "provider_type: " << provider_type;
EXPECT_NEAR(expected[i], output[i], threshold) << "i:" << i << ", provider_type: " << provider_type;
} else {
if (has_abs_err) {
EXPECT_NEAR(expected[i], output[i], expected_data.absolute_error_.value()) << "provider_type: " << provider_type;
EXPECT_NEAR(expected[i], output[i], expected_data.absolute_error_.value())
<< "i:" << i << ", provider_type: " << provider_type;
}
if (has_rel_err) {
EXPECT_NEAR(expected[i], output[i], expected_data.relative_error_.value() * std::abs(expected[i])) << "provider_type: " << provider_type;
EXPECT_NEAR(expected[i], output[i], expected_data.relative_error_.value() * std::abs(expected[i]))
<< "i:" << i << ", provider_type: " << provider_type;
}
}
}
@@ -121,10 +125,10 @@ void Check<MLFloat16>(const OpTester::Data& expected_data, const Tensor& output_
float threshold = 0.001f;
for (int i = 0; i < size; ++i) {
if (std::isinf(f_expected[i])) // Test infinity for equality
EXPECT_EQ(f_expected[i], f_output[i]);
EXPECT_EQ(f_expected[i], f_output[i]) << "i:" << i;
else {
// the default for existing tests
EXPECT_NEAR(f_expected[i], f_output[i], threshold) << "provider_type: " << provider_type;
EXPECT_NEAR(f_expected[i], f_output[i], threshold) << "i:" << i << ", provider_type: " << provider_type;
}
}
}
@@ -342,23 +346,27 @@ void OpTester::ExecuteModel(Model& model, InferenceSession& session_object, Expe
default_run_options.run_log_verbosity_level = 1;

std::vector<OrtValue> fetches;
status = session_object.Run(run_options ? *run_options : default_run_options, feeds, output_names, &fetches);
if (status.IsOK()) {
EXPECT_TRUE(expect_result == ExpectResult::kExpectSuccess) << "Expected failure but Run was successful";
if (expect_result == ExpectResult::kExpectFailure) {
return;
}
} else {
if (expect_result == ExpectResult::kExpectFailure) {
// Disable expected_failure_string checks for MKL-DNN and nGraph EP's
if (provider_type != kMklDnnExecutionProvider && provider_type != kNGraphExecutionProvider) {
EXPECT_THAT(status.ErrorMessage(), testing::HasSubstr(expected_failure_string));
for (int i = 0; i < num_run_calls_; ++i) {
fetches.clear();
status = session_object.Run(run_options ? *run_options : default_run_options, feeds, output_names, &fetches);

if (status.IsOK()) {
EXPECT_TRUE(expect_result == ExpectResult::kExpectSuccess) << "Expected failure but Run was successful";
if (expect_result == ExpectResult::kExpectFailure) {
return;
}
} else {
LOGS_DEFAULT(ERROR) << "Run failed with status: " << status.ErrorMessage();
EXPECT_TRUE(status.IsOK()) << status.ErrorMessage();
if (expect_result == ExpectResult::kExpectFailure) {
// Disable expected_failure_string checks for MKL-DNN and nGraph EP's
if (provider_type != kMklDnnExecutionProvider && provider_type != kNGraphExecutionProvider) {
EXPECT_THAT(status.ErrorMessage(), testing::HasSubstr(expected_failure_string));
}
} else {
LOGS_DEFAULT(ERROR) << "Run failed with status: " << status.ErrorMessage();
EXPECT_TRUE(status.IsOK()) << status.ErrorMessage();
}
return;
}
return;
}

// Verify the outputs
@@ -515,7 +523,9 @@ void OpTester::Run(ExpectResult expect_result,

//if node is not registered for the provider, skip
node.SetExecutionProviderType(provider_type);
if (provider_type == onnxruntime::kNGraphExecutionProvider || provider_type == onnxruntime::kTensorrtExecutionProvider || provider_type == onnxruntime::kOpenVINOExecutionProvider)
if (provider_type == onnxruntime::kNGraphExecutionProvider ||
provider_type == onnxruntime::kTensorrtExecutionProvider ||
provider_type == onnxruntime::kOpenVINOExecutionProvider)
continue;
auto reg = execution_provider->GetKernelRegistry();
const KernelCreateInfo* kci = reg->TryFindKernel(node, execution_provider->Type());
8 changes: 8 additions & 0 deletions onnxruntime/test/providers/provider_test_utils.h
@@ -227,6 +227,13 @@ class OpTester {
void SetOutputAbsErr(const char* name, float v);
void SetOutputRelErr(const char* name, float v);

// Number of times to call InferenceSession::Run. The same feeds are used each time.
// e.g. used to verify the generator ops behave as expected
void SetNumRunCalls(int n) {
ORT_ENFORCE(n > 0);
num_run_calls_ = n;
}

template <typename T>
void AddAttribute(std::string name, T value) {
// Generate the proper AddAttribute call for later
@@ -318,6 +325,7 @@ class OpTester {
int opset_version_;
bool add_shape_to_tensor_data_ = true;
int add_symbolic_dim_to_tensor_data_ = -1;
int num_run_calls_ = 1;
std::vector<Data> input_data_;
std::vector<Data> output_data_;
std::vector<size_t> initializer_index_;