Remove copy of generator in Multinomial #1611

Merged
merged 2 commits on Aug 14, 2019
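This PR removes the `std::default_random_engine generator_copy = generator;` line (and the surrounding per-batch lambda) from `MultinomialCompute`, so the op now draws from the shared engine passed in by `Multinomial::Compute` rather than a throwaway copy. Because copying duplicates the engine state, every call to `Compute` previously produced an identical sample sequence; the updated test runs the session twice and expects different output on the second call. As a minimal standalone sketch of the underlying C++ behavior (not ORT code; names chosen for illustration):

```cpp
#include <iostream>
#include <random>

int main() {
  std::default_random_engine engine{1618};
  std::uniform_real_distribution<double> dist(0.0, 1.0);

  // Sampling through a copy never advances the original engine,
  // so each call that starts by copying sees the same sequence.
  auto sample_from_copy = [&]() {
    std::default_random_engine copy = engine;  // state duplicated here
    return dist(copy);
  };
  std::cout << sample_from_copy() << ' ' << sample_from_copy() << '\n';  // same value twice

  // Sampling through the shared engine advances its state,
  // so consecutive calls produce different values.
  auto sample_shared = [&]() { return dist(engine); };
  std::cout << sample_shared() << ' ' << sample_shared() << '\n';  // two different values
}
```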
119 changes: 44 additions & 75 deletions onnxruntime/core/providers/cpu/generator/random.cc
@@ -76,8 +76,6 @@ void GenerateData(std::default_random_engine& generator, TDistribution distribut
static Status RandomNormalCompute(float mean, float scale, std::default_random_engine& generator, TensorProto::DataType dtype, Tensor& Y);
static Status RandomUniformCompute(float high, float low, std::default_random_engine& generator, TensorProto::DataType dtype, Tensor& Y);

// Leaving in case we need to change to this approach
//static Status CreateOutputTensorFromTensorValues(OpKernelContext* ctx, const Tensor& X,Tensor** Y);
static Status CreateOutputTensorFromTensorShape(OpKernelContext* ctx, const Tensor& X, Tensor** Y);
static TensorProto::DataType InferDataType(const Tensor& tensor);

@@ -168,53 +166,48 @@ static Status MultinomialCompute(OpKernelContext* ctx,
Eigen::array<int64_t, 2> Y_dims = {{batch_size, num_samples}};
Matrix<OutputType> output = Matrix<OutputType>(Y.template MutableData<OutputType>(), Y_dims);

// TODO (perf optimization) - the idea behind making this a lambda is so that we can parallelize across batches.
// When we do that this lambda will act as one task given to a thread
auto DoWork = [ctx, num_samples, num_classes, &generator, &logits, &output](int64_t start_row,
int64_t limit_row) {
std::default_random_engine generator_copy = generator;
// BEGIN create temporary tensor
AllocatorPtr alloc;
ctx->GetTempSpaceAllocator(&alloc);
auto cdf_data = static_cast<double*>(alloc->Alloc(sizeof(double) * num_classes));
BufferUniquePtr cdf_buffer(cdf_data, BufferDeleter(alloc));
Eigen::array<int64_t, 1> cdf_dims = {{num_classes}};
auto cdf = EigenVector<double>(cdf_data, cdf_dims);
// END create temporary tensor

std::uniform_real_distribution<double> dist(0.0, 1.0); // TODO: should this be initialized per batch?
for (int64_t b = start_row; b < limit_row; ++b) {
const float* logits_row = &(logits(b, 0));
// Takes an along-class maximum (for numerical stability).
float maxx = std::numeric_limits<float>::lowest();
for (int64_t j = 0; j < num_classes; ++j) {
if (Eigen::numext::isfinite(logits_row[j])) {
maxx = std::max(maxx, logits_row[j]);
}
// BEGIN create temporary tensor
AllocatorPtr alloc;
ORT_RETURN_IF_ERROR(ctx->GetTempSpaceAllocator(&alloc));
auto cdf_data = static_cast<double*>(alloc->Alloc(sizeof(double) * num_classes));
BufferUniquePtr cdf_buffer(cdf_data, BufferDeleter(alloc));
Eigen::array<int64_t, 1> cdf_dims = {{num_classes}};
auto cdf = EigenVector<double>(cdf_data, cdf_dims);
// END create temporary tensor

std::uniform_real_distribution<double> dist(0.0, 1.0); // TODO: should this be initialized per batch?

for (int64_t b = 0; b < batch_size; ++b) {
const float* logits_row = &(logits(b, 0));
// Takes an along-class maximum (for numerical stability).
float maxx = std::numeric_limits<float>::lowest();
for (int64_t j = 0; j < num_classes; ++j) {
if (Eigen::numext::isfinite(logits_row[j])) {
maxx = std::max(maxx, logits_row[j]);
}
const auto max_logit = static_cast<double>(maxx);

// Precompute cumulative probability distribution across classes.
// Note: This isn't normalized.
cdf = (logits.chip<0>(b).cast<double>() - max_logit).exp();
double running_total = 0;
for (int64_t j = 0; j < num_classes; ++j) {
if (Eigen::numext::isfinite(logits_row[j])) {
running_total += cdf(j);
}
cdf(j) = running_total;
}
// Generate each sample.
const double* cdf_begin = cdf.data();
const double* cdf_end = cdf.data() + num_classes;
for (int64_t j = 0; j < num_samples; ++j) {
const double to_find = dist(generator_copy) * running_total;
auto found_iter = std::upper_bound(cdf_begin, cdf_end, to_find);
output(b, j) = static_cast<OutputType>(std::distance(cdf_begin, found_iter));
}
const auto max_logit = static_cast<double>(maxx);

// Precompute cumulative probability distribution across classes.
// Note: This isn't normalized.
cdf = (logits.chip<0>(b).cast<double>() - max_logit).exp();
double running_total = 0;
for (int64_t j = 0; j < num_classes; ++j) {
if (Eigen::numext::isfinite(logits_row[j])) {
running_total += cdf(j);
}
cdf(j) = running_total;
}
// Generate each sample.
const double* cdf_begin = cdf.data();
const double* cdf_end = cdf.data() + num_classes;
for (int64_t j = 0; j < num_samples; ++j) {
const double to_find = dist(generator) * running_total;
auto found_iter = std::upper_bound(cdf_begin, cdf_end, to_find);
output(b, j) = static_cast<OutputType>(std::distance(cdf_begin, found_iter));
}
};
DoWork(0, batch_size);
}

return Status::OK();
}
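For context on the loop above: each batch row builds an unnormalized cumulative sum of `exp(logit - max_logit)` (the max is subtracted for numerical stability), then maps a uniform draw scaled by the running total back to a class index with `std::upper_bound` (inverse-transform sampling). A compact standalone sketch of the same technique, with assumed names rather than the ORT types:

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <random>
#include <vector>

// Draws num_samples class indices from one row of unnormalized logits.
std::vector<int64_t> SampleMultinomialRow(const std::vector<float>& logits,
                                          int64_t num_samples,
                                          std::default_random_engine& engine) {
  // Subtract the max logit before exponentiating for numerical stability.
  const double max_logit = *std::max_element(logits.begin(), logits.end());

  // Unnormalized cumulative distribution over classes.
  std::vector<double> cdf(logits.size());
  double running_total = 0.0;
  for (size_t j = 0; j < logits.size(); ++j) {
    running_total += std::exp(static_cast<double>(logits[j]) - max_logit);
    cdf[j] = running_total;
  }

  std::uniform_real_distribution<double> dist(0.0, 1.0);
  std::vector<int64_t> samples(static_cast<size_t>(num_samples));
  for (auto& sample : samples) {
    // Scaling the draw by the total avoids normalizing the CDF.
    const double to_find = dist(engine) * running_total;
    const auto it = std::upper_bound(cdf.begin(), cdf.end(), to_find);
    sample = std::distance(cdf.begin(), it);
  }
  return samples;
}
```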

@@ -262,32 +255,6 @@ Status Multinomial::Compute(OpKernelContext* ctx) const {
return status;
}

/*
alternative interpretation of the spec is that the input tensor contains the dimensions as ints.
Keeping this temporarily in case we go back to that.

// read shape information from input tensor and create output tensor with it
static Status CreateOutputTensorFromTensorValues(OpKernelContext* ctx, const Tensor& X, Tensor** Y) {
const TensorShape& shape = X.Shape();
auto size = shape.Size();
auto num_dims = shape.NumDimensions();

if (num_dims != 1) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Expected 1 dimension tensor with shape information. Dimensions=", num_dims);
}

std::vector<int64_t> dims;
dims.reserve(shape.Size());

auto data = gsl::make_span(tensor.template Data<int64_t>(), shape.Size());
dims.insert(dims.cbegin(), data.cbegin(), data.cend());

*Y = ctx->Output(0, TensorShape(dims));

return Status::OK();
}
*/

// create output tensor using shape of input tensor
static Status CreateOutputTensorFromTensorShape(OpKernelContext* ctx, const Tensor& X, Tensor** Y) {
const TensorShape& shape = X.Shape();
@@ -363,9 +330,11 @@ static Status RandomUniformCompute(float low, float high,

template <typename T, typename TDistribution>
void GenerateData(std::default_random_engine& generator, TDistribution distribution, Tensor& tensor) {
auto out = gsl::make_span(tensor.template MutableData<T>(), tensor.Shape().Size());

std::for_each(out.begin(), out.end(), [&generator, &distribution](T& value) { value = distribution(generator); });
T* out = tensor.MutableData<T>();
for (int64_t i = 0, end = tensor.Shape().Size(); i < end; ++i) {
*out = distribution(generator);
++out;
}
}

} // namespace onnxruntime
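The `GenerateData` hunk above swaps the `gsl::make_span` + `std::for_each` form for an explicit pointer loop; both fill the output buffer by drawing repeatedly from the distribution with the shared engine. A minimal sketch of the same fill pattern outside ORT (assumed names, illustrative only):

```cpp
#include <random>
#include <vector>

template <typename T, typename TDistribution>
void FillWithRandom(std::default_random_engine& engine, TDistribution distribution,
                    std::vector<T>& out) {
  // Each draw advances the shared engine, so successive fills differ.
  for (auto& value : out) {
    value = distribution(engine);
  }
}

// Example usage:
//   std::default_random_engine engine{42};
//   std::vector<float> buffer(16);
//   FillWithRandom(engine, std::normal_distribution<float>(0.f, 1.f), buffer);
```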
51 changes: 33 additions & 18 deletions onnxruntime/test/providers/cpu/generator/random_test.cc
@@ -246,7 +246,7 @@ TEST(Random, MultinomialGoodCase) {
const std::vector<int64_t> output_dims{batch_size, num_samples};
#ifdef _WIN32
const std::vector<int64_t> expected_output{2, 0, 0, 2, 2, 2, 0, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 0};
#elif defined(__MACH__) || defined (__ANDROID__)
#elif defined(__MACH__) || defined(__ANDROID__)
const std::vector<int64_t> expected_output{1, 1, 2, 2, 0, 2, 2, 2, 0, 2, 1, 1, 2, 0, 2, 2, 0, 2, 1, 1};
#else
const std::vector<int64_t> expected_output{2, 0, 0, 1, 0, 1, 2, 0, 1, 0, 0, 1, 1, 0, 1, 0, 2, 0, 2, 0};
@@ -257,31 +257,46 @@
}

TEST(Random, MultinomialDefaultDType) {
OpTester test("Multinomial");
auto run_test = [](int num_run_calls, const std::vector<int32_t>& expected_output) {
OpTester test("Multinomial");
const int64_t num_samples = 10;
const int batch_size = 2;
const float seed = 1618.f;

const std::vector<int64_t> input_dims{2, 3};
std::vector<float> input(TensorShape(input_dims).Size());
std::fill(input.begin(), input.end(), -10.f);
test.AddInput<float>("X", input_dims, input);

test.AddAttribute("sample_size", num_samples);
test.AddAttribute("seed", seed);

const int64_t num_samples = 10;
const int batch_size = 2;
const float seed = 1618.f;
const std::vector<int64_t> output_dims{batch_size, num_samples};
test.AddOutput<int32_t>("Y", output_dims, expected_output);

const std::vector<int64_t> input_dims{2, 3};
std::vector<float> input(TensorShape(input_dims).Size());
std::fill(input.begin(), input.end(), -10.f);
test.AddInput<float>("X", input_dims, input);
// test.Run() re-loads the model each time, so we need to do multiple calls to InferenceSession::Run inside of it
// to test that the second call to Compute produces different data
test.SetNumRunCalls(num_run_calls);

test.AddAttribute("sample_size", num_samples);
test.AddAttribute("seed", seed);
test.Run();
};

const std::vector<int64_t> output_dims{batch_size, num_samples};
#ifdef _WIN32
const std::vector<int32_t> expected_output{2, 0, 0, 2, 2, 2, 0, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 0};
#elif defined(__MACH__) || defined (__ANDROID__)
const std::vector<int32_t> expected_output{1, 1, 2, 2, 0, 2, 2, 2, 0, 2, 1, 1, 2, 0, 2, 2, 0, 2, 1, 1};
const std::vector<int32_t> expected_output_1{2, 0, 0, 2, 2, 2, 0, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 0};
const std::vector<int32_t> expected_output_2{0, 0, 1, 0, 2, 2, 2, 0, 2, 1, 2, 1, 0, 2, 0, 2, 2, 1, 2, 1};
#elif defined(__MACH__) || defined(__ANDROID__)
const std::vector<int32_t> expected_output_1{1, 1, 2, 2, 0, 2, 2, 2, 0, 2, 1, 1, 2, 0, 2, 2, 0, 2, 1, 1};
const std::vector<int32_t> expected_output_2{1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 2, 0, 1, 1, 0, 2, 2, 2, 1};
#else
const std::vector<int32_t> expected_output{2, 0, 0, 1, 0, 1, 2, 0, 1, 0, 0, 1, 1, 0, 1, 0, 2, 0, 2, 0};
const std::vector<int32_t> expected_output_1{2, 0, 0, 1, 0, 1, 2, 0, 1, 0, 0, 1, 1, 0, 1, 0, 2, 0, 2, 0};
const std::vector<int32_t> expected_output_2{2, 2, 1, 1, 0, 2, 2, 1, 1, 2, 0, 0, 0, 2, 0, 1, 1, 1, 0, 0};
#endif
test.AddOutput<int32_t>("Y", output_dims, expected_output);

test.Run();
// Test output from a single call to Multinomial::Compute
run_test(1, expected_output_1);

// Test output from 2 calls to Multinomial::Compute
run_test(2, expected_output_2);
}

TEST(Random, MultinomialInvalidDtype) {
62 changes: 36 additions & 26 deletions onnxruntime/test/providers/provider_test_utils.cc
@@ -30,7 +30,7 @@ void Check(const OpTester::Data& expected_data, const Tensor& output_tensor, con
auto size = output_tensor.Shape().Size();

for (int i = 0; i < size; ++i) {
EXPECT_EQ(expected[i], output[i]) << "provider_type: " << provider_type;
EXPECT_EQ(expected[i], output[i]) << "i:" << i << ", provider_type: " << provider_type;
}
}

@@ -51,19 +51,21 @@ void Check<double>(const OpTester::Data& expected_data, const Tensor& output_ten

for (int i = 0; i < size; ++i) {
if (std::isinf(expected[i])) { // Test infinity for equality
EXPECT_EQ(expected[i], output[i]);
EXPECT_EQ(expected[i], output[i]) << "i:" << i;
} else if (std::isnan(expected[i])) {
EXPECT_TRUE(std::isnan(output[i])) << "Expected output " << i << " to be NaN";
} else {
if (!has_abs_err && !has_rel_err) {
// the default for existing tests
EXPECT_NEAR(expected[i], output[i], threshold) << "provider_type: " << provider_type;
EXPECT_NEAR(expected[i], output[i], threshold) << "i:" << i << ", provider_type: " << provider_type;
} else {
if (has_abs_err) {
EXPECT_NEAR(expected[i], output[i], expected_data.absolute_error_.value()) << "provider_type: " << provider_type;
EXPECT_NEAR(expected[i], output[i], expected_data.absolute_error_.value())
<< "i:" << i << ", provider_type: " << provider_type;
}
if (has_rel_err) {
EXPECT_NEAR(expected[i], output[i], expected_data.relative_error_.value() * std::abs(expected[i])) << "provider_type: " << provider_type;
EXPECT_NEAR(expected[i], output[i], expected_data.relative_error_.value() * std::abs(expected[i]))
<< "i:" << i << ", provider_type: " << provider_type;
}
}
}
@@ -87,19 +89,21 @@ void Check<float>(const OpTester::Data& expected_data, const Tensor& output_tens

for (int i = 0; i < size; ++i) {
if (std::isinf(expected[i])) { // Test infinity for equality
EXPECT_EQ(expected[i], output[i]);
EXPECT_EQ(expected[i], output[i]) << "i:" << i;
} else if (std::isnan(expected[i])) {
EXPECT_TRUE(std::isnan(output[i])) << "Expected output " << i << " to be NaN";
} else {
if (!has_abs_err && !has_rel_err) {
// the default for existing tests
EXPECT_NEAR(expected[i], output[i], threshold) << "provider_type: " << provider_type;
EXPECT_NEAR(expected[i], output[i], threshold) << "i:" << i << ", provider_type: " << provider_type;
} else {
if (has_abs_err) {
EXPECT_NEAR(expected[i], output[i], expected_data.absolute_error_.value()) << "provider_type: " << provider_type;
EXPECT_NEAR(expected[i], output[i], expected_data.absolute_error_.value())
<< "i:" << i << ", provider_type: " << provider_type;
}
if (has_rel_err) {
EXPECT_NEAR(expected[i], output[i], expected_data.relative_error_.value() * std::abs(expected[i])) << "provider_type: " << provider_type;
EXPECT_NEAR(expected[i], output[i], expected_data.relative_error_.value() * std::abs(expected[i]))
<< "i:" << i << ", provider_type: " << provider_type;
}
}
}
@@ -121,10 +125,10 @@ void Check<MLFloat16>(const OpTester::Data& expected_data, const Tensor& output_
float threshold = 0.001f;
for (int i = 0; i < size; ++i) {
if (std::isinf(f_expected[i])) // Test infinity for equality
EXPECT_EQ(f_expected[i], f_output[i]);
EXPECT_EQ(f_expected[i], f_output[i]) << "i:" << i;
else {
// the default for existing tests
EXPECT_NEAR(f_expected[i], f_output[i], threshold) << "provider_type: " << provider_type;
EXPECT_NEAR(f_expected[i], f_output[i], threshold) << "i:" << i << ", provider_type: " << provider_type;
}
}
}
@@ -342,23 +346,27 @@ void OpTester::ExecuteModel(Model& model, InferenceSession& session_object, Expe
default_run_options.run_log_verbosity_level = 1;

std::vector<OrtValue> fetches;
status = session_object.Run(run_options ? *run_options : default_run_options, feeds, output_names, &fetches);
if (status.IsOK()) {
EXPECT_TRUE(expect_result == ExpectResult::kExpectSuccess) << "Expected failure but Run was successful";
if (expect_result == ExpectResult::kExpectFailure) {
return;
}
} else {
if (expect_result == ExpectResult::kExpectFailure) {
// Disable expected_failure_string checks for MKL-DNN and nGraph EP's
if (provider_type != kMklDnnExecutionProvider && provider_type != kNGraphExecutionProvider) {
EXPECT_THAT(status.ErrorMessage(), testing::HasSubstr(expected_failure_string));
for (int i = 0; i < num_run_calls_; ++i) {
fetches.clear();
status = session_object.Run(run_options ? *run_options : default_run_options, feeds, output_names, &fetches);

if (status.IsOK()) {
EXPECT_TRUE(expect_result == ExpectResult::kExpectSuccess) << "Expected failure but Run was successful";
if (expect_result == ExpectResult::kExpectFailure) {
return;
}
} else {
LOGS_DEFAULT(ERROR) << "Run failed with status: " << status.ErrorMessage();
EXPECT_TRUE(status.IsOK()) << status.ErrorMessage();
if (expect_result == ExpectResult::kExpectFailure) {
// Disable expected_failure_string checks for MKL-DNN and nGraph EP's
if (provider_type != kMklDnnExecutionProvider && provider_type != kNGraphExecutionProvider) {
EXPECT_THAT(status.ErrorMessage(), testing::HasSubstr(expected_failure_string));
}
} else {
LOGS_DEFAULT(ERROR) << "Run failed with status: " << status.ErrorMessage();
EXPECT_TRUE(status.IsOK()) << status.ErrorMessage();
}
return;
}
return;
}

// Verify the outputs
@@ -515,7 +523,9 @@ void OpTester::Run(ExpectResult expect_result,

//if node is not registered for the provider, skip
node.SetExecutionProviderType(provider_type);
if (provider_type == onnxruntime::kNGraphExecutionProvider || provider_type == onnxruntime::kTensorrtExecutionProvider || provider_type == onnxruntime::kOpenVINOExecutionProvider)
if (provider_type == onnxruntime::kNGraphExecutionProvider ||
provider_type == onnxruntime::kTensorrtExecutionProvider ||
provider_type == onnxruntime::kOpenVINOExecutionProvider)
continue;
auto reg = execution_provider->GetKernelRegistry();
const KernelCreateInfo* kci = reg->TryFindKernel(node, execution_provider->Type());
8 changes: 8 additions & 0 deletions onnxruntime/test/providers/provider_test_utils.h
@@ -227,6 +227,13 @@ class OpTester {
void SetOutputAbsErr(const char* name, float v);
void SetOutputRelErr(const char* name, float v);

// Number of times to call InferenceSession::Run. The same feeds are used each time.
// e.g. used to verify the generator ops behave as expected
void SetNumRunCalls(int n) {
ORT_ENFORCE(n > 0);
num_run_calls_ = n;
}

template <typename T>
void AddAttribute(std::string name, T value) {
// Generate the proper AddAttribute call for later
@@ -318,6 +325,7 @@ class OpTester {
int opset_version_;
bool add_shape_to_tensor_data_ = true;
int add_symbolic_dim_to_tensor_data_ = -1;
int num_run_calls_ = 1;
std::vector<Data> input_data_;
std::vector<Data> output_data_;
std::vector<size_t> initializer_index_;