diff --git a/src/toolkits/object_detection/one_shot_object_detection/one_shot_object_detector.cpp b/src/toolkits/object_detection/one_shot_object_detection/one_shot_object_detector.cpp index 174122d981..94a1750170 100644 --- a/src/toolkits/object_detection/one_shot_object_detection/one_shot_object_detector.cpp +++ b/src/toolkits/object_detection/one_shot_object_detection/one_shot_object_detector.cpp @@ -32,9 +32,11 @@ namespace one_shot_object_detection { namespace data_augmentation { flex_dict build_annotation(ParameterSampler ¶meter_sampler, - std::string label, size_t object_width, - size_t object_height, long seed) { - parameter_sampler.sample(seed); + const std::string &label, + size_t object_width, size_t object_height, + size_t background_width, size_t background_height, + size_t seed, size_t row_number) { + parameter_sampler.sample(background_width, background_height, seed, row_number); size_t original_top_left_x = 0; size_t original_top_left_y = 0; @@ -118,20 +120,19 @@ flex_image create_rgba_flex_image(const flex_image &object_input) { } std::pair -create_synthetic_image_from_background_and_starter(const flex_image &starter, +create_synthetic_image_from_background_and_starter(ParameterSampler ¶meter_sampler, + const flex_image &starter, const flex_image &background, std::string &label, size_t seed, size_t row_number) { - ParameterSampler parameter_sampler = - ParameterSampler(background.m_width, background.m_height, - (background.m_width - starter.m_width) / 2, - (background.m_height - starter.m_height) / 2); // construct annotation dictionary from parameters flex_dict annotation = - build_annotation(parameter_sampler, label, starter.m_width, - starter.m_height, seed + row_number); + build_annotation(parameter_sampler, label, + starter.m_width, starter.m_height, + background.m_width, background.m_height, + seed, row_number); if (background.get_image_data() == nullptr) { log_and_throw("Background image has null image data."); @@ -156,7 +157,7 @@ create_synthetic_image_from_background_and_starter(const flex_image &starter, background.m_width, background.m_height, reinterpret_cast( background.get_image_data()), - background.m_channels * background.m_width // row length in bytes + background.m_channels * background.m_width // row length in bytes ); flex_image synthetic_image = create_synthetic_image( starter_image_view, background_view, parameter_sampler); @@ -209,23 +210,28 @@ gl_sframe augment_data(const gl_sframe &data, * Replacing the `for` with a `parallel_for` fails the export_coreml unit test * with an EXC_BAD_ACCESS in the function call to boost::gil::resample_pixels */ - for (size_t segment_id = 0; segment_id < nsegments; segment_id++) { - size_t segment_start = (segment_id * backgrounds.size()) / nsegments; - size_t segment_end = ((segment_id + 1) * backgrounds.size()) / nsegments; - size_t row_number = segment_start; - for (const auto &background_ft : - backgrounds.range_iterator(segment_start, segment_end)) { - row_number++; - flex_image flex_background = - image_util::decode_image(background_ft.to()); - for (const auto &row : decompressed_data.range_iterator()) { - // go through all the starter images and create augmented images for - // all starter images and the respective chunk of background images - const flex_image &object = row[image_column_index].get(); - std::string label = row[target_column_index].to(); + for (const sframe_rows::row &row : decompressed_data.range_iterator()) { + // go through all the starter images and create augmented images for + // all starter images and the respective chunk of background images + const flex_image &object = row[image_column_index].get(); + std::string label = row[target_column_index].to(); + ParameterSampler parameter_sampler = ParameterSampler( + object.m_width, object.m_height, 0, 0); + + for (size_t segment_id = 0; segment_id < nsegments; segment_id++) { + size_t segment_start = (segment_id * backgrounds.size()) / nsegments; + size_t segment_end = ((segment_id + 1) * backgrounds.size()) / nsegments; + size_t row_number = segment_start; + for (const flexible_type &background_ft : + backgrounds.range_iterator(segment_start, segment_end)) { + row_number++; + flex_image flex_background = + image_util::decode_image(background_ft.to()); + std::pair synthetic_row = create_synthetic_image_from_background_and_starter( - object, flex_background, label, seed, row_number); + parameter_sampler, object, flex_background, + label, seed, row_number); flex_image synthetic_image = synthetic_row.first; flex_dict annotation = synthetic_row.second; // write the synthetically generated image and the constructed diff --git a/src/toolkits/object_detection/one_shot_object_detection/util/parameter_sampler.cpp b/src/toolkits/object_detection/one_shot_object_detection/util/parameter_sampler.cpp index f35af75b73..fec323dc8a 100644 --- a/src/toolkits/object_detection/one_shot_object_detection/util/parameter_sampler.cpp +++ b/src/toolkits/object_detection/one_shot_object_detection/util/parameter_sampler.cpp @@ -6,6 +6,8 @@ */ #include +#include +#include #include @@ -16,9 +18,9 @@ namespace turi { namespace one_shot_object_detection { -ParameterSampler::ParameterSampler(size_t width, size_t height, size_t dx, - size_t dy) - : width_(width), height_(height), dx_(dx), dy_(dy) {} +ParameterSampler::ParameterSampler(size_t starter_width, size_t starter_height, + size_t dx, size_t dy) + : starter_width_(starter_width), starter_height_(starter_height), dx_(dx), dy_(dy) {} double deg_to_rad(double angle) { return angle * M_PI / 180.0; } @@ -61,36 +63,40 @@ void ParameterSampler::set_warped_corners( warped_corners_[3] = warped_corners[2]; } +int generate_random_index(std::mt19937 *engine_pointer, int range) { + DASSERT_GT(range, 0); + std::uniform_int_distribution index_distribution(0, range-1); + return index_distribution(*engine_pointer); +} + /* Function to sample all the parameters needed to build a transform, and * then also build the transform. */ -void ParameterSampler::sample(long seed) { +void ParameterSampler::sample(size_t background_width, size_t background_height, + size_t seed, size_t row_number) { double theta_mean, phi_mean, gamma_mean; - std::srand(seed); - theta_mean = theta_means_[std::rand() % theta_means_.size()]; - std::srand(seed + 1); - phi_mean = phi_means_[std::rand() % phi_means_.size()]; - std::srand(seed + 2); - gamma_mean = gamma_means_[std::rand() % gamma_means_.size()]; + std::seed_seq seed_seq = {static_cast(seed), static_cast(row_number)}; + std::mt19937 engine(seed_seq); + + theta_mean = theta_means_[generate_random_index(&engine, theta_means_.size())]; + phi_mean = phi_means_[generate_random_index(&engine, phi_means_.size())]; + gamma_mean = gamma_means_[generate_random_index(&engine, gamma_means_.size())]; + std::normal_distribution theta_distribution(theta_mean, angle_stdev_); std::normal_distribution phi_distribution(phi_mean, angle_stdev_); std::normal_distribution gamma_distribution(gamma_mean, angle_stdev_); - std::normal_distribution focal_distribution((double)width_, + std::normal_distribution focal_distribution(static_cast(background_width), focal_stdev_); - theta_generator_.seed(seed + 3); - theta_ = deg_to_rad(theta_distribution(theta_generator_)); - phi_generator_.seed(seed + 4); - phi_ = deg_to_rad(phi_distribution(phi_generator_)); - gamma_generator_.seed(seed + 5); - gamma_ = deg_to_rad(gamma_distribution(gamma_generator_)); - focal_generator_.seed(seed + 6); - focal_ = focal_distribution(focal_generator_); - std::uniform_int_distribution dz_distribution(std::max(width_, height_), + + theta_ = deg_to_rad(theta_distribution(engine)); + phi_ = deg_to_rad(phi_distribution(engine)); + gamma_ = deg_to_rad(gamma_distribution(engine)); + focal_ = focal_distribution(engine); + std::uniform_int_distribution dz_distribution(std::max(background_width, background_height), max_depth_); - dz_generator_.seed(seed + 7); - dz_ = focal_ + dz_distribution(dz_generator_); + dz_ = focal_ + dz_distribution(engine); transform_ = warp_perspective::get_transformation_matrix( - width_, height_, theta_, phi_, gamma_, dx_, dy_, dz_, focal_); + starter_width_, starter_height_, theta_, phi_, gamma_, dx_, dy_, dz_, focal_); warped_corners_.reserve(4); } diff --git a/src/toolkits/object_detection/one_shot_object_detection/util/parameter_sampler.hpp b/src/toolkits/object_detection/one_shot_object_detection/util/parameter_sampler.hpp index 46a8c410c9..68afc841b7 100644 --- a/src/toolkits/object_detection/one_shot_object_detection/util/parameter_sampler.hpp +++ b/src/toolkits/object_detection/one_shot_object_detection/util/parameter_sampler.hpp @@ -26,7 +26,7 @@ namespace one_shot_object_detection { */ class ParameterSampler { public: - ParameterSampler(size_t width, size_t height, size_t dx, size_t dy); + ParameterSampler(size_t starter_width, size_t starter_height, size_t dx, size_t dy); /* Getters for all the parameters: * theta: rotation around the x axis. @@ -54,22 +54,18 @@ class ParameterSampler { /* Function to sample all the parameters needed to build a transform, and * then also build the transform. */ - void sample(long seed); + void sample(size_t background_width, size_t background_height, + size_t seed, size_t row_number); private: - size_t width_; - size_t height_; + size_t starter_width_; + size_t starter_height_; size_t max_depth_ = 13000; double angle_stdev_ = 20.0; double focal_stdev_ = 40.0; std::vector theta_means_ = {-180.0, 0.0, 180.0}; std::vector phi_means_ = {-180.0, 0.0, 180.0}; std::vector gamma_means_ = {-180.0, -90.0, 0.0, 90.0, 180.0}; - std::default_random_engine theta_generator_; - std::default_random_engine phi_generator_; - std::default_random_engine gamma_generator_; - std::default_random_engine dz_generator_; - std::default_random_engine focal_generator_; double theta_; double phi_; double gamma_;