Skip to content
This repository has been archived by the owner on Dec 21, 2023. It is now read-only.

Changed seeding strategy and have one generator per row of output data #2297

Merged
merged 6 commits into from
Sep 17, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,11 @@ namespace one_shot_object_detection {
namespace data_augmentation {

flex_dict build_annotation(ParameterSampler &parameter_sampler,
std::string label, size_t object_width,
size_t object_height, long seed) {
parameter_sampler.sample(seed);
const std::string &label,
size_t object_width, size_t object_height,
size_t background_width, size_t background_height,
size_t seed, size_t row_number) {
parameter_sampler.sample(background_width, background_height, seed, row_number);

size_t original_top_left_x = 0;
size_t original_top_left_y = 0;
Expand Down Expand Up @@ -118,20 +120,19 @@ flex_image create_rgba_flex_image(const flex_image &object_input) {
}

std::pair<flex_image, flex_dict>
create_synthetic_image_from_background_and_starter(const flex_image &starter,
create_synthetic_image_from_background_and_starter(ParameterSampler &parameter_sampler,
const flex_image &starter,
const flex_image &background,
std::string &label,
size_t seed,
size_t row_number) {
ParameterSampler parameter_sampler =
ParameterSampler(background.m_width, background.m_height,
(background.m_width - starter.m_width) / 2,
(background.m_height - starter.m_height) / 2);

// construct annotation dictionary from parameters
flex_dict annotation =
build_annotation(parameter_sampler, label, starter.m_width,
starter.m_height, seed + row_number);
build_annotation(parameter_sampler, label,
starter.m_width, starter.m_height,
background.m_width, background.m_height,
seed, row_number);

if (background.get_image_data() == nullptr) {
log_and_throw("Background image has null image data.");
Expand All @@ -156,7 +157,7 @@ create_synthetic_image_from_background_and_starter(const flex_image &starter,
background.m_width, background.m_height,
reinterpret_cast<const boost::gil::rgb8_pixel_t *>(
background.get_image_data()),
background.m_channels * background.m_width // row length in bytes
background.m_channels * background.m_width // row length in bytes
);
flex_image synthetic_image = create_synthetic_image(
starter_image_view, background_view, parameter_sampler);
Expand Down Expand Up @@ -209,23 +210,28 @@ gl_sframe augment_data(const gl_sframe &data,
* Replacing the `for` with a `parallel_for` fails the export_coreml unit test
* with an EXC_BAD_ACCESS in the function call to boost::gil::resample_pixels
*/
for (size_t segment_id = 0; segment_id < nsegments; segment_id++) {
size_t segment_start = (segment_id * backgrounds.size()) / nsegments;
size_t segment_end = ((segment_id + 1) * backgrounds.size()) / nsegments;
size_t row_number = segment_start;
for (const auto &background_ft :
backgrounds.range_iterator(segment_start, segment_end)) {
row_number++;
flex_image flex_background =
image_util::decode_image(background_ft.to<flex_image>());
for (const auto &row : decompressed_data.range_iterator()) {
// go through all the starter images and create augmented images for
// all starter images and the respective chunk of background images
const flex_image &object = row[image_column_index].get<flex_image>();
std::string label = row[target_column_index].to<flex_string>();
for (const sframe_rows::row &row : decompressed_data.range_iterator()) {
// go through all the starter images and create augmented images for
// all starter images and the respective chunk of background images
const flex_image &object = row[image_column_index].get<flex_image>();
std::string label = row[target_column_index].to<flex_string>();
ParameterSampler parameter_sampler = ParameterSampler(
object.m_width, object.m_height, 0, 0);

for (size_t segment_id = 0; segment_id < nsegments; segment_id++) {
size_t segment_start = (segment_id * backgrounds.size()) / nsegments;
size_t segment_end = ((segment_id + 1) * backgrounds.size()) / nsegments;
size_t row_number = segment_start;
for (const flexible_type &background_ft :
backgrounds.range_iterator(segment_start, segment_end)) {
row_number++;
flex_image flex_background =
image_util::decode_image(background_ft.to<flex_image>());

std::pair<flex_image, flex_dict> synthetic_row =
create_synthetic_image_from_background_and_starter(
object, flex_background, label, seed, row_number);
parameter_sampler, object, flex_background,
label, seed, row_number);
flex_image synthetic_image = synthetic_row.first;
flex_dict annotation = synthetic_row.second;
// write the synthetically generated image and the constructed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
*/

#include <core/data/image/numeric_extension/perspective_projection.hpp>
#include <core/logging/assertions.hpp>
#include <limits>

#include <toolkits/object_detection/one_shot_object_detection/util/parameter_sampler.hpp>

Expand All @@ -16,9 +18,9 @@
namespace turi {
namespace one_shot_object_detection {

ParameterSampler::ParameterSampler(size_t width, size_t height, size_t dx,
size_t dy)
: width_(width), height_(height), dx_(dx), dy_(dy) {}
ParameterSampler::ParameterSampler(size_t starter_width, size_t starter_height,
size_t dx, size_t dy)
: starter_width_(starter_width), starter_height_(starter_height), dx_(dx), dy_(dy) {}

double deg_to_rad(double angle) { return angle * M_PI / 180.0; }

Expand Down Expand Up @@ -61,36 +63,40 @@ void ParameterSampler::set_warped_corners(
warped_corners_[3] = warped_corners[2];
}

int generate_random_index(std::mt19937 *engine_pointer, int range) {
DASSERT_GT(range, 0);
std::uniform_int_distribution<int> index_distribution(0, range-1);
return index_distribution(*engine_pointer);
}

/* Function to sample all the parameters needed to build a transform, and
* then also build the transform.
*/
void ParameterSampler::sample(long seed) {
void ParameterSampler::sample(size_t background_width, size_t background_height,
size_t seed, size_t row_number) {
double theta_mean, phi_mean, gamma_mean;
std::srand(seed);
theta_mean = theta_means_[std::rand() % theta_means_.size()];
std::srand(seed + 1);
phi_mean = phi_means_[std::rand() % phi_means_.size()];
std::srand(seed + 2);
gamma_mean = gamma_means_[std::rand() % gamma_means_.size()];
std::seed_seq seed_seq = {static_cast<int>(seed), static_cast<int>(row_number)};
std::mt19937 engine(seed_seq);

theta_mean = theta_means_[generate_random_index(&engine, theta_means_.size())];
phi_mean = phi_means_[generate_random_index(&engine, phi_means_.size())];
gamma_mean = gamma_means_[generate_random_index(&engine, gamma_means_.size())];

std::normal_distribution<double> theta_distribution(theta_mean, angle_stdev_);
std::normal_distribution<double> phi_distribution(phi_mean, angle_stdev_);
std::normal_distribution<double> gamma_distribution(gamma_mean, angle_stdev_);
std::normal_distribution<double> focal_distribution((double)width_,
std::normal_distribution<double> focal_distribution(static_cast<double>(background_width),
focal_stdev_);
theta_generator_.seed(seed + 3);
theta_ = deg_to_rad(theta_distribution(theta_generator_));
phi_generator_.seed(seed + 4);
phi_ = deg_to_rad(phi_distribution(phi_generator_));
gamma_generator_.seed(seed + 5);
gamma_ = deg_to_rad(gamma_distribution(gamma_generator_));
focal_generator_.seed(seed + 6);
focal_ = focal_distribution(focal_generator_);
std::uniform_int_distribution<int> dz_distribution(std::max(width_, height_),

theta_ = deg_to_rad(theta_distribution(engine));
phi_ = deg_to_rad(phi_distribution(engine));
gamma_ = deg_to_rad(gamma_distribution(engine));
focal_ = focal_distribution(engine);
std::uniform_int_distribution<int> dz_distribution(std::max(background_width, background_height),
max_depth_);
dz_generator_.seed(seed + 7);
dz_ = focal_ + dz_distribution(dz_generator_);
dz_ = focal_ + dz_distribution(engine);
transform_ = warp_perspective::get_transformation_matrix(
width_, height_, theta_, phi_, gamma_, dx_, dy_, dz_, focal_);
starter_width_, starter_height_, theta_, phi_, gamma_, dx_, dy_, dz_, focal_);
warped_corners_.reserve(4);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ namespace one_shot_object_detection {
*/
class ParameterSampler {
public:
ParameterSampler(size_t width, size_t height, size_t dx, size_t dy);
ParameterSampler(size_t starter_width, size_t starter_height, size_t dx, size_t dy);

/* Getters for all the parameters:
* theta: rotation around the x axis.
Expand Down Expand Up @@ -54,22 +54,18 @@ class ParameterSampler {
/* Function to sample all the parameters needed to build a transform, and
* then also build the transform.
*/
void sample(long seed);
void sample(size_t background_width, size_t background_height,
size_t seed, size_t row_number);

private:
size_t width_;
size_t height_;
size_t starter_width_;
size_t starter_height_;
size_t max_depth_ = 13000;
double angle_stdev_ = 20.0;
double focal_stdev_ = 40.0;
std::vector<double> theta_means_ = {-180.0, 0.0, 180.0};
std::vector<double> phi_means_ = {-180.0, 0.0, 180.0};
std::vector<double> gamma_means_ = {-180.0, -90.0, 0.0, 90.0, 180.0};
std::default_random_engine theta_generator_;
std::default_random_engine phi_generator_;
std::default_random_engine gamma_generator_;
std::default_random_engine dz_generator_;
std::default_random_engine focal_generator_;
double theta_;
double phi_;
double gamma_;
Expand Down