This repository has been archived by the owner on Sep 27, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 623
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add first implementation of augmentedNN to predict selectivity (#1473)
* add first implementation of augmentedNN to predict selectivity for range predicates * add first implementation of augmentedNN to predict selectivity * add first implementation of augmentedNN to predict selectivity * add comments and modify variable names * rename some variables * create brain/selectivity; create new test file for augmented_nn. * remove duplicated files * check if travis is ok
- Loading branch information
Showing
14 changed files
with
1,108 additions
and
250 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
#===----------------------------------------------------------------------===# | ||
# | ||
# Peloton | ||
# | ||
# augmented_nn.py | ||
# | ||
# Identification: src/brain/modelgen/augmented_nn.py | ||
# | ||
# Copyright (c) 2015-2018, Carnegie Mellon University Database Group | ||
# | ||
#===----------------------------------------------------------------------===# | ||
|
||
import tensorflow as tf | ||
import functools | ||
import os | ||
import argparse | ||
|
||
def lazy_property(function):
    """Wrap a zero-argument method as a property that is computed only once.

    On first access the wrapped method is called and its result is stored on
    the instance under the attribute ``'_cache_' + function.__name__``. Every
    later access returns the stored value without calling the method again.

    Args:
        function: method taking only ``self``.

    Returns:
        A ``property`` object whose getter carries ``function``'s metadata.
    """
    cache_name = '_cache_' + function.__name__

    def cached_getter(self):
        # Fast path: the value was already computed for this instance.
        if hasattr(self, cache_name):
            return getattr(self, cache_name)
        setattr(self, cache_name, function(self))
        return getattr(self, cache_name)

    return property(functools.wraps(function)(cached_getter))
|
||
class AugmentedNN:
    """Builds the TensorFlow graph of the augmented neural network.

    Construction resets the default graph, declares the ``data_``,
    ``target_`` and ``learn_rate_`` placeholders, and then touches the
    ``prediction``, ``loss`` and ``optimize`` properties so that their ops
    are added to the graph in that order.

    NOTE(review): this class was recovered from a rendering that dropped
    indentation. The extent of the two ``tf.name_scope`` blocks inside
    ``prediction`` is an assumption -- confirm against the original file.
    """

    def __init__(self, column_num, order=1, neuron_num=16, lr=0.1, **kwargs):
        """Declare the placeholders and build the whole graph.

        Args:
            column_num: number of columns; the input placeholder is
                ``column_num*2`` wide.
            order: highest power used by the hidden-layer activations.
            neuron_num: number of units in each hidden dense layer.
            lr: default value fed to the ``learn_rate_`` placeholder.
            **kwargs: accepted and ignored.
        """
        tf.reset_default_graph()
        self.data = tf.placeholder(tf.float32, [None, column_num*2], name="data_")
        self.target = tf.placeholder(tf.float32, [None, 1], name="target_")
        self._column_num = column_num
        self._order = order
        self._neuron_num = neuron_num
        self._lr = tf.placeholder_with_default(lr, shape=None,
                                               name="learn_rate_")
        # Stored uncalled; the caller invokes it to add the initializer op.
        self.tf_init = tf.global_variables_initializer
        # Reading each lazy property once is what actually builds its ops.
        for graph_part in ("prediction", "loss", "optimize"):
            getattr(self, graph_part)

    @staticmethod
    def jump_activation(k):
        """
        Return the activation ``max(0, 1 - exp(-x)) ** k``.

        This is an activation function used to learn discontinuous functions.
        Reference: https://dl.acm.org/citation.cfm?id=2326898
        """
        def jump_activation_k(x):
            saturating = 1-tf.exp(-x)
            clipped = tf.maximum(0.0, saturating)
            return tf.pow(clipped, k)
        return jump_activation_k

    @lazy_property
    def prediction(self):
        """Output tensor ``pred_``: one value per input row."""
        inputs = self.data
        weights_init = tf.random_normal_initializer(mean=0.0001, stddev=0.0001)
        with tf.name_scope("hidden_layer"):
            input_shape = tf.shape(inputs)
            batch_size = input_shape[0]

            # One dense layer per activation order 1..order, all reading the
            # same input, concatenated along the feature axis.
            branches = [
                tf.layers.dense(inputs, self._neuron_num,
                                activation=self.jump_activation(power),
                                kernel_initializer=weights_init)
                for power in range(1, self._order+1)
            ]
            hidden = tf.concat(branches, 1)
        with tf.name_scope("output_layer"):
            output = tf.layers.dense(hidden, 1,
                                     activation=self.jump_activation(1),
                                     kernel_initializer=weights_init)
        # Kept outside the name scope so the op is reachable as plain "pred_".
        output = tf.reshape(output, [batch_size, -1], name="pred_")
        return output

    @lazy_property
    def loss(self):
        """Mean squared difference between target and prediction (``lossOp_``)."""
        squared_error = tf.squared_difference(self.target, self.prediction)
        return tf.reduce_mean(squared_error, name='lossOp_')

    @lazy_property
    def optimize(self):
        """Adagrad update op over all trainable variables (``optimizeOp_``)."""
        variables = tf.trainable_variables()
        grads = tf.gradients(self.loss, variables)
        adagrad = tf.train.AdagradOptimizer(learning_rate=self._lr)
        grads_and_vars = zip(grads, variables)
        return adagrad.apply_gradients(grads_and_vars, name="optimizeOp_")

    def write_graph(self, dir):
        """Serialize the default graph to ``<dir>/<repr(self)>.pb``.

        Nothing is written when that file already exists.
        """
        fname = "{}.pb".format(repr(self))
        if os.path.exists(os.path.join(dir, fname)):
            return
        tf.train.write_graph(tf.get_default_graph(), dir, fname, False)

    def __repr__(self):
        """Model name; also used as the graph file's base name."""
        return "augmented_nn"
|
||
def main():
    """Parse the command line, build the model graph and write it to disk.

    Positional argument ``graph_out_path`` is the directory that receives
    the serialized graph; the optional flags set the model hyperparameters.
    """
    parser = argparse.ArgumentParser(description='AugmentedNN Model Generator')

    # The model's input placeholder is 2 * column_num wide, so this flag
    # counts columns; the earlier help text described it as hidden units.
    parser.add_argument('--column_num', type=int, default=1,
                        help='Number of columns (model input width is 2 * column_num)')
    parser.add_argument('--order', type=int, default=3,
                        help='Max order of activation function')
    parser.add_argument('--neuron_num', type=int, default=20,
                        help='Number of neurons in hidden layer')
    parser.add_argument('--lr', type=float, default=0.001, help='Learning rate')
    parser.add_argument('graph_out_path', type=str,
                        help='Path to write graph output', nargs='+')
    args = parser.parse_args()
    model = AugmentedNN(args.column_num, args.order, args.neuron_num, args.lr)
    # Calling tf_init adds the variable-initializer op to the graph before
    # the graph is serialized.
    model.tf_init()
    # nargs='+' splits a path containing spaces into several tokens; join
    # them back into one directory path.
    model.write_graph(' '.join(args.graph_out_path))


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,167 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// Peloton | ||
// | ||
// augmented_nn.cpp | ||
// | ||
// Identification: src/brain/selectivity/augmented_nn.cpp | ||
// | ||
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "brain/selectivity/augmented_nn.h" | ||
#include "brain/util/model_util.h" | ||
#include "brain/util/tf_session_entity/tf_session_entity.h" | ||
#include "brain/util/tf_session_entity/tf_session_entity_input.h" | ||
#include "brain/util/tf_session_entity/tf_session_entity_output.h" | ||
#include "util/file_util.h" | ||
|
||
namespace peloton { | ||
namespace brain { | ||
|
||
// Stores the hyperparameters, generates the model graph from the Python
// script, imports that graph into the TensorFlow session and initializes it.
AugmentedNN::AugmentedNN(int column_num, int order, int neuron_num,
                         float learn_rate, int batch_size, int epochs)
    : BaseTFModel("src/brain/modelgen", "src/brain/modelgen/augmented_nn.py",
                  "src/brain/modelgen/augmented_nn.pb"),
      column_num_(column_num),
      order_(order),
      neuron_num_(neuron_num),
      learn_rate_(learn_rate),
      batch_size_(batch_size),
      epochs_(epochs) {
  // Step 1: produce the graph file using the command-line arguments that
  // describe this model.
  GenerateModel(ConstructModelArgsString());

  // Step 2: load the generated graph into the session.
  tf_session_entity_->ImportGraph(graph_path_);

  // Step 3: run model initialization.
  TFInit();
}
|
||
std::string AugmentedNN::ConstructModelArgsString() const { | ||
std::stringstream args_str_builder; | ||
args_str_builder << " --column_num " << column_num_; | ||
args_str_builder << " --order " << order_; | ||
args_str_builder << " --neuron_num " << neuron_num_; | ||
args_str_builder << " --lr " << learn_rate_; | ||
args_str_builder << " " << this->modelgen_path_; | ||
return args_str_builder.str(); | ||
} | ||
|
||
// Returns a one-line description of the model and its hyperparameters,
// e.g. "augmented_nn(column_num = 1, order = 1, ...)". epochs_ is not
// included.
std::string AugmentedNN::ToString() const {
  std::stringstream description;
  description << "augmented_nn("
              << "column_num = " << column_num_
              << ", order = " << order_
              << ", neuron_num = " << neuron_num_
              << ", lr = " << learn_rate_
              << ", batch_size = " << batch_size_
              << ")";
  return description.str();
}
|
||
// returns a batch | ||
void AugmentedNN::GetBatch(const matrix_eig &mat, size_t batch_offset, | ||
size_t bsz, matrix_eig &data, | ||
matrix_eig &target) { | ||
size_t row_idx = batch_offset * bsz; | ||
data = mat.block(row_idx, 0, bsz, mat.cols() - 1); | ||
target = mat.block(row_idx, mat.cols() - 1, bsz, 1); | ||
} | ||
|
||
// backpropagate once | ||
void AugmentedNN::Fit(const matrix_eig &X, const matrix_eig &y, int bsz) { | ||
auto data_batch = EigenUtil::Flatten(X); | ||
auto target_batch = EigenUtil::Flatten(y); | ||
std::vector<int64_t> dims_data{bsz, X.cols()}; | ||
std::vector<int64_t> dims_target{bsz, 1}; | ||
std::vector<TfFloatIn *> inputs_optimize{ | ||
new TfFloatIn(data_batch.data(), dims_data, "data_"), | ||
new TfFloatIn(target_batch.data(), dims_target, "target_"), | ||
new TfFloatIn(learn_rate_, "learn_rate_")}; | ||
tf_session_entity_->Eval(inputs_optimize, "optimizeOp_"); | ||
std::for_each(inputs_optimize.begin(), inputs_optimize.end(), TFIO_Delete); | ||
} | ||
|
||
float AugmentedNN::TrainEpoch(const matrix_eig &mat) { | ||
std::vector<float> losses; | ||
// Obtain relevant metadata | ||
int min_allowed_bsz = 1; | ||
int bsz = std::min((int)mat.rows(), std::max(batch_size_, min_allowed_bsz)); | ||
int number_of_batches = mat.rows() / bsz; | ||
int num_cols = mat.cols() - 1; | ||
|
||
std::vector<matrix_eig> y_batch, y_hat_batch; | ||
// Run through each batch and compute loss/apply backprop | ||
for (int batch_offset = 0; batch_offset < number_of_batches; | ||
++batch_offset) { | ||
matrix_eig data_batch, target_batch; | ||
GetBatch(mat, batch_offset, bsz, data_batch, target_batch); | ||
|
||
std::vector<int64_t> dims_data{bsz, num_cols}; | ||
std::vector<int64_t> dims_target{bsz, 1}; | ||
|
||
Fit(data_batch, target_batch, bsz); | ||
|
||
matrix_eig y_hat_eig = Predict(data_batch, bsz); | ||
y_hat_batch.push_back(y_hat_eig); | ||
y_batch.push_back(target_batch); | ||
} | ||
matrix_eig y = EigenUtil::VStack(y_batch); | ||
matrix_eig y_hat = EigenUtil::VStack(y_hat_batch); | ||
return ModelUtil::MeanSqError(y, y_hat); | ||
|
||
} | ||
|
||
// Evaluates the "pred_" op on X and returns the predictions.
// x: [bsz, 2]
// return: [bsz, 1]
matrix_eig AugmentedNN::Predict(const matrix_eig &X, int bsz) const {
  auto flat_inputs = EigenUtil::Flatten(X);
  std::vector<int64_t> dims_data{bsz, X.cols()};
  std::vector<int64_t> dims_target{bsz, 1};

  std::vector<TfFloatIn *> feed{
      new TfFloatIn(flat_inputs.data(), dims_data, "data_")};
  auto fetch = new TfFloatOut(dims_target, "pred_");

  // NOTE(review): ownership of the buffer returned by Eval is not visible
  // here -- confirm it does not need to be released by the caller.
  auto out = tf_session_entity_->Eval(feed, fetch);

  // Copy each scalar prediction into its own single-element row.
  matrix_t predictions;
  for (int row = 0; row < bsz; row++) {
    predictions.push_back(vector_t{out[row]});
  }

  // Release the session input/output wrappers allocated above.
  for (auto *fed_input : feed) {
    TFIO_Delete(fed_input);
  }
  TFIO_Delete(fetch);
  return EigenUtil::ToEigenMat(predictions);
}
|
||
float AugmentedNN::ValidateEpoch(const matrix_eig &mat) { | ||
// Obtain relevant metadata | ||
int min_allowed_bsz = 1; | ||
int bsz = std::min((int)mat.rows(), std::max(batch_size_, min_allowed_bsz)); | ||
int number_of_batches = mat.rows() / bsz; | ||
int num_cols = mat.cols() - 1; | ||
|
||
std::vector<matrix_eig> y_batch, y_hat_batch; | ||
// Apply Validation | ||
// Run through each batch and compute loss/apply backprop | ||
for (int batch_offset = 0; batch_offset < number_of_batches; | ||
++batch_offset) { | ||
matrix_eig data_batch, target_batch; | ||
GetBatch(mat, batch_offset, bsz, data_batch, target_batch); | ||
|
||
std::vector<int64_t> dims_data{bsz, num_cols}; | ||
std::vector<int64_t> dims_target{bsz, 1}; | ||
|
||
matrix_eig y_hat_eig = Predict(data_batch, bsz); | ||
y_hat_batch.push_back(y_hat_eig); | ||
y_batch.push_back(target_batch); | ||
} | ||
matrix_eig y = EigenUtil::VStack(y_batch); | ||
matrix_eig y_hat = EigenUtil::VStack(y_hat_batch); | ||
return ModelUtil::MeanSqError(y, y_hat); | ||
|
||
} | ||
} // namespace brain | ||
} // namespace peloton | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// Peloton | ||
// | ||
// selectivity_defaults.cpp | ||
// | ||
// Identification: src/brain/selectivity/selectivity_defaults.cpp | ||
// | ||
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "brain/selectivity/selectivity_defaults.h" | ||
|
||
namespace peloton { | ||
namespace brain { | ||
|
||
// Out-of-class definitions of the AugmentedNNDefaults constants. | ||
// NOTE(review): the names mirror the AugmentedNN constructor parameters | ||
// (column_num, order, neuron_num, learn_rate, batch_size, epochs); | ||
// presumably they are its default arguments -- confirm in the header. | ||
const int AugmentedNNDefaults::COLUMN_NUM = 1; | ||
const int AugmentedNNDefaults::ORDER = 1; | ||
const int AugmentedNNDefaults::NEURON_NUM = 16; | ||
const float AugmentedNNDefaults::LR = 0.1f; | ||
const int AugmentedNNDefaults::BATCH_SIZE = 256; | ||
const int AugmentedNNDefaults::EPOCHS = 600; | ||
|
||
|
||
} // namespace brain | ||
} // namespace peloton |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,40 +1,40 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// Peloton | ||
// | ||
// workload_defaults.cpp | ||
// | ||
// Identification: src/brain/workload/workload_defaults.cpp | ||
// | ||
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "brain/workload/workload_defaults.h" | ||
|
||
namespace peloton { | ||
namespace brain { | ||
|
||
// Out-of-class definitions of the CommonWorkloadDefaults constants. | ||
// NOTE(review): units of HORIZON / INTERVAL are not visible here, and | ||
// ESTOP_* presumably configure early stopping -- confirm in the header. | ||
const int CommonWorkloadDefaults::HORIZON = 216; | ||
const int CommonWorkloadDefaults::INTERVAL = 100; | ||
const int CommonWorkloadDefaults::PADDLING_DAYS = 7; | ||
const int CommonWorkloadDefaults::ESTOP_PATIENCE = 10; | ||
const float CommonWorkloadDefaults::ESTOP_DELTA = 0.01f; | ||
|
||
// Out-of-class definitions of the LSTMWorkloadDefaults constants. | ||
const int LSTMWorkloadDefaults::NFEATS = 3; | ||
const int LSTMWorkloadDefaults::NENCODED = 20; | ||
const int LSTMWorkloadDefaults::NHID = 20; | ||
const int LSTMWorkloadDefaults::NLAYERS = 2; | ||
const float LSTMWorkloadDefaults::LR = 0.01f; | ||
const float LSTMWorkloadDefaults::DROPOUT_RATE = 0.5f; | ||
const float LSTMWorkloadDefaults::CLIP_NORM = 0.5f; | ||
const int LSTMWorkloadDefaults::BATCH_SIZE = 12; | ||
const int LSTMWorkloadDefaults::BPTT = 90; | ||
const int LSTMWorkloadDefaults::EPOCHS = 100; | ||
|
||
// BPTT for the linear-regression defaults; same value as the LSTM BPTT. | ||
const int LinearRegWorkloadDefaults::BPTT = 90; | ||
|
||
// BPTT for the kernel-regression defaults; same value as the LSTM BPTT. | ||
const int KernelRegWorkloadDefaults::BPTT = 90; | ||
|
||
} // namespace brain | ||
} // namespace peloton | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// Peloton | ||
// | ||
// workload_defaults.cpp | ||
// | ||
// Identification: src/brain/workload/workload_defaults.cpp | ||
// | ||
// Copyright (c) 2015-2018, Carnegie Mellon University Database Group | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "brain/workload/workload_defaults.h" | ||
|
||
namespace peloton { | ||
namespace brain { | ||
|
||
// Out-of-class definitions of the CommonWorkloadDefaults constants. | ||
// NOTE(review): units of HORIZON / INTERVAL are not visible here, and | ||
// ESTOP_* presumably configure early stopping -- confirm in the header. | ||
const int CommonWorkloadDefaults::HORIZON = 216; | ||
const int CommonWorkloadDefaults::INTERVAL = 100; | ||
const int CommonWorkloadDefaults::PADDLING_DAYS = 7; | ||
const int CommonWorkloadDefaults::ESTOP_PATIENCE = 10; | ||
const float CommonWorkloadDefaults::ESTOP_DELTA = 0.01f; | ||
|
||
// Out-of-class definitions of the LSTMWorkloadDefaults constants. | ||
const int LSTMWorkloadDefaults::NFEATS = 3; | ||
const int LSTMWorkloadDefaults::NENCODED = 20; | ||
const int LSTMWorkloadDefaults::NHID = 20; | ||
const int LSTMWorkloadDefaults::NLAYERS = 2; | ||
const float LSTMWorkloadDefaults::LR = 0.01f; | ||
const float LSTMWorkloadDefaults::DROPOUT_RATE = 0.5f; | ||
const float LSTMWorkloadDefaults::CLIP_NORM = 0.5f; | ||
const int LSTMWorkloadDefaults::BATCH_SIZE = 12; | ||
const int LSTMWorkloadDefaults::BPTT = 90; | ||
const int LSTMWorkloadDefaults::EPOCHS = 100; | ||
|
||
// BPTT for the linear-regression defaults; same value as the LSTM BPTT. | ||
const int LinearRegWorkloadDefaults::BPTT = 90; | ||
|
||
// BPTT for the kernel-regression defaults; same value as the LSTM BPTT. | ||
const int KernelRegWorkloadDefaults::BPTT = 90; | ||
|
||
} // namespace brain | ||
} // namespace peloton |
Oops, something went wrong.