Optimizer library #2190
Changes from 9 commits
@@ -1,4 +1,5 @@
include_directories(${CMAKE_CURRENT_BINARY_DIR})
add_subdirectory(optimizer)

go_library(adder SRCS adder.go)
@@ -0,0 +1,29 @@
include_directories(${CMAKE_CURRENT_BINARY_DIR})

set(OPTIMIZER_SRCS
    optimizer_factory.cc
    parameter_optimizer.cc
    regularizer.cc
)

set(OPTIMIZER_HEADERS
    optimizer.h
    Tensor.h
    optimizer_factory.h
    parameter_optimizer.h
    regularizer.h
)

add_library(optimizer STATIC ${OPTIMIZER_SRCS})
add_dependencies(optimizer gen_proto_cpp)

# TODO: remove hard-coded link options
include_directories("/home/work/dongzhihong/github/Paddle/Paddle/third_party/install/glog/include")

Review comment: Can you ask @gangliao how to include glog properly?
Reply: Done.

link_directories("/home/work/dongzhihong/github/Paddle/Paddle/third_party/install/glog/lib")
# add_executable(optimizer_test optimizer_test.cpp)

Review comment: Please no commented-out code.
Reply: Fixed.

# add_dependencies(optimizer_test gen_proto_cpp gtest)
# add_executable(optimizer_factory_test optimizer_factory_test.cpp)
# add_test(NAME optimizer_test COMMAND optimizer_test)
add_simple_unittest(optimizer_test)
add_simple_unittest(optimizer_factory_test)
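
Following the review thread above, a hedged CMake sketch of how the hard-coded glog paths might be replaced. The external/glog module and the GLOG_INCLUDE_DIR / GLOG_LIBRARIES variables are illustrative assumptions, not the actual Paddle third-party setup:

# sketch only: rely on the project's third-party glog build instead of
# absolute developer paths; names below are assumptions for illustration
include(external/glog)                       # assumed module that fetches and builds glog
include_directories(${GLOG_INCLUDE_DIR})     # assumed variable exported by that module
target_link_libraries(optimizer ${GLOG_LIBRARIES})  # assumed variable
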
@@ -0,0 +1,25 @@
#ifndef PADDLE_FAKE_TENSOR_H_
#define PADDLE_FAKE_TENSOR_H_
/**
 * @brief fake tensor for testing
 */

#include "paddle/math/BaseMatrix.h"
#include <string.h>

namespace paddle {

Review comment: Since this is the tensor only used by the optimizer for now, maybe add a pserver namespace here.
Reply: 👍 Will fix! I thought we would port it to the majel tensor sooner or later; in fact, there may be a naming conflict between modules. Thanks for mentioning it.

template <class T>
using TensorBase = BaseMatrixT<T>;

template <class T>
class Tensor : public TensorBase<T> {
public:
  Tensor(T* data, int size) : TensorBase<T>(size, 1, 0, data, false, false) {}
  T* get_buffer() { return this->data_; }
  // TODO: replace with TensorShape

Review comment: Conceptually, the parameter server optimizes vectors. Since a vector has height 1 and variable width, maybe the accessor should be named accordingly.
Reply: Hmm, same reason as above: this is a temporary solution, since there is no proper tensor type available in Paddle yet.

  size_t height() { return this->height_; }
};
}  // namespace paddle

#endif
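
For context, a minimal usage sketch of the fake tensor above, assuming the header compiles against paddle/math/BaseMatrix.h; the buffer contents are placeholders:

#include "Tensor.h"

int main() {
  float buffer[4] = {0.1f, 0.2f, 0.3f, 0.4f};
  // wraps the caller-owned buffer; the trailing ctor flags mean it is neither copied nor owned
  paddle::Tensor<float> t(buffer, 4);
  float* data = t.get_buffer();  // direct access to the underlying memory
  size_t n = t.height();         // element count (see the naming discussion above)
  return (data != nullptr && n == 4) ? 0 : 1;
}
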
@@ -0,0 +1,74 @@
#include "optimizer.h" | ||
#include <string> | ||
|
||
#include "parameter_optimizer.h" | ||
|
||

template <paddle_element_type E>
struct EnumToType {};

template <class T>
struct TypeToEnum {};

#define MATCH_ENUM_TYPE(TYPE, ENUM)                     \
  template <>                                           \
  struct TypeToEnum<TYPE> {                             \
    static paddle_element_type v() { return ENUM; }     \
    static constexpr paddle_element_type value = ENUM;  \
  };                                                    \
  template <>                                           \
  struct EnumToType<ENUM> {                             \
    typedef TYPE Type;                                  \
  }

MATCH_ENUM_TYPE(int32_t, PADDLE_ELEMENT_TYPE_INT32);
MATCH_ENUM_TYPE(uint32_t, PADDLE_ELEMENT_TYPE_UINT32);
MATCH_ENUM_TYPE(int64_t, PADDLE_ELEMENT_TYPE_INT64);
MATCH_ENUM_TYPE(uint64_t, PADDLE_ELEMENT_TYPE_UINT64);
MATCH_ENUM_TYPE(float, PADDLE_ELEMENT_TYPE_FLOAT32);
MATCH_ENUM_TYPE(double, PADDLE_ELEMENT_TYPE_FLOAT64);

struct paddle_optimizer {
  /*! \brief the optimizer implementation on the C++ side */
  paddle::optimizer::ParameterOptimizer* impl;
};

paddle_optimizer* paddle_create_optimizer(const unsigned char* config_proto,
                                          int config_proto_len) {
  paddle_optimizer* optimizer = new paddle_optimizer;
  std::string config(config_proto, config_proto + config_proto_len);
  // assumes ParameterOptimizer::create is the factory declared in parameter_optimizer.h
  optimizer->impl = paddle::optimizer::ParameterOptimizer::create(config);
  return optimizer;
}

int paddle_release_optimizer(paddle_optimizer* o) {
  if (o != nullptr) o->impl->destroy();
  return PADDLE_SUCCESS;
}

int paddle_update_parameter(paddle_optimizer* o,
                            paddle_element_type data_type,
                            const void* grad_buffer,
                            int num_bytes) {
  // data_type is a runtime value, so it cannot be used as a template argument;
  // dispatch on it explicitly (float32 shown, the other element types are analogous)
  switch (data_type) {
    case PADDLE_ELEMENT_TYPE_FLOAT32: {
      paddle::Tensor<float> gradient(
          reinterpret_cast<float*>(const_cast<void*>(grad_buffer)), num_bytes);
      o->impl->update(gradient);
      break;
    }
    default:
      return PADDLE_ERROR;
  }
  return PADDLE_SUCCESS;
}

int paddle_optimizer_set_weights(paddle_optimizer* o,
                                 paddle_element_type data_type,
                                 void* param_buffer,
                                 int num_bytes) {
  // same runtime dispatch as in paddle_update_parameter
  switch (data_type) {
    case PADDLE_ELEMENT_TYPE_FLOAT32: {
      paddle::Tensor<float>* param = new paddle::Tensor<float>(
          reinterpret_cast<float*>(param_buffer), num_bytes);
      o->impl->set_weight(param);
      break;
    }
    default:
      return PADDLE_ERROR;
  }
  return PADDLE_SUCCESS;
}

Review comment: This function needs proper indentation.
Reply: Oh, my bad, I had stopped the pre-commit script...

Review comment: I could not find set_weight here.
Reply: Fixed.

void* paddle_optimizer_get_weights(paddle_optimizer* o) {
  void* buffer = reinterpret_cast<void*>(o->impl->get_weight());
  return buffer;
}
@@ -0,0 +1,93 @@
#ifndef PADDLE_LIB_OPTIMIZER_H_
#define PADDLE_LIB_OPTIMIZER_H_
#include <stdbool.h>
#include <stdint.h>

/*! \brief optimizer exported C API, used in:

   Case A: on the trainer (parameter-server client) side, to optimize gradients

   Case B: on the parameter-server side, to optimize gradients

   To simplify configuration parsing, the optimizer does *not* parse any config;
   e.g. the learning rate must be computed by the caller.
 */

#ifdef __cplusplus
extern "C" {
#endif
/*! \brief datatypes */
typedef enum {
  PADDLE_ELEMENT_TYPE_INT32 = 0,
  PADDLE_ELEMENT_TYPE_UINT32 = 1,
  PADDLE_ELEMENT_TYPE_INT64 = 2,
  PADDLE_ELEMENT_TYPE_UINT64 = 3,
  PADDLE_ELEMENT_TYPE_FLOAT32 = 4,
  PADDLE_ELEMENT_TYPE_FLOAT64 = 5,
} paddle_element_type;

/*! \brief execution status codes */
const int32_t PADDLE_SUCCESS = 0;
const int32_t PADDLE_ERROR = -1;

typedef struct paddle_optimizer paddle_optimizer;

/**
 * This group of interfaces is called in order:
 * 1. create optimizer with config
 * 2. set weights
 * 3. update parameter
 * 4. get weights
 * 5. release optimizer
 *
 * A usage sketch following this order appears after the header.
 */

/**
 * @brief create an optimizer from a protobuf config
 * @param config_proto, optimizer protobuf; see OptimizerConfig.proto for details
 * @return optimizer instance
 */
paddle_optimizer* paddle_create_optimizer(const unsigned char* config_proto,
                                          int config_proto_len);

/**
 * @brief release the optimizer
 * @param optimizer
 * @return execution status
 */
int paddle_release_optimizer(paddle_optimizer* o);

/**
 * @brief apply a gradient to the parameter held by the optimizer
 * @param data_type datatype of gradient and parameter
 * @param gradient, calculated by the optimizer's caller.
 *        TODO(zhihong): just pass the loss to reduce communication overhead;
 *        see Microsoft's Project Adam paper ('14) for details
 * @param num_bytes, gradient size
 * @return execution status
 */
int paddle_update_parameter(paddle_optimizer* o,
                            paddle_element_type data_type,
                            const void* gradient,
                            int num_bytes);

/**
 * @brief set the parameter to be optimized
 * @param data_type datatype of the parameter
 * @param param_buffer, initialized parameter buffer
 * @param num_bytes, parameter size
 * @return execution status
 */
int paddle_optimizer_set_weights(paddle_optimizer* o,
                                 paddle_element_type data_type,
                                 void* param_buffer,
                                 int num_bytes);

/**
 * @brief fetch the parameter buffer
 * @return contents of the parameter buffer held by the optimizer
 */
void* paddle_optimizer_get_weights(paddle_optimizer* o);

#ifdef __cplusplus
}
#endif
#endif
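
A minimal, hedged usage sketch of the five-step call order described above; the serialized config bytes and buffer contents are placeholders, and error handling is elided. Note that num_bytes is passed straight through as the element count, following the current implementation:

#include "optimizer.h"

void example(const unsigned char* proto_bytes, int proto_len) {
  /* 1. create optimizer from a serialized OptimizerConfig (placeholder bytes) */
  paddle_optimizer* o = paddle_create_optimizer(proto_bytes, proto_len);

  /* 2. hand the optimizer the parameter it will update */
  float param[4] = {0.0f, 0.0f, 0.0f, 0.0f};
  paddle_optimizer_set_weights(o, PADDLE_ELEMENT_TYPE_FLOAT32, param, 4);

  /* 3. apply a gradient computed by the caller */
  float grad[4] = {0.1f, -0.2f, 0.05f, 0.0f};
  paddle_update_parameter(o, PADDLE_ELEMENT_TYPE_FLOAT32, grad, 4);

  /* 4. read back the updated weights */
  float* updated = (float*)paddle_optimizer_get_weights(o);
  (void)updated;

  /* 5. release */
  paddle_release_optimizer(o);
}
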
@@ -0,0 +1,66 @@
#include "optimizer_factory.h" | ||
|
||
namespace paddle { | ||
namespace optimizer { | ||
|
||
template<class T> | ||
SGDOptimizer<T>::SGDOptimizer(const ::paddle::OptimizerConfig &config) : ParameterOptimizer<T>(config) { | ||
learning_rate = config.learning_rate(); | ||
momentum = config.momentum(); | ||
decay = config.decay(); | ||
nesterov = config.nesterov(); | ||
lr_decay_a = config.lr_decay_a(); | ||
lr_decay_b = config.lr_decay_b(); | ||
} | ||

template <class T>
void SGDOptimizer<T>::destroy() {
  // releases this optimizer instance; the review thread below questions
  // whether an explicit destroy() is needed at all
  delete this;
}

Review comment: I am not very familiar with C++; just curious, why do we need this destroy()?
Comment: @dzhwinter Can you take a look at this?
Reply: Thanks for pointing that out! You are right, we do not need an explicit destroy.

template <class T>
void SGDOptimizer<T>::set_weight(const Tensor<T>* p) {
  // ParameterOptimizer::set_weight(p);
  size_t size = p->height();
  // TODO: fix this with an alignment-aware allocator bound to Tensor
  T* ptr = new T[size];
  momentums_ = new Tensor<T>(ptr, size);
}

Review comment: Please no commented-out code.
Reply: Fixed. Thanks for the comment! I will double-check before sending the PR.

template <class T>
void SGDOptimizer<T>::update(const Tensor<T>& gradient) {
  num_sample_passed += 1;
  learning_rate = get_learning_rate();
  // assumes element-wise access on the parameter and momentum storage
  for (size_t i = 0; i < parameter_.size(); ++i) {
    momentums_[i] = momentum * momentums_[i] - learning_rate * gradient[i] -
                    decay * parameter_[i];
    if (nesterov) {
      // TODO(zhihong): fix Nesterov updating; see the sketch after this file
      parameter_[i] += momentums_[i];
    } else {
      parameter_[i] += momentums_[i];
    }
  }
}

Review comment: I thought SGD does not use momentum?

template <class T>
char* SGDOptimizer<T>::get_config_proto() {
  ParameterOptimizer<T>::get_config_proto();
  config.set_learning_rate(learning_rate);
  config.set_momentum(momentum);
  config.set_decay(decay);
  config.set_nesterov(nesterov);
  // NOTE: SerializeAsString() returns a temporary std::string, so this pointer
  // dangles; the serialized bytes should be stored in a member instead
  return config.SerializeAsString().c_str();
}

template class SGDOptimizer<float>;
template class SGDOptimizer<double>;
template class AdagradOptimizer<float>;
template class AdagradOptimizer<double>;
template class AdadeltaOptimizer<float>;
template class AdadeltaOptimizer<double>;
template class AdamOptimizer<float>;
template class AdamOptimizer<double>;
}  // namespace optimizer
}  // namespace paddle
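
Since both branches of the Nesterov check above currently apply the same update, here is a hedged sketch of the usual distinction between plain momentum and the Nesterov look-ahead, in scalar form for a single parameter element; weight decay is omitted for clarity:

// sketch only: classic momentum vs. Nesterov momentum for one element
// p: parameter, v: velocity (momentum buffer), g: gradient,
// lr: learning rate, mu: momentum coefficient
void momentum_step(double& p, double& v, double g, double lr, double mu) {
  v = mu * v - lr * g;
  p += v;  // plain momentum: step along the updated velocity
}

void nesterov_step(double& p, double& v, double g, double lr, double mu) {
  double v_prev = v;
  v = mu * v - lr * g;
  p += -mu * v_prev + (1 + mu) * v;  // look-ahead correction on top of the plain step
}
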
@@ -0,0 +1,74 @@
#ifndef PADDLE_OPTIMIZER_FACTORY_H_
#define PADDLE_OPTIMIZER_FACTORY_H_

#include "parameter_optimizer.h"

namespace paddle {
namespace optimizer {

template <class T>
class SGDOptimizer : public ParameterOptimizer<T> {
public:
  /*! \brief calls applySGD, for example */
  SGDOptimizer(const ::paddle::OptimizerConfig& config);
  void set_weight(const Tensor<T>* p);
  T* get_weight() const;
  void update(const Tensor<T>& gradient);
  char* get_config_proto();
  void destroy();
  ~SGDOptimizer() {
    // memory is cleared by the Tensor library
    delete momentums_;
  }

private:
  Tensor<T>* momentums_;
  double learning_rate;
  double momentum;
  double decay;
  bool nesterov;
  double lr_decay_a;
  double lr_decay_b;
};

Review comment: This file contains declarations for several optimizers. By the way, "factory" typically means a single function that creates instances; since this file declares the different optimizers themselves, the name may be misleading.
Reply: Agreed. I will split them into different files.

template <class T>
class AdagradOptimizer : public ParameterOptimizer<T> {
public:
  void update(const Tensor<T>& gradient) {}

private:
  double learning_rate;
  double epsilon;
  double decay;
};

Review comment: I can't find implementations for AdagradOptimizer, Adam, or Adadelta. Maybe just remove these placeholders? It's confusing when something is not implemented but partially present in the code base.
Reply: Well, as an optimizer library it implements the four most commonly used optimization methods, which can cover 90% of applications...

template <class T>
class AdadeltaOptimizer : public ParameterOptimizer<T> {
public:
  void update(const Tensor<T>& gradient) {}

private:
  double learning_rate;
  double rho;
  double epsilon;
  double decay;
};

template <class T>
class AdamOptimizer : public ParameterOptimizer<T> {
public:
  void update(const Tensor<T>& gradient) {}

private:
  double learning_rate;
  double beta_1;
  double beta_2;
  double epsilon;
};

}  // namespace optimizer
}  // namespace paddle
#endif

Review comment: repo/go is mostly for the Go project and its bindings. Maybe the optimizer is better located under repo/paddle/pserver?

Reply: That's a good point. In its final form, will the optimizer be used only on the Go pserver side, or in the trainer library as well as the Go pserver? If the latter, should we put it into repo/paddle/optimizer?

Reply: That's a good point. In the final form we should share the optimizer when running locally and on the pserver. Let's move it to repo/paddle/optimizer.