Fix sparse training for trainer_count=1 (#204)
* Fix sparse training for trainer_count=1

For trainer_count=1, the gradient machine is NeuralNetwork, which does not create the parameter buffer for PARAMETER_GRADIENT for sparse update in Parameter::enableType(). But the gradient parameter buffer is still used in SgdThreadUpdater.

* Minor update to comment
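
To make the failure mode concrete, here is a minimal standalone sketch of the mismatch the message describes. This is not the actual Paddle source: Param, its bufs_ layout, and main() are invented for illustration; only the skip-the-dense-gradient-buffer-for-sparse-update behavior mirrors what Parameter::enableType() does.

    #include <cstddef>
    #include <cstdio>
    #include <memory>
    #include <vector>

    enum ParameterType { PARAMETER_VALUE = 0, PARAMETER_GRADIENT = 1, NUM_PARAMETER_TYPES = 2 };

    struct Param {
      bool sparseUpdate;
      std::shared_ptr<std::vector<float>> bufs_[NUM_PARAMETER_TYPES];

      // Like Parameter::enableType() as described above: a sparse-update
      // parameter gets no dense PARAMETER_GRADIENT buffer, because with
      // trainer_count > 1 the gradient lives in a SparseRowCpuMatrix instead.
      void enableType(ParameterType type, std::size_t size) {
        if (type == PARAMETER_GRADIENT && sparseUpdate) return;
        bufs_[type] = std::make_shared<std::vector<float>>(size, 0.0f);
      }
    };

    int main() {
      Param p{/*sparseUpdate=*/true, {}};
      p.enableType(PARAMETER_VALUE, 1024);
      p.enableType(PARAMETER_GRADIENT, 1024);  // silently skipped for sparse
      // SgdThreadUpdater-style code then assumes the gradient buffer exists:
      if (!p.bufs_[PARAMETER_GRADIENT]) {
        std::printf("PARAMETER_GRADIENT buffer is null -> updater would crash\n");
      }
      return 0;
    }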
emailweixu authored and reyoung committed Oct 17, 2016
1 parent b22e50e commit 28bc05b
Showing 5 changed files with 23 additions and 4 deletions.
2 changes: 1 addition & 1 deletion paddle/gserver/evaluators/ChunkEvaluator.cpp
@@ -75,7 +75,6 @@ class ChunkEvaluator : public Evaluator {
 
 public:
   virtual void init(const EvaluatorConfig& config) {
-    CHECK(!FLAGS_use_gpu) << "Not supported";
     Evaluator::init(config);
     if (config.chunk_scheme() == "IOB") {
       numTagTypes_ = 2;
@@ -137,6 +136,7 @@ class ChunkEvaluator : public Evaluator {
     CHECK_EQ(arguments.size(), (size_t)2);
     IVectorPtr& output = arguments[0].ids;
     IVectorPtr& label = arguments[1].ids;
+    CHECK(!output->useGpu() && !label->useGpu()) << "Not supported";
     auto sequenceStartPositions =
         arguments[1].sequenceStartPositions->getVector(false);
     CHECK_EQ(output->getSize(), label->getSize());
1 change: 0 additions & 1 deletion paddle/gserver/gradientmachines/MultiGradientMachine.cpp
@@ -813,7 +813,6 @@ void TrainerThread::mergeGradSparse(
           para->getMat(PARAMETER_GRADIENT).get());
   std::vector<uint32_t>& ids = mainMat->getIds(threadId_);
 
-  ids.clear();
   for (auto slaveParams : slaveParameters) {
     SparseRowCpuMatrix* mat =
         dynamic_cast<SparseRowCpuMatrix*>((*slaveParams)[pid]
6 changes: 6 additions & 0 deletions paddle/parameter/Parameter.h
@@ -146,6 +146,12 @@ class Parameter {
     }
   }
 
+  void enableBufType(ParameterType type) {
+    if (bufs_[type]) return;
+    bufs_[type] = Vector::createParallelVector(config_.size(), useGpu_);
+    bufs_[type]->zeroMem();
+  }
+
   void enableIntType(ParameterType type, size_t intStoreSize = 0) {
     if (!intBufs_[type]) {
       SetDevice device(deviceId_);
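
One note on the helper added above: the early return when bufs_[type] is already set makes enableBufType() idempotent, and zeroMem() ensures a freshly created gradient buffer starts from zero. Callers can therefore request the buffer unconditionally; a hedged usage sketch (the surrounding loop is hypothetical):

    for (auto& para : parameters) {
      // Allocates and zeroes on the first call; a no-op once the buffer exists.
      para->enableBufType(PARAMETER_GRADIENT);
    }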
16 changes: 15 additions & 1 deletion paddle/trainer/ThreadParameterUpdater.cpp
@@ -20,6 +20,8 @@ limitations under the License. */
 #include "paddle/math/SparseRowMatrix.h"
 #include "paddle/utils/Thread.h"
 
+P_DECLARE_int32(trainer_count);
+
 namespace paddle {
 
 SgdThreadUpdater::SgdThreadUpdater(const OptimizationConfig& optConfig)
@@ -48,6 +50,13 @@ void SgdThreadUpdater::init(std::vector<ParameterPtr>& parameters) {
                                      false /*inPserver*/));
     size_t numRows = para->isGradSparseUpdate() ? para->getConfig().dims(0) : 0;
     optimizers_[pid]->init(numRows, &para->getConfig());
+    if (para->isGradSparseUpdate() && FLAGS_trainer_count == 1) {
+      // For trainer_count=1, the gradient machine is NeuralNetwork, which does
+      // not create parameter buf for PARAMETER_GRADIENT for sparse update in
+      // Parameter::enableType(). But gradient parameter buf is still used
+      // in SgdThreadUpdater. We need to explicitly create it.
+      para->enableBufType(PARAMETER_GRADIENT);
+    }
   }
 }
 
@@ -211,7 +220,7 @@ void SgdThreadUpdater::threadUpdateSparse(
     // From MultiGradientMachine
     SparseRowIdsCpuMatrix* mainMat = dynamic_cast<SparseRowIdsCpuMatrix*>(
         para->getMat(PARAMETER_GRADIENT).get());
-    const std::vector<uint32_t>& sparseIds = mainMat->getIds(tid);
+    std::vector<uint32_t>& sparseIds = mainMat->getIds(tid);
 
     for (auto id : sparseIds) {
       // setup sub bufs
@@ -221,6 +230,7 @@
       optimizer->update(vecs, para->getConfig(), id);
       vecs[PARAMETER_GRADIENT]->zeroMem();
     }
+    sparseIds.clear();
   } else if (dynamic_cast<SparseRowCpuMatrix*>(
                  para->getMat(PARAMETER_GRADIENT).get())) {
     // From NeuralNetwork
@@ -246,6 +256,10 @@
       optimizer->update(vecs, para->getConfig(), id);
       vecs[PARAMETER_GRADIENT]->zeroMem();
     }
+    // For numThreads > 1, MultiGradientMachine is used, which goes
+    // to the above branch.
+    CHECK_EQ(numThreads, 1);
+    mainMat->clearIndices();
   } else {
     auto & m = *para->getMat(PARAMETER_GRADIENT).get();
     LOG(FATAL) << "Internal error: " << para->getName() << " "
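
Taken together with the ids.clear() removed from TrainerThread::mergeGradSparse above, this diff moves clearing of the touched-row id list from the producer to the consumer: the updater clears the list only after applying the per-row updates. A hedged sketch of the resulting pattern (updateRow is a stand-in for the vecs setup and optimizer->update() sequence in the diff):

    // Rows touched since the last update, filled by the gradient machine.
    std::vector<uint32_t>& sparseIds = mainMat->getIds(tid);
    for (uint32_t id : sparseIds) {
      updateRow(id);  // apply the SGD step to one sparse row
    }
    // Clear only after consuming, so ids gathered before the update can
    // no longer be lost to an early clear by another component.
    sparseIds.clear();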
2 changes: 1 addition & 1 deletion paddle/utils/Logging.h
@@ -191,7 +191,7 @@ void installFailureWriter(void(*callback)(const char*, int));
 }
 #endif // PADDLE_USE_GLOG
 
-#ifdef NDEBUG
+#ifndef NDEBUG
 #define DEBUG_LEVEL 5
 #define DBG VLOG(DEBUG_LEVEL)
 #else
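
For context on the one-character Logging.h fix: NDEBUG follows the C assert() convention, defined in release builds and absent in debug builds, so the guard has to be negated for DBG to expand to a live VLOG in debug builds. Schematically:

    #ifdef NDEBUG
    // release build: assert() is compiled out, and chatty DBG logging should
    // be too -- the old guard enabled it exactly here, which was backwards.
    #endif
    #ifndef NDEBUG
    // debug build: this is where DBG should map to VLOG(DEBUG_LEVEL).
    #endif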
