From 28bc05b126b0c1fb5da4825e10c67e13609e1fde Mon Sep 17 00:00:00 2001
From: emailweixu
Date: Mon, 17 Oct 2016 17:56:21 +0800
Subject: [PATCH] Fix sparse training for trainer_count=1 (#204)

* Fix sparse training for trainer_count=1

For trainer_count=1, the gradient machine is NeuralNetwork, which does
not create parameter buf for PARAMETER_GRADIENT for sparse update in
Parameter::enableType. But gradient parameter buf is still used in
SgdThreadUpdater.

* Minor update to comment
---
 paddle/gserver/evaluators/ChunkEvaluator.cpp      |  2 +-
 .../gradientmachines/MultiGradientMachine.cpp     |  1 -
 paddle/parameter/Parameter.h                      |  6 ++++++
 paddle/trainer/ThreadParameterUpdater.cpp         | 16 +++++++++++++++-
 paddle/utils/Logging.h                            |  2 +-
 5 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/paddle/gserver/evaluators/ChunkEvaluator.cpp b/paddle/gserver/evaluators/ChunkEvaluator.cpp
index 273925ba55ee4..22579891f397a 100644
--- a/paddle/gserver/evaluators/ChunkEvaluator.cpp
+++ b/paddle/gserver/evaluators/ChunkEvaluator.cpp
@@ -75,7 +75,6 @@ class ChunkEvaluator : public Evaluator {
 
 public:
   virtual void init(const EvaluatorConfig& config) {
-    CHECK(!FLAGS_use_gpu) << "Not supported";
     Evaluator::init(config);
     if (config.chunk_scheme() == "IOB") {
       numTagTypes_ = 2;
@@ -137,6 +136,7 @@ class ChunkEvaluator : public Evaluator {
     CHECK_EQ(arguments.size(), (size_t)2);
     IVectorPtr& output = arguments[0].ids;
     IVectorPtr& label = arguments[1].ids;
+    CHECK(!output->useGpu() && !label->useGpu()) << "Not supported";
     auto sequenceStartPositions =
         arguments[1].sequenceStartPositions->getVector(false);
     CHECK_EQ(output->getSize(), label->getSize());
diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
index 787ce703a08ae..0ded30eeb44e9 100644
--- a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
+++ b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
@@ -813,7 +813,6 @@ void TrainerThread::mergeGradSparse(
       para->getMat(PARAMETER_GRADIENT).get());
   std::vector<uint32_t>& ids = mainMat->getIds(threadId_);
-  ids.clear();
 
   for (auto slaveParams : slaveParameters) {
     SparseRowCpuMatrix* mat = dynamic_cast<SparseRowCpuMatrix*>((*slaveParams)[pid]
diff --git a/paddle/parameter/Parameter.h b/paddle/parameter/Parameter.h
index 2f9606dc68026..ff251fe89f9f8 100644
--- a/paddle/parameter/Parameter.h
+++ b/paddle/parameter/Parameter.h
@@ -146,6 +146,12 @@ class Parameter {
     }
   }
 
+  void enableBufType(ParameterType type) {
+    if (bufs_[type]) return;
+    bufs_[type] = Vector::createParallelVector(config_.size(), useGpu_);
+    bufs_[type]->zeroMem();
+  }
+
   void enableIntType(ParameterType type, size_t intStoreSize = 0) {
     if (!intBufs_[type]) {
       SetDevice device(deviceId_);
diff --git a/paddle/trainer/ThreadParameterUpdater.cpp b/paddle/trainer/ThreadParameterUpdater.cpp
index 91f7f4d29df93..a26e9239f987f 100644
--- a/paddle/trainer/ThreadParameterUpdater.cpp
+++ b/paddle/trainer/ThreadParameterUpdater.cpp
@@ -20,6 +20,8 @@ limitations under the License. */
 #include "paddle/math/SparseRowMatrix.h"
 #include "paddle/utils/Thread.h"
 
+P_DECLARE_int32(trainer_count);
+
 namespace paddle {
 
 SgdThreadUpdater::SgdThreadUpdater(const OptimizationConfig& optConfig)
@@ -48,6 +50,13 @@ void SgdThreadUpdater::init(std::vector<ParameterPtr>& parameters) {
                                               false /*inPserver*/));
     size_t numRows = para->isGradSparseUpdate() ?
                     para->getConfig().dims(0) : 0;
     optimizers_[pid]->init(numRows, &para->getConfig());
+    if (para->isGradSparseUpdate() && FLAGS_trainer_count == 1) {
+      // For trainer_count=1, the gradient machine is NeuralNetwork, which does
+      // not create parameter buf for PARAMETER_GRADIENT for sparse update in
+      // Parameter::enableType(). But gradient parameter buf is still used
+      // in SgdThreadUpdater. We need to explicitly create it.
+      para->enableBufType(PARAMETER_GRADIENT);
+    }
   }
 }
@@ -211,7 +220,7 @@ void SgdThreadUpdater::threadUpdateSparse(
     // From MultiGradientMachine
     SparseRowIdsCpuMatrix* mainMat = dynamic_cast<SparseRowIdsCpuMatrix*>(
         para->getMat(PARAMETER_GRADIENT).get());
-    const std::vector<uint32_t>& sparseIds = mainMat->getIds(tid);
+    std::vector<uint32_t>& sparseIds = mainMat->getIds(tid);
 
     for (auto id : sparseIds) {
       // setup sub bufs
@@ -221,6 +230,7 @@ void SgdThreadUpdater::threadUpdateSparse(
       optimizer->update(vecs, para->getConfig(), id);
       vecs[PARAMETER_GRADIENT]->zeroMem();
     }
+    sparseIds.clear();
   } else if (dynamic_cast<SparseRowCpuMatrix*>(
                  para->getMat(PARAMETER_GRADIENT).get())) {
     // From NeuralNetwork
@@ -246,6 +256,10 @@ void SgdThreadUpdater::threadUpdateSparse(
       optimizer->update(vecs, para->getConfig(), id);
       vecs[PARAMETER_GRADIENT]->zeroMem();
     }
+    // For numThreads > 1, MultiGradientMachine is used, which goes
+    // to the above branch.
+    CHECK_EQ(numThreads, 1);
+    mainMat->clearIndices();
   } else {
     auto & m = *para->getMat(PARAMETER_GRADIENT).get();
     LOG(FATAL) << "Internal error: " << para->getName() << " "
diff --git a/paddle/utils/Logging.h b/paddle/utils/Logging.h
index b3f439804686f..7fdfa3240c1de 100644
--- a/paddle/utils/Logging.h
+++ b/paddle/utils/Logging.h
@@ -191,7 +191,7 @@ void installFailureWriter(void(*callback)(const char*, int));
 }
 #endif // PADDLE_USE_GLOG
 
-#ifdef NDEBUG
+#ifndef NDEBUG
 #define DEBUG_LEVEL 5
 #define DBG VLOG(DEBUG_LEVEL)
 #else
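-- 
For readers who want to see the create-if-missing pattern outside the Paddle
sources, below is a minimal, self-contained C++ sketch of what the new
Parameter::enableBufType() and the added check in SgdThreadUpdater::init() do
together. SimpleParameter, Buffer, and the reduced ParameterType enum are
hypothetical stand-ins, not Paddle's actual Parameter/Vector API; only the
control flow mirrors the patch.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <memory>
#include <vector>

// Hypothetical stand-ins for paddle::Parameter and paddle::Vector.
enum ParameterType {
  PARAMETER_VALUE = 0,
  PARAMETER_GRADIENT = 1,
  NUM_PARAMETER_TYPES = 2
};

// Trivial dense buffer standing in for Vector::createParallelVector().
struct Buffer {
  explicit Buffer(size_t size) : data(size, 0.0f) {}
  void zeroMem() { std::fill(data.begin(), data.end(), 0.0f); }
  std::vector<float> data;
};

class SimpleParameter {
public:
  SimpleParameter(size_t size, bool gradSparseUpdate)
      : size_(size), gradSparseUpdate_(gradSparseUpdate) {}

  bool isGradSparseUpdate() const { return gradSparseUpdate_; }

  // Same create-if-missing pattern as the new Parameter::enableBufType():
  // allocate the buffer lazily, zero it, and stay a no-op on later calls.
  void enableBufType(ParameterType type) {
    if (bufs_[type]) return;
    bufs_[type] = std::make_unique<Buffer>(size_);
    bufs_[type]->zeroMem();
  }

  Buffer* getBuf(ParameterType type) { return bufs_[type].get(); }

private:
  size_t size_;
  bool gradSparseUpdate_;
  std::unique_ptr<Buffer> bufs_[NUM_PARAMETER_TYPES];
};

int main() {
  int trainer_count = 1;  // stand-in for the FLAGS_trainer_count gflag
  SimpleParameter para(/*size=*/8, /*gradSparseUpdate=*/true);

  // Mirrors the added check in SgdThreadUpdater::init(): with a single
  // trainer the gradient machine never allocated the gradient buffer, so the
  // updater creates it explicitly before the sparse update touches it.
  if (para.isGradSparseUpdate() && trainer_count == 1) {
    para.enableBufType(PARAMETER_GRADIENT);
  }

  std::cout << "gradient buffer present: " << std::boolalpha
            << (para.getBuf(PARAMETER_GRADIENT) != nullptr) << std::endl;
  return 0;
}

Because the enable call returns early when the buffer already exists, the
updater can invoke it unconditionally for sparse parameters without disturbing
the MultiGradientMachine path, where the buffer was already created.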