Fix sparse training for trainer_count=1 (#204)
* Fix sparse training for trainer_count=1

For trainer_count=1, the gradient machine is NeuralNetwork, which does not create the parameter buffer for PARAMETER_GRADIENT for sparse update in Parameter::enableType(). But the gradient parameter buffer is still used in SgdThreadUpdater.

* Minor update to comment
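
To make the failure mode concrete, here is a minimal standalone sketch of the mismatch the message describes. This is not the actual Paddle source: Param, its bufs_ layout, and main() are invented for illustration; only the skip-the-dense-gradient-buffer-for-sparse-update behavior mirrors what Parameter::enableType() does.

    #include <cstddef>
    #include <cstdio>
    #include <memory>
    #include <vector>

    enum ParameterType { PARAMETER_VALUE = 0, PARAMETER_GRADIENT = 1, NUM_PARAMETER_TYPES = 2 };

    struct Param {
      bool sparseUpdate;
      std::shared_ptr<std::vector<float>> bufs_[NUM_PARAMETER_TYPES];

      // Like Parameter::enableType() as described above: a sparse-update
      // parameter gets no dense PARAMETER_GRADIENT buffer, because with
      // trainer_count > 1 the gradient lives in a SparseRowCpuMatrix instead.
      void enableType(ParameterType type, std::size_t size) {
        if (type == PARAMETER_GRADIENT && sparseUpdate) return;
        bufs_[type] = std::make_shared<std::vector<float>>(size, 0.0f);
      }
    };

    int main() {
      Param p{/*sparseUpdate=*/true, {}};
      p.enableType(PARAMETER_VALUE, 1024);
      p.enableType(PARAMETER_GRADIENT, 1024);  // silently skipped for sparse
      // SgdThreadUpdater-style code then assumes the gradient buffer exists:
      if (!p.bufs_[PARAMETER_GRADIENT]) {
        std::printf("PARAMETER_GRADIENT buffer is null -> updater would crash\n");
      }
      return 0;
    }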
emailweixu authored and reyoung committed Oct 17, 2016
1 parent b22e50e commit 28bc05b
Showing 5 changed files with 23 additions and 4 deletions.
2 changes: 1 addition & 1 deletion paddle/gserver/evaluators/ChunkEvaluator.cpp
@@ -75,7 +75,6 @@ class ChunkEvaluator : public Evaluator {
 
 public:
   virtual void init(const EvaluatorConfig& config) {
-    CHECK(!FLAGS_use_gpu) << "Not supported";
     Evaluator::init(config);
     if (config.chunk_scheme() == "IOB") {
       numTagTypes_ = 2;
@@ -137,6 +136,7 @@ class ChunkEvaluator : public Evaluator {
     CHECK_EQ(arguments.size(), (size_t)2);
     IVectorPtr& output = arguments[0].ids;
     IVectorPtr& label = arguments[1].ids;
+    CHECK(!output->useGpu() && !label->useGpu()) << "Not supported";
     auto sequenceStartPositions =
         arguments[1].sequenceStartPositions->getVector(false);
     CHECK_EQ(output->getSize(), label->getSize());
1 change: 0 additions & 1 deletion paddle/gserver/gradientmachines/MultiGradientMachine.cpp
@@ -813,7 +813,6 @@ void TrainerThread::mergeGradSparse(
           para->getMat(PARAMETER_GRADIENT).get());
   std::vector<uint32_t>& ids = mainMat->getIds(threadId_);
 
-  ids.clear();
   for (auto slaveParams : slaveParameters) {
     SparseRowCpuMatrix* mat =
         dynamic_cast<SparseRowCpuMatrix*>((*slaveParams)[pid]
6 changes: 6 additions & 0 deletions paddle/parameter/Parameter.h
@@ -146,6 +146,12 @@ class Parameter {
     }
   }
 
+  void enableBufType(ParameterType type) {
+    if (bufs_[type]) return;
+    bufs_[type] = Vector::createParallelVector(config_.size(), useGpu_);
+    bufs_[type]->zeroMem();
+  }
+
   void enableIntType(ParameterType type, size_t intStoreSize = 0) {
     if (!intBufs_[type]) {
       SetDevice device(deviceId_);
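
One note on the helper added above: the early return when bufs_[type] is already set makes enableBufType() idempotent, and zeroMem() ensures a freshly created gradient buffer starts from zero. Callers can therefore request the buffer unconditionally; a hedged usage sketch (the surrounding loop is hypothetical):

    for (auto& para : parameters) {
      // Allocates and zeroes on the first call; a no-op once the buffer exists.
      para->enableBufType(PARAMETER_GRADIENT);
    }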
16 changes: 15 additions & 1 deletion paddle/trainer/ThreadParameterUpdater.cpp
@@ -20,6 +20,8 @@ limitations under the License. */
 #include "paddle/math/SparseRowMatrix.h"
 #include "paddle/utils/Thread.h"
 
+P_DECLARE_int32(trainer_count);
+
 namespace paddle {
 
 SgdThreadUpdater::SgdThreadUpdater(const OptimizationConfig& optConfig)
@@ -48,6 +50,13 @@ void SgdThreadUpdater::init(std::vector<ParameterPtr>& parameters) {
                                      false /*inPserver*/));
     size_t numRows = para->isGradSparseUpdate() ? para->getConfig().dims(0) : 0;
     optimizers_[pid]->init(numRows, &para->getConfig());
+    if (para->isGradSparseUpdate() && FLAGS_trainer_count == 1) {
+      // For trainer_count=1, the gradient machine is NeuralNetwork, which does
+      // not create parameter buf for PARAMETER_GRADIENT for sparse update in
+      // Parameter::enableType(). But gradient parameter buf is still used
+      // in SgdThreadUpdater. We need to explicitly create it.
+      para->enableBufType(PARAMETER_GRADIENT);
+    }
   }
 }
 
@@ -211,7 +220,7 @@ void SgdThreadUpdater::threadUpdateSparse(
     // From MultiGradientMachine
     SparseRowIdsCpuMatrix* mainMat = dynamic_cast<SparseRowIdsCpuMatrix*>(
         para->getMat(PARAMETER_GRADIENT).get());
-    const std::vector<uint32_t>& sparseIds = mainMat->getIds(tid);
+    std::vector<uint32_t>& sparseIds = mainMat->getIds(tid);
 
     for (auto id : sparseIds) {
       // setup sub bufs
@@ -221,6 +230,7 @@
       optimizer->update(vecs, para->getConfig(), id);
       vecs[PARAMETER_GRADIENT]->zeroMem();
     }
+    sparseIds.clear();
   } else if (dynamic_cast<SparseRowCpuMatrix*>(
                  para->getMat(PARAMETER_GRADIENT).get())) {
     // From NeuralNetwork
@@ -246,6 +256,10 @@
       optimizer->update(vecs, para->getConfig(), id);
       vecs[PARAMETER_GRADIENT]->zeroMem();
     }
+    // For numThreads > 1, MultiGradientMachine is used, which goes
+    // to the above branch.
+    CHECK_EQ(numThreads, 1);
+    mainMat->clearIndices();
   } else {
     auto & m = *para->getMat(PARAMETER_GRADIENT).get();
     LOG(FATAL) << "Internal error: " << para->getName() << " "
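
Taken together with the ids.clear() removed from TrainerThread::mergeGradSparse above, this diff moves clearing of the touched-row id list from the producer to the consumer: the updater clears the list only after applying the per-row updates. A hedged sketch of the resulting pattern (updateRow is a stand-in for the vecs setup and optimizer->update() sequence in the diff):

    // Rows touched since the last update, filled by the gradient machine.
    std::vector<uint32_t>& sparseIds = mainMat->getIds(tid);
    for (uint32_t id : sparseIds) {
      updateRow(id);  // apply the SGD step to one sparse row
    }
    // Clear only after consuming, so ids gathered before the update can
    // no longer be lost to an early clear by another component.
    sparseIds.clear();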
2 changes: 1 addition & 1 deletion paddle/utils/Logging.h
@@ -191,7 +191,7 @@ void installFailureWriter(void(*callback)(const char*, int));
 }
 #endif // PADDLE_USE_GLOG
 
-#ifdef NDEBUG
+#ifndef NDEBUG
 #define DEBUG_LEVEL 5
 #define DBG VLOG(DEBUG_LEVEL)
 #else
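
For context on the one-character Logging.h fix: NDEBUG follows the C assert() convention, defined in release builds and absent in debug builds, so the guard has to be negated for DBG to expand to a live VLOG in debug builds. Schematically:

    #ifdef NDEBUG
    // release build: assert() is compiled out, and chatty DBG logging should
    // be too -- the old guard enabled it exactly here, which was backwards.
    #endif
    #ifndef NDEBUG
    // debug build: this is where DBG should map to VLOG(DEBUG_LEVEL).
    #endif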
