Skip to content

Commit

Permalink
Add graph documentations (#788)
Browse files Browse the repository at this point in the history
* add API docs for expression_graph.h
* change API docs to doxygen-readable format
* add API docs for node_initializers
* update doxygen configure file
* add hyperlinks and remove layers section from graph documentation
* fixing typos and links on graph doc
  • Loading branch information
qianqianzhu authored Feb 28, 2021
1 parent f88ded2 commit 2a9c0bb
Show file tree
Hide file tree
Showing 18 changed files with 940 additions and 159 deletions.
2 changes: 1 addition & 1 deletion Doxyfile.in
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ SHORT_NAMES = NO
# description.)
# The default value is: NO.

JAVADOC_AUTOBRIEF = NO
JAVADOC_AUTOBRIEF = YES

# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first
# line (until the first dot) of a Qt-style comment as the brief description. If
Expand Down
405 changes: 404 additions & 1 deletion doc/graph.md

Large diffs are not rendered by default.

Binary file added doc/images/example1_dot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added doc/images/example1_dot2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added doc/images/example1_dot3.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added doc/images/example2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added doc/images/graph_example1.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions src/common/definitions.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ IPtr<T> INew(Ptr<T> p) {
return IPtr<T>(p);
}

/// enum class DeviceType: defines which device is used for computation
enum class DeviceType : size_t { gpu = 0, cpu = 1 };

struct DeviceId {
Expand Down
8 changes: 8 additions & 0 deletions src/common/shape.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ struct Slice // Python-like slice/index descriptor
};
typedef std::vector<Slice> Slices;

/**
* Shape class mainly defines the shape or dimensionality of the node.
 * Basically, Shape is a wrapper of a std::vector. Its size is the number of
 * dimensions. E.g., shape={2,3} means a 2D matrix with dim[0]=2 and dim[1]=3.
 * When the index is negative, the real index is size() + index.
* It implements most common functions demanded by operations, e.g., resize(),
* slice(), and broadcast().
*/
struct Shape {
private:
std::vector<int> shape_;
Expand Down
61 changes: 31 additions & 30 deletions src/common/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ do { \
default: ABORT("Unknown type {}", type); \
} \
} while(0)

/// namespace marian
namespace marian {

// small struct to enable templating based on types use for packing
Expand Down Expand Up @@ -247,36 +247,37 @@ constexpr inline size_t operator+(size_t val, TypeClass typeClass) {
}

// @TODO: rename to ElementType when things become stable, so it's easier to review
/// enum class Type: stores all supported data types in Marian
enum class Type : size_t {
int8 = TypeClass::signed_type + 1u,
int16 = TypeClass::signed_type + 2u,
int32 = TypeClass::signed_type + 4u,
int64 = TypeClass::signed_type + 8u,

uint8 = TypeClass::unsigned_type + 1u,
uint16 = TypeClass::unsigned_type + 2u,
uint32 = TypeClass::unsigned_type + 4u,
uint64 = TypeClass::unsigned_type + 8u,

float16 = TypeClass::float_type + 2u,
float32 = TypeClass::float_type + 4u,
float64 = TypeClass::float_type + 8u,

packed16 = TypeClass::packed_type + 2u, // special type for FBGEMM, not meant to be used anywhere else, not meant to be accessed individually. Internal actual type (uint16) is meaningless.
packed8avx2 = TypeClass::packed_type + 1u + TypeClass::avx2_type, // special type for FBGEMM with AVX2, not meant to be used anywhere else, not meant to be accessed individually. Internal actual type (uint8) is meaningless.
packed8avx512 = TypeClass::packed_type + 1u + TypeClass::avx512_type, // special type for FBGEMM with AVX512, not meant to be used anywhere else, not meant to be accessed individually. Internal actual type (uint8) is meaningless.

intgemm8 = TypeClass::intgemm_type + 1u, // Int8 quantized (not packed) matrices for intgemm
intgemm16 = TypeClass::intgemm_type + 2u, // Int16 quantized (not packed) matrices for intgemm

intgemm8ssse3 = TypeClass::intgemm_type + 1u + TypeClass::ssse3_type, // Int8 quantized and packed (ssse3) matrices for intgemm
intgemm8avx2 = TypeClass::intgemm_type + 1u + TypeClass::avx2_type, // Int8 quantized and packed (avx2) matrices for intgemm
intgemm8avx512 = TypeClass::intgemm_type + 1u + TypeClass::avx512_type, // Int8 quantized and packed (avx512) matrices for intgemm
intgemm8avx512vnni = TypeClass::intgemm_type + 1u + TypeClass::avx512_type + 4096u, // Int8 quantized and packed (avx512) matrices for intgemm. VNNI algorithm

intgemm16sse2 = TypeClass::intgemm_type + 2u + TypeClass::sse2_type, // Int16 quantized and packed (sse2) matrices for intgemm
intgemm16avx2 = TypeClass::intgemm_type + 2u + TypeClass::avx2_type, // Int16 quantized and packed (avx2) matrices for intgemm
intgemm16avx512 = TypeClass::intgemm_type + 2u + TypeClass::avx512_type, // Int16 quantized and packed (avx512) matrices for intgemm
int8 = TypeClass::signed_type + 1u, ///< int8 type
int16 = TypeClass::signed_type + 2u, ///< int16 type
int32 = TypeClass::signed_type + 4u, ///< int32 type
int64 = TypeClass::signed_type + 8u, ///< int64 type

uint8 = TypeClass::unsigned_type + 1u, ///< uint8 type
uint16 = TypeClass::unsigned_type + 2u, ///< uint16 type
uint32 = TypeClass::unsigned_type + 4u, ///< uint32 type
uint64 = TypeClass::unsigned_type + 8u, ///< uint64 type

float16 = TypeClass::float_type + 2u, ///< float16 type
float32 = TypeClass::float_type + 4u, ///< float32 type
float64 = TypeClass::float_type + 8u, ///< float64 type

packed16 = TypeClass::packed_type + 2u, ///< special type for FBGEMM, not meant to be used anywhere else, not meant to be accessed individually. Internal actual type (uint16) is meaningless.
packed8avx2 = TypeClass::packed_type + 1u + TypeClass::avx2_type, ///< special type for FBGEMM with AVX2, not meant to be used anywhere else, not meant to be accessed individually. Internal actual type (uint8) is meaningless.
packed8avx512 = TypeClass::packed_type + 1u + TypeClass::avx512_type, ///< special type for FBGEMM with AVX512, not meant to be used anywhere else, not meant to be accessed individually. Internal actual type (uint8) is meaningless.

intgemm8 = TypeClass::intgemm_type + 1u, ///< Int8 quantized (not packed) matrices for intgemm
intgemm16 = TypeClass::intgemm_type + 2u, ///< Int16 quantized (not packed) matrices for intgemm
intgemm8ssse3 = TypeClass::intgemm_type + 1u + TypeClass::ssse3_type, ///< Int8 quantized and packed (ssse3) matrices for intgemm
intgemm8avx2 = TypeClass::intgemm_type + 1u + TypeClass::avx2_type, ///< Int8 quantized and packed (avx2) matrices for intgemm
intgemm8avx512 = TypeClass::intgemm_type + 1u + TypeClass::avx512_type, ///< Int8 quantized and packed (avx512) matrices for intgemm
intgemm8avx512vnni = TypeClass::intgemm_type + 1u + TypeClass::avx512_type + 4096u, ///< Int8 quantized and packed (avx512) matrices for intgemm. VNNI algorithm

intgemm16sse2 = TypeClass::intgemm_type + 2u + TypeClass::sse2_type, ///< Int16 quantized and packed (sse2) matrices for intgemm
intgemm16avx2 = TypeClass::intgemm_type + 2u + TypeClass::avx2_type, ///< Int16 quantized and packed (avx2) matrices for intgemm
intgemm16avx512 = TypeClass::intgemm_type + 2u + TypeClass::avx512_type, ///< Int16 quantized and packed (avx512) matrices for intgemm
};

static inline size_t operator&(TypeClass typeClass, Type type) {
Expand Down
16 changes: 13 additions & 3 deletions src/graph/expression_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ Expr ExpressionGraph::add(Expr node) {
} else {
node->setId(count_++);

// record in foward graph
// record in forward graph
nodesForward_.push_back(node);

// record in backward graph if training, and keep track of roots
Expand Down Expand Up @@ -143,6 +143,11 @@ void ExpressionGraph::forward(std::list<Expr>& forwardTape, bool finalPass) {
if(inferenceOnly_)
v->children().clear();

// If checkpointing is disabled, keep the memory for forward signals for all nodes.
// If checkpointing is enabled:
// (a) In the forward pass before the backward pass, free the memory for the nodes in the subtape to save memory.
// (b) In the forward calls during the backward pass, keep the memory in the current subtape to accelerate
// gradient computation.
if(checkpointing_ && !finalPass) {
auto subtape = v->getSubtape();
if(subtape) {
Expand Down Expand Up @@ -171,12 +176,14 @@ void ExpressionGraph::backward(bool reset, float clipValue) {
ABORT("Aborting");
}

// allocates memory and initialises gradients for parameters
for(auto kvParams : paramsByElementType_) {
kvParams.second->allocateBackward();
if(reset)
kvParams.second->set_zero_adjoint();
}

// for top nodes: allocates memory and initialises gradients to 1
for(auto&& v : topNodes_)
v->init_dependent();

Expand All @@ -186,13 +193,16 @@ void ExpressionGraph::backward(bool reset, float clipValue) {

bool firstNaN = true;
while(!nodesBackward_.empty()) {
auto v = nodesBackward_.back();
nodesBackward_.pop_back();
auto v = nodesBackward_.back(); // return the last element
nodesBackward_.pop_back(); // remove the last element

// for non-top nodes: allocates memory and initialises gradients to 0
for(auto&& child : v->children())
if(child->trainable() && child->type() != "param")
child->set_zero_adjoint();

// if using gradient checkpointing,
// recompute the forward pass from checkpoint to the root
if(checkpointing_ && v->getSubtape()) {
forward(*v->getSubtape(), /*finalPass=*/true);
}
Expand Down
Loading

0 comments on commit 2a9c0bb

Please sign in to comment.