From a3825de8979044ccdfb66c9021f25f3f4657f763 Mon Sep 17 00:00:00 2001
From: Jiaxiang Wu <jiaxiang.wu.90@gmail.com>
Date: Tue, 5 Apr 2016 16:28:20 +0800
Subject: [PATCH] implement the non-accelerated forward-passing process

---
 .gitignore      |  7 ++++++
 Makefile        |  2 +-
 Makefile.noblas | 35 +++++++++++++++++++++++++++++
 src/CaffeEva.cc | 60 ++++++++++++++++++++++++++++++-------------------
 4 files changed, 80 insertions(+), 24 deletions(-)
 create mode 100644 Makefile.noblas
diff --git a/.gitignore b/.gitignore
index feeb2e7..86b7f82 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,10 @@
 *.o
 QuanCNN
 
+# ignore files for the original AlexNet model
+/AlexNet/Bin.Files/*.convKnl.*
+/AlexNet/Bin.Files/*.fcntWei.*
+
+# ignore the binary file containing 1k images
+/ILSVRC12.227x227.IMG/dataMatTst.single.bin
+
diff --git a/Makefile b/Makefile
index 384b846..cf01b0e 100644
--- a/Makefile
+++ b/Makefile
@@ -11,7 +11,7 @@ OBJS=$(SRCS:$(SRC_DIR)/%.cc=$(OBJ_DIR)/%.o)
 CPPFLAGS=-I/usr/include/atlas -I/opt/OpenVML/include
 CFLAGS=-Wall -std=c++11 -O2
 LDFLAGS=-L/usr/lib/atlas-base -L/opt/OpenVML/lib
-LDLIBS=-lcblas -latlas -pthread -lopenvml
+LDLIBS=-lcblas -latlas -lopenvml
 DFLAGS=-D ENABLE_ATLAS -D ENABLE_OPENVML
 TARGET=$(BIN_DIR)/QuanCNN
 
diff --git a/Makefile.noblas b/Makefile.noblas
new file mode 100644
index 0000000..600456a
--- /dev/null
+++ b/Makefile.noblas
@@ -0,0 +1,35 @@
+# This Makefile builds QuanCNN without ATLAS/CBLAS; only OpenVML is required
+
+CC=g++
+MKDIR=mkdir -p
+RM=rm -rf
+SRC_DIR=src
+OBJ_DIR=obj
+BIN_DIR=bin
+SRCS=$(wildcard $(SRC_DIR)/*.cc)
+OBJS=$(SRCS:$(SRC_DIR)/%.cc=$(OBJ_DIR)/%.o)
+CPPFLAGS=-I/opt/OpenVML/include
+CFLAGS=-Wall -std=c++11 -O2
+LDFLAGS=-L/opt/OpenVML/lib
+LDLIBS=-lopenvml
+DFLAGS=-D ENABLE_OPENVML
+TARGET=$(BIN_DIR)/QuanCNN
+
+.PHONY: all run clean
+
+all: $(BIN_DIR) $(OBJ_DIR) $(TARGET)
+
+$(BIN_DIR):
+	$(MKDIR) $(BIN_DIR)
+$(OBJ_DIR):
+	$(MKDIR) $(OBJ_DIR)
+$(TARGET): $(OBJS)
+	$(CC) $(LDFLAGS) $^ $(LDLIBS) -o $@
+$(OBJ_DIR)/%.o: $(SRC_DIR)/%.cc
+	$(CC) $(CPPFLAGS) $(CFLAGS) $(DFLAGS) -c $< -o $@
+
+run:
+	$(TARGET)
+
+clean:
+	$(RM) $(BIN_DIR) $(OBJ_DIR) $(OBJS) $(TARGET)
diff --git a/src/CaffeEva.cc b/src/CaffeEva.cc
index a54ff99..abda8df 100644
--- a/src/CaffeEva.cc
+++ b/src/CaffeEva.cc
@@ -448,19 +448,23 @@ void CaffeEva::PrepFeatBuf(void) {
     featBufStrLst.clear();
     switch (layerInfo.type) {
     case ENUM_LyrType::Conv:
-      // feature buffer #0: <featMapSrcRsp>
+      // feature buffer #0: <featMapSrcPrm>
+      InitFeatBuf(&featBufStr, ENUM_BufUsage::PrecComp,
+          dataCnt, imgChnCurr, imgHeiCurr, imgWidCurr);
+      featBufStrLst.push_back(featBufStr);
+      // feature buffer #1: <featMapSrcRsp>
       InitFeatBuf(&featBufStr, ENUM_BufUsage::PrecComp,
           imgChnCurr / grpCnt * knlSiz * knlSiz, imgHeiNext * imgWidNext);
       featBufStrLst.push_back(featBufStr);
-      // feature buffer #1: <featMapDstRsp>
+      // feature buffer #2: <featMapDstRsp>
       InitFeatBuf(&featBufStr, ENUM_BufUsage::PrecComp,
           knlCnt / grpCnt, imgHeiNext * imgWidNext);
       featBufStrLst.push_back(featBufStr);
-      // feature buffer #2: <featMapSrcPerGrp>
+      // feature buffer #3: <featMapSrcPerGrp>
       InitFeatBuf(&featBufStr, ENUM_BufUsage::AprxComp,
           dataCnt, imgHeiCurr, imgWidCurr, imgChnCurr / grpCnt);
       featBufStrLst.push_back(featBufStr);
-      // feature buffer #3: <inPdMat>
+      // feature buffer #4: <inPdMat>
       InitFeatBuf(&featBufStr, ENUM_BufUsage::AprxComp,
           dataCnt * imgHeiCurr * imgWidCurr, subSpaceCnt, ctrdCntPerSpace);
       featBufStrLst.push_back(featBufStr);
@@ -682,33 +686,42 @@ void CaffeEva::CalcFeatMap_Conv(const Matrix<float>& featMapSrc,
 
 void CaffeEva::CalcFeatMap_ConvPrec(const Matrix<float>& featMapSrc,
     const int layerInd, Matrix<float>* pFeatMapDst) {
-  // TODO(Jiaxiang Wu)
-  // implement this function for NxHxWxC inputs
-  // the code below is designed for NxCxHxW inputs
-  printf("[ERROR] CaffeEva::CalcFeatMap_ConvPrec() is not supported\n");
-  return;
-
   // obtain basic variables
   const LayerInfo& layerInfo = caffeParaObj.layerInfoLst[layerInd];
   const LayerPara& layerPara = caffeParaObj.layerParaLst[layerInd];
   int knlCnt = layerPara.convKnlLst.GetDimLen(0);
   int knlSiz = layerPara.convKnlLst.GetDimLen(2);
   int dataCnt = featMapSrc.GetDimLen(0);
-  int imgChnSrc = featMapSrc.GetDimLen(1);
-  int imgHeiDst = pFeatMapDst->GetDimLen(2);
-  int imgWidDst = pFeatMapDst->GetDimLen(3);
+  int imgHeiSrc = featMapSrc.GetDimLen(1);
+  int imgWidSrc = featMapSrc.GetDimLen(2);
+  int imgChnSrc = featMapSrc.GetDimLen(3);
+  int imgHeiDst = pFeatMapDst->GetDimLen(1);
+  int imgWidDst = pFeatMapDst->GetDimLen(2);
+  int imgChnDst = pFeatMapDst->GetDimLen(3);
   int knlCntPerGrp = knlCnt / layerInfo.grpCnt;
   int imgChnSrcPerGrp = imgChnSrc / layerInfo.grpCnt;
 
+  // obtain pre-allocated matrices for auxiliary variables
+  Matrix<float>& featMapSrcPrm = *(featBufStrMat[layerInd][0].pFeatBuf);
+  Matrix<float>& featMapSrcRsp = *(featBufStrMat[layerInd][1].pFeatBuf);
+  Matrix<float>& featMapDstRsp = *(featBufStrMat[layerInd][2].pFeatBuf);
+
+  // copy the input feature map and permute it from NxHxWxC to NxCxHxW
+  featMapSrcPrm.Resize(dataCnt, imgHeiSrc, imgWidSrc, imgChnSrc);
+  memcpy(featMapSrcPrm.GetDataPtr(),
+      featMapSrc.GetDataPtr(), sizeof(float) * featMapSrc.GetEleCnt());
+  featMapSrcPrm.Permute(0, 3, 1, 2);
+
+  // reshape the output feature map to NxCxHxW for the computation below
+  pFeatMapDst->Resize(dataCnt, imgChnDst, imgHeiDst, imgWidDst);
+
   // compute the feature map after passing a convolutional layer
   const float* biasVec = layerPara.biasVec.GetDataPtr();
-  Matrix<float>& featMapSrcRsp = *(featBufStrMat[layerInd][0].pFeatBuf);
-  Matrix<float>& featMapDstRsp = *(featBufStrMat[layerInd][1].pFeatBuf);
   for (int dataInd = 0; dataInd < dataCnt; dataInd++) {
     for (int grpInd = 0; grpInd < layerInfo.grpCnt; grpInd++) {
       // copy source feature map to feature buffer
       CvtFeatMapToFeatBuf(
-          featMapSrc, dataInd, grpInd, layerInfo, &featMapSrcRsp);
+          featMapSrcPrm, dataInd, grpInd, layerInfo, &featMapSrcRsp);
 
       // call CBLAS function to compute the matrix-matrix multiplication
       int knlIndL = grpInd * knlCntPerGrp;
@@ -745,6 +758,9 @@ void CaffeEva::CalcFeatMap_ConvPrec(const Matrix<float>& featMapSrc,
           featMapDstRsp, dataInd, grpInd, layerInfo, pFeatMapDst);
     }  // ENDFOR: grpInd
   }  // ENDFOR: dataInd
+
+  // permute the output feature map from NxCxHxW back to NxHxWxC
+  pFeatMapDst->Permute(0, 2, 3, 1);
 }
 
 void CaffeEva::CalcFeatMap_ConvAprx(const Matrix<float>& featMapSrc,
@@ -770,8 +786,8 @@ void CaffeEva::CalcFeatMap_ConvAprx(const Matrix<float>& featMapSrc,
   int imgChnSrcPerGrp = imgChnSrc / layerInfo.grpCnt;
 
   // obtain pre-allocated matrices for auxiliary variables
-  Matrix<float>& featMapSrcPerGrp = *(featBufStrMat[layerInd][2].pFeatBuf);
-  Matrix<float>& inPdMat = *(featBufStrMat[layerInd][3].pFeatBuf);
+  Matrix<float>& featMapSrcPerGrp = *(featBufStrMat[layerInd][3].pFeatBuf);
+  Matrix<float>& inPdMat = *(featBufStrMat[layerInd][4].pFeatBuf);
 
   // obtain pre-allocated centroid and assignment buffer
   Matrix<float>& ctrdBuf = *(ctrdBufStrLst[layerInd].pCtrdBuf);
@@ -924,10 +940,8 @@ void CaffeEva::CalcFeatMap_FCntPrec(const Matrix<float>& featMapSrc,
   // obtain basic variables
   const LayerPara& layerPara = caffeParaObj.layerParaLst[layerInd];
   int dataCnt = featMapSrc.GetDimLen(0);
-  int imgChnSrc = featMapSrc.GetDimLen(1);
-  int imgChnDst = pFeatMapDst->GetDimLen(1);
-  int imgHeiSrc = featMapSrc.GetDimLen(2);
-  int imgWidSrc = featMapSrc.GetDimLen(3);
+  int imgChnSrc = featMapSrc.GetDimStp(0);
+  int imgChnDst = pFeatMapDst->GetDimStp(0);
 
   // call CBLAS function to compute the matrix-matrix multiplication
   CBLAS_ORDER order = CblasRowMajor;
@@ -935,7 +949,7 @@ void CaffeEva::CalcFeatMap_FCntPrec(const Matrix<float>& featMapSrc,
   CBLAS_TRANSPOSE transB = CblasTrans;
   CBLAS_INT m = dataCnt;
   CBLAS_INT n = imgChnDst;
-  CBLAS_INT k = imgChnSrc * imgHeiSrc * imgWidSrc;
+  CBLAS_INT k = imgChnSrc;
   CBLAS_INT lda = k;
   CBLAS_INT ldb = k;
   CBLAS_INT ldc = n;