From a3825de8979044ccdfb66c9021f25f3f4657f763 Mon Sep 17 00:00:00 2001 From: Jiaxiang Wu Date: Tue, 5 Apr 2016 16:28:20 +0800 Subject: [PATCH] implement the non-accelerated forward-passing process --- .gitignore | 7 ++++++ Makefile | 2 +- Makefile.noblas | 35 +++++++++++++++++++++++++++++ src/CaffeEva.cc | 60 ++++++++++++++++++++++++++++++------------------- 4 files changed, 80 insertions(+), 24 deletions(-) create mode 100644 Makefile.noblas diff --git a/.gitignore b/.gitignore index feeb2e7..86b7f82 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,10 @@ *.o QuanCNN +# ignore files for the original AlexNet model +/AlexNet/Bin.Files/*.convKnl.* +/AlexNet/Bin.Files/*.fcntWei.* + +# ignore the binary file containing 1k images +/ILSVRC12.227x227.IMG/dataMatTst.single.bin + diff --git a/Makefile b/Makefile index 384b846..cf01b0e 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ OBJS=$(SRCS:$(SRC_DIR)/%.cc=$(OBJ_DIR)/%.o) CPPFLAGS=-I/usr/include/atlas -I/opt/OpenVML/include CFLAGS=-Wall -std=c++11 -O2 LDFLAGS=-L/usr/lib/atlas-base -L/opt/OpenVML/lib -LDLIBS=-lcblas -latlas -pthread -lopenvml +LDLIBS=-lcblas -latlas -lopenvml DFLAGS=-D ENABLE_ATLAS -D ENABLE_OPENVML TARGET=$(BIN_DIR)/QuanCNN diff --git a/Makefile.noblas b/Makefile.noblas new file mode 100644 index 0000000..600456a --- /dev/null +++ b/Makefile.noblas @@ -0,0 +1,35 @@ +# This Makefile requires OpenVML in the compilation + +CC=g++ +MKDIR=mkdir -p +RM=rm -rf +SRC_DIR=src +OBJ_DIR=obj +BIN_DIR=bin +SRCS=$(wildcard $(SRC_DIR)/*.cc) +OBJS=$(SRCS:$(SRC_DIR)/%.cc=$(OBJ_DIR)/%.o) +CPPFLAGS=-I/opt/OpenVML/include +CFLAGS=-Wall -std=c++11 -O2 +LDFLAGS=-L/opt/OpenVML/lib +LDLIBS=-lopenvml +DFLAGS=-D ENABLE_OPENVML +TARGET=$(BIN_DIR)/QuanCNN + +.PHONY: all run clean + +all: $(BIN_DIR) $(OBJ_DIR) $(TARGET) + +$(BIN_DIR): + $(MKDIR) $(BIN_DIR) +$(OBJ_DIR): + $(MKDIR) $(OBJ_DIR) +$(TARGET): $(OBJS) + $(CC) $(LDFLAGS) $^ $(LDLIBS) -o $@ +$(OBJ_DIR)/%.o: $(SRC_DIR)/%.cc + $(CC) $(CPPFLAGS) $(CFLAGS) $(DFLAGS) -c $< -o $@ + +run: + $(TARGET) + +clean: + $(RM) $(BIN_DIR) $(OBJ_DIR) $(OBJS) $(TARGET) diff --git a/src/CaffeEva.cc b/src/CaffeEva.cc index a54ff99..abda8df 100644 --- a/src/CaffeEva.cc +++ b/src/CaffeEva.cc @@ -448,19 +448,23 @@ void CaffeEva::PrepFeatBuf(void) { featBufStrLst.clear(); switch (layerInfo.type) { case ENUM_LyrType::Conv: - // feature buffer #0: + // feature buffer #0: + InitFeatBuf(&featBufStr, ENUM_BufUsage::PrecComp, + dataCnt, imgChnCurr, imgHeiCurr, imgWidCurr); + featBufStrLst.push_back(featBufStr); + // feature buffer #1: InitFeatBuf(&featBufStr, ENUM_BufUsage::PrecComp, imgChnCurr / grpCnt * knlSiz * knlSiz, imgHeiNext * imgWidNext); featBufStrLst.push_back(featBufStr); - // feature buffer #1: + // feature buffer #2: InitFeatBuf(&featBufStr, ENUM_BufUsage::PrecComp, knlCnt / grpCnt, imgHeiNext * imgWidNext); featBufStrLst.push_back(featBufStr); - // feature buffer #2: + // feature buffer #3: InitFeatBuf(&featBufStr, ENUM_BufUsage::AprxComp, dataCnt, imgHeiCurr, imgWidCurr, imgChnCurr / grpCnt); featBufStrLst.push_back(featBufStr); - // feature buffer #3: + // feature buffer #4: InitFeatBuf(&featBufStr, ENUM_BufUsage::AprxComp, dataCnt * imgHeiCurr * imgWidCurr, subSpaceCnt, ctrdCntPerSpace); featBufStrLst.push_back(featBufStr); @@ -682,33 +686,42 @@ void CaffeEva::CalcFeatMap_Conv(const Matrix& featMapSrc, void CaffeEva::CalcFeatMap_ConvPrec(const Matrix& featMapSrc, const int layerInd, Matrix* pFeatMapDst) { - // TODO(Jiaxiang Wu) - // implement this function for NxHxWxC inputs - // the code below is designed for NxCxHxW inputs - printf("[ERROR] CaffeEva::CalcFeatMap_ConvPrec() is not supported\n"); - return; - // obtain basic variables const LayerInfo& layerInfo = caffeParaObj.layerInfoLst[layerInd]; const LayerPara& layerPara = caffeParaObj.layerParaLst[layerInd]; int knlCnt = layerPara.convKnlLst.GetDimLen(0); int knlSiz = layerPara.convKnlLst.GetDimLen(2); int dataCnt = featMapSrc.GetDimLen(0); - int imgChnSrc = featMapSrc.GetDimLen(1); - int imgHeiDst = pFeatMapDst->GetDimLen(2); - int imgWidDst = pFeatMapDst->GetDimLen(3); + int imgHeiSrc = featMapSrc.GetDimLen(1); + int imgWidSrc = featMapSrc.GetDimLen(2); + int imgChnSrc = featMapSrc.GetDimLen(3); + int imgHeiDst = pFeatMapDst->GetDimLen(1); + int imgWidDst = pFeatMapDst->GetDimLen(2); + int imgChnDst = pFeatMapDst->GetDimLen(3); int knlCntPerGrp = knlCnt / layerInfo.grpCnt; int imgChnSrcPerGrp = imgChnSrc / layerInfo.grpCnt; + // obtain pre-allocated matrices for auxiliary variables + Matrix& featMapSrcPrm = *(featBufStrMat[layerInd][0].pFeatBuf); + Matrix& featMapSrcRsp = *(featBufStrMat[layerInd][1].pFeatBuf); + Matrix& featMapDstRsp = *(featBufStrMat[layerInd][2].pFeatBuf); + + // permute the input feature map dimensions + featMapSrcPrm.Resize(dataCnt, imgHeiSrc, imgWidSrc, imgChnSrc); + memcpy(featMapSrcPrm.GetDataPtr(), + featMapSrc.GetDataPtr(), sizeof(float) * featMapSrc.GetEleCnt()); + featMapSrcPrm.Permute(0, 3, 1, 2); + + // reshape the output feature map + pFeatMapDst->Resize(dataCnt, imgChnDst, imgHeiDst, imgWidDst); + // compute the feature map after passing a convolutional layer const float* biasVec = layerPara.biasVec.GetDataPtr(); - Matrix& featMapSrcRsp = *(featBufStrMat[layerInd][0].pFeatBuf); - Matrix& featMapDstRsp = *(featBufStrMat[layerInd][1].pFeatBuf); for (int dataInd = 0; dataInd < dataCnt; dataInd++) { for (int grpInd = 0; grpInd < layerInfo.grpCnt; grpInd++) { // copy source feature map to feature buffer CvtFeatMapToFeatBuf( - featMapSrc, dataInd, grpInd, layerInfo, &featMapSrcRsp); + featMapSrcPrm, dataInd, grpInd, layerInfo, &featMapSrcRsp); // call CBLAS function to compute the matrix-matrix multiplication int knlIndL = grpInd * knlCntPerGrp; @@ -745,6 +758,9 @@ void CaffeEva::CalcFeatMap_ConvPrec(const Matrix& featMapSrc, featMapDstRsp, dataInd, grpInd, layerInfo, pFeatMapDst); } // ENDFOR: grpInd } // ENDFOR: dataInd + + // permute the output feature map dimensions + pFeatMapDst->Permute(0, 2, 3, 1); } void CaffeEva::CalcFeatMap_ConvAprx(const Matrix& featMapSrc, @@ -770,8 +786,8 @@ void CaffeEva::CalcFeatMap_ConvAprx(const Matrix& featMapSrc, int imgChnSrcPerGrp = imgChnSrc / layerInfo.grpCnt; // obtain pre-allocated matrices for auxiliary variables - Matrix& featMapSrcPerGrp = *(featBufStrMat[layerInd][2].pFeatBuf); - Matrix& inPdMat = *(featBufStrMat[layerInd][3].pFeatBuf); + Matrix& featMapSrcPerGrp = *(featBufStrMat[layerInd][3].pFeatBuf); + Matrix& inPdMat = *(featBufStrMat[layerInd][4].pFeatBuf); // obtain pre-allocated centroid and assignment buffer Matrix& ctrdBuf = *(ctrdBufStrLst[layerInd].pCtrdBuf); @@ -924,10 +940,8 @@ void CaffeEva::CalcFeatMap_FCntPrec(const Matrix& featMapSrc, // obtain basic variables const LayerPara& layerPara = caffeParaObj.layerParaLst[layerInd]; int dataCnt = featMapSrc.GetDimLen(0); - int imgChnSrc = featMapSrc.GetDimLen(1); - int imgChnDst = pFeatMapDst->GetDimLen(1); - int imgHeiSrc = featMapSrc.GetDimLen(2); - int imgWidSrc = featMapSrc.GetDimLen(3); + int imgChnSrc = featMapSrc.GetDimStp(0); + int imgChnDst = pFeatMapDst->GetDimStp(0); // call CBLAS function to compute the matrix-matrix multiplication CBLAS_ORDER order = CblasRowMajor; @@ -935,7 +949,7 @@ void CaffeEva::CalcFeatMap_FCntPrec(const Matrix& featMapSrc, CBLAS_TRANSPOSE transB = CblasTrans; CBLAS_INT m = dataCnt; CBLAS_INT n = imgChnDst; - CBLAS_INT k = imgChnSrc * imgHeiSrc * imgWidSrc; + CBLAS_INT k = imgChnSrc; CBLAS_INT lda = k; CBLAS_INT ldb = k; CBLAS_INT ldc = n;