diff --git a/.gitignore b/.gitignore index 65ba217de37c8..ee8489c1d71bd 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,6 @@ build/ .vscode .idea .project +.cproject .pydevproject +Makefile diff --git a/paddle/gserver/layers/ConvBaseLayer.cpp b/paddle/gserver/layers/ConvBaseLayer.cpp index 42ff0b70d86f7..6bc3b3b801796 100644 --- a/paddle/gserver/layers/ConvBaseLayer.cpp +++ b/paddle/gserver/layers/ConvBaseLayer.cpp @@ -14,12 +14,15 @@ limitations under the License. */ #include "paddle/utils/Logging.h" #include "ConvBaseLayer.h" +#include "paddle/math/MathUtils.h" namespace paddle { bool ConvBaseLayer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { /* Initialize the basic parent class */ Layer::init(layerMap, parameterMap); + isDeconv_ = (config_.type() == "exconv" || config_.type() == "cudnn_conv") + ? false : true; /* Initialize the convolutional layer parameter */ numFilters_ = config_.num_filters(); @@ -42,8 +45,20 @@ bool ConvBaseLayer::init(const LayerMap& layerMap, outputW_.push_back(conf.output_x()); } + CHECK(inputLayers_.size() == parameters_.size()); + for (size_t i = 0; i < inputLayers_.size(); i++) { + size_t height, width; + height = filterPixels_[i] * filterChannels_[i]; + width = (!isDeconv_) ? numFilters_ : channels_[i]; + + // create a new weight + CHECK_EQ(parameters_[i]->getSize(), width * height); + Weight* w = new Weight(height, width, parameters_[i]); + weights_.emplace_back(w); + } + /* initialize the biases_ */ - if (biasParameter_.get() != NULL) { + if (biasParameter_.get()) { if (sharedBiases_) { CHECK_EQ((size_t)numFilters_, biasParameter_->getSize()); biases_ = @@ -70,23 +85,48 @@ size_t ConvBaseLayer::calOutputSize() { clearAndReserve(&outputH_); clearAndReserve(&outputW_); size_t layerSize = 0; - for (size_t i = 0; i < inputLayers_.size(); i++) { - imgSizeH_.push_back(inputLayers_[i]->getOutput().getFrameHeight()); - imgSizeW_.push_back(inputLayers_[i]->getOutput().getFrameWidth()); - if (imgSizeH_[i] == 0) - imgSizeH_[i] = config_.inputs(i).conv_conf().img_size(); - if (imgSizeW_[i] == 0) - imgSizeW_[i] = config_.inputs(i).conv_conf().img_size(); - outputH_.push_back(outputSize(imgSizeH_[i], filterSizeY_[i], paddingY_[i], - strideY_[i], caffeMode_)); - outputW_.push_back(outputSize(imgSizeW_[i], filterSize_[i], padding_[i], - stride_[i], caffeMode_)); - CHECK_EQ(outputH_[i], outputH_[0]); - CHECK_EQ(outputW_[i], outputW_[0]); + + auto setLayerSize = [&](IntV& inH, IntV& inW, IntV& outH, IntV& outW) { + for (size_t i = 0; i < inputLayers_.size(); i++) { + inH.push_back(inputLayers_[i]->getOutput().getFrameHeight()); + inW.push_back(inputLayers_[i]->getOutput().getFrameWidth()); + if (isDeconv_) { + if (inH[i] == 0) + inH[i] = config_.inputs(i).conv_conf().output_x(); + if (inW[i] == 0) + inW[i] = config_.inputs(i).conv_conf().output_x(); + outH.push_back( + imageSize(inH[i], filterSizeY_[i], paddingY_[i], strideY_[i], + caffeMode_)); + outW.push_back( + imageSize(inW[i], filterSize_[i], padding_[i], stride_[i], + caffeMode_)); + } else { + if (inH[i] == 0) + inH[i] = config_.inputs(i).conv_conf().img_size(); + if (inW[i] == 0) + inW[i] = config_.inputs(i).conv_conf().img_size(); + outH.push_back( + outputSize(inH[i], filterSizeY_[i], paddingY_[i], strideY_[i], + caffeMode_)); + outW.push_back( + outputSize(inW[i], filterSize_[i], padding_[i], stride_[i], + caffeMode_)); + } + CHECK_EQ(outH[i], outH[0]); + CHECK_EQ(outW[i], outW[0]); + } + getOutput().setFrameHeight(outH[0]); + getOutput().setFrameWidth(outW[0]); + layerSize = outH[0] * 
outW[0] * size_t(numFilters_); + }; + + if (isDeconv_) { + setLayerSize(outputH_, outputW_, imgSizeH_, imgSizeW_); + } else { + setLayerSize(imgSizeH_, imgSizeW_, outputH_, outputW_); } - getOutput().setFrameHeight(outputH_[0]); - getOutput().setFrameWidth(outputW_[0]); - layerSize = outputH_[0] * outputW_[0] * size_t(numFilters_); + return layerSize; } diff --git a/paddle/gserver/layers/ConvBaseLayer.h b/paddle/gserver/layers/ConvBaseLayer.h index e660a6d6f50ac..b80cab899585e 100644 --- a/paddle/gserver/layers/ConvBaseLayer.h +++ b/paddle/gserver/layers/ConvBaseLayer.h @@ -28,6 +28,9 @@ class ConvBaseLayer : public Layer { protected: typedef std::vector IntV; + /// True if it's deconv layer, false if it's convolution layer + bool isDeconv_; + /// The number of filters. int numFilters_; /// The x dimension of the padding. diff --git a/paddle/gserver/layers/ExpandConvBaseLayer.cpp b/paddle/gserver/layers/ExpandConvBaseLayer.cpp new file mode 100644 index 0000000000000..0bab0ca764f4f --- /dev/null +++ b/paddle/gserver/layers/ExpandConvBaseLayer.cpp @@ -0,0 +1,263 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + + +#include "ExpandConvBaseLayer.h" + +#include "paddle/utils/Logging.h" +namespace paddle { + +bool ExpandConvBaseLayer::init(const LayerMap &layerMap, + const ParameterMap ¶meterMap) { + /* Initialize the basic convolutional parent class */ + ConvBaseLayer::init(layerMap, parameterMap); + + /* The class fields channels_ and numFilters_ are the same as in the config + * i.e., channels_ is the for the input and numFilters_ is for the output + * + * But in order for the variables in convTrans having the same semantic + * meaning as in conv, we need to swap channels_ and numFilters here for + * convTrans, and in other functions too. + * */ + int channel; + int numFilters; + /* Initialize the projection */ + for (auto &inputConfig : config_.inputs()) { + const ConvConfig &conf = inputConfig.conv_conf(); + numFilters = isDeconv_ ? conf.channels() : numFilters_; + subM_.push_back(numFilters / conf.groups()); + subN_.push_back(conf.output_x() * conf.output_x()); + channel = isDeconv_ ? 
numFilters_ : conf.channels(); + subK_.push_back(channel * conf.filter_size() * conf.filter_size() / + conf.groups()); + /* Consistent caffe mode for multiple input */ + caffeMode_ = conf.caffe_mode(); + } + + getOutputSize(); + + return true; +} + +size_t ExpandConvBaseLayer::getOutputSize() { + CHECK_NE(inputLayers_.size(), 0UL); + size_t layerSize = ConvBaseLayer::calOutputSize(); + subN_.clear(); + for (size_t i = 0; i < inputLayers_.size(); i++) { + subN_.push_back(outputH_[i] * outputW_[i]); + } + return layerSize; +} + +void ExpandConvBaseLayer::resetExpandInput(size_t height, size_t width) { + Matrix::resizeOrCreate(expandInput_, height, width, false, useGpu_); +} + +void ExpandConvBaseLayer::addSharedBias() { + size_t mapW = getOutputSize() / numFilters_; + size_t mapH = getOutputValue()->getElementCnt() / mapW; + MatrixPtr out = + Matrix::create(getOutputValue()->getData(), mapH, mapW, false, useGpu_); + + Matrix::resizeOrCreate(transOutValue_, mapW, mapH, false, useGpu_); + + out->transpose(transOutValue_, false); // false means no memory allocation + transOutValue_->reshape(transOutValue_->getElementCnt() / numFilters_, + numFilters_); + + MatrixPtr bias = + Matrix::create(biases_->getW()->getData(), 1, + biases_->getW()->getElementCnt(), false, useGpu_); + transOutValue_->addBias(*bias, 1.0f); + + transOutValue_->reshape(mapW, mapH); + transOutValue_->transpose(out, false); // false means no memory allocation + + out->clear(); + bias->clear(); +} + +void ExpandConvBaseLayer::addUnsharedBias() { + MatrixPtr outValue = getOutputValue(); + MatrixPtr bias = + Matrix::create(biases_->getW()->getData(), 1, + biases_->getW()->getElementCnt(), false, useGpu_); + outValue->addBias(*bias, 1.0f); +} + + +void ExpandConvBaseLayer::expandOneFrame(MatrixPtr image, size_t startIdx, + int inIdx) { + int channel = isDeconv_ ? numFilters_ : channels_[inIdx]; + + resetExpandInput(subK_[inIdx] * groups_[inIdx], subN_[inIdx]); + real *imgData = image->getData() + startIdx * image->getWidth(); + MatrixPtr imageTmp = Matrix::create( + imgData, 1, imgSizeH_[inIdx] * imgSizeW_[inIdx] * channel, false, + useGpu_); + expandInput_->convExpand(*imageTmp, imgSizeH_[inIdx], imgSizeW_[inIdx], + channel, filterSize_[inIdx], + filterSize_[inIdx], stride_[inIdx], stride_[inIdx], + padding_[inIdx], padding_[inIdx], + outputH_[inIdx], outputW_[inIdx]); + imageTmp->clear(); +} + +void ExpandConvBaseLayer::expandFwdOnce(MatrixPtr image, MatrixPtr out, + int inIdx, int startIdx) { + int subM = subM_[inIdx]; + int subN = subN_[inIdx]; + int subK = subK_[inIdx]; + + expandOneFrame(image, startIdx, inIdx); + + int numFilters = isDeconv_ ? channels_[inIdx] : numFilters_; + + real *outData = + out->getData() + startIdx * subN * numFilters; + + real *wgtData = weights_[inIdx]->getW()->getData(); + real *expInData = expandInput_->getData(); + for (int g = 0; g < groups_[inIdx]; ++g) { + MatrixPtr A = + Matrix::create(wgtData, subK, subM, true, useGpu_); // mark transpose + MatrixPtr B = Matrix::create(expInData, subK, subN, false, useGpu_); + MatrixPtr C = Matrix::create(outData, subM, subN, false, useGpu_); + C->mul(A, B, 1, 1); + + A->clear(); + B->clear(); + C->clear(); + wgtData += subK * subM; + expInData += subK * subN; + outData += subM * subN; + } +} + +void ExpandConvBaseLayer::bpropActs(MatrixPtr out, MatrixPtr image, + int inpIdx) { + int channel = isDeconv_ ? 
numFilters_ : channels_[inpIdx]; + + int subM = subM_[inpIdx]; + int subN = subN_[inpIdx]; + int subK = subK_[inpIdx]; + size_t batchSize = image->getHeight(); + + /* reset the expand-grad memory */ + resetExpandInput(subK * groups_[inpIdx], subN); + + real *localGradData = out->getData(); + real *tgtGradData = image->getData(); + for (size_t n = 0; n < batchSize; n++) { + real *wgtData = weights_[inpIdx]->getW()->getData(); + real *expandInData = expandInput_->getData(); + + for (int g = 0; g < groups_[inpIdx]; g++) { + // create temporary matrix + MatrixPtr C = Matrix::create(expandInData, subK, subN, false, useGpu_); + MatrixPtr B = Matrix::create(localGradData, subM, subN, false, useGpu_); + MatrixPtr A = Matrix::create(wgtData, subK, subM, false, useGpu_); + C->mul(A, B); // mul + + // clear the temporary matrix + A->clear(); + B->clear(); + C->clear(); + + expandInData += subK * subN; + localGradData += subM * subN; + wgtData += subK * subM; + } + + // shrink one frame outGrad + MatrixPtr oneGradTmp = Matrix::create( + expandInput_->getData(), subK * groups_[inpIdx], subN, false, useGpu_); + MatrixPtr vTmp = Matrix::create( + tgtGradData, 1, + imgSizeH_[inpIdx] * imgSizeW_[inpIdx] * channel, false, + useGpu_); + vTmp->convShrink(*oneGradTmp, imgSizeH_[inpIdx], imgSizeW_[inpIdx], + channel, filterSize_[inpIdx], + filterSize_[inpIdx], stride_[inpIdx], stride_[inpIdx], + padding_[inpIdx], padding_[inpIdx], + outputH_[inpIdx], outputW_[inpIdx], 1.0f, 1.0f); + vTmp->clear(); + oneGradTmp->clear(); + + // move the data-pointer + tgtGradData += imgSizeH_[inpIdx] * imgSizeW_[inpIdx] * channel; + } +} + +void ExpandConvBaseLayer::bpropWeights(MatrixPtr image, MatrixPtr out, + int inpIdx) { + MatrixPtr weightGrad = weights_[inpIdx]->getWGrad(); + + int subM = subM_[inpIdx]; + int subN = subN_[inpIdx]; + int subK = subK_[inpIdx]; + size_t batchSize = image->getHeight(); + resetExpandInput(subK * groups_[inpIdx], subN); + + real *gradData = out->getData(); + + for (size_t n = 0; n < batchSize; n++) { // frame by frame + // expand + expandOneFrame(image, n, inpIdx); + real *wGradData = weightGrad->getData(); + real *expandInData = expandInput_->getData(); + + // expand-mul one-group by one + for (int g = 0; g < groups_[inpIdx]; g++) { + MatrixPtr A = Matrix::create(expandInData, subK, subN, false, useGpu_); + MatrixPtr B = Matrix::create(gradData, subM, subN, true, useGpu_); + MatrixPtr C = Matrix::create(wGradData, subK, subM, false, useGpu_); + C->mul(A, B, 1, 1); + + A->clear(); + B->clear(); + C->clear(); + gradData += subM * subN; + wGradData += subK * subM; + expandInData += subK * subN; + } + } +} + +void ExpandConvBaseLayer::bpropSharedBias(MatrixPtr biases, MatrixPtr v) { + size_t mapW = getOutputSize() / numFilters_; + size_t mapH = v->getElementCnt() / mapW; + MatrixPtr vTmp = Matrix::create(v->getData(), mapH, mapW, false, useGpu_); + + Matrix::resizeOrCreate(transOutValue_, mapW, mapH, false, useGpu_); + + vTmp->transpose(transOutValue_, false); // false means no memory allocation + transOutValue_->reshape(transOutValue_->getElementCnt() / numFilters_, + numFilters_); + biases->collectBias(*transOutValue_, 1.0f); +} + +void ExpandConvBaseLayer::bpropBiases(MatrixPtr v) { + MatrixPtr biases = + Matrix::create(biases_->getWGrad()->getData(), 1, + biases_->getWGrad()->getElementCnt(), false, useGpu_); + if (sharedBiases_) { + bpropSharedBias(biases, v); + } else { + biases->collectBias(*v, 1.0f); + } + biases->clear(); +} + +} // namespace paddle diff --git 
a/paddle/gserver/layers/ExpandConvBaseLayer.h b/paddle/gserver/layers/ExpandConvBaseLayer.h new file mode 100644 index 0000000000000..9858fa348c3fc --- /dev/null +++ b/paddle/gserver/layers/ExpandConvBaseLayer.h @@ -0,0 +1,85 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + + +#pragma once + +#include "ConvBaseLayer.h" +#include "paddle/math/Matrix.h" +#include + +namespace paddle { + +/** + * @brief A subclass of ConvBaseLayer that is a superclass of both + * ExpandConvLayer and ExpandConvTransLayer + */ +class ExpandConvBaseLayer : public ConvBaseLayer { +protected: + /// For expand convolution. + /// subM_ = numFilters_ / groups_. + IntV subM_; + /// subN_ = outputH_ * outputW_. + IntV subN_; + /// subK_ = channels_ * filterPixels_ * groups_. + IntV subK_; + + /*The expandInput_ and transOutValue_ are used for CPU expand conv calc + * Expand one sample at a time. shape: + * (numChannels * filterPixels_, outputSizeH * outputSizeW) + * */ + MatrixPtr expandInput_; + /// The transpose of output, which is an auxiliary matrix. + MatrixPtr transOutValue_; + +public: + explicit ExpandConvBaseLayer(const LayerConfig& config) + : ConvBaseLayer(config) {} + + ~ExpandConvBaseLayer() {} + + bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + + size_t getOutputSize(); + /** + * Create or resize expandInput_. + */ + void resetExpandInput(size_t height, size_t width); + + /** + * Add shared bias. + */ + void addSharedBias(); + + /** + * Add unshared bias. + */ + void addUnsharedBias(); + /** + * Expand one input sample. + */ + void expandOneFrame(MatrixPtr image, size_t startIdx, int inIdx); + + /** + * Expand one input sample and perform matrix multiplication. 
+ */ + void expandFwdOnce(MatrixPtr image, MatrixPtr out, int inIdx, int startIdx); + + void bpropSharedBias(MatrixPtr biases, MatrixPtr v); + void bpropBiases(MatrixPtr v); + void bpropWeights(MatrixPtr image, MatrixPtr out, int inpIdx); + void bpropActs(MatrixPtr image, MatrixPtr out, int inpIdx); +}; + +} // namespace paddle diff --git a/paddle/gserver/layers/ExpandConvLayer.cpp b/paddle/gserver/layers/ExpandConvLayer.cpp index 80a6a62b5c0de..5ea1fdece5f7b 100644 --- a/paddle/gserver/layers/ExpandConvLayer.cpp +++ b/paddle/gserver/layers/ExpandConvLayer.cpp @@ -24,150 +24,29 @@ REGISTER_LAYER(exconv, ExpandConvLayer); bool ExpandConvLayer::init(const LayerMap &layerMap, const ParameterMap ¶meterMap) { /* Initialize the basic convolutional parent class */ - ConvBaseLayer::init(layerMap, parameterMap); - - /* Initialize the projection */ - for (auto &inputConfig : config_.inputs()) { - const ConvConfig &conf = inputConfig.conv_conf(); - subM_.push_back(numFilters_ / conf.groups()); - subN_.push_back(conf.output_x() * conf.output_x()); - subK_.push_back(conf.channels() * conf.filter_size() * conf.filter_size() / - conf.groups()); - /* Consistent caffe mode for multiple input */ - caffeMode_ = conf.caffe_mode(); - } - - /* initialize the weightList */ - CHECK(inputLayers_.size() == parameters_.size()); - for (size_t i = 0; i < inputLayers_.size(); i++) { - size_t height, width; - height = filterPixels_[i] * filterChannels_[i]; - width = numFilters_; - - // create a new weight - CHECK_EQ(parameters_[i]->getSize(), width * height); - Weight* w = new Weight(height, width, parameters_[i]); - weights_.emplace_back(w); - } - + ExpandConvBaseLayer::init(layerMap, parameterMap); return true; } -size_t ExpandConvLayer::getOutputSize() { - CHECK_NE(inputLayers_.size(), 0UL); - size_t layerSize = ConvBaseLayer::calOutputSize(); - subN_.clear(); - for (size_t i = 0; i < inputLayers_.size(); i++) { - subN_.push_back(outputH_[i] * outputW_[i]); - } - return layerSize; -} - -void ExpandConvLayer::resetExpandInput(size_t height, size_t width) { - Matrix::resizeOrCreate(expandInput_, height, width, false, useGpu_); -} - -void ExpandConvLayer::resetConvOutput(size_t batchSize, int inIdx) { - Matrix::resizeOrCreate(transOutValue_, batchSize * numFilters_, subN_[inIdx], - false, useGpu_); -} - -void ExpandConvLayer::expandOneFrame(MatrixPtr image, size_t startIdx, - int inIdx) { - resetExpandInput(subK_[inIdx] * groups_[inIdx], subN_[inIdx]); - real *imgData = image->getData() + startIdx * image->getWidth(); - MatrixPtr imageTmp = Matrix::create( - imgData, 1, imgSizeH_[inIdx] * imgSizeW_[inIdx] * channels_[inIdx], false, - useGpu_); - expandInput_->convExpand(*imageTmp, imgSizeH_[inIdx], imgSizeW_[inIdx], - channels_[inIdx], filterSize_[inIdx], - filterSize_[inIdx], stride_[inIdx], stride_[inIdx], - padding_[inIdx], padding_[inIdx], - outputH_[inIdx], outputW_[inIdx]); - imageTmp->clear(); -} - -void ExpandConvLayer::expandFwdOnce(MatrixPtr image, int inIdx, int startIdx) { - int subM = subM_[inIdx]; - int subN = subN_[inIdx]; - int subK = subK_[inIdx]; - - expandOneFrame(image, startIdx, inIdx); - - real *outData = - getOutputValue()->getData() + startIdx * subN * numFilters_; - - real *wgtData = weights_[inIdx]->getW()->getData(); - real *expInData = expandInput_->getData(); - for (int g = 0; g < groups_[inIdx]; ++g) { - MatrixPtr A = - Matrix::create(wgtData, subK, subM, true, useGpu_); // mark transpose - MatrixPtr B = Matrix::create(expInData, subK, subN, false, useGpu_); - MatrixPtr C = 
Matrix::create(outData, subM, subN, false, useGpu_); - C->mul(A, B, 1, 1); - - A->clear(); - B->clear(); - C->clear(); - wgtData += subK * subM; - expInData += subK * subN; - outData += subM * subN; - } -} - -void ExpandConvLayer::addSharedBias() { - size_t mapW = getOutputValue()->getWidth() / numFilters_; - size_t mapH = getOutputValue()->getElementCnt() / mapW; - MatrixPtr out = - Matrix::create(getOutputValue()->getData(), mapH, mapW, false, useGpu_); - - Matrix::resizeOrCreate(transOutValue_, mapW, mapH, false, useGpu_); - - out->transpose(transOutValue_, false); // false means no memory allocation - transOutValue_->reshape(transOutValue_->getElementCnt() / numFilters_, - numFilters_); - - MatrixPtr bias = - Matrix::create(biases_->getW()->getData(), 1, - biases_->getW()->getElementCnt(), false, useGpu_); - transOutValue_->addBias(*bias, 1.0f); - - transOutValue_->reshape(mapW, mapH); - transOutValue_->transpose(out, false); // false means no memory allocation - - out->clear(); - bias->clear(); -} - -void ExpandConvLayer::addUnsharedBias() { - MatrixPtr outValue = getOutputValue(); - MatrixPtr bias = - Matrix::create(biases_->getW()->getData(), 1, - biases_->getW()->getElementCnt(), false, useGpu_); - outValue->addBias(*bias, 1.0f); -} - void ExpandConvLayer::forward(PassType passType) { Layer::forward(passType); /* malloc memory for the output_ if necessary */ - /* note: one sample correspond to one colum, and the - * transOutValue correspond sample to one row */ - int batchSize = inputLayers_[0]->getOutputValue()->getWidth(); - batchSize = inputLayers_[0]->getOutputValue()->getHeight(); + int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); resetOutput(batchSize, getOutputSize()); MatrixPtr image = nullptr; - for (size_t i = 0; i != inputLayers_.size(); ++i) { + MatrixPtr outV = getOutputValue(); + for (size_t i = 0; i < inputLayers_.size(); ++i) { LayerPtr prevLayer = getPrev(i); image = prevLayer->getOutputValue(); for (size_t off = 0; off < image->getHeight(); off++) { REGISTER_TIMER_INFO("expandFwdOnce", getName().c_str()); - expandFwdOnce(image, i, off); + expandFwdOnce(image, outV, i, off); } } /* add the bias-vector */ - if (biases_.get() != NULL) { + if (biases_.get()) { if (sharedBiases_) { addSharedBias(); } else { @@ -179,29 +58,6 @@ void ExpandConvLayer::forward(PassType passType) { forwardActivation(); } -void ExpandConvLayer::bpropSharedBias(MatrixPtr biases, MatrixPtr v) { - size_t mapW = v->getWidth() / numFilters_; - size_t mapH = v->getElementCnt() / mapW; - MatrixPtr vTmp = Matrix::create(v->getData(), mapH, mapW, false, useGpu_); - - Matrix::resizeOrCreate(transOutValue_, mapW, mapH, false, useGpu_); - - vTmp->transpose(transOutValue_, false); // false means no memory allocation - vTmp->reshape(transOutValue_->getElementCnt() / numFilters_, numFilters_); - biases->collectBias(*vTmp, 1.0f); -} - -void ExpandConvLayer::bpropBiases(MatrixPtr v) { - MatrixPtr biases = - Matrix::create(biases_->getWGrad()->getData(), 1, - biases_->getWGrad()->getElementCnt(), false, useGpu_); - if (sharedBiases_) { - bpropSharedBias(biases, v); - } else { - biases->collectBias(*v, 1.0f); - } - biases->clear(); -} void ExpandConvLayer::backward(const UpdateCallback &callback) { backwardActivation(); @@ -213,111 +69,18 @@ void ExpandConvLayer::backward(const UpdateCallback &callback) { biases_->getParameterPtr()->incUpdate(callback); } - for (size_t i = 0; i != inputLayers_.size(); ++i) { + for (size_t i = 0; i < inputLayers_.size(); ++i) { /* First, calculate the input 
layers error */ - bpropActs(outGrad, i); + if (getPrev(i)->getOutputGrad()) { + bpropActs(outGrad, getPrev(i)->getOutputGrad(), i); + } if (weights_[i]->getWGrad()) { /* Then, calculate the W-gradient for the current layer */ - bpropWeights(outGrad, i); + bpropWeights(getPrev(i)->getOutputValue(), outGrad, i); /* Increasing the number of gradient */ weights_[i]->getParameterPtr()->incUpdate(callback); } } } -void ExpandConvLayer::bpropWeights(MatrixPtr v, int inpIdx) { - MatrixPtr weightGrad = weights_[inpIdx]->getWGrad(); - MatrixPtr inputV = getPrev(inpIdx)->getOutputValue(); - - int subM = subM_[inpIdx]; - int subN = subN_[inpIdx]; - int subK = subK_[inpIdx]; - size_t batchSize = inputV->getHeight(); - resetExpandInput(subK * groups_[inpIdx], subN); - resetConvOutput(batchSize, inpIdx); - - real *gradData = v->getData(); - - for (size_t n = 0; n < batchSize; n++) { // frame by frame - // expand - expandOneFrame(inputV, n, inpIdx); - real *wGradData = weightGrad->getData(); - real *expandInData = expandInput_->getData(); - - // expand-mul one-group by one - for (int g = 0; g < groups_[inpIdx]; g++) { - MatrixPtr A = Matrix::create(expandInData, subK, subN, false, useGpu_); - MatrixPtr B = Matrix::create(gradData, subM, subN, true, useGpu_); - MatrixPtr C = Matrix::create(wGradData, subK, subM, false, useGpu_); - C->mul(A, B, 1, 1); - - A->clear(); - B->clear(); - C->clear(); - gradData += subM * subN; - wGradData += subK * subM; - expandInData += subK * subN; - } - } -} - -void ExpandConvLayer::bpropActs(MatrixPtr v, int inpIdx) { - LayerPtr prevLayer = getPrev(inpIdx); - if (NULL == prevLayer->getOutputGrad()) { - return; - } - - int subM = subM_[inpIdx]; - int subN = subN_[inpIdx]; - int subK = subK_[inpIdx]; - size_t batchSize = v->getHeight(); - MatrixPtr tgtGrad = prevLayer->getOutputGrad(); - - /* reset the expand-grad memory */ - resetExpandInput(subK * groups_[inpIdx], subN); - resetConvOutput(batchSize, inpIdx); - - real *localGradData = v->getData(); - real *tgtGradData = tgtGrad->getData(); - for (size_t n = 0; n < batchSize; n++) { - real *wgtData = weights_[inpIdx]->getW()->getData(); - real *expandInData = expandInput_->getData(); - - for (int g = 0; g < groups_[inpIdx]; g++) { - // create temporary matrix - MatrixPtr C = Matrix::create(expandInData, subK, subN, false, useGpu_); - MatrixPtr B = Matrix::create(localGradData, subM, subN, false, useGpu_); - MatrixPtr A = Matrix::create(wgtData, subK, subM, false, useGpu_); - C->mul(A, B); // mul - - // clear the temporary matrix - A->clear(); - B->clear(); - C->clear(); - - expandInData += subK * subN; - localGradData += subM * subN; - wgtData += subK * subM; - } - - // shrink one frame outGrad - MatrixPtr oneGradTmp = Matrix::create( - expandInput_->getData(), subK * groups_[inpIdx], subN, false, useGpu_); - MatrixPtr vTmp = Matrix::create( - tgtGradData, 1, - imgSizeH_[inpIdx] * imgSizeW_[inpIdx] * channels_[inpIdx], false, - useGpu_); - vTmp->convShrink(*oneGradTmp, imgSizeH_[inpIdx], imgSizeW_[inpIdx], - channels_[inpIdx], filterSize_[inpIdx], - filterSize_[inpIdx], stride_[inpIdx], stride_[inpIdx], - padding_[inpIdx], padding_[inpIdx], - outputH_[inpIdx], outputW_[inpIdx], 1.0f, 1.0f); - vTmp->clear(); - oneGradTmp->clear(); - - // move the data-pointer - tgtGradData += imgSizeH_[inpIdx] * imgSizeW_[inpIdx] * channels_[inpIdx]; - } -} - } // namespace paddle diff --git a/paddle/gserver/layers/ExpandConvLayer.h b/paddle/gserver/layers/ExpandConvLayer.h index 030a3ba397ff4..c07188a406183 100644 --- 
a/paddle/gserver/layers/ExpandConvLayer.h +++ b/paddle/gserver/layers/ExpandConvLayer.h @@ -15,9 +15,9 @@ limitations under the License. */ #pragma once -#include "ConvBaseLayer.h" #include "paddle/math/Matrix.h" #include +#include "ExpandConvBaseLayer.h" namespace paddle { @@ -28,65 +28,18 @@ namespace paddle { * * The config file api is img_conv_layer. */ -class ExpandConvLayer : public ConvBaseLayer { -protected: - /// For expand convolution. - /// subM_ = numFilters_ / groups_. - IntV subM_; - /// subN_ = outputH_ * outputW_. - IntV subN_; - /// subK_ = channels_ * filterPixels_ * groups_. - IntV subK_; - /// Expand one sample at a time. shape: - /// (numChannels * filterPixels_, outputSizeH * outputSizeW) - MatrixPtr expandInput_; - /// The transpose of output, which is an auxiliary matrix. - MatrixPtr transOutValue_; +class ExpandConvLayer : public ExpandConvBaseLayer { public: - explicit ExpandConvLayer(const LayerConfig& config) : ConvBaseLayer(config) {} + explicit ExpandConvLayer(const LayerConfig& config) : + ExpandConvBaseLayer(config) {} ~ExpandConvLayer() {} bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - size_t getOutputSize(); - - /** - * Create or resize expandInput_. - */ - void resetExpandInput(size_t height, size_t width); - - /** - * Create or resize transOutValue_. - */ - void resetConvOutput(size_t batchSize, int inIdx); - - /** - * Expand one input sample. - */ - void expandOneFrame(MatrixPtr image, size_t startIdx, int inIdx); - - /** - * Expand one input sample and perform matrix multiplication. - */ - void expandFwdOnce(MatrixPtr image, int inIdx, int startIdx); - - /** - * Add shared bias. - */ - void addSharedBias(); - - /** - * Add unshared bias. - */ - void addUnsharedBias(); void forward(PassType passType); - void bpropSharedBias(MatrixPtr biases, MatrixPtr v); - void bpropBiases(MatrixPtr v); void backward(const UpdateCallback& callback); - void bpropWeights(MatrixPtr v, int inpIdx); - void bpropActs(MatrixPtr v, int inpIdx); }; } // namespace paddle diff --git a/paddle/gserver/layers/ExpandConvTransLayer.cpp b/paddle/gserver/layers/ExpandConvTransLayer.cpp new file mode 100644 index 0000000000000..a3e160f1f4eb5 --- /dev/null +++ b/paddle/gserver/layers/ExpandConvTransLayer.cpp @@ -0,0 +1,92 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + + +#include "paddle/utils/Logging.h" +#include "paddle/utils/Stat.h" +#include "ExpandConvTransLayer.h" + +/* The implementation of the convTransLayer is basically a swap of forward and + * backward of the original convLayer. + * The variable naming follows the convention of the convLayer. 
+ * */ + +namespace paddle { + +REGISTER_LAYER(exconvt, ExpandConvTransLayer); + +bool ExpandConvTransLayer::init(const LayerMap &layerMap, + const ParameterMap ¶meterMap) { + /* Initialize the basic convolutional parent class */ + ExpandConvBaseLayer::init(layerMap, parameterMap); + + return true; +} + +void ExpandConvTransLayer::forward(PassType passType) { + Layer::forward(passType); + + /* malloc memory for the output_ if necessary */ + int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); + resetOutput(batchSize, getOutputSize()); + + MatrixPtr output = nullptr; + for (size_t i = 0; i < inputLayers_.size(); ++i) { + LayerPtr prevLayer = getPrev(i); + output = prevLayer->getOutputValue(); + REGISTER_TIMER_INFO("shrinkFwd", getName().c_str()); + bpropActs(output, getOutputValue(), i); + } + + /* add the bias-vector */ + if (biases_.get()) { + if (sharedBiases_) { + addSharedBias(); + } else { + addUnsharedBias(); + } + } + + /* activation */ + forwardActivation(); +} + +void ExpandConvTransLayer::backward(const UpdateCallback &callback) { + backwardActivation(); + + MatrixPtr imageGrad = getOutputGrad(); + if (biases_ && biases_->getWGrad()) { + bpropBiases(imageGrad); + /* Increasing the number of gradient */ + biases_->getParameterPtr()->incUpdate(callback); + } + + for (size_t i = 0; i < inputLayers_.size(); ++i) { + /* First, calculate the input layers error */ + for (size_t off = 0; off < imageGrad->getHeight(); off++) { + if (getPrev(i)->getOutputGrad()) { + expandFwdOnce(imageGrad, getPrev(i)->getOutputGrad(), i, off); + } + } + if (weights_[i]->getWGrad()) { + /* Then, calculate the W-gradient for the current layer */ + bpropWeights(imageGrad, getPrev(i)->getOutputValue(), i); + /* Increasing the number of gradient */ + weights_[i]->getParameterPtr()->incUpdate(callback); + } + } +} + + +} // namespace paddle diff --git a/paddle/gserver/layers/ExpandConvTransLayer.h b/paddle/gserver/layers/ExpandConvTransLayer.h new file mode 100644 index 0000000000000..87c464a97f2ed --- /dev/null +++ b/paddle/gserver/layers/ExpandConvTransLayer.h @@ -0,0 +1,44 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + + +#pragma once + +#include "paddle/math/Matrix.h" +#include +#include "ExpandConvBaseLayer.h" + +namespace paddle { + +/** + * @brief A subclass of convolution layer. + * This layer expands input and use matrix multiplication to + * calculate convolution transpose (deconv) operation. + * + * The config file api is img_conv_layer with flag trans=True. 
+ */ +class ExpandConvTransLayer : public ExpandConvBaseLayer { +public: + explicit ExpandConvTransLayer(const LayerConfig& config) : + ExpandConvBaseLayer(config) {} + + ~ExpandConvTransLayer() {} + + bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + + void forward(PassType passType); + void backward(const UpdateCallback& callback); +}; + +} // namespace paddle diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index 26ee2b3aae64a..0651d0b4733ea 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -26,6 +26,14 @@ add_unittest_without_exec(test_ActivationGrad TestUtil.cpp) add_test(NAME test_ActivationGrad COMMAND test_ActivationGrad) +################# test_ConvTrans ####################### +add_unittest_without_exec(test_ConvTrans + test_ConvTrans.cpp + LayerGradUtil.cpp + TestUtil.cpp) + +add_test(NAME test_ConvTrans + COMMAND test_ConvTrans) ################## test_Evaluator ####################### add_unittest(test_Evaluator diff --git a/paddle/gserver/tests/test_ConvTrans.cpp b/paddle/gserver/tests/test_ConvTrans.cpp new file mode 100644 index 0000000000000..bff7222b29907 --- /dev/null +++ b/paddle/gserver/tests/test_ConvTrans.cpp @@ -0,0 +1,246 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include +#include +#include +#include "paddle/gserver/layers/DataLayer.h" +#include "ModelConfig.pb.h" +#include "paddle/trainer/Trainer.h" +#include "paddle/utils/GlobalConstants.h" +#include "paddle/gserver/layers/ExpandConvTransLayer.h" +#include "paddle/math/MathUtils.h" + +#include "TestUtil.h" +#include "LayerGradUtil.h" + +using namespace paddle; // NOLINT +using namespace std; // NOLINT + +P_DECLARE_bool(use_gpu); +P_DECLARE_int32(gpu_id); +P_DECLARE_double(checkgrad_eps); +P_DECLARE_bool(thread_local_rand_use_global_seed); +P_DECLARE_bool(prev_batch_state); + +// Test that the convTrans forward is the same as conv backward +TEST(Layer, convTransLayerFwd) { + // Setting up conv-trans layer + TestConfig configt; + configt.biasSize = 3; + configt.layerConfig.set_type("exconvt"); + configt.layerConfig.set_num_filters(3); + configt.layerConfig.set_partial_sum(1); + configt.layerConfig.set_shared_biases(true); + + configt.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 384}); + LayerInputConfig* input = configt.layerConfig.add_inputs(); + ConvConfig* conv = input->mutable_conv_conf(); + conv->set_filter_size(2); + conv->set_filter_size_y(4); + conv->set_channels(16); + conv->set_padding(0); + conv->set_padding_y(1); + conv->set_stride(2); + conv->set_stride_y(2); + conv->set_groups(1); + conv->set_filter_channels(3 / conv->groups()); + conv->set_img_size(16); + conv->set_output_x(outputSize(conv->img_size(), conv->filter_size(), + conv->padding(), conv->stride(), + /* caffeMode */ true)); + configt.layerConfig.set_size(conv->img_size() * conv->img_size() * + configt.layerConfig.num_filters()); + configt.layerConfig.set_name("convTrans"); + + // data layer initialize + std::vector dataLayers; + LayerMap layerMap; + vector datas; + initDataLayer(configt, &dataLayers, &datas, &layerMap, "convTrans", + 100, false, false); + // test layer initialize + std::vector parameters; + LayerPtr convtLayer; + initTestLayer(configt, &layerMap, ¶meters, &convtLayer); + convtLayer->getBiasParameter()->zeroMem(); + convtLayer->forward(PASS_GC); + + // Setting up conv-layer config + TestConfig config; + config.biasSize = 16; + config.layerConfig.set_type("exconv"); + config.layerConfig.set_num_filters(16); + config.layerConfig.set_partial_sum(1); + config.layerConfig.set_shared_biases(true); + + config.inputDefs.push_back({INPUT_DATA, "layer_1", 768, 384}); + input = config.layerConfig.add_inputs(); + conv = input->mutable_conv_conf(); + conv->set_filter_size(2); + conv->set_filter_size_y(4); + conv->set_channels(3); + conv->set_padding(0); + conv->set_padding_y(1); + conv->set_stride(2); + conv->set_stride_y(2); + conv->set_groups(1); + conv->set_filter_channels(conv->channels() / conv->groups()); + conv->set_img_size(16); + conv->set_output_x(outputSize(conv->img_size(), conv->filter_size(), + conv->padding(), conv->stride(), + /* caffeMode */ true)); + config.layerConfig.set_size(conv->output_x() * conv->output_x() * + config.layerConfig.num_filters()); + config.layerConfig.set_name("conv"); + + // data layer initialize + std::vector dataLayers2; + LayerMap layerMap2; + vector datas2; + initDataLayer(config, &dataLayers2, &datas2, &layerMap2, "conv", + 100, false, false); + // test layer initialize + std::vector parameters2; + LayerPtr convLayer; + initTestLayer(config, &layerMap2, ¶meters2, &convLayer); + + // Sync convLayer and convtLayer parameter + convLayer->getBiasParameter()->zeroMem(); + convLayer->getParameters()[0]->getBuf(PARAMETER_VALUE)->copyFrom( + 
*(convtLayer->getParameters()[0]->getBuf(PARAMETER_VALUE))); + + // Set convLayer outputGrad as convTransLayer input value + convLayer->forward(PASS_GC); + convLayer->getOutput().grad->copyFrom(*(dataLayers[0]->getOutputValue())); + + vector callbackFlags(parameters2.size(), 0); + auto callback = [&](Parameter* para) { ++callbackFlags[para->getID()]; }; + convLayer->backward(callback); + + // Check that the convLayer backward is the same as convTransLayer forward + checkMatrixEqual(convtLayer->getOutputValue(), + dataLayers2[0]->getOutputGrad()); +} + + +// Do one forward pass of convTrans layer and check to see if its output +// matches the given result +void doOneConvtTest(size_t imgSize, size_t output_x, size_t stride, + size_t padding, size_t filter_size, MatrixPtr& result) { + TestConfig configt; + configt.biasSize = 1; + configt.layerConfig.set_type("exconvt"); + configt.layerConfig.set_num_filters(1); + configt.layerConfig.set_partial_sum(1); + configt.layerConfig.set_shared_biases(true); + + configt.inputDefs.push_back({INPUT_DATA, "layer_0", output_x * output_x, + filter_size * filter_size}); + LayerInputConfig* input = configt.layerConfig.add_inputs(); + ConvConfig* conv = input->mutable_conv_conf(); + conv->set_filter_size(filter_size); + conv->set_filter_size_y(filter_size); + conv->set_channels(1); + conv->set_padding(padding); + conv->set_padding_y(padding); + conv->set_stride(stride); + conv->set_stride_y(stride); + conv->set_groups(1); + conv->set_filter_channels(1); + conv->set_img_size(imgSize); + conv->set_output_x(output_x); + + configt.layerConfig.set_size(conv->img_size() * conv->img_size() * + configt.layerConfig.num_filters()); + configt.layerConfig.set_name("convTrans"); + + std::vector dataLayers; + LayerMap layerMap; + vector datas; + initDataLayer(configt, &dataLayers, &datas, &layerMap, "convTrans", + 1, false, false); + dataLayers[0]->getOutputValue()->zeroMem(); + dataLayers[0]->getOutputValue()->add(1.0); + + // test layer initialize + std::vector parameters; + LayerPtr convtLayer; + initTestLayer(configt, &layerMap, ¶meters, &convtLayer); + convtLayer->getBiasParameter()->zeroMem(); + convtLayer->getParameters()[0]->zeroMem(); + convtLayer->getParameters()[0]->getBuf(PARAMETER_VALUE)->add(1.0); + convtLayer->forward(PASS_GC); + + checkMatrixEqual(convtLayer->getOutputValue(), result); +} + +TEST(Layer, convTransLayerFwd2) { + MatrixPtr result; + result = Matrix::create(1, 5 * 5, false, false); + result->zeroMem(); + result->add(1.0); + doOneConvtTest(/* imgSize */ 5, + /* output_x */ 1, + /* stride */ 1, + /* padding */ 0, + /* filter_size */ 5, + result); + + float resultData[] = {1, 2, 2, 2, 1, + 2, 4, 4, 4, 2, + 2, 4, 4, 4, 2, + 2, 4, 4, 4, 2, + 1, 2, 2, 2, 1}; + result->setData(resultData); + doOneConvtTest(/* imgSize */ 5, + /* output_x */ 2, + /* stride */ 1, + /* padding */ 0, + /* filter_size */ 4, + result); + + float resultData2[] = {1, 2, 2, 2, 1, + 2, 4, 4, 4, 2, + 2, 4, 4, 4, 2, + 2, 4, 4, 4, 2, + 1, 2, 2, 2, 1}; + result->setData(resultData2); + doOneConvtTest(/* imgSize */ 5, + /* output_x */ 2, + /* stride */ 2, + /* padding */ 1, + /* filter_size */ 5, + result); + + float resultData3[] = {1, 1, 2, 1, 1, + 1, 1, 2, 1, 1, + 2, 2, 4, 2, 2, + 1, 1, 2, 1, 1, + 1, 1, 2, 1, 1}; + result->setData(resultData3); + doOneConvtTest(/* imgSize */ 5, + /* output_x */ 2, + /* stride */ 2, + /* padding */ 0, + /* filter_size */ 3, + result);} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + initMain(argc, argv); + 
FLAGS_thread_local_rand_use_global_seed = true; + srand(1); + return RUN_ALL_TESTS(); +} diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 4e01fa91ed2ba..7b6e6fd3999ff 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -302,6 +302,8 @@ void testConvLayer(const string& type, bool trans, bool useGpu) { config.layerConfig.num_filters()); testLayerGrad(config, "conv", 100, trans, useGpu); + // Use small batch_size and useWeight=true to test biasGrad + testLayerGrad(config, "conv", 2, trans, useGpu, true, 0.02); } TEST(Layer, convLayer) { @@ -312,6 +314,46 @@ TEST(Layer, convLayer) { #endif } + +void testConvTransLayer(const string& type, bool trans, bool useGpu) { + TestConfig config; + config.biasSize = 3; + config.layerConfig.set_type(type); + config.layerConfig.set_num_filters(3); + config.layerConfig.set_partial_sum(1); + config.layerConfig.set_shared_biases(true); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 288}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + ConvConfig* conv = input->mutable_conv_conf(); + conv->set_filter_size(2); + conv->set_filter_size_y(3); + conv->set_channels(16); + conv->set_padding(0); + conv->set_padding_y(1); + conv->set_stride(2); + conv->set_stride_y(2); + conv->set_groups(1); + conv->set_filter_channels(3 / conv->groups()); + conv->set_img_size(16); + conv->set_output_x(outputSize(conv->img_size(), conv->filter_size(), + conv->padding(), conv->stride(), + /* caffeMode */ true)); + + config.layerConfig.set_size(conv->img_size() * conv->img_size() * + config.layerConfig.num_filters()); + + testLayerGrad(config, "convTrans", 100, trans, useGpu); + // Use small batch_size and useWeight=true to test biasGrad + testLayerGrad(config, "convTrans", 2, trans, useGpu, true, 0.02); +} + +TEST(Layer, convTransLayer) { + for (auto useGpu : {false, true}) { + testConvTransLayer("exconvt", /* trans= */ false, /* useGpu= */ useGpu); + } +} + TEST(Layer, blockExpandLayer) { TestConfig config; config.biasSize = 0; diff --git a/paddle/math/MathUtils.cpp b/paddle/math/MathUtils.cpp index c1af8628d03c5..548f17936381c 100644 --- a/paddle/math/MathUtils.cpp +++ b/paddle/math/MathUtils.cpp @@ -80,4 +80,17 @@ int outputSize(int imageSize, int filterSize, int padding, int stride, return outputSize; } +int imageSize(int outputSize, int filterSize, int padding, int stride, + bool caffeMode) { + int imageSize; + if (!caffeMode) { + imageSize = + (outputSize - 1) * stride + filterSize - 2 * padding - stride + 1; + } else { + imageSize = (outputSize - 1) * stride + filterSize - 2 * padding; + } + CHECK_GE(imageSize, 1); + return imageSize; +} + } // namespace paddle diff --git a/paddle/math/MathUtils.h b/paddle/math/MathUtils.h index 49d0c10a8f5e4..91683dc3e9144 100644 --- a/paddle/math/MathUtils.h +++ b/paddle/math/MathUtils.h @@ -60,4 +60,11 @@ void sparseRand(int* major, int* minor, int nnz, int majorLen, int minorMax, int outputSize(int imageSize, int filterSize, int padding, int stride, bool caffeMode); +/** + * Calculate image size based on output size and caffeMode_. 
+ * It is the reverse function of outputSize() + */ +int imageSize(int outputSize, int filterSize, int padding, int stride, + bool caffeMode); + } // namespace paddle diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 73631602a92be..958bfdaf2e283 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -649,7 +649,8 @@ def __init__( parse_conv(conv_conf, input_layer_name, - self.proj_conf.conv_conf) + self.proj_conf.conv_conf, + num_filters) # TODO: support rectangle input self.proj_conf.output_size = (self.proj_conf.conv_conf.output_x ** 2) * num_filters @@ -730,7 +731,8 @@ def __init__( parse_conv(conv_conf, MakeLayerNameInSubmodel(input_layer_names[0]), - self.operator_conf.conv_conf) + self.operator_conf.conv_conf, + num_filters) self.operator_conf.output_size = (self.operator_conf.conv_conf.output_x ** 2) * num_filters config_assert(len(input_layer_names) == 2, "Conv is binary operator") @@ -1017,6 +1019,17 @@ def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode): else: return 1 + int(math.ceil(output)) +''' +calcualte image_size based on output_size for convolution. +It is the reverse function of cnn_output_size +''' +def cnn_image_size(output_size, filter_size, padding, stride, caffe_mode): + if caffe_mode: + img_size = (output_size - 1) * stride + filter_size - 2 * padding + else: + img_size = (output_size - 2) * stride + filter_size - 2 * padding + 1 + return img_size + def parse_pool(pool, input_layer_name, pool_conf): pool_conf.pool_type = pool.pool_type config_assert(pool.pool_type in ['max-projection', 'avg-projection', @@ -1082,7 +1095,11 @@ def parse_norm(norm, input_layer_name, norm_conf): else: norm_conf.scale /= norm.size ** 2 -def parse_conv(conv, input_layer_name, conv_conf): +''' +caffe_mode: compute the output size using floor instead of ceil, + which is consistent of caffe and CuDNN's convention. 
+''' +def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False): conv_conf.filter_size = conv.filter_size conv_conf.filter_size_y = conv.filter_size_y conv_conf.channels = conv.channels @@ -1091,20 +1108,37 @@ def parse_conv(conv, input_layer_name, conv_conf): conv_conf.stride = conv.stride conv_conf.stride_y = conv.stride_y conv_conf.groups = conv.groups - conv_conf.filter_channels = conv.channels / conv.groups conv_conf.caffe_mode = conv.caffe_mode - - img_pixels = g_layer_map[input_layer_name].size / conv.channels - print('channels=%d size=%d'%(conv.channels, - g_layer_map[input_layer_name].size)) - conv_conf.img_size = int(img_pixels ** 0.5) - config_assert((conv_conf.img_size ** 2) == img_pixels, - ("Input layer %s: Incorrect input image size %d for input " - + "image pixels %d") - % (input_layer_name, conv_conf.img_size, img_pixels)) - conv_conf.output_x = cnn_output_size(conv_conf.img_size, conv_conf.filter_size, - conv_conf.padding, conv_conf.stride, - conv_conf.caffe_mode) + + if not trans: + conv_conf.filter_channels = conv.channels / conv.groups + + img_pixels = g_layer_map[input_layer_name].size / conv.channels + print('channels=%d size=%d'%(conv.channels, + g_layer_map[input_layer_name].size)) + conv_conf.img_size = int(img_pixels ** 0.5) + config_assert((conv_conf.img_size ** 2) == img_pixels, + ("Input layer %s: Incorrect input image size %d for input " + + "image pixels %d") + % (input_layer_name, conv_conf.img_size, img_pixels)) + + conv_conf.output_x = cnn_output_size( + conv_conf.img_size, conv_conf.filter_size, + conv_conf.padding, conv_conf.stride, conv_conf.caffe_mode) + else: + conv_conf.filter_channels = num_filters / conv.groups + + outputSize = g_layer_map[input_layer_name].size / conv.channels + print('channels=%d size=%d'%(conv.channels, + g_layer_map[input_layer_name].size)) + conv_conf.output_x = int(outputSize ** 0.5) + config_assert((conv_conf.output_x ** 2) == outputSize, + ("Input layer %s: Incorrect input image size %d for input " + + "image pixels %d") + % (input_layer_name, conv_conf.output_x, outputSize)) + conv_conf.img_size = cnn_image_size( + conv_conf.output_x, conv_conf.filter_size, + conv_conf.padding, conv_conf.stride, conv_conf.caffe_mode) def parse_block_expand(block_expand, input_layer_name, block_expand_conf): block_expand_conf.channels = block_expand.channels @@ -1587,7 +1621,8 @@ def __init__( parse_conv( self.inputs[input_index].conv, input_layer.name, - self.config.inputs[input_index].conv_conf) + self.config.inputs[input_index].conv_conf, + num_filters) conv_conf = self.config.inputs[input_index].conv_conf psize = self.calc_parameter_size(conv_conf) print("output size for %s is %d " % (name, conv_conf.output_x)) @@ -1612,6 +1647,63 @@ class ConvLayer(ConvLayerBase): class ConvLayer(ConvLayerBase): layer_type = 'cudnn_conv' + +@config_layer('convt') +class ConvTransLayerBase(LayerBase): + layer_type = 'convt' + def __init__( + self, + name, + inputs=[], + bias=True, + num_filters=None, + shared_biases=False, + **xargs): + super(ConvTransLayerBase, self).__init__( + name, self.layer_type, 0, inputs=inputs, **xargs) + + if num_filters is not None: + self.config.num_filters = num_filters + + use_gpu = int(g_command_config_args.get("use_gpu", 0)) + parallel_nn = int(g_command_config_args.get("parallel_nn", 0)) + + # cudnn_convt has not been implemented so use exconvt only + self.layer_type = "exconvt" + # need to specify layer in config + self.config.type = self.layer_type + + if shared_biases is not None: + 
self.config.shared_biases = shared_biases + + for input_index in xrange(len(self.inputs)): + input_layer = self.get_input_layer(input_index) + parse_conv( + self.inputs[input_index].conv, + input_layer.name, + self.config.inputs[input_index].conv_conf, + num_filters, + trans=True) + conv_conf = self.config.inputs[input_index].conv_conf + psize = self.calc_parameter_size(conv_conf) + print("output size for %s is %d " % (name, conv_conf.output_x)) + self.create_input_parameter(input_index, psize) + self.set_layer_size( + (conv_conf.img_size ** 2) * self.config.num_filters) + + psize = self.config.size + if shared_biases: + psize = self.config.num_filters + self.create_bias_parameter(bias, psize, [psize, 1]) + + def calc_parameter_size(self, conv_conf): + return conv_conf.channels * conv_conf.filter_channels \ + * (conv_conf.filter_size * conv_conf.filter_size_y) + +@config_layer('exconvt') +class ConvTransLayer(ConvTransLayerBase): + layer_type = 'exconvt' + @config_layer('norm') class NormLayer(LayerBase): def __init__( diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 49f0ff3289db7..ecc73c34a833c 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -78,6 +78,7 @@ class LayerType(object): COSINE_SIM = 'cos' HSIGMOID = 'hsigmoid' CONV_LAYER = "conv" + CONVTRANS_LAYER = "convt" POOL_LAYER = "pool" BATCH_NORM_LAYER = 'batch_norm' NORM_LAYER = 'norm' @@ -1514,7 +1515,8 @@ def img_conv_layer(input, filter_size, num_filters, name=None, num_channels=None, act=None, groups=1, stride=1, padding=0, bias_attr=None, param_attr=None, shared_biases=True, layer_attr=None, - filter_size_y=None, stride_y=None, padding_y=None): + filter_size_y=None, stride_y=None, padding_y=None, + trans=False): """ Convolution layer for image. Paddle only support square input currently and thus input image's width equals height. @@ -1522,7 +1524,14 @@ def img_conv_layer(input, filter_size, num_filters, The details of convolution layer, please refer UFLDL's `convolution `_ . + + Convolution Transpose (deconv) layer for image. Paddle only support square + input currently and thus input image's width equals height. + The details of convolution transpose layer, + please refer to the following explanation and references therein + `_ . The num_channel means input image's channel number. It may be 1 or 3 when input is raw pixels of image(mono or RGB), or it may be the previous layer's num_filters * num_group. @@ -1572,6 +1581,8 @@ def img_conv_layer(input, filter_size, num_filters, :type shared_biases: bool :param layer_attr: Layer Extra Attribute. :type layer_attr: ExtraLayerAttribute + :param trans: true if it is a convTransLayer, false if it is a convLayer + :type trans: bool :return: LayerOutput object. 
:rtype: LayerOutput """ @@ -1607,6 +1618,9 @@ def img_conv_layer(input, filter_size, num_filters, param_attr.attr["initial_std"] = init_w param_attr.attr["initial_strategy"] = 0 param_attr.attr["initial_smart"] = False + + lt = LayerType.CONVTRANS_LAYER if trans else LayerType.CONV_LAYER + Layer( name=name, inputs=Input(input.name, conv=Conv( @@ -1619,10 +1633,10 @@ def img_conv_layer(input, filter_size, num_filters, num_filters=num_filters, bias=ParamAttr.to_bias(bias_attr), shared_biases=shared_biases, - type=LayerType.CONV_LAYER, + type=lt, **ExtraLayerAttribute.to_kwargs(layer_attr) ) - return LayerOutput(name, LayerType.CONV_LAYER, parents=[input], + return LayerOutput(name, lt, parents=[input], activation=act, num_filters=num_filters) diff --git a/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh b/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh index 77774f6fcfafd..b8687e1d48371 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh @@ -9,7 +9,7 @@ protostr=$PWD/protostr configs=(test_fc layer_activations projections test_print_layer test_sequence_pooling test_lstmemory_layer test_grumemory_layer last_first_seq test_expand_layer test_ntm_layers test_hsigmoid -img_layers util_layers simple_rnn_layers unused_layers test_cost_layers +img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers test_rnn_group shared_fc shared_lstm test_cost_layers_with_weight test_maxout test_bi_grumemory math_ops) diff --git a/python/paddle/trainer_config_helpers/tests/configs/img_trans_layers.py b/python/paddle/trainer_config_helpers/tests/configs/img_trans_layers.py new file mode 100644 index 0000000000000..077c78d201648 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/img_trans_layers.py @@ -0,0 +1,22 @@ +from paddle.trainer_config_helpers import * + +settings( + learning_rate=1e-3, + batch_size=1000 +) + +img = data_layer(name='image', size=227*227) + +# the parse_conv in config_parse.py is not strictly accurate when filter_size +# is not square. So here set square filter_size. 
+img_conv = img_conv_layer(input=img, num_channels=1, num_filters=64, + filter_size=(32, 32), padding=(1, 1), stride=(1, 1), + act=LinearActivation(), trans=True) +img_bn = batch_norm_layer(input=img_conv, act=ReluActivation()) + +img_norm = img_cmrnorm_layer(input=img_bn, size=32) + +img_pool = img_pool_layer(input=img_conv, pool_size=32, pool_type=MaxPooling()) + + +outputs(img_pool, img_norm) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr new file mode 100644 index 0000000000000..38346354080b0 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr @@ -0,0 +1,176 @@ +type: "nn" +layers { + name: "image" + type: "data" + size: 51529 + active_type: "" +} +layers { + name: "__conv_0__" + type: "exconvt" + size: 4194304 + active_type: "" + inputs { + input_layer_name: "image" + input_parameter_name: "___conv_0__.w0" + conv_conf { + filter_size: 32 + channels: 1 + stride: 1 + padding: 1 + groups: 1 + filter_channels: 64 + output_x: 227 + img_size: 256 + caffe_mode: true + filter_size_y: 32 + padding_y: 1 + stride_y: 1 + } + } + bias_parameter_name: "___conv_0__.wbias" + num_filters: 64 + shared_biases: true +} +layers { + name: "__batch_norm_0__" + type: "batch_norm" + size: 4194304 + active_type: "relu" + inputs { + input_layer_name: "__conv_0__" + input_parameter_name: "___batch_norm_0__.w0" + image_conf { + channels: 64 + img_size: 256 + } + } + inputs { + input_layer_name: "__conv_0__" + input_parameter_name: "___batch_norm_0__.w1" + } + inputs { + input_layer_name: "__conv_0__" + input_parameter_name: "___batch_norm_0__.w2" + } + bias_parameter_name: "___batch_norm_0__.wbias" + moving_average_fraction: 0.9 +} +layers { + name: "__crmnorm_0__" + type: "norm" + size: 4194304 + active_type: "" + inputs { + input_layer_name: "__batch_norm_0__" + norm_conf { + norm_type: "cmrnorm-projection" + channels: 64 + size: 32 + scale: 0.0004 + pow: 0.75 + output_x: 256 + img_size: 256 + blocked: false + } + } +} +layers { + name: "__pool_0__" + type: "pool" + size: 3240000 + active_type: "" + inputs { + input_layer_name: "__conv_0__" + pool_conf { + pool_type: "max-projection" + channels: 64 + size_x: 32 + stride: 1 + output_x: 225 + img_size: 256 + padding: 0 + size_y: 32 + stride_y: 1 + output_y: 225 + img_size_y: 256 + padding_y: 0 + } + } +} +parameters { + name: "___conv_0__.w0" + size: 65536 + initial_mean: 0.0 + initial_std: 0.0441941738242 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___conv_0__.wbias" + size: 64 + initial_mean: 0.0 + initial_std: 0.0 + dims: 64 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___batch_norm_0__.w0" + size: 64 + initial_mean: 1.0 + initial_std: 0.0 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___batch_norm_0__.w1" + size: 64 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 64 + initial_strategy: 0 + initial_smart: false + is_static: true + is_shared: true +} +parameters { + name: "___batch_norm_0__.w2" + size: 64 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 64 + initial_strategy: 0 + initial_smart: false + is_static: true + is_shared: true +} +parameters { + name: "___batch_norm_0__.wbias" + size: 64 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 64 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "image" +output_layer_names: "__pool_0__" 
+output_layer_names: "__crmnorm_0__" +sub_models { + name: "root" + layer_names: "image" + layer_names: "__conv_0__" + layer_names: "__batch_norm_0__" + layer_names: "__crmnorm_0__" + layer_names: "__pool_0__" + input_layer_names: "image" + output_layer_names: "__pool_0__" + output_layer_names: "__crmnorm_0__" + is_recurrent_layer_group: false +} +
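
A quick standalone check (not part of the patch) of the size arithmetic introduced above: the new imageSize() in paddle/math/MathUtils.cpp is intended to invert outputSize() whenever the filter, stride and padding tile the image exactly, which is the situation exercised by doOneConvtTest() in test_ConvTrans.cpp. In the sketch below, the imageSize() body is copied from the patch; the outputSize() body is an assumption reconstructed from the usual Caffe-style floor/ceil convention (its existing implementation is not shown in this diff), and main() simply replays the four test configurations.

// Standalone sketch, separate from the patch. Assumes outputSize() follows
// the standard Caffe convention; imageSize() matches the version added to
// paddle/math/MathUtils.cpp in this diff.
#include <cassert>
#include <cstdio>

static int outputSize(int imageSize, int filterSize, int padding, int stride,
                      bool caffeMode) {
  // caffeMode: floor; otherwise: ceil (mirrors cnn_output_size in
  // config_parser.py). Reconstructed, not taken from this diff.
  if (caffeMode) {
    return (imageSize - filterSize + 2 * padding) / stride + 1;
  }
  return (imageSize - filterSize + 2 * padding + stride - 1) / stride + 1;
}

static int imageSize(int outputSize, int filterSize, int padding, int stride,
                     bool caffeMode) {
  // Copied from the imageSize() added in this patch.
  if (caffeMode) {
    return (outputSize - 1) * stride + filterSize - 2 * padding;
  }
  return (outputSize - 1) * stride + filterSize - 2 * padding - stride + 1;
}

int main() {
  // The four (imgSize, filter, padding, stride) settings used by
  // doOneConvtTest() in test_ConvTrans.cpp.
  struct { int img, filter, pad, stride; } cases[] = {
      {5, 5, 0, 1}, {5, 4, 0, 1}, {5, 5, 1, 2}, {5, 3, 0, 2}};
  for (auto &c : cases) {
    int out = outputSize(c.img, c.filter, c.pad, c.stride, /*caffeMode=*/true);
    // Round trip holds because these settings tile the image exactly.
    assert(imageSize(out, c.filter, c.pad, c.stride, /*caffeMode=*/true) ==
           c.img);
    printf("img_size=%d filter=%d pad=%d stride=%d -> output_x=%d\n",
           c.img, c.filter, c.pad, c.stride, out);
  }
  return 0;
}

Built with g++ -std=c++11, this should print output_x = 1, 2, 2, 2 for the four cases, matching the output_x values passed to doOneConvtTest(), and the asserts confirm the conv/deconv size round trip.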