add scale_channels, swish, logistic(sigmoid), avgpool ; add enet_coco(EfficientNetB0-Yolov3).cfg #172

Open: wants to merge 6 commits into master
1 change: 1 addition & 0 deletions include/tkDNN/DarknetParser.h
@@ -28,6 +28,7 @@ namespace tk { namespace dnn {
int new_coords= 0;
float scale_xy = 1;
float nms_thresh = 0.45;
int scale_wh_in_scale_channels = 0;
std::vector<int> layers;
std::string activation = "linear";

28 changes: 27 additions & 1 deletion include/tkDNN/Layer.h
@@ -19,6 +19,7 @@ enum layerType_t {
LAYER_ACTIVATION_CRELU,
LAYER_ACTIVATION_LEAKY,
LAYER_ACTIVATION_MISH,
LAYER_ACTIVATION_SWISH,
LAYER_FLATTEN,
LAYER_RESHAPE,
LAYER_MULADD,
@@ -27,6 +28,7 @@ enum layerType_t {
LAYER_ROUTE,
LAYER_REORG,
LAYER_SHORTCUT,
LAYER_SCALECHANNELS,
LAYER_UPSAMPLE,
LAYER_REGION,
LAYER_YOLO
@@ -68,6 +70,7 @@ class Layer {
case LAYER_ACTIVATION_CRELU: return "ActivationCReLU";
case LAYER_ACTIVATION_LEAKY: return "ActivationLeaky";
case LAYER_ACTIVATION_MISH: return "ActivationMish";
case LAYER_ACTIVATION_SWISH: return "ActivationSwish";
case LAYER_FLATTEN: return "Flatten";
case LAYER_RESHAPE: return "Reshape";
case LAYER_MULADD: return "MulAdd";
@@ -76,6 +79,7 @@
case LAYER_ROUTE: return "Route";
case LAYER_REORG: return "Reorg";
case LAYER_SHORTCUT: return "Shortcut";
case LAYER_SCALECHANNELS: return "ScaleChannels";
case LAYER_UPSAMPLE: return "Upsample";
case LAYER_REGION: return "Region";
case LAYER_YOLO: return "Yolo";
@@ -212,7 +216,8 @@ class Dense : public LayerWgs {
typedef enum {
ACTIVATION_ELU = 100,
ACTIVATION_LEAKY = 101,
ACTIVATION_MISH = 102
ACTIVATION_MISH = 102,
ACTIVATION_SWISH = 103
} tkdnnActivationMode_t;

/**
@@ -233,6 +238,8 @@ class Activation : public Layer {
return LAYER_ACTIVATION_LEAKY;
else if (act_mode == ACTIVATION_MISH)
return LAYER_ACTIVATION_MISH;
else if (act_mode == ACTIVATION_SWISH)
return LAYER_ACTIVATION_SWISH;
else
return LAYER_ACTIVATION;
};
@@ -557,6 +564,25 @@ class Shortcut : public Layer {
Layer *backLayer;
};

/**
ScaleChannels layer
channel-wise multiplication with the output of another layer
*/
class ScaleChannels : public Layer {

public:
ScaleChannels(Network *net, Layer *backLayer, int scale_wh);
virtual ~ScaleChannels();
virtual layerType_t getLayerType() { return LAYER_SCALECHANNELS; };

virtual dnnType* infer(dataDim_t &dim, dnnType* srcData);

public:
Layer *backLayer;
int scale_wh;
};


/**
Upsample layer
Maintains same dimension but change C*H*W distribution
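
Note on the new ScaleChannels layer: as wired up in src/ScaleChannels.cpp below, the layer's direct input carries the per-channel scales (the C x 1 x 1 output of the SE branch), while backLayer supplies the feature map being rescaled; the output keeps backLayer's dimensions. A minimal CPU sketch of the scale_wh = 0 behaviour (illustrative only, the names below are not part of the PR):

#include <cstddef>
#include <vector>

// out[n][c][h][w] = trunk[n][c][h][w] * scales[n][c]
// trunk  : N*C*H*W values (backLayer output)
// scales : N*C values     (this layer's input, i.e. C x 1 x 1 per image)
std::vector<float> scale_channels_ref(const std::vector<float>& trunk,
                                      const std::vector<float>& scales,
                                      int N, int C, int H, int W) {
    std::vector<float> out(trunk.size());
    for (int n = 0; n < N; ++n)
        for (int c = 0; c < C; ++c) {
            const float s = scales[n * C + c];
            for (int i = 0; i < H * W; ++i) {
                const std::size_t idx = ((std::size_t)n * C + c) * H * W + i;
                out[idx] = trunk[idx] * s;
            }
        }
    return out;
}
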
2 changes: 2 additions & 0 deletions include/tkDNN/NetworkRT.h
@@ -26,6 +26,7 @@ using namespace nvinfer1;
#include "pluginsRT/ActivationLeakyRT.h"
#include "pluginsRT/ActivationReLUCeilingRT.h"
#include "pluginsRT/ActivationMishRT.h"
#include "pluginsRT/ActivationSwishRT.h"
#include "pluginsRT/ReorgRT.h"
#include "pluginsRT/RegionRT.h"
#include "pluginsRT/RouteRT.h"
@@ -108,6 +109,7 @@ class NetworkRT {
nvinfer1::ILayer* convert_layer(nvinfer1::ITensor *input, Reorg *l);
nvinfer1::ILayer* convert_layer(nvinfer1::ITensor *input, Region *l);
nvinfer1::ILayer* convert_layer(nvinfer1::ITensor *input, Shortcut *l);
nvinfer1::ILayer* convert_layer(nvinfer1::ITensor *input, ScaleChannels *l);
nvinfer1::ILayer* convert_layer(nvinfer1::ITensor *input, Yolo *l);
nvinfer1::ILayer* convert_layer(nvinfer1::ITensor *input, Upsample *l);
nvinfer1::ILayer* convert_layer(nvinfer1::ITensor *input, DeformConv2d *l);
5 changes: 5 additions & 0 deletions include/tkDNN/kernels.h
@@ -9,6 +9,7 @@ void activationReLUCeilingForward(dnnType *srcData, dnnType *dstData, int size,
void activationLOGISTICForward(dnnType *srcData, dnnType *dstData, int size, cudaStream_t stream = cudaStream_t(0));
void activationSIGMOIDForward(dnnType *srcData, dnnType *dstData, int size, cudaStream_t stream = cudaStream_t(0));
void activationMishForward(dnnType* srcData, dnnType* dstData, int size, cudaStream_t stream= cudaStream_t(0));
void activationSwishForward(dnnType* srcData, dnnType* dstData, int size, cudaStream_t stream= cudaStream_t(0));

void fill(dnnType *data, int size, dnnType val, cudaStream_t stream = cudaStream_t(0));

@@ -27,6 +28,10 @@ void shortcutForward(dnnType *srcData, dnnType *dstData, int n1, int c1, int h1,
int n2, int c2, int h2, int w2, int s2,
cudaStream_t stream = cudaStream_t(0));

void scaleChannelsForward(dnnType *in_w_h_c, int size, int channel_size, int batch_size, int scale_wh,
dnnType *scales_c, dnnType *out,
cudaStream_t stream = cudaStream_t(0));

void upsampleForward(dnnType *srcData, dnnType *dstData,
int n, int c, int h, int w, int s, int forward, float scale,
cudaStream_t stream = cudaStream_t(0));
60 changes: 60 additions & 0 deletions include/tkDNN/pluginsRT/ActivationSwishRT.h
@@ -0,0 +1,60 @@
#include <cassert>
#include "../kernels.h"

class ActivationSwishRT : public IPlugin {

public:
ActivationSwishRT() {


}

~ActivationSwishRT(){

}

int getNbOutputs() const override {
return 1;
}

Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override {
return inputs[0];
}

void configure(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, int maxBatchSize) override {
size = 1;
for(int i=0; i<outputDims[0].nbDims; i++)
size *= outputDims[0].d[i];
}

int initialize() override {

return 0;
}

virtual void terminate() override {
}

virtual size_t getWorkspaceSize(int maxBatchSize) const override {
return 0;
}

virtual int enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) override {

activationSwishForward((dnnType*)reinterpret_cast<const dnnType*>(inputs[0]),
reinterpret_cast<dnnType*>(outputs[0]), batchSize*size, stream);
return 0;
}


virtual size_t getSerializationSize() override {
return 1*sizeof(int);
}

virtual void serialize(void* buffer) override {
char *buf = reinterpret_cast<char*>(buffer);
tk::dnn::writeBUF(buf, size);
}

int size;
};
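
A note on (de)serialization with the old IPlugin API: getSerializationSize() reports a single int and serialize() stores this->size via tk::dnn::writeBUF; the matching readBUF<int> call is added to PluginFactory::createPlugin further down in src/NetworkRT.cpp. A minimal round-trip sketch (the writeBUF/readBUF stand-ins below are assumptions that mirror how the other tkDNN plugins use them, i.e. copy sizeof(T) bytes and advance the pointer):

#include <cassert>
#include <cstring>

// Hypothetical stand-ins for tk::dnn::writeBUF / readBUF.
template <typename T> void writeBUF(char*& buf, const T& v) { std::memcpy(buf, &v, sizeof(T)); buf += sizeof(T); }
template <typename T> T readBUF(const char*& buf) { T v; std::memcpy(&v, buf, sizeof(T)); buf += sizeof(T); return v; }

int main() {
    int size = 1280 * 7 * 7;            // e.g. the tensor size cached by configure()
    char storage[sizeof(int)];

    char* w = storage;
    writeBUF(w, size);                  // what ActivationSwishRT::serialize() does

    const char* r = storage;
    assert(readBUF<int>(r) == size);    // what PluginFactory::createPlugin() recovers
    return 0;
}
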
3 changes: 3 additions & 0 deletions src/Activation.cpp
@@ -52,6 +52,9 @@ dnnType* Activation::infer(dataDim_t &dim, dnnType* srcData) {
else if(act_mode == ACTIVATION_MISH) {
activationMishForward(srcData, dstData, dim.tot());

}
else if(act_mode == ACTIVATION_SWISH) {
activationSwishForward(srcData, dstData, dim.tot());
} else {
dnnType alpha = dnnType(1);
dnnType beta = dnnType(0);
22 changes: 21 additions & 1 deletion src/DarknetParser.cpp
@@ -80,6 +80,8 @@ namespace tk { namespace dnn {
fields.groups = std::stoi(value);
else if(name.find("group_id") != std::string::npos)
fields.group_id = std::stoi(value);
else if(name.find("scale_wh") != std::string::npos)
fields.scale_wh_in_scale_channels = std::stoi(value);
else if(name.find("scale_x_y") != std::string::npos)
fields.scale_xy = std::stof(value);
else if(name.find("beta_nms") != std::string::npos)
@@ -134,7 +136,11 @@ namespace tk { namespace dnn {
f.padding_x, f.padding_y, tk::dnn::POOLING_MAX));

} else if(f.type == "avgpool") {
netLayers.push_back(new tk::dnn::Pooling(net, f.size_x, f.size_y, f.stride_x, f.stride_y,
auto output_dim = net->getOutputDim();
int stride = 1;
assert(f.padding_x == 0 && f.padding_y == 0);

netLayers.push_back(new tk::dnn::Pooling(net, output_dim.h, output_dim.w, stride, stride,
f.padding_x, f.padding_y, tk::dnn::POOLING_AVERAGE));

} else if(f.type == "shortcut") {
@@ -146,6 +152,18 @@
//std::cout<<"shortcut to "<<layerIdx<<" "<<netLayers[layerIdx]->getLayerName()<<"\n";
netLayers.push_back(new tk::dnn::Shortcut(net, netLayers[layerIdx]));

} else if(f.type == "scale_channels") {
if(f.layers.size() != 1) FatalError("no layers to scale_channels\n");
int layerIdx = f.layers[0];
if(layerIdx < 0)
layerIdx = netLayers.size() + layerIdx;
if(layerIdx < 0 || layerIdx >= netLayers.size()) FatalError("impossible to scale_channels\n");

int scale_wh = f.scale_wh_in_scale_channels;
if(scale_wh != 0) FatalError("Currently only scale_wh=0 is supported in scale_channels\n");

netLayers.push_back(new tk::dnn::ScaleChannels(net, netLayers[layerIdx], scale_wh));

} else if(f.type == "upsample") {
netLayers.push_back(new tk::dnn::Upsample(net, f.stride_x));

@@ -185,8 +203,10 @@
if(netLayers.size() > 0 && f.activation != "linear") {
tkdnnActivationMode_t act;
if(f.activation == "relu") act = tkdnnActivationMode_t(CUDNN_ACTIVATION_RELU);
else if(f.activation == "logistic") act = tkdnnActivationMode_t(CUDNN_ACTIVATION_SIGMOID);
else if(f.activation == "leaky") act = tk::dnn::ACTIVATION_LEAKY;
else if(f.activation == "mish") act = tk::dnn::ACTIVATION_MISH;
else if(f.activation == "swish") act = tk::dnn::ACTIVATION_SWISH;
else { FatalError("activation not supported: " + f.activation); }
netLayers[netLayers.size()-1] = new tk::dnn::Activation(net, act);
};
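
Taken together, the avgpool, logistic and scale_channels additions are what the parser needs for the squeeze-and-excitation blocks in the EfficientNet-B0 cfg (the new "logistic" case maps onto cuDNN's sigmoid activation): a global average pool squeezes each channel to 1 x 1, two 1x1 convolutions (swish, then logistic) turn that into per-channel gates, and scale_channels multiplies the gates back onto the trunk. A compact CPU sketch of that dataflow (purely illustrative; the single per-channel affine below stands in for the two 1x1 convolutions and is not tkDNN code):

#include <cmath>
#include <vector>

// Squeeze-and-excitation on one C x H x W tensor.
std::vector<float> se_block(const std::vector<float>& x, int C, int H, int W,
                            const std::vector<float>& gate_w, const std::vector<float>& gate_b) {
    std::vector<float> out(x.size());
    for (int c = 0; c < C; ++c) {
        // [avgpool]: global average over H x W -> one value per channel
        float mean = 0.f;
        for (int i = 0; i < H * W; ++i) mean += x[c * H * W + i];
        mean /= float(H * W);

        // stand-in for conv(swish) + conv(logistic): per-channel affine followed by a sigmoid gate
        const float gate = 1.f / (1.f + std::exp(-(gate_w[c] * mean + gate_b[c])));

        // [scale_channels] with scale_wh=0: rescale the trunk channel by its gate
        for (int i = 0; i < H * W; ++i) out[c * H * W + i] = x[c * H * W + i] * gate;
    }
    return out;
}
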
23 changes: 22 additions & 1 deletion src/NetworkRT.cpp
@@ -226,7 +226,7 @@ ILayer* NetworkRT::convert_layer(ITensor *input, Layer *l) {
return convert_layer(input, (Conv2d*) l);
if(type == LAYER_POOLING)
return convert_layer(input, (Pooling*) l);
if(type == LAYER_ACTIVATION || type == LAYER_ACTIVATION_CRELU || type == LAYER_ACTIVATION_LEAKY || type == LAYER_ACTIVATION_MISH)
if(type == LAYER_ACTIVATION || type == LAYER_ACTIVATION_CRELU || type == LAYER_ACTIVATION_LEAKY || type == LAYER_ACTIVATION_MISH || type == LAYER_ACTIVATION_SWISH)
return convert_layer(input, (Activation*) l);
if(type == LAYER_SOFTMAX)
return convert_layer(input, (Softmax*) l);
@@ -242,6 +242,8 @@
return convert_layer(input, (Region*) l);
if(type == LAYER_SHORTCUT)
return convert_layer(input, (Shortcut*) l);
if(type == LAYER_SCALECHANNELS)
return convert_layer(input, (ScaleChannels*) l);
if(type == LAYER_YOLO)
return convert_layer(input, (Yolo*) l);
if(type == LAYER_UPSAMPLE)
@@ -421,6 +423,12 @@ ILayer* NetworkRT::convert_layer(ITensor *input, Activation *l) {
checkNULL(lRT);
return lRT;
}
else if(l->act_mode == ACTIVATION_SWISH) {
IPlugin *plugin = new ActivationSwishRT();
IPluginLayer *lRT = networkRT->addPlugin(&input, 1, *plugin);
checkNULL(lRT);
return lRT;
}
else {
FatalError("this Activation mode is not yet implemented");
return NULL;
@@ -525,6 +533,14 @@ ILayer* NetworkRT::convert_layer(ITensor *input, Shortcut *l) {
}
}

ILayer* NetworkRT::convert_layer(ITensor *input, ScaleChannels *l) {
ITensor *back_tens = tensors[l->backLayer];

IElementWiseLayer *lRT = networkRT->addElementWise(*input, *back_tens, ElementWiseOperation::kPROD);
checkNULL(lRT);
return lRT;
}

ILayer* NetworkRT::convert_layer(ITensor *input, Yolo *l) {
//std::cout<<"convert Yolo\n";

@@ -653,6 +669,11 @@ IPlugin* PluginFactory::createPlugin(const char* layerName, const void* serialData
a->size = readBUF<int>(buf);
return a;
}
if(name.find("ActivationSwish") == 0) {
ActivationSwishRT *a = new ActivationSwishRT();
a->size = readBUF<int>(buf);
return a;
}
if(name.find("ActivationCReLU") == 0) {
ActivationReLUCeiling *a = new ActivationReLUCeiling(readBUF<float>(buf));
a->size = readBUF<int>(buf);
37 changes: 37 additions & 0 deletions src/ScaleChannels.cpp
@@ -0,0 +1,37 @@
#include <iostream>

#include "Layer.h"
#include "kernels.h"

namespace tk { namespace dnn {
ScaleChannels::ScaleChannels(Network *net, Layer *backLayer, int scale_wh) : Layer(net) {

this->backLayer = backLayer;
this->scale_wh = scale_wh;
output_dim = backLayer->output_dim;
checkCuda( cudaMalloc(&dstData, output_dim.tot()*sizeof(dnnType)) );

if( backLayer->output_dim.c != input_dim.c )
FatalError("ScaleChannels dim missmatch");

}

ScaleChannels::~ScaleChannels() {

checkCuda( cudaFree(dstData) );
}

dnnType* ScaleChannels::infer(dataDim_t &dim, dnnType* srcData) {

int size = output_dim.n * output_dim.c * output_dim.h * output_dim.w;
int channel_size = output_dim.h * output_dim.w;
int batch_size = output_dim.c * output_dim.h * output_dim.w;
scaleChannelsForward(this->backLayer->dstData, size, channel_size, batch_size, scale_wh, srcData, dstData);

//update data dimensions
dim = output_dim;

return dstData;
}

}}
27 changes: 27 additions & 0 deletions src/kernels/activation_swish.cu
@@ -0,0 +1,27 @@
#include "kernels.h"
#include <math.h>

// https://github.com/AlexeyAB/darknet/blob/master/src/activation_kernels.cu
__device__ float logistic_activate_kernel(float x){return 1.f/(1.f + expf(-x));}

__global__
void activation_swish(dnnType *input, dnnType *output, int size) {
int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
if (i < size)
{
float x_val = input[i];
float sigmoid = logistic_activate_kernel(x_val);
output[i] = x_val * sigmoid;
}
}

/**
swish activation function
*/
void activationSwishForward(dnnType* srcData, dnnType* dstData, int size, cudaStream_t stream)
{
int blocks = (size+255)/256;
int threads = 256;

activation_swish<<<blocks, threads, 0, stream>>>(srcData, dstData, size);
}
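
Swish is x * sigmoid(x) (also known as SiLU), so the kernel is easy to spot-check against a CPU loop. A host-side sketch of such a check (illustrative only; it assumes dnnType is float, as elsewhere in tkDNN, and that kernels.h is reachable under the include path shown):

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>
#include <cuda_runtime.h>
#include "tkDNN/kernels.h"   // assumption: declares activationSwishForward as in this PR

int main() {
    const int n = 1 << 16;
    std::vector<float> h_in(n), h_out(n);
    for (int i = 0; i < n; ++i) h_in[i] = 0.001f * (i - n / 2);   // inputs roughly in [-32, 32]

    float *d_in = nullptr, *d_out = nullptr;
    cudaMalloc(&d_in,  n * sizeof(float));
    cudaMalloc(&d_out, n * sizeof(float));
    cudaMemcpy(d_in, h_in.data(), n * sizeof(float), cudaMemcpyHostToDevice);

    activationSwishForward(d_in, d_out, n);     // default stream
    cudaMemcpy(h_out.data(), d_out, n * sizeof(float), cudaMemcpyDeviceToHost);

    float max_err = 0.f;
    for (int i = 0; i < n; ++i) {
        const float ref = h_in[i] / (1.f + std::exp(-h_in[i]));   // x * sigmoid(x)
        max_err = std::max(max_err, std::fabs(ref - h_out[i]));
    }
    std::printf("max |gpu - cpu| = %g\n", max_err);

    cudaFree(d_in);
    cudaFree(d_out);
    return 0;
}
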
27 changes: 27 additions & 0 deletions src/kernels/scale_channels.cu
@@ -0,0 +1,27 @@
#include "kernels.h"
#include "assert.h"

// https://github.com/AlexeyAB/darknet/blob/master/src/blas_kernels.cu
__global__ void scale_channels_kernel(float *in_w_h_c, int size, int channel_size, int batch_size, int scale_wh, float *scales_c, float *out)
{
const int index = blockIdx.x*blockDim.x + threadIdx.x;
if (index < size) {
if (scale_wh) {
int osd_index = index % channel_size + (index / batch_size)*channel_size;

out[index] = in_w_h_c[index] * scales_c[osd_index];
}
else {
out[index] = in_w_h_c[index] * scales_c[index / channel_size];
}
}
}

void scaleChannelsForward(dnnType *in_w_h_c, int size, int channel_size, int batch_size, int scale_wh,
dnnType *scales_c, dnnType *out, cudaStream_t stream)
{
int blocks = (size+255)/256;
int threads = 256;

scale_channels_kernel <<<blocks, threads, 0, stream>>>(in_w_h_c, size, channel_size, batch_size, scale_wh, scales_c, out);
}
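
The scale_wh = 1 branch is the non-obvious one: osd_index keeps the spatial position and drops the channel, i.e. the scale tensor holds one value per (image, pixel) and is broadcast across channels, the mirror image of the scale_wh = 0 case used by the SE blocks (with channel_size = H*W and batch_size = C*H*W, as passed from ScaleChannels::infer). A CPU mirror of the kernel's flat-index arithmetic (illustrative only):

#include <vector>

// Same index arithmetic as scale_channels_kernel.
// scale_wh = 0 : scales holds one value per (image, channel)  -> N*C entries
// scale_wh = 1 : scales holds one value per (image, pixel)    -> N*H*W entries
std::vector<float> scale_channels_cpu(const std::vector<float>& in, const std::vector<float>& scales,
                                      int size, int channel_size, int batch_size, int scale_wh) {
    std::vector<float> out(size);
    for (int index = 0; index < size; ++index) {
        if (scale_wh) {
            const int osd_index = index % channel_size + (index / batch_size) * channel_size;
            out[index] = in[index] * scales[osd_index];
        } else {
            out[index] = in[index] * scales[index / channel_size];
        }
    }
    return out;
}
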