2 changes: 1 addition & 1 deletion inference-engine/src/cldnn_engine/cldnn_engine.cpp
@@ -83,7 +83,7 @@ InferenceEngine::ICNNNetwork::Ptr clDNNEngine::CloneNetwork(const InferenceEngin
         ::ngraph::op::GenericIE::DisableReshape noReshape(nGraphFunc);
 
         // Note: instead of running all Conversion Transformations you can make up your own transformation pipeline
-        ngraph::pass::CommonOptimizations().run_on_function(nGraphFunc);
+        ngraph::pass::CommonOptimizations(transformations_callback).run_on_function(nGraphFunc);
         ngraph::pass::ConvertOpSet3ToOpSet2(transformations_callback).run_on_function(nGraphFunc);
         ngraph::pass::ConvertOpSet2ToOpSet1(transformations_callback).run_on_function(nGraphFunc);
         ngraph::pass::ConvertOpSet1ToLegacy(transformations_callback).run_on_function(nGraphFunc);
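With this change the GPU plugin passes the same `transformations_callback` to `CommonOptimizations` that the opset-conversion passes already receive, so a plugin can veto individual transformations per node. A minimal sketch of the pattern, assuming the constructor overload shown in the diff (the trivial callback body is an illustration, not the plugin's actual logic):

    // Hypothetical custom pipeline: the callback returns true for nodes the
    // plugin wants kept as-is, false for nodes the passes may rewrite.
    const auto transformations_callback = [](const std::shared_ptr<const ::ngraph::Node> &node) -> bool {
        return false;  // let every transformation run
    };
    ngraph::pass::CommonOptimizations(transformations_callback).run_on_function(nGraphFunc);
    ngraph::pass::ConvertOpSet3ToOpSet2(transformations_callback).run_on_function(nGraphFunc);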
6 changes: 3 additions & 3 deletions inference-engine/src/legacy_api/src/ie_layer_validators.cpp
@@ -1371,9 +1371,9 @@ void DepthToSpaceValidator::checkShapes(const CNNLayer* layer, const vector<Size
 
     if (casted->block_size == 0) THROW_IE_EXCEPTION << layer->name << " Incorrect block_size parameter is zero!";
 
-    if (inShapes[0][inShapes[0].size() - 3] % (casted->block_size * casted->block_size))
-        THROW_IE_EXCEPTION << layer->name
-                           << " block_size parameter is incompatible with input tensor Color dimension size!";
+    size_t numSpatialDims = inShapes[0].size() - 2;
+    if (inShapes[0][1] % static_cast<size_t>(std::pow(casted->block_size, numSpatialDims)))
+        THROW_IE_EXCEPTION << layer->name << " block_size parameter is incompatible with input tensor Color dimension size!";
 }
 
 SpaceToDepthValidator::SpaceToDepthValidator(const std::string& _type): LayerValidator(_type) {}
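The old validator assumed a 4D layout and checked the channel dimension against block_size squared; the new code derives the exponent from the actual rank, so 5D inputs are validated against block_size cubed. A standalone sketch of the arithmetic (hypothetical helper, not part of the patch):

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Channels must be divisible by blockSize^(rank - 2).
    bool blockSizeCompatible(const std::vector<size_t>& dims, size_t blockSize) {
        size_t numSpatialDims = dims.size() - 2;  // NCHW -> 2, NCDHW -> 3
        size_t blockStep = static_cast<size_t>(std::pow(blockSize, numSpatialDims));
        return dims[1] % blockStep == 0;
    }

    // blockSizeCompatible({1, 16, 4, 4, 4}, 2) == true   (16 % 2^3 == 0)
    // blockSizeCompatible({1, 12, 4, 4, 4}, 2) == false  (12 % 8 != 0)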
12 changes: 11 additions & 1 deletion inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
@@ -82,6 +82,16 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network, const st
 
     if (clonedNetwork->getFunction()) {
         const auto transformations_callback = [](const std::shared_ptr<const ::ngraph::Node> &node) -> bool {
+            // DepthToSpace node implementation supports only equal input/output tensors with rank <= 5
+            if (auto dtsOp = std::dynamic_pointer_cast<const ::ngraph::opset3::DepthToSpace>(node)) {
+                return dtsOp->input_value(0).get_shape().size() <= 5lu && dtsOp->input_value(0).get_shape().size() == dtsOp->get_output_shape(0).size();
+            }
+
+            // SpaceToDepth node implementation supports only equal input/output tensors with rank <= 5
+            if (auto stdOp = std::dynamic_pointer_cast<const ::ngraph::opset3::SpaceToDepth>(node)) {
+                return stdOp->input_value(0).get_shape().size() <= 5lu && stdOp->input_value(0).get_shape().size() == stdOp->get_output_shape(0).size();
+            }
+
             return std::dynamic_pointer_cast<const ::ngraph::opset2::Gelu>(node) ||
                    std::dynamic_pointer_cast<const ::ngraph::opset2::BatchToSpace>(node) ||
                    std::dynamic_pointer_cast<const ::ngraph::opset2::SpaceToBatch>(node) ||
@@ -92,7 +102,7 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network, const st
         ::ngraph::op::GenericIE::DisableReshape noReshape(nGraphFunc);
 
         // Note: instead of running all Conversion Transformations you can make up your own transformation pipeline
-        ngraph::pass::CommonOptimizations().run_on_function(nGraphFunc);
+        ngraph::pass::CommonOptimizations(transformations_callback).run_on_function(nGraphFunc);
         ngraph::pass::ConvertOpSet3ToOpSet2(transformations_callback).run_on_function(nGraphFunc);
         ngraph::pass::ConvertOpSet2ToOpSet1(transformations_callback).run_on_function(nGraphFunc);
         ngraph::pass::ConvertOpSet1ToLegacy(transformations_callback).run_on_function(nGraphFunc);
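Returning true from the callback keeps the opset3 node intact, so DepthToSpace and SpaceToDepth stay whole and run through the plugin's native implementation below; returning false lets ConvertOpSet3ToOpSet2 decompose them into older ops. A standalone illustration of the rank gate (plain vectors stand in for ngraph shapes; `keepForPlugin` is a hypothetical name):

    #include <cstddef>
    #include <vector>

    bool keepForPlugin(const std::vector<size_t>& inShape, const std::vector<size_t>& outShape) {
        return inShape.size() <= 5 && inShape.size() == outShape.size();
    }

    // keepForPlugin({1, 8, 2, 2}, {1, 2, 4, 4}) == true -> the opset3 node is preserved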
261 changes: 179 additions & 82 deletions inference-engine/src/mkldnn_plugin/nodes/depth_to_space.cpp
@@ -8,113 +8,210 @@
 #include <string>
 #include <vector>
 #include <cassert>
+#include <set>
 #include "ie_parallel.hpp"
 
 namespace InferenceEngine {
 namespace Extensions {
 namespace Cpu {
 
 class DepthToSpaceImpl: public ExtLayerBase {
-#define CNTR_SIZE 5
+    enum class DepthToSpaceMode {
+        BLOCKS_FIRST,
+        DEPTH_FIRST
+    };
 
 public:
     explicit DepthToSpaceImpl(const CNNLayer* layer) {
         try {
             if (layer->insData.empty() || layer->outData.empty())
-                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!";
-
-            SizeVector src_dims = layer->insData[0].lock()->getTensorDesc().getDims();
-            if (src_dims.size() < 3)
-                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input dimensions!";
-            if (layer->insData[0].lock()->getTensorDesc().getPrecision() != Precision::FP32)
-                THROW_IE_EXCEPTION << layer->name << " Incorrect input precision. Only F32 is supported!";
-
-            SizeVector dst_dims = layer->outData[0]->getTensorDesc().getDims();
-            if (dst_dims.size() < 2)
-                THROW_IE_EXCEPTION << layer->name << " Incorrect number of output dimensions!";
-            if (layer->outData[0]->getTensorDesc().getPrecision() != Precision::FP32)
-                THROW_IE_EXCEPTION << layer->name << " Incorrect output precision. Only F32 is supported!";
-
-            size_t block_size = layer->GetParamAsUInt("block_size", 1);
-            if (block_size == 0)
-                THROW_IE_EXCEPTION << layer->name << " Incorrect block_size parameter is zero!";
-
-            if (src_dims[src_dims.size() - 3] % (block_size * block_size))
-                THROW_IE_EXCEPTION << layer->name << " block_size parameter is incompatible with input tensor Color dimension size!";
-
-            if (dst_dims.size() > 2 && src_dims[src_dims.size() - 3] != (dst_dims[dst_dims.size() - 3] * block_size * block_size))
-                THROW_IE_EXCEPTION << layer->name << " Input/Output tensor Color dimension is incompatible with block_size!";
-
-            if (dst_dims[dst_dims.size() - 2] != (src_dims[src_dims.size() - 2] * block_size))
-                THROW_IE_EXCEPTION << layer->name << " Input/Output tensor Height dimension is incompatible with block_size!";
-
-            if (dst_dims[dst_dims.size() - 1] != (src_dims[src_dims.size() - 1] * block_size))
-                THROW_IE_EXCEPTION << layer->name << " Input/Output tensor Width dimension is incompatible with block_size!";
-
-            own_dims[0] = 1;
-            for (size_t i = 0; i < (src_dims.size() - 3); i++)
-                own_dims[0] *= src_dims[i];
-            own_dims[1] = src_dims[src_dims.size() - 2];
-            own_dims[2] = src_dims[src_dims.size() - 3] / block_size;
-            own_dims[3] = src_dims[src_dims.size() - 1];
-            own_dims[4] = block_size;
-
-            size_t C = src_dims[src_dims.size() - 2] * src_dims[src_dims.size() - 1];
-            ownStrides[0] = src_dims[src_dims.size() - 3] * C;
-            ownStrides[1] = src_dims[src_dims.size() - 1];
-            ownStrides[2] = block_size * C;
-            ownStrides[3] = 1;
-            ownStrides[4] = C;
-            work_amount_dst = ownStrides[0] * own_dims[0];
-
-            addConfig(layer, { DataConfigurator(ConfLayout::PLN) }, { DataConfigurator(ConfLayout::PLN) });
+                THROW_IE_EXCEPTION << "DepthToSpace layer with name '" << layer->name << "' has incorrect number of input/output edges";
+
+            inDims = layer->insData[0].lock()->getTensorDesc().getDims();
+            if (inDims.size() < 3)
+                THROW_IE_EXCEPTION << "DepthToSpace layer with name '" << layer->name << "' has incorrect number of input dimensions";
+
+            if (inDims.size() > 5)
+                THROW_IE_EXCEPTION << "DepthToSpace layer with name '" << layer->name << "' doesn't support dimensions with rank greater than 5";
+
+            SizeVector outDims = layer->outData[0]->getTensorDesc().getDims();
+            if (inDims.size() != outDims.size())
+                THROW_IE_EXCEPTION << "DepthToSpace layer with name '" << layer->name << "' has incorrect number of input/output dimensions";
+
+            std::string modeString = layer->GetParamAsString("mode");
+            if (modeString == "blocks_first") {
+                mode = DepthToSpaceMode::BLOCKS_FIRST;
+            } else if (modeString == "depth_first") {
+                mode = DepthToSpaceMode::DEPTH_FIRST;
+            } else {
+                THROW_IE_EXCEPTION << "DepthToSpace layer with name '" << layer->name << "' doesn't support mode: " << modeString;
+            }
+
+            blockSize = layer->GetParamAsUInt("block_size", 1);
+            if (blockSize == 0)
+                THROW_IE_EXCEPTION << "DepthToSpace layer with name '" << layer->name << "' has incorrect block_size parameter: zero";
+
+            size_t numSpatialDims = inDims.size() - 2;
+            blockStep = static_cast<size_t>(std::pow(blockSize, numSpatialDims));
+            if (inDims[1] % blockStep)
+                THROW_IE_EXCEPTION << "DepthToSpace layer with name '" << layer->name <<
+                    "' has block_size parameter which is incompatible with input tensor channels dimension size";
+
+            if (inDims[1] / blockStep != outDims[1])
+                THROW_IE_EXCEPTION << "DepthToSpace layer with name '" << layer->name << "' has incompatible input/output channels";
+
+            for (size_t i = 0; i < numSpatialDims; i++) {
+                if (inDims[i + 2] != outDims[i + 2] / blockSize)
+                    THROW_IE_EXCEPTION << "DepthToSpace layer with name '" << layer->name << "' has incompatible spatial dims";
+            }
+
+            auto computePrc = layer->insData[0].lock()->getTensorDesc().getPrecision();
+            const std::set<size_t> supported_precision_sizes = {1, 2, 4, 8};
+            if (supported_precision_sizes.find(computePrc.size()) == supported_precision_sizes.end())
+                THROW_IE_EXCEPTION << "DepthToSpace layer with name '" << layer->name << "' doesn't support precision: " << computePrc.name();
+
+            if (inDims.size() == 4 || inDims.size() == 5) {
+                LayerConfig config;
+                DataConfig inConfig;
+                inConfig.desc = TensorDesc(computePrc, inDims, inDims.size() == 4 ? NHWC : NDHWC);
+                config.inConfs.push_back(inConfig);
+
+                DataConfig outConfig;
+                outConfig.desc = TensorDesc(computePrc, outDims, outDims.size() == 4 ? NHWC : NDHWC);
+                config.outConfs.push_back(outConfig);
+
+                config.dynBatchSupport = false;
+                confs.push_back(config);
+            }
+
+            LayerConfig config;
+            DataConfig inConfig;
+            inConfig.desc = TensorDesc(computePrc, inDims, InferenceEngine::TensorDesc::getLayoutByDims(inDims));
+            config.inConfs.push_back(inConfig);
+
+            DataConfig outConfig;
+            outConfig.desc = TensorDesc(computePrc, outDims, InferenceEngine::TensorDesc::getLayoutByDims(outDims));
+            config.outConfs.push_back(outConfig);
+
+            config.dynBatchSupport = false;
+            confs.push_back(config);
         } catch (InferenceEngine::details::InferenceEngineException &ex) {
             errorMsg = ex.what();
         }
     }
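The two modes differ only in how the flattened channel index is decomposed: blocks_first (ONNX-style "DCR") keeps the block offsets in the slow-varying position, while depth_first ("CRD") keeps the output channel there. This is exactly what the blockShift/channelShift pair in the kernel below encodes. A toy sketch of the resulting source-channel arithmetic (hypothetical helper, mirroring the kernel's index math):

    #include <cstddef>

    size_t srcChannel(bool blocksFirst, size_t blockLinear, size_t dstChannel,
                      size_t dstChannels, size_t blockStep) {
        return blocksFirst ? blockLinear * dstChannels + dstChannel   // DCR: blocks vary slowest
                           : dstChannel * blockStep + blockLinear;    // CRD: depth varies slowest
    }

    // C = 8, blockSize = 2, 2 spatial dims -> blockStep = 4, dstChannels = 2:
    // blocks_first reads output channel 0 from input channels {0, 2, 4, 6},
    // depth_first  reads output channel 0 from input channels {0, 1, 2, 3}.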

     StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
-        const float *src_data = inputs[0]->cbuffer().as<const float *>() +
-            inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
-        float* dst_data = outputs[0]->cbuffer().as<float *>() +
-            outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
-
-        // Parallel
-        parallel_nt(0, [&](const int ithr, const int nthr) {
-            size_t start = 0, end = 0, src_idx = 0;
-            size_t counters[CNTR_SIZE] = { 0 };
-            splitter(work_amount_dst, nthr, ithr, start, end);
-            for (int j = CNTR_SIZE - 1, i = start; j >= 0; j--) {
-                counters[j] = i % own_dims[j];
-                src_idx += counters[j] * ownStrides[j];
-                i /= own_dims[j];
-            }
-
-            for (size_t iwork = start, i = 1; iwork < end; ++iwork) {
-                dst_data[iwork] = src_data[src_idx];
-                for (int j = CNTR_SIZE - 1; j >= 0; j--) {
-                    counters[j]++;
-                    if (counters[j] < own_dims[j]) {
-                        src_idx += ownStrides[j];
-                        break;
-                    } else {
-                        counters[j] = i = 0;
-                    }
-                }
-                if (!i) {
-                    for (src_idx = 0; i < CNTR_SIZE; ++i)
-                        src_idx += counters[i] * ownStrides[i];
-                }
-            }
-        });
+        switch (inputs[0]->getTensorDesc().getPrecision().size()) {
+            case 1: depthToSpaceKernel<PrecisionTrait<Precision::U8>::value_type>(inputs, outputs); break;
+            case 2: depthToSpaceKernel<PrecisionTrait<Precision::U16>::value_type>(inputs, outputs); break;
+            case 4: depthToSpaceKernel<PrecisionTrait<Precision::I32>::value_type>(inputs, outputs); break;
+            case 8: depthToSpaceKernel<PrecisionTrait<Precision::U64>::value_type>(inputs, outputs); break;
+            default: {
+                if (resp) {
+                    std::string errorMsg = "DepthToSpace layer does not support precision '"
+                        + std::string(inputs[0]->getTensorDesc().getPrecision().name()) + "'";
+                    errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
+                }
+                return GENERAL_ERROR;
+            }
+        }
 
         return OK;
     }
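Since the kernel only relocates elements and never does arithmetic on them, dispatch is by element size rather than semantic type: FP32 traffic goes through the I32 instantiation, FP16 through U16, and so on. One instantiation per size covers the whole {1, 2, 4, 8} set checked in the constructor. A sketch of the invariant this relies on (assuming InferenceEngine's ie_precision.hpp traits):

    // FP32 and I32 share an element size, so one 4-byte instantiation moves either:
    static_assert(sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type) ==
                  sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::I32>::value_type),
                  "FP32 tensors can be copied through the I32 kernel instantiation");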

 private:
-    size_t work_amount_dst;
-    size_t own_dims[CNTR_SIZE];
-    size_t ownStrides[CNTR_SIZE];
+    std::vector<size_t> getShape5D(const SizeVector& shape) {
+        std::vector<size_t> shape5D(5, 1);
+        for (size_t i = 0; i < shape.size(); i++) {
+            shape5D[i] = shape[i];
+        }
+        return shape5D;
+    }
+
+    std::vector<size_t> getBlock3D(const SizeVector& shape, const SizeVector& shape5D) {
+        std::vector<size_t> block3D(3, 1);
+        for (size_t i = 0; i < shape.size() - 2; i++) {
+            block3D[i] = blockSize;
+        }
+        return block3D;
+    }
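The helpers normalize any 3D to 5D shape into a fixed 5D working shape, padding trailing positions with 1, plus a matching per-axis block vector. Worked by hand from the code above:

    // getShape5D({2, 12, 3, 3}) -> {2, 12, 3, 3, 1}   (4D input, trailing dim padded)
    // getShape5D({2, 12, 3})    -> {2, 12, 3, 1, 1}
    // getBlock3D with blockSize = 2:
    //   4D input (2 spatial dims) -> {2, 2, 1}
    //   5D input (3 spatial dims) -> {2, 2, 2}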

+    template<typename T>
+    void depthToSpaceKernel(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs) {
+        const T *src_data = inputs[0]->cbuffer().as<const T *>() + inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        T* dst_data = outputs[0]->buffer().as<T *>() + outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+
+        auto shape5D = getShape5D(inDims);
+        auto block3D = getBlock3D(inDims, shape5D);
+
+        size_t spatialStep = shape5D[2] * shape5D[3] * shape5D[4];
+        size_t batchStep = shape5D[1] * spatialStep;
+
+        size_t srcChannels = shape5D[1];
+        size_t dstChannels = srcChannels / blockStep;
+
+        size_t blockShift = mode == DepthToSpaceMode::BLOCKS_FIRST ? (dstChannels) : 1;
+        size_t channelShift = mode == DepthToSpaceMode::BLOCKS_FIRST ? 1 : blockStep;
+
+        if (inputs[0]->getTensorDesc().getLayout() == NHWC || inputs[0]->getTensorDesc().getLayout() == NDHWC) {
+            parallel_for2d(shape5D[0], shape5D[2], [&](size_t i0, size_t i2) {
+                size_t srcIdx1 = i0 * batchStep;
+                size_t dstIdx1 = i0 * batchStep;
+                for (size_t b2 = 0; b2 < block3D[0]; b2++) {
+                    size_t srcIdx2 = srcIdx1 + i2 * shape5D[3] * shape5D[4] * srcChannels + b2 * block3D[1] * block3D[2] * blockShift;
+                    size_t dstIdx2 = dstIdx1 + (i2 * block3D[0] + b2) * shape5D[3] * block3D[1] * shape5D[4] * block3D[2] * dstChannels;
+                    for (size_t i3 = 0; i3 < shape5D[3]; i3++) {
+                        for (size_t b3 = 0; b3 < block3D[1]; b3++) {
+                            size_t srcIdx3 = srcIdx2 + i3 * shape5D[4] * srcChannels + b3 * block3D[2] * blockShift;
+                            size_t dstIdx3 = dstIdx2 + (i3 * block3D[1] + b3) * shape5D[4] * block3D[2] * dstChannels;
+                            for (size_t i4 = 0; i4 < shape5D[4]; i4++) {
+                                for (size_t b4 = 0; b4 < block3D[2]; b4++) {
+                                    size_t srcIdx4 = srcIdx3 + i4 * srcChannels + b4 * blockShift;
+                                    size_t dstIdx4 = dstIdx3 + (i4 * block3D[2] + b4) * dstChannels;
+                                    for (size_t i1 = 0; i1 < dstChannels; i1++) {
+                                        size_t srcIdx5 = srcIdx4 + i1 * channelShift;
+                                        size_t dstIdx5 = dstIdx4 + i1;
+                                        dst_data[dstIdx5] = src_data[srcIdx5];
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            });
+        } else {
+            parallel_for2d(shape5D[0], dstChannels, [&](size_t i0, size_t i1) {
+                size_t srcIdx1 = i0 * batchStep + i1 * channelShift * spatialStep;
+                size_t dstIdx1 = i0 * batchStep + i1 * blockStep * spatialStep;
+                for (size_t i2 = 0; i2 < shape5D[2]; i2++) {
+                    for (size_t b2 = 0; b2 < block3D[0]; b2++) {
+                        size_t srcIdx2 = srcIdx1 + i2 * shape5D[3] * shape5D[4] + b2 * block3D[1] * block3D[2] * blockShift * spatialStep;
+                        size_t dstIdx2 = dstIdx1 + (i2 * block3D[0] + b2) * shape5D[3] * block3D[1] * shape5D[4] * block3D[2];
+                        for (size_t i3 = 0; i3 < shape5D[3]; i3++) {
+                            for (size_t b3 = 0; b3 < block3D[1]; b3++) {
+                                size_t srcIdx3 = srcIdx2 + i3 * shape5D[4] + b3 * block3D[2] * blockShift * spatialStep;
+                                size_t dstIdx3 = dstIdx2 + (i3 * block3D[1] + b3) * shape5D[4] * block3D[2];
+                                for (size_t i4 = 0; i4 < shape5D[4]; i4++) {
+                                    for (size_t b4 = 0; b4 < block3D[2]; b4++) {
+                                        size_t srcIdx4 = srcIdx3 + i4 + b4 * blockShift * spatialStep;
+                                        size_t dstIdx4 = dstIdx3 + i4 * block3D[2] + b4;
+                                        dst_data[dstIdx4] = src_data[srcIdx4];
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            });
+        }
+    }
+
+    DepthToSpaceMode mode;
+    SizeVector inDims;
+    size_t blockSize;
+    size_t blockStep;
};

REG_FACTORY_FOR(DepthToSpaceImpl, DepthToSpace);
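The kernel has two branches: one for channels-last layouts (NHWC/NDHWC), where channels are innermost and the channel loop sits innermost for contiguous writes, and one for planar layouts, where each output channel's spatial plane is filled in one pass. For cross-checking the blocked index arithmetic, a naive reference is handy; a minimal sketch for the 4D planar blocks_first case (hypothetical test helper, not part of the patch):

    #include <cstddef>
    #include <vector>

    std::vector<float> depthToSpaceRef(const std::vector<float>& src,
                                       size_t N, size_t C, size_t H, size_t W, size_t bs) {
        const size_t oc = C / (bs * bs), oh = H * bs, ow = W * bs;
        std::vector<float> dst(N * oc * oh * ow);
        for (size_t n = 0; n < N; n++)
            for (size_t c = 0; c < oc; c++)
                for (size_t h = 0; h < oh; h++)
                    for (size_t w = 0; w < ow; w++) {
                        // blocks_first: source channel = (bh * bs + bw) * oc + c
                        const size_t srcC = ((h % bs) * bs + (w % bs)) * oc + c;
                        const size_t srcIdx = ((n * C + srcC) * H + h / bs) * W + w / bs;
                        dst[((n * oc + c) * oh + h) * ow + w] = src[srcIdx];
                    }
        return dst;
    }

Comparing its output element by element against the plugin kernel on random planar inputs is a quick way to validate the index math.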