Skip to content

Commit

Permalink
VX_NN - Tile layer (#352)
Browse files Browse the repository at this point in the history
* tile layer

* model compiler support for tile

* Update README.md
  • Loading branch information
hansely123 authored Aug 11, 2020
1 parent 6d05e2b commit 6096057
Show file tree
Hide file tree
Showing 10 changed files with 215 additions and 0 deletions.
1 change: 1 addition & 0 deletions amd_openvx_extensions/amd_nn/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ list(APPEND SOURCES
src/gather_layer.cpp
src/topk_layer.cpp
src/reduce_min.cpp
src/tile_layer.cpp
src/profiler.cpp
)

Expand Down
1 change: 1 addition & 0 deletions amd_openvx_extensions/amd_nn/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ vx_nn is an OpenVX Neural Network extension module. This implementation supports
| Tensor Min|vxTensorMinNode|com.amd.nn_extension.tensor_min |
| Tensor Multiply|vxTensorMultiplyNode|org.khronos.openvx.tensor_multiply |
| Tensor Subtract|vxTensorSubtractNode|org.khronos.openvx.tensor_subtract |
| Tile|vxTileLayer|com.amd.nn_extension.tile_layer |
| TopK|vxTopKLayer|com.amd.nn_extension.topk_layer|
| Upsample Nearest Neighborhood|vxUpsampleNearestLayer|com.amd.nn_extension.upsample_nearest_layer |

Expand Down
11 changes: 11 additions & 0 deletions amd_openvx_extensions/amd_nn/include/vx_amd_nn.h
Original file line number Diff line number Diff line change
Expand Up @@ -357,4 +357,15 @@ VX_API_ENTRY vx_node VX_API_CALL vxTopKLayer(vx_graph graph, vx_tensor x_tensor,
*/
VX_API_ENTRY vx_node VX_API_CALL vxReduceMinLayer(vx_graph graph, vx_tensor data, vx_array axes, vx_int32 keepdims, vx_tensor reduced);

/* \brief [Graph] Creates a Tile Layer Node.
* \details Constructs a tensor by tiling a given tensor.
* \param [in] graph The handle to the graph.
* \param [in] input The input tensor data.
* \param [in] repeats 1D int64 tensor of the same length as input's dimension number, includes numbers of repeated copies along input's dimensions.
* \param [out] output Output tensor of the same dimension and type as tensor input. output_dim[i] = input_dim[i] * repeats[i]
* \return <tt> vx_node</tt>.
* \returns A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a successful creation should be checked using <tt>\ref vxGetStatus</tt>.
*/
VX_API_ENTRY vx_node VX_API_CALL vxTileLayer(vx_graph graph, vx_tensor input, vx_tensor repeats, vx_tensor output);

#endif
1 change: 1 addition & 0 deletions amd_openvx_extensions/amd_nn/src/kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ SHARED_PUBLIC vx_status VX_API_CALL vxPublishKernels(vx_context context)
ERROR_CHECK_STATUS(publishGatherLayer(context));
ERROR_CHECK_STATUS(publishTopKLayer(context));
ERROR_CHECK_STATUS(publishReduceMinLayer(context));
ERROR_CHECK_STATUS(publishTileLayer(context));

// register drama rules
AgoNodeMergeRule softmax_rule = {
Expand Down
2 changes: 2 additions & 0 deletions amd_openvx_extensions/amd_nn/src/kernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ enum user_kernel_e
VX_KERNEL_GATHER_LAYER_AMD = VX_KERNEL_BASE(VX_ID_AMD, NN_EXTENSION_LIBRARY) + 0x016,
VX_KERNEL_TOPK_LAYER_AMD = VX_KERNEL_BASE(VX_ID_AMD, NN_EXTENSION_LIBRARY) + 0x017,
VX_KERNEL_REDUCE_MIN_LAYER_AMD = VX_KERNEL_BASE(VX_ID_AMD, NN_EXTENSION_LIBRARY) + 0x018,
VX_KERNEL_TILE_LAYER_AMD = VX_KERNEL_BASE(VX_ID_AMD, NN_EXTENSION_LIBRARY) + 0x019,
};

//////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -173,6 +174,7 @@ vx_status publishNMSLayer(vx_context context);
vx_status publishGatherLayer(vx_context context);
vx_status publishTopKLayer(vx_context context);
vx_status publishReduceMinLayer(vx_context context);
vx_status publishTileLayer(vx_context context);

//////////////////////////////////////////////////////////////////////
//! \brief The module entry point for publishing/unpublishing kernels
Expand Down
167 changes: 167 additions & 0 deletions amd_openvx_extensions/amd_nn/src/tile_layer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
#include <kernels.h>

static vx_status VX_CALLBACK validateTileLayer(vx_node node, const vx_reference *parameters, vx_uint32 num, vx_meta_format metas[]) {
vx_enum type, type2, out_type;
vx_size num_dims, num_dims2, out_num_dims;
vx_size input_dims[4], input_dims2[4], output_dims[4];

ERROR_CHECK_STATUS(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_dims, sizeof(num_dims)));
ERROR_CHECK_STATUS(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &type, sizeof(type)));
if ((type != VX_TYPE_FLOAT32) && (type != VX_TYPE_FLOAT16)) return VX_ERROR_INVALID_TYPE;
ERROR_CHECK_STATUS(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, input_dims, sizeof(input_dims)));

ERROR_CHECK_STATUS(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_NUMBER_OF_DIMS, &num_dims2, sizeof(num_dims2)));
ERROR_CHECK_STATUS(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DATA_TYPE, &type2, sizeof(type2)));
if ((type2 != VX_TYPE_INT32) && (type2 != VX_TYPE_INT64)) return VX_ERROR_INVALID_TYPE;
ERROR_CHECK_STATUS(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DIMS, input_dims2, sizeof(input_dims2)));

ERROR_CHECK_STATUS(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &out_num_dims, sizeof(out_num_dims)));
ERROR_CHECK_STATUS(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &out_type, sizeof(out_type)));
if ((out_type != VX_TYPE_FLOAT32) && (out_type != VX_TYPE_FLOAT16)) return VX_ERROR_INVALID_TYPE;
ERROR_CHECK_STATUS(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, output_dims, sizeof(output_dims)));

if ((num_dims != out_num_dims) || (num_dims != input_dims2[0])) {
printf("validate: tile: Ranks of input, repeat, and output tensors should be equal\n");
return VX_ERROR_INVALID_DIMENSION;
}

ERROR_CHECK_STATUS(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &out_type, sizeof(out_type)));
ERROR_CHECK_STATUS(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &out_num_dims, sizeof(num_dims)));
ERROR_CHECK_STATUS(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &output_dims, sizeof(output_dims)));
return VX_SUCCESS;

}

static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
vx_bool use_opencl_1_2,
vx_uint32& supported_target_affinity
)
{
supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
return VX_SUCCESS;
}

static vx_status VX_CALLBACK opencl_codegen(
vx_node node, // [input] node
const vx_reference parameters[], // [input] parameters
vx_uint32 num, // [input] number of parameters
bool opencl_load_function, // [input] false: normal OpenCL kernel; true: reserved
char opencl_kernel_function_name[64], // [output] kernel_name for clCreateKernel()
std::string& opencl_kernel_code, // [output] string for clCreateProgramWithSource()
std::string& opencl_build_options, // [output] options for clBuildProgram()
vx_uint32& opencl_work_dim, // [output] work_dim for clEnqueueNDRangeKernel()
vx_size opencl_global_work[], // [output] global_work[] for clEnqueueNDRangeKernel()
vx_size opencl_local_work[], // [output] local_work[] for clEnqueueNDRangeKernel()
vx_uint32& opencl_local_buffer_usage_mask, // [output] reserved: must be ZERO
vx_uint32& opencl_local_buffer_size_in_bytes // [output] reserved: must be ZERO
)
{
//get tensor dimensions
vx_size input_dims[4], output_dims[4];
vx_size num_of_dims;
vx_enum type;

ERROR_CHECK_STATUS(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_of_dims, sizeof(num_of_dims)));
ERROR_CHECK_STATUS(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, input_dims, sizeof(input_dims)));
ERROR_CHECK_STATUS(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &type, sizeof(type)));
ERROR_CHECK_STATUS(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, output_dims, sizeof(output_dims)));

for (int i=0; i<4; i++) { //convert input tensor to 4-D
if(i >= num_of_dims) {
input_dims[i] = 1;
output_dims[i] = 1;
}
}

strcpy(opencl_kernel_function_name, "tile_layer");

opencl_work_dim = 3;
opencl_global_work[0] = output_dims[0];
opencl_global_work[1] = output_dims[1];
opencl_global_work[2] = output_dims[2] * output_dims[3];

// Setting variables required by the interface
opencl_local_buffer_usage_mask = 0;
opencl_local_buffer_size_in_bytes = 0;

if (num_of_dims) {
char item[8192];
if (type == VX_TYPE_FLOAT32) {
sprintf(item,
"#pragma OPENCL EXTENSION cl_amd_media_ops : enable\n"
"__kernel void %s(__global uchar * in, uint in_offset, uint4 in_stride, __global uchar * rep, uint rep_offset, uint4 rep_stride, __global uchar * out, uint out_offset, uint4 out_stride) \n"
"{ \n"
" uint x = get_global_id(0);\n"
" uint y = get_global_id(1);\n"
" uint c = get_global_id(2);\n"
" uint nx = x %% %d;\n"
" uint ny = y %% %d;\n"
" uint nc = c %% %d;\n"
" float value = *(__global float *)&in[in_offset + nx*in_stride.s0 + ny*in_stride.s1 + nc*in_stride.s2];\n"
" uint offset = out_offset + x*out_stride.s0 + y*out_stride.s1 + c*out_stride.s2;\n"
" out += offset;\n"
" *(__global float *)&out[0] = value;\n"
"}\n", opencl_kernel_function_name, (int)input_dims[0], (int)input_dims[1], (int)input_dims[2]);
}
else {
sprintf(item,
"#pragma OPENCL EXTENSION cl_amd_media_ops : enable\n"
"__kernel void %s(__global uchar * in, uint in_offset, uint4 in_stride, __global uchar * rep, uint rep_offset, uint4 rep_stride, __global uchar * out, uint out_offset, uint4 out_stride) \n"
"{ \n"
" uint x = get_global_id(0);\n"
" uint y = get_global_id(1);\n"
" uint c = get_global_id(2);\n"
" uint nx = x %% %d;\n"
" uint ny = y %% %d;\n"
" uint nc = c %% %d;\n"
" half value = *(__global half *)&in[in_offset + nx*in_stride.s0 + ny*in_stride.s1 + nc*in_stride.s2];\n"
" uint offset = out_offset + x*out_stride.s0 + y*out_stride.s1 + c*out_stride.s2;\n"
" out += offset;\n"
" *(__global half *)&out[0] = value;\n"
"}\n", opencl_kernel_function_name, (int)input_dims[0], (int)input_dims[1], (int)input_dims[2]);
}
opencl_kernel_code = item;
}
return VX_SUCCESS;
}

//! \brief The kernel execution.
static vx_status VX_CALLBACK host_kernel(vx_node node, const vx_reference * parameters, vx_uint32 num)
{
return VX_ERROR_NOT_IMPLEMENTED;
}

//! \brief The kernel publisher.
vx_status publishTileLayer(vx_context context) {
vx_kernel kernel = vxAddUserKernel(context, "com.amd.nn_extension.tile_layer", VX_KERNEL_TILE_LAYER_AMD, host_kernel, 3, validateTileLayer, nullptr, nullptr);
ERROR_CHECK_OBJECT(kernel);

amd_kernel_query_target_support_f query_target_support_f = query_target_support;
amd_kernel_opencl_codegen_callback_f opencl_codegen_callback_f = opencl_codegen;
ERROR_CHECK_STATUS(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
ERROR_CHECK_STATUS(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_OPENCL_CODEGEN_CALLBACK, &opencl_codegen_callback_f, sizeof(opencl_codegen_callback_f)));

ERROR_CHECK_STATUS(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
ERROR_CHECK_STATUS(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
ERROR_CHECK_STATUS(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));

ERROR_CHECK_STATUS(vxFinalizeKernel(kernel));
ERROR_CHECK_STATUS(vxReleaseKernel(&kernel));
return VX_SUCCESS;
}

VX_API_ENTRY vx_node VX_API_CALL vxTileLayer(vx_graph graph, vx_tensor input, vx_tensor repeats, vx_tensor output) {
vx_node node = NULL;
vx_context context = vxGetContext((vx_reference)graph);
if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
vx_reference params[] = {
(vx_reference) input,
(vx_reference) repeats,
(vx_reference) output,
};
node = createNode(graph, VX_KERNEL_TILE_LAYER_AMD, params, sizeof(params) / sizeof(params[0]));
}

return node;
}

1 change: 1 addition & 0 deletions model_compiler/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,7 @@ Test code with preprocessing add / multiply values to normalize the input tensor
|Squeeze||&#9745;|&#9745;|
|Sub||&#9745;|&#9745;|
|Sum||&#9745;||
|Tile||&#9745;||
|TopK||&#9745;||
|Transpose||&#9745;|&#9745;|
|Unsqueeze||&#9745;|&#9745;|
Expand Down
20 changes: 20 additions & 0 deletions model_compiler/python/nnir.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ def __init__(self):
'gather' : 1,
'topk' : 1,
'reduce_min' : 1,
'tile' : 1,
}

def set(self,type,inputs,outputs,attr):
Expand Down Expand Up @@ -822,6 +823,25 @@ def updateLocals(self):
local.setInfo(input.type, output_shape)
local.setFormat(input.format)
self.addLocal(local)
elif node.type in ['tile']:
input = self.tensor_dict[node.inputs[0]]
if node.inputs[1] not in self.binaries:
raise ValueError("tile: tile by local tensor is unsupported: " + node.inputs[1])

repeats = np.frombuffer(self.binaries[node.inputs[1]], dtype=npType)

repeats = np.flip(repeats)
while len(repeats) < 4:
repeats.add(1)

for i in range(len(input.shape)):
output_shape = input.shape[i] * repeats[i]

local = IrTensor()
local.setName(output)
local.setInfo(input.type, output_shape)
local.setFormat(input.format)
self.addLocal(local)
else:
raise ValueError("Unsupported IR node type: {}".format(node.type))

Expand Down
10 changes: 10 additions & 0 deletions model_compiler/python/nnir_to_openvx.py
Original file line number Diff line number Diff line change
Expand Up @@ -954,6 +954,16 @@ def generateModuleCPP(graph,fileName,virtual_tensor_flag):
}
"""
% (node.attr.get('axis'), node.inputs[0], node.inputs[1], node.outputs[0]))
elif node.type == 'tile':
f.write( \
"""
{
vx_node node = vxTileLayer(graph, %s, %s, %s);
ERROR_CHECK_OBJECT(node);
ERROR_CHECK_STATUS(vxReleaseNode(&node));
}
"""
% (node.inputs[0], node.inputs[1], node.outputs[0]))
elif node.type == 'reduce_min':
axes = node.attr.get('axes')
axes_len = -1
Expand Down
1 change: 1 addition & 0 deletions model_compiler/python/onnx_to_nnir.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@
'Gather' : 'gather',
'TopK' : 'topk',
'ReduceMin' : 'reduce_min',
'Tile' : 'tile',
}

onnx2ir_data_type = [
Expand Down

0 comments on commit 6096057

Please sign in to comment.