Skip to content

Commit

Permalink
[Relay] [Quantization] WIP - Prototyping the quantized convolution op
Browse files Browse the repository at this point in the history
Goal - Act as medium of discussion for pull request apache#2351

Features
- New quantized conv2D op in Relay
- Python API interface to instantiate the Relay op
- Infer Type implemented
- Lowering of quantized_conv op to low-level Relay ops

Discussion points
- Does the namespace look correct?
    - Relay op is called 'relay.op.nn._quantize.quantized_conv2d'
    - Idea is that any op under '_quantize' namespace will go through rewrite.
- Should we reuse Conv2DRel and Conv2DAttrs
    - Tried prototyping. Found it hard to derive from Conv2DAttr struct
    - Infer Type has a param field. This needs to come from the right datatype.

Missing implementation
    - Lowering of quantized conv into conv+cast is incomplete.
    - Will work on it async. This is orthogonal to the discussion.

[Relay] [Quantization] WIP - Prototyping the quantized convolution op

Goal - Act as medium of discussion for pull request apache#2351

Features
- New quantized conv2D op in Relay
- Python API interface to instantiate the Relay op
- Infer Type implemented
- Lowering of quantized_conv op to low-level Relay ops

Discussion points
- Does the namespace look correct?
    - Relay op is called 'relay.op.nn._quantize.quantized_conv2d'
    - Idea is that any op under '_quantize' namespace will go through rewrite.
- Should we reuse Conv2DRel and Conv2DAttrs
    - Tried prototyping. Found it hard to derive from Conv2DAttr struct
    - Infer Type has a param field. This needs to come from the right datatype.

Missing implementation
    - Lowering of quantized conv into conv+cast is incomplete.
    - Will work on it async. This is orthogonal to the discussion.

Adding the fixed point compute handling for requantization.
  • Loading branch information
anijain2305 committed Jul 8, 2019
1 parent 9596535 commit 2f9a5e1
Show file tree
Hide file tree
Showing 13 changed files with 1,303 additions and 4 deletions.
125 changes: 125 additions & 0 deletions include/tvm/relay/attrs/qnn.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* \file tvm/relay/attrs/qnn.h
* \brief Auxiliary attributes for quantized nn (qnn) operators.
*/
#ifndef TVM_RELAY_ATTRS_NN_QUANTIZE_H_
#define TVM_RELAY_ATTRS_NN_QUANTIZE_H_

#include <tvm/attrs.h>
#include <string>

namespace tvm {
namespace relay {

/*!
 * \brief Attributes for the quantized conv2d operator.
 *
 * Holds the regular conv2d attributes plus the zero points of the quantized
 * input and kernel tensors. The two zero points are registered without a
 * default value in this declaration.
 */
struct QConv2DAttrs : public tvm::AttrsNode<QConv2DAttrs> {
  // Traditional conv2d attributes.
  Array<IndexExpr> strides;
  Array<IndexExpr> padding;
  Array<IndexExpr> dilation;
  int groups;
  IndexExpr channels;
  Array<IndexExpr> kernel_size;
  std::string data_layout;
  std::string kernel_layout;
  std::string out_layout;
  DataType out_dtype;

  // Quantization related attributes.
  int32_t input_zero_point;
  int32_t kernel_zero_point;

  // Note: adjacent string literals are concatenated verbatim by the compiler,
  // so every continued description line needs an explicit separating space.
  TVM_DECLARE_ATTRS(QConv2DAttrs, "relay.attrs.QConv2DAttrs") {
    TVM_ATTR_FIELD(strides).set_default(Array<IndexExpr>({1, 1}))
        .describe("Specifies the strides of the convolution.");
    TVM_ATTR_FIELD(padding).set_default(Array<IndexExpr>({0, 0}))
        .describe("If padding is non-zero, then the input is implicitly zero-padded "
                  "on both sides for padding number of points");
    TVM_ATTR_FIELD(dilation).set_default(Array<IndexExpr>({1, 1}))
        .describe("Specifies the dilation rate to use for dilated convolution.");
    TVM_ATTR_FIELD(groups).set_default(1)
        .describe("Controls the connections between inputs and outputs. "
                  "At groups=1, all inputs are convolved to all outputs. "
                  "At groups=2, the operation becomes equivalent to having two convolution "
                  "layers side by side, each seeing half the input channels, and producing "
                  "half the output channels, and both subsequently concatenated.");
    TVM_ATTR_FIELD(channels)
        .describe("The number of output channels in the convolution."
                  " If it is not set, inferred by shape of the weight.")
        .set_default(NullValue<IndexExpr>());
    TVM_ATTR_FIELD(kernel_size)
        .describe("Specifies the dimensions of the convolution window.")
        .set_default(NullValue<Array<IndexExpr> >());
    TVM_ATTR_FIELD(data_layout).set_default("NCHW")
        .describe("Dimension ordering of input data. Can be 'NCHW', 'NHWC', etc. "
                  "'N', 'C', 'H', 'W' stands for batch, channel, height, and width "
                  "dimensions respectively. Convolution is applied on the 'H' and "
                  "'W' dimensions.");
    TVM_ATTR_FIELD(kernel_layout).set_default("OIHW")
        .describe("Dimension ordering of weight. Can be 'OIHW', 'OIHW16o16i', etc. "
                  "'O', 'I', 'H', 'W' stands for num_filter, input_channel, height, and width "
                  "dimensions respectively.");
    TVM_ATTR_FIELD(out_layout).set_default("")
        .describe("Dimension ordering of output. Can be 'NCHW', 'NHWC', etc. "
                  "'N', 'C', 'H', 'W' stands for batch, channel, height, and width "
                  "dimensions respectively. Default to be same as input layout.");
    TVM_ATTR_FIELD(out_dtype)
        .set_default(NullValue<DataType>())
        .describe("Output data type, set to explicit type under mixed precision setting");
    TVM_ATTR_FIELD(input_zero_point)
        .describe("The zero point of the input tensor.");
    TVM_ATTR_FIELD(kernel_zero_point)
        .describe("The zero point of the kernel tensor.");
  }
};


/*!
* \brief Attribute for requantize operator
*
* Carries the scale and zero point of both the input and the output tensor,
* a flag selecting integer computation, and the requested output dtype.
*/
struct RequantizeAttrs : public tvm::AttrsNode<RequantizeAttrs> {
double input_scale;
int32_t input_zero_point;
double output_scale;
int32_t output_zero_point;
bool use_int_compute;
DataType out_dtype;

// The fields below are registered in a different order than they are declared
// above (zero points first, then scales). The registration order is kept as-is.
// Scales and zero points are registered without a default value here.
TVM_DECLARE_ATTRS(RequantizeAttrs, "relay.attrs.RequantizeAttrs") {
TVM_ATTR_FIELD(input_zero_point)
.describe("The zero point of the input tensor.");
TVM_ATTR_FIELD(output_zero_point)
.describe("The zero point of the output tensor.");
TVM_ATTR_FIELD(input_scale)
.describe("The scale of the input tensor.");
TVM_ATTR_FIELD(output_scale)
.describe("The scale of the output tensor.");
// Integer computation is opt-in: the default is false.
TVM_ATTR_FIELD(use_int_compute).set_default(false)
.describe("When true, the integer computation is used to handle output scale");
// Defaults to NullValue<DataType>() when the caller does not set it.
TVM_ATTR_FIELD(out_dtype)
.set_default(NullValue<DataType>())
.describe("Output data type, set to explicit type under mixed precision setting");
}
};


} // namespace relay
} // namespace tvm
#endif // TVM_RELAY_ATTRS_NN_QUANTIZE_H_
139 changes: 139 additions & 0 deletions include/tvm/relay/quantize_util.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* \file tvm/relay/quantize_util.h
* \brief Utility methods needed by quantized ops that can be shared
*/

#ifndef TVM_QUANTIZE_UTIL_H
#define TVM_QUANTIZE_UTIL_H

#include <tvm/expr.h>
#include <cstdint>
#include <limits>
#include "./base.h"

namespace tvm {
namespace relay {

inline bool is_Int8(const DataType& dtype) {
return dtype == Int(8);
}

inline bool is_UInt8(const DataType& dtype) {
return dtype == UInt(8);
}


inline bool is_Int16(const DataType& dtype) {
return dtype == Int(16);
}

inline bool is_UInt16(const DataType& dtype) {
return dtype == UInt(16);
}

inline bool is_Int32(const DataType& dtype) {
return dtype == Int(32);
}

inline bool is_UInt32(const DataType& dtype) {
return dtype == UInt(32);
}



inline bool is_Float32(const DataType& dtype) {
return dtype == Float(32);
}

/*!
 * \brief Returns true when dtype is one of the 8-bit or 16-bit integer types
 *        (signed or unsigned) recognised by the predicates in this header.
 *
 * 32-bit integers and float32 are not treated as quantized types here.
 */
inline bool is_quantized_type(const DataType& dtype) {
  // Checked in the same order as before: int8, uint8, int16, uint16.
  if (is_Int8(dtype) || is_UInt8(dtype)) {
    return true;
  }
  return is_Int16(dtype) || is_UInt16(dtype);
}

/*!
* \brief Kind of quantization operator, used to select which input/output
*        dtypes the validators in this header accept.
*/
enum class QuantizeOpType : uint8_t {
// Input validator accepts float32 or a quantized type; output validator
// accepts a quantized type.
Quantize_Requantize,
// Input validator accepts a quantized type; output validator accepts float32.
Dequantize,
// Input validator accepts int16 or int32.
// NOTE(review): the output validator has no case for this value and
// therefore returns false for it - confirm whether that is intended.
Requantize
};

/*!
 * \brief Checks whether in_dtype is an acceptable input dtype for op_type.
 * \param op_type The kind of quantization operator.
 * \param in_dtype The dtype of the operator input.
 * \return true if the dtype is accepted; false otherwise, including for any
 *         op_type value that is not one of the named enumerators.
 */
inline bool is_valid_quantized_op_input_type(const QuantizeOpType &op_type, const DataType &in_dtype) {
  if (op_type == QuantizeOpType::Quantize_Requantize) {
    // Either a float32 tensor or an already quantized tensor.
    return is_Float32(in_dtype) || is_quantized_type(in_dtype);
  }
  if (op_type == QuantizeOpType::Dequantize) {
    return is_quantized_type(in_dtype);
  }
  if (op_type == QuantizeOpType::Requantize) {
    return is_Int16(in_dtype) || is_Int32(in_dtype);
  }
  return false;
}

/*!
 * \brief Checks whether a dtype is an acceptable output dtype for op_type.
 * \param op_type The kind of quantization operator.
 * \param in_dtype The dtype to validate (the operator's output dtype, despite
 *        the parameter name).
 * \return true if the dtype is accepted; false otherwise.
 *
 * NOTE(review): QuantizeOpType::Requantize has no dedicated branch and so
 * always yields false - confirm whether that is intended.
 */
inline bool is_valid_quantized_op_output_type(const QuantizeOpType &op_type, const DataType &in_dtype) {
  if (op_type == QuantizeOpType::Quantize_Requantize) {
    return is_quantized_type(in_dtype);
  }
  if (op_type == QuantizeOpType::Dequantize) {
    return is_Float32(in_dtype);
  }
  return false;
}

/*!
 * \brief Smallest value representable by an integer dtype, as int32_t.
 * \param dtype One of int8, uint8, int16, uint16, int32 or uint32.
 * \return The minimum of the matching fixed-width C++ integer type.
 *
 * Any other dtype is reported through LOG(FATAL); the trailing return only
 * exists so that every control path returns a value.
 */
inline const int32_t get_qmin(const DataType& dtype) {
  if (is_Int8(dtype)) return std::numeric_limits<int8_t>::min();
  if (is_UInt8(dtype)) return std::numeric_limits<uint8_t>::min();
  if (is_Int16(dtype)) return std::numeric_limits<int16_t>::min();
  if (is_UInt16(dtype)) return std::numeric_limits<uint16_t>::min();
  if (is_Int32(dtype)) return std::numeric_limits<int32_t>::min();
  if (is_UInt32(dtype)) return std::numeric_limits<uint32_t>::min();

  LOG(FATAL) << "Type not supported\n";
  return -1;
}


/*!
 * \brief Largest value representable by an integer dtype, as int32_t.
 * \param dtype One of int8, uint8, int16, uint16 or int32.
 * \return The maximum of the matching fixed-width C++ integer type.
 *
 * uint32 is rejected: its maximum (4294967295) does not fit in the int32_t
 * return type, and the previous implicit conversion silently produced an
 * out-of-range result (typically -1), i.e. a qmax smaller than qmin.
 * Unsupported dtypes are reported through LOG(FATAL); the trailing returns
 * only exist so that every control path returns a value.
 */
inline const int32_t get_qmax(const DataType& dtype) {
  if (is_Int8(dtype)) {
    return std::numeric_limits<int8_t>::max();
  } else if (is_UInt8(dtype)) {
    return std::numeric_limits<uint8_t>::max();
  } else if (is_Int16(dtype)) {
    return std::numeric_limits<int16_t>::max();
  } else if (is_UInt16(dtype)) {
    return std::numeric_limits<uint16_t>::max();
  } else if (is_Int32(dtype)) {
    return std::numeric_limits<int32_t>::max();
  } else if (is_UInt32(dtype)) {
    // Fail loudly instead of returning a wrapped-around value.
    LOG(FATAL) << "qmax of uint32 is not representable in the int32_t return type\n";
    return -1;
  }
  LOG(FATAL) << "Type not supported\n";
  return -1;
}

} // namespace relay
} // namespace tvm
#endif //TVM_QUANTIZE_UTIL_H
1 change: 1 addition & 0 deletions python/tvm/relay/op/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from .transform import *
from .algorithm import *
from . import nn
from . import qnn
from . import annotation
from . import image
from . import vision
Expand Down
21 changes: 21 additions & 0 deletions python/tvm/relay/op/qnn/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=wildcard-import
"""Quantized neural network (QNN) related operators."""
from __future__ import absolute_import as _abs
from .qnn import *
# from . import _nn
20 changes: 20 additions & 0 deletions python/tvm/relay/op/qnn/_make.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Constructor APIs"""
from ...._ffi.function import _init_api

# NOTE(review): presumably exposes the functions registered under the
# "relay.op.qnn._make" prefix as attributes of this module - confirm against
# _ffi.function._init_api.
_init_api("relay.op.qnn._make", __name__)
Loading

0 comments on commit 2f9a5e1

Please sign in to comment.