1 change: 0 additions & 1 deletion backends/cadence/aot/TARGETS
@@ -144,7 +144,6 @@ executorch_generated_lib(
visibility = ["PUBLIC"],
deps = [
"//executorch/backends/cadence/generic/kernels:cadence_kernels",
# Individual operator targets instead of combined cadence_generic_ops
"//executorch/backends/cadence/generic/operators:op_requantize_out",
"//executorch/backends/cadence/generic/operators:im2row_out",
"//executorch/backends/cadence/generic/operators:dequantize_per_tensor",
48 changes: 48 additions & 0 deletions backends/cadence/aot/functions.yaml
@@ -184,12 +184,60 @@
- arg_meta: null
kernel_name: impl::generic::quantize_per_tensor_out

- func: cadence::quantize_per_tensor_asym8s.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
variants: function
kernels:
- arg_meta: null
kernel_name: impl::generic::quantize_per_tensor_asym8s_out

- func: cadence::quantize_per_tensor_asym8u.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
variants: function
kernels:
- arg_meta: null
kernel_name: impl::generic::quantize_per_tensor_asym8u_out

- func: cadence::quantize_per_tensor_asym16s.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
variants: function
kernels:
- arg_meta: null
kernel_name: impl::generic::quantize_per_tensor_asym16s_out

- func: cadence::quantize_per_tensor_asym16u.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
variants: function
kernels:
- arg_meta: null
kernel_name: impl::generic::quantize_per_tensor_asym16u_out

- func: cadence::dequantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
variants: function
kernels:
- arg_meta: null
kernel_name: impl::generic::dequantize_per_tensor_out

- func: cadence::dequantize_per_tensor_asym8s.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
variants: function
kernels:
- arg_meta: null
kernel_name: impl::generic::dequantize_per_tensor_asym8s_out

- func: cadence::dequantize_per_tensor_asym8u.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
variants: function
kernels:
- arg_meta: null
kernel_name: impl::generic::dequantize_per_tensor_asym8u_out

- func: cadence::dequantize_per_tensor_asym16s.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
variants: function
kernels:
- arg_meta: null
kernel_name: impl::generic::dequantize_per_tensor_asym16s_out

- func: cadence::dequantize_per_tensor_asym16u.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
variants: function
kernels:
- arg_meta: null
kernel_name: impl::generic::dequantize_per_tensor_asym16u_out

- func: cadence::quantized_conv2d_nchw.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)
kernels:
- arg_meta: null
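Note on naming: the asym* suffixes in these new kernel entries encode the quantization scheme, and the same convention carries over to the HiFi entries in the next file. As a reference, here is our reading of the convention (a minimal sketch, derived from the type_dispatch.py table later in this PR):

```python
import torch

# Suffix convention for the typed quantize/dequantize variants:
# "asym" = asymmetric quantization (non-zero zero_point allowed),
# the number is the bit width, trailing "s"/"u" = signed/unsigned.
SUFFIX_TO_DTYPE = {
    "asym8s": torch.int8,
    "asym8u": torch.uint8,
    "asym16s": torch.int16,
    "asym16u": torch.uint16,
}
```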
49 changes: 49 additions & 0 deletions backends/cadence/aot/functions_hifi.yaml
@@ -284,12 +284,61 @@
- arg_meta: null
kernel_name: impl::HiFi::quantize_per_tensor_out

- func: cadence::quantize_per_tensor_asym8s.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
variants: function
kernels:
- arg_meta: null
kernel_name: impl::HiFi::quantize_per_tensor_asym8s_out

- func: cadence::quantize_per_tensor_asym8u.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
variants: function
kernels:
- arg_meta: null
kernel_name: impl::HiFi::quantize_per_tensor_asym8u_out

- func: cadence::quantize_per_tensor_asym16s.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
variants: function
kernels:
- arg_meta: null
kernel_name: impl::HiFi::quantize_per_tensor_asym16s_out

- func: cadence::quantize_per_tensor_asym16u.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
variants: function
kernels:
- arg_meta: null
kernel_name: impl::HiFi::quantize_per_tensor_asym16u_out


- func: cadence::dequantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
variants: function
kernels:
- arg_meta: null
kernel_name: impl::HiFi::dequantize_per_tensor_out

- func: cadence::dequantize_per_tensor_asym8s.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
variants: function
kernels:
- arg_meta: null
kernel_name: impl::HiFi::dequantize_per_tensor_asym8s_out

- func: cadence::dequantize_per_tensor_asym8u.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
variants: function
kernels:
- arg_meta: null
kernel_name: impl::HiFi::dequantize_per_tensor_asym8u_out

- func: cadence::dequantize_per_tensor_asym16s.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
variants: function
kernels:
- arg_meta: null
kernel_name: impl::HiFi::dequantize_per_tensor_asym16s_out

- func: cadence::dequantize_per_tensor_asym16u.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
variants: function
kernels:
- arg_meta: null
kernel_name: impl::HiFi::dequantize_per_tensor_asym16u_out

- func: cadence::quantized_conv2d_nchw.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)
kernels:
- arg_meta: null
148 changes: 148 additions & 0 deletions backends/cadence/aot/ops_registrations.py
@@ -28,12 +28,64 @@
"quantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)"
)

lib.define(
"quantize_per_tensor_asym8s(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype) -> (Tensor Z)"
)
lib.define(
"quantize_per_tensor_asym8s.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)"
)

lib.define(
"quantize_per_tensor_asym8u(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype) -> (Tensor Z)"
)
lib.define(
"quantize_per_tensor_asym8u.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)"
)

lib.define(
"quantize_per_tensor_asym16s(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype) -> (Tensor Z)"
)
lib.define(
"quantize_per_tensor_asym16s.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)"
)

lib.define(
"quantize_per_tensor_asym16u(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype) -> (Tensor Z)"
)
lib.define(
"quantize_per_tensor_asym16u.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)"
)

lib.define(
"dequantize_per_tensor(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype) -> (Tensor Z)"
)
lib.define(
"dequantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)"
)
lib.define(
"dequantize_per_tensor_asym8s(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype) -> (Tensor Z)"
)
lib.define(
"dequantize_per_tensor_asym8s.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)"
)
lib.define(
"dequantize_per_tensor_asym8u(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype) -> (Tensor Z)"
)
lib.define(
"dequantize_per_tensor_asym8u.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)"
)
lib.define(
"dequantize_per_tensor_asym16s(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype) -> (Tensor Z)"
)
lib.define(
"dequantize_per_tensor_asym16s.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)"
)
lib.define(
"dequantize_per_tensor_asym16u(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype) -> (Tensor Z)"
)
lib.define(
"dequantize_per_tensor_asym16u.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)"
)

lib.define(
"quantized_layer_norm(Tensor X, Tensor X_scale, Tensor X_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point) -> (Tensor Y)"
@@ -541,6 +593,54 @@ def quantize_per_tensor_meta(
return input.new_empty(input.size(), dtype=dtype)


@register_fake("cadence::quantize_per_tensor_asym8s")
def quantize_per_tensor_asym8s_meta(
input: torch.Tensor,
scale: float,
zero_point: int,
quant_min: int,
quant_max: int,
dtype: torch.dtype,
) -> torch.Tensor:
return input.new_empty(input.size(), dtype=dtype)


@register_fake("cadence::quantize_per_tensor_asym8u")
def quantize_per_tensor_asym8u_meta(
input: torch.Tensor,
scale: float,
zero_point: int,
quant_min: int,
quant_max: int,
dtype: torch.dtype,
) -> torch.Tensor:
return input.new_empty(input.size(), dtype=dtype)


@register_fake("cadence::quantize_per_tensor_asym16s")
def quantize_per_tensor_asym16s_meta(
input: torch.Tensor,
scale: float,
zero_point: int,
quant_min: int,
quant_max: int,
dtype: torch.dtype,
) -> torch.Tensor:
return input.new_empty(input.size(), dtype=dtype)


@register_fake("cadence::quantize_per_tensor_asym16u")
def quantize_per_tensor_asym16u_meta(
input: torch.Tensor,
scale: float,
zero_point: int,
quant_min: int,
quant_max: int,
dtype: torch.dtype,
) -> torch.Tensor:
return input.new_empty(input.size(), dtype=dtype)


@register_fake("cadence::dequantize_per_tensor")
def dequantize_per_tensor_meta(
input: torch.Tensor,
@@ -553,6 +653,54 @@ def dequantize_per_tensor_meta(
return input.new_empty(input.size(), dtype=torch.float)


@register_fake("cadence::dequantize_per_tensor_asym8s")
def dequantize_per_tensor_asym8s_meta(
input: torch.Tensor,
scale: float,
zero_point: int,
quant_min: int,
quant_max: int,
dtype: torch.dtype,
) -> torch.Tensor:
return input.new_empty(input.size(), dtype=torch.float)


@register_fake("cadence::dequantize_per_tensor_asym8u")
def dequantize_per_tensor_asym8u_meta(
input: torch.Tensor,
scale: float,
zero_point: int,
quant_min: int,
quant_max: int,
dtype: torch.dtype,
) -> torch.Tensor:
return input.new_empty(input.size(), dtype=torch.float)


@register_fake("cadence::dequantize_per_tensor_asym16s")
def dequantize_per_tensor_asym16s_meta(
input: torch.Tensor,
scale: float,
zero_point: int,
quant_min: int,
quant_max: int,
dtype: torch.dtype,
) -> torch.Tensor:
return input.new_empty(input.size(), dtype=torch.float)


@register_fake("cadence::dequantize_per_tensor_asym16u")
def dequantize_per_tensor_asym16u_meta(
input: torch.Tensor,
scale: float,
zero_point: int,
quant_min: int,
quant_max: int,
dtype: torch.dtype,
) -> torch.Tensor:
return input.new_empty(input.size(), dtype=torch.float)


@register_fake("cadence::quantized_add")
def quantized_add_meta(
X: torch.Tensor,
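The @register_fake implementations above only allocate an empty output of the right shape and dtype; they exist so tracing and export can propagate metadata without running real kernels. A minimal sketch of how that behaves under fake tensors, assuming this module has been imported so the cadence ops and their fake kernels are registered (the call site below is illustrative, not part of this PR):

```python
import torch
from torch._subclasses.fake_tensor import FakeTensorMode

# Assumes backends/cadence/aot/ops_registrations.py has been imported,
# registering the cadence:: ops and their fake (meta) kernels.
with FakeTensorMode():
    x = torch.empty(2, 3, dtype=torch.float32)
    q = torch.ops.cadence.quantize_per_tensor_asym8s(
        x, 0.05, 0, -128, 127, torch.int8
    )
    # The fake kernel returns input.new_empty(input.size(), dtype=dtype),
    # so shape and dtype propagate without touching real data.
    assert q.shape == x.shape and q.dtype == torch.int8
```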
24 changes: 24 additions & 0 deletions backends/cadence/aot/type_dispatch.py
@@ -27,6 +27,7 @@ class OpConfig:
base_name: str
type_dispatch_suffixes: dict[tuple[torch.dtype, ...], str]
weight_arg_idx: Optional[int] = None
is_quant_op: bool = False
variant: str = "per_tensor"


@@ -100,6 +101,27 @@ class CompileTimeTypeDispatchPass(ExportPass):
},
variant="default",
),
exir_ops.edge.cadence.quantize_per_tensor.default: OpConfig(
"quantize_per_tensor",
type_dispatch_suffixes={
(torch.int8,): "asym8s",
(torch.uint8,): "asym8u",
(torch.int16,): "asym16s",
(torch.uint16,): "asym16s",
},
variant="default",
is_quant_op=True,
),
exir_ops.edge.cadence.dequantize_per_tensor.default: OpConfig(
"dequantize_per_tensor",
type_dispatch_suffixes={
(torch.int8,): "asym8s",
(torch.uint8,): "asym8u",
(torch.int16,): "asym16s",
(torch.uint16,): "asym16s",
},
variant="default",
),
}

def call_operator(
@@ -120,6 +142,8 @@ def call_operator(
if config.weight_arg_idx is not None:
weight_dtype = args[config.weight_arg_idx].to_tensor().dtype
dtype_key = (input_dtype, weight_dtype)
elif config.is_quant_op:
dtype_key = (args[5],)
else:
dtype_key = (input_dtype,)

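With is_quant_op set, call_operator keys the suffix lookup on the dtype argument at position 5 of the schema (quantize takes a float input, so the requested output dtype is the only dispatch signal), while dequantize keeps keying on the input tensor's dtype. Below is a standalone sketch of that key computation, assuming the suffix table matches the OpConfig entries above; resolve_variant is illustrative, and the real pass rewrites node targets on the edge dialect graph:

```python
import torch

# Suffix table mirroring the OpConfig entries above.
QUANT_SUFFIXES = {
    (torch.int8,): "asym8s",
    (torch.uint8,): "asym8u",
    (torch.int16,): "asym16s",
    (torch.uint16,): "asym16u",
}

def resolve_variant(base_name: str, args: tuple, is_quant_op: bool) -> str:
    if is_quant_op:
        # quantize_per_tensor: dispatch on the requested output dtype,
        # passed as the sixth schema argument (args[5]).
        dtype_key = (args[5],)
    else:
        # dequantize_per_tensor: dispatch on the quantized input's dtype.
        dtype_key = (args[0].dtype,)
    return f"{base_name}_{QUANT_SUFFIXES[dtype_key]}"

x = torch.zeros(4, dtype=torch.int16)
assert resolve_variant(
    "dequantize_per_tensor", (x, 0.1, 0, -32768, 32767, torch.int16), False
) == "dequantize_per_tensor_asym16s"
```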