33# This source code is licensed under the BSD-style license found in the
44# LICENSE file in the root directory of this source tree.
55
6+ import copy
67import unittest
78
89import kgb
910import numpy as np
1011import torch
1112
13+ from executorch .backends .nxp .backend .custom_delegation_options import (
14+ CustomDelegationOptions ,
15+ )
1216from executorch .backends .nxp .backend .edge_helper import _is_dequantize , _is_quantize
1317from executorch .backends .nxp .backend .edge_program_converter import (
1418 EdgeProgramToIRConverter ,
1519)
1620from executorch .backends .nxp .backend .ir .converter .node_converters .ops_converters import (
1721 ViewCopyConverter ,
1822)
23+ from executorch .backends .nxp .edge_passes .neutron_edge_pass_manager import (
24+ NeutronEdgePassManager ,
25+ )
26+ from executorch .backends .nxp .edge_passes .remove_additional_quantize_dequantize_nodes_pass import (
27+ RemoveAdditionalQDQClustersPass ,
28+ )
29+ from executorch .backends .nxp .neutron_partitioner import NeutronPartitioner
30+ from executorch .backends .nxp .nxp_backend import generate_neutron_compile_spec
31+ from executorch .backends .nxp .quantizer .neutron_quantizer import NeutronQuantizer
32+ from executorch .backends .nxp .quantizer .utils import post_training_quantize
1933from executorch .backends .nxp .tests .executorch_pipeline import (
34+ get_random_calibration_inputs ,
2035 neutron_target_spec ,
36+ to_model_input_spec ,
2137 to_quantized_edge_program ,
2238)
2339from executorch .backends .nxp .tests .executors import (
40+ compare_output_arrays ,
2441 EdgeProgramExecutor ,
2542 OverrideTargetSupportCheck ,
2643)
44+ from executorch .backends .nxp .tests .ir .converter .node_converter .test_permute_copy_converter import (
45+ Conv2dPermuteModule ,
46+ )
2747from executorch .backends .nxp .tests .models import (
2848 ConvActivationModule ,
2949 ConvFCFCSoftmaxModuleWithoutReshape ,
3050 LinearActivationModule ,
3151)
3252from executorch .exir .dialects ._ops import ops as exir_ops
53+ from executorch .extension .export_util .utils import export_to_edge
3354from parameterized import parameterized
3455from torch .export import ExportedProgram
3556from torch .fx import Graph , Node
@@ -117,7 +138,6 @@ def test_moving_fusable_activations_into_separate_qdq_clusters__addmm(
117138 call_original = True ,
118139 owner = EdgeProgramToIRConverter ,
119140 ) as converter_spy :
120-
121141 input_shape = (1 , 4 )
122142 model = LinearActivationModule (
123143 activation = activation ,
@@ -161,7 +181,6 @@ def test_moving_fusable_activations_into_separate_qdq_clusters__mm(
161181 call_original = True ,
162182 owner = EdgeProgramToIRConverter ,
163183 ) as converter_spy :
164-
165184 input_shape = (1 , 4 )
166185 model = LinearActivationModule (
167186 activation = activation ,
@@ -205,7 +224,6 @@ def test_moving_fusable_activations_into_separate_qdq_clusters__linear(
205224 call_original = True ,
206225 owner = EdgeProgramToIRConverter ,
207226 ) as converter_spy :
208-
209227 input_shape = (1 , 4 )
210228 model = LinearActivationModule (
211229 activation = activation ,
@@ -249,7 +267,6 @@ def test_moving_fusable_activations_into_separate_qdq_clusters__conv(
249267 call_original = True ,
250268 owner = EdgeProgramToIRConverter ,
251269 ) as converter_spy :
252-
253270 input_shape = (1 , 4 , 8 , 8 )
254271 model = ConvActivationModule (
255272 activation = activation , inplace = True , in_channels = input_shape [1 ]
@@ -273,3 +290,91 @@ def test_moving_fusable_activations_into_separate_qdq_clusters__conv(
273290 nodes [13 ]
274291 )
275292 assert _is_quantize (nodes [14 ])
293+
def test_remove_additional_quantize_dequantize_nodes_pass(self):
    """Check that `RemoveAdditionalQDQClustersPass` strips the QDQ cluster around `permute_copy`.

    The test quantizes a `Conv2dPermuteModule`, lowers it to the edge dialect,
    partitions it with `NeutronPartitioner`, and then:

    1. asserts the partitioned graph has 10 nodes with
       dequantize -> permute_copy -> quantize at indices 5, 6 and 7;
    2. applies the pass and asserts the graph shrinks to 8 nodes, with
       `permute_copy` at index 5 (no "cluster" meta) followed by a dequantize;
    3. runs both program variants on the same random input and compares
       their outputs.
    """
    # Operator targets referenced by the graph-structure assertions below.
    dequantize_op = exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default
    quantize_op = exir_ops.edge.quantized_decomposed.quantize_per_tensor.default
    permute_op = exir_ops.edge.aten.permute_copy.default

    shape = (1, 3, 8, 16)
    permutation = (3, 2, 1, 0)
    module = Conv2dPermuteModule(shape[1], permutation)
    target_name = "imxrt700"
    delegation_options = CustomDelegationOptions()

    # Export the float model and quantize it using random calibration data.
    calibration_data = get_random_calibration_inputs(to_model_input_spec(shape))
    sample_input = calibration_data[0]
    aten_program = torch.export.export(module, sample_input, strict=True)
    quantized_aten_program = post_training_quantize(
        aten_program,
        calibration_data,
        NeutronQuantizer(neutron_target_spec),
    )

    # Lower to edge, run the default Neutron edge passes, then partition.
    edge_manager = export_to_edge(quantized_aten_program, sample_input)
    edge_manager = edge_manager.transform(NeutronEdgePassManager())
    neutron_partitioner = NeutronPartitioner(
        generate_neutron_compile_spec(target_name, "SDK_25_09"),
        neutron_target_spec,
        delegation_options,
    )
    partitioned_manager = edge_manager.to_backend(neutron_partitioner)

    # Make sure QDQ cluster for permute_copy is present.
    # The program is snapshotted here, before the pass runs, because it is
    # executed later as the reference for the output comparison.
    program_with_cluster = copy.deepcopy(partitioned_manager.exported_program())
    graph_nodes = list(program_with_cluster.graph.nodes)
    assert len(graph_nodes) == 10
    assert graph_nodes[5].target == dequantize_op
    assert graph_nodes[6].target == permute_op
    assert "cluster" in graph_nodes[6].meta
    assert graph_nodes[7].target == quantize_op

    # Run the pass that removes additional QDQ nodes, so that computation
    # happens in non-float types where possible.
    cleanup_passes = NeutronEdgePassManager([RemoveAdditionalQDQClustersPass()])
    cleaned_manager = partitioned_manager.transform(cleanup_passes)

    # Make sure QDQ cluster for permute_copy is removed.
    program_without_cluster = cleaned_manager.exported_program()
    graph_nodes = list(program_without_cluster.graph.nodes)
    assert len(graph_nodes) == 8
    assert graph_nodes[4].name == "getitem"
    assert graph_nodes[5].target == permute_op
    assert "cluster" not in graph_nodes[5].meta
    assert graph_nodes[6].target == dequantize_op

    # Both variants are fed the very same input and must agree on the output.
    executor_without_cluster = EdgeProgramExecutor(program_without_cluster)
    executor_with_cluster = EdgeProgramExecutor(program_with_cluster)

    random_input = np.random.random(shape).astype(np.float32)
    output_without_cluster = executor_without_cluster.inference(random_input)
    output_with_cluster = executor_with_cluster.inference(random_input)

    compare_output_arrays(
        output_without_cluster,
        output_with_cluster,
        "main output",
    )
0 commit comments