-
Notifications
You must be signed in to change notification settings - Fork 52
/
compression.py
150 lines (134 loc) · 4.72 KB
/
compression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import numpy
import onnx
import onnx_tool
from onnx_tool import Graph
from onnx_tool.fusion import FusionPattern
from onnx_tool.fusion import ConvBNFusion, Fused_Element, Conv_Res
from onnx_tool.serialization import *
def resnet_compress():
    """Infer shapes for ResNet50-v2 at a fixed 224x224 input and compute an
    in-place memory-compression plan for the graph's tensors."""
    model_path = 'data/public/resnet50-v2-7.onnx'
    graph = onnx_tool.Model(model_path).graph
    # Pin the largest expected input so every intermediate tensor gets a concrete shape.
    graph.shape_infer({'data': numpy.zeros((1, 3, 224, 224), dtype=numpy.float32)})
    graph.compress_memory()
def resnet_fusion_compression():
    """Fuse ResNet50-v2 (Conv+BN, elementwise post-ops, Conv+residual), strip
    pure shape ops, and serialize the shape engine, memory-compression plan,
    compute graph, and fused model.

    Outputs: resnet50_fused.se, resnet50_fused.cm, resnet50_fused.cg,
    resnet50_fused.onnx
    """
    # Pattern: any node feeding a Flatten/Reshape -> absorb the shape op into
    # its producer (these ops move no data worth keeping as separate nodes).
    remove_shapeop = [
        {
            'name': 'any',
            'op': 'Any',
            'attrs': [],
            'inport': [],
            'outport': [[0, 'fla', 0]],
        },
        {
            'name': 'fla',
            'op': ['Flatten', 'Reshape'],
            'attrs': [],
            'inport': [[0, 'any', 0]],
            'outport': [],
        }
    ]
    file = 'data/public/resnet50-v2-7.onnx'
    m = onnx_tool.Model(file)
    g = m.graph
    # Build the symbolic shape engine over variable H/W before any fusion so
    # the serialized engine matches the unfused graph topology.
    shapeengine = g.shape_regress(
        {
            'data': [1, 3, 'h', 'w']
        },
        {
            'h': (224, 299),
            'w': (224, 299),
        })
    serialize_shape_engine(shapeengine, 'resnet50_fused.se')  # create shape engine before any fusion
    max_shape_key = {'h': 224, 'w': 224}
    # dtype=float32 matches the model input and resnet_compress above;
    # numpy.zeros defaults to float64 otherwise.
    max_shape = {'data': numpy.zeros((1, 3, max_shape_key['h'], max_shape_key['w']), dtype=numpy.float32)}
    g.shape_infer(max_shape)
    cg = g.get_compute_graph()
    ConvBNFusion(cg)
    # Apply the two compute fusions in order: elementwise post-ops first,
    # then Conv + residual-add. Both keep the fused node's outputs (True).
    for fusion_desc in (Fused_Element, Conv_Res):
        pattern = FusionPattern(fusion_desc)
        for names in pattern.search_pattern(cg):
            cg.fuse_postop_node_names(names, True)
    # Remove Flatten/Reshape shape ops (False: do not keep their outputs).
    pattern = FusionPattern(remove_shapeop)
    for names in pattern.search_pattern(cg):
        cg.fuse_postop_node_names(names, False)
    cg.graph_reorder_nodes()
    compress_mem = cg.compress_memory()
    serialize_memory_compression(compress_mem, max_shape_key, 'resnet50_fused.cm')
    serialize_graph(cg, 'resnet50_fused.cg')
    cg.save_model('resnet50_fused.onnx')
def bevformer():
    """Profile the TensorRT export of BEVFormer-tiny, which contains two custom
    TRT plugin ops, by registering shape-inference/profiling handlers for them,
    then run shape inference, profiling, and memory compression.

    Outputs: bevformer_tiny_shapes.onnx, bevformer_tiny_cg.onnx
    """
    from onnx_tool import NODE_REGISTRY
    from onnx_tool.node import PWNode,Node,_get_shape
    from onnx_tool.tensor import Tensor
    # this is the TensorRT version of BEVFormer
    # It fused some ops as two TRT plugins
    @NODE_REGISTRY.register()
    class RotateTRTNode(PWNode):
        # Pointwise op; only the per-element MAC cost is overridden.
        def __init__(self, n):
            super().__init__(n)
            self.op_mac = 4 * 4  # assuming 4x4 transformation matrix
    @NODE_REGISTRY.register()
    class MultiScaleDeformableAttnTRTNode(Node):
        def __init__(self, n):
            super().__init__(n)
        def shape_infer(self, intensors: list[Tensor], outtensors: list[Tensor]):
            # Output keeps input-0's shape except dim 1, which is taken from
            # input 3 (presumably the query tensor — TODO confirm plugin spec).
            s0 = intensors[0].get_shape()
            s3 = intensors[3].get_shape()
            s0[1] = s3[1]
            outtensors[0].update_shape(s0)
        def profile(self, intensors: list, outtensors: list):
            # MACs scale with every sampling dimension of the deformable
            # attention; 8 MACs per sampled point is assumed — TODO confirm.
            macs = 8
            batch = intensors[0].get_shape()[0]
            num_heads = intensors[0].get_shape()[2]
            channels = intensors[0].get_shape()[3]
            num_levels = intensors[1].get_shape()[0]
            num_query = intensors[3].get_shape()[1]
            num_points = intensors[4].get_shape()[3]
            base_num = batch * num_query * num_heads * channels * num_levels * num_points
            return [base_num * macs, 0]  # [MACs, extra-memory]
    file = 'data/public/bevformer_tiny.onnx'
    m = onnx_tool.Model(file,{'verbose':True,"constant_folding":True})
    g = m.graph
    g.shape_infer()
    g.profile()
    g.print_node_map()
    m.save_model('bevformer_tiny_shapes.onnx')
    compress_mem = g.compress_memory()
    print('compressed memory allocation: ',compress_mem[1])
    cg=g.get_compute_graph()
    cg.graph_reorder_nodes()
    cg.compress_memory()
    # rawmodel keeps initializer data from the original protobuf.
    cg.save_model('bevformer_tiny_cg.onnx',rawmodel=m.mproto)
def gpt2():
    """Build and serialize the shape engine, memory-compression plan, and
    compute graph for the GPT-2 ONNX model with variable batch/sequence dims."""
    model_path = 'data/public/gpt2-10.onnx'
    model = onnx_tool.Model(model_path, {'verbose': True, "constant_folding": True})
    graph = model.graph
    # Symbolic shape engine over variable batch size and sequence length.
    engine = graph.shape_regress(
        {'input1': ['batch', 1, 'seq']},
        {'batch': (1, 4), 'seq': (1, 384)},
    )
    serialize_shape_engine(engine, 'gpt2.se')  # create shape engine before any fusion
    largest = {'batch': 4, 'seq': 384}
    # Infer concrete shapes at the maximum configuration.
    graph.shape_infer({'input1': numpy.zeros((largest['batch'], 1, largest['seq']))})
    compute_graph = graph.get_compute_graph()
    plan = compute_graph.compress_memory()
    serialize_memory_compression(plan, largest, 'gpt2.cm')
    serialize_graph(compute_graph, 'gpt2.cg')
    compute_graph.save_model('gpt2_cg.onnx')
if __name__ == '__main__':
    # Guard the example runs so importing this module does not trigger them
    # (previously these executed on import as bare module-level calls).
    resnet_compress()
    resnet_fusion_compression()
    bevformer()
    gpt2()