# single_layer_sim.py
import os
from scalesim.scale_config import scale_config as cfg
from scalesim.topology_utils import topologies as topo
from scalesim.compute.operand_matrix import operand_matrix as opmat
from scalesim.compute.systolic_compute_os import systolic_compute_os
from scalesim.compute.systolic_compute_ws import systolic_compute_ws
from scalesim.compute.systolic_compute_is import systolic_compute_is
from scalesim.memory.double_buffered_scratchpad_mem import double_buffered_scratchpad as mem_dbsp
from scalesim.memory.double_buffered_tutorial2_scratchpad_mem import double_buffered_scratchpad as tut2mem
class single_layer_sim:
    """Simulate a single layer: compute demand generation plus memory servicing.

    Typical usage is ``set_params()`` -> (optionally ``set_memory_system()``)
    -> ``run()`` -> ``save_traces()`` / ``get_*_report_items()``.
    """

    def __init__(self):
        """Create the default sub-objects and zero every report field."""
        self.layer_id = 0
        self.topo = topo()
        self.config = cfg()

        self.op_mat_obj = opmat()
        self.compute_system = systolic_compute_os()
        # self.memory_system = mem_dbsp()
        self.memory_system = tut2mem()

        self.verbose = True

        # Report items : Compute report
        self.total_cycles = 0
        self.stall_cycles = 0
        self.num_compute = 0
        self.num_mac_unit = 0
        self.overall_util = 0
        self.mapping_eff = 0
        self.compute_util = 0

        # Report items : BW report
        self.avg_ifmap_sram_bw = 0
        self.avg_filter_sram_bw = 0
        self.avg_ofmap_sram_bw = 0
        self.avg_ifmap_dram_bw = 0
        self.avg_filter_dram_bw = 0
        self.avg_ofmap_dram_bw = 0

        # Report items : Detailed Access report
        self.ifmap_sram_start_cycle = 0
        self.ifmap_sram_stop_cycle = 0
        self.ifmap_sram_reads = 0

        self.filter_sram_start_cycle = 0
        self.filter_sram_stop_cycle = 0
        self.filter_sram_reads = 0

        self.ofmap_sram_start_cycle = 0
        self.ofmap_sram_stop_cycle = 0
        self.ofmap_sram_writes = 0

        self.ifmap_dram_start_cycle = 0
        self.ifmap_dram_stop_cycle = 0
        self.ifmap_dram_reads = 0

        self.filter_dram_start_cycle = 0
        self.filter_dram_stop_cycle = 0
        self.filter_dram_reads = 0

        self.ofmap_dram_start_cycle = 0
        self.ofmap_dram_stop_cycle = 0
        self.ofmap_dram_writes = 0

        # Progress flags checked by the other methods
        self.params_set_flag = False
        self.memory_system_ready_flag = False
        self.runs_ready = False
        self.report_items_ready = False

    def set_params(self,
                   layer_id=0,
                   config_obj=None, topology_obj=None,
                   verbose=True):
        """Bind the layer, configuration and topology used by ``run()``.

        Args:
            layer_id: Index of the layer to simulate.
            config_obj: Configuration object. When omitted (``None``) a fresh
                ``cfg()`` is created for this call.
            topology_obj: Topology object. When omitted (``None``) a fresh
                ``topo()`` is created for this call.
            verbose: Verbosity flag forwarded to the memory system in ``run()``.
        """
        # Defaults are built per call; building them in the signature would
        # share one object between every call that omits the argument.
        if config_obj is None:
            config_obj = cfg()
        if topology_obj is None:
            topology_obj = topo()

        self.layer_id = layer_id
        self.config = config_obj
        self.topo = topology_obj

        self.op_mat_obj.set_params(layer_id=self.layer_id,
                                   config_obj=self.config,
                                   topoutil_obj=self.topo,
                                   )

        # Pick the compute system matching the configured dataflow.
        # NOTE(review): any value other than 'os'/'ws'/'is' silently keeps the
        # previously installed compute system.
        self.dataflow = self.config.get_dataflow()
        if self.dataflow == 'os':
            self.compute_system = systolic_compute_os()
        elif self.dataflow == 'ws':
            self.compute_system = systolic_compute_ws()
        elif self.dataflow == 'is':
            self.compute_system = systolic_compute_is()

        arr_dims = self.config.get_array_dims()
        self.num_mac_unit = arr_dims[0] * arr_dims[1]
        self.verbose = verbose

        self.params_set_flag = True

    # This communicates that the memory is being managed externally
    # And the class will not interfere with setting it up
    def set_memory_system(self, mem_sys_obj=None):
        """Install an externally managed memory system.

        After this call ``run()`` skips its own memory-system setup.

        Args:
            mem_sys_obj: Memory system to use. When omitted (``None``) a fresh
                ``mem_dbsp()`` is created for this call.
        """
        if mem_sys_obj is None:
            mem_sys_obj = mem_dbsp()

        self.memory_system = mem_sys_obj
        self.memory_system_ready_flag = True

    def _setup_memory_system(self):
        """Configure ``self.memory_system`` from ``self.config``."""
        word_size = 1           # bytes, this can be incorporated in the config file
        active_buf_frac = 0.5   # This can be incorporated in the config as well

        ifmap_buf_size_kb, filter_buf_size_kb, ofmap_buf_size_kb = self.config.get_mem_sizes()
        ifmap_buf_size_bytes = 1024 * ifmap_buf_size_kb
        filter_buf_size_bytes = 1024 * filter_buf_size_kb
        ofmap_buf_size_bytes = 1024 * ofmap_buf_size_kb

        if self.config.use_user_dram_bandwidth():
            estimate_bandwidth_mode = False
            bws = self.config.get_bandwidths_as_list()
            # NOTE(review): all three interfaces use the first list entry.
            ifmap_backing_bw = bws[0]
            filter_backing_bw = bws[0]
            ofmap_backing_bw = bws[0]
        else:
            estimate_bandwidth_mode = True
            _, arr_col = self.config.get_array_dims()

            # The number 10 elems per cycle is arbitrary
            ifmap_backing_bw = 10
            filter_backing_bw = 10
            ofmap_backing_bw = arr_col

        self.memory_system.set_params(
            word_size=word_size,
            ifmap_buf_size_bytes=ifmap_buf_size_bytes,
            filter_buf_size_bytes=filter_buf_size_bytes,
            ofmap_buf_size_bytes=ofmap_buf_size_bytes,
            rd_buf_active_frac=active_buf_frac, wr_buf_active_frac=active_buf_frac,
            ifmap_backing_buf_bw=ifmap_backing_bw,
            filter_backing_buf_bw=filter_backing_bw,
            ofmap_backing_buf_bw=ofmap_backing_bw,
            verbose=self.verbose,
            estimate_bandwidth_mode=estimate_bandwidth_mode
        )

    def run(self):
        """Generate the compute demand and service it through the memory system.

        Raises:
            AssertionError: If ``set_params()`` has not been called.
        """
        assert self.params_set_flag, 'Parameters are not set. Run set_params()'

        # 1. Setup and the get the demand from compute system

        # 1.1 Get the operand matrices
        _, ifmap_op_mat = self.op_mat_obj.get_ifmap_matrix()
        _, filter_op_mat = self.op_mat_obj.get_filter_matrix()
        _, ofmap_op_mat = self.op_mat_obj.get_ofmap_matrix()

        self.num_compute = self.topo.get_layer_num_ofmap_px(self.layer_id) \
                           * self.topo.get_layer_window_size(self.layer_id)

        # 1.2 Hand the operand matrices to the compute system
        self.compute_system.set_params(config_obj=self.config,
                                       ifmap_op_mat=ifmap_op_mat,
                                       filter_op_mat=filter_op_mat,
                                       ofmap_op_mat=ofmap_op_mat)

        # 1.3 Get the prefetch matrices for both operands, and the demand
        #     matrices for the two operands and the output
        ifmap_prefetch_mat, filter_prefetch_mat = self.compute_system.get_prefetch_matrices()
        ifmap_demand_mat, filter_demand_mat, ofmap_demand_mat = self.compute_system.get_demand_matrices()

        # 2. Setup the memory system and run the demands through it to find
        #    any memory bottleneck and generate traces

        # 2.1 Setup the memory system if it was not setup externally
        if not self.memory_system_ready_flag:
            self._setup_memory_system()

        # 2.2 Install the prefetch matrices to the read buffers to finish setup
        if self.config.use_user_dram_bandwidth():
            self.memory_system.set_read_buf_prefetch_matrices(ifmap_prefetch_mat=ifmap_prefetch_mat,
                                                              filter_prefetch_mat=filter_prefetch_mat)

        # 2.3 Start sending the requests through the memory system until
        # all the OFMAP memory requests have been serviced
        self.memory_system.service_memory_requests(ifmap_demand_mat, filter_demand_mat, ofmap_demand_mat)

        self.runs_ready = True

    # This will write the traces
    def save_traces(self, top_path):
        """Write the six SRAM/DRAM trace files under ``<top_path>/layer<id>``.

        The directory (including missing parents) is created if needed.

        Args:
            top_path: Directory under which the per-layer directory is placed.

        Raises:
            AssertionError: If ``set_params()`` has not been called.
        """
        assert self.params_set_flag, 'Parameters are not set'

        dir_name = top_path + '/layer' + str(self.layer_id)
        # makedirs avoids spawning a shell, so paths containing spaces or
        # shell metacharacters work and failures raise instead of being ignored.
        os.makedirs(dir_name, exist_ok=True)

        ifmap_sram_filename = dir_name + '/IFMAP_SRAM_TRACE.csv'
        filter_sram_filename = dir_name + '/FILTER_SRAM_TRACE.csv'
        ofmap_sram_filename = dir_name + '/OFMAP_SRAM_TRACE.csv'

        ifmap_dram_filename = dir_name + '/IFMAP_DRAM_TRACE.csv'
        filter_dram_filename = dir_name + '/FILTER_DRAM_TRACE.csv'
        ofmap_dram_filename = dir_name + '/OFMAP_DRAM_TRACE.csv'

        self.memory_system.print_ifmap_sram_trace(ifmap_sram_filename)
        self.memory_system.print_ifmap_dram_trace(ifmap_dram_filename)
        self.memory_system.print_filter_sram_trace(filter_sram_filename)
        self.memory_system.print_filter_dram_trace(filter_dram_filename)
        self.memory_system.print_ofmap_sram_trace(ofmap_sram_filename)
        self.memory_system.print_ofmap_dram_trace(ofmap_dram_filename)

    #
    def calc_report_data(self):
        """Populate every report field from the compute and memory systems.

        Raises:
            AssertionError: If ``run()`` has not completed.
        """
        assert self.runs_ready, 'Runs are not done yet'

        # Compute report
        self.total_cycles = self.memory_system.get_total_compute_cycles()
        self.stall_cycles = self.memory_system.get_stall_cycles()
        self.overall_util = (self.num_compute * 100) / (self.total_cycles * self.num_mac_unit)
        self.mapping_eff = self.compute_system.get_avg_mapping_efficiency() * 100
        self.compute_util = self.compute_system.get_avg_compute_utilization() * 100

        # BW report
        self.ifmap_sram_reads = self.compute_system.get_ifmap_requests()
        self.filter_sram_reads = self.compute_system.get_filter_requests()
        self.ofmap_sram_writes = self.compute_system.get_ofmap_requests()
        self.avg_ifmap_sram_bw = self.ifmap_sram_reads / self.total_cycles
        self.avg_filter_sram_bw = self.filter_sram_reads / self.total_cycles
        self.avg_ofmap_sram_bw = self.ofmap_sram_writes / self.total_cycles

        # Detail report
        self.ifmap_sram_start_cycle, self.ifmap_sram_stop_cycle \
            = self.memory_system.get_ifmap_sram_start_stop_cycles()

        self.filter_sram_start_cycle, self.filter_sram_stop_cycle \
            = self.memory_system.get_filter_sram_start_stop_cycles()

        self.ofmap_sram_start_cycle, self.ofmap_sram_stop_cycle \
            = self.memory_system.get_ofmap_sram_start_stop_cycles()

        self.ifmap_dram_start_cycle, self.ifmap_dram_stop_cycle, self.ifmap_dram_reads \
            = self.memory_system.get_ifmap_dram_details()

        self.filter_dram_start_cycle, self.filter_dram_stop_cycle, self.filter_dram_reads \
            = self.memory_system.get_filter_dram_details()

        self.ofmap_dram_start_cycle, self.ofmap_dram_stop_cycle, self.ofmap_dram_writes \
            = self.memory_system.get_ofmap_dram_details()

        # BW calc for DRAM access: accesses divided by the inclusive
        # start..stop cycle span (hence the +1)
        self.avg_ifmap_dram_bw = self.ifmap_dram_reads / (self.ifmap_dram_stop_cycle - self.ifmap_dram_start_cycle + 1)
        self.avg_filter_dram_bw = self.filter_dram_reads / (self.filter_dram_stop_cycle - self.filter_dram_start_cycle + 1)
        self.avg_ofmap_dram_bw = self.ofmap_dram_writes / (self.ofmap_dram_stop_cycle - self.ofmap_dram_start_cycle + 1)

        self.report_items_ready = True

    #
    def get_layer_id(self):
        """Return the layer id; asserts that ``set_params()`` was called."""
        assert self.params_set_flag, 'Parameters are not set yet'
        return self.layer_id

    #
    def get_compute_report_items(self):
        """Return ``[total_cycles, stall_cycles, overall_util, mapping_eff, compute_util]``.

        Report data is calculated first if it is not ready yet.
        """
        if not self.report_items_ready:
            self.calc_report_data()

        items = [self.total_cycles, self.stall_cycles, self.overall_util, self.mapping_eff, self.compute_util]
        return items

    #
    def get_bandwidth_report_items(self):
        """Return the average SRAM bandwidths followed by the average DRAM
        bandwidths, each ordered ifmap, filter, ofmap.

        Report data is calculated first if it is not ready yet.
        """
        if not self.report_items_ready:
            self.calc_report_data()

        items = [self.avg_ifmap_sram_bw, self.avg_filter_sram_bw, self.avg_ofmap_sram_bw]
        items += [self.avg_ifmap_dram_bw, self.avg_filter_dram_bw, self.avg_ofmap_dram_bw]

        return items

    #
    def get_detail_report_items(self):
        """Return (start cycle, stop cycle, access count) triples, flattened.

        Order: ifmap/filter/ofmap SRAM, then ifmap/filter/ofmap DRAM.
        Report data is calculated first if it is not ready yet.
        """
        if not self.report_items_ready:
            self.calc_report_data()

        items = [self.ifmap_sram_start_cycle, self.ifmap_sram_stop_cycle, self.ifmap_sram_reads]
        items += [self.filter_sram_start_cycle, self.filter_sram_stop_cycle, self.filter_sram_reads]
        items += [self.ofmap_sram_start_cycle, self.ofmap_sram_stop_cycle, self.ofmap_sram_writes]
        items += [self.ifmap_dram_start_cycle, self.ifmap_dram_stop_cycle, self.ifmap_dram_reads]
        items += [self.filter_dram_start_cycle, self.filter_dram_stop_cycle, self.filter_dram_reads]
        items += [self.ofmap_dram_start_cycle, self.ofmap_dram_stop_cycle, self.ofmap_dram_writes]

        return items