-
Notifications
You must be signed in to change notification settings - Fork 234
/
Copy pathbenchmark.py
424 lines (361 loc) · 15.7 KB
/
benchmark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
import numpy as np
import click
import os
from devito import Device, configuration, info, warning, set_log_level, switchconfig, norm
from devito.arch.compiler import IntelCompiler
from devito.mpi import MPI
from devito.operator.profiling import PerformanceSummary
from devito.tools import all_equal, as_tuple, sweep
from devito.types.dense import DiscreteFunction
from examples.seismic.acoustic.acoustic_example import run as acoustic_run, acoustic_setup
from examples.seismic.tti.tti_example import run as tti_run, tti_setup
from examples.seismic.elastic.elastic_example import run as elastic_run, elastic_setup
from examples.seismic.self_adjoint.example_iso import run as acoustic_sa_run, \
acoustic_sa_setup
from examples.seismic.viscoelastic.viscoelastic_example import run as viscoelastic_run, \
viscoelastic_setup
# Registry of the supported problems. Each problem name (the same names
# offered by the `--problem` option) maps to the example's `run` driver, its
# solver `setup` function and a 'default-section' label (always 'global').
model_type = {
    name: {'run': run_fn, 'setup': setup_fn, 'default-section': 'global'}
    for name, run_fn, setup_fn in (
        ('viscoelastic', viscoelastic_run, viscoelastic_setup),
        ('elastic', elastic_run, elastic_setup),
        ('tti', tti_run, tti_setup),
        ('acoustic', acoustic_run, acoustic_setup),
        ('acoustic_sa', acoustic_sa_run, acoustic_sa_setup),
    )
}
class NTuple(click.Tuple):
    """
    A floating subtype of click's Tuple that allows inputs with fewer elements.
    Instead of accepting only tuples of exact length, this accepts tuples
    of length up to the definition size.
    For example, NTuple([int, int, int]) accepts (1,), (1, 2) and (1, 2, 3) as inputs.
    """

    def convert(self, value, param, ctx):
        """
        Convert each entry of ``value`` with the type declared at the same
        position in ``self.types``.

        A warning is emitted only when ``value`` has fewer entries than
        declared types; a complete tuple is converted silently. A ``value``
        longer than the declaration is handed to ``click.Tuple.convert``,
        which is expected to reject it because of the length mismatch.

        Returns a tuple with as many converted entries as ``value`` has.
        """
        n_value = len(value)
        n_type = len(self.types)

        if n_value > n_type:
            # Too many entries: let the parent class report the error
            return super().convert(value, param, ctx)

        if n_value < n_type:
            warning(f"Processing {n_value} out of expected up to {n_type}")

        # `zip` stops at the shorter sequence, i.e. at the `n_value` entries
        # actually supplied
        return tuple(ty(v, param, ctx) for ty, v in zip(self.types, value))
def run_op(solver, operator, **options):
    """
    Prepare the input required by ``operator`` and call it on ``solver``.

    ``operator`` is the name of a callable attribute of ``solver``; one of
    'forward', 'adjoint', 'jacobian' or 'jacobian_adjoint'. ``options`` are
    forwarded as keyword arguments to every solver call made here.

    Returns whatever the selected operator returns.

    Raises AttributeError if ``solver`` has no attribute named ``operator``,
    and ValueError if the attribute exists but the name is not one of the
    four handled here.
    """
    # Look the callable up first, so that a missing operator is reported
    # with a dedicated message
    try:
        callback = getattr(solver, operator)
    except AttributeError:
        raise AttributeError("Operator %s not implemented for %s" % (operator, solver))

    # Each operator needs different positional input; build it case by case
    if operator == "forward":
        return callback(**options)

    if operator == "adjoint":
        return callback(solver.geometry.adj_src, **options)

    if operator == "jacobian":
        perturbation = solver.model.dm
        # An all-zero perturbation gets one artificial non-zero slice along
        # the last axis
        if perturbation.data.min() == 0 and perturbation.data.max() == 0:
            slice_index = np.min([25, perturbation.shape_global[-1]//4])
            perturbation.data[..., slice_index] = .1
        return callback(perturbation, **options)

    if operator == "jacobian_adjoint":
        # Run the forward operator with `save=True`, then feed its outputs,
        # minus the trailing performance summary, to the requested operator
        forward_out = solver.forward(save=True, **options)
        assert isinstance(forward_out[-1], PerformanceSummary)
        return callback(*forward_out[:-1], **options)

    raise ValueError("Unrecognized operator %s" % operator)
# Root click command group; the sub-commands of this script are attached to
# it through `@benchmark.command(...)`. The docstring below is used as-is for
# the group's `--help` text, hence the `\b` marker to keep the list layout.
@click.group()
def benchmark():
    """
    Benchmarking script for seismic operators.

    \b
    There are three main 'execution modes':
    run: a single run with given optimization level
    run-jit-backdoor: a single run using the DEVITO_JIT_BACKDOOR to
    experiment with manual customizations
    test: tests numerical correctness with different parameters

    Further, this script can generate a roofline plot from a benchmark
    """
    # Intentionally empty: the group itself performs no work
    pass
def option_simulation(f):
    """
    Attach the simulation-related command-line options to ``f``.

    The options cover the problem name, grid shape and spacing, number of
    boundary layers, space and time orders, end time and the operator to
    run. Returns ``f`` wrapped by all of these click options.
    """
    def default_list(ctx, param, value):
        # Fall back to the single value 2 when no value was supplied
        return list(value) if len(value) > 0 else [2]

    problem_names = ['acoustic', 'tti',
                     'elastic', 'acoustic_sa', 'viscoelastic']
    operator_names = ['forward', 'adjoint',
                      'jacobian', 'jacobian_adjoint']

    decorators = [
        click.option('-P', '--problem', help='Problem name',
                     type=click.Choice(problem_names)),
        click.option('-d', '--shape', default=(50, 50, 50), type=NTuple([int, int, int]),
                     help='Number of grid points along each axis'),
        click.option('-s', '--spacing', default=(20., 20., 20.),
                     type=NTuple([float, float, float]),
                     help='Spacing between grid sizes in meters'),
        click.option('-n', '--nbl', default=10,
                     help='Number of boundary layers'),
        click.option('-so', '--space-order', type=int, multiple=True,
                     callback=default_list, help='Space order of the simulation'),
        click.option('-to', '--time-order', type=int, multiple=True,
                     callback=default_list, help='Time order of the simulation'),
        click.option('-t', '--tn', default=250,
                     help='End time of the simulation in ms'),
        click.option('-op', '--operator', default='forward', help='Operator to run',
                     type=click.Choice(operator_names)),
    ]

    # Apply last-to-first, exactly as if the options had been stacked as
    # decorators on top of `f` in the order listed above
    wrapped = f
    for decorate in decorators[::-1]:
        wrapped = decorate(wrapped)
    return wrapped
def option_performance(f):
    """
    Defines options for all aspects of performance tuning.

    Wraps ``f`` with the `--arch`, `--opt`, `--block-shape` and `--autotune`
    click options and returns the wrapped callable. The option callbacks
    validate the user input and, as a side effect, update the global
    `configuration` (`opt-options` and `autotuning`).
    """

    def from_opt(ctx, param, value):
        """
        Process the opt argument.

        ``value`` is either a bare optimization level (e.g. ``advanced``) or
        the textual form of a non-empty tuple whose first entry is the level
        (e.g. ``('advanced', {'par-tile': True})``). Returns the bare string
        unchanged, or the evaluated tuple.

        Raises click.BadParameter if the input evaluates to anything other
        than a non-empty tuple, or if the level is not an accepted one.
        """
        try:
            # NOTE(security): `eval` executes arbitrary Python taken from the
            # command line. It is kept for backward compatibility with the
            # accepted inputs; do not expose this option to untrusted input.
            parsed = eval(value)
        except (NameError, SyntaxError):
            # E.g. `advanced` -- not a Python expression, treat it as a
            # plain optimization level name
            opt = value
        else:
            # E.g., `('advanced', {'par-tile': True})`
            if not (isinstance(parsed, tuple) and len(parsed) >= 1):
                raise click.BadParameter("Invalid choice `%s` (`opt` must be "
                                         "either str or tuple)" % str(parsed))
            value = parsed
            opt = value[0]
        if opt not in configuration._accepted['opt']:
            raise click.BadParameter("Invalid choice `%s` (choose from %s)"
                                     % (opt, str(configuration._accepted['opt'])))
        return value

    def config_blockshape(ctx, param, value):
        """
        Normalize the `--block-shape` values into nested lists of integers.

        Each given block shape is a whitespace-separated string of integers
        that is split into levels of three entries. Returns a tuple with one
        list of levels per block shape; the tuple is empty when no block
        shape is given or when the platform is a `Device`.

        Raises ValueError if a level has fewer than three entries or if the
        block shapes do not all have the same number of levels.
        """
        if isinstance(configuration['platform'], Device):
            normalized_value = []
        elif value:
            # Block innermost loops if a full block shape is provided
            # Note: see https://github.com/devitocodes/devito/issues/320 for why
            # we use blockinner=True only if the backend compiler is Intel
            flag = isinstance(configuration['compiler'], IntelCompiler)
            configuration['opt-options']['blockinner'] = flag
            # Normalize value:
            # 1. integers, not strings
            # 2. sanity check the (hierarchical) blocking shape
            normalized_value = []
            for block_shape in value:
                # If hierarchical blocking is activated, say with N levels, here in
                # `bs` we expect to see 3*N entries
                bs = [int(x) for x in block_shape.split()]
                levels = [bs[x:x+3] for x in range(0, len(bs), 3)]
                if any(len(level) != 3 for level in levels):
                    raise ValueError("Expected 3 entries per block shape level, but got "
                                     "one level with less than 3 entries (`%s`)" % levels)
                normalized_value.append(levels)
            if not all_equal(len(levels) for levels in normalized_value):
                raise ValueError("Found different block shapes with incompatible "
                                 "number of levels (`%s`)" % normalized_value)
            configuration['opt-options']['blocklevels'] = len(normalized_value[0])
        else:
            normalized_value = []
        return tuple(normalized_value)

    def config_autotuning(ctx, param, value):
        """Setup auto-tuning to run in ``{basic,aggressive,...}+preemptive`` mode."""
        if isinstance(configuration['platform'], Device):
            level = False
        elif value != 'off':
            # Sneak-peek at the `block-shape` -- if provided, keep auto-tuning off
            # (`--block-shape` is declared eager below, so it is expected to
            # have been processed already)
            if ctx.params['block_shape']:
                warning("Skipping autotuning (using explicit block-shape `%s`)"
                        % str(ctx.params['block_shape']))
                level = False
            else:
                # Make sure to always run in preemptive mode
                configuration['autotuning'] = [value, 'preemptive']
                # We apply blocking to all parallel loops, including the innermost ones
                # Note: see https://github.com/devitocodes/devito/issues/320 for why
                # we use blockinner=True only if the backend compiler is Intel
                flag = isinstance(configuration['compiler'], IntelCompiler)
                configuration['opt-options']['blockinner'] = flag
                level = value
        else:
            level = False
        return level

    options = [
        click.option('--arch', default='unknown',
                     help='Architecture on which the simulation is/was run'),
        click.option('--opt', callback=from_opt, default='advanced',
                     help='Performance optimization level'),
        click.option('-bs', '--block-shape', callback=config_blockshape, multiple=True,
                     is_eager=True, help='Loop-blocking shape, bypass autotuning'),
        click.option('-a', '--autotune', default='aggressive', callback=config_autotuning,
                     type=click.Choice([str(tuple(i)) if isinstance(i, list) else i
                                        for i in configuration._accepted['autotuning']]),
                     help='Select autotuning mode')
    ]
    # Apply in reverse, as stacked decorators would be
    for option in reversed(options):
        f = option(f)
    return f
# NOTE: `--dump-summary` and `--dump-norms` take a file path. They are given
# an explicit `type=str` and a `None` default because click derives the option
# type from the default value when no type is given, so a boolean default
# would make click treat the file name as a boolean.
@benchmark.command(name='run')
@option_simulation
@option_performance
@click.option('--warmup', is_flag=True, default=False,
              help='Perform a preliminary run to warm up the system')
@click.option('--dump-summary', default=None, type=str,
              help='File where the performance results are saved')
@click.option('--dump-norms', default=None, type=str,
              help='File where the output norms are saved')
def cli_run(problem, **kwargs):
    """`click` interface for the `run` mode."""
    # Benchmarks are run with develop-mode switched off
    configuration['develop-mode'] = False
    run(problem, **kwargs)
def run(problem, **kwargs):
    """
    A single run with a specific set of performance parameters.

    ``problem`` selects the entry of `model_type` whose `setup` builds the
    solver. The following keys are consumed from ``kwargs``:

    * ``time_order``, ``space_order``: sequences, only the first entry is used;
    * ``autotune``: forwarded to the operator as the `autotune` option;
    * ``block_shape``: explicit block shapes, turned into operator options;
    * ``operator`` (default 'forward'): the operator to run, see `run_op`;
    * ``warmup`` (default False): perform an extra, untimed run first;
    * ``dump_summary`` (default None): file receiving the performance summary;
    * ``dump_norms`` (default None): file receiving the norms of the outputs.

    Everything left in ``kwargs`` when the solver is built is passed to
    `setup`. Returns the value returned by the operator.

    Raises RuntimeError if ``dump_summary`` is requested while
    ``configuration['profiling']`` is not 'advanced'.
    """
    setup = model_type[problem]['setup']

    time_order = kwargs.pop('time_order')[0]
    space_order = kwargs.pop('space_order')[0]
    autotune = kwargs.pop('autotune')
    block_shapes = as_tuple(kwargs.pop('block_shape'))
    operator = kwargs.pop('operator', 'forward')
    warmup = kwargs.pop('warmup', False)

    # Fail fast: check the precondition of `--dump-summary` before spending
    # time on the simulation rather than after it
    if kwargs.get('dump_summary') and configuration['profiling'] != 'advanced':
        raise RuntimeError("Must set DEVITO_PROFILING=advanced (or, alternatively, "
                           "DEVITO_LOGGING=PERF) with --dump-summary")

    options = {'autotune': autotune}

    # Should a specific block-shape be used? Useful if one wants to skip
    # the autotuning pass as a good block-shape is already known
    # Note: the following piece of code is horribly *hacky*, but it works for now
    for i, block_shape in enumerate(block_shapes):
        for n, level in enumerate(block_shape):
            for d, s in zip(['x', 'y', 'z'], level):
                options['%s%d_blk%d_size' % (d, i, n)] = s

    solver = setup(space_order=space_order, time_order=time_order, **kwargs)

    if warmup:
        info("Performing warm-up run ...")
        # Silence the output of the warm-up run, then restore verbose logging
        set_log_level('ERROR', comm=MPI.COMM_WORLD)
        run_op(solver, operator, **options)
        set_log_level('DEBUG', comm=MPI.COMM_WORLD)
        info("DONE!")

    retval = run_op(solver, operator, **options)

    try:
        rank = MPI.COMM_WORLD.rank
    except AttributeError:
        # MPI not available
        rank = 0

    dumpfile = kwargs.pop('dump_summary', None)
    if dumpfile and rank == 0:
        summary = retval[-1]
        # Check before opening, so that a failure does not leave behind a
        # truncated file
        assert isinstance(summary, PerformanceSummary)
        with open(dumpfile, 'w') as f:
            f.write(str(summary.globals_all))

    dumpfile = kwargs.pop('dump_norms', None)
    if dumpfile:
        # The norms are computed on every rank; only rank 0 writes the file
        norms = ["'%s': %f" % (i.name, norm(i)) for i in retval[:-1]
                 if isinstance(i, DiscreteFunction)]
        if rank == 0:
            with open(dumpfile, 'w') as f:
                f.write("{%s}" % ', '.join(norms))

    return retval
# Registers the `run-jit-backdoor` sub-command on the `benchmark` group. It
# takes the shared simulation and performance options plus a `--dump-norms`
# flag, all of which are forwarded untouched to `run_jit_backdoor`.
@benchmark.command(name='run-jit-backdoor')
@option_simulation
@option_performance
@click.option('--dump-norms', is_flag=True, default=False,
              help='Display norms of written fields')
def cli_run_jit_backdoor(problem, **kwargs):
    """`click` interface for the `run_jit_backdoor` mode."""
    run_jit_backdoor(problem, **kwargs)
def run_jit_backdoor(problem, **kwargs):
    """
    Run the forward operator of ``problem`` with the JIT backdoor enabled,
    so that a hand-edited version of the generated code can be tried out.

    On the first invocation for a given problem the C file is not yet in the
    JIT cache: the code is generated and compiled, its location is reported
    and the function returns None. On later invocations the forward operator
    is run with ``jit_backdoor=True`` and its return value is returned; the
    norms of the output fields are logged if ``dump_norms`` is set.
    """
    configuration['develop-mode'] = False

    build_solver = model_type[problem]['setup']
    time_order = kwargs.pop('time_order')[0]
    space_order = kwargs.pop('space_order')[0]
    autotune = kwargs.pop('autotune')

    info("Preparing simulation...")
    solver = build_solver(space_order=space_order, time_order=time_order, **kwargs)

    # Build the operator; this generates the code but does not JIT it yet
    op = solver.op_fwd()

    # Location of the generated C file inside the JIT cache
    jit_path = op._compiler.get_jit_dir().joinpath(op._soname)
    cfile = "%s.c" % str(jit_path)

    if os.path.exists(cfile):
        info("Running wave propagation Operator...")

        def _forward_with_backdoor():
            return run_op(solver, 'forward', autotune=autotune)

        # Same effect as decorating the helper with `@switchconfig(...)`
        retval = switchconfig(jit_backdoor=True)(_forward_with_backdoor)()

        if kwargs.pop('dump_norms'):
            fields = [i for i in retval[:-1] if isinstance(i, DiscreteFunction)]
            for field in fields:
                info("'%s': %f" % (field.name, norm(field)))

        return retval

    # First time this problem is run: accessing `cfunction` generates and
    # jit-compiles the code, leaving the C file behind for manual editing
    op.cfunction
    info("You may now edit the generated code in `%s`. "
         "Then save the file, and re-run this benchmark." % cfile)
    return
# Registers the `test` sub-command on the `benchmark` group, with the shared
# simulation and performance options. All options are forwarded to `test`.
@benchmark.command(name='test')
@option_simulation
@option_performance
def cli_test(problem, **kwargs):
    """`click` interface for the `test` mode."""
    # Only errors are logged while the parameter sweep is running
    set_log_level('ERROR')
    test(problem, **kwargs)
def test(problem, **kwargs):
    """
    Test numerical correctness with different parameters.

    The `run` driver registered for ``problem`` in `model_type` is executed
    once per parameter combination produced by `sweep` over the
    'space_order', 'time_order', 'opt' and 'autotune' entries of ``kwargs``.
    The fourth value returned by the first execution is taken as reference;
    each entry of the fourth value returned by every later execution must be
    close (`np.isclose`) to the corresponding reference entry.

    Raises AssertionError on the first mismatch. The check is an explicit
    `raise`, so it is still performed when Python runs with `-O`, which
    strips `assert` statements.
    """
    # Not called `run`, to avoid shadowing the module-level function
    run_problem = model_type[problem]['run']
    sweep_options = ('space_order', 'time_order', 'opt', 'autotune')

    reference = None
    for params in sweep(kwargs, keys=sweep_options):
        kwargs.update(params)
        _, _, _, res = run_problem(**kwargs)

        if reference is None:
            # First combination: nothing to compare against yet
            reference = res
            continue

        for i, value in enumerate(res):
            if not np.isclose(value, reference[i]):
                raise AssertionError(
                    f"Result {i} obtained with parameters {params} is {value}, "
                    f"which differs from the reference value {reference[i]}"
                )
if __name__ == "__main__":
    # Script entry point. The order of the steps below matters: MPI is
    # initialised first, then logging and configuration are adjusted, then
    # the click command group is run, and finally MPI is finalised.

    # If running with MPI, we emit logging messages from rank0 only
    try:
        MPI.Init()  # Devito starts off with MPI disabled!
        set_log_level('DEBUG', comm=MPI.COMM_WORLD)
        # Several processes but MPI support not enabled in the configuration:
        # enable it on behalf of the user, with a warning
        if MPI.COMM_WORLD.size > 1 and not configuration['mpi']:
            warning("It seems that you're running over MPI with %d processes, but "
                    "DEVITO_MPI is unset. Setting `DEVITO_MPI=basic`..."
                    % MPI.COMM_WORLD.size)
            configuration['mpi'] = 'basic'
    except (TypeError, ModuleNotFoundError):
        # MPI not available
        pass
    # Benchmarking cannot be done at basic level
    if configuration['profiling'] == 'basic':
        configuration['profiling'] = 'advanced'
    # NOTE(review): per click's documentation, `standalone_mode=False` makes
    # click return to the caller instead of exiting the interpreter itself;
    # presumably used so that `MPI.Finalize()` below is reached -- confirm
    benchmark(standalone_mode=False)
    try:
        MPI.Finalize()
    except TypeError:
        # MPI not available
        pass