-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathnimbench.nim
238 lines (206 loc) · 8.98 KB
/
nimbench.nim
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
# Copyright 2016 Ivan Koster
#
# This file is based on Folly: Facebook Open-source Library
# Copyright 2015 Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
## Introduction
## ============
## .. include:: doc/introduction.txt
##
## A quick example
## ===============
## .. include:: doc/quick_example.txt
##
## Platform support
## ================
## .. include:: doc/platform_support.txt
import strutils
import strfmt
import nimbench/private/utils
import nimbench/private/human_readable
import nimbench/private/timers
type
  # Outcome of one timed run: the elapsed time and the iteration count that
  # was requested for that run.
  BenchmarkSample = tuple
    timeInNs: int64
    iterations: Natural

  # A registered benchmark body: called with an iteration count, it returns
  # the sample measured for that call.
  BenchmarkFunction = proc(times: Natural): BenchmarkSample

  # One entry of the benchmark registry.
  Benchmark = tuple
    fileName: string
    name: string
    relative: bool
    function: BenchmarkFunction

# Module-level registry of every benchmark added so far; starts out empty.
var benchmarks: seq[Benchmark] = @[]
proc addBenchmarkImpl(fileName, name: string, relative: bool,
                      function: BenchmarkFunction) =
  ## Appends one benchmark to the module-level `benchmarks` registry.
  ##
  ## `fileName` and `name` identify the benchmark, `relative` records whether
  ## it was declared as a relative benchmark, and `function` is the proc that
  ## performs one timed run.
  let entry: Benchmark = (fileName, name, relative, function)
  benchmarks.add(entry)
template benchImpl(relative: bool,
                   fileName, benchmarkName, cycles, body: untyped): untyped =
  ## Shared implementation behind the `bench` and `benchRelative` templates.
  ## Wraps `body` in a timing proc and registers that proc through
  ## `addBenchmarkImpl`, together with `fileName`, `benchmarkName` and the
  ## `relative` flag.
  proc execute(times: Natural): BenchmarkSample {.gensym.} =
    # Runs `body` once with the requested count and reports the elapsed time.
    let numIterations = times
    result.iterations = numIterations
    # `cycles` is the identifier handed in by the caller; declaring it here is
    # what lets `body` read the iteration count under that name.
    let cycles = numIterations
    let startTicks = getTimeMeasurement()
    body
    # Assumes getTimeMeasurement() reports nanoseconds, as the field name
    # `timeInNs` suggests -- confirm against nimbench/private/timers.
    result.timeInNs = getTimeMeasurement() - startTicks
  addBenchmarkImpl(fileName, benchmarkName, relative, execute)
template bench*(name, cycles, body: untyped): untyped =
  ## Registers a benchmark called `name`.
  ##
  ## `cycles` names a counter that the code snippet in `body` has to use: the
  ## framework sets it to the number of iterations the snippet must perform.
  ## See `A quick example`_ for usage.
  let
    # Looked up here, not inside `benchImpl`, so that the file recorded is the
    # one that invoked this template.
    sourceFile = instantiationInfo(-1).filename
    label = stringifyIdentifier(name)
  benchImpl(false, sourceFile, label, cycles, body)
template benchRelative*(name, cycles, body: untyped): untyped =
  ## Works like ``bench()``, but the most recent ``bench()`` serves as the
  ## baseline. In addition to the time, the speedup relative to that baseline
  ## is printed, which shows how much faster a new implementation is: 100%
  ## means equally fast, 200% means twice as fast, and anything below 100%
  ## means the code got slower. Every ``bench()`` starts a new baseline.
  let
    # Looked up here, not inside `benchImpl`, so that the file recorded is the
    # one that invoked this template.
    sourceFile = instantiationInfo(-1).filename
    label = stringifyIdentifier(name)
  benchImpl(true, sourceFile, label, cycles, body)
template bench*(name, body: untyped): untyped =
  ## Registers a benchmark called `name` whose `body` is repeated by the
  ## framework itself. For example:
  ##
  ## .. code-block:: nim
  ##   bench(fpOps4):
  ##     var d = 1.0
  ##     var x = float(5)
  ##     d = d + x
  ##     d = d - x
  ##     d = d * x
  ##     d = d / x
  ##     doNotOptimizeAway(d)
  ##
  ## If the benchmark already contains a loop, prefer the other
  ## `bench()<#bench.t,untyped,untyped,untyped>`_ template, the one with the
  ## `cycles` parameter. That hands the module more control over the iterating
  ## and might produce better results.
  let
    # Looked up here, not inside `benchImpl`, so that the file recorded is the
    # one that invoked this template.
    sourceFile = instantiationInfo(-1).filename
    label = stringifyIdentifier(name)
  benchImpl(false, sourceFile, label, m):
    # `m` receives the requested iteration count; count it down, running
    # `body` once per step.
    var remaining = m
    while remaining > 0:
      dec(remaining)
      body
template benchRelative*(name, body: untyped): untyped =
  ## Relative variant without a `cycles` counter; the framework repeats `body`
  ## itself. See `benchRelative()<#benchRelative.t,untyped,untyped,untyped>`_.
  let
    # Looked up here, not inside `benchImpl`, so that the file recorded is the
    # one that invoked this template.
    sourceFile = instantiationInfo(-1).filename
    label = stringifyIdentifier(name)
  benchImpl(true, sourceFile, label, m):
    # `m` receives the requested iteration count; count it down, running
    # `body` once per step.
    var remaining = m
    while remaining > 0:
      dec(remaining)
      body
# Compile and link the bundled C file with the program. Presumably it provides
# the C symbol imported just below -- confirm against no_optimize.c.
{.compile: "nimbench/private/no_optimize.c".}
proc doNotOptimizeAway*[T](x: var T) {.importc, noDecl.} ## Use this proc on
  ## variables that are only used for the benchmark. The compiler will otherwise
  ## see that the variable is unused and optimize the code away.
template memoryClobber*() =
  ## Like ``doNotOptimizeAway()`` this template could come in handy when the
  ## compiler optimized too much code away. In general this template is not
  ## needed. If you think you might be in need, it is best to analyze assembly
  ## code with and without the call.
  when defined(vcc):
    # Visual C++ build: import the `_ReadWriteBarrier` intrinsic from intrin.h
    # and call it.
    # NOTE(review): the imported proc shares its name with this template --
    # confirm the call below resolves as intended when built with vcc.
    proc memoryClobber() {.importc: "_ReadWriteBarrier", header: "intrin.h".}
    memoryClobber();
  else:
    # Every other compiler: emit an empty inline-asm statement that lists
    # "memory" as clobbered.
    {.emit: """
asm volatile("" ::: "memory");
""".}
proc runBenchmarkGetNsPerIteration(function: BenchmarkFunction,
                                   globalBaseline: float64): float64 =
  ## Measures `function` and returns its cost per iteration in nanoseconds,
  ## with `globalBaseline` subtracted and the outcome clamped at zero.
  ##
  ## Up to `maxEpochs` epochs are run, each contributing at most one sample.
  ## Measuring stops early once `timeBudgetInNs` has elapsed. The smallest
  ## sample is returned, or 0.0 when no sample could be collected.
  const
    minNanoseconds = 100_000
    maxEpochs = 1_000
    timeBudgetInNs = 1_000_000_000
  var samples: seq[float64] = @[]
  let began = getTimeMeasurement()
  for epoch in 0..<maxEpochs:
    # Walk through the iteration counts produced by geometricSequence (per the
    # original note: doubling, up to 30 times) until a run lasts long enough
    # to be trusted.
    for count in geometricSequence(1, 1 shl 30, 2):
      let sample = function(count)
      if sample.timeInNs >= minNanoseconds:
        let perIteration = float64(sample.timeInNs) /
                           float64(sample.iterations)
        samples.add(max(0.0, perIteration - globalBaseline))
        break # this epoch has its sample
      # otherwise the run was too short and too noisy: try the next count
    if getTimeMeasurement() - began >= timeBudgetInNs:
      break # the time budget for this benchmark is used up
  # The minimum is the sample least disturbed by noise.
  if samples.len == 0:
    result = 0.0
  else:
    result = min(samples)
# Register the benchmark that `runBenchmarks` measures first and uses as the
# global baseline. Its body is only a `memoryClobber()` call, and the long
# name is the one `getGlobalBenchmarkBaselineIndex` searches for.
bench(GlobalBenchmarkBaselineWithSillyObfuscatingTail):
  memoryClobber()
proc getGlobalBenchmarkBaselineIndex(): auto =
  ## Returns the position in `benchmarks` of the global baseline benchmark
  ## registered by this module.
  ##
  ## Raises `KeyError` when no entry carries both this module's file name and
  ## the baseline's name.
  const baselineName = "GlobalBenchmarkBaselineWithSillyObfuscatingTail"
  let ownFile = currentSourcePathShort()
  for index, entry in benchmarks:
    if entry.name == baselineName and entry.fileName == ownFile:
      return index
  raise newException(KeyError, "Could not find GlobalBenchmarkBaseline " &
                               "in the benchmark list!")
type
  # One row of the results table. `timeInNs` is filled by `runBenchmarks` with
  # the measured nanoseconds per iteration.
  BenchmarkResult = tuple
    fileName: string
    name: string
    relative: bool
    timeInNs: float64

# Forward declaration: `runBenchmarks` calls this proc before its definition.
proc printBenchmarkResults(data: openArray[BenchmarkResult])
proc runBenchmarks*() =
  ## Runs every registered benchmark and prints the results to stdout.
  ##
  ## The global baseline benchmark is measured first. It captures how
  ## expensive the iterating itself is, so its cost is subtracted from every
  ## other benchmark's measurement.
  let baselineIndex = getGlobalBenchmarkBaselineIndex()
  let globalBaseline = runBenchmarkGetNsPerIteration(
    benchmarks[baselineIndex].function, 0.0)

  var results: seq[BenchmarkResult] = @[]
  results.add((fileName: "GlobalBenchmark", name: "GlobalBenchmark",
               relative: false, timeInNs: globalBaseline))

  for index, entry in benchmarks:
    if index != baselineIndex:
      let measured = runBenchmarkGetNsPerIteration(entry.function,
                                                   globalBaseline)
      results.add((fileName: entry.fileName, name: entry.name,
                   relative: entry.relative, timeInNs: measured))

  printBenchmarkResults(results)
proc printBenchmarkResults(data: openArray[BenchmarkResult]) =
  ## Writes the benchmark results to stdout as a table `width` characters
  ## wide.
  ##
  ## A header naming the file is printed whenever the file name changes from
  ## one record to the next. Every row shows the benchmark name, the time per
  ## iteration and the iterations per second. Rows of relative benchmarks also
  ## show their speed relative to the most recent non-relative row; for all
  ## other rows that column is left blank so the remaining columns stay
  ## aligned with the header.
  let width = 76
  let tail = "relative time/iter iters/s"
  # Space left for the benchmark name: the full width minus the column block
  # and the one space separating the name from it.
  let nameWidth = width - len(tail) - 1

  proc separatingLine(padChar: char) =
    stdout.writeLine(padChar.repeat(width))

  proc header(fileName: string) =
    separatingLine('=')
    stdout.writeLine("{:<{}}{}".fmt(fileName, width-len(tail), tail))
    separatingLine('=')

  var
    lastFile = ""
    # Infinite until the first non-relative row has supplied a baseline.
    baselineNsPerIteration: float64 = Inf

  for record in data:
    let file = record.fileName
    if file != lastFile:
      header(file)
      lastFile = file

    let
      nsPerIteration = record.timeInNs
      secPerIteration = nsPerIteration / 1e9
      itersPerSec =
        if secPerIteration == 0.0: Inf
        else: (1.0 / secPerIteration)

    if record.relative:
      let relative = baselineNsPerIteration / nsPerIteration
      stdout.writeLine("{0:<{1}.{1}} {2:>8.2%} {3:>9.9} {4:>7.7}".fmt(
        record.name, nameWidth, relative,
        readableTime(secPerIteration, 2), readableMetric(itersPerSec, 2)))
    else:
      # A non-relative row becomes the baseline for the relative rows after it.
      baselineNsPerIteration = nsPerIteration
      # The empty string fills the 8-wide "relative" column with blanks.
      # Without it the two remaining values would land under the wrong
      # headings.
      stdout.writeLine("{0:<{1}.{1}} {2:>8} {3:>9.9} {4:>7.7}".fmt(
        record.name, nameWidth, "",
        readableTime(secPerIteration, 2), readableMetric(itersPerSec, 2)))