@@ -311,7 +311,7 @@ def run(self):
 
         benchmarks = [(layer, self.get_benchmark(c, layer, cuda)) for layer in self.args.layers]
         for layer, benchmark in benchmarks:
-            result = utils.benchmark_fn(benchmark, run_time=self.args.run_time, warmup=self.args.warmup)
+            result = utils.benchmark_fn(benchmark, run_time=self.args.run_time, warmup=self.args.warmup, cuda=cuda)
             result["#"] = str(i) + "/" + str(len(benchmarks) * len(params))
             result["N"] = n
             result["C"] = c
@@ -336,6 +336,9 @@ def run(self):
     def get_input(self, cuda, n, c, h, w, h_var, w_var, seed):
         inputs = []
         targets = []
+        device = 'cpu'
+        if cuda:
+            device = 'cuda'
 
         torch.manual_seed(seed)
         random.seed(seed)
@@ -344,10 +347,10 @@ def get_input(self, cuda, n, c, h, w, h_var, w_var, seed):
         for i in range(n):
            h_res = max(1, int(random.gauss(h, h_var)))
            w_res = max(1, int(random.gauss(w, w_var)))
-           input_i = torch.randn(c, h_res, w_res)
-           target_i = torch.randint(1, (h_res, w_res), dtype=torch.int64)
-           inputs.append(input_i.cuda() if cuda else input_i)
-           targets.append(target_i.cuda() if cuda else target_i)
+           input_i = torch.randn(c, h_res, w_res, device=device)
+           target_i = torch.randint(1, (h_res, w_res), dtype=torch.int64, device=device)
+           inputs.append(input_i)
+           targets.append(target_i)
        if cuda:
            # Synchronize copy operations so they don't influence the benchmark
            torch.cuda.synchronize()
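The diff above swaps CPU-side allocation followed by `.cuda()` copies for allocation directly on the target device. A minimal standalone sketch of that pattern is below; the tensor shapes and the device selection via torch.cuda.is_available() are illustrative assumptions, not code from this benchmark.

import torch

# Sketch only: choose the device once, then allocate tensors directly on it
# instead of creating them on the CPU and copying them over with .cuda().
device = 'cuda' if torch.cuda.is_available() else 'cpu'  # assumed runtime check

# Placeholder shapes; the benchmark draws its sizes from random.gauss instead.
input_i = torch.randn(3, 128, 128, device=device)
target_i = torch.randint(1, (128, 128), dtype=torch.int64, device=device)

if device == 'cuda':
    # Wait for pending GPU work so setup cost does not leak into timed sections.
    torch.cuda.synchronize()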