Commit 95a2010

Added sync for nvidia backend (#84)

1 parent: eee627b

File tree

1 file changed: +6 -0 lines changed

dl_bench/utils.py

Lines changed: 6 additions & 0 deletions
@@ -153,6 +153,10 @@ def to_device(self, x: torch.Tensor):
         else:
             raise ValueError("Unknown device")
 
+    def sync(self):
+        if self.device_name == 'cuda':
+            torch.cuda.synchronize()
+
     def prepare_eval_transformer(self, model):
         model = model.to(memory_format=torch.channels_last)
 
@@ -394,6 +398,7 @@ def inference(self, backend: Backend):
         # Duration is inconsistent now
         with tm.timeit("duration_s"):
             for i, x in enumerate(test_loader):
+                backend.sync()
                 s = get_time()
                 x = backend.to_device(x)
                 if backend.dtype != torch.float32:
@@ -409,6 +414,7 @@ def inference(self, backend: Backend):
                     start = time.perf_counter()
                     continue
 
+                backend.sync()
                 fw_times.append(get_time() - s)
                 n_items += len(x)
                 outputs.append(y)
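
Note on why the sync() calls matter: CUDA kernel launches are asynchronous, so without a device synchronization the host-side timer can stop before the GPU has actually finished the forward pass, making fw_times measure little more than launch overhead. A minimal sketch of the idea (illustrative only, not code from dl_bench; timed_forward and its parameters are hypothetical):

import time

import torch

def timed_forward(model, x, device_name: str):
    """Illustrative helper: synchronize around the timed region on CUDA."""
    if device_name == "cuda":
        torch.cuda.synchronize()  # drain previously queued GPU work before starting the clock
    start = time.perf_counter()
    y = model(x)
    if device_name == "cuda":
        torch.cuda.synchronize()  # wait until the forward pass has actually finished
    return y, time.perf_counter() - start

On CPU backends the synchronize calls are skipped, which matches the device_name check in the added Backend.sync() method.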
