Skip to content
This repository was archived by the owner on Jul 24, 2024. It is now read-only.

Commit cc6e9e4

Browse files
authored
Added sync for nvidia backend (#84)
1 parent 8d51b4f commit cc6e9e4

File tree

1 file changed

+6
-0
lines changed

1 file changed

+6
-0
lines changed

dl_bench/utils.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,10 @@ def to_device(self, x: torch.Tensor):
152152
else:
153153
raise ValueError("Unknown device")
154154

155+
def sync(self):
156+
if self.device_name == 'cuda':
157+
torch.cuda.synchronize()
158+
155159
def prepare_eval_transformer(self, model):
156160
model = model.to(memory_format=torch.channels_last)
157161

@@ -390,6 +394,7 @@ def inference(self, backend: Backend):
390394
# Duration is inconsistent now
391395
with tm.timeit("duration_s"):
392396
for i, x in enumerate(test_loader):
397+
backend.sync()
393398
s = get_time()
394399
x = backend.to_device(x)
395400
if backend.dtype != torch.float32:
@@ -405,6 +410,7 @@ def inference(self, backend: Backend):
405410
start = time.perf_counter()
406411
continue
407412

413+
backend.sync()
408414
fw_times.append(get_time() - s)
409415
n_items += len(x)
410416
outputs.append(y)

0 commit comments

Comments
 (0)