This repository was archived by the owner on Jul 24, 2024. It is now read-only.

Commit bd18c2d

Added more measurement info like p50, p90 (#87)
1 parent 6886feb commit bd18c2d

4 files changed: 49 additions and 45 deletions

.github/workflows/execute-test-script.yml

Lines changed: 1 addition & 1 deletion

@@ -116,7 +116,7 @@ jobs:
             URL="--url ${{ secrets.DB_URL }}"
           fi

-          export DL_BENCH_ARGS="--host ${{ inputs.runner_type }} --compiler ${{ inputs.compiler }} --device ${{ inputs.device }} --tag ${{ inputs.tag }} -v ${URL}"
+          export DL_BENCH_ARGS="--host ${{ inputs.runner_type }} --compiler ${{ inputs.compiler }} --device ${{ inputs.device }} --tag ${{ inputs.tag }} ${URL}"

          # We mainly want to verify our own backend
          if [[ ${{ inputs.compiler }} != *torch_mlir* ]]; then
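The dropped "-v" pairs with the launcher.py change below, which removes the "-v"/"--verbose" CLI flag; keeping "-v" in DL_BENCH_ARGS would make the launcher's argparse reject the invocation. A minimal standalone sketch of that failure mode (not code from this repository):

import argparse

# Parser with "--verbose" already removed, mirroring the launcher.py change below.
parser = argparse.ArgumentParser()
parser.add_argument("--url", required=False)

# Passing the old "-v" now fails: argparse prints
# "error: unrecognized arguments: -v" and exits.
parser.parse_args(["-v", "--url", "http://example"])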

.github/workflows/test-single-config.yml

Lines changed: 0 additions & 1 deletion

@@ -78,7 +78,6 @@ jobs:
       torch_mlir_repo: ${{ inputs.torch_mlir_repo }}
       torch_mlir_branch: ${{ inputs.torch_mlir_branch }}
       runner_type: ${{ inputs.runner_type }}
-      shutdown_cloud_runner: ${{ inputs.shutdown_cloud_runner }}
       test_script: ${{ matrix.test_script }}
     secrets:
       DB_URL: ${{ secrets.DB_URL }}

dl_bench/cli/launcher.py

Lines changed: 10 additions & 12 deletions

@@ -110,9 +110,6 @@ def parse_args():
     parser.add_argument(
         "-o", "--output", required=False, help="Path to output report file."
     )
-    parser.add_argument(
-        "-v", "--verbose", required=False, action="store_true", help="Verbose mode."
-    )
     parser.add_argument(
         "--skip_verification",
         required=False,
@@ -185,16 +182,17 @@ def main():

     db = BenchmarkDb(args.url)

-    if args.verbose:
-        print("Report:")
-        print(
-            "TFLOPS: {:.3}".format(
-                results.get("flops_per_sample", 0)
-                * results.get("samples_per_s", 0)
-                / (10**12)
-            )
+    print("Report:")
+    print("FPS: {:.1f}".format(results.get("samples_per_s", 0)))
+    print(
+        "TFLOPS: {:.3}".format(
+            results.get("flops_per_sample", 0)
+            * results.get("samples_per_s", 0)
+            / (10**12)
         )
-        pprint.pprint(report)
+    )
+    pprint.pprint(report)
+    pprint.pprint(results)

     if args.output is not None:
         with open(args.output, "w", encoding="UTF-8") as out:
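With this change the report is printed unconditionally and gains an FPS line next to TFLOPS. A minimal sketch of the reporting math with a made-up results dict (the numbers are illustrative only, not benchmark output):

results = {"samples_per_s": 512.0, "flops_per_sample": 8.2e9}  # illustrative values

fps = results.get("samples_per_s", 0)
tflops = results.get("flops_per_sample", 0) * results.get("samples_per_s", 0) / (10**12)

print("FPS: {:.1f}".format(fps))       # FPS: 512.0
print("TFLOPS: {:.3}".format(tflops))  # TFLOPS: 4.2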

dl_bench/utils.py

Lines changed: 38 additions & 31 deletions

@@ -385,54 +385,61 @@ def inference(self, backend: Backend):
         self.compile(sample, backend)

         n_items = 0
-
-        self.net.eval()
         outputs = []
         fw_times = []
+
+        self.net.eval()
         with torch.no_grad():
             start = time.perf_counter()
-            # Duration is inconsistent now
-            with tm.timeit("duration_s"):
-                for i, x in enumerate(test_loader):
-                    backend.sync()
-                    s = get_time()
-                    x = backend.to_device(x)
-                    if backend.dtype != torch.float32:
-                        with torch.autocast(
-                            device_type=backend.device_name,
-                            dtype=backend.dtype,
-                        ):
-                            y = self.net(x)
-                    else:
+            for i, x in enumerate(test_loader):
+                backend.sync()
+                s = get_time()
+                x = backend.to_device(x)
+                if backend.dtype != torch.float32:
+                    with torch.autocast(
+                        device_type=backend.device_name,
+                        dtype=backend.dtype,
+                    ):
                         y = self.net(x)
+                else:
+                    y = self.net(x)

-                    if i < self.warmup_batches:
-                        start = time.perf_counter()
-                        continue
+                backend.sync()

-                    backend.sync()
-                    fw_times.append(get_time() - s)
-                    n_items += len(x)
-                    outputs.append(y)
+                if i < self.warmup_batches:
+                    # We restart timer because that was just a warmup
+                    start = time.perf_counter()
+                    continue

-                    # early stopping if we have 10+ batches and were running for 10+ seconds
-                    if (
-                        (time.perf_counter() - start) > self.min_seconds
-                        and n_items >= self.batch_size * self.min_batches
-                    ):
-                        break
+                fw_times.append(get_time() - s)
+                n_items += len(x)
+                outputs.append(y)
+
+                # early stopping if we have 10+ batches and were running for 10+ seconds
+                if (
+                    (time.perf_counter() - start) > self.min_seconds
+                    and n_items >= self.batch_size * self.min_batches
+                ):
+                    break
+
+                if (get_time() - start) > max_time:
+                    break

-                    if (get_time() - start) > max_time:
-                        break
+        stop = get_time()

         print(
             f"Latency 0%-5%-50%-95%-100% are: {np.percentile(fw_times, [0, 5, 50, 95, 100])}"
         )

         results = tm.get_results()
-        results["duration_s"] = get_time() - start
+        results["duration_s"] = stop - start
         results["samples_per_s"] = n_items / sum(fw_times)
+        results["dirty_items_per_s"] = n_items / results["duration_s"]
         results["flops_per_sample"] = self.flops_per_sample
+        results["n_items"] = n_items
+        results["p50"] = np.percentile(fw_times, 50)
+        results["p90"] = np.percentile(fw_times, 90)
+        results["p100"] = max(fw_times)

         return results, outputs
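The new result fields separate pure forward-pass throughput (samples_per_s) from end-to-end throughput (dirty_items_per_s, which includes data loading and sync overhead) and add per-batch latency percentiles. A minimal sketch of the same calculations on synthetic timings (names and numbers are illustrative, not taken from the benchmark):

import numpy as np

fw_times = [0.051, 0.048, 0.050, 0.047, 0.095]  # seconds per measured batch (synthetic)
n_items = 5 * 32                                # 5 batches of 32 samples (synthetic)
duration_s = 0.30                               # wall-clock time incl. data loading (synthetic)

results = {
    "duration_s": duration_s,
    "samples_per_s": n_items / sum(fw_times),    # forward passes only
    "dirty_items_per_s": n_items / duration_s,   # includes everything outside the forward pass
    "n_items": n_items,
    "p50": np.percentile(fw_times, 50),          # median batch latency
    "p90": np.percentile(fw_times, 90),
    "p100": max(fw_times),                       # worst observed batch latency
}
print(results)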
