diff --git a/.github/workflows/execute-test-script.yml b/.github/workflows/execute-test-script.yml
index 20ac472..5d1f23c 100644
--- a/.github/workflows/execute-test-script.yml
+++ b/.github/workflows/execute-test-script.yml
@@ -116,7 +116,7 @@ jobs:
           URL="--url ${{ secrets.DB_URL }}"
         fi
 
-        export DL_BENCH_ARGS="--host ${{ inputs.runner_type }} --compiler ${{ inputs.compiler }} --device ${{ inputs.device }} --tag ${{ inputs.tag }} -v ${URL}"
+        export DL_BENCH_ARGS="--host ${{ inputs.runner_type }} --compiler ${{ inputs.compiler }} --device ${{ inputs.device }} --tag ${{ inputs.tag }} ${URL}"
 
         # We mainly want to verify our own backend
         if [[ ${{ inputs.compiler }} != *torch_mlir* ]]; then
diff --git a/.github/workflows/test-single-config.yml b/.github/workflows/test-single-config.yml
index ea2d1de..3e8b323 100644
--- a/.github/workflows/test-single-config.yml
+++ b/.github/workflows/test-single-config.yml
@@ -78,7 +78,6 @@ jobs:
       torch_mlir_repo: ${{ inputs.torch_mlir_repo }}
       torch_mlir_branch: ${{ inputs.torch_mlir_branch }}
       runner_type: ${{ inputs.runner_type }}
-      shutdown_cloud_runner: ${{ inputs.shutdown_cloud_runner }}
       test_script: ${{ matrix.test_script }}
     secrets:
       DB_URL: ${{ secrets.DB_URL }}
diff --git a/dl_bench/cli/launcher.py b/dl_bench/cli/launcher.py
index 6115496..fcf9025 100644
--- a/dl_bench/cli/launcher.py
+++ b/dl_bench/cli/launcher.py
@@ -110,9 +110,6 @@ def parse_args():
     parser.add_argument(
         "-o", "--output", required=False, help="Path to output report file."
     )
-    parser.add_argument(
-        "-v", "--verbose", required=False, action="store_true", help="Verbose mode."
-    )
     parser.add_argument(
         "--skip_verification",
         required=False,
@@ -185,16 +182,17 @@ def main():
 
     db = BenchmarkDb(args.url)
 
-    if args.verbose:
-        print("Report:")
-        print(
-            "TFLOPS: {:.3}".format(
-                results.get("flops_per_sample", 0)
-                * results.get("samples_per_s", 0)
-                / (10**12)
-            )
+    print("Report:")
+    print("FPS: {:.1f}".format(results.get("samples_per_s", 0)))
+    print(
+        "TFLOPS: {:.3}".format(
+            results.get("flops_per_sample", 0)
+            * results.get("samples_per_s", 0)
+            / (10**12)
         )
-        pprint.pprint(report)
+    )
+    pprint.pprint(report)
+    pprint.pprint(results)
 
     if args.output is not None:
         with open(args.output, "w", encoding="UTF-8") as out:
diff --git a/dl_bench/utils.py b/dl_bench/utils.py
index 4d5f673..96bed2e 100644
--- a/dl_bench/utils.py
+++ b/dl_bench/utils.py
@@ -385,54 +385,61 @@ def inference(self, backend: Backend):
         self.compile(sample, backend)
 
         n_items = 0
-
-        self.net.eval()
         outputs = []
         fw_times = []
+
+        self.net.eval()
         with torch.no_grad():
             start = time.perf_counter()
-            # Duration is inconsistent now
-            with tm.timeit("duration_s"):
-                for i, x in enumerate(test_loader):
-                    backend.sync()
-                    s = get_time()
-                    x = backend.to_device(x)
-                    if backend.dtype != torch.float32:
-                        with torch.autocast(
-                            device_type=backend.device_name,
-                            dtype=backend.dtype,
-                        ):
-                            y = self.net(x)
-                    else:
+            for i, x in enumerate(test_loader):
+                backend.sync()
+                s = get_time()
+                x = backend.to_device(x)
+                if backend.dtype != torch.float32:
+                    with torch.autocast(
+                        device_type=backend.device_name,
+                        dtype=backend.dtype,
+                    ):
                         y = self.net(x)
+                else:
+                    y = self.net(x)
 
-                    if i < self.warmup_batches:
-                        start = time.perf_counter()
-                        continue
+                backend.sync()
 
-                    backend.sync()
-                    fw_times.append(get_time() - s)
-                    n_items += len(x)
-                    outputs.append(y)
+                if i < self.warmup_batches:
+                    # We restart timer because that was just a warmup
+                    start = time.perf_counter()
+                    continue
 
-                    # early stopping if we have 10+ batches and were running for 10+ seconds
-                    if (
-                        (time.perf_counter() - start) > self.min_seconds
-                        and n_items >= self.batch_size * self.min_batches
-                    ):
-                        break
+                fw_times.append(get_time() - s)
+                n_items += len(x)
+                outputs.append(y)
+
+                # early stopping if we have 10+ batches and were running for 10+ seconds
+                if (
+                    (time.perf_counter() - start) > self.min_seconds
+                    and n_items >= self.batch_size * self.min_batches
+                ):
+                    break
+
+                if (get_time() - start) > max_time:
+                    break
 
-                    if (get_time() - start) > max_time:
-                        break
+        stop = get_time()
 
         print(
             f"Latency 0%-5%-50%-95%-100% are: {np.percentile(fw_times, [0, 5, 50, 95, 100])}"
         )
         results = tm.get_results()
-        results["duration_s"] = get_time() - start
+        results["duration_s"] = stop - start
         results["samples_per_s"] = n_items / sum(fw_times)
+        results["dirty_items_per_s"] = n_items / results["duration_s"]
         results["flops_per_sample"] = self.flops_per_sample
+        results["n_items"] = n_items
+        results["p50"] = np.percentile(fw_times, 50)
+        results["p90"] = np.percentile(fw_times, 90)
+        results["p100"] = max(fw_times)
 
         return results, outputs
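
For context, here is a minimal, self-contained sketch of the measurement pattern the `dl_bench/utils.py` hunk converges on: restart the wall clock after the warmup batches, time each forward pass individually, stop early once both a minimum runtime and a minimum batch budget are met (or unconditionally past a hard time cap), and derive both a clean and a "dirty" throughput figure. Everything below is illustrative rather than taken from the repo: `run_inference_benchmark`, the budget constants, and the toy model/loader are stand-ins invented for this sketch, and the real loop additionally routes through `Backend.sync()`, `Backend.to_device()`, and autocast.

```python
import time

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

# Illustrative budgets; the real values come from the benchmark config
# (self.warmup_batches, self.min_seconds, self.min_batches, max_time).
WARMUP_BATCHES = 3
MIN_SECONDS = 10   # "10+ seconds", per the comment in the patch
MIN_BATCHES = 10   # "10+ batches"
MAX_TIME = 180     # hard cap, an assumed value for this sketch


def run_inference_benchmark(net: nn.Module, loader: DataLoader) -> dict:
    """Measure per-batch forward latency the way the patched loop does."""
    net.eval()
    fw_times = []
    n_items = 0

    with torch.no_grad():
        start = time.perf_counter()
        for i, x in enumerate(loader):
            s = time.perf_counter()
            _ = net(x)  # on an accelerator you would sync before reading the clock

            if i < WARMUP_BATCHES:
                # Restart the wall clock: warmup iterations must not count.
                start = time.perf_counter()
                continue

            fw_times.append(time.perf_counter() - s)
            n_items += len(x)

            elapsed = time.perf_counter() - start
            # Early stop once we have both enough runtime and enough batches...
            if elapsed > MIN_SECONDS and len(fw_times) >= MIN_BATCHES:
                break
            # ...and bail out unconditionally past the hard time budget.
            if elapsed > MAX_TIME:
                break
    duration_s = time.perf_counter() - start

    return {
        "duration_s": duration_s,
        # Clean throughput: only the summed forward times count.
        "samples_per_s": n_items / sum(fw_times),
        # "Dirty" throughput: wall clock, so dataloading and host overhead count.
        "dirty_items_per_s": n_items / duration_s,
        "n_items": n_items,
        "p50": float(np.percentile(fw_times, 50)),
        "p90": float(np.percentile(fw_times, 90)),
        "p100": max(fw_times),
    }


if __name__ == "__main__":
    # Toy workload: a DataLoader over a plain tensor yields stacked batches.
    loader = DataLoader(torch.randn(256, 64), batch_size=32)
    print(run_inference_benchmark(nn.Linear(64, 64), loader))
```

The split between `samples_per_s` (summed per-batch forward times) and `dirty_items_per_s` (total wall-clock duration) is the substance of the change: the first isolates model throughput, the second exposes dataloader and host-side overhead, and `p50`/`p90`/`p100` put the latency tail into the stored results instead of only into stdout.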