diff --git a/torchao/sparsity/prototype/superblock/README.md b/torchao/sparsity/prototype/superblock/README.md index 91110735a..b78bd7875 100644 --- a/torchao/sparsity/prototype/superblock/README.md +++ b/torchao/sparsity/prototype/superblock/README.md @@ -45,13 +45,11 @@ At least one GPU: ## Benchmarking -For all our benchmarking results, you can run `benchmark.sh`. This will run benchmarks with random weights, only testing speedup. - +For all our benchmarking results, you can run `benchmark.sh`. These benchmarks were run on a NVIDIA-A10080GB, with cuSPARSELt v0.5.2. - ## Training Please refer to [TRAINING.md](TRAINING.md) for training from scratch. We use [Torchvision](https://github.com/pytorch/vision/tree/main/references/classification) as our framework for training. Supermask can be applied during training. diff --git a/torchao/sparsity/prototype/superblock/benchmark_results.txt b/torchao/sparsity/prototype/superblock/benchmark_results.txt index 2a94f8455..5197d0cb1 100644 --- a/torchao/sparsity/prototype/superblock/benchmark_results.txt +++ b/torchao/sparsity/prototype/superblock/benchmark_results.txt @@ -1,4 +1,8 @@ -vit_b_16,256,bfloat16,bsr,64,0.8,True,False,52.2149365234375,19.151608075806703 -vit_b_16,256,bfloat16,bsr,64,0.8,True,False,54.3076318359375,18.41361823732223 model,batch_size,dtype,sparsity,bsr,sparsity_level,quantization,tune_kernel_params,latency,img/s vit_h_14,256,bfloat16,None,None,0.0,False,False,489.89296875,2.0412622017245474 +vit_h_14,256,bfloat16,None,None,0.0,True,False,452.5438671875,2.2097305311303126 +vit_h_14,256,bfloat16,semi_structured,None,0.0,False,False,457.6992578125,2.184840772474352 +vit_h_14,256,bfloat16,bsr,64,0.8,False,False,361.4048046875,2.7669803694632154 +vit_h_14,256,bfloat16,bsr,64,0.84,False,False,343.2491015625,2.913336103278675 +vit_h_14,256,bfloat16,bsr,64,0.9,False,False,315.3134375,3.1714474585308468 +vit_h_14,256,bfloat16,semi_structured,None,0.0,True,False,434.479140625,2.301606467370323 diff --git a/torchao/sparsity/prototype/superblock/evaluate.py b/torchao/sparsity/prototype/superblock/evaluate.py index e10953348..aa181f85c 100644 --- a/torchao/sparsity/prototype/superblock/evaluate.py +++ b/torchao/sparsity/prototype/superblock/evaluate.py @@ -59,9 +59,37 @@ def main(args): accelerate_with_sparsity(model, args) criterion = torch.nn.CrossEntropyLoss(label_smoothing=args.label_smoothing) - evaluate(model, criterion, data_loader_test, device=device, dtype=torch.bfloat16) + return evaluate(model, criterion, data_loader_test, device=device, dtype=torch.bfloat16) if __name__ == "__main__": args = get_args_parser(evaluate=True).parse_args() - main(args) + result = main(args) + header = [ + "model", + "batch_size", + "dtype", + "sparsity", + "bsr", + "sparsity_level", + "quantization", + "top-1 acc", + ] + result_string = ",".join( + str(_) + for _ in [ + args.model, + args.batch_size, + args.dtype, + args.sparsity, + args.bsr, + args.sparsity_linear, + args.quantization, + result, + ] + ) + with open("evaluation_results.txt", "a") as f: + if args.header: + f.write(",".join(header) + "\n") + f.write(result_string + "\n") + print(result_string)