44import re
55from datetime import datetime
66
def run_benchmark(commands):
    """Run benchmark commands through the shell and collect their stdout.

    Args:
        commands: Iterable of shell command strings. A single string is also
            accepted (the older one-command calling convention) and is
            treated as one command instead of being iterated character by
            character.

    Returns:
        A list holding the captured stdout text of each command, in the
        order the commands were given.
    """
    if isinstance(commands, str):
        commands = [commands]

    results = []
    for cmd in commands:
        print(f"Running: {cmd}")
        # shell=True is kept on purpose: the command strings rely on shell
        # quoting. Only pass trusted, developer-written commands here.
        completed = subprocess.run(cmd, shell=True, capture_output=True, text=True)
        if completed.returncode != 0:
            # Still record whatever was printed, but do not hide the failure.
            print(f"Warning: command exited with status {completed.returncode}: {cmd}")
        results.append(completed.stdout)
    return results
14+
def parse_output(outputs):
    """Extract per-kernel timings from benchmark output text.

    Every line is stripped and matched against the pattern
    ``<KERNEL>: <runs> runs - <time> us/run`` where ``<KERNEL>`` starts with
    ``MUL_MAT(`` or ``MUL_MAT_ID(``. Lines that do not match are ignored.

    Args:
        outputs: Iterable of output texts (one per executed command). A
            single string is also accepted and treated as one output.

    Returns:
        A dict mapping the kernel description string to its time in
        microseconds per run (float). If a kernel appears more than once,
        the last occurrence wins.
    """
    if isinstance(outputs, str):
        outputs = [outputs]

    pattern = re.compile(r"^(MUL_MAT(?:_ID)?\(.*?\)):\s+\d+\s+runs\s+-\s+([\d.]+)\s+us/run")
    perf_data = {}

    for output in outputs:
        for line in output.splitlines():
            match = pattern.match(line.strip())
            if match:
                kernel = match.group(1)
                perf_data[kernel] = float(match.group(2))
    return perf_data
2328
24- def generate_markdown (before , after , label_before , label_after ):
29+ def generate_markdown (before , after ):
2530 from datetime import datetime
2631 timestamp = datetime .now ().strftime ("%Y-%m-%d_%H-%M-%S" )
2732 filename = f"perf_comparison_{ timestamp } .md"
2833
2934 lines = [
3035 f"# Performance Comparison" ,
31- f"Comparing `{ label_before } ` vs `{ label_after } `\n " ,
32- "| Kernel | {0} (us/run) | {1} (us/run) | Δ % |" .format (label_before , label_after ),
36+ "| Kernel | Before(us/run) | After(us/run) | Δ % |" ,
3337 "|--------|--------------|--------------|-----|"
3438 ]
3539
@@ -38,7 +42,7 @@ def generate_markdown(before, after, label_before, label_after):
3842 val2 = after .get (kernel )
3943
4044 if val1 is not None and val2 is not None :
41- delta = ((val2 - val1 ) / val1 ) * 100
45+ delta = ((val1 - val2 ) / val1 ) * 100
4246 lines .append (f"| `{ kernel } ` | { val1 :.2f} | { val2 :.2f} | { delta :+.2f} % |" )
4347 elif val1 is not None :
4448 lines .append (f"| `{ kernel } ` | { val1 :.2f} | N/A | N/A |" )
@@ -50,14 +54,73 @@ def generate_markdown(before, after, label_before, label_after):
5054 print (f"Markdown report saved to: { filename } " )
5155
if __name__ == "__main__":
    import os

    # Show GGML-related environment variables so the report can be tied to
    # the configuration it was produced with (equivalent of `env | grep GGML`,
    # but without spawning a shell).
    for env_name, env_value in os.environ.items():
        if "GGML" in env_name or "GGML" in env_value:
            print(f"{env_name}={env_value}")
    print()

    # The two builds being compared. Customize these paths.
    bin_before = '/home/stefan/sabac/llama.cpp/./build/bin/test-backend-ops'
    bin_after = '/home/stefan/finalno/llama-stefan.cpp/./build/bin/test-backend-ops'

    # False: one run per build with the op filter MUL_MAT_ID (default).
    # True:  one run per build and per type_a value below, each restricted to
    #        a single fixed-shape MUL_MAT case.
    per_type_runs = False

    type_a_values = [
        "f32", "f16", "bf16",
        "q4_0", "q4_1", "q5_0", "q5_1", "q8_0", "mxfp4",
        "q2_K", "q3_K", "q4_K", "q5_K", "q6_K",
        "iq2_xxs", "iq2_xs", "iq2_s", "iq3_xxs",
        "iq1_s", "iq1_m", "iq4_nl", "iq3_s", "iq4_xs",
    ]

    if per_type_runs:
        # The filter is wrapped in double quotes because it contains
        # parentheses and brackets that the shell would otherwise interpret.
        op_filters = [
            f'"MUL_MAT(type_a={type_a},type_b=f32,m=4096,n=512,k=14336,'
            f'bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0,o=1)"'
            for type_a in type_a_values
        ]
    else:
        op_filters = ['MUL_MAT_ID']

    cmd_before = [f'{bin_before} perf -b Vulkan0 -o {op}' for op in op_filters]
    cmd_after = [f'{bin_after} perf -b Vulkan0 -o {op}' for op in op_filters]

    print("BEFORE")
    output_before = run_benchmark(cmd_before)
    print("AFTER")
    output_after = run_benchmark(cmd_after)

    data_before = parse_output(output_before)
    data_after = parse_output(output_after)

    generate_markdown(data_before, data_after)
0 commit comments