Skip to content

Commit 8d43f22

Browse files
committed
Add a pure Python SQL engine benchmark.
1 parent 31e9b11 commit 8d43f22

File tree

2 files changed

+82
-33
lines changed

2 files changed

+82
-33
lines changed
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
sqlglot==4.6.0
1+
sqlglot==16.2.1

pyperformance/data-files/benchmarks/bm_sqlglot/run_benchmark.py

+81-32
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import pyperf
22

33
from sqlglot import parse_one, transpile
4+
from sqlglot.executor import execute
45
from sqlglot.optimizer import optimize, normalize
56

67

@@ -48,79 +49,79 @@
4849

4950
TPCH_SCHEMA = {
5051
"lineitem": {
51-
"l_orderkey": "uint64",
52-
"l_partkey": "uint64",
53-
"l_suppkey": "uint64",
54-
"l_linenumber": "uint64",
55-
"l_quantity": "float64",
56-
"l_extendedprice": "float64",
57-
"l_discount": "float64",
58-
"l_tax": "float64",
52+
"l_orderkey": "int",
53+
"l_partkey": "int",
54+
"l_suppkey": "int",
55+
"l_linenumber": "int",
56+
"l_quantity": "double",
57+
"l_extendedprice": "double",
58+
"l_discount": "double",
59+
"l_tax": "double",
5960
"l_returnflag": "string",
6061
"l_linestatus": "string",
61-
"l_shipdate": "date32",
62-
"l_commitdate": "date32",
63-
"l_receiptdate": "date32",
62+
"l_shipdate": "date",
63+
"l_commitdate": "date",
64+
"l_receiptdate": "date",
6465
"l_shipinstruct": "string",
6566
"l_shipmode": "string",
6667
"l_comment": "string",
6768
},
6869
"orders": {
69-
"o_orderkey": "uint64",
70-
"o_custkey": "uint64",
70+
"o_orderkey": "int",
71+
"o_custkey": "int",
7172
"o_orderstatus": "string",
72-
"o_totalprice": "float64",
73-
"o_orderdate": "date32",
73+
"o_totalprice": "double",
74+
"o_orderdate": "date",
7475
"o_orderpriority": "string",
7576
"o_clerk": "string",
76-
"o_shippriority": "int32",
77+
"o_shippriority": "int",
7778
"o_comment": "string",
7879
},
7980
"customer": {
80-
"c_custkey": "uint64",
81+
"c_custkey": "int",
8182
"c_name": "string",
8283
"c_address": "string",
83-
"c_nationkey": "uint64",
84+
"c_nationkey": "int",
8485
"c_phone": "string",
85-
"c_acctbal": "float64",
86+
"c_acctbal": "double",
8687
"c_mktsegment": "string",
8788
"c_comment": "string",
8889
},
8990
"part": {
90-
"p_partkey": "uint64",
91+
"p_partkey": "int",
9192
"p_name": "string",
9293
"p_mfgr": "string",
9394
"p_brand": "string",
9495
"p_type": "string",
95-
"p_size": "int32",
96+
"p_size": "int",
9697
"p_container": "string",
97-
"p_retailprice": "float64",
98+
"p_retailprice": "double",
9899
"p_comment": "string",
99100
},
100101
"supplier": {
101-
"s_suppkey": "uint64",
102+
"s_suppkey": "int",
102103
"s_name": "string",
103104
"s_address": "string",
104-
"s_nationkey": "uint64",
105+
"s_nationkey": "int",
105106
"s_phone": "string",
106-
"s_acctbal": "float64",
107+
"s_acctbal": "double",
107108
"s_comment": "string",
108109
},
109110
"partsupp": {
110-
"ps_partkey": "uint64",
111-
"ps_suppkey": "uint64",
112-
"ps_availqty": "int32",
113-
"ps_supplycost": "float64",
111+
"ps_partkey": "int",
112+
"ps_suppkey": "int",
113+
"ps_availqty": "int",
114+
"ps_supplycost": "double",
114115
"ps_comment": "string",
115116
},
116117
"nation": {
117-
"n_nationkey": "uint64",
118+
"n_nationkey": "int",
118119
"n_name": "string",
119-
"n_regionkey": "uint64",
120+
"n_regionkey": "int",
120121
"n_comment": "string",
121122
},
122123
"region": {
123-
"r_regionkey": "uint64",
124+
"r_regionkey": "int",
124125
"r_name": "string",
125126
"r_comment": "string",
126127
},
@@ -164,10 +165,58 @@ def bench_normalize(loops):
164165
return elapsed
165166

166167

168+
def bench_execute(loops):
    """Benchmark sqlglot's ``execute`` on an in-memory join/aggregate query.

    Three tables (``sushi``, ``order_items``, ``orders``) are built once as
    lists of row dicts: a few hand-written seed rows followed by 10000
    generated rows each. The same query is then executed ``loops`` times.
    Only the ``execute`` call is timed; table construction is not.

    Args:
        loops: Number of timed executions requested by the pyperf runner.

    Returns:
        The sum of the per-call durations measured with
        ``pyperf.perf_counter``.
    """
    sushi = [
        {"id": 1, "price": 1.0},
        {"id": 2, "price": 2.0},
        {"id": 3, "price": 3.0},
    ]
    order_items = [
        {"sushi_id": 1, "order_id": 1},
        {"sushi_id": 1, "order_id": 1},
        {"sushi_id": 2, "order_id": 1},
        {"sushi_id": 3, "order_id": 2},
    ]
    orders = [
        {"id": 1, "user_id": 1},
        {"id": 2, "user_id": 2},
    ]

    # Bulk rows. The generated ids start at 0, so ids 1-3 overlap with the
    # seed rows above, and generated prices are ints while seed prices are
    # floats. Both quirks are kept so the measured workload stays the same.
    generated_ids = range(10000)
    sushi.extend({"id": n, "price": n} for n in generated_ids)
    order_items.extend({"sushi_id": n, "order_id": n} for n in generated_ids)
    orders.extend({"id": n, "user_id": n} for n in generated_ids)

    # Key order matches the original literal: sushi, order_items, orders.
    tables = {
        "sushi": sushi,
        "order_items": order_items,
        "orders": orders,
    }

    query = """
        SELECT
          o.user_id,
          s.price / 2 AS half_price,
          AVG(s.price) AS avg_price,
          SUM(s.price) AS price
        FROM orders o
        JOIN order_items i
          ON o.id = i.order_id
        JOIN sushi s
          ON i.sushi_id = s.id
        GROUP BY o.user_id
        """

    elapsed = 0
    for _ in range(loops):
        started = pyperf.perf_counter()
        execute(query, tables=tables)
        elapsed += pyperf.perf_counter() - started
    return elapsed
213+
214+
167215
if __name__ == "__main__":
    # Script entry point: create the pyperf runner and register every
    # SQLGlot benchmark with it.
    runner = pyperf.Runner()
    runner.metadata['description'] = "SQLGlot benchmark"

    # (benchmark name, time function) pairs, registered in this order.
    benchmarks = (
        ("sqlglot_parse", bench_parse),
        ("sqlglot_transpile", bench_transpile),
        ("sqlglot_optimize", bench_optimize),
        ("sqlglot_normalize", bench_normalize),
        ("sqlglot_execute", bench_execute),
    )
    for bench_name, bench_func in benchmarks:
        runner.bench_time_func(bench_name, bench_func)

0 commit comments

Comments
 (0)