|
1 | 1 | import pyperf
|
2 | 2 |
|
3 | 3 | from sqlglot import parse_one, transpile
|
| 4 | +from sqlglot.executor import execute |
4 | 5 | from sqlglot.optimizer import optimize, normalize
|
5 | 6 |
|
6 | 7 |
|
|
48 | 49 |
|
49 | 50 | TPCH_SCHEMA = {
|
50 | 51 | "lineitem": {
|
51 |
| - "l_orderkey": "uint64", |
52 |
| - "l_partkey": "uint64", |
53 |
| - "l_suppkey": "uint64", |
54 |
| - "l_linenumber": "uint64", |
55 |
| - "l_quantity": "float64", |
56 |
| - "l_extendedprice": "float64", |
57 |
| - "l_discount": "float64", |
58 |
| - "l_tax": "float64", |
| 52 | + "l_orderkey": "int", |
| 53 | + "l_partkey": "int", |
| 54 | + "l_suppkey": "int", |
| 55 | + "l_linenumber": "int", |
| 56 | + "l_quantity": "double", |
| 57 | + "l_extendedprice": "double", |
| 58 | + "l_discount": "double", |
| 59 | + "l_tax": "double", |
59 | 60 | "l_returnflag": "string",
|
60 | 61 | "l_linestatus": "string",
|
61 |
| - "l_shipdate": "date32", |
62 |
| - "l_commitdate": "date32", |
63 |
| - "l_receiptdate": "date32", |
| 62 | + "l_shipdate": "date", |
| 63 | + "l_commitdate": "date", |
| 64 | + "l_receiptdate": "date", |
64 | 65 | "l_shipinstruct": "string",
|
65 | 66 | "l_shipmode": "string",
|
66 | 67 | "l_comment": "string",
|
67 | 68 | },
|
68 | 69 | "orders": {
|
69 |
| - "o_orderkey": "uint64", |
70 |
| - "o_custkey": "uint64", |
| 70 | + "o_orderkey": "int", |
| 71 | + "o_custkey": "int", |
71 | 72 | "o_orderstatus": "string",
|
72 |
| - "o_totalprice": "float64", |
73 |
| - "o_orderdate": "date32", |
| 73 | + "o_totalprice": "double", |
| 74 | + "o_orderdate": "date", |
74 | 75 | "o_orderpriority": "string",
|
75 | 76 | "o_clerk": "string",
|
76 |
| - "o_shippriority": "int32", |
| 77 | + "o_shippriority": "int", |
77 | 78 | "o_comment": "string",
|
78 | 79 | },
|
79 | 80 | "customer": {
|
80 |
| - "c_custkey": "uint64", |
| 81 | + "c_custkey": "int", |
81 | 82 | "c_name": "string",
|
82 | 83 | "c_address": "string",
|
83 |
| - "c_nationkey": "uint64", |
| 84 | + "c_nationkey": "int", |
84 | 85 | "c_phone": "string",
|
85 |
| - "c_acctbal": "float64", |
| 86 | + "c_acctbal": "double", |
86 | 87 | "c_mktsegment": "string",
|
87 | 88 | "c_comment": "string",
|
88 | 89 | },
|
89 | 90 | "part": {
|
90 |
| - "p_partkey": "uint64", |
| 91 | + "p_partkey": "int", |
91 | 92 | "p_name": "string",
|
92 | 93 | "p_mfgr": "string",
|
93 | 94 | "p_brand": "string",
|
94 | 95 | "p_type": "string",
|
95 |
| - "p_size": "int32", |
| 96 | + "p_size": "int", |
96 | 97 | "p_container": "string",
|
97 |
| - "p_retailprice": "float64", |
| 98 | + "p_retailprice": "double", |
98 | 99 | "p_comment": "string",
|
99 | 100 | },
|
100 | 101 | "supplier": {
|
101 |
| - "s_suppkey": "uint64", |
| 102 | + "s_suppkey": "int", |
102 | 103 | "s_name": "string",
|
103 | 104 | "s_address": "string",
|
104 |
| - "s_nationkey": "uint64", |
| 105 | + "s_nationkey": "int", |
105 | 106 | "s_phone": "string",
|
106 |
| - "s_acctbal": "float64", |
| 107 | + "s_acctbal": "double", |
107 | 108 | "s_comment": "string",
|
108 | 109 | },
|
109 | 110 | "partsupp": {
|
110 |
| - "ps_partkey": "uint64", |
111 |
| - "ps_suppkey": "uint64", |
112 |
| - "ps_availqty": "int32", |
113 |
| - "ps_supplycost": "float64", |
| 111 | + "ps_partkey": "int", |
| 112 | + "ps_suppkey": "int", |
| 113 | + "ps_availqty": "int", |
| 114 | + "ps_supplycost": "double", |
114 | 115 | "ps_comment": "string",
|
115 | 116 | },
|
116 | 117 | "nation": {
|
117 |
| - "n_nationkey": "uint64", |
| 118 | + "n_nationkey": "int", |
118 | 119 | "n_name": "string",
|
119 |
| - "n_regionkey": "uint64", |
| 120 | + "n_regionkey": "int", |
120 | 121 | "n_comment": "string",
|
121 | 122 | },
|
122 | 123 | "region": {
|
123 |
| - "r_regionkey": "uint64", |
| 124 | + "r_regionkey": "int", |
124 | 125 | "r_name": "string",
|
125 | 126 | "r_comment": "string",
|
126 | 127 | },
|
@@ -164,10 +165,58 @@ def bench_normalize(loops):
|
164 | 165 | return elapsed
|
165 | 166 |
|
166 | 167 |
|
def bench_execute(loops):
    """Time ``loops`` runs of ``execute`` on a join/aggregate query.

    Three in-memory tables (``sushi``, ``order_items``, ``orders``) are
    built once per call: a few hand-written rows followed by 10000
    generated rows each. Only the ``execute`` call itself is timed;
    building the tables is not included in the measurement.

    Args:
        loops: Number of times to run the query.

    Returns:
        The accumulated ``pyperf.perf_counter()`` difference over all runs.
    """
    row_ids = range(10000)

    # Hand-written seed rows come first; generated rows are appended after.
    sushi_rows = [
        {"id": 1, "price": 1.0},
        {"id": 2, "price": 2.0},
        {"id": 3, "price": 3.0},
    ]
    sushi_rows.extend({"id": n, "price": n} for n in row_ids)

    order_item_rows = [
        {"sushi_id": 1, "order_id": 1},
        {"sushi_id": 1, "order_id": 1},
        {"sushi_id": 2, "order_id": 1},
        {"sushi_id": 3, "order_id": 2},
    ]
    order_item_rows.extend({"sushi_id": n, "order_id": n} for n in row_ids)

    order_rows = [
        {"id": 1, "user_id": 1},
        {"id": 2, "user_id": 2},
    ]
    order_rows.extend({"id": n, "user_id": n} for n in row_ids)

    tables = {
        "sushi": sushi_rows,
        "order_items": order_item_rows,
        "orders": order_rows,
    }

    query = """
        SELECT
          o.user_id,
          s.price / 2 AS half_price,
          AVG(s.price) AS avg_price,
          SUM(s.price) AS price
        FROM orders o
        JOIN order_items i
          ON o.id = i.order_id
        JOIN sushi s
          ON i.sushi_id = s.id
        GROUP BY o.user_id
        """

    total = 0
    for _ in range(loops):
        started = pyperf.perf_counter()
        execute(query, tables=tables)
        total += pyperf.perf_counter() - started
    return total
| 213 | + |
| 214 | + |
if __name__ == "__main__":
    # Register every benchmark with a single pyperf runner, in a fixed order.
    runner = pyperf.Runner()
    runner.metadata['description'] = "SQLGlot benchmark"

    benchmarks = (
        ("sqlglot_parse", bench_parse),
        ("sqlglot_transpile", bench_transpile),
        ("sqlglot_optimize", bench_optimize),
        ("sqlglot_normalize", bench_normalize),
        ("sqlglot_execute", bench_execute),
    )
    for bench_name, bench_func in benchmarks:
        runner.bench_time_func(bench_name, bench_func)
0 commit comments