-
Notifications
You must be signed in to change notification settings - Fork 2
/
pgsp_queryid.c
322 lines (281 loc) · 7.4 KB
/
pgsp_queryid.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
/*-------------------------------------------------------------------------------------------------
*
* pgsp_queryid.c: Normalize a query and compute a query identifier.
*
* This is a partial copy of the pg_store_plans/pgsp_json.c
* and pg_store_plans/pg_store_plans.c files.
*
* Copyright (c) 2012-2022, NIPPON TELEGRAPH AND TELEPHONE CORPORATION
*
*-------------------------------------------------------------------------------------------------
*/
#include "postgres.h"
#include <access/hash.h>
#include "nodes/parsenodes.h"
#include "parser/scanner.h"
#include "parser/gram.h"
/* Indentation step; not referenced by the code visible in this file. */
#define INDENT_STEP 2

/* Normalize a statement or expression in place (defined below). */
void		normalize_expr(char *expr, bool preserve_space);

/* Compute a non-zero 64-bit identifier from a query string (defined below). */
uint64		hash_query(const char *query);
/*
 * hash_query - compute a query identifier for a query string.
 *
 * The query text is duplicated, the copy is normalized in place with
 * normalize_expr() (preserve_space = false) and the normalized text is hashed
 * with hash_any_extended() using seed 0.  The caller's string is not
 * modified.
 *
 * Returns the 64-bit hash value; a hash of 0 is replaced by 1, so the result
 * is never zero.
 */
uint64
hash_query(const char *query)
{
	uint64		queryid;
	char	   *normquery = pstrdup(query);

	normalize_expr(normquery, false);

	/*
	 * hash_any_extended() returns a Datum.  Convert it explicitly instead of
	 * relying on Datum and uint64 sharing a representation: where 8-byte
	 * values are passed by reference the Datum is a pointer, not the hash.
	 */
	queryid = DatumGetUInt64(hash_any_extended((const unsigned char *) normquery,
											   strlen(normquery), 0));
	pfree(normquery);

	/* If we are unlucky enough to get a hash of zero, use 1 instead */
	if (queryid == 0)
		queryid = 1;

	return queryid;
}
/*
 * Look for these operator characters in order to decide whether to strip
 * whitespace that is needless from the point of view of SQL syntax in
 * normalize_expr().  This must be kept in sync with op_chars in scan.l.
 */
#define OPCHARS "~!@#^&|`?+-*/%<>="

/* True if c is a space, a newline or a tab. */
#define IS_WSCHAR(c) ((c) == ' ' || (c) == '\n' || (c) == '\t')

/*
 * True if tok is one of the token codes that normalize_expr() treats as a
 * constant.  The argument is parenthesized so that compound expressions are
 * compared as a whole; it is still evaluated several times, so it must be
 * free of side effects.
 */
#define IS_CONST(tok) ((tok) == FCONST || (tok) == SCONST || (tok) == BCONST || \
					   (tok) == XCONST || (tok) == ICONST || (tok) == NULL_P || \
					   (tok) == TRUE_P || (tok) == FALSE_P || \
					   (tok) == CURRENT_DATE || (tok) == CURRENT_TIME || \
					   (tok) == LOCALTIME || (tok) == LOCALTIMESTAMP)
/*
 * norm_yylex: core_yylex with replacing some tokens.
 *
 * str is the text being scanned; the location stored in *yylloc by
 * core_yylex() is used as an index into str to look at the characters of
 * operator tokens.  yylval, yylloc and yyscanner are passed through to
 * core_yylex().
 *
 * Returns the token code, possibly replaced as described below, or -1 if
 * core_yylex() raised an error (the error is discarded).
 */
static int
norm_yylex(char *str, core_YYSTYPE *yylval, YYLTYPE *yylloc, core_yyscan_t yyscanner)
{
	int			tok;
	MemoryContext oldcontext = CurrentMemoryContext;

	PG_TRY();
	{
		tok = core_yylex(yylval, yylloc, yyscanner);
	}
	PG_CATCH();
	{
		/*
		 * Error might occur during parsing quoted tokens that chopped
		 * halfway. Just ignore the rest of this query even if there might
		 * be other reasons for parsing to fail.
		 *
		 * Error processing runs in ErrorContext and does not switch back
		 * before control arrives here, so restore the caller's memory
		 * context before discarding the error.
		 */
		MemoryContextSwitchTo(oldcontext);
		FlushErrorState();
		return -1;
	}
	PG_END_TRY();

	/*
	 * An operator token starting with '?' that is not followed by another
	 * operator character is reported as a constant (SCONST).  If there's a
	 * real operator '?', this gets confused, but that should hardly happen.
	 *
	 * NOTE(review): strchr() also matches the terminating NUL of OPCHARS, so
	 * a '?' that is the very last character of str does not satisfy this
	 * test and is handled by the operator rule below -- confirm whether that
	 * is intended.
	 */
	if (tok == Op && str[*yylloc] == '?' &&
		strchr(OPCHARS, str[*yylloc + 1]) == NULL)
		tok = SCONST;

	/*
	 * Replace operator tokens with '=' if the operator consists of opchars
	 * only.  Assuming that opchars do not compose a token with non-opchars,
	 * checking the first char only is sufficient.
	 */
	if (tok == Op && strchr(OPCHARS, str[*yylloc]) != NULL)
		tok = '=';

	return tok;
}
/*
 * normalize_expr - Normalize statements or expressions.
 *
 * Mask constants (each is replaced by '?'), strip unnecessary whitespace and
 * upcase keywords.  Identifiers are copied unchanged.  expr is modified
 * in place (destructively); the result is NUL-terminated and never longer
 * than the input, because the write pointer never passes the read position.
 *
 * If preserve_space is true, one space is emitted for each run of whitespace
 * found between two tokens, trading uniqueness for readability.  Otherwise a
 * space is only kept between two adjacent non-constant tokens whose codes are
 * >= IDENT.
 *
 * If the scanner reports an error, normalization stops there and whatever has
 * been written so far is returned.
 */
/* scanner interface is changed in PG12 */
#if PG_VERSION_NUM < 120000
#define ScanKeywords (*ScanKeywords)
#define ScanKeywordTokens NumScanKeywords
#endif

void
normalize_expr(char *expr, bool preserve_space)
{
	core_yyscan_t yyscanner;
	core_yy_extra_type yyextra;
	core_YYSTYPE yylval;
	YYLTYPE		yylloc;
	YYLTYPE		lastloc;
	YYLTYPE		start;
	char	   *wp;				/* write position inside expr */
	int			tok,
				lasttok;
	int			cmptok;			/* number of tokens read at top of the loop */
#if PG_VERSION_NUM >= 120000 && PG_VERSION_NUM < 140000
	bool		inExplain;
	bool		firstOpenParen;
#endif

	wp = expr;
	yyscanner = scanner_init(expr,
							 &yyextra,
							 &ScanKeywords,
							 ScanKeywordTokens);

	/*
	 * The warnings about nonstandard escape strings is already emitted in the
	 * core. Just silence them here.
	 */
#if PG_VERSION_NUM >= 90500
	yyextra.escape_string_warning = false;
#endif

	lasttok = 0;
	lastloc = -1;
	cmptok = 0;
#if PG_VERSION_NUM >= 120000 && PG_VERSION_NUM < 140000
	inExplain = false;
	firstOpenParen = false;
#endif

	for (;;)
	{
		tok = norm_yylex(expr, &yylval, &yylloc, yyscanner);
		++cmptok;

#if PG_VERSION_NUM >= 120000 && PG_VERSION_NUM < 140000

		/*
		 * EXPLAIN handling, compiled for PG12 and PG13 only.  Tokens skipped
		 * with "continue" do not update lasttok/lastloc, so their text is
		 * never copied to the output.
		 *
		 * Catch EXPLAIN statement: only the very first token is considered.
		 * NOTE(review): 402 / 404 are raw token numbers, presumably the value
		 * of EXPLAIN in gram.h for PG12 / PG13 -- confirm and consider using
		 * the symbolic name.
		 */
#if PG_VERSION_NUM < 130000
		if (tok == 402 && cmptok == 1)
#else
		if (tok == 404 && cmptok == 1)
#endif
		{
			inExplain = true;
			continue;
		}

		/* After an EXPLAIN statement without parenthesis (40 is '(') */
		if (tok != 40 && cmptok == 2 && inExplain)
		{
			inExplain = false;

			/*
			 * With only one parameter.
			 * NOTE(review): 286/685 (PG12) and 288/695 (PG13) are raw token
			 * numbers, presumably the unparenthesized EXPLAIN options such as
			 * ANALYZE and VERBOSE -- confirm against gram.h.
			 */
#if PG_VERSION_NUM < 130000
			if (tok == 286 || tok == 685)
#else
			if (tok == 288 || tok == 695)
#endif
			{
				continue;
			}
		}

		/* Catch open parenthesis of EXPLAIN statement */
		if (tok == 40 && inExplain && !firstOpenParen)
		{
			firstOpenParen = true;
			continue;
		}

		/* Catch close parenthesis of EXPLAIN statement (41 is ')') */
		if (tok == 41 && inExplain && firstOpenParen)
		{
			inExplain = false;
			firstOpenParen = false;
			continue;
		}

		/* Catch options of EXPLAIN statement */
		if (inExplain && firstOpenParen)
		{
			continue;
		}
#endif

		start = yylloc;

		/* Emit the text of the previous token, if there is one. */
		if (lastloc >= 0)
		{
			int			i,
						i2;

			/* Skipping preceding whitespaces */
			for (i = lastloc; i < start && IS_WSCHAR(expr[i]); i++)
				;

			/* Searching for trailing whitespace */
			for (i2 = i; i2 < start && !IS_WSCHAR(expr[i2]); i2++)
				;

			if (lasttok == IDENT)
			{
				/*
				 * Identifiers are copied in case-sensitive manner.  Source
				 * and destination are in the same buffer and may overlap, so
				 * memmove() is required here rather than memcpy().
				 */
				memmove(wp, expr + i, i2 - i);
				wp += i2 - i;
			}
#if PG_VERSION_NUM >= 100000

			/*
			 * Since PG10 pg_stat_statements doesn't store trailing semicolon
			 * in the column "query". Normalization is basically useless in
			 * the version but still useful to match utility commands so
			 * follow the behavior change.
			 */
			else if (lasttok == ';')
			{
				/* Just do nothing */
			}
#endif
			else
			{
				/* Upcase keywords (ASCII lower-case letters only) */
				char	   *sp;

				for (sp = expr + i; sp < expr + i2; sp++, wp++)
					*wp = (*sp >= 'a' && *sp <= 'z' ?
						   *sp - ('a' - 'A') : *sp);
			}

			/*
			 * Because of destructive writing, wp must not go advance the
			 * reading point.
			 *
			 * Although this function's output does not need any validity as
			 * a statement or an expression, spaces are added where it should
			 * be to keep some extent of sanity. If readability is more
			 * important than uniqueness, preserve_space adds one space for
			 * each existent whitespace.
			 */
			if (tok > 0 &&
				i2 < start &&
				(preserve_space ||
				 (tok >= IDENT && lasttok >= IDENT &&
				  !IS_CONST(tok) && !IS_CONST(lasttok))))
				*wp++ = ' ';

			start = i2;
		}

		/* Exit on parse error. */
		if (tok < 0)
			break;

		/*
		 * Negative signs before numbers are tokenized separately. And
		 * explicit positive signs won't appear in deparsed expressions.
		 */
		if (tok == '-')
			tok = norm_yylex(expr, &yylval, &yylloc, yyscanner);

		/* Exit on parse error. */
		if (tok < 0)
			break;

		if (IS_CONST(tok))
		{
			YYLTYPE		end;

			/* Read the token following the constant to learn where it ends. */
			tok = norm_yylex(expr, &yylval, &end, yyscanner);

			/* Exit on parse error. */
			if (tok < 0)
				break;

			/*
			 * Negative values may be surrounded with parens by the
			 * deparser. Mask involving them.
			 */
			if (lasttok == '(' && tok == ')')
			{
				wp -= (start - lastloc);
				start = lastloc;
				end++;
			}

			/* Do not count spaces that trail the constant. */
			while (expr[end - 1] == ' ')
				end--;

			*wp++ = '?';
			yylloc = end;
		}

		/* Token 0 ends the loop (end of input). */
		if (tok == 0)
			break;

		lasttok = tok;
		lastloc = yylloc;
	}

	*wp = 0;

	/* Release the scanner set up by scanner_init() on every exit path. */
	scanner_finish(yyscanner);
}