Skip to content

Commit 2a1b6c1

Browse files
committed
jdbc: support for sub-set of JDBC escape syntax
Add driver-side SQL pre-processing that runs before a statement is sent to the server. The driver supports a subset of the scalar functions defined by the JDBC specification (Appendix C), outer joins, the escape clause for the SQL LIKE operator, and the limit/offset clause. The processed result can be obtained using the Connection.nativeSQL() method. Closes #79 Closes #76 Closes #81 Closes #83 Closes #84 Affects: #108
1 parent 4ba88fb commit 2a1b6c1

13 files changed

+1302
-13
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,333 @@
1+
package org.tarantool.jdbc;
2+
3+
import static org.tarantool.jdbc.EscapedFunctions.Expression;
import static org.tarantool.jdbc.EscapedFunctions.FunctionExpression;
import static org.tarantool.jdbc.EscapedFunctions.FunctionSignatureKey;
import static org.tarantool.jdbc.EscapedFunctions.functionMappings;

import org.tarantool.util.SQLStates;
import org.tarantool.util.ThrowingBiFunction;

import java.sql.Connection;
import java.sql.SQLSyntaxErrorException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
17+
18+
/**
19+
* Set of utils to work with JDBC escape processing.
20+
* <p>
21+
* Supported escape syntax:
22+
* <ol>
23+
* <li>Scalar functions (i.e. {@code {fn random()}}).</li>
24+
* <li>Outer joins (i.e. {@code {oj "dept" left outer join "salary" on "dept_id" = 1412}}).</li>
25+
* <li>Like escape character (i.e. {@code like '_|%_3%' {escape '|'}}).</li>
26+
* <li>Limiting returned rows (i.e. {@code {limit 10 offset 20}}).</li>
27+
* </ol>
28+
* <p>
29+
* Most of the supported expressions translates directly omitting escape borders.
30+
* In this way, {@code {fn abs(-5)}} becomes {@code abs(-5)}} or {@code {limit 10 offset 50}}
31+
* becomes {@code limit 10 offset 50} and so on. There are exceptions in case of scalar
32+
* functions where JDBC functions may not match exactly with Tarantool ones (for example,
33+
* JDBC {@code {fn rand()}} function becomes {@code random()} supported by Tarantool.
34+
*/
35+
public class EscapeSyntaxParser {
36+
37+
/**
38+
* Pattern that covers function names described in JDBC Spec
39+
* Appendix C. Scalar functions.
40+
*/
41+
private static final Pattern IDENTIFIER = Pattern.compile("[_a-zA-Z][_a-zA-Z0-9]+");
42+
43+
private final SQLConnection jdbcContext;
44+
45+
public EscapeSyntaxParser(SQLConnection jdbcContext) {
46+
this.jdbcContext = jdbcContext;
47+
}
48+
49+
/**
50+
* Performs escape processing for SQL queries. It translates
51+
* sql text with optional escape expressions such as {@code {fn abs(-1)}}.
52+
*
53+
* <p>
54+
* Comments inside SQL text can be eliminated as parsing goes using preserveComments
55+
* flag. Hence, Comments inside escape syntax are always omitted regardless of
56+
* the flag, though.
57+
*
58+
* @param sql SQL text to be processed
59+
*
60+
* @return native SQL query
61+
*
62+
* @throws SQLSyntaxErrorException if any syntax error happened
63+
*/
64+
public String translate(String sql) throws SQLSyntaxErrorException {
65+
StringBuilder nativeSql = new StringBuilder(sql.length());
66+
StringBuilder escapeBuffer = new StringBuilder();
67+
StringBuilder activeBuffer = nativeSql;
68+
LinkedList<Integer> escapeStartPositions = new LinkedList<>();
69+
70+
int i = 0;
71+
while (i < sql.length()) {
72+
char currentChar = sql.charAt(i);
73+
switch (currentChar) {
74+
case '\'':
75+
case '"':
76+
int endOfString = seekEndOfRegion(sql, i, "" + currentChar, "" + currentChar);
77+
if (endOfString == -1) {
78+
throw new SQLSyntaxErrorException(
79+
"Not enclosed string literal or quoted identifier at position " + i,
80+
SQLStates.SYNTAX_ERROR.getSqlState()
81+
);
82+
}
83+
activeBuffer.append(sql, i, endOfString + 1);
84+
i = endOfString + 1;
85+
break;
86+
87+
case '/':
88+
case '-':
89+
int endOfComment;
90+
if (currentChar == '/') {
91+
endOfComment = seekEndOfRegion(sql, i, "/*", "*/");
92+
if (endOfComment == -1) {
93+
throw new SQLSyntaxErrorException(
94+
"Open block comment at position " + i, SQLStates.SYNTAX_ERROR.getSqlState()
95+
);
96+
}
97+
} else {
98+
endOfComment = seekEndOfRegion(sql, i, "--", "\n");
99+
if (endOfComment == -1) {
100+
endOfComment = sql.length() - 1;
101+
}
102+
}
103+
if (i == endOfComment) {
104+
activeBuffer.append(currentChar);
105+
i++;
106+
} else {
107+
i = endOfComment + 1;
108+
}
109+
break;
110+
111+
case '{':
112+
escapeStartPositions.addFirst(escapeBuffer.length());
113+
escapeBuffer.append(currentChar);
114+
activeBuffer = escapeBuffer;
115+
i++;
116+
break;
117+
118+
case '}':
119+
Integer startPosition = escapeStartPositions.pollFirst();
120+
if (startPosition == null) {
121+
throw new SQLSyntaxErrorException(
122+
"Unexpected '}' at position " + i,
123+
SQLStates.SYNTAX_ERROR.getSqlState()
124+
);
125+
}
126+
escapeBuffer.append(currentChar);
127+
processEscapeExpression(escapeBuffer, startPosition, escapeBuffer.length());
128+
if (escapeStartPositions.isEmpty()) {
129+
nativeSql.append(escapeBuffer);
130+
escapeBuffer.setLength(0);
131+
activeBuffer = nativeSql;
132+
}
133+
i++;
134+
break;
135+
136+
default:
137+
activeBuffer.append(currentChar);
138+
i++;
139+
break;
140+
}
141+
}
142+
143+
if (!escapeStartPositions.isEmpty()) {
144+
throw new SQLSyntaxErrorException(
145+
"Not enclosed escape expression at position " + escapeStartPositions.pollFirst(),
146+
SQLStates.SYNTAX_ERROR.getSqlState()
147+
);
148+
}
149+
return nativeSql.toString();
150+
}
151+
152+
/**
153+
* Parses text like {@code functionName([arg[,args...]])}.
154+
* Arguments are not parsed recursively and saved as-is.
155+
*
156+
* <p>
157+
* In contrast to SQL where function name can be enclosed by double quotes,
158+
* it is not supported within escape syntax.
159+
*
160+
* @param functionString text to be parsed
161+
*
162+
* @return parsed result containing function name and its parameters, if any
163+
*
164+
* @throws SQLSyntaxErrorException if any syntax errors happened
165+
*/
166+
private FunctionExpression parseFunction(String functionString) throws SQLSyntaxErrorException {
167+
int braceNestLevel = 0;
168+
String functionName = null;
169+
List<String> functionParameters = new ArrayList<>();
170+
int parameterStartPosition = 0;
171+
172+
int i = 0;
173+
boolean completed = false;
174+
while (i < functionString.length() && !completed) {
175+
char currentChar = functionString.charAt(i);
176+
switch (currentChar) {
177+
case '\'':
178+
case '"':
179+
i = seekEndOfRegion(functionString, i, "" + currentChar, "" + currentChar) + 1;
180+
break;
181+
182+
case '(':
183+
if (braceNestLevel++ == 0) {
184+
functionName = functionString.substring(0, i).trim().toUpperCase();
185+
if (!IDENTIFIER.matcher(functionName).matches()) {
186+
throw new SQLSyntaxErrorException(
187+
"Invalid function identifier '" + functionName + "'", SQLStates.SYNTAX_ERROR.getSqlState()
188+
);
189+
}
190+
parameterStartPosition = i + 1;
191+
}
192+
i++;
193+
break;
194+
195+
case ')':
196+
if (--braceNestLevel == 0) {
197+
// reach a function closing brace
198+
// parse the last possible function parameter
199+
String param = functionString.substring(parameterStartPosition, i).trim();
200+
if (!param.isEmpty()) {
201+
functionParameters.add(param);
202+
} else if (!functionParameters.isEmpty()) {
203+
throw new SQLSyntaxErrorException(
204+
"Empty function argument at " + (functionParameters.size() + 1) + " position",
205+
SQLStates.SYNTAX_ERROR.getSqlState()
206+
);
207+
}
208+
completed = true;
209+
}
210+
i++;
211+
break;
212+
213+
case ',':
214+
if (braceNestLevel == 1) {
215+
// reach the function argument delimiter
216+
// parse the argument before this comma
217+
String param = functionString.substring(parameterStartPosition, i).trim();
218+
if (param.isEmpty()) {
219+
throw new SQLSyntaxErrorException(
220+
"Empty function argument at " + (functionParameters.size() + 1) + " position",
221+
SQLStates.SYNTAX_ERROR.getSqlState()
222+
);
223+
}
224+
parameterStartPosition = i + 1;
225+
functionParameters.add(param);
226+
}
227+
i++;
228+
break;
229+
230+
default:
231+
i++;
232+
break;
233+
}
234+
}
235+
236+
if (functionName == null || !completed) {
237+
throw new SQLSyntaxErrorException(
238+
"Malformed function expression '" + functionString + "'", SQLStates.SYNTAX_ERROR.getSqlState()
239+
);
240+
}
241+
if (i < functionString.length()) {
242+
String tail = functionString.substring(i).trim();
243+
if (!tail.isEmpty()) {
244+
throw new SQLSyntaxErrorException(
245+
"Unexpected expression '" + tail + "' after a function declaration",
246+
SQLStates.SYNTAX_ERROR.getSqlState()
247+
);
248+
}
249+
}
250+
return new FunctionExpression(functionName, functionParameters);
251+
}
252+
253+
/**
254+
* Handles an escape expression. All expression substitutes are applied to
255+
* the passed {@code buffer} parameter. In case of {@code fn}, the function
256+
* name is case-insensitive.
257+
*
258+
* @param buffer buffer containing current escape expression
259+
* @param start start position of the escape syntax in the buffer, inclusive
260+
* @param end end position of the escape syntax in the buffer, exclusive
261+
*
262+
* @throws SQLSyntaxErrorException if any syntax error happen
263+
*/
264+
private void processEscapeExpression(StringBuilder buffer, int start, int end)
265+
throws SQLSyntaxErrorException {
266+
if (buffer.charAt(start) != '{' || buffer.charAt(end - 1) != '}') {
267+
return;
268+
}
269+
int startExpression = seekFirstNonSpaceSymbol(buffer, start + 1);
270+
int endExpression = seekLastNonSpaceSymbol(buffer, end - 2) + 1;
271+
272+
if (substringMatches(buffer, "fn ", startExpression)) {
273+
FunctionExpression expression = parseFunction(buffer.substring(startExpression + 3, endExpression));
274+
ThrowingBiFunction<FunctionExpression, Connection, Expression, SQLSyntaxErrorException> mapper =
275+
functionMappings.get(FunctionSignatureKey.of(expression.getName(), expression.getParameters().size()));
276+
if (mapper == null) {
277+
throw new SQLSyntaxErrorException(
278+
"Unknown function " + expression.getName(),
279+
SQLStates.SYNTAX_ERROR.getSqlState()
280+
);
281+
}
282+
buffer.replace(start, end, mapper.apply(expression, jdbcContext).toString());
283+
} else if (substringMatches(buffer, "oj ", startExpression)) {
284+
buffer.replace(start, end, buffer.substring(startExpression + 3, endExpression));
285+
} else if (substringMatches(buffer, "escape ", startExpression)) {
286+
buffer.replace(start, end, buffer.substring(startExpression, endExpression));
287+
} else if (substringMatches(buffer, "limit ", startExpression)) {
288+
buffer.replace(start, end, buffer.substring(startExpression, endExpression));
289+
} else {
290+
throw new SQLSyntaxErrorException("Unrecognizable escape expression", SQLStates.SYNTAX_ERROR.getSqlState());
291+
}
292+
}
293+
294+
/**
295+
* Looks for the end of the region defined by its start and end
296+
* substring patterns.
297+
*
298+
* @param text search text
299+
* @param position start position in text to search the region, inclusive
300+
* @param startRegion pattern of the region start
301+
* @param endRegion pattern of the region end
302+
*
303+
* @return found position of the region end, inclusive. Start position if the region start
304+
* pattern does not match the text start position and {@literal -1} if the
305+
* region end is not found.
306+
*/
307+
private int seekEndOfRegion(String text, int position, String startRegion, String endRegion) {
308+
if (!text.regionMatches(position, startRegion, 0, startRegion.length())) {
309+
return position;
310+
}
311+
int end = text.indexOf(endRegion, position + startRegion.length());
312+
return end == -1 ? end : end + endRegion.length() - 1;
313+
}
314+
315+
private boolean substringMatches(StringBuilder text, String substring, int start) {
316+
return text.indexOf(substring, start) == start;
317+
}
318+
319+
private int seekFirstNonSpaceSymbol(StringBuilder text, int position) {
320+
while (position < text.length() && Character.isWhitespace(text.charAt(position))) {
321+
position++;
322+
}
323+
return position;
324+
}
325+
326+
private int seekLastNonSpaceSymbol(StringBuilder text, int position) {
327+
while (position > 0 && Character.isWhitespace(text.charAt(position))) {
328+
position--;
329+
}
330+
return position;
331+
}
332+
333+
}

0 commit comments

Comments
 (0)