diff --git a/apm-agent-plugins/apm-jdbc-plugin/src/main/java/co/elastic/apm/agent/jdbc/signature/JdbcFilter.java b/apm-agent-plugins/apm-jdbc-plugin/src/main/java/co/elastic/apm/agent/jdbc/signature/JdbcFilter.java new file mode 100644 index 0000000000..98d26af4eb --- /dev/null +++ b/apm-agent-plugins/apm-jdbc-plugin/src/main/java/co/elastic/apm/agent/jdbc/signature/JdbcFilter.java @@ -0,0 +1,75 @@ +/*- + * #%L + * Elastic APM Java agent + * %% + * Copyright (C) 2018 - 2019 Elastic and contributors + * %% + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * #L% + */ +package co.elastic.apm.agent.jdbc.signature; + +class JdbcFilter { + + private boolean inQuote = false; + private boolean inJdbcEscape = false; + private boolean jdbcKeyWord = false; + + boolean skip(Scanner s, char c) { + switch (c) { + case '{': + if (!inQuote) { + inJdbcEscape = true; + jdbcKeyWord = true; + return true; + } + break; + case 'o': + case 'O': + if (!inQuote && inJdbcEscape && jdbcKeyWord && s.isNextCharIgnoreCase('j')) { + s.next(); + jdbcKeyWord = false; + return true; + } + break; + case '}': + if (!inQuote) { + inJdbcEscape = false; + return true; + } + break; + case '?': + case '=': + if (!inQuote && inJdbcEscape) { + return true; + } + break; + case '\'': + inQuote = !inQuote; + break; + } + jdbcKeyWord = false; + return false; + } + + void reset() { + inQuote = false; + inJdbcEscape = false; + jdbcKeyWord = false; + } +} diff --git a/apm-agent-plugins/apm-jdbc-plugin/src/main/java/co/elastic/apm/agent/jdbc/signature/Scanner.java b/apm-agent-plugins/apm-jdbc-plugin/src/main/java/co/elastic/apm/agent/jdbc/signature/Scanner.java index 7ef39cfefc..5d5b6316ed 100644 --- a/apm-agent-plugins/apm-jdbc-plugin/src/main/java/co/elastic/apm/agent/jdbc/signature/Scanner.java +++ b/apm-agent-plugins/apm-jdbc-plugin/src/main/java/co/elastic/apm/agent/jdbc/signature/Scanner.java @@ -31,9 +31,11 @@ public class Scanner { private int end; // text end char offset private int pos; // read position char offset private int inputLength; + private final JdbcFilter filter = new JdbcFilter(); public void setQuery(String sql) { this.input = sql; + filter.reset(); inputLength = sql.length(); start = 0; end = 0; @@ -74,7 +76,7 @@ public Token scan() { return Token.EOF; } char c = next(); - while (Character.isSpaceChar(c)) { + while (Character.isSpaceChar(c) || filter.skip(this, c)) { if (hasNext()) { c = next(); } else { @@ -301,7 +303,7 @@ private char peek() { return input.charAt(pos); } - private char next() { + char next() { final char c = peek(); pos++; end = pos; @@ -347,6 +349,10 @@ private boolean isNextChar(char c) { return hasNext() && peek() == c; } + boolean isNextCharIgnoreCase(char c) { + return hasNext() && Character.toLowerCase(peek()) == Character.toLowerCase(c); + } + public enum Token { OTHER, @@ -373,7 +379,9 @@ public enum Token { SET, TABLE, TRUNCATE, // Cassandra/CQL-specific - UPDATE; + UPDATE, + MERGE, + USING; private static final Token[] EMPTY = {}; private static final Token[][] KEYWORDS_BY_LENGTH = { @@ -382,7 +390,7 @@ public enum Token { {AS, OR}, {SET}, {CALL, FROM, INTO}, - {TABLE}, + {TABLE, MERGE, USING}, {DELETE, INSERT, SELECT, UPDATE}, {REPLACE}, {TRUNCATE} diff --git a/apm-agent-plugins/apm-jdbc-plugin/src/main/java/co/elastic/apm/agent/jdbc/signature/SignatureParser.java b/apm-agent-plugins/apm-jdbc-plugin/src/main/java/co/elastic/apm/agent/jdbc/signature/SignatureParser.java index 7dd157933c..3234e9cb6e 100644 --- a/apm-agent-plugins/apm-jdbc-plugin/src/main/java/co/elastic/apm/agent/jdbc/signature/SignatureParser.java +++ b/apm-agent-plugins/apm-jdbc-plugin/src/main/java/co/elastic/apm/agent/jdbc/signature/SignatureParser.java @@ -24,12 +24,14 @@ */ package co.elastic.apm.agent.jdbc.signature; +import javax.annotation.Nullable; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import static co.elastic.apm.agent.jdbc.signature.Scanner.Token.EOF; import static co.elastic.apm.agent.jdbc.signature.Scanner.Token.FROM; import static co.elastic.apm.agent.jdbc.signature.Scanner.Token.IDENT; +import static co.elastic.apm.agent.jdbc.signature.Scanner.Token.INTO; import static co.elastic.apm.agent.jdbc.signature.Scanner.Token.LPAREN; import static co.elastic.apm.agent.jdbc.signature.Scanner.Token.RPAREN; @@ -54,55 +56,61 @@ public class SignatureParser { * When relying on weak keys, we would not leverage any caching benefits if the query string is collected. * That means that we are leaking Strings but as the size of the map is limited that should not be an issue. */ - private final static ConcurrentMap signatureCache = new ConcurrentHashMap(DISABLE_CACHE_THRESHOLD, 0.5f, Runtime.getRuntime().availableProcessors()); + private final static ConcurrentMap signatureCache = new ConcurrentHashMap(DISABLE_CACHE_THRESHOLD, + 0.5f, Runtime.getRuntime().availableProcessors()); private final Scanner scanner = new Scanner(); public void querySignature(String query, StringBuilder signature, boolean preparedStatement) { + querySignature(query, signature, null, preparedStatement); + } + public void querySignature(String query, StringBuilder signature, @Nullable StringBuilder dbLink, boolean preparedStatement) { final boolean cacheable = preparedStatement // non-prepared statements are likely to be dynamic strings && QUERY_LENGTH_CACHE_LOWER_THRESHOLD < query.length() && query.length() < QUERY_LENGTH_CACHE_UPPER_THRESHOLD; if (cacheable) { - final String cachedSignature = signatureCache.get(query); + final String[] cachedSignature = signatureCache.get(query); if (cachedSignature != null) { - signature.append(cachedSignature); + signature.append(cachedSignature[0]); + if (dbLink != null) { + dbLink.append(cachedSignature[1]); + } return; } } scanner.setQuery(query); - parse(query, signature); + parse(query, signature, dbLink); if (cacheable && signatureCache.size() <= DISABLE_CACHE_THRESHOLD) { // we don't mind a small overshoot due to race conditions - signatureCache.put(query, signature.toString()); + signatureCache.put(query, new String[]{signature.toString(), dbLink != null ? dbLink.toString() : ""}); } } - private void parse(String query, StringBuilder signature) { + private void parse(String query, StringBuilder signature, @Nullable StringBuilder dbLink) { final Scanner.Token firstToken = scanner.scanWhile(Scanner.Token.COMMENT); switch (firstToken) { case CALL: signature.append("CALL"); if (scanner.scanUntil(Scanner.Token.IDENT)) { - signature.append(' '); - scanner.appendCurrentTokenText(signature); + appendIdentifiers(signature, dbLink); } return; case DELETE: signature.append("DELETE"); if (scanner.scanUntil(FROM) && scanner.scanUntil(Scanner.Token.IDENT)) { - signature.append(" FROM "); - appendIdentifiers(signature); + signature.append(" FROM"); + appendIdentifiers(signature, dbLink); } return; case INSERT: case REPLACE: signature.append(firstToken.name()); if (scanner.scanUntil(Scanner.Token.INTO) && scanner.scanUntil(Scanner.Token.IDENT)) { - signature.append(" INTO "); - appendIdentifiers(signature); + signature.append(" INTO"); + appendIdentifiers(signature, dbLink); } return; case SELECT: @@ -116,8 +124,8 @@ private void parse(String query, StringBuilder signature) { } else if (t == FROM) { if (level == 0) { if (scanner.scanToken(Scanner.Token.IDENT)) { - signature.append(" FROM "); - appendIdentifiers(signature); + signature.append(" FROM"); + appendIdentifiers(signature, dbLink); } else { return; } @@ -128,7 +136,7 @@ private void parse(String query, StringBuilder signature) { case UPDATE: signature.append("UPDATE"); // Scan for the table name - boolean hasPeriod = false, hasFirstPeriod = false; + boolean hasPeriod = false, hasFirstPeriod = false, isDbLink = false; if (scanner.scanToken(IDENT)) { signature.append(' '); scanner.appendCurrentTokenText(signature); @@ -145,6 +153,11 @@ private void parse(String query, StringBuilder signature) { signature.setLength(0); signature.append("UPDATE "); scanner.appendCurrentTokenText(signature); + } else if (isDbLink) { + if (dbLink != null) { + scanner.appendCurrentTokenText(dbLink); + } + isDbLink = false; } // Two adjacent identifiers found after the first period. // Ignore the secondary ones, in case they are unknown keywords. @@ -155,11 +168,23 @@ private void parse(String query, StringBuilder signature) { signature.append('.'); break; default: - return; + if ("@".equals(scanner.text())) { + isDbLink = true; + break; + } else { + return; + } } } } return; + case MERGE: + signature.append("MERGE"); + if (scanner.scanToken(INTO) && scanner.scanUntil(Scanner.Token.IDENT)) { + signature.append(" INTO"); + appendIdentifiers(signature, dbLink); + } + return; default: query = query.trim(); final int indexOfWhitespace = query.indexOf(' '); @@ -167,11 +192,38 @@ private void parse(String query, StringBuilder signature) { } } - private void appendIdentifiers(StringBuilder signature) { + private void appendIdentifiers(StringBuilder signature, @Nullable StringBuilder dbLink) { + signature.append(' '); scanner.appendCurrentTokenText(signature); - while (scanner.scanToken(Scanner.Token.PERIOD) && scanner.scanToken(Scanner.Token.IDENT)) { - signature.append('.'); - scanner.appendCurrentTokenText(signature); + boolean connectedIdents = false, isDbLink = false; + for (Scanner.Token t = scanner.scan(); t != EOF; t = scanner.scan()) { + switch (t) { + case IDENT: + // do not add tokens which are separated by a space + if (connectedIdents) { + scanner.appendCurrentTokenText(signature); + connectedIdents = false; + } else { + if (isDbLink) { + if (dbLink != null) { + scanner.appendCurrentTokenText(dbLink); + } + } + return; + } + break; + case PERIOD: + signature.append('.'); + connectedIdents = true; + break; + case USING: + return; + default: + if ("@".equals(scanner.text())) { + isDbLink = true; + } + break; + } } } } diff --git a/apm-agent-plugins/apm-jdbc-plugin/src/test/java/co/elastic/apm/agent/jdbc/signature/SignatureParserTest.java b/apm-agent-plugins/apm-jdbc-plugin/src/test/java/co/elastic/apm/agent/jdbc/signature/SignatureParserTest.java index 961660d2f8..b64f09fa9f 100644 --- a/apm-agent-plugins/apm-jdbc-plugin/src/test/java/co/elastic/apm/agent/jdbc/signature/SignatureParserTest.java +++ b/apm-agent-plugins/apm-jdbc-plugin/src/test/java/co/elastic/apm/agent/jdbc/signature/SignatureParserTest.java @@ -11,9 +11,9 @@ * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -26,20 +26,21 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; -import org.junit.jupiter.api.BeforeEach; + +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import static org.assertj.core.api.Assertions.assertThat; class SignatureParserTest { - private SignatureParser signatureParser; - private JsonNode testCases; + private static SignatureParser signatureParser; + private static JsonNode testCases; - @BeforeEach - void setUp() throws Exception { + @BeforeAll + static void setUp() throws Exception { signatureParser = new SignatureParser(); - testCases = new ObjectMapper().readTree(getClass().getResource("/signature_tests.json")); + testCases = new ObjectMapper().readTree(SignatureParserTest.class.getResource("/signature_tests.json")); } @Test @@ -50,6 +51,49 @@ void testScanner() { } } + @Test + void testDbLinkForUpdate() { + final StringBuilder sb = new StringBuilder(); + final StringBuilder dblink = new StringBuilder(); + signatureParser.querySignature("UPDATE foo.bar@\"DBLINK.FQDN.COM@USER\" SET bar=1 WHERE baz=2", sb, dblink, false); + assertThat(dblink.toString()).isEqualTo("DBLINK.FQDN.COM@USER"); + } + + @Test + void testDbLink() { + final StringBuilder sb = new StringBuilder(); + final StringBuilder dblink = new StringBuilder(); + signatureParser.querySignature("SELECT * FROM TABLE1@DBLINK", sb, dblink, false); + assertThat(dblink.toString()).isEqualTo("DBLINK"); + } + + @Test + void testDbLinkCache() { + final StringBuilder sb = new StringBuilder(); + final StringBuilder dblink = new StringBuilder(); + signatureParser.querySignature("SELECT * FROM TABLE1@DBLINK", sb, dblink, true); + sb.setLength(0); + dblink.setLength(0); + signatureParser.querySignature("SELECT * FROM TABLE1@DBLINK", sb, dblink, true); + assertThat(dblink.toString()).isEqualTo("DBLINK"); + } + + @Test + void testDbLinkFqdn() { + final StringBuilder sb = new StringBuilder(); + final StringBuilder dblink = new StringBuilder(); + signatureParser.querySignature("SELECT * FROM TABLE1@\"DBLINK.FQDN.COM\"", sb, dblink, false); + assertThat(dblink.toString()).isEqualTo("DBLINK.FQDN.COM"); + } + + @Test + void testDbLinkFqdnWithUser() { + final StringBuilder sb = new StringBuilder(); + final StringBuilder dblink = new StringBuilder(); + signatureParser.querySignature("SELECT * FROM TABLE1@\"DBLINK.FQDN.COM@USER\"", sb, dblink, false); + assertThat(dblink.toString()).isEqualTo("DBLINK.FQDN.COM@USER"); + } + String getSignature(String query) { final StringBuilder sb = new StringBuilder(); signatureParser.querySignature(query, sb, false); diff --git a/apm-agent-plugins/apm-jdbc-plugin/src/test/resources/signature_tests.json b/apm-agent-plugins/apm-jdbc-plugin/src/test/resources/signature_tests.json index 564fbbf814..1ac1ec59cb 100644 --- a/apm-agent-plugins/apm-jdbc-plugin/src/test/resources/signature_tests.json +++ b/apm-agent-plugins/apm-jdbc-plugin/src/test/resources/signature_tests.json @@ -51,6 +51,15 @@ "comment": "We capture the first table of the outermost select statement", "input": "SELECT *,(SELECT COUNT(*) FROM table2 WHERE table2.field1 = table1.id) AS count FROM table1 WHERE table1.field1 = 'value'", "output": "SELECT FROM table1" + }, + { + "comment": "JDBC escape syntax join", + "input": "SELECT * FROM {oj Countries JOIN Cities ON (Countries.country_ISO_code=Cities.country_ISO_code)}", + "output": "SELECT FROM Countries" + }, + { + "input": "SELECT *,(SELECT COUNT(*) FROM table2 WHERE table2.field1 = table1.id) AS count FROM schema.table1@\"DBLINK.FQDN.COM@USER\" WHERE table1.field1 = 'value'", + "output": "SELECT FROM schema.table1" }, { "comment": "If the outermost select operates on derived tables, then we just return 'SELECT' (i.e. the fallback)", @@ -65,6 +74,10 @@ "input": "UPDATE IGNORE foo.bar SET bar=1 WHERE baz=2", "output": "UPDATE foo.bar" }, + { + "input": "UPDATE IGNORE foo.bar@\"DBLINK.FQDN.COM@USER\" SET bar=1 WHERE baz=2", + "output": "UPDATE foo.bar" + }, { "input": "UPDATE ONLY foo AS bar SET baz=1", "output": "UPDATE foo" @@ -77,6 +90,10 @@ "input": "INSERT LOW_PRIORITY IGNORE INTO foo.bar (col) VALUES(?)", "output": "INSERT INTO foo.bar" }, + { + "input": "INSERT LOW_PRIORITY IGNORE INTO schema.table1@\"DBLINK.FQDN.COM@USER\" (col) VALUES(?)", + "output": "INSERT INTO schema.table1" + }, { "input": "CALL foo(bar, 123)", "output": "CALL foo" @@ -139,8 +156,72 @@ "input": "DELETE FROM", "output": "DELETE" }, + { + "input": "DELETE FROM schema.table1@\"DBLINK.FQDN.COM@USER\"", + "output": "DELETE FROM schema.table1" + }, { "input": "CALL", "output": "CALL" + }, + { + "input": "MERGE INTO TEST USING (SELECT * FROM DUAL) SOURCE on (TEST.X=SOURCE.DUMMY) WHEN NOT MATCHED THEN INSERT VALUES(SOURCE.DUMMY)", + "output": "MERGE INTO TEST" + }, + { + "input": "MERGE INTO TEST ALIAS USING (SELECT * FROM DUAL) SOURCE on (TEST.X=SOURCE.DUMMY) WHEN NOT MATCHED THEN INSERT VALUES(SOURCE.DUMMY)", + "output": "MERGE INTO TEST" + }, + { + "input": "MERGE INTO SCHEMA.TEST USING (SELECT * FROM DUAL) SOURCE on (TEST.X=SOURCE.DUMMY) WHEN NOT MATCHED THEN INSERT VALUES(SOURCE.DUMMY)", + "output": "MERGE INTO SCHEMA.TEST" + }, + { + "input": "MERGE INTO SCHEMA.TEST@DBLINK USING (SELECT * FROM DUAL) SOURCE on (TEST.X=SOURCE.DUMMY) WHEN NOT MATCHED THEN INSERT VALUES(SOURCE.DUMMY)", + "output": "MERGE INTO SCHEMA.TEST" + }, + { + "input": "MERGE INTO SCHEMA.TEST@\"DBLINK.FQDN.COM\" USING (SELECT * FROM DUAL) SOURCE on (TEST.X=SOURCE.DUMMY) WHEN NOT MATCHED THEN INSERT VALUES(SOURCE.DUMMY)", + "output": "MERGE INTO SCHEMA.TEST" + }, + { + "input": "MERGE INTO SCHEMA.TEST@\"DBLINK.FQDN.COM@USER\" USING (SELECT * FROM DUAL) SOURCE on (TEST.X=SOURCE.DUMMY) WHEN NOT MATCHED THEN INSERT VALUES(SOURCE.DUMMY)", + "output": "MERGE INTO SCHEMA.TEST" + }, + { + "input": "MERGE", + "output": "MERGE" + }, + { + "input": "{ ? = call FUNCTION() }", + "output": "CALL FUNCTION" + }, + { + "input": "{ ? = call oj() }", + "output": "CALL oj" + }, + { + "input": "{ ? = call SCHEMA.FUNCTION() }", + "output": "CALL SCHEMA.FUNCTION" + }, + { + "input": "{ ? = call SCHEMA.FUNCTION@\"DBLINK.FQDN.COM@USER\"() }", + "output": "CALL SCHEMA.FUNCTION" + }, + { + "input": "{ call PROCEDURE() }", + "output": "CALL PROCEDURE" + }, + { + "input": "{call SCHEMA.PROCEDURE() }", + "output": "CALL SCHEMA.PROCEDURE" + }, + { + "input": "{call SCHEMA.PROCEDURE@\"DBLINK.FQDN.COM@USER\"() }", + "output": "CALL SCHEMA.PROCEDURE" + }, + { + "input": "{call PROCEDURE() }", + "output": "CALL PROCEDURE" } ]