From be4ceec08cb1b3097d7aa25c4efc358ddd050bdb Mon Sep 17 00:00:00 2001 From: Dan Rogers Date: Fri, 18 Nov 2022 08:01:07 -0500 Subject: [PATCH] Strip leading comments from SQL queries when generating the span name. (#1434) Co-authored-by: Srikanth Chekuri Co-authored-by: Diego Hurtado --- CHANGELOG.md | 2 ++ .../instrumentation/asyncpg/__init__.py | 5 +++- .../instrumentation/dbapi/__init__.py | 5 +++- .../tests/test_dbapi_integration.py | 8 ++++- .../instrumentation/psycopg2/__init__.py | 3 +- .../tests/test_psycopg2_integration.py | 26 +++++++++++++++++ .../instrumentation/sqlalchemy/engine.py | 7 ++++- .../tests/test_sqlalchemy.py | 14 ++++++++- .../tests/asyncpg/test_asyncpg_functional.py | 29 +++++++++++++++++++ 9 files changed, 93 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cfe3bc8d16..b95e366603 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ([#1350](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1350)) - `opentelemetry-instrumentation-starlette` Add support for regular expression matching and sanitization of HTTP headers. ([#1404](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1404)) +- Strip leading comments from SQL queries when generating the span name. + ([#1434](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1434)) ### Fixed diff --git a/instrumentation/opentelemetry-instrumentation-asyncpg/src/opentelemetry/instrumentation/asyncpg/__init__.py b/instrumentation/opentelemetry-instrumentation-asyncpg/src/opentelemetry/instrumentation/asyncpg/__init__.py index e4074885f2..f1fd43b55d 100644 --- a/instrumentation/opentelemetry-instrumentation-asyncpg/src/opentelemetry/instrumentation/asyncpg/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-asyncpg/src/opentelemetry/instrumentation/asyncpg/__init__.py @@ -34,6 +34,7 @@ --- """ +import re from typing import Collection import asyncpg @@ -99,6 +100,7 @@ def __init__(self, capture_parameters=False): super().__init__() self.capture_parameters = capture_parameters self._tracer = None + self._leading_comment_remover = re.compile(r"^/\*.*?\*/") def instrumentation_dependencies(self) -> Collection[str]: return _instruments @@ -135,7 +137,8 @@ async def _do_execute(self, func, instance, args, kwargs): name = args[0] if args[0] else params.get("database", "postgresql") try: - name = name.split()[0] + # Strip leading comments so we get the operation name. + name = self._leading_comment_remover.sub("", name).split()[0] except IndexError: name = "" diff --git a/instrumentation/opentelemetry-instrumentation-dbapi/src/opentelemetry/instrumentation/dbapi/__init__.py b/instrumentation/opentelemetry-instrumentation-dbapi/src/opentelemetry/instrumentation/dbapi/__init__.py index c2ee79e811..87b1a8d656 100644 --- a/instrumentation/opentelemetry-instrumentation-dbapi/src/opentelemetry/instrumentation/dbapi/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-dbapi/src/opentelemetry/instrumentation/dbapi/__init__.py @@ -39,6 +39,7 @@ import functools import logging +import re import typing import wrapt @@ -368,6 +369,7 @@ def __init__(self, db_api_integration: DatabaseApiIntegration) -> None: else {} ) self._connect_module = self._db_api_integration.connect_module + self._leading_comment_remover = re.compile(r"^/\*.*?\*/") def _populate_span( self, @@ -397,7 +399,8 @@ def _populate_span( def get_operation_name(self, cursor, args): # pylint: disable=no-self-use if args and isinstance(args[0], str): - return args[0].split()[0] + # Strip leading comments so we get the operation name. + return self._leading_comment_remover.sub("", args[0]).split()[0] return "" def get_statement(self, cursor, args): # pylint: disable=no-self-use diff --git a/instrumentation/opentelemetry-instrumentation-dbapi/tests/test_dbapi_integration.py b/instrumentation/opentelemetry-instrumentation-dbapi/tests/test_dbapi_integration.py index 8502bd46ee..9f9371ad66 100644 --- a/instrumentation/opentelemetry-instrumentation-dbapi/tests/test_dbapi_integration.py +++ b/instrumentation/opentelemetry-instrumentation-dbapi/tests/test_dbapi_integration.py @@ -88,11 +88,17 @@ def test_span_name(self): query""" ) cursor.execute("tab\tseparated query") + cursor.execute("/* leading comment */ query") + cursor.execute("/* leading comment */ query /* trailing comment */") + cursor.execute("query /* trailing comment */") spans_list = self.memory_exporter.get_finished_spans() - self.assertEqual(len(spans_list), 3) + self.assertEqual(len(spans_list), 6) self.assertEqual(spans_list[0].name, "Test") self.assertEqual(spans_list[1].name, "multi") self.assertEqual(spans_list[2].name, "tab") + self.assertEqual(spans_list[3].name, "query") + self.assertEqual(spans_list[4].name, "query") + self.assertEqual(spans_list[5].name, "query") def test_span_succeeded_with_capture_of_statement_parameters(self): connection_props = { diff --git a/instrumentation/opentelemetry-instrumentation-psycopg2/src/opentelemetry/instrumentation/psycopg2/__init__.py b/instrumentation/opentelemetry-instrumentation-psycopg2/src/opentelemetry/instrumentation/psycopg2/__init__.py index ebad55cf2a..de2e49f4c3 100644 --- a/instrumentation/opentelemetry-instrumentation-psycopg2/src/opentelemetry/instrumentation/psycopg2/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-psycopg2/src/opentelemetry/instrumentation/psycopg2/__init__.py @@ -216,7 +216,8 @@ def get_operation_name(self, cursor, args): statement = statement.as_string(cursor) if isinstance(statement, str): - return statement.split()[0] + # Strip leading comments so we get the operation name. + return self._leading_comment_remover.sub("", statement).split()[0] return "" diff --git a/instrumentation/opentelemetry-instrumentation-psycopg2/tests/test_psycopg2_integration.py b/instrumentation/opentelemetry-instrumentation-psycopg2/tests/test_psycopg2_integration.py index f516a07882..5eff8b444f 100644 --- a/instrumentation/opentelemetry-instrumentation-psycopg2/tests/test_psycopg2_integration.py +++ b/instrumentation/opentelemetry-instrumentation-psycopg2/tests/test_psycopg2_integration.py @@ -116,6 +116,32 @@ def test_instrumentor(self): spans_list = self.memory_exporter.get_finished_spans() self.assertEqual(len(spans_list), 1) + def test_span_name(self): + Psycopg2Instrumentor().instrument() + + cnx = psycopg2.connect(database="test") + + cursor = cnx.cursor() + + cursor.execute("Test query", ("param1Value", False)) + cursor.execute( + """multi + line + query""" + ) + cursor.execute("tab\tseparated query") + cursor.execute("/* leading comment */ query") + cursor.execute("/* leading comment */ query /* trailing comment */") + cursor.execute("query /* trailing comment */") + spans_list = self.memory_exporter.get_finished_spans() + self.assertEqual(len(spans_list), 6) + self.assertEqual(spans_list[0].name, "Test") + self.assertEqual(spans_list[1].name, "multi") + self.assertEqual(spans_list[2].name, "tab") + self.assertEqual(spans_list[3].name, "query") + self.assertEqual(spans_list[4].name, "query") + self.assertEqual(spans_list[5].name, "query") + # pylint: disable=unused-argument def test_not_recording(self): mock_tracer = mock.Mock() diff --git a/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py b/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py index 73941200c2..2965f45085 100644 --- a/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py +++ b/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import os +import re from sqlalchemy.event import listen # pylint: disable=no-name-in-module @@ -100,6 +101,7 @@ def __init__( self.vendor = _normalize_vendor(engine.name) self.enable_commenter = enable_commenter self.commenter_options = commenter_options if commenter_options else {} + self._leading_comment_remover = re.compile(r"^/\*.*?\*/") listen( engine, "before_cursor_execute", self._before_cur_exec, retval=True @@ -115,7 +117,10 @@ def _operation_name(self, db_name, statement): # use cases and uses the SQL statement in span name correctly as per the spec. # For some very special cases it might not record the correct statement if the SQL # dialect is too weird but in any case it shouldn't break anything. - parts.append(statement.split()[0]) + # Strip leading comments so we get the operation name. + parts.append( + self._leading_comment_remover.sub("", statement).split()[0] + ) if db_name: parts.append(db_name) if not parts: diff --git a/instrumentation/opentelemetry-instrumentation-sqlalchemy/tests/test_sqlalchemy.py b/instrumentation/opentelemetry-instrumentation-sqlalchemy/tests/test_sqlalchemy.py index 5fa74376f9..913b7d3c5e 100644 --- a/instrumentation/opentelemetry-instrumentation-sqlalchemy/tests/test_sqlalchemy.py +++ b/instrumentation/opentelemetry-instrumentation-sqlalchemy/tests/test_sqlalchemy.py @@ -42,15 +42,27 @@ def test_trace_integration(self): ) cnx = engine.connect() cnx.execute("SELECT 1 + 1;").fetchall() + cnx.execute("/* leading comment */ SELECT 1 + 1;").fetchall() + cnx.execute( + "/* leading comment */ SELECT 1 + 1; /* trailing comment */" + ).fetchall() + cnx.execute("SELECT 1 + 1; /* trailing comment */").fetchall() spans = self.memory_exporter.get_finished_spans() - self.assertEqual(len(spans), 2) + self.assertEqual(len(spans), 5) # first span - the connection to the db self.assertEqual(spans[0].name, "connect") self.assertEqual(spans[0].kind, trace.SpanKind.CLIENT) # second span - the query itself self.assertEqual(spans[1].name, "SELECT :memory:") self.assertEqual(spans[1].kind, trace.SpanKind.CLIENT) + # spans for queries with comments + self.assertEqual(spans[2].name, "SELECT :memory:") + self.assertEqual(spans[2].kind, trace.SpanKind.CLIENT) + self.assertEqual(spans[3].name, "SELECT :memory:") + self.assertEqual(spans[3].kind, trace.SpanKind.CLIENT) + self.assertEqual(spans[4].name, "SELECT :memory:") + self.assertEqual(spans[4].kind, trace.SpanKind.CLIENT) def test_instrument_two_engines(self): engine_1 = create_engine("sqlite:///:memory:") diff --git a/tests/opentelemetry-docker-tests/tests/asyncpg/test_asyncpg_functional.py b/tests/opentelemetry-docker-tests/tests/asyncpg/test_asyncpg_functional.py index 8af57aa658..259c99dff4 100644 --- a/tests/opentelemetry-docker-tests/tests/asyncpg/test_asyncpg_functional.py +++ b/tests/opentelemetry-docker-tests/tests/asyncpg/test_asyncpg_functional.py @@ -77,6 +77,35 @@ def test_instrumented_fetch_method_without_arguments(self, *_, **__): spans[0].attributes[SpanAttributes.DB_STATEMENT], "SELECT 42;" ) + def test_instrumented_remove_comments(self, *_, **__): + async_call(self._connection.fetch("/* leading comment */ SELECT 42;")) + async_call( + self._connection.fetch( + "/* leading comment */ SELECT 42; /* trailing comment */" + ) + ) + async_call(self._connection.fetch("SELECT 42; /* trailing comment */")) + spans = self.memory_exporter.get_finished_spans() + self.assertEqual(len(spans), 3) + self.check_span(spans[0]) + self.assertEqual(spans[0].name, "SELECT") + self.assertEqual( + spans[0].attributes[SpanAttributes.DB_STATEMENT], + "/* leading comment */ SELECT 42;", + ) + self.check_span(spans[1]) + self.assertEqual(spans[1].name, "SELECT") + self.assertEqual( + spans[1].attributes[SpanAttributes.DB_STATEMENT], + "/* leading comment */ SELECT 42; /* trailing comment */", + ) + self.check_span(spans[2]) + self.assertEqual(spans[2].name, "SELECT") + self.assertEqual( + spans[2].attributes[SpanAttributes.DB_STATEMENT], + "SELECT 42; /* trailing comment */", + ) + def test_instrumented_transaction_method(self, *_, **__): async def _transaction_execute(): async with self._connection.transaction():