From 5e576df4b57d1f5a483206709170285a654c9a57 Mon Sep 17 00:00:00 2001 From: Gil Teixeira Date: Fri, 6 Oct 2023 17:50:45 +0100 Subject: [PATCH 1/8] Fix sanitization for bulk queries --- .../opentelemetry/instrumentation/elasticsearch/utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/utils.py b/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/utils.py index 97f2bc3b87..9abf60628a 100644 --- a/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/utils.py +++ b/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/utils.py @@ -52,9 +52,15 @@ def _unflatten_dict(d): def sanitize_body(body) -> str: + if isinstance(body, bytes): + body = body.decode("utf8") + if isinstance(body, str): body = json.loads(body) + if isinstance(body, list): + return str([sanitize_body(elem) for elem in body]) + flatten_body = _flatten_dict(body) for key in flatten_body: From 06acb8ffd89c9799cb55b6939c0c3ac211ef3bed Mon Sep 17 00:00:00 2001 From: Gil Teixeira Date: Fri, 6 Oct 2023 18:09:55 +0100 Subject: [PATCH 2/8] Update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6f672d7652..449ab1b976 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- Fix elastic-search instrumentation sanitization to support bulk queries + ([#1990](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1988)) - Fix version of Flask dependency `werkzeug` ([#1980](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1980)) From 90ee6a1de9ad8294bd5a1a5c2ba28c56d07ddaa9 Mon Sep 17 00:00:00 2001 From: Gil Teixeira Date: Mon, 9 Oct 2023 13:27:18 +0100 Subject: [PATCH 3/8] Trigger pipeline From 07b1331d2fdc02f7f74a48daf65673428a4e97d4 Mon Sep 17 00:00:00 2001 From: Gil Teixeira Date: Sat, 3 Feb 2024 18:56:04 +0000 Subject: [PATCH 4/8] Add test to sanitize bulk queries --- .../tests/test_elasticsearch.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py index 0c84cf5cd6..f6f1d94764 100644 --- a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py +++ b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py @@ -486,3 +486,19 @@ def test_body_sanitization(self, _): sanitize_body(json.dumps(sanitization_queries.interval_query)), str(sanitization_queries.interval_query_sanitized), ) + self.assertEqual( + sanitize_body( + [ + json.dumps(sanitization_queries.filter_query).encode("utf-8"), + json.dumps(sanitization_queries.match_query).encode("utf-8"), + json.dumps(sanitization_queries.interval_query).encode("utf-8"), + ] + ), + str( + [ + str(sanitization_queries.filter_query_sanitized), + str(sanitization_queries.match_query_sanitized), + str(sanitization_queries.interval_query_sanitized), + ] + ), + ) From 1e0b1f36bf99bea33d7fb195b104cd60cf823f3a Mon Sep 17 00:00:00 2001 From: Gil Teixeira Date: Thu, 8 Feb 2024 18:31:59 +0000 Subject: [PATCH 5/8] Fix URL Co-authored-by: Srikanth Chekuri --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4769958396..5d05362ac8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `opentelemetry-resource-detector-azure` Changed timeout to 4 seconds due to [timeout bug](https://github.com/open-telemetry/opentelemetry-python/issues/3644) ([#2136](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/2136)) - Fix elastic-search instrumentation sanitization to support bulk queries - ([#1990](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1988)) + ([#1990](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1990)) ## Version 1.22.0/0.43b0 (2023-12-14) From e488a3dbe6b27c8ace876149f4af4fb75363dab3 Mon Sep 17 00:00:00 2001 From: Gil Teixeira Date: Thu, 8 Feb 2024 19:43:40 +0000 Subject: [PATCH 6/8] Split multiline bodies --- .../src/opentelemetry/instrumentation/elasticsearch/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/utils.py b/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/utils.py index 9abf60628a..8d91d966e0 100644 --- a/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/utils.py +++ b/instrumentation/opentelemetry-instrumentation-elasticsearch/src/opentelemetry/instrumentation/elasticsearch/utils.py @@ -56,6 +56,10 @@ def sanitize_body(body) -> str: body = body.decode("utf8") if isinstance(body, str): + body_lines = body.strip().split("\n") + if len(body_lines) > 1: + return sanitize_body(body_lines) + body = json.loads(body) if isinstance(body, list): From 8c54c5e7ce395baebfbfc68fc3de6b83076b65a9 Mon Sep 17 00:00:00 2001 From: Gil Teixeira Date: Thu, 8 Feb 2024 19:44:23 +0000 Subject: [PATCH 7/8] Add bulk search test --- .../tests/test_elasticsearch.py | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py index f6f1d94764..717bc95e3c 100644 --- a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py +++ b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py @@ -502,3 +502,38 @@ def test_body_sanitization(self, _): ] ), ) + + def test_bulk_search(self, request_mock): + + request_mock.return_value = (2, {}, json.dumps({"items": []})) + + client = Elasticsearch() + + data = [ + { + "_index": "words", + "word": "foo", + }, + { + "_index": "words", + "word": "bar", + }, + ] + + elasticsearch.helpers.bulk(client, data) + + spans = self.get_finished_spans() + span = spans[0] + self.assertEqual(1, len(spans)) + self.assertEqual(span.name, "Elasticsearch/_bulk") + self.assertIsNotNone(span.end_time) + expected_bulk_attributes = { + SpanAttributes.DB_SYSTEM: "elasticsearch", + "elasticsearch.url": "/_bulk", + "elasticsearch.method": "POST", + SpanAttributes.DB_STATEMENT: "[\"{'index': {'_index': 'words'}}\", \"{'word': 'foo'}\", \"{'index': {'_index': 'words'}}\", \"{'word': 'bar'}\"]", + } + self.assertEqual( + span.attributes, + expected_bulk_attributes, + ) From f84f8eef2f14af23ede02c7764ca06d293f313d0 Mon Sep 17 00:00:00 2001 From: Gil Teixeira Date: Thu, 8 Feb 2024 19:46:24 +0000 Subject: [PATCH 8/8] Remove newlines --- .../tests/test_elasticsearch.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py index 717bc95e3c..c4d4033017 100644 --- a/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py +++ b/instrumentation/opentelemetry-instrumentation-elasticsearch/tests/test_elasticsearch.py @@ -504,11 +504,8 @@ def test_body_sanitization(self, _): ) def test_bulk_search(self, request_mock): - request_mock.return_value = (2, {}, json.dumps({"items": []})) - client = Elasticsearch() - data = [ { "_index": "words", @@ -519,7 +516,7 @@ def test_bulk_search(self, request_mock): "word": "bar", }, ] - + client = Elasticsearch() elasticsearch.helpers.bulk(client, data) spans = self.get_finished_spans()