diff --git a/.ddev/ci/scripts/sqlserver/windows/41_install_native_client.bat b/.ddev/ci/scripts/sqlserver/windows/41_install_native_client.bat deleted file mode 100644 index 0fbd0e0b47cdd..0000000000000 --- a/.ddev/ci/scripts/sqlserver/windows/41_install_native_client.bat +++ /dev/null @@ -1,3 +0,0 @@ -powershell -Command "Invoke-WebRequest https://download.microsoft.com/download/F/3/C/F3C64941-22A0-47E9-BC9B-1A19B4CA3E88/ENU/x64/sqlncli.msi -OutFile sqlncli.msi" -msiexec /quiet /passive /qn /i sqlncli.msi IACCEPTSQLNCLILICENSETERMS=YES -del sqlncli.msi diff --git a/sqlserver/assets/configuration/spec.yaml b/sqlserver/assets/configuration/spec.yaml index c5d7643bd8815..a78b914ccbfc4 100644 --- a/sqlserver/assets/configuration/spec.yaml +++ b/sqlserver/assets/configuration/spec.yaml @@ -760,6 +760,29 @@ files: value: type: number example: 10 + - name: deadlocks_collection + hidden: True + description: | + Configure the collection of deadlock data. The feature is supported for odbc connector only. + options: + - name: enabled + description: | + Enable the collection of deadlock data. Requires `dbm: true`. Disabled by default. + value: + type: boolean + example: false + - name: collection_interval + description: | + Set the interval for collecting deadlock data, in seconds. Defaults to 600 seconds. + value: + type: number + example: 600 + - name: max_deadlocks + description: | + Set the maximum number of deadlocks to retrieve per collection. + value: + type: number + example: 100 - template: instances/default - template: logs example: diff --git a/sqlserver/changelog.d/18108.added b/sqlserver/changelog.d/18108.added new file mode 100644 index 0000000000000..a75d5681d83d1 --- /dev/null +++ b/sqlserver/changelog.d/18108.added @@ -0,0 +1 @@ +Added deadlock collection feature to the SQL Server integration. 
diff --git a/sqlserver/datadog_checks/sqlserver/config.py b/sqlserver/datadog_checks/sqlserver/config.py index a26440949a82d..166299b3c2bb6 100644 --- a/sqlserver/datadog_checks/sqlserver/config.py +++ b/sqlserver/datadog_checks/sqlserver/config.py @@ -50,6 +50,7 @@ def __init__(self, init_config, instance, log): self.settings_config: dict = instance.get('collect_settings', {}) or {} self.activity_config: dict = instance.get('query_activity', {}) or {} self.schema_config: dict = instance.get('schemas_collection', {}) or {} + self.deadlocks_config: dict = instance.get('deadlocks_collection', {}) or {} self.cloud_metadata: dict = {} aws: dict = instance.get('aws', {}) or {} gcp: dict = instance.get('gcp', {}) or {} diff --git a/sqlserver/datadog_checks/sqlserver/config_models/instance.py b/sqlserver/datadog_checks/sqlserver/config_models/instance.py index d37ad477a430f..ef0200cb763a7 100644 --- a/sqlserver/datadog_checks/sqlserver/config_models/instance.py +++ b/sqlserver/datadog_checks/sqlserver/config_models/instance.py @@ -68,6 +68,16 @@ class CustomQuery(BaseModel): tags: Optional[tuple[str, ...]] = None +class DeadlocksCollection(BaseModel): + model_config = ConfigDict( + arbitrary_types_allowed=True, + frozen=True, + ) + collection_interval: Optional[float] = None + enabled: Optional[bool] = None + max_deadlocks: Optional[float] = None + + class Gcp(BaseModel): model_config = ConfigDict( arbitrary_types_allowed=True, @@ -185,6 +195,7 @@ class InstanceConfig(BaseModel): database_instance_collection_interval: Optional[float] = None db_fragmentation_object_names: Optional[tuple[str, ...]] = None dbm: Optional[bool] = None + deadlocks_collection: Optional[DeadlocksCollection] = None disable_generic_tags: Optional[bool] = None driver: Optional[str] = None dsn: Optional[str] = None diff --git a/sqlserver/datadog_checks/sqlserver/deadlocks.py b/sqlserver/datadog_checks/sqlserver/deadlocks.py new file mode 100644 index 0000000000000..b623760188330 --- /dev/null +++ 
# (C) Datadog, Inc. 2024-present
# All rights reserved
# Licensed under a 3-clause BSD style license (see LICENSE)

import xml.etree.ElementTree as ET
from time import time

from datadog_checks.base.utils.db.sql import compute_sql_signature
from datadog_checks.base.utils.db.utils import DBMAsyncJob, default_json_event_encoding, obfuscate_sql_with_metadata
from datadog_checks.base.utils.serialization import json
from datadog_checks.base.utils.tracking import tracked_method
from datadog_checks.sqlserver.config import SQLServerConfig
from datadog_checks.sqlserver.const import STATIC_INFO_ENGINE_EDITION, STATIC_INFO_VERSION
from datadog_checks.sqlserver.queries import DEADLOCK_QUERY, DEADLOCK_TIMESTAMP_ALIAS, DEADLOCK_XML_ALIAS

try:
    import datadog_agent
except ImportError:
    from ..stubs import datadog_agent

DEFAULT_COLLECTION_INTERVAL = 600  # seconds between collections when not configured
MAX_DEADLOCKS = 100  # default cap on deadlocks fetched per collection
MAX_PAYLOAD_BYTES = 19e6  # keep the serialized payload under the ~20MB event platform limit

PAYLOAD_TIMESTAMP = "deadlock_timestamp"
PAYLOAD_QUERY_SIGNATURE = "query_signatures"
PAYLOAD_XML = "xml"


def agent_check_getter(self):
    """Return the AgentCheck behind this job; used by @tracked_method for telemetry."""
    return self._check


class Deadlocks(DBMAsyncJob):
    """Async DBM job that collects deadlock events for SQL Server.

    Reads xml_deadlock_report events from the system_health extended events
    ring buffer, obfuscates every SQL statement embedded in the XML, computes
    query signatures per spid, and submits the result through the database
    monitoring activity endpoint.
    """

    def __init__(self, check, config: SQLServerConfig):
        # Internal dd.internal.* tags must not leak into the emitted payload.
        self.tags = [t for t in check.tags if not t.startswith('dd.internal')]
        self._check = check
        self._log = self._check.log
        self._config = config
        # Only deadlocks newer than this are fetched; advanced after each collection.
        self._last_deadlock_timestamp = time()
        self._max_deadlocks = config.deadlocks_config.get("max_deadlocks", MAX_DEADLOCKS)
        self._deadlock_payload_max_bytes = MAX_PAYLOAD_BYTES
        self.collection_interval = config.deadlocks_config.get("collection_interval", DEFAULT_COLLECTION_INTERVAL)
        super(Deadlocks, self).__init__(
            check,
            run_sync=True,
            enabled=self._config.deadlocks_config.get('enabled', False),
            expected_db_exceptions=(),
            min_collection_interval=self._config.min_collection_interval,
            dbms="sqlserver",
            rate_limit=1 / float(self.collection_interval),
            job_name="deadlocks",
            shutdown_callback=self._close_db_conn,
        )
        self._conn_key_prefix = "dbm-deadlocks-"

    def _close_db_conn(self):
        # Connections are pooled and closed by the check's connection manager;
        # nothing specific to release for this job.
        pass

    def obfuscate_no_except_wrapper(self, sql_text):
        """Obfuscate ``sql_text``; on failure log and return a sentinel instead of raising.

        Deadlock XML may embed arbitrary statements, so obfuscation failures must
        never abort the collection of the remaining deadlocks.
        """
        try:
            sql_text = obfuscate_sql_with_metadata(
                sql_text, self._config.obfuscator_options, replace_null_character=True
            )['query']
        except Exception as e:
            error_text = "Failed to obfuscate sql text within a deadlock"
            if self._config.log_unobfuscated_queries:
                # Format the original statement BEFORE overwriting it with the
                # sentinel; the previous order always logged the sentinel.
                error_text += "=[%s]" % sql_text
            error_text += " | err=[%s]"
            self._log.error(error_text, e)
            sql_text = "ERROR: failed to obfuscate"
        return sql_text

    def _obfuscate_xml(self, root):
        """Obfuscate, in place, all SQL text inside a deadlock report element tree.

        Returns a list of ``{"spid": int, "signature": str}`` mappings — one per
        distinct spid whose inputbuf was obfuscated.
        Raises Exception when the report has no <process-list> element.
        """
        process_list = root.find(".//process-list")
        if process_list is None:
            raise Exception("process-list element not found. The deadlock XML is in an unexpected format.")
        query_signatures = []
        # Track spids already signed; the original tested `spid in query_signatures`
        # against a list of dicts, which never matched and allowed duplicates.
        seen_spids = set()
        for process in process_list.findall('process'):
            for inputbuf in process.findall('.//inputbuf'):
                if inputbuf.text is not None:
                    inputbuf.text = self.obfuscate_no_except_wrapper(inputbuf.text)
                    spid = process.get('spid')
                    if spid is not None:
                        try:
                            spid = int(spid)
                        except ValueError:
                            self._log.error("spid not an integer. Skipping query signature computation.")
                            continue
                        if spid in seen_spids:
                            continue
                        seen_spids.add(spid)
                        query_signatures.append({"spid": spid, "signature": compute_sql_signature(inputbuf.text)})
                    else:
                        self._log.error("spid not found in process element. Skipping query signature computation.")
            for frame in process.findall('.//frame'):
                if frame.text is not None:
                    frame.text = self.obfuscate_no_except_wrapper(frame.text)
        return query_signatures

    def _get_lookback_seconds(self):
        """Return a negative offset in seconds for DATEADD, at least 60s in the past.

        The query filters events newer than GETDATE() + offset, so the value is
        the (negative) age of the last collected deadlock, floored at -60.
        """
        return min(-60, self._last_deadlock_timestamp - time())

    def _query_deadlocks(self):
        """Run DEADLOCK_QUERY and return the rows as a list of column->value dicts."""
        with self._check.connection.open_managed_default_connection(key_prefix=self._conn_key_prefix):
            with self._check.connection.get_managed_cursor(key_prefix=self._conn_key_prefix) as cursor:
                self._log.debug("collecting sql server deadlocks")
                self._log.debug(
                    "Running query [%s] with max deadlocks %s and timestamp %s",
                    DEADLOCK_QUERY,
                    self._max_deadlocks,
                    self._last_deadlock_timestamp,
                )
                try:
                    cursor.execute(DEADLOCK_QUERY, (self._max_deadlocks, self._get_lookback_seconds()))
                except Exception as e:
                    # Surface the cursor description to help diagnose driver
                    # type-mapping failures seen with some ADO providers.
                    if "Data column of Unknown ADO type" in str(e):
                        raise Exception(f"{str(e)} | cursor.description: {cursor.description}")
                    raise e

                columns = [column[0] for column in cursor.description]
                return [dict(zip(columns, row)) for row in cursor.fetchall()]

    def _create_deadlock_rows(self):
        """Fetch, parse and obfuscate deadlock rows, honoring the payload size cap.

        Rows whose XML cannot be parsed or obfuscated are skipped; collection of
        the remaining rows continues. Updates the last-collection timestamp.
        """
        db_rows = self._query_deadlocks()
        deadlock_events = []
        total_number_of_characters = 0
        for i, row in enumerate(db_rows):
            try:
                root = ET.fromstring(row[DEADLOCK_XML_ALIAS])
            except Exception as e:
                self._log.error(
                    """An error occurred while collecting SQLServer deadlocks.
                 One of the deadlock XMLs couldn't be parsed. The error: {}. XML: {}""".format(
                        e, row
                    )
                )
                continue
            query_signatures = []
            try:
                query_signatures = self._obfuscate_xml(root)
            except Exception as e:
                error = "An error occurred while obfuscating SQLServer deadlocks. The error: {}".format(e)
                self._log.error(error)
                continue

            # Serialize once: the string is both shipped in the payload and
            # measured for the size cap. The original added len(row) (dict key
            # count) + len(query_signatures) (list length), so the cap was
            # effectively never enforced.
            xml_string = ET.tostring(root, encoding='unicode')
            total_number_of_characters += len(xml_string) + len(str(query_signatures))
            if total_number_of_characters > self._deadlock_payload_max_bytes:
                self._log.warning(
                    """We've dropped {} deadlocks from a total of {} deadlocks as the
                 max deadlock payload of {} bytes was exceeded.""".format(
                        len(db_rows) - i, len(db_rows), self._deadlock_payload_max_bytes
                    )
                )
                break

            deadlock_events.append(
                {
                    PAYLOAD_TIMESTAMP: row[DEADLOCK_TIMESTAMP_ALIAS],
                    PAYLOAD_XML: xml_string,
                    PAYLOAD_QUERY_SIGNATURE: query_signatures,
                }
            )
        self._last_deadlock_timestamp = time()
        return deadlock_events

    @tracked_method(agent_check_getter=agent_check_getter)
    def collect_deadlocks(self):
        """Collect deadlocks and submit one activity event when any were found."""
        rows = self._create_deadlock_rows()
        # Send payload only if deadlocks found
        if rows:
            deadlocks_event = self._create_deadlock_event(rows)
            payload = json.dumps(deadlocks_event, default=default_json_event_encoding)
            self._log.debug("Deadlocks payload: %s", str(payload))
            self._check.database_monitoring_query_activity(payload)

    def _create_deadlock_event(self, deadlock_rows):
        """Wrap deadlock rows in the dbm-activity event envelope."""
        event = {
            "host": self._check.resolved_hostname,
            "ddagentversion": datadog_agent.get_version(),
            "ddsource": "sqlserver",
            "dbm_type": "deadlocks",
            "collection_interval": self.collection_interval,
            "ddtags": self.tags,
            "timestamp": time() * 1000,
            'sqlserver_version': self._check.static_info_cache.get(STATIC_INFO_VERSION, ""),
            'sqlserver_engine_edition': self._check.static_info_cache.get(STATIC_INFO_ENGINE_EDITION, ""),
            "cloud_metadata": self._config.cloud_metadata,
            "sqlserver_deadlocks": deadlock_rows,
        }
        return event

    def run_job(self):
        # Entry point invoked by the DBMAsyncJob loop.
        self.collect_deadlocks()
b/sqlserver/datadog_checks/sqlserver/queries.py @@ -214,6 +214,23 @@ FK.name, FK.parent_object_id, FK.referenced_object_id; """ +DEADLOCK_TIMESTAMP_ALIAS = "timestamp" +DEADLOCK_XML_ALIAS = "event_xml" +DEADLOCK_QUERY = """ +SELECT TOP(?) xdr.value('@timestamp', 'datetime') AS [{timestamp}], + xdr.query('.') AS [{xml}] +FROM (SELECT CAST([target_data] AS XML) AS Target_Data + FROM sys.dm_xe_session_targets AS xt + INNER JOIN sys.dm_xe_sessions AS xs ON xs.address = xt.event_session_address + WHERE xs.name = N'system_health' + AND xt.target_name = N'ring_buffer' + ) AS XML_Data +CROSS APPLY Target_Data.nodes('RingBufferTarget/event[@name="xml_deadlock_report"]') AS XEventData(xdr) +WHERE xdr.value('@timestamp', 'datetime') >= DATEADD(SECOND, ?, GETDATE()) +;""".format( + **{"timestamp": DEADLOCK_TIMESTAMP_ALIAS, "xml": DEADLOCK_XML_ALIAS} +) + def get_query_ao_availability_groups(sqlserver_major_version): """ diff --git a/sqlserver/datadog_checks/sqlserver/schemas.py b/sqlserver/datadog_checks/sqlserver/schemas.py index 8d645796ef615..b40dd59415fa8 100644 --- a/sqlserver/datadog_checks/sqlserver/schemas.py +++ b/sqlserver/datadog_checks/sqlserver/schemas.py @@ -32,7 +32,6 @@ class SubmitData: - def __init__(self, submit_data_function, base_event, logger): self._submit_to_agent_queue = submit_data_function self._base_event = base_event diff --git a/sqlserver/datadog_checks/sqlserver/sqlserver.py b/sqlserver/datadog_checks/sqlserver/sqlserver.py index c2f9599b308d5..427aab153a4e5 100644 --- a/sqlserver/datadog_checks/sqlserver/sqlserver.py +++ b/sqlserver/datadog_checks/sqlserver/sqlserver.py @@ -1,6 +1,7 @@ # (C) Datadog, Inc. 
2018-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) + from __future__ import division import copy @@ -12,7 +13,11 @@ from datadog_checks.base import AgentCheck from datadog_checks.base.config import is_affirmative from datadog_checks.base.utils.db import QueryExecutor, QueryManager -from datadog_checks.base.utils.db.utils import default_json_event_encoding, resolve_db_host, tracked_query +from datadog_checks.base.utils.db.utils import ( + default_json_event_encoding, + resolve_db_host, + tracked_query, +) from datadog_checks.base.utils.serialization import json from datadog_checks.sqlserver.activity import SqlserverActivity from datadog_checks.sqlserver.agent_history import SqlserverAgentHistory @@ -23,6 +28,7 @@ SqlserverDBFragmentationMetrics, SqlserverIndexUsageMetrics, ) +from datadog_checks.sqlserver.deadlocks import Deadlocks from datadog_checks.sqlserver.metadata import SqlserverMetadata from datadog_checks.sqlserver.schemas import Schemas from datadog_checks.sqlserver.statements import SqlserverStatementMetrics @@ -135,6 +141,7 @@ def __init__(self, name, init_config, instances): self.sql_metadata = SqlserverMetadata(self, self._config) self.activity = SqlserverActivity(self, self._config) self.agent_history = SqlserverAgentHistory(self, self._config) + self.deadlocks = Deadlocks(self, self._config) self.static_info_cache = TTLCache( maxsize=100, @@ -171,6 +178,7 @@ def cancel(self): self.activity.cancel() self.sql_metadata.cancel() self._schemas.cancel() + self.deadlocks.cancel() def config_checks(self): if self._config.autodiscovery and self.instance.get("database"): @@ -785,6 +793,7 @@ def check(self, _): self.activity.run_job_loop(self.tags) self.sql_metadata.run_job_loop(self.tags) self._schemas.run_job_loop(self.tags) + self.deadlocks.run_job_loop(self.tags) else: self.log.debug("Skipping check") diff --git a/sqlserver/hatch.toml b/sqlserver/hatch.toml index 843ea38127536..b996a56b88920 100644 --- 
a/sqlserver/hatch.toml +++ b/sqlserver/hatch.toml @@ -14,7 +14,7 @@ setup = ["single", "ha"] [[envs.default.matrix]] python = ["3.12"] os = ["windows"] -driver = ["SQLOLEDB", "SQLNCLI11", "MSOLEDBSQL", "odbc"] +driver = ["SQLOLEDB", "MSOLEDBSQL", "odbc"] version = ["2019", "2022"] setup = ["single"] diff --git a/sqlserver/tests/compose-ha/sql/aoag_primary.sql b/sqlserver/tests/compose-ha/sql/aoag_primary.sql index bc6574d39e2f6..3a3193f6dd662 100644 --- a/sqlserver/tests/compose-ha/sql/aoag_primary.sql +++ b/sqlserver/tests/compose-ha/sql/aoag_primary.sql @@ -121,6 +121,21 @@ CREATE USER bob FOR LOGIN bob; CREATE USER fred FOR LOGIN fred; GO +-- Create a simple table for deadlocks +CREATE TABLE [datadog_test-1].dbo.deadlocks (a int PRIMARY KEY not null ,b int null); + +INSERT INTO [datadog_test-1].dbo.deadlocks VALUES (1,10),(2,20),(3,30) + +-- Grant permissions to bob and fred to update the deadlocks table +GRANT INSERT ON [datadog_test-1].dbo.deadlocks TO bob; +GRANT UPDATE ON [datadog_test-1].dbo.deadlocks TO bob; +GRANT DELETE ON [datadog_test-1].dbo.deadlocks TO bob; + +GRANT INSERT ON [datadog_test-1].dbo.deadlocks TO fred; +GRANT UPDATE ON [datadog_test-1].dbo.deadlocks TO fred; +GRANT DELETE ON [datadog_test-1].dbo.deadlocks TO fred; +GO + EXEC sp_addrolemember 'db_datareader', 'bob' EXEC sp_addrolemember 'db_datareader', 'fred' EXEC sp_addrolemember 'db_datawriter', 'bob' diff --git a/sqlserver/tests/compose-high-cardinality-windows/setup.sql b/sqlserver/tests/compose-high-cardinality-windows/setup.sql index f33ceff2df42e..4446dbdb52c70 100644 --- a/sqlserver/tests/compose-high-cardinality-windows/setup.sql +++ b/sqlserver/tests/compose-high-cardinality-windows/setup.sql @@ -118,6 +118,21 @@ CREATE USER fred FOR LOGIN fred; -- database so it's copied by default to new databases GO +-- Create a simple table for deadlocks +CREATE TABLE [datadog_test-1].dbo.deadlocks (a int PRIMARY KEY not null ,b int null); + +INSERT INTO [datadog_test-1].dbo.deadlocks 
VALUES (1,10),(2,20),(3,30) + +-- Grant permissions to bob and fred to update the deadlocks table +GRANT INSERT ON [datadog_test-1].dbo.deadlocks TO bob; +GRANT UPDATE ON [datadog_test-1].dbo.deadlocks TO bob; +GRANT DELETE ON [datadog_test-1].dbo.deadlocks TO bob; + +GRANT INSERT ON [datadog_test-1].dbo.deadlocks TO fred; +GRANT UPDATE ON [datadog_test-1].dbo.deadlocks TO fred; +GRANT DELETE ON [datadog_test-1].dbo.deadlocks TO fred; +GO + EXEC sp_addrolemember 'db_datareader', 'bob' EXEC sp_addrolemember 'db_datareader', 'fred' EXEC sp_addrolemember 'db_datawriter', 'bob' diff --git a/sqlserver/tests/compose-high-cardinality/setup.sql b/sqlserver/tests/compose-high-cardinality/setup.sql index 839fd7c690679..f9d89b4a5a9df 100644 --- a/sqlserver/tests/compose-high-cardinality/setup.sql +++ b/sqlserver/tests/compose-high-cardinality/setup.sql @@ -251,6 +251,21 @@ CREATE CLUSTERED INDEX thingsindex ON [datadog_test-1].dbo.ϑings (name); DECLARE @table_prefix VARCHAR(100) = 'CREATE TABLE [datadog_test-1].dbo.' 
DECLARE @table_columns VARCHAR(500) = ' (id INT NOT NULL IDENTITY, col1_txt TEXT, col2_txt TEXT, col3_txt TEXT, col4_txt TEXT, col5_txt TEXT, col6_txt TEXT, col7_txt TEXT, col8_txt TEXT, col9_txt TEXT, col10_txt TEXT, col11_float FLOAT, col12_float FLOAT, col13_float FLOAT, col14_int INT, col15_int INT, col16_int INT, col17_date DATE, PRIMARY KEY(id));'; +-- Create a simple table for deadlocks +CREATE TABLE [datadog_test-1].dbo.deadlocks (a int PRIMARY KEY not null ,b int null); + +INSERT INTO [datadog_test-1].dbo.deadlocks VALUES (1,10),(2,20),(3,30) + +-- Grant permissions to bob and fred to update the deadlocks table +GRANT INSERT ON [datadog_test-1].dbo.deadlocks TO bob; +GRANT UPDATE ON [datadog_test-1].dbo.deadlocks TO bob; +GRANT DELETE ON [datadog_test-1].dbo.deadlocks TO bob; + +GRANT INSERT ON [datadog_test-1].dbo.deadlocks TO fred; +GRANT UPDATE ON [datadog_test-1].dbo.deadlocks TO fred; +GRANT DELETE ON [datadog_test-1].dbo.deadlocks TO fred; +GO + -- Create a main table which contains high cardinality data for testing. 
DECLARE @main_table_query VARCHAR(600) = @table_prefix + 'high_cardinality' + @table_columns; EXEC (@main_table_query); diff --git a/sqlserver/tests/compose-windows/setup.sql b/sqlserver/tests/compose-windows/setup.sql index da285a3535baf..5160b7012f40d 100644 --- a/sqlserver/tests/compose-windows/setup.sql +++ b/sqlserver/tests/compose-windows/setup.sql @@ -120,6 +120,21 @@ CREATE USER fred FOR LOGIN fred; -- database so it's copied by default to new databases GO +-- Create a simple table for deadlocks +CREATE TABLE [datadog_test-1].dbo.deadlocks (a int PRIMARY KEY not null ,b int null); + +INSERT INTO [datadog_test-1].dbo.deadlocks VALUES (1,10),(2,20),(3,30) + +-- Grant permissions to bob and fred to update the deadlocks table +GRANT INSERT ON [datadog_test-1].dbo.deadlocks TO bob; +GRANT UPDATE ON [datadog_test-1].dbo.deadlocks TO bob; +GRANT DELETE ON [datadog_test-1].dbo.deadlocks TO bob; + +GRANT INSERT ON [datadog_test-1].dbo.deadlocks TO fred; +GRANT UPDATE ON [datadog_test-1].dbo.deadlocks TO fred; +GRANT DELETE ON [datadog_test-1].dbo.deadlocks TO fred; +GO + EXEC sp_addrolemember 'db_datareader', 'bob' EXEC sp_addrolemember 'db_datareader', 'fred' EXEC sp_addrolemember 'db_datawriter', 'bob' diff --git a/sqlserver/tests/compose/setup.sql b/sqlserver/tests/compose/setup.sql index c8749702677a9..5cb9b50a42e11 100644 --- a/sqlserver/tests/compose/setup.sql +++ b/sqlserver/tests/compose/setup.sql @@ -104,6 +104,21 @@ CREATE USER fred FOR LOGIN fred; CREATE CLUSTERED INDEX thingsindex ON [datadog_test-1].dbo.ϑings (name); GO +-- Create a simple table for deadlocks +CREATE TABLE [datadog_test-1].dbo.deadlocks (a int PRIMARY KEY not null ,b int null); + +INSERT INTO [datadog_test-1].dbo.deadlocks VALUES (1,10),(2,20),(3,30) + +-- Grant permissions to bob and fred to update the deadlocks table +GRANT INSERT ON [datadog_test-1].dbo.deadlocks TO bob; +GRANT UPDATE ON [datadog_test-1].dbo.deadlocks TO bob; +GRANT DELETE ON [datadog_test-1].dbo.deadlocks TO bob; + 
+GRANT INSERT ON [datadog_test-1].dbo.deadlocks TO fred; +GRANT UPDATE ON [datadog_test-1].dbo.deadlocks TO fred; +GRANT DELETE ON [datadog_test-1].dbo.deadlocks TO fred; +GO + EXEC sp_addrolemember 'db_datareader', 'bob' EXEC sp_addrolemember 'db_datareader', 'fred' EXEC sp_addrolemember 'db_datawriter', 'bob' diff --git a/sqlserver/tests/deadlocks/sqlserver_deadlock_event.xml b/sqlserver/tests/deadlocks/sqlserver_deadlock_event.xml new file mode 100644 index 0000000000000..0384345f9c6eb --- /dev/null +++ b/sqlserver/tests/deadlocks/sqlserver_deadlock_event.xml @@ -0,0 +1,140 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + unknown + + + unknown + + + + update [datadog_test-1].[dbo].[t] set n=1 where n=1 + + rollback + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + unknown + + + unknown + + + + begin TRANSACTION + + update [datadog_test-1].[dbo].[t] set n=1 where n=1 + + update [datadog_test-1].[dbo].[t] set n=2 where n=2 + + rollback + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/sqlserver/tests/test_deadlocks.py b/sqlserver/tests/test_deadlocks.py new file mode 100644 index 0000000000000..1099a0dfcd1f6 --- /dev/null +++ b/sqlserver/tests/test_deadlocks.py @@ -0,0 +1,328 @@ +# (C) Datadog, Inc. 
2024-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +from __future__ import unicode_literals + +import concurrent +import logging +import os +import re +import xml.etree.ElementTree as ET +from copy import copy, deepcopy +from threading import Event + +import pytest +from mock import patch + +from datadog_checks.sqlserver import SQLServer +from datadog_checks.sqlserver.deadlocks import ( + PAYLOAD_QUERY_SIGNATURE, + PAYLOAD_TIMESTAMP, + Deadlocks, +) +from datadog_checks.sqlserver.queries import DEADLOCK_TIMESTAMP_ALIAS, DEADLOCK_XML_ALIAS + +from .common import CHECK_NAME +from .utils import not_windows_ado + +try: + import pyodbc +except ImportError: + pyodbc = None + + +@pytest.fixture +def dbm_instance(instance_docker): + instance_docker['dbm'] = True + # set a very small collection interval so the tests go fast + instance_docker['query_activity'] = { + 'enabled': False, + } + # do not need query_metrics for these tests + instance_docker['query_metrics'] = {'enabled': False} + instance_docker['procedure_metrics'] = {'enabled': False} + instance_docker['collect_settings'] = {'enabled': False} + instance_docker['deadlocks_collection'] = {'enabled': True, 'collection_interval': 0.1} + return copy(instance_docker) + + +def run_check_and_return_deadlock_payloads(dd_run_check, check, aggregator): + dd_run_check(check) + dbm_activity = aggregator.get_event_platform_events("dbm-activity") + matched = [] + for event in dbm_activity: + if "sqlserver_deadlocks" in event: + matched.append(event) + return matched + + +def _get_conn_for_user(instance_docker, user, timeout=1, _autocommit=False): + # Make DB connection + conn_str = 'DRIVER={};Server={};Database=master;UID={};PWD={};TrustServerCertificate=yes;'.format( + instance_docker['driver'], instance_docker['host'], user, "Password12!" 
+ ) + conn = pyodbc.connect(conn_str, timeout=timeout, autocommit=_autocommit) + conn.timeout = timeout + return conn + + +def _run_first_deadlock_query(conn, event1, event2): + exception_text = "" + try: + conn.cursor().execute("BEGIN TRAN foo;") + conn.cursor().execute("UPDATE [datadog_test-1].dbo.deadlocks SET b = b + 10 WHERE a = 1;") + event1.set() + event2.wait() + conn.cursor().execute("UPDATE [datadog_test-1].dbo.deadlocks SET b = b + 100 WHERE a = 2;") + except Exception as e: + # Exception is expected due to a deadlock + exception_text = str(e) + pass + conn.commit() + return exception_text + + +def _run_second_deadlock_query(conn, event1, event2): + exception_text = "" + try: + event1.wait() + conn.cursor().execute("BEGIN TRAN bar;") + conn.cursor().execute("UPDATE [datadog_test-1].dbo.deadlocks SET b = b + 10 WHERE a = 2;") + event2.set() + conn.cursor().execute("UPDATE [datadog_test-1].dbo.deadlocks SET b = b + 20 WHERE a = 1;") + except Exception as e: + # Exception is expected due to a deadlock + exception_text = str(e) + pass + conn.commit() + return exception_text + + +def _create_deadlock(bob_conn, fred_conn): + executor = concurrent.futures.thread.ThreadPoolExecutor(2) + event1 = Event() + event2 = Event() + + futures_first_query = executor.submit(_run_first_deadlock_query, bob_conn, event1, event2) + futures_second_query = executor.submit(_run_second_deadlock_query, fred_conn, event1, event2) + exception_1_text = futures_first_query.result() + exception_2_text = futures_second_query.result() + executor.shutdown() + return "deadlock" in exception_1_text or "deadlock" in exception_2_text + + +# TODO: remove @not_windows_ado when the functionality is supported for MSOLEDBSQL +@not_windows_ado +@pytest.mark.integration +@pytest.mark.usefixtures('dd_environment') +def test_deadlocks(aggregator, dd_run_check, init_config, dbm_instance): + sqlserver_check = SQLServer(CHECK_NAME, {}, [dbm_instance]) + + deadlock_payloads = 
run_check_and_return_deadlock_payloads(dd_run_check, sqlserver_check, aggregator) + assert not deadlock_payloads, "shouldn't have sent an empty payload" + + created_deadlock = False + # Rarely instead of creating a deadlock one of the transactions time outs + for _ in range(0, 3): + bob_conn = _get_conn_for_user(dbm_instance, 'bob', 3) + fred_conn = _get_conn_for_user(dbm_instance, 'fred', 3) + created_deadlock = _create_deadlock(bob_conn, fred_conn) + bob_conn.close() + fred_conn.close() + if created_deadlock: + break + try: + assert created_deadlock, "Couldn't create a deadlock, exiting" + except AssertionError as e: + raise e + + dbm_instance_no_dbm = deepcopy(dbm_instance) + dbm_instance_no_dbm['dbm'] = False + sqlserver_check_no_dbm = SQLServer(CHECK_NAME, init_config, [dbm_instance_no_dbm]) + deadlock_payloads = run_check_and_return_deadlock_payloads(dd_run_check, sqlserver_check_no_dbm, aggregator) + assert len(deadlock_payloads) == 0, "deadlock should be behind dbm" + + dbm_instance['dbm_enabled'] = True + deadlock_payloads = run_check_and_return_deadlock_payloads(dd_run_check, sqlserver_check, aggregator) + try: + assert len(deadlock_payloads) == 1, "Should have collected one deadlock payload, but collected: {}.".format( + len(deadlock_payloads) + ) + except AssertionError as e: + raise e + deadlocks = deadlock_payloads[0]['sqlserver_deadlocks'] + found = 0 + for d in deadlocks: + assert "ERROR" not in d, "Shouldn't have generated an error" + assert isinstance(d, dict), "sqlserver_deadlocks should be a dictionary" + try: + root = ET.fromstring(d["xml"]) + except ET.ParseError as e: + logging.error("deadlock events: %s", str(deadlocks)) + raise e + process_list = root.find(".//process-list") + for process in process_list.findall('process'): + if process.find('inputbuf').text == "UPDATE [datadog_test-1].dbo.deadlocks SET b = b + 100 WHERE a = 2;": + found += 1 + try: + assert ( + found == 1 + ), "Should have collected the UPDATE statement in deadlock exactly 
once, but collected: {}.".format(found) + except AssertionError as e: + logging.error("deadlock payload: %s", str(deadlocks)) + raise e + + +DEADLOCKS_PLAN_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "deadlocks") + + +def _load_test_deadlocks_xml(filename): + with open(os.path.join(DEADLOCKS_PLAN_DIR, filename), 'r') as f: + return f.read() + + +@pytest.fixture +def deadlocks_collection_instance(instance_docker): + instance_docker['dbm'] = True + instance_docker['deadlocks_collection'] = { + 'enabled': True, + 'collection_interval': 1.0, + } + instance_docker['min_collection_interval'] = 1 + # do not need other dbm metrics + instance_docker['query_activity'] = {'enabled': False} + instance_docker['query_metrics'] = {'enabled': False} + instance_docker['procedure_metrics'] = {'enabled': False} + instance_docker['collect_settings'] = {'enabled': False} + return copy(instance_docker) + + +def get_deadlock_obj(deadlocks_collection_instance): + check = SQLServer(CHECK_NAME, {}, [deadlocks_collection_instance]) + return check.deadlocks + + +def test__create_deadlock_rows(deadlocks_collection_instance): + deadlocks_obj = get_deadlock_obj(deadlocks_collection_instance) + xml = _load_test_deadlocks_xml("sqlserver_deadlock_event.xml") + with patch.object( + Deadlocks, + '_query_deadlocks', + return_value=[{DEADLOCK_TIMESTAMP_ALIAS: "2024-09-20T12:07:16.647000", DEADLOCK_XML_ALIAS: xml}], + ): + rows = deadlocks_obj._create_deadlock_rows() + assert len(rows) == 1, "Should have created one deadlock row" + row = rows[0] + assert row[PAYLOAD_TIMESTAMP], "Should have a timestamp" + query_signatures = row[PAYLOAD_QUERY_SIGNATURE] + assert len(query_signatures) == 2, "Should have two query signatures" + first_mapping = query_signatures[0] + assert "spid" in first_mapping, "Should have spid in query signatures" + assert isinstance(first_mapping["spid"], int), "spid should be an int" + + +def test_deadlock_xml_bad_format(deadlocks_collection_instance): + test_xml = 
""" + + + + + + + + + + + + + """ + deadlocks_obj = get_deadlock_obj(deadlocks_collection_instance) + root = ET.fromstring(test_xml) + try: + deadlocks_obj._obfuscate_xml(root) + except Exception as e: + result = str(e) + assert result == "process-list element not found. The deadlock XML is in an unexpected format." + else: + AssertionError("Should have raised an exception for bad XML format") + + +def test_deadlock_calls_obfuscator(deadlocks_collection_instance): + test_xml = """ + + + + + + + + + + + + \nunknown + \nunknown + + \nUPDATE [datadog_test-1].dbo.deadlocks SET b = b + 100 WHERE a = 2; + + + + \nunknown + \nunknown + + \nUPDATE [datadog_test-1].dbo.deadlocks SET b = b + 20 WHERE a = 1; + + + + + + + """ + + expected_xml_string = ( + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + " " + "obfuscated " + "obfuscated " + " " + "obfuscated " + " " + " " + " " + "obfuscated " + "obfuscated " + " " + "obfuscated " + " " + " " + " " + " " + " " + "" + ) + + with patch('datadog_checks.sqlserver.deadlocks.Deadlocks.obfuscate_no_except_wrapper', return_value="obfuscated"): + deadlocks_obj = get_deadlock_obj(deadlocks_collection_instance) + root = ET.fromstring(test_xml) + deadlocks_obj._obfuscate_xml(root) + result_string = ET.tostring(root, encoding='unicode') + result_string = result_string.replace('\t', '').replace('\n', '') + result_string = re.sub(r'\s{2,}', ' ', result_string) + assert expected_xml_string == result_string + + +def test__get_lookback_seconds(deadlocks_collection_instance): + deadlocks_obj = get_deadlock_obj(deadlocks_collection_instance) + deadlocks_obj._last_deadlock_timestamp = 100 + lookback_seconds = deadlocks_obj._get_lookback_seconds() + assert isinstance(lookback_seconds, float), "Should return a float"