From 598a7fb52316df7e4c722c2611d113b18eeb604c Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Wed, 18 Dec 2024 14:49:39 -0700 Subject: [PATCH 1/6] feat(uptime): add initial table migration --- .../0001_uptime_monitor_checks.py | 96 +++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 snuba/snuba_migrations/uptime_monitor_checks/0001_uptime_monitor_checks.py diff --git a/snuba/snuba_migrations/uptime_monitor_checks/0001_uptime_monitor_checks.py b/snuba/snuba_migrations/uptime_monitor_checks/0001_uptime_monitor_checks.py new file mode 100644 index 00000000000..a227c265fac --- /dev/null +++ b/snuba/snuba_migrations/uptime_monitor_checks/0001_uptime_monitor_checks.py @@ -0,0 +1,96 @@ +from typing import List, Sequence + +from snuba.clickhouse.columns import UUID, Column, DateTime, String, UInt +from snuba.clusters.storage_sets import StorageSetKey +from snuba.migrations import migration, operations, table_engines +from snuba.migrations.columns import MigrationModifiers as Modifiers +from snuba.migrations.operations import AddIndicesData, OperationTarget, SqlOperation + +storage_set = StorageSetKey.UPTIME_MONITOR_CHECKS +table_prefix = "uptime_monitor_checks" +local_table_name = f"{table_prefix}_local" +dist_table_name = f"{table_prefix}_dist" + +## what about all the fancy codecs? do we need those? +columns: List[Column[Modifiers]] = [ + Column("organization_id", UInt(64)), + Column("project_id", UInt(64)), + Column("environment", String(Modifiers(nullable=True, low_cardinality=True))), + Column("uptime_subscription_id", UInt(64)), + Column("uptime_check_id", UUID()), + Column("scheduled_check_time", DateTime()), + Column("timestamp", DateTime()), + Column("_sort_timestamp", DateTime()), + Column("duration", UInt(64)), + Column("region_id", UInt(16, Modifiers(nullable=True))), + Column("check_status", String(Modifiers(low_cardinality=True))), + Column( + "check_status_reason", + String(Modifiers(nullable=True, low_cardinality=True)), + ), + Column("http_status_code", UInt(16)), + Column("trace_id", UUID()), + Column("retention_days", UInt(16)), +] + +indices: Sequence[AddIndicesData] = [ + AddIndicesData( + name="bf_trace_id", + expression="trace_id", + type="bloom_filter", + granularity=1, + ) +] + + +class Migration(migration.ClickhouseNodeMigration): + blocking = False + + def forwards_ops(self) -> Sequence[SqlOperation]: + return [ + operations.CreateTable( + storage_set=storage_set, + table_name=local_table_name, + columns=columns, + engine=table_engines.ReplacingMergeTree( + # do i actually need primary key to be different than sorting key? + primary_key="(organization_id, project_id, _sort_timestamp, uptime_check_id)", + order_by="(organization_id, project_id, uptime_subscription_id, _sort_timestamp, uptime_check_id)", + partition_by="(retention_days, toMonday(_sort_timestamp))", + settings={"index_granularity": "8192"}, + storage_set=storage_set, + ttl="_sort_timestamp + toIntervalDay(retention_days)", + ), + target=OperationTarget.LOCAL, + ), + operations.CreateTable( + storage_set=storage_set, + table_name=dist_table_name, + columns=columns, + engine=table_engines.Distributed( + local_table_name=local_table_name, + sharding_key="cityHash64(reinterpretAsUInt128(trace_id))", + ), + target=OperationTarget.DISTRIBUTED, + ), + operations.AddIndices( + storage_set=storage_set, + table_name=local_table_name, + indices=indices, + target=OperationTarget.LOCAL, + ), + ] + + def backwards_ops(self) -> Sequence[SqlOperation]: + return [ + operations.DropTable( + storage_set=storage_set, + table_name=dist_table_name, + target=OperationTarget.DISTRIBUTED, + ), + operations.DropTable( + storage_set=storage_set, + table_name=local_table_name, + target=OperationTarget.LOCAL, + ), + ] From 82ae43935de9f9ce0049e483b12ed1c3dd35d6ea Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Wed, 18 Dec 2024 15:00:02 -0700 Subject: [PATCH 2/6] remove comment --- .../uptime_monitor_checks/0001_uptime_monitor_checks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/snuba/snuba_migrations/uptime_monitor_checks/0001_uptime_monitor_checks.py b/snuba/snuba_migrations/uptime_monitor_checks/0001_uptime_monitor_checks.py index a227c265fac..6f2ebd58e3f 100644 --- a/snuba/snuba_migrations/uptime_monitor_checks/0001_uptime_monitor_checks.py +++ b/snuba/snuba_migrations/uptime_monitor_checks/0001_uptime_monitor_checks.py @@ -53,7 +53,6 @@ def forwards_ops(self) -> Sequence[SqlOperation]: table_name=local_table_name, columns=columns, engine=table_engines.ReplacingMergeTree( - # do i actually need primary key to be different than sorting key? primary_key="(organization_id, project_id, _sort_timestamp, uptime_check_id)", order_by="(organization_id, project_id, uptime_subscription_id, _sort_timestamp, uptime_check_id)", partition_by="(retention_days, toMonday(_sort_timestamp))", From 753312df831a40b4b61a81a44861834a582fca6a Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Wed, 18 Dec 2024 15:19:04 -0700 Subject: [PATCH 3/6] pr feedback --- .../0001_uptime_monitor_checks.py | 28 ++++--------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/snuba/snuba_migrations/uptime_monitor_checks/0001_uptime_monitor_checks.py b/snuba/snuba_migrations/uptime_monitor_checks/0001_uptime_monitor_checks.py index 6f2ebd58e3f..c74deb2f676 100644 --- a/snuba/snuba_migrations/uptime_monitor_checks/0001_uptime_monitor_checks.py +++ b/snuba/snuba_migrations/uptime_monitor_checks/0001_uptime_monitor_checks.py @@ -4,7 +4,7 @@ from snuba.clusters.storage_sets import StorageSetKey from snuba.migrations import migration, operations, table_engines from snuba.migrations.columns import MigrationModifiers as Modifiers -from snuba.migrations.operations import AddIndicesData, OperationTarget, SqlOperation +from snuba.migrations.operations import OperationTarget, SqlOperation storage_set = StorageSetKey.UPTIME_MONITOR_CHECKS table_prefix = "uptime_monitor_checks" @@ -20,8 +20,7 @@ Column("uptime_check_id", UUID()), Column("scheduled_check_time", DateTime()), Column("timestamp", DateTime()), - Column("_sort_timestamp", DateTime()), - Column("duration", UInt(64)), + Column("duration_ms", UInt(64)), Column("region_id", UInt(16, Modifiers(nullable=True))), Column("check_status", String(Modifiers(low_cardinality=True))), Column( @@ -33,15 +32,6 @@ Column("retention_days", UInt(16)), ] -indices: Sequence[AddIndicesData] = [ - AddIndicesData( - name="bf_trace_id", - expression="trace_id", - type="bloom_filter", - granularity=1, - ) -] - class Migration(migration.ClickhouseNodeMigration): blocking = False @@ -53,12 +43,12 @@ def forwards_ops(self) -> Sequence[SqlOperation]: table_name=local_table_name, columns=columns, engine=table_engines.ReplacingMergeTree( - primary_key="(organization_id, project_id, _sort_timestamp, uptime_check_id)", - order_by="(organization_id, project_id, uptime_subscription_id, _sort_timestamp, uptime_check_id)", - partition_by="(retention_days, toMonday(_sort_timestamp))", + primary_key="(organization_id, project_id, timestamp, uptime_check_id, trace_id)", + order_by="(organization_id, project_id, timestamp, uptime_check_id, trace_id)", + partition_by="(retention_days, toMonday(timestamp))", settings={"index_granularity": "8192"}, storage_set=storage_set, - ttl="_sort_timestamp + toIntervalDay(retention_days)", + ttl="timestamp + toIntervalDay(retention_days)", ), target=OperationTarget.LOCAL, ), @@ -72,12 +62,6 @@ def forwards_ops(self) -> Sequence[SqlOperation]: ), target=OperationTarget.DISTRIBUTED, ), - operations.AddIndices( - storage_set=storage_set, - table_name=local_table_name, - indices=indices, - target=OperationTarget.LOCAL, - ), ] def backwards_ops(self) -> Sequence[SqlOperation]: From 4b47987de6e81c598e433a9fc4399df35a758867 Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Wed, 18 Dec 2024 15:25:47 -0700 Subject: [PATCH 4/6] use eap storage set --- snuba/migrations/groups.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snuba/migrations/groups.py b/snuba/migrations/groups.py index 579af8a6987..1c023e23f50 100644 --- a/snuba/migrations/groups.py +++ b/snuba/migrations/groups.py @@ -183,7 +183,7 @@ def __init__( ), MigrationGroup.UPTIME_MONITOR_CHECKS: _MigrationGroup( loader=UptimeMonitorChecksLoader(), - storage_sets_keys={StorageSetKey.UPTIME_MONITOR_CHECKS}, + storage_sets_keys={StorageSetKey.EVENTS_ANALYTICS_PLATFORM}, readiness_state=ReadinessState.LIMITED, ), } From 64923e02313d65aad39c4d9077d6222e1dfb882a Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Wed, 18 Dec 2024 16:57:18 -0700 Subject: [PATCH 5/6] update schema --- scripts/load_uptime_checks.py | 14 ++++++++------ snuba/migrations/groups.py | 2 +- .../0001_uptime_monitor_checks.py | 15 ++++++++------- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/scripts/load_uptime_checks.py b/scripts/load_uptime_checks.py index 8795f0bf53b..07965dd1bdf 100644 --- a/scripts/load_uptime_checks.py +++ b/scripts/load_uptime_checks.py @@ -13,14 +13,14 @@ query = """ INSERT INTO default.uptime_monitor_checks_local ( organization_id, project_id, environment, uptime_subscription_id, uptime_check_id, - scheduled_check_time, timestamp, duration, region_id, check_status, + scheduled_check_time, timestamp, duration_ms, region_slug, check_status, check_status_reason, http_status_code, trace_id, retention_days ) FORMAT JSONEachRow """ total_records = 0 -for project_id in range(1, 2): +for project_id in range(2, 100): project_data = [] for minute in range(24 * 60 * 90): # 24 hours * 60 minutes * 90 days timestamp = base_time + datetime.timedelta(minutes=minute) @@ -36,10 +36,12 @@ "environment": "production", "uptime_subscription_id": random.randint(1, 3) * project_id, "uptime_check_id": str(uuid.uuid4()), - "scheduled_check_time": scheduled_time.strftime("%Y-%m-%d %H:%M:%S"), - "timestamp": timestamp.strftime("%Y-%m-%d %H:%M:%S"), - "duration": random.randint(1, 1000), - "region_id": random.randint(1, 3), + "scheduled_check_time": scheduled_time.strftime("%Y-%m-%d %H:%M:%S.%f")[ + :-3 + ], + "timestamp": timestamp.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3], + "duration_ms": random.randint(1, 1000), + "region_slug": f"region-{random.randint(1, 3)}", "check_status": check_status, "check_status_reason": "Timeout error" if check_status == "failure" diff --git a/snuba/migrations/groups.py b/snuba/migrations/groups.py index 1c023e23f50..579af8a6987 100644 --- a/snuba/migrations/groups.py +++ b/snuba/migrations/groups.py @@ -183,7 +183,7 @@ def __init__( ), MigrationGroup.UPTIME_MONITOR_CHECKS: _MigrationGroup( loader=UptimeMonitorChecksLoader(), - storage_sets_keys={StorageSetKey.EVENTS_ANALYTICS_PLATFORM}, + storage_sets_keys={StorageSetKey.UPTIME_MONITOR_CHECKS}, readiness_state=ReadinessState.LIMITED, ), } diff --git a/snuba/snuba_migrations/uptime_monitor_checks/0001_uptime_monitor_checks.py b/snuba/snuba_migrations/uptime_monitor_checks/0001_uptime_monitor_checks.py index c74deb2f676..22daab79519 100644 --- a/snuba/snuba_migrations/uptime_monitor_checks/0001_uptime_monitor_checks.py +++ b/snuba/snuba_migrations/uptime_monitor_checks/0001_uptime_monitor_checks.py @@ -1,10 +1,11 @@ from typing import List, Sequence -from snuba.clickhouse.columns import UUID, Column, DateTime, String, UInt +from snuba.clickhouse.columns import UUID, Column, String, UInt from snuba.clusters.storage_sets import StorageSetKey from snuba.migrations import migration, operations, table_engines from snuba.migrations.columns import MigrationModifiers as Modifiers from snuba.migrations.operations import OperationTarget, SqlOperation +from snuba.utils.schemas import DateTime64 storage_set = StorageSetKey.UPTIME_MONITOR_CHECKS table_prefix = "uptime_monitor_checks" @@ -18,10 +19,10 @@ Column("environment", String(Modifiers(nullable=True, low_cardinality=True))), Column("uptime_subscription_id", UInt(64)), Column("uptime_check_id", UUID()), - Column("scheduled_check_time", DateTime()), - Column("timestamp", DateTime()), + Column("scheduled_check_time", DateTime64(3)), # millisecond precision + Column("timestamp", DateTime64(3)), # millisecond precision Column("duration_ms", UInt(64)), - Column("region_id", UInt(16, Modifiers(nullable=True))), + Column("region_slug", String(Modifiers(low_cardinality=True))), Column("check_status", String(Modifiers(low_cardinality=True))), Column( "check_status_reason", @@ -43,12 +44,12 @@ def forwards_ops(self) -> Sequence[SqlOperation]: table_name=local_table_name, columns=columns, engine=table_engines.ReplacingMergeTree( - primary_key="(organization_id, project_id, timestamp, uptime_check_id, trace_id)", - order_by="(organization_id, project_id, timestamp, uptime_check_id, trace_id)", + primary_key="(organization_id, project_id, toDateTime(timestamp), uptime_check_id, trace_id)", + order_by="(organization_id, project_id, toDateTime(timestamp), uptime_check_id, trace_id)", partition_by="(retention_days, toMonday(timestamp))", settings={"index_granularity": "8192"}, storage_set=storage_set, - ttl="timestamp + toIntervalDay(retention_days)", + ttl="toDateTime(timestamp) + toIntervalDay(retention_days)", ), target=OperationTarget.LOCAL, ), From 1ecfaa92a39cf7a2f5e43bfa347ac522e12b592c Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Thu, 19 Dec 2024 13:08:27 -0700 Subject: [PATCH 6/6] update subscription id --- .../uptime_monitor_checks/0001_uptime_monitor_checks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snuba/snuba_migrations/uptime_monitor_checks/0001_uptime_monitor_checks.py b/snuba/snuba_migrations/uptime_monitor_checks/0001_uptime_monitor_checks.py index 22daab79519..575d62d5756 100644 --- a/snuba/snuba_migrations/uptime_monitor_checks/0001_uptime_monitor_checks.py +++ b/snuba/snuba_migrations/uptime_monitor_checks/0001_uptime_monitor_checks.py @@ -17,7 +17,7 @@ Column("organization_id", UInt(64)), Column("project_id", UInt(64)), Column("environment", String(Modifiers(nullable=True, low_cardinality=True))), - Column("uptime_subscription_id", UInt(64)), + Column("uptime_subscription_id", UUID()), Column("uptime_check_id", UUID()), Column("scheduled_check_time", DateTime64(3)), # millisecond precision Column("timestamp", DateTime64(3)), # millisecond precision