Skip to content

Commit

Permalink
docs: manual sql steps for importing metrics to Clickhouse (#2410)
Browse files Browse the repository at this point in the history
  • Loading branch information
ryscheng authored Oct 25, 2024
1 parent 781d336 commit 8afd172
Showing 1 changed file with 142 additions and 0 deletions.
142 changes: 142 additions & 0 deletions manual-work-log.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,145 @@ SELECT * FROM "source"."default"."projects_by_collection_v1_source";
INSERT INTO "metrics"."default"."artifacts_by_project_v1"
SELECT * FROM "source"."default"."artifacts_by_project_v1_source";
```

## Metrics manual import into ClickHouse (Ray)

```sql

-- Table that receives the timeseries_metrics_to_artifact.parquet export
-- (loaded by the INSERT statement that follows).
-- CREATE OR REPLACE swaps out any existing table with this name, so
-- re-running this step starts again from an empty table.
CREATE OR REPLACE TABLE timeseries_metrics_to_artifact
(
    metrics_sample_date Date,
    event_source        String,
    to_artifact_id      String,
    from_artifact_id    String,
    metric              String,
    amount              Float64
)
ENGINE = MergeTree
-- Sorting key: rows are stored ordered by sample date only.
ORDER BY (metrics_sample_date);

-- Load the exported parquet file into timeseries_metrics_to_artifact.
-- Columns are listed explicitly on both sides so the load binds by name
-- instead of depending on the physical column order inside the parquet file
-- (INSERT ... SELECT * would match columns purely by position).
-- NOTE(review): assumes the parquet columns carry the same names as the
-- table columns -- confirm against the export before running.
INSERT INTO timeseries_metrics_to_artifact
(
    metrics_sample_date,
    event_source,
    to_artifact_id,
    from_artifact_id,
    metric,
    amount
)
SELECT
    metrics_sample_date,
    event_source,
    to_artifact_id,
    from_artifact_id,
    metric,
    amount
FROM s3Cluster(
    'default',
    'gs://oso-dataset-transfer-bucket/metrics-testing/2024-10-24/exports/timeseries_metrics_to_artifact.parquet'
);

-- Table that receives the timeseries_metrics_to_project.parquet export
-- (loaded by the INSERT statement that follows).
-- CREATE OR REPLACE swaps out any existing table with this name, so
-- re-running this step starts again from an empty table.
CREATE OR REPLACE TABLE timeseries_metrics_to_project
(
    metrics_sample_date Date,
    event_source        String,
    to_project_id       String,
    from_artifact_id    String,
    metric              String,
    amount              Float64
)
ENGINE = MergeTree
-- Sorting key: rows are stored ordered by sample date only.
ORDER BY (metrics_sample_date);

-- Load the exported parquet file into timeseries_metrics_to_project.
-- Columns are listed explicitly on both sides so the load binds by name
-- instead of depending on the physical column order inside the parquet file
-- (INSERT ... SELECT * would match columns purely by position).
-- NOTE(review): assumes the parquet columns carry the same names as the
-- table columns -- confirm against the export before running.
INSERT INTO timeseries_metrics_to_project
(
    metrics_sample_date,
    event_source,
    to_project_id,
    from_artifact_id,
    metric,
    amount
)
SELECT
    metrics_sample_date,
    event_source,
    to_project_id,
    from_artifact_id,
    metric,
    amount
FROM s3Cluster(
    'default',
    'gs://oso-dataset-transfer-bucket/metrics-testing/2024-10-24/exports/timeseries_metrics_to_project.parquet'
);


-- Table that receives the timeseries_metrics_to_collection.parquet export
-- (loaded by the INSERT statement that follows).
-- CREATE OR REPLACE swaps out any existing table with this name, so
-- re-running this step starts again from an empty table.
CREATE OR REPLACE TABLE timeseries_metrics_to_collection
(
    metrics_sample_date Date,
    event_source        String,
    to_collection_id    String,
    from_artifact_id    String,
    metric              String,
    amount              Float64
)
ENGINE = MergeTree
-- Sorting key: rows are stored ordered by sample date only.
ORDER BY (metrics_sample_date);

-- Load the exported parquet file into timeseries_metrics_to_collection.
-- Columns are listed explicitly on both sides so the load binds by name
-- instead of depending on the physical column order inside the parquet file
-- (INSERT ... SELECT * would match columns purely by position).
-- NOTE(review): assumes the parquet columns carry the same names as the
-- table columns -- confirm against the export before running.
INSERT INTO timeseries_metrics_to_collection
(
    metrics_sample_date,
    event_source,
    to_collection_id,
    from_artifact_id,
    metric,
    amount
)
SELECT
    metrics_sample_date,
    event_source,
    to_collection_id,
    from_artifact_id,
    metric,
    amount
FROM s3Cluster(
    'default',
    'gs://oso-dataset-transfer-bucket/metrics-testing/2024-10-24/exports/timeseries_metrics_to_collection.parquet'
);

```

```sql
-- Metric catalogue: one row per metric definition, identified by metric_id.
-- CREATE OR REPLACE swaps out any existing table with this name.
--
-- The descriptive columns (description, raw_definition, definition_ref,
-- aggregation_function) are declared Nullable(String) to match the
-- Nullable(String) casts used by the INSERT that populates this table, which
-- always supplies NULL for raw_definition. Declaring them as plain String
-- would leave the stored value for those NULLs up to server settings.
CREATE OR REPLACE TABLE metrics_v0
(
    metric_id            String,
    metric_source        String,
    metric_namespace     String,
    metric_name          String,
    display_name         String,
    description          Nullable(String),
    raw_definition       Nullable(String),
    definition_ref       Nullable(String),
    aggregation_function Nullable(String),
    -- Data-skipping indexes for lookups by id and by the
    -- (source, namespace, name) triple.
    INDEX idx_metric_id (metric_id) TYPE bloom_filter,
    INDEX idx_metric_name (metric_source, metric_namespace, metric_name) TYPE bloom_filter
)
ENGINE = MergeTree()
ORDER BY (metric_source, metric_namespace, metric_name);


-- Populate metrics_v0 with one row per distinct (metric, event_source) pair
-- found in the three timeseries_metrics_to_* tables.
-- The target columns are listed explicitly so the load does not depend on the
-- declaration order of metrics_v0.
INSERT INTO metrics_v0
(
    metric_id,
    metric_source,
    metric_namespace,
    metric_name,
    display_name,
    description,
    raw_definition,
    definition_ref,
    aggregation_function
)
WITH unioned_metric_names AS (
    -- Each branch is de-duplicated on its own; pairs that appear in more
    -- than one table are still repeated after the UNION ALL.
    SELECT DISTINCT metric, event_source
    FROM timeseries_metrics_to_artifact
    UNION ALL
    SELECT DISTINCT metric, event_source
    FROM timeseries_metrics_to_project
    UNION ALL
    SELECT DISTINCT metric, event_source
    FROM timeseries_metrics_to_collection
),
all_timeseries_metric_names AS (
    -- Remove the cross-table repeats left by the UNION ALL above.
    SELECT DISTINCT metric, event_source
    FROM unioned_metric_names
),
metrics_v0_no_casting AS (
    SELECT
        -- metric_id = base64(sha256(event_source || 'OSO' || 'oso' || metric)).
        -- NOTE(review): event_source is part of the id but is not emitted as a
        -- column, so two rows can share metric_name with different metric_id
        -- values -- confirm this is intended.
        TO_BASE64(SHA256(CONCAT(event_source, 'OSO', 'oso', metric))) AS metric_id,
        'OSO' AS metric_source,
        'oso' AS metric_namespace,
        metric AS metric_name,
        metric AS display_name,
        metric AS description,
        -- NOTE(review): always NULL; whether it is stored as NULL or as the
        -- column default depends on the nullability of the target column.
        NULL AS raw_definition,
        'TODO' AS definition_ref,
        'UNKNOWN' AS aggregation_function
    FROM all_timeseries_metric_names
)
SELECT
    metric_id::String AS metric_id,
    metric_source::String AS metric_source,
    metric_namespace::String AS metric_namespace,
    metric_name::String AS metric_name,
    display_name::String AS display_name,
    description::Nullable(String) AS description,
    raw_definition::Nullable(String) AS raw_definition,
    definition_ref::Nullable(String) AS definition_ref,
    aggregation_function::Nullable(String) AS aggregation_function
FROM metrics_v0_no_casting;
```

```sql
-- Per-artifact metric samples: one amount per (metric_id, artifact_id,
-- sample_date) row, with an optional unit.
-- CREATE OR REPLACE swaps out any existing table with this name.
CREATE OR REPLACE TABLE timeseries_metrics_by_artifact_v0
(
    metric_id   String,
    artifact_id String,
    sample_date Date,
    amount      Float64,
    unit        Nullable(String)
)
ENGINE = MergeTree
-- Sorting key: metric first, then artifact, then date.
ORDER BY (metric_id, artifact_id, sample_date);

-- Populate timeseries_metrics_by_artifact_v0 from the raw
-- timeseries_metrics_to_artifact rows.
-- The target columns are listed explicitly so the load does not depend on the
-- declaration order of the destination table.
INSERT INTO timeseries_metrics_by_artifact_v0
(
    metric_id,
    artifact_id,
    sample_date,
    amount,
    unit
)
WITH all_timeseries_metrics_by_artifact AS (
    SELECT
        -- Same id expression as the metrics_v0 load:
        -- base64(sha256(event_source || 'OSO' || 'oso' || metric)).
        TO_BASE64(SHA256(CONCAT(event_source, 'OSO', 'oso', metric))) AS metric_id,
        to_artifact_id AS artifact_id,
        metrics_sample_date AS sample_date,
        amount AS amount,
        -- No unit is available in the source table; always NULL.
        NULL AS unit
    FROM timeseries_metrics_to_artifact
)
SELECT
    metric_id::String AS metric_id,
    artifact_id::String AS artifact_id,
    sample_date::Date AS sample_date,
    amount::Float64 AS amount,
    unit::Nullable(String) AS unit
FROM all_timeseries_metrics_by_artifact;
```

0 comments on commit 8afd172

Please sign in to comment.