-
Notifications
You must be signed in to change notification settings - Fork 104
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Materialized views and aggregated tables for event monitoring (#4478)
* WIP event monitoring * Add FxA custom events to view definition (#4483) * Add FxA custom events to view definition * Update sql_generators/event_monitoring/templates/event_monitoring_live.init.sql * Update sql_generators/event_monitoring/templates/event_monitoring_live.init.sql * Update sql_generators/event_monitoring/templates/event_monitoring_live.init.sql * Update sql_generators/event_monitoring/templates/event_monitoring_live.init.sql --------- Co-authored-by: Anna Scholtz <anna@scholtzan.net> * Move event monitoring to glean_usage generator * Add cross-app event monitoring view * Generate cross app monitoring * Simplify event monitoring aggregation --------- Co-authored-by: akkomar <akkomar@users.noreply.github.com>
- Loading branch information
1 parent
54dada9
commit 5712712
Showing
16 changed files
with
578 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,180 @@ | ||
"""Generate Materialized Views and aggregate queries for event monitoring.""" | ||
|
||
import os | ||
from collections import namedtuple | ||
from datetime import datetime | ||
from pathlib import Path | ||
|
||
from bigquery_etl.schema.stable_table_schema import get_stable_table_schemas | ||
from sql_generators.glean_usage.common import ( | ||
GleanTable, | ||
get_app_info, | ||
get_table_dir, | ||
render, | ||
table_names_from_baseline, | ||
write_sql, | ||
) | ||
|
||
# Table ID of the per-app-id artifact; also the stem of its template filenames | ||
# (`<id>.init.sql`, `<id>.metadata.yaml`). | ||
TARGET_TABLE_ID = "event_monitoring_live_v1" | ||
# Dataset holding the cross-app view; the aggregate table is placed in | ||
# `<dataset>_derived`. | ||
TARGET_DATASET_CROSS_APP = "monitoring" | ||
# Key used to look up this generator's table in the mapping returned by | ||
# `table_names_from_baseline`. | ||
PREFIX = "event_monitoring" | ||
# Directory containing this module; templates are loaded from PATH / "templates". | ||
PATH = Path(os.path.dirname(__file__)) | ||
|
||
|
||
class EventMonitoringLive(GleanTable):
    """Represents the generated materialized view for event monitoring.

    Two kinds of artifacts are generated:

    * per app id: an ``init.sql`` and ``metadata.yaml`` for the
      ``event_monitoring_live_v1`` table of each app dataset;
    * across apps: an aggregate query plus a view in the ``monitoring``
      dataset (with their metadata files).
    """

    def __init__(self) -> None:
        """Initialize materialized view generation."""
        self.no_init = False
        self.per_app_id_enabled = True
        self.per_app_enabled = False
        self.across_apps_enabled = True
        self.prefix = PREFIX
        self.target_table_id = TARGET_TABLE_ID
        self.custom_render_kwargs = {}

    def _write_event_monitoring_artifacts(
        self, output_dir, artifacts, skip_existing_artifacts
    ):
        """Write every artifact to ``output_dir`` via ``write_sql``.

        :param output_dir: Root directory the generated files are written to.
        :param artifacts: Iterable of objects exposing ``table_id``,
            ``basename`` and ``sql`` attributes.
        :param skip_existing_artifacts: Collection of destination paths (as
            returned by ``self.skip_existing``) that must not be overwritten
            when they already exist.
        """
        for artifact in artifacts:
            destination = (
                get_table_dir(output_dir, artifact.table_id) / artifact.basename
            )
            # `destination` is a Path; normalise to `str` before the membership
            # test so that it can match string entries (a Path never compares
            # equal to a str, which would silently disable skipping).
            skip_existing = str(destination) in skip_existing_artifacts

            write_sql(
                output_dir,
                artifact.table_id,
                artifact.basename,
                artifact.sql,
                skip_existing=skip_existing,
            )

    def generate_per_app_id(
        self, project_id, baseline_table, output_dir=None, use_cloud_function=True
    ):
        """Generate the event monitoring table artifacts for a single app id.

        Renders ``<target_table_id>.init.sql`` and
        ``<target_table_id>.metadata.yaml`` (unless ``self.no_init`` is set)
        and, when ``output_dir`` is given, writes them to the table directory.

        :param project_id: Project ID passed to the templates and to
            ``self.skip_existing``.
        :param baseline_table: Baseline table name from which the related
            table names are derived via ``table_names_from_baseline``.
        :param output_dir: Root output directory; nothing is written when it
            is falsy.
        :param use_cloud_function: Not used by this method.
        :raises IndexError: If no app returned by ``get_app_info`` has a
            ``bq_dataset_family`` matching the dataset of the target table.
        """
        tables = table_names_from_baseline(baseline_table, include_project_id=False)

        init_filename = f"{self.target_table_id}.init.sql"
        metadata_filename = f"{self.target_table_id}.metadata.yaml"

        table = tables[self.prefix]
        # The second-to-last dotted component is the `<dataset>_derived` name.
        derived_dataset = table.split(".")[-2]
        dataset = derived_dataset.replace("_derived", "")

        # Canonical name of the first app whose dataset family matches.
        app_name = [
            app_dataset["canonical_app_name"]
            for app in get_app_info().values()
            for app_dataset in app
            if dataset == app_dataset["bq_dataset_family"]
        ][0]

        render_kwargs = dict(
            header="-- Generated via bigquery_etl.glean_usage\n",
            header_yaml="---\n# Generated via bigquery_etl.glean_usage\n",
            project_id=project_id,
            derived_dataset=derived_dataset,
            dataset=dataset,
            current_date=datetime.today().strftime("%Y-%m-%d"),
            app_name=app_name,
        )

        render_kwargs.update(self.custom_render_kwargs)
        render_kwargs.update(tables)

        # generated files to update
        Artifact = namedtuple("Artifact", "table_id basename sql")
        artifacts = []

        if not self.no_init:
            init_sql = render(
                init_filename, template_folder=PATH / "templates", **render_kwargs
            )
            metadata = render(
                metadata_filename,
                template_folder=PATH / "templates",
                format=False,
                **render_kwargs,
            )
            artifacts.append(Artifact(table, "metadata.yaml", metadata))
            artifacts.append(Artifact(table, "init.sql", init_sql))

        skip_existing_artifacts = self.skip_existing(output_dir, project_id)

        if output_dir:
            self._write_event_monitoring_artifacts(
                output_dir, artifacts, skip_existing_artifacts
            )

    def generate_across_apps(
        self, project_id, apps, output_dir=None, use_cloud_function=True
    ):
        """Generate a query across all apps.

        Renders the aggregate query, the cross-app view and their metadata
        and, when ``output_dir`` is given, writes them out. Does nothing when
        ``self.across_apps_enabled`` is false.

        :param project_id: Project ID passed to the templates and used to
            qualify the generated table and view names.
        :param apps: App information forwarded unchanged to the templates.
        :param output_dir: Root output directory; nothing is written when it
            is falsy.
        :param use_cloud_function: Not used by this method.
        """
        if not self.across_apps_enabled:
            return

        # Dataset families that have a Glean `baseline_v1` stable table.
        prod_datasets_with_baseline = [
            s.bq_dataset_family
            for s in get_stable_table_schemas()
            if s.schema_id == "moz://mozilla.org/schemas/glean/ping/1"
            and s.bq_table == "baseline_v1"
        ]

        aggregate_table = "event_monitoring_aggregates_v1"
        # View name is the target table ID without its trailing `_v1`-style
        # suffix (everything after the last underscore is dropped).
        target_view_name = "_".join(self.target_table_id.split("_")[:-1])

        render_kwargs = dict(
            header="-- Generated via bigquery_etl.glean_usage\n",
            header_yaml="---\n# Generated via bigquery_etl.glean_usage\n",
            project_id=project_id,
            target_view=f"{TARGET_DATASET_CROSS_APP}.{target_view_name}",
            table=target_view_name,
            target_table=f"{TARGET_DATASET_CROSS_APP}_derived.{aggregate_table}",
            apps=apps,
            prod_datasets=prod_datasets_with_baseline,
        )
        render_kwargs.update(self.custom_render_kwargs)

        skip_existing_artifacts = self.skip_existing(output_dir, project_id)

        Artifact = namedtuple("Artifact", "table_id basename sql")

        query_filename = f"{aggregate_table}.query.sql"
        query_sql = render(
            query_filename, template_folder=PATH / "templates", **render_kwargs
        )
        metadata = render(
            f"{aggregate_table}.metadata.yaml",
            template_folder=PATH / "templates",
            format=False,
            **render_kwargs,
        )
        table = f"{project_id}.{TARGET_DATASET_CROSS_APP}_derived.{aggregate_table}"

        view_sql = render(
            "event_monitoring_live.view.sql",
            template_folder=PATH / "templates",
            **render_kwargs,
        )
        view_metadata = render(
            "event_monitoring_live.metadata.yaml",
            template_folder=PATH / "templates",
            format=False,
            **render_kwargs,
        )

        view = f"{project_id}.{TARGET_DATASET_CROSS_APP}.{target_view_name}"
        if output_dir:
            artifacts = [
                Artifact(table, "metadata.yaml", metadata),
                Artifact(table, "query.sql", query_sql),
                Artifact(view, "metadata.yaml", view_metadata),
                Artifact(view, "view.sql", view_sql),
            ]
            self._write_event_monitoring_artifacts(
                output_dir, artifacts, skip_existing_artifacts
            )
26 changes: 26 additions & 0 deletions
26
sql_generators/glean_usage/templates/cross_channel_event_monitoring.view.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
-- Generated via ./bqetl generate glean_usage | ||
CREATE OR REPLACE VIEW | ||
`{{ project_id }}.{{ target_view }}` | ||
AS | ||
{% for (dataset, channel) in datasets -%} | ||
{% if not loop.first -%} | ||
UNION ALL | ||
{% endif -%} | ||
SELECT | ||
{% if app_name == "fenix" -%} | ||
mozfun.norm.fenix_app_info("{{ dataset }}", client_info.app_build).channel AS normalized_channel, | ||
{% elif datasets|length > 1 -%} | ||
"{{ channel }}" AS normalized_channel, | ||
{% endif -%} | ||
normalized_app_name, | ||
window_start, | ||
window_end, | ||
event_category, | ||
event_name, | ||
event_extra_key, | ||
country, | ||
version, | ||
total_events | ||
FROM | ||
`{{ project_id }}.{{ dataset }}_derived.event_monitoring_live_v1` | ||
{% endfor %} |
21 changes: 21 additions & 0 deletions
21
sql_generators/glean_usage/templates/event_monitoring_aggregates_v1.metadata.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
friendly_name: Event Monitoring Aggregates | ||
description: |- | ||
Materialized view of experimentation related events | ||
coming from all Glean apps. | ||
owners: | ||
- ascholtz@mozilla.com | ||
- akomar@mozilla.com | ||
labels: | ||
incremental: true | ||
scheduling: | ||
dag_name: bqetl_monitoring | ||
bigquery: | ||
time_partitioning: | ||
type: day | ||
field: submission_date | ||
require_partitions_filter: false | ||
clustering: | ||
fields: | ||
- event_name | ||
- normalized_channel | ||
- normalized_app_name |
Oops, something went wrong.