From f500ba9b7aa00223608c8676375743e84ccb5d4a Mon Sep 17 00:00:00 2001 From: Alexander Date: Wed, 8 Nov 2023 15:13:17 -0500 Subject: [PATCH] DENG-1705 - Add missing client attribution columns to clients daily/first-seen (#4505) * DENG-1705 Add missing client attribution columns to clients daily/firstseen * Update clients_last_seen_joined --- .../clients_daily_joined_v1/schema.yaml | 36 ++++++++++ .../clients_daily_v6/query.sql | 27 +++++++- .../clients_daily_v6/schema.yaml | 36 ++++++++++ .../clients_first_seen_v1/schema.yaml | 36 ++++++++++ .../clients_first_seen_v2/query.sql | 68 +++++++++---------- .../clients_last_seen_joined_v1/schema.yaml | 36 ++++++++++ .../clients_last_seen_v1/schema.yaml | 36 ++++++++++ ...metry_derived.clients_daily_v6.schema.json | 48 +++++++++++++ 8 files changed, 286 insertions(+), 37 deletions(-) diff --git a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_joined_v1/schema.yaml b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_joined_v1/schema.yaml index 144bdf63924..e5f29f13ddb 100644 --- a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_joined_v1/schema.yaml +++ b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_joined_v1/schema.yaml @@ -154,6 +154,9 @@ fields: - name: dlsource type: STRING mode: NULLABLE + - name: ua + type: STRING + mode: NULLABLE mode: NULLABLE name: attribution type: RECORD @@ -2339,3 +2342,36 @@ fields: - name: startup_profile_selection_reason_first type: STRING mode: NULLABLE +- name: first_document_id + type: STRING + mode: NULLABLE +- name: partner_id + type: STRING + mode: NULLABLE +- name: distribution_version + type: STRING + mode: NULLABLE +- name: distributor + type: STRING + mode: NULLABLE +- name: distributor_channel + type: STRING + mode: NULLABLE +- name: env_build_platform_version + type: STRING + mode: NULLABLE +- name: env_build_xpcom_abi + type: STRING + mode: NULLABLE +- name: geo_db_version + type: STRING + mode: NULLABLE +- name: apple_model_id + type: STRING + mode: NULLABLE +- name: max_subsession_counter + type: INTEGER + mode: NULLABLE +- name: min_subsession_counter + type: INTEGER + mode: NULLABLE diff --git a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_v6/query.sql b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_v6/query.sql index b69f5959864..29de173a71a 100755 --- a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_v6/query.sql +++ b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_v6/query.sql @@ -177,6 +177,7 @@ clients_summary AS ( submission_timestamp, client_id, sample_id, + document_id, metadata.uri.app_update_channel AS channel, normalized_channel, normalized_os_version, @@ -184,6 +185,7 @@ clients_summary AS ( metadata.geo.city, metadata.geo.subdivision1 AS geo_subdivision1, metadata.geo.subdivision2 AS geo_subdivision2, + metadata.geo.db_version AS geo_db_version, metadata.isp.name AS isp_name, metadata.isp.organization AS isp_organization, environment.system.os.name AS os, @@ -194,6 +196,7 @@ clients_summary AS ( SAFE_CAST(environment.system.os.windows_ubr AS INT64) AS windows_ubr, SAFE_CAST(environment.system.os.install_year AS INT64) AS install_year, environment.system.is_wow64, + environment.system.apple_model_id, SAFE_CAST(environment.system.memory_mb AS INT64) AS memory_mb, environment.system.cpu.count AS cpu_count, environment.system.cpu.cores AS cpu_cores, @@ -214,6 +217,10 @@ clients_summary AS ( payload.info.subsession_counter, payload.info.subsession_length, environment.partner.distribution_id, + environment.partner.partner_id, + environment.partner.distribution_version, + environment.partner.distributor, + environment.partner.distributor_channel, IFNULL( environment.services.account_enabled, udf.boolean_histogram_to_boolean(payload.histograms.fxa_configured) @@ -235,6 +242,8 @@ clients_summary AS ( environment.build.build_id AS env_build_id, environment.build.version AS env_build_version, environment.build.architecture AS env_build_arch, + environment.build.platform_version AS env_build_platform_version, + environment.build.xpcom_abi AS env_build_xpcom_abi, environment.settings.e10s_enabled, environment.settings.locale, environment.settings.update.channel AS update_channel, @@ -251,7 +260,8 @@ clients_summary AS ( environment.settings.attribution.experiment, environment.settings.attribution.variation, environment.settings.attribution.dltoken, - environment.settings.attribution.dlsource + environment.settings.attribution.dlsource, + environment.settings.attribution.ua ), NULL ) AS attribution, @@ -686,6 +696,7 @@ aggregates AS ( SELECT DATE(submission_timestamp) AS submission_date, client_id, + ARRAY_AGG(document_id ORDER BY submission_timestamp)[OFFSET(0)] AS first_document_id, SUM(aborts_content) AS aborts_content_sum, SUM(aborts_gmplugin) AS aborts_gmplugin_sum, SUM(aborts_plugin) AS aborts_plugin_sum, @@ -772,10 +783,20 @@ aggregates AS ( mozfun.stats.mode_last( ARRAY_AGG(distribution_id ORDER BY submission_timestamp) ) AS distribution_id, + mozfun.stats.mode_last(ARRAY_AGG(partner_id ORDER BY submission_timestamp)) AS partner_id, + mozfun.stats.mode_last(ARRAY_AGG(distribution_version ORDER BY submission_timestamp)) AS distribution_version, + mozfun.stats.mode_last(ARRAY_AGG(distributor ORDER BY submission_timestamp)) AS distributor, + mozfun.stats.mode_last(ARRAY_AGG(distributor_channel ORDER BY submission_timestamp)) AS distributor_channel, mozfun.stats.mode_last(ARRAY_AGG(e10s_enabled ORDER BY submission_timestamp)) AS e10s_enabled, mozfun.stats.mode_last( ARRAY_AGG(env_build_arch ORDER BY submission_timestamp) ) AS env_build_arch, + mozfun.stats.mode_last( + ARRAY_AGG(env_build_platform_version ORDER BY submission_timestamp) + ) AS env_build_platform_version, + mozfun.stats.mode_last( + ARRAY_AGG(env_build_xpcom_abi ORDER BY submission_timestamp) + ) AS env_build_xpcom_abi, mozfun.stats.mode_last(ARRAY_AGG(env_build_id ORDER BY submission_timestamp)) AS env_build_id, mozfun.stats.mode_last( ARRAY_AGG(env_build_version ORDER BY submission_timestamp) @@ -858,6 +879,7 @@ aggregates AS ( submission_timestamp ) ).*, + mozfun.stats.mode_last(ARRAY_AGG(geo_db_version ORDER BY submission_timestamp)) AS geo_db_version, mozfun.json.mode_last( ARRAY_AGG( IF( @@ -978,6 +1000,7 @@ aggregates AS ( ARRAY_AGG(is_default_browser ORDER BY submission_timestamp) ) AS is_default_browser, mozfun.stats.mode_last(ARRAY_AGG(is_wow64 ORDER BY submission_timestamp)) AS is_wow64, + mozfun.stats.mode_last(ARRAY_AGG(apple_model_id ORDER BY submission_timestamp)) AS apple_model_id, mozfun.stats.mode_last(ARRAY_AGG(locale ORDER BY submission_timestamp)) AS locale, mozfun.stats.mode_last(ARRAY_AGG(memory_mb ORDER BY submission_timestamp)) AS memory_mb, mozfun.stats.mode_last( @@ -1135,6 +1158,8 @@ aggregates AS ( udf.aggregate_search_counts(ARRAY_CONCAT_AGG(search_counts ORDER BY submission_timestamp)).*, AVG(session_restored) AS session_restored_mean, COUNTIF(subsession_counter = 1) AS sessions_started_on_this_day, + MAX(subsession_counter)AS max_subsession_counter, + MIN(subsession_counter)AS min_subsession_counter, SUM(shutdown_kill) AS shutdown_kill_sum, SUM(subsession_length / NUMERIC '3600') AS subsession_hours_sum, SUM(ssl_handshake_result_failure) AS ssl_handshake_result_failure_sum, diff --git a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_v6/schema.yaml b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_v6/schema.yaml index 4a3ab071e76..e183735622c 100644 --- a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_v6/schema.yaml +++ b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_v6/schema.yaml @@ -154,6 +154,9 @@ fields: - name: dlsource type: STRING mode: NULLABLE + - name: ua + type: STRING + mode: NULLABLE mode: NULLABLE name: attribution type: RECORD @@ -2303,3 +2306,36 @@ fields: - name: startup_profile_selection_reason_first type: STRING mode: NULLABLE +- name: first_document_id + type: STRING + mode: NULLABLE +- name: partner_id + type: STRING + mode: NULLABLE +- name: distribution_version + type: STRING + mode: NULLABLE +- name: distributor + type: STRING + mode: NULLABLE +- name: distributor_channel + type: STRING + mode: NULLABLE +- name: env_build_platform_version + type: STRING + mode: NULLABLE +- name: env_build_xpcom_abi + type: STRING + mode: NULLABLE +- name: geo_db_version + type: STRING + mode: NULLABLE +- name: apple_model_id + type: STRING + mode: NULLABLE +- name: max_subsession_counter + type: INTEGER + mode: NULLABLE +- name: min_subsession_counter + type: INTEGER + mode: NULLABLE diff --git a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_first_seen_v1/schema.yaml b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_first_seen_v1/schema.yaml index 36866f4362f..dbfe2690dfb 100644 --- a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_first_seen_v1/schema.yaml +++ b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_first_seen_v1/schema.yaml @@ -155,6 +155,9 @@ fields: - name: dlsource type: STRING mode: NULLABLE + - name: ua + type: STRING + mode: NULLABLE mode: NULLABLE name: attribution type: RECORD @@ -2305,3 +2308,36 @@ fields: - name: startup_profile_selection_reason_first type: STRING mode: NULLABLE +- name: first_document_id + type: STRING + mode: NULLABLE +- name: partner_id + type: STRING + mode: NULLABLE +- name: distribution_version + type: STRING + mode: NULLABLE +- name: distributor + type: STRING + mode: NULLABLE +- name: distributor_channel + type: STRING + mode: NULLABLE +- name: env_build_platform_version + type: STRING + mode: NULLABLE +- name: env_build_xpcom_abi + type: STRING + mode: NULLABLE +- name: geo_db_version + type: STRING + mode: NULLABLE +- name: apple_model_id + type: STRING + mode: NULLABLE +- name: max_subsession_counter + type: INTEGER + mode: NULLABLE +- name: min_subsession_counter + type: INTEGER + mode: NULLABLE diff --git a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_first_seen_v2/query.sql b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_first_seen_v2/query.sql index 6e2092b48c8..6447f196532 100644 --- a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_first_seen_v2/query.sql +++ b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_first_seen_v2/query.sql @@ -298,38 +298,36 @@ main_ping AS ( TIMESTAMP(MIN(submission_date)) ) AS first_seen_timestamp, ARRAY_AGG(DATE(submission_date) ORDER BY submission_date ASC) AS all_dates, - CAST( - NULL AS STRING - ) AS architecture, -- main_v5:environment.build.architecture + ARRAY_AGG(env_build_arch RESPECT NULLS ORDER BY submission_date)[ + SAFE_OFFSET(0) + ] AS architecture, ARRAY_AGG(env_build_id RESPECT NULLS ORDER BY submission_date)[SAFE_OFFSET(0)] AS app_build_id, ARRAY_AGG(app_name RESPECT NULLS ORDER BY submission_date)[SAFE_OFFSET(0)] AS app_name, ARRAY_AGG(locale RESPECT NULLS ORDER BY submission_date)[SAFE_OFFSET(0)] AS locale, - CAST( - NULL AS STRING - ) AS platform_version, -- main_v5:environment.build.platform_version + ARRAY_AGG(env_build_platform_version RESPECT NULLS ORDER BY submission_date)[ + SAFE_OFFSET(0) + ] AS platform_version, ARRAY_AGG(vendor RESPECT NULLS ORDER BY submission_date)[SAFE_OFFSET(0)] AS vendor, ARRAY_AGG(app_version RESPECT NULLS ORDER BY submission_date)[SAFE_OFFSET(0)] AS app_version, - CAST( - NULL AS STRING - ) AS xpcom_abi, -- main_v5:environment.build.xpcom_abi / application.xpcom_abi - CAST( - NULL AS STRING - ) AS document_id, -- main_v5:document_id + ARRAY_AGG(env_build_xpcom_abi RESPECT NULLS ORDER BY submission_date)[ + SAFE_OFFSET(0) + ] AS xpcom_abi, + ARRAY_AGG(first_document_id RESPECT NULLS ORDER BY submission_date)[ + SAFE_OFFSET(0) + ] AS document_id, ARRAY_AGG(distribution_id RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS distribution_id, - CAST( - NULL AS STRING - ) AS partner_distribution_version, -- main_v5:environment.partner.distribution_version - CAST( - NULL AS STRING - ) AS partner_distributor, -- main_v5:environment.partner.distributor - CAST( - NULL AS STRING - ) AS partner_distributor_channel, -- main_v5:environment.partner.distributor_channel - CAST( - NULL AS STRING - ) AS partner_id, -- main_v5:environment.partner.distribution_id + ARRAY_AGG(distribution_version RESPECT NULLS ORDER BY submission_date)[ + SAFE_OFFSET(0) + ] AS partner_distribution_version, + ARRAY_AGG(distributor RESPECT NULLS ORDER BY submission_date)[ + SAFE_OFFSET(0) + ] AS partner_distributor, + ARRAY_AGG(distributor_channel RESPECT NULLS ORDER BY submission_date)[ + SAFE_OFFSET(0) + ] AS partner_distributor_channel, + ARRAY_AGG(partner_id RESPECT NULLS ORDER BY submission_date)[SAFE_OFFSET(0)] AS partner_id, ARRAY_AGG(attribution.campaign RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS attribution_campaign, @@ -345,9 +343,9 @@ main_ping AS ( ARRAY_AGG(attribution.source RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS attribution_source, - CAST( - NULL AS STRING - ) AS attribution_ua, -- main_v5:environment.settings.attribution.ua + ARRAY_AGG(attribution.ua RESPECT NULLS ORDER BY submission_date)[ + SAFE_OFFSET(0) + ] AS attribution_ua, ARRAY_AGG(default_search_engine_data_load_path RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS engine_data_load_path, @@ -360,13 +358,11 @@ main_ping AS ( ARRAY_AGG(default_search_engine_data_submission_url RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS engine_data_submission_url, - CAST( - NULL AS STRING - ) AS apple_model_id, -- main_v5:environment.system.apple_model_id + ARRAY_AGG(apple_model_id RESPECT NULLS ORDER BY submission_date)[ + SAFE_OFFSET(0) + ] AS apple_model_id, ARRAY_AGG(city RESPECT NULLS ORDER BY submission_date)[SAFE_OFFSET(0)] AS city, - CAST( - NULL AS STRING - ) AS db_version, -- main_v5:metadata.geo.db_version + ARRAY_AGG(geo_db_version RESPECT NULLS ORDER BY submission_date)[SAFE_OFFSET(0)] AS db_version, ARRAY_AGG(geo_subdivision1 RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS subdivision1, @@ -384,9 +380,9 @@ main_ping AS ( ARRAY_AGG(attribution.dltoken RESPECT NULLS ORDER BY submission_date)[ SAFE_OFFSET(0) ] AS attribution_dltoken, - CAST( - NULL AS STRING - ) AS attribution_dlsource -- main_v5:environment.settings.attribution.dlsource + ARRAY_AGG(attribution.dlsource RESPECT NULLS ORDER BY submission_date)[ + SAFE_OFFSET(0) + ] AS attribution_dlsource FROM `moz-fx-data-shared-prod.telemetry_derived.clients_daily_v6` WHERE diff --git a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_last_seen_joined_v1/schema.yaml b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_last_seen_joined_v1/schema.yaml index 206df88b624..5c6becbdc50 100644 --- a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_last_seen_joined_v1/schema.yaml +++ b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_last_seen_joined_v1/schema.yaml @@ -170,6 +170,9 @@ fields: - mode: NULLABLE name: dlsource type: STRING + - mode: NULLABLE + name: ua + type: STRING mode: NULLABLE name: attribution type: RECORD @@ -2107,3 +2110,36 @@ fields: - name: startup_profile_selection_reason_first type: STRING mode: NULLABLE +- name: first_document_id + type: STRING + mode: NULLABLE +- name: partner_id + type: STRING + mode: NULLABLE +- name: distribution_version + type: STRING + mode: NULLABLE +- name: distributor + type: STRING + mode: NULLABLE +- name: distributor_channel + type: STRING + mode: NULLABLE +- name: env_build_platform_version + type: STRING + mode: NULLABLE +- name: env_build_xpcom_abi + type: STRING + mode: NULLABLE +- name: geo_db_version + type: STRING + mode: NULLABLE +- name: apple_model_id + type: STRING + mode: NULLABLE +- name: max_subsession_counter + type: INTEGER + mode: NULLABLE +- name: min_subsession_counter + type: INTEGER + mode: NULLABLE diff --git a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_last_seen_v1/schema.yaml b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_last_seen_v1/schema.yaml index dc838534401..9c98ce2466d 100644 --- a/sql/moz-fx-data-shared-prod/telemetry_derived/clients_last_seen_v1/schema.yaml +++ b/sql/moz-fx-data-shared-prod/telemetry_derived/clients_last_seen_v1/schema.yaml @@ -202,6 +202,9 @@ fields: - mode: NULLABLE name: dlsource type: STRING + - name: ua + type: STRING + mode: NULLABLE mode: NULLABLE name: attribution type: RECORD @@ -2352,3 +2355,36 @@ fields: - name: startup_profile_selection_reason_first type: STRING mode: NULLABLE +- name: first_document_id + type: STRING + mode: NULLABLE +- name: partner_id + type: STRING + mode: NULLABLE +- name: distribution_version + type: STRING + mode: NULLABLE +- name: distributor + type: STRING + mode: NULLABLE +- name: distributor_channel + type: STRING + mode: NULLABLE +- name: env_build_platform_version + type: STRING + mode: NULLABLE +- name: env_build_xpcom_abi + type: STRING + mode: NULLABLE +- name: geo_db_version + type: STRING + mode: NULLABLE +- name: apple_model_id + type: STRING + mode: NULLABLE +- name: max_subsession_counter + type: INTEGER + mode: NULLABLE +- name: min_subsession_counter + type: INTEGER + mode: NULLABLE diff --git a/tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_first_seen_v2/moz-fx-data-shared-prod.telemetry_derived.clients_daily_v6.schema.json b/tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_first_seen_v2/moz-fx-data-shared-prod.telemetry_derived.clients_daily_v6.schema.json index 40cb5f21321..9f54ac67109 100644 --- a/tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_first_seen_v2/moz-fx-data-shared-prod.telemetry_derived.clients_daily_v6.schema.json +++ b/tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_first_seen_v2/moz-fx-data-shared-prod.telemetry_derived.clients_daily_v6.schema.json @@ -19,6 +19,18 @@ "type": "STRING", "name": "env_build_id" }, + { + "type": "STRING", + "name": "env_build_arch" + }, + { + "type": "STRING", + "name": "env_build_platform_version" + }, + { + "type": "STRING", + "name": "env_build_xpcom_abi" + }, { "type": "STRING", "name": "normalized_channel" @@ -39,6 +51,10 @@ "type": "STRING", "name": "vendor" }, + { + "type": "STRING", + "name": "apple_model_id" + }, { "type": "STRING", "name": "app_version" @@ -47,6 +63,22 @@ "type": "STRING", "name": "distribution_id" }, + { + "type": "STRING", + "name": "distribution_version" + }, + { + "type": "STRING", + "name": "distributor" + }, + { + "type": "STRING", + "name": "distributor_channel" + }, + { + "type": "STRING", + "name": "partner_id" + }, { "fields": [ { @@ -72,6 +104,14 @@ { "type": "STRING", "name": "dltoken" + }, + { + "type": "STRING", + "name": "dlsource" + }, + { + "type": "STRING", + "name": "ua" } ], "name": "attribution", @@ -101,6 +141,10 @@ "type": "STRING", "name": "geo_subdivision1" }, + { + "type": "STRING", + "name": "geo_db_version" + }, { "type": "STRING", "name": "country" @@ -109,6 +153,10 @@ "type": "STRING", "name": "os" }, + { + "type": "STRING", + "name": "first_document_id" + }, { "type": "STRING", "name": "normalized_os_version"