diff --git a/src/databricks/labs/ucx/queries/progress/main/01_0_percentage_migration_progress.sql b/src/databricks/labs/ucx/queries/progress/main/01_00_percentage_migration_progress.sql
similarity index 55%
rename from src/databricks/labs/ucx/queries/progress/main/01_0_percentage_migration_progress.sql
rename to src/databricks/labs/ucx/queries/progress/main/01_00_percentage_migration_progress.sql
index d429af42cc..d5ca534978 100644
--- a/src/databricks/labs/ucx/queries/progress/main/01_0_percentage_migration_progress.sql
+++ b/src/databricks/labs/ucx/queries/progress/main/01_00_percentage_migration_progress.sql
@@ -2,4 +2,4 @@
 SELECT
     ROUND(100 * try_divide(COUNT_IF(SIZE(failures) = 0), COUNT(*)), 2) AS percentage
 FROM ucx_catalog.multiworkspace.objects_snapshot
-WHERE object_type IN ('ClusterInfo', 'Grant', 'JobInfo', 'PipelineInfo', 'PolicyInfo', 'Table', 'Udf')
+WHERE object_type IN ('ClusterInfo', 'DirectFsAccess', 'Grant', 'JobInfo', 'PipelineInfo', 'PolicyInfo', 'QueryProblem', 'Table', 'Udf', 'UsedTable')
diff --git a/src/databricks/labs/ucx/queries/progress/main/01_1_percentage_udf_migration_progress.sql b/src/databricks/labs/ucx/queries/progress/main/01_01_percentage_udf_migration_progress.sql
similarity index 100%
rename from src/databricks/labs/ucx/queries/progress/main/01_1_percentage_udf_migration_progress.sql
rename to src/databricks/labs/ucx/queries/progress/main/01_01_percentage_udf_migration_progress.sql
diff --git a/src/databricks/labs/ucx/queries/progress/main/01_2_percentage_grant_migration_progress.sql b/src/databricks/labs/ucx/queries/progress/main/01_02_percentage_grant_migration_progress.sql
similarity index 100%
rename from src/databricks/labs/ucx/queries/progress/main/01_2_percentage_grant_migration_progress.sql
rename to src/databricks/labs/ucx/queries/progress/main/01_02_percentage_grant_migration_progress.sql
diff --git a/src/databricks/labs/ucx/queries/progress/main/01_3_percentage_job_migration_progress.sql
b/src/databricks/labs/ucx/queries/progress/main/01_03_percentage_job_migration_progress.sql
similarity index 100%
rename from src/databricks/labs/ucx/queries/progress/main/01_3_percentage_job_migration_progress.sql
rename to src/databricks/labs/ucx/queries/progress/main/01_03_percentage_job_migration_progress.sql
diff --git a/src/databricks/labs/ucx/queries/progress/main/01_4_percentage_cluster_migration_progress.sql b/src/databricks/labs/ucx/queries/progress/main/01_04_percentage_cluster_migration_progress.sql
similarity index 100%
rename from src/databricks/labs/ucx/queries/progress/main/01_4_percentage_cluster_migration_progress.sql
rename to src/databricks/labs/ucx/queries/progress/main/01_04_percentage_cluster_migration_progress.sql
diff --git a/src/databricks/labs/ucx/queries/progress/main/01_5_percentage_table_migration_progress.sql b/src/databricks/labs/ucx/queries/progress/main/01_05_percentage_table_migration_progress.sql
similarity index 75%
rename from src/databricks/labs/ucx/queries/progress/main/01_5_percentage_table_migration_progress.sql
rename to src/databricks/labs/ucx/queries/progress/main/01_05_percentage_table_migration_progress.sql
index b3a2e4554e..120dbab112 100644
--- a/src/databricks/labs/ucx/queries/progress/main/01_5_percentage_table_migration_progress.sql
+++ b/src/databricks/labs/ucx/queries/progress/main/01_05_percentage_table_migration_progress.sql
@@ -1,4 +1,4 @@
-/* --title 'Table migration progress (%)' --width 2 */
+/* --title 'Table migration progress (%)' */
 SELECT
     ROUND(100 * TRY_DIVIDE(COUNT_IF(SIZE(failures) = 0), COUNT(*)), 2) AS percentage
 FROM ucx_catalog.multiworkspace.objects_snapshot
diff --git a/src/databricks/labs/ucx/queries/progress/main/01_06_percentage_used_table_progress.sql b/src/databricks/labs/ucx/queries/progress/main/01_06_percentage_used_table_progress.sql
new file mode 100644
index 0000000000..544062edc2
--- /dev/null
+++
b/src/databricks/labs/ucx/queries/progress/main/01_06_percentage_used_table_progress.sql
@@ -0,0 +1,5 @@
+/* --title '"Table references in code" progress (%)' --description 'Tables referring UC over Hive metastore' */
+SELECT
+    ROUND(100 * TRY_DIVIDE(COUNT_IF(SIZE(failures) = 0), COUNT(*)), 2) AS percentage
+FROM ucx_catalog.multiworkspace.objects_snapshot
+WHERE object_type = 'UsedTable'
diff --git a/src/databricks/labs/ucx/queries/progress/main/01_07_count_direct_filesystem_access.sql b/src/databricks/labs/ucx/queries/progress/main/01_07_count_direct_filesystem_access.sql
new file mode 100644
index 0000000000..2a79b7d902
--- /dev/null
+++ b/src/databricks/labs/ucx/queries/progress/main/01_07_count_direct_filesystem_access.sql
@@ -0,0 +1,7 @@
+/* --title 'Direct filesystem access progress (#)' --description 'Unsupported in Unity Catalog' */
+SELECT COUNT(*) AS counter
+FROM ucx_catalog.multiworkspace.objects_snapshot
+WHERE object_type = 'DirectFsAccess'
+    -- Redundant filter as a direct filesystem access is a failure by definition (see description above),
+    -- however, filter is defined for explicitness and as this knowledge is not "known" to this query.
+    AND SIZE(failures) > 0
diff --git a/src/databricks/labs/ucx/queries/progress/main/01_08_count_query_problem.sql b/src/databricks/labs/ucx/queries/progress/main/01_08_count_query_problem.sql
new file mode 100644
index 0000000000..a70028dc6b
--- /dev/null
+++ b/src/databricks/labs/ucx/queries/progress/main/01_08_count_query_problem.sql
@@ -0,0 +1,6 @@
+/* --title 'Query problem progress (#)' */
+SELECT COUNT(*) AS counter
+FROM ucx_catalog.multiworkspace.objects_snapshot
+WHERE object_type = 'QueryProblem'
+    -- Redundant filter as a query problem is a failure by definition, however, filter is defined for explicitness
+    AND SIZE(failures) > 0
diff --git a/src/databricks/labs/ucx/queries/progress/main/01_6_percentage_pipeline_migration_progress.sql b/src/databricks/labs/ucx/queries/progress/main/01_09_percentage_pipeline_migration_progress.sql
similarity index 100%
rename from src/databricks/labs/ucx/queries/progress/main/01_6_percentage_pipeline_migration_progress.sql
rename to src/databricks/labs/ucx/queries/progress/main/01_09_percentage_pipeline_migration_progress.sql
diff --git a/src/databricks/labs/ucx/queries/progress/main/01_7_percentage_policy_migration_progress.sql b/src/databricks/labs/ucx/queries/progress/main/01_10_percentage_policy_migration_progress.sql
similarity index 100%
rename from src/databricks/labs/ucx/queries/progress/main/01_7_percentage_policy_migration_progress.sql
rename to src/databricks/labs/ucx/queries/progress/main/01_10_percentage_policy_migration_progress.sql
diff --git a/src/databricks/labs/ucx/queries/progress/main/01_8_distinct_failures_per_object_type.sql b/src/databricks/labs/ucx/queries/progress/main/01_11_distinct_failures_per_object_type.sql
similarity index 67%
rename from src/databricks/labs/ucx/queries/progress/main/01_8_distinct_failures_per_object_type.sql
rename to src/databricks/labs/ucx/queries/progress/main/01_11_distinct_failures_per_object_type.sql
index 00a229d02f..75cb3bcaf6 100644
---
a/src/databricks/labs/ucx/queries/progress/main/01_8_distinct_failures_per_object_type.sql
+++ b/src/databricks/labs/ucx/queries/progress/main/01_11_distinct_failures_per_object_type.sql
@@ -2,7 +2,7 @@
 with failures AS (
     SELECT object_type, explode(failures) AS failure
     FROM ucx_catalog.multiworkspace.objects_snapshot
-    WHERE object_type IN ('ClusterInfo', 'Grant', 'JobInfo', 'PipelineInfo', 'PolicyInfo', 'Table', 'Udf')
+    WHERE object_type IN ('ClusterInfo', 'DirectFsAccess', 'Grant', 'JobInfo', 'PipelineInfo', 'PolicyInfo', 'QueryProblem', 'Table', 'Udf', 'UsedTable')
 )
 
 SELECT
diff --git a/src/databricks/labs/ucx/queries/progress/main/02_4_migration_status_by_owner_overview.sql b/src/databricks/labs/ucx/queries/progress/main/02_4_migration_status_by_owner_overview.sql
index a9d7a7591f..c4ff69b267 100644
--- a/src/databricks/labs/ucx/queries/progress/main/02_4_migration_status_by_owner_overview.sql
+++ b/src/databricks/labs/ucx/queries/progress/main/02_4_migration_status_by_owner_overview.sql
@@ -1,6 +1,6 @@
 /* --title 'Overview' --description 'Tables and views migration' --width 5 */
 WITH migration_statuses AS (
-    SELECT *
+    SELECT owner, failures
     FROM ucx_catalog.multiworkspace.objects_snapshot
     WHERE object_type = 'Table'
 )
diff --git a/src/databricks/labs/ucx/queries/progress/main/03_00_code.md b/src/databricks/labs/ucx/queries/progress/main/03_00_code.md
new file mode 100644
index 0000000000..ca3fd81e2a
--- /dev/null
+++ b/src/databricks/labs/ucx/queries/progress/main/03_00_code.md
@@ -0,0 +1,8 @@
+# Code
+
+This section shows Unity Catalog compatibility issues found while linting code. There are two kinds of code changes to
+perform:
+- Data asset reference, like references to Hive metastore tables and views or direct filesystem access (dfsa). These
+  references should be updated to refer to their Unity Catalog counterparts.
+- Linting compatibility issues, like using RDDs or directly accessing the Spark context.
These issues should be resolved
+  by following the instructions stated with the issue.
diff --git a/src/databricks/labs/ucx/queries/progress/main/03_01_pending_migration_data_asset_references.sql b/src/databricks/labs/ucx/queries/progress/main/03_01_pending_migration_data_asset_references.sql
new file mode 100644
index 0000000000..d6388b41b3
--- /dev/null
+++ b/src/databricks/labs/ucx/queries/progress/main/03_01_pending_migration_data_asset_references.sql
@@ -0,0 +1,4 @@
+/* --title 'Pending migration' --description 'Total number of table, view and dfsa references' --height 6 */
+SELECT COUNT(*) AS count
+FROM ucx_catalog.multiworkspace.objects_snapshot
+WHERE object_type IN ('DirectFsAccess', 'UsedTable') AND SIZE(failures) > 0
diff --git a/src/databricks/labs/ucx/queries/progress/main/03_02_data_asset_references_by_owner_bar_graph.sql b/src/databricks/labs/ucx/queries/progress/main/03_02_data_asset_references_by_owner_bar_graph.sql
new file mode 100644
index 0000000000..3910fc0b06
--- /dev/null
+++ b/src/databricks/labs/ucx/queries/progress/main/03_02_data_asset_references_by_owner_bar_graph.sql
@@ -0,0 +1,24 @@
+/*
+--title 'Pending migration'
+--description 'Tables, views and dfsa per owner'
+--width 5
+--overrides '{"spec": {
+    "version": 3,
+    "widgetType": "bar",
+    "encodings": {
+        "x": {"fieldName": "owner", "scale": {"type": "categorical"}, "displayName": "owner"},
+        "y": {"fieldName": "count", "scale": {"type": "quantitative"}, "displayName": "count"}
+    }
+}}'
+*/
+WITH owners_with_failures AS (
+    SELECT owner
+    FROM ucx_catalog.multiworkspace.objects_snapshot
+    WHERE object_type IN ('DirectFsAccess', 'UsedTable') AND SIZE(failures) > 0
+)
+
+SELECT
+    owner,
+    COUNT(1) AS count
+FROM owners_with_failures
+GROUP BY owner
diff --git a/src/databricks/labs/ucx/queries/progress/main/03_03_migrated_data_asset_references.sql b/src/databricks/labs/ucx/queries/progress/main/03_03_migrated_data_asset_references.sql
new file mode 100644
index 0000000000..689e2bfaf0
--- /dev/null
+++ b/src/databricks/labs/ucx/queries/progress/main/03_03_migrated_data_asset_references.sql
@@ -0,0 +1,4 @@
+/* --title 'Migrated' --description 'Total number of table, view and dfsa references' --height 6 */
+SELECT COUNT(*) AS count
+FROM ucx_catalog.multiworkspace.objects_snapshot
+WHERE object_type IN ('DirectFsAccess', 'UsedTable') AND SIZE(failures) = 0
diff --git a/src/databricks/labs/ucx/queries/progress/main/03_04_data_asset_references_pending_migration_overview.sql b/src/databricks/labs/ucx/queries/progress/main/03_04_data_asset_references_pending_migration_overview.sql
new file mode 100644
index 0000000000..1b14d7185b
--- /dev/null
+++ b/src/databricks/labs/ucx/queries/progress/main/03_04_data_asset_references_pending_migration_overview.sql
@@ -0,0 +1,20 @@
+/* --title 'Overview' --description 'Table, view and dfsa migration' --width 5 */
+WITH migration_statuses AS (
+    SELECT owner, object_type, failures
+    FROM ucx_catalog.multiworkspace.objects_snapshot
+    WHERE object_type IN ('DirectFsAccess', 'UsedTable')
+)
+
+SELECT
+    owner,
+    CASE
+        WHEN object_type = 'DirectFsAccess' THEN 'Direct filesystem access'
+        WHEN object_type = 'UsedTable' THEN 'Table or view reference'
+        ELSE object_type
+    END AS object_type,
+    DOUBLE(CEIL(100 * COUNT_IF(SIZE(failures) = 0) / SUM(COUNT(*)) OVER (PARTITION BY owner, object_type), 2)) AS percentage,
+    COUNT(*) AS total,
+    COUNT_IF(SIZE(failures) = 0) AS total_migrated,
+    COUNT_IF(SIZE(failures) > 0) AS total_not_migrated
+FROM migration_statuses
+GROUP BY owner, object_type
diff --git a/src/databricks/labs/ucx/queries/progress/main/03_05_data_asset_references_pending_migration.sql b/src/databricks/labs/ucx/queries/progress/main/03_05_data_asset_references_pending_migration.sql
new file mode 100644
index 0000000000..0db6a1bc8c
--- /dev/null
+++ b/src/databricks/labs/ucx/queries/progress/main/03_05_data_asset_references_pending_migration.sql
@@ -0,0 +1,43 @@
+/*
+--title 'Data asset references'
+--width 6
+--overrides '{"spec":{
+    "encodings":{
+      "columns": [
+        {"fieldName": "workspace_id", "booleanValues": ["false", "true"], "type": "string", "displayAs": "string", "title": "workspace_id"},
+        {"fieldName": "object_type", "booleanValues": ["false", "true"], "type": "string", "displayAs": "string", "title": "object_type"},
+        {"fieldName": "object_id", "booleanValues": ["false", "true"], "linkUrlTemplate": "{{ link }}", "linkTextTemplate": "{{ @ }}", "linkTitleTemplate": "{{ @ }}", "linkOpenInNewTab": true, "type": "string", "displayAs": "link", "title": "object_id"},
+        {"fieldName": "failure", "booleanValues": ["false", "true"], "type": "string", "displayAs": "string", "title": "failure"},
+        {"fieldName": "is_read", "booleanValues": ["false", "true"], "type": "boolean", "displayAs": "boolean", "title": "is_read"},
+        {"fieldName": "is_write", "booleanValues": ["false", "true"], "type": "boolean", "displayAs": "boolean", "title": "is_write"}
+      ]},
+    "invisibleColumns": [
+      {"name": "link", "booleanValues": ["false", "true"], "linkUrlTemplate": "{{ @ }}", "linkTextTemplate": "{{ @ }}", "linkTitleTemplate": "{{ @ }}", "linkOpenInNewTab": true, "type": "string", "displayAs": "link", "title": "link"}
+    ]
+  }}'
+*/
+SELECT
+    workspace_id,
+    owner,
+    CASE
+        WHEN object_type = 'DirectFsAccess' THEN 'Direct filesystem access'
+        WHEN object_type = 'UsedTable' THEN 'Table or view reference'
+        ELSE object_type
+    END AS object_type,
+    CASE
+        WHEN object_type = 'DirectFsAccess' THEN data.path
+        WHEN object_type = 'UsedTable' THEN CONCAT_WS('.', object_id)
+        ELSE CONCAT_WS('.', object_id)
+    END AS object_id,
+    EXPLODE(failures) AS failure,
+    CAST(data.is_read AS BOOLEAN) AS is_read,
+    CAST(data.is_write AS BOOLEAN) AS is_write,
+    -- Below are invisible column(s) used in links url templates
+    CASE
+        -- SQL queries do NOT point to the workspace, i.e.
start with '/'
+        WHEN object_type = 'DirectFsAccess' AND SUBSTRING(data.source_id, 1, 1) != '/' THEN CONCAT('/sql/editor/', data.source_id)
+        ELSE CONCAT('/#workspace', data.source_id)
+    END AS link
+FROM ucx_catalog.multiworkspace.objects_snapshot
+WHERE object_type IN ('DirectFsAccess', 'UsedTable')
+ORDER BY workspace_id, owner, object_type, object_id
diff --git a/src/databricks/labs/ucx/queries/progress/main/03_06_code_compatibility_issues.sql b/src/databricks/labs/ucx/queries/progress/main/03_06_code_compatibility_issues.sql
new file mode 100644
index 0000000000..1c623da8fd
--- /dev/null
+++ b/src/databricks/labs/ucx/queries/progress/main/03_06_code_compatibility_issues.sql
@@ -0,0 +1,29 @@
+/*
+--title 'Code compatibility issues'
+--width 6
+--overrides '{"spec":{
+    "encodings":{
+      "columns": [
+        {"fieldName": "workspace_id", "booleanValues": ["false", "true"], "type": "string", "displayAs": "string", "title": "workspace_id"},
+        {"fieldName": "code", "booleanValues": ["false", "true"], "type": "string", "displayAs": "string", "title": "code"},
+        {"fieldName": "message", "booleanValues": ["false", "true"], "type": "string", "displayAs": "string", "title": "message"},
+        {"fieldName": "dashboard_name", "booleanValues": ["false", "true"], "linkUrlTemplate": "/sql/dashboards/{{ dashboard_id }}", "linkTextTemplate": "{{ @ }}", "linkTitleTemplate": "{{ @ }}", "linkOpenInNewTab": true, "type": "string", "displayAs": "link", "title": "dashboard"},
+        {"fieldName": "query_name", "booleanValues": ["false", "true"], "linkUrlTemplate": "/sql/editor/{{ query_id }}", "linkTextTemplate": "{{ @ }}", "linkTitleTemplate": "{{ @ }}", "linkOpenInNewTab": true, "type": "string", "displayAs": "link", "title": "query"}
+      ]},
+    "invisibleColumns": [
+      {"name": "dashboard_id", "booleanValues": ["false", "true"], "linkUrlTemplate": "{{ @ }}", "linkTextTemplate": "{{ @ }}", "linkTitleTemplate": "{{ @ }}", "linkOpenInNewTab": true, "type": "string", "displayAs": "link", "title": "dashboard_id"},
+      {"name": "query_id", "booleanValues": ["false", "true"], "linkUrlTemplate": "{{ @ }}", "linkTextTemplate": "{{ @ }}", "linkTitleTemplate": "{{ @ }}", "linkOpenInNewTab": true, "type": "string", "displayAs": "link", "title": "query_id"}
+    ]
+  }}'
+*/
+SELECT
+    workspace_id,
+    data.code,
+    data.message,
+    data.dashboard_name,
+    data.query_name,
+    -- Below are invisible columns used in links url templates
+    data.dashboard_id,
+    data.query_id
+FROM ucx_catalog.multiworkspace.objects_snapshot
+WHERE object_type = 'QueryProblem'