Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEXT_CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ To disable this, set the environment variable DATABRICKS_CACHE_ENABLED to false.
* Fix false positive folder permission warnings and make them more actionable ([#4216](https://github.com/databricks/cli/pull/4216))
* Pass additional Azure DevOps system variables ([#4236](https://github.com/databricks/cli/pull/4236))
* Replace Black formatter with Ruff in Python bundle templates for faster, all-in-one linting and formatting ([#4196](https://github.com/databricks/cli/pull/4196))
* engine/direct: support quality monitors ([#4278](https://github.com/databricks/cli/pull/4278))

### Dependency updates

Expand Down
69 changes: 69 additions & 0 deletions acceptance/bin/create_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/usr/bin/env python3
import json
import os
import subprocess
import sys
import time

# The fully qualified table name is the script's only required argument.
# Fail with a usage message instead of an IndexError traceback when it is missing.
if len(sys.argv) < 2:
    print(f"Usage: {sys.argv[0]} catalog.schema.table", file=sys.stderr)
    sys.exit(1)

table_name = sys.argv[1]
# Extract catalog.schema from table_name. All three parts must be present and
# non-empty; otherwise the SQL statements built from the name would be malformed.
parts = table_name.split(".")
if len(parts) != 3 or not all(parts):
    print(f"Invalid table name: {table_name}. Expected format: catalog.schema.table", file=sys.stderr)
    sys.exit(1)

catalog_name = parts[0]
schema_name = parts[1]
full_schema_name = f"{catalog_name}.{schema_name}"

# Binary used for every CLI invocation; the CLI environment variable overrides
# the default "databricks".
cli = os.environ.get("CLI", "databricks")


def run_cli(*args):
    """Run the CLI binary with ``args`` and return the finished process.

    Output is captured as text. The exit status is not checked here, so callers
    inspect ``returncode``, ``stdout`` and ``stderr`` on the returned
    ``subprocess.CompletedProcess`` themselves.
    """
    command = [cli, *args]
    return subprocess.run(command, capture_output=True, text=True)


def statement_error(stdout):
    """Return an error message if a statement response reports failure, else None.

    ``stdout`` is the raw text printed by the CLI for a statement execution
    request. Anything that is not a JSON object with a ``status.state`` of
    ``FAILED`` or ``CANCELED`` yields ``None``, so unknown or partial responses
    never turn a successful CLI call into an error.
    """
    try:
        response = json.loads(stdout)
    except (TypeError, ValueError):
        # Not JSON (or no output at all): nothing to inspect.
        return None
    if not isinstance(response, dict):
        return None
    status = response.get("status")
    if not isinstance(status, dict):
        return None
    state = status.get("state")
    if state not in ("FAILED", "CANCELED"):
        return None
    error = status.get("error")
    message = error.get("message") if isinstance(error, dict) else None
    return f"statement {state}: {message}" if message else f"statement {state}"


def execute_sql(warehouse_id, sql):
    """Execute SQL using the API endpoint.

    Posts ``sql`` to the statement execution endpoint on ``warehouse_id`` with a
    30 second wait timeout and returns the ``subprocess.CompletedProcess`` of
    the CLI call.

    The API call itself can succeed while the statement fails; in that case the
    failure is only visible in the response body. When the body reports a
    failed or canceled statement, a ``CompletedProcess`` with ``returncode`` 1
    and the failure message in ``stderr`` is returned instead, so callers that
    check ``returncode`` also catch SQL errors.
    """
    payload = json.dumps({"warehouse_id": warehouse_id, "statement": sql, "wait_timeout": "30s"})
    result = run_cli("api", "post", "/api/2.0/sql/statements/", "--json", payload)
    if result.returncode != 0:
        return result

    error = statement_error(result.stdout)
    if error is None:
        return result
    return subprocess.CompletedProcess(result.args, 1, stdout=result.stdout, stderr=error)


def _abort(message):
    """Print ``message`` to stderr and terminate the script with exit status 1."""
    print(message, file=sys.stderr)
    sys.exit(1)


# The SQL warehouse that runs the statements is taken from the test environment.
warehouse_id = os.environ["TEST_DEFAULT_WAREHOUSE_ID"]

# Step 1: create a small Delta table (a no-op if it already exists).
create_sql = f"CREATE TABLE IF NOT EXISTS {table_name} (id INT, value STRING, timestamp TIMESTAMP) USING DELTA"

create_outcome = execute_sql(warehouse_id, create_sql)
if create_outcome.returncode != 0:
    _abort(f"Failed to create table: {create_outcome.stderr}")

print(f"Created table {table_name}")

# Step 2: add a few rows so the monitor has something to analyze.
insert_sql = f"""INSERT INTO {table_name} VALUES
(1, 'test1', current_timestamp()),
(2, 'test2', current_timestamp()),
(3, 'test3', current_timestamp())"""

insert_outcome = execute_sql(warehouse_id, insert_sql)
if insert_outcome.returncode != 0:
    _abort(f"Failed to insert data: {insert_outcome.stderr}")

print(f"Inserted sample data into {table_name}")

# Step 3: poll until the table can be fetched through the CLI, pausing one
# second between attempts. Running out of attempts only produces a warning.
max_attempts = 10
for attempt in range(max_attempts):
    lookup = run_cli("tables", "get", table_name)
    if lookup.returncode == 0:
        table_info = json.loads(lookup.stdout)
        print(f"Table {table_name} is now visible (catalog_name={table_info.get('catalog_name')})")
        break
    # No point in sleeping after the final attempt.
    if attempt + 1 < max_attempts:
        time.sleep(1)
else:
    # Reached only when no attempt succeeded (the loop never hit `break`).
    print(f"Warning: Table may not be immediately visible: {lookup.stderr}", file=sys.stderr)
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
>>> [CLI] quality-monitors create catalog.schema.table --json @input.json
{
"assets_dir":"/Users/user/databricks_lakehouse_monitoring",
"drift_metrics_table_name":"",
"dashboard_id":"(redacted)",
"drift_metrics_table_name":"catalog.schema.table_drift_metrics",
"monitor_version":0,
"output_schema_name":"catalog.schema",
"profile_metrics_table_name":"",
"profile_metrics_table_name":"catalog.schema.table_profile_metrics",
"snapshot": {},
"status":"MONITOR_STATUS_ACTIVE",
"table_name":"catalog.schema.table"
Expand Down
4 changes: 4 additions & 0 deletions acceptance/bundle/deployment/bind/quality-monitor/test.toml
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
Local = true
Cloud = false

[[Repls]]
Old = '"dashboard_id":"[0-9a-f]+",'
New = '"dashboard_id":"(redacted)",'
54 changes: 54 additions & 0 deletions acceptance/bundle/refschema/out.fields.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3457,6 +3457,60 @@ resources.pipelines.*.permissions.permissions[*].group_name string ALL
resources.pipelines.*.permissions.permissions[*].permission_level iam.PermissionLevel ALL
resources.pipelines.*.permissions.permissions[*].service_principal_name string ALL
resources.pipelines.*.permissions.permissions[*].user_name string ALL
resources.quality_monitors.*.assets_dir string ALL
resources.quality_monitors.*.baseline_table_name string ALL
resources.quality_monitors.*.custom_metrics []catalog.MonitorMetric ALL
resources.quality_monitors.*.custom_metrics[*] catalog.MonitorMetric ALL
resources.quality_monitors.*.custom_metrics[*].definition string ALL
resources.quality_monitors.*.custom_metrics[*].input_columns []string ALL
resources.quality_monitors.*.custom_metrics[*].input_columns[*] string ALL
resources.quality_monitors.*.custom_metrics[*].name string ALL
resources.quality_monitors.*.custom_metrics[*].output_data_type string ALL
resources.quality_monitors.*.custom_metrics[*].type catalog.MonitorMetricType ALL
resources.quality_monitors.*.dashboard_id string REMOTE
resources.quality_monitors.*.data_classification_config *catalog.MonitorDataClassificationConfig ALL
resources.quality_monitors.*.data_classification_config.enabled bool ALL
resources.quality_monitors.*.drift_metrics_table_name string REMOTE
resources.quality_monitors.*.id string INPUT
resources.quality_monitors.*.inference_log *catalog.MonitorInferenceLog ALL
resources.quality_monitors.*.inference_log.granularities []string ALL
resources.quality_monitors.*.inference_log.granularities[*] string ALL
resources.quality_monitors.*.inference_log.label_col string ALL
resources.quality_monitors.*.inference_log.model_id_col string ALL
resources.quality_monitors.*.inference_log.prediction_col string ALL
resources.quality_monitors.*.inference_log.prediction_proba_col string ALL
resources.quality_monitors.*.inference_log.problem_type catalog.MonitorInferenceLogProblemType ALL
resources.quality_monitors.*.inference_log.timestamp_col string ALL
resources.quality_monitors.*.latest_monitor_failure_msg string ALL
resources.quality_monitors.*.lifecycle resources.Lifecycle INPUT
resources.quality_monitors.*.lifecycle.prevent_destroy bool INPUT
resources.quality_monitors.*.modified_status string INPUT
resources.quality_monitors.*.monitor_version int64 REMOTE
resources.quality_monitors.*.notifications *catalog.MonitorNotifications ALL
resources.quality_monitors.*.notifications.on_failure *catalog.MonitorDestination ALL
resources.quality_monitors.*.notifications.on_failure.email_addresses []string ALL
resources.quality_monitors.*.notifications.on_failure.email_addresses[*] string ALL
resources.quality_monitors.*.notifications.on_new_classification_tag_detected *catalog.MonitorDestination ALL
resources.quality_monitors.*.notifications.on_new_classification_tag_detected.email_addresses []string ALL
resources.quality_monitors.*.notifications.on_new_classification_tag_detected.email_addresses[*] string ALL
resources.quality_monitors.*.output_schema_name string ALL
resources.quality_monitors.*.profile_metrics_table_name string REMOTE
resources.quality_monitors.*.schedule *catalog.MonitorCronSchedule ALL
resources.quality_monitors.*.schedule.pause_status catalog.MonitorCronSchedulePauseStatus ALL
resources.quality_monitors.*.schedule.quartz_cron_expression string ALL
resources.quality_monitors.*.schedule.timezone_id string ALL
resources.quality_monitors.*.skip_builtin_dashboard bool INPUT STATE
resources.quality_monitors.*.slicing_exprs []string ALL
resources.quality_monitors.*.slicing_exprs[*] string ALL
resources.quality_monitors.*.snapshot *catalog.MonitorSnapshot ALL
resources.quality_monitors.*.status catalog.MonitorInfoStatus REMOTE
resources.quality_monitors.*.table_name string ALL
resources.quality_monitors.*.time_series *catalog.MonitorTimeSeries ALL
resources.quality_monitors.*.time_series.granularities []string ALL
resources.quality_monitors.*.time_series.granularities[*] string ALL
resources.quality_monitors.*.time_series.timestamp_col string ALL
resources.quality_monitors.*.url string INPUT
resources.quality_monitors.*.warehouse_id string INPUT STATE
resources.registered_models.*.aliases []catalog.RegisteredModelAlias ALL
resources.registered_models.*.aliases[*] catalog.RegisteredModelAlias ALL
resources.registered_models.*.aliases[*].alias_name string ALL
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
bundle:
name: quality-monitor-update-$UNIQUE_NAME

resources:
quality_monitors:
monitor1:
table_name: main.qm_test_$UNIQUE_NAME.test_table
assets_dir: /Workspace/Users/$CURRENT_USER_NAME/monitor_assets_$UNIQUE_NAME
output_schema_name: main.qm_test_$UNIQUE_NAME
snapshot: {}
warehouse_id: $TEST_DEFAULT_WAREHOUSE_ID
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@

>>> errcode [CLI] bundle deploy
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/quality-monitor-update-[UNIQUE_NAME]/default/files...
Deploying resources...
Updating deployment state...
Deployment complete!
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like the update now causes a recreation instead of an in-place update, which changes the quality monitor's URL and ID. That URL or ID might be referenced from places outside the bundle's scope (for example, a dashboard).
Is this recreation intentional?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. `assets_dir` is not accepted by the update request for quality monitors, so it cannot be changed in place; the monitor has to be recreated.

https://docs.databricks.com/api/workspace/qualitymonitors/update

"method": "DELETE",
"path": "/api/2.1/unity-catalog/tables/main.qm_test_[UNIQUE_NAME].test_table/monitor"
}
{
"method": "POST",
"path": "/api/2.1/unity-catalog/tables/main.qm_test_[UNIQUE_NAME].test_table/monitor",
"body": {
"assets_dir": "/Workspace/Users/[USERNAME]/monitor_assets2_[UNIQUE_NAME]",
"output_schema_name": "main.qm_test_[UNIQUE_NAME]",
"snapshot": {},
"warehouse_id": "[TEST_DEFAULT_WAREHOUSE_ID]"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"method": "PUT",
"path": "/api/2.1/unity-catalog/tables/main.qm_test_[UNIQUE_NAME].test_table/monitor",
"body": {
"dashboard_id": "(redacted)",
"output_schema_name": "main.qm_test_[UNIQUE_NAME]",
"snapshot": {}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@

>>> errcode [CLI] bundle deploy
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/quality-monitor-update-[UNIQUE_NAME]/default/files...
Deploying resources...
Error: terraform apply: exit status 1

Error: Provider produced inconsistent result after apply

When applying changes to databricks_quality_monitor.monitor1, provider
"provider[\"registry.terraform.io/databricks/databricks\"]" produced an
unexpected new value: .assets_dir: was
cty.StringVal("/Workspace/Users/[USERNAME]/monitor_assets2_[UNIQUE_NAME]"),
but now
cty.StringVal("/Workspace/Users/[USERNAME]/monitor_assets_[UNIQUE_NAME]").

This is a bug in the provider, which should be reported in the provider's own
issue tracker.


Updating deployment state...

Exit code: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@

>>> errcode [CLI] bundle plan -o json
{
"plan_version": 2,
"cli_version": "[DEV_VERSION]",
"lineage": "[UUID]",
"serial": 1,
"plan": {
"resources.quality_monitors.monitor1": {
"action": "recreate",
"new_state": {
"value": {
"assets_dir": "/Workspace/Users/[USERNAME]/monitor_assets2_[UNIQUE_NAME]",
"output_schema_name": "main.qm_test_[UNIQUE_NAME]",
"snapshot": {},
"table_name": "main.qm_test_[UNIQUE_NAME].test_table",
"warehouse_id": "[TEST_DEFAULT_WAREHOUSE_ID]"
}
},
"remote_state": {
"assets_dir": "/Workspace/Users/[USERNAME]/monitor_assets_[UNIQUE_NAME]",
"dashboard_id": "(redacted)",
"drift_metrics_table_name": "main.qm_test_[UNIQUE_NAME].test_table_drift_metrics",
"monitor_version": 0,
"output_schema_name": "main.qm_test_[UNIQUE_NAME]",
"profile_metrics_table_name": "main.qm_test_[UNIQUE_NAME].test_table_profile_metrics",
"snapshot": {},
"status": "MONITOR_STATUS_ACTIVE",
"table_name": "main.qm_test_[UNIQUE_NAME].test_table"
},
"changes": {
"assets_dir": {
"action": "recreate",
"reason": "field_triggers",
"old": "/Workspace/Users/[USERNAME]/monitor_assets_[UNIQUE_NAME]",
"new": "/Workspace/Users/[USERNAME]/monitor_assets2_[UNIQUE_NAME]",
"remote": "/Workspace/Users/[USERNAME]/monitor_assets_[UNIQUE_NAME]"
},
"warehouse_id": {
"action": "skip",
"reason": "config_only",
"old": "[TEST_DEFAULT_WAREHOUSE_ID]",
"new": "[TEST_DEFAULT_WAREHOUSE_ID]"
}
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@

>>> errcode [CLI] bundle plan
recreate quality_monitors.monitor1

Plan: 1 to add, 0 to change, 1 to delete, 0 unchanged
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@

>>> errcode [CLI] bundle plan -o json
{
"plan_version": 2,
"cli_version": "[DEV_VERSION]",
"plan": {
"resources.quality_monitors.monitor1": {
"action": "update"
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@

>>> errcode [CLI] bundle plan
update quality_monitors.monitor1

Plan: 0 to add, 1 to change, 0 to delete, 0 unchanged
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@

>>> errcode [CLI] bundle plan
Plan: 0 to add, 0 to change, 0 to delete, 1 unchanged
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@

>>> errcode [CLI] bundle plan
update quality_monitors.monitor1

Plan: 0 to add, 1 to change, 0 to delete, 0 unchanged

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@

>>> [CLI] schemas create qm_test_[UNIQUE_NAME] main -o json
{
"full_name": "main.qm_test_[UNIQUE_NAME]"
}

>>> [CLI] schemas create qm_test_[UNIQUE_NAME]_2 main -o json
{
"full_name": "main.qm_test_[UNIQUE_NAME]_2"
}

>>> create_table.py main.qm_test_[UNIQUE_NAME].test_table
Created table main.qm_test_[UNIQUE_NAME].test_table
Inserted sample data into main.qm_test_[UNIQUE_NAME].test_table
Table main.qm_test_[UNIQUE_NAME].test_table is now visible (catalog_name=main)

>>> [CLI] bundle deploy
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/quality-monitor-update-[UNIQUE_NAME]/default/files...
Deploying resources...
Updating deployment state...
Deployment complete!

>>> [CLI] bundle plan
Plan: 0 to add, 0 to change, 0 to delete, 1 unchanged

>>> print_requests.py ^//import-file/ ^//workspace/ ^//telemetry-ext

>>> [CLI] bundle destroy --auto-approve
The following resources will be deleted:
delete resources.quality_monitors.monitor1

All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/quality-monitor-update-[UNIQUE_NAME]/default

Deleting files...
Destroy complete!
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Acceptance script: deploy a quality monitor, change its assets_dir, and record
# the resulting plan, deploy output and API requests per bundle engine.

# Fully qualified names of the schema and table the monitor is attached to.
SCHEMA_NAME="main.qm_test_${UNIQUE_NAME}"
TABLE_NAME="${SCHEMA_NAME}.test_table"

# Create two schemas in the "main" catalog (jq keeps only full_name in the
# recorded output) and the table inside the first one.
# NOTE(review): the "_2" schema is not referenced again in this script except
# in cleanup — confirm whether it is still needed.
trace $CLI schemas create "qm_test_${UNIQUE_NAME}" main -o json | jq '{full_name}'
trace $CLI schemas create "qm_test_${UNIQUE_NAME}_2" main -o json | jq '{full_name}'
trace create_table.py "$TABLE_NAME"

# Tear down everything this script created. Schema deletion is best-effort:
# its errors are silenced and ignored.
cleanup() {
trace $CLI bundle destroy --auto-approve
$CLI schemas delete "$SCHEMA_NAME" --force 2>/dev/null || true
$CLI schemas delete "qm_test_${UNIQUE_NAME}_2" --force 2>/dev/null || true
rm -f out.requests.txt
}
# Run cleanup whenever the script exits.
trap cleanup EXIT

# Render the bundle configuration from the template using environment variables.
envsubst < databricks.yml.tmpl > databricks.yml

# Initial deployment of the monitor.
trace $CLI bundle deploy

# Planning right after the deploy is expected to report "1 unchanged".
trace $CLI bundle plan | contains.py "1 unchanged"

# Point assets_dir at a different directory (monitor_assets_ -> monitor_assets2_).
update_file.py databricks.yml "assets_dir: /Workspace/Users/$CURRENT_USER_NAME/monitor_assets_$UNIQUE_NAME" "assets_dir: /Workspace/Users/$CURRENT_USER_NAME/monitor_assets2_$UNIQUE_NAME"

# Record the plan in text and JSON form; file names carry the engine name
# from $DATABRICKS_BUNDLE_ENGINE.
trace errcode $CLI bundle plan &> out.plan.$DATABRICKS_BUNDLE_ENGINE.txt
trace errcode $CLI bundle plan -o json &> out.plan.$DATABRICKS_BUNDLE_ENGINE.json

# Drop out.requests.txt before the second deploy — presumably so that the
# request dump below contains only requests made by that deploy.
rm out.requests.txt
trace errcode $CLI bundle deploy &> out.deploy.$DATABRICKS_BUNDLE_ENGINE.txt
trace print_requests.py '^//import-file/' '^//workspace/' '^//telemetry-ext' > out.deploy.requests.$DATABRICKS_BUNDLE_ENGINE.json
# Record what a plan reports after the second deploy.
trace errcode $CLI bundle plan &> out.plan_after_deploy.$DATABRICKS_BUNDLE_ENGINE.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
bundle:
name: quality-monitor-update-$UNIQUE_NAME

resources:
quality_monitors:
monitor1:
table_name: main.qm_test_$UNIQUE_NAME.test_table
assets_dir: /Workspace/Users/$CURRENT_USER_NAME/monitor_assets_$UNIQUE_NAME
output_schema_name: main.qm_test_$UNIQUE_NAME
snapshot: {}
warehouse_id: $TEST_DEFAULT_WAREHOUSE_ID
Loading