Updating sql connector version and testing integration with OAuth #436

Merged: 6 commits, Sep 7, 2023

9 changes: 5 additions & 4 deletions .github/workflows/integration.yml
@@ -6,7 +6,8 @@ jobs:
environment: azure-prod
env:
DBT_DATABRICKS_HOST_NAME: ${{ secrets.DATABRICKS_HOST }}
-DBT_DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}
+DBT_DATABRICKS_CLIENT_ID: ${{ secrets.TEST_PECO_SP_ID }}
+DBT_DATABRICKS_CLIENT_SECRET: ${{ secrets.TEST_PECO_SP_SECRET }}
DBT_DATABRICKS_HTTP_PATH: ${{ secrets.TEST_PECO_WAREHOUSE_HTTP_PATH }}
DBT_DATABRICKS_UC_INITIAL_CATALOG: peco
DBT_DATABRICKS_LOCATION_ROOT: ${{ secrets.TEST_PECO_EXTERNAL_LOCATION }}
@@ -27,9 +28,9 @@ jobs:
id: install-dependencies
run: pip install tox
- name: Run integration-databricks-uc-sql-endpoint
-run: DBT_TEST_USER=notnecessaryformosttests@example.com DBT_DATABRICKS_LOCATION_ROOT=$DBT_DATABRICKS_LOCATION_ROOT DBT_DATABRICKS_HOST_NAME=$DBT_DATABRICKS_HOST_NAME DBT_DATABRICKS_HTTP_PATH=$DBT_DATABRICKS_HTTP_PATH DBT_DATABRICKS_UC_CLUSTER_HTTP_PATH=$DBT_DATABRICKS_UC_CLUSTER_HTTP_PATH DBT_DATABRICKS_TOKEN=$DBT_DATABRICKS_TOKEN tox -e integration-databricks-uc-sql-endpoint
+run: DBT_TEST_USER=notnecessaryformosttests@example.com DBT_DATABRICKS_LOCATION_ROOT=$DBT_DATABRICKS_LOCATION_ROOT DBT_DATABRICKS_HOST_NAME=$DBT_DATABRICKS_HOST_NAME DBT_DATABRICKS_HTTP_PATH=$DBT_DATABRICKS_HTTP_PATH DBT_DATABRICKS_UC_CLUSTER_HTTP_PATH=$DBT_DATABRICKS_UC_CLUSTER_HTTP_PATH DBT_DATABRICKS_CLIENT_ID=$DBT_DATABRICKS_CLIENT_ID DBT_DATABRICKS_CLIENT_SECRET=$DBT_DATABRICKS_CLIENT_SECRET tox -e integration-databricks-uc-sql-endpoint
- name: Run integration-uc-databricks-cluster
-run: DBT_TEST_USER=notnecessaryformosttests@example.com DBT_DATABRICKS_LOCATION_ROOT=$DBT_DATABRICKS_LOCATION_ROOT DBT_DATABRICKS_HOST_NAME=$DBT_DATABRICKS_HOST_NAME DBT_DATABRICKS_HTTP_PATH=$DBT_DATABRICKS_CLUSTER_HTTP_PATH DBT_DATABRICKS_UC_CLUSTER_HTTP_PATH=$DBT_DATABRICKS_UC_CLUSTER_HTTP_PATH DBT_DATABRICKS_TOKEN=$DBT_DATABRICKS_TOKEN tox -e integration-databricks-uc-cluster
+run: DBT_TEST_USER=notnecessaryformosttests@example.com DBT_DATABRICKS_LOCATION_ROOT=$DBT_DATABRICKS_LOCATION_ROOT DBT_DATABRICKS_HOST_NAME=$DBT_DATABRICKS_HOST_NAME DBT_DATABRICKS_HTTP_PATH=$DBT_DATABRICKS_CLUSTER_HTTP_PATH DBT_DATABRICKS_UC_CLUSTER_HTTP_PATH=$DBT_DATABRICKS_UC_CLUSTER_HTTP_PATH DBT_DATABRICKS_CLIENT_ID=$DBT_DATABRICKS_CLIENT_ID DBT_DATABRICKS_CLIENT_SECRET=$DBT_DATABRICKS_CLIENT_SECRET tox -e integration-databricks-uc-cluster
run-tox-tests-non-uc:
runs-on: ubuntu-latest
environment: azure-prod
Expand All @@ -55,4 +56,4 @@ jobs:
id: install-dependencies
run: pip install tox
- name: Run integration-databricks-cluster
-run: DBT_TEST_USER=notnecessaryformosttests@example.com DBT_DATABRICKS_LOCATION_ROOT=$DBT_DATABRICKS_LOCATION_ROOT DBT_DATABRICKS_HOST_NAME=$DBT_DATABRICKS_HOST_NAME DBT_DATABRICKS_HTTP_PATH=$DBT_DATABRICKS_CLUSTER_HTTP_PATH DBT_DATABRICKS_TOKEN=$DBT_DATABRICKS_TOKEN tox -e integration-databricks-cluster
+run: DBT_TEST_USER=notnecessaryformosttests@example.com DBT_DATABRICKS_LOCATION_ROOT=$DBT_DATABRICKS_LOCATION_ROOT DBT_DATABRICKS_HOST_NAME=$DBT_DATABRICKS_HOST_NAME DBT_DATABRICKS_HTTP_PATH=$DBT_DATABRICKS_CLUSTER_HTTP_PATH DBT_DATABRICKS_CLIENT_ID=$DBT_DATABRICKS_CLIENT_ID DBT_DATABRICKS_CLIENT_SECRET=$DBT_DATABRICKS_CLIENT_SECRET tox -e integration-databricks-cluster
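For readers wondering how the new `DBT_DATABRICKS_CLIENT_ID` / `DBT_DATABRICKS_CLIENT_SECRET` variables are consumed, a dbt-databricks profile using machine-to-machine OAuth looks roughly like the sketch below. The profile and target names, the `catalog` value, and the `env_var` indirection are illustrative assumptions, not the exact profile used by the tox test suites.

```yaml
# Minimal sketch of a profiles.yml target using service-principal OAuth instead of a PAT.
# Names and env_var lookups are assumptions; auth_type, client_id, and client_secret are
# the dbt-databricks OAuth settings that replace the old token field.
databricks_oauth_demo:
  target: uc_sql_endpoint
  outputs:
    uc_sql_endpoint:
      type: databricks
      host: "{{ env_var('DBT_DATABRICKS_HOST_NAME') }}"
      http_path: "{{ env_var('DBT_DATABRICKS_HTTP_PATH') }}"
      catalog: peco
      schema: "<your-schema>"
      auth_type: oauth
      client_id: "{{ env_var('DBT_DATABRICKS_CLIENT_ID') }}"
      client_secret: "{{ env_var('DBT_DATABRICKS_CLIENT_SECRET') }}"
```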
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -3,6 +3,12 @@
### Fixes

- Improved legibility of python stack traces ([#434](https://github.com/databricks/dbt-databricks/pull/434)).
- Update our Databricks Workflow README to make clear that job clusters are not supported targets ([#436](https://github.com/databricks/dbt-databricks/pull/436))
- Relaxed the constraint on databricks-sql-connector to allow newer versions ([#436](https://github.com/databricks/dbt-databricks/pull/436))

### Under the hood

- Switch to running integration tests with OAuth ([#436](https://github.com/databricks/dbt-databricks/pull/436))

## dbt-databricks 1.6.2 (August 29, 2023)

41 changes: 28 additions & 13 deletions docs/databricks-workflows.md
@@ -5,25 +5,34 @@
Databricks Workflows is a highly-reliable, managed orchestrator that lets you au
In this guide, you will learn how to update an existing dbt project to run as a job, retrieve dbt run artifacts using the Jobs API and debug common issues.

# Overview

When you run a dbt project as a Databricks Job, the dbt CLI runs on a single-node Automated Cluster. The SQL generated by dbt runs on a serverless SQL warehouse.

# Prerequisites

- An existing dbt project version controlled in git
- Access to a Databricks workspace
- Ability to launch job clusters (using a policy or cluster create permissions), or access to an existing interactive cluster with `dbt-core` and `dbt-databricks` libraries installed, or `CAN_MANAGE` permissions to install `dbt-core` and `dbt-databricks` as cluster libraries.
- Access to an existing interactive cluster with `dbt-core` and `dbt-databricks` libraries installed, or `CAN_MANAGE` permissions to install `dbt-core` and `dbt-databricks` as cluster libraries, OR
- Access to serverless SQL warehouses. See [documentation](https://docs.databricks.com/serverless-compute/index.html) to learn more about this feature and regional availability.
- [Files in Repos](https://docs.databricks.com/repos/index.html#enable-support-for-arbitrary-files-in-databricks-repos) must be enabled; it is only supported on Databricks Runtime (DBR) 8.4+ or DBR 11+, depending on the configuration. Please make sure the cluster has the appropriate DBR version.
- Install and configure the [Databricks CLI](https://docs.databricks.com/dev-tools/cli/index.html)
- Install [jq](https://stedolan.github.io/jq/download/), a popular open source tool for parsing JSON from the command line

Note: previously, dbt tasks on Databricks Workflows could target job clusters for compute.
That is [no longer supported](https://docs.databricks.com/en/workflows/jobs/how-to/use-dbt-in-workflows.html#advanced-run-dbt-with-a-custom-profile).
Job clusters can now only be used to run the dbt CLI itself.

# Run dbt as a production job

In this step, you will create a job that will run the dbt project on a schedule.

## Connect Databricks to Git

The dbt task only supports retrieving dbt projects from Git. Please follow [the documentation](https://docs.databricks.com/repos/index.html#configure-your-git-integration-with-databricks) to connect Databricks to Git.

## Create a job

1. Log in to your Databricks workspace
2. Click the _Data Science & Engineering_ persona in the left navigation bar
3. Click _Workflows_
4. Click _Create Job_
@@ -41,12 +50,14 @@ The dbt task only supports retrieve dbt projects from Git. Please follow [the do
13. Click _Save_

# Run the job and view dbt output

You can now run your newly-saved job and see its output.

1. Click _Run Now_ on the notification that shows up when you save the job
2. Click the active run and see dbt output. Note that dbt output is not real-time; it lags behind dbt's progress by several seconds to a minute.

# Retrieve dbt artifacts using the Jobs API

A dbt run generates useful artifacts which you may want to retrieve for analysis and more. Databricks saves the contents of `/logs` and `/target` directories as a compressed archive which you can retrieve using the Jobs API.

> It is currently not possible to refer to a previous run's artifacts, e.g. by using the `--state` flag. You can, however, include a known good state in your repository.
@@ -77,14 +88,18 @@
$ tar -xvf artifact.tar.gz
```

# Common issues

## Unable to connect to Databricks

- If you do not use the automatically-generated `profiles.yml`, check your Personal Access Token (PAT). It must not be expired.
- Consider adding `dbt debug` as the first command. This may give you a clue about the failure.

## dbt cannot find my `dbt_project.yml` file

If you have checked out the Git repository before enabling the _Files in Repos_ feature, the cached checkout may be invalid. You need to push a dummy commit to your repository to force a fresh checkout.

# Connecting to different sources (custom profile)

By default, the dbt task type connects to the serverless SQL warehouse specified in the task, without any configuration changes or the need to check in secrets. It does so by generating a default `profiles.yml` and telling dbt to use it. There are no restrictions on connecting to other dbt targets such as Databricks SQL, Amazon Redshift, Google BigQuery, Snowflake, or any other [supported adapter](https://docs.getdbt.com/docs/available-adapters).

The automatically generated profile can be overridden by specifying an alternative profiles directory in the dbt command using `--profiles-dir <dir>`, where the path of the `<dir>` should be a relative path like `.` or `./my-directory`.
@@ -95,15 +110,15 @@
If you'd like to connect to multiple outputs and including the current Interacti

```yaml
databricks_demo:
  target: databricks_cluster
  outputs:
    databricks_cluster:
      type: databricks
      connect_retries: 5
      connect_timeout: 180
      schema: "<your-schema>"
      threads: 8 # This can be increased or decreased to control the parallelism
      host: "{{ env_var('DBT_HOST') }}"
      http_path: "sql/protocolv1/o/{{ env_var('DBT_ORG_ID') }}/{{ env_var('DBT_CLUSTER_ID') }}"
      token: "{{ env_var('DBT_ACCESS_TOKEN') }}"
```
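If a profile like the one above is checked into the repository, the dbt task can be pointed at it via `--profiles-dir`. The snippet below is a hypothetical Jobs API `dbt_task` fragment illustrating that wiring; the command list and the empty `project_directory` are assumptions for a project living at the repository root.

```yaml
# Hypothetical dbt_task fragment (Jobs API 2.1 style): use the checked-in profiles.yml
# at the repository root instead of the auto-generated one.
dbt_task:
  project_directory: ""   # repository root (assumption)
  commands:
    - "dbt deps"
    - "dbt run --profiles-dir . --target databricks_cluster"
```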
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,4 +1,4 @@
-databricks-sql-connector~=2.7.0
+databricks-sql-connector>=2.9.3, <3.0.0
dbt-spark==1.6.0
databricks-sdk==0.1.7
keyring>=23.13.0
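As a sanity check on the relaxed pin, a CI step along the following lines could surface the connector version that pip actually resolves. This step is not part of this PR, and it assumes the package exposes `databricks.sql.__version__` as recent releases do.

```yaml
# Optional verification step (sketch, not in this PR): print the resolved
# databricks-sql-connector version in the workflow logs.
- name: Show databricks-sql-connector version
  run: python -c "import databricks.sql; print(databricks.sql.__version__)"
```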
14 changes: 10 additions & 4 deletions setup.py
@@ -17,7 +17,9 @@
except ImportError:
    # the user has a downlevel version of setuptools.
    print("Error: dbt requires setuptools v40.1.0 or higher.")
-    print('Please upgrade setuptools with "pip install --upgrade setuptools" and try again')
+    print(
+        'Please upgrade setuptools with "pip install --upgrade setuptools" and try again'
+    )
    sys.exit(1)


@@ -29,12 +31,16 @@

# get this package's version from dbt/adapters/<name>/__version__.py
def _get_plugin_version():
-    _version_path = os.path.join(this_directory, "dbt", "adapters", "databricks", "__version__.py")
+    _version_path = os.path.join(
+        this_directory, "dbt", "adapters", "databricks", "__version__.py"
+    )
    try:
        exec(open(_version_path).read())
        return locals()["version"]
    except IOError:
-        print("Failed to load dbt-databricks version file for packaging.", file=sys.stderr)
+        print(
+            "Failed to load dbt-databricks version file for packaging.", file=sys.stderr
+        )
        sys.exit(-1)


@@ -55,7 +61,7 @@ def _get_plugin_version():
    include_package_data=True,
    install_requires=[
        "dbt-spark==1.6.0",
-        "databricks-sql-connector~=2.7.0",
+        "databricks-sql-connector>=2.9.3, <3.0.0",
        "databricks-sdk>=0.1.7",
        "keyring>=23.13.0",
    ],