Checkin example multi-region-serving #62

Open · wants to merge 9 commits into main · Changes from 5 commits
3 changes: 3 additions & 0 deletions multi_region_serving/.gitignore
@@ -0,0 +1,3 @@
.vscode
.databricks
.scratch
51 changes: 51 additions & 0 deletions multi_region_serving/README.md
@@ -0,0 +1,51 @@
# Multi-region Serving

This Databricks Asset Bundle (DAB) is an example tool for syncing resources between a main
workspace and remote workspaces, simplifying the workflow for serving models or features
across multiple regions.

## How to use this example
1. Download this example

2. Make changes as needed. Some files to highlight:
* resources/*.job.yml - Job metadata, including parameters.
* src/manage_endpoint.ipynb - Notebook for creating / updating serving endpoints.
* src/manage_share.ipynb - Notebook for syncing the dependencies of a shared model.
* databricks.yml - DAB bundle configuration, including target names and workspace URLs.

## How to trigger the workflows

1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html

2. Authenticate to your Databricks workspaces, if you have not done so already:
```
$ databricks configure
```

3. To deploy a copy to your main workspace:
```
$ databricks bundle deploy --target main
```
(Note that "main" is the target name defined in databricks.yml)

This deploys everything that's defined for this project.
For example, the default template would deploy a job called
`[dev yourname] manage_serving_job` to your workspace.
You can find that job by opening your workspace and clicking on **Workflows**.

4. Similarly, to deploy to a remote workspace, type:
```
$ databricks bundle -t remote1 -p <DATABRICKS_PROFILE> deploy
```

Use `-p` to specify the Databricks profile used by this command. The profile needs to be
configured in `~/.databrickscfg`, for example as shown below.
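
A profile entry in `~/.databrickscfg` typically looks like the following (the profile name,
host, and token are placeholders, not values taken from this example):
```
[remote1-profile]
host  = https://<REMOTE_WORKSPACE_URL>
token = <PERSONAL_ACCESS_TOKEN>
```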

5. To run the workflow to sync a share, use the "run" command:
```
$ databricks bundle -t main -p <DATABRICKS_PROFILE> run manage_share_job
```

6. For documentation on the Databricks asset bundles format used
for this project, and for CI/CD configuration, see
https://docs.databricks.com/dev-tools/bundles/index.html.
26 changes: 26 additions & 0 deletions multi_region_serving/databricks.yml
@@ -0,0 +1,26 @@
# This is a Databricks asset bundle definition for manage_serving.
# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
bundle:
name: manage_serving

include:
- resources/*.yml

targets:
main:
# The default target uses 'mode: development' to create a development copy.
# - Deployed resources get prefixed with '[dev my_user_name]'
# - Any job schedules and triggers are paused by default.
# See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html.
mode: development
default: true
workspace:
host: https://e2-dogfood.staging.cloud.databricks.com

remote1:
# The remote workspace that serves the model
mode: development
workspace:
host: https://e2-dogfood-feature-store.staging.cloud.databricks.com


29 changes: 29 additions & 0 deletions multi_region_serving/requirements-dev.txt
@@ -0,0 +1,29 @@
## requirements-dev.txt: dependencies for local development.
##
## For defining dependencies used by jobs in Databricks Workflows, see
## https://docs.databricks.com/dev-tools/bundles/library-dependencies.html

## Add code completion support for DLT
databricks-dlt

## pytest is the default package used for testing
pytest

## Dependencies for building wheel files
setuptools
wheel

## databricks-connect can be used to run parts of this project locally.
## See https://docs.databricks.com/dev-tools/databricks-connect.html.
##
## databricks-connect is automatically installed if you're using the Databricks
## extension for Visual Studio Code
## (https://docs.databricks.com/dev-tools/vscode-ext/dev-tasks/databricks-connect.html).
##
## To manually install databricks-connect, either follow the instructions
## at https://docs.databricks.com/dev-tools/databricks-connect.html
## to install the package system-wide, or uncomment the line below to install a
## version of databricks-connect that corresponds to the Databricks Runtime version
## used for this project.
#
# databricks-connect>=15.4,<15.5
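##
## To set up a local development environment with these dependencies, a standard
## pip workflow can be used, e.g. `pip install -r requirements-dev.txt`.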
18 changes: 18 additions & 0 deletions multi_region_serving/resources/manage_serving.job.yml
@@ -0,0 +1,18 @@
resources:
jobs:
manage_serving_job:
name: manage_serving_job
email_notifications:
on_failure:
- <YOUR_EMAIL_ADDRESS>
tasks:
- task_key: notebook_task
notebook_task:
notebook_path: ../src/manage_endpoint.ipynb
parameters:
- name: endpoint_name
default: <ENDPOINT_NAME>
Contributor: Instead of placeholders like this, you can define bundle variables and use them here; users of this example can then just provide variable values.

Author: Good suggestion! I updated it to use variables.
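
For reference, a minimal sketch of the variable-based approach (the variable name and values below are illustrative, not necessarily what the updated PR uses):
```
# databricks.yml: declare a bundle variable
variables:
  endpoint_name:
    description: Name of the serving endpoint to create or update.

# resources/manage_serving.job.yml: reference the variable instead of a placeholder
parameters:
  - name: endpoint_name
    default: ${var.endpoint_name}
```
A value can then be supplied at deploy time, e.g. `databricks bundle deploy --var="endpoint_name=<ENDPOINT_NAME>"`.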

- name: model_name
default: <MODEL_NAME_IN_UC>
- name: model_version
default: "1"
18 changes: 18 additions & 0 deletions multi_region_serving/resources/manage_share.job.yml
@@ -0,0 +1,18 @@
resources:
jobs:
manage_share_job:
name: manage_share_job
email_notifications:
on_failure:
- <YOUR_EMAIL_ADDRESS>
tasks:
- task_key: notebook_task
notebook_task:
notebook_path: ../src/manage_share.ipynb
parameters:
- name: model_name
default: <MODEL_NAME_IN_UC>
- name: max_number_of_versions_to_sync
default: '10'
- name: share_name
default: <NAME_OF_THE_SHARE_TO_SYNC>
4 changes: 4 additions & 0 deletions multi_region_serving/scratch/README.md
@@ -0,0 +1,4 @@
# scratch

This folder is reserved for personal, exploratory notebooks.
By default these are not committed to Git, as 'scratch' is listed in .gitignore.
25 changes: 25 additions & 0 deletions multi_region_serving/src/lib/rest_client.py
@@ -0,0 +1,25 @@
import json
import urllib.error
import urllib.request

from databricks.sdk.runtime import spark


class RestClient:
    """Minimal REST client for calling the Databricks workspace API."""

    def __init__(self, context):
        # Build the workspace URL from the Spark config and reuse the caller's API token.
        self.base_url = "https://" + spark.conf.get("spark.databricks.workspaceUrl")
        self.token = context.apiToken().get()

    def get_share_info(self, share_name: str):
        # Fetch the share definition, including the list of shared data objects.
        return self._get(
            f"api/2.1/unity-catalog/shares/{share_name}?include_shared_data=true"
        )

    def _get(self, uri):
        url = f"{self.base_url}/{uri}"
        headers = {"Authorization": f"Bearer {self.token}"}
        req = urllib.request.Request(url, headers=headers)
        try:
            response = urllib.request.urlopen(req)
            return json.load(response)
        except urllib.error.HTTPError as e:
            # Log the status code and error body; callers receive None on failure.
            result = e.read().decode()
            print((e.code, result))
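
# Example usage from a Databricks notebook (illustrative; assumes the notebook
# context is obtained via dbutils):
#   ctx = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
#   client = RestClient(ctx)
#   share_info = client.get_share_info("<SHARE_NAME>")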