From 00d9c00364afa4fa8ecf262a21f090e3731035b7 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Tue, 15 Oct 2024 11:59:07 +0200 Subject: [PATCH] Add example to demonstrate using a custom `sys.path` --- .../job_with_custom_sys_path/.gitignore | 4 ++ .../job_with_custom_sys_path/README.md | 43 ++++++++++++++++ .../job_with_custom_sys_path/config/dev.json | 1 + .../job_with_custom_sys_path/config/prod.json | 1 + .../job_with_custom_sys_path/config/test.json | 1 + .../job_with_custom_sys_path/databricks.yml | 20 ++++++++ .../my_custom_library/__init__.py | 5 ++ .../my_custom_library/loader.py | 15 ++++++ .../my_custom_library/parameters.py | 31 ++++++++++++ .../job_with_custom_sys_path/requirements.txt | 2 + .../print_bundle_configuration.job.yml | 18 +++++++ .../job_with_custom_sys_path/src/print.ipynb | 49 +++++++++++++++++++ .../tests/test_load_configuration.py | 34 +++++++++++++ 13 files changed, 224 insertions(+) create mode 100644 knowledge_base/job_with_custom_sys_path/.gitignore create mode 100644 knowledge_base/job_with_custom_sys_path/README.md create mode 100644 knowledge_base/job_with_custom_sys_path/config/dev.json create mode 100644 knowledge_base/job_with_custom_sys_path/config/prod.json create mode 100644 knowledge_base/job_with_custom_sys_path/config/test.json create mode 100644 knowledge_base/job_with_custom_sys_path/databricks.yml create mode 100644 knowledge_base/job_with_custom_sys_path/my_custom_library/__init__.py create mode 100644 knowledge_base/job_with_custom_sys_path/my_custom_library/loader.py create mode 100644 knowledge_base/job_with_custom_sys_path/my_custom_library/parameters.py create mode 100644 knowledge_base/job_with_custom_sys_path/requirements.txt create mode 100644 knowledge_base/job_with_custom_sys_path/resources/print_bundle_configuration.job.yml create mode 100644 knowledge_base/job_with_custom_sys_path/src/print.ipynb create mode 100644 knowledge_base/job_with_custom_sys_path/tests/test_load_configuration.py diff --git 
a/knowledge_base/job_with_custom_sys_path/.gitignore b/knowledge_base/job_with_custom_sys_path/.gitignore new file mode 100644 index 0000000..391378e --- /dev/null +++ b/knowledge_base/job_with_custom_sys_path/.gitignore @@ -0,0 +1,4 @@ +/.databricks +/.venv +/.vscode +__pycache__ diff --git a/knowledge_base/job_with_custom_sys_path/README.md b/knowledge_base/job_with_custom_sys_path/README.md new file mode 100644 index 0000000..e8029a1 --- /dev/null +++ b/knowledge_base/job_with_custom_sys_path/README.md @@ -0,0 +1,43 @@ +# Job with custom `sys.path` + +This example demonstrates how to: +1. Define a job that takes parameters with values that derive from the bundle. +2. Use the path parameter to augment Python's `sys.path` to import a module from the bundle. +3. Access job parameters from the imported module. + +## Prerequisites + +* Databricks CLI v0.230.0 or above + +## Usage + +This example includes a unit test for the function defined under `my_custom_library` that you can execute on your machine. + +```bash +# Set up a virtual environment +uv venv +source .venv/bin/activate +uv pip install -r ./requirements.txt + +# Run the unit test +python -m pytest +``` + +To deploy the bundle to Databricks, follow these steps: + +* Update the `host` field under `workspace` in `databricks.yml` to the Databricks workspace you wish to deploy to. +* Run `databricks bundle deploy` to deploy the job. +* Run `databricks bundle run print_bundle_configuration` to run the job. + +Example output: + +``` +% databricks bundle run print_bundle_configuration +Run URL: https://... + +2024-10-15 11:48:43 "[dev pieter_noordhuis] Example to demonstrate job parameterization" TERMINATED SUCCESS +``` + +Navigate to the run URL to observe the output of the loaded configuration file. + +You can execute the same steps for the `prod` target.
diff --git a/knowledge_base/job_with_custom_sys_path/config/dev.json b/knowledge_base/job_with_custom_sys_path/config/dev.json new file mode 100644 index 0000000..65ebb2a --- /dev/null +++ b/knowledge_base/job_with_custom_sys_path/config/dev.json @@ -0,0 +1 @@ +[ "this is my development config" ] diff --git a/knowledge_base/job_with_custom_sys_path/config/prod.json b/knowledge_base/job_with_custom_sys_path/config/prod.json new file mode 100644 index 0000000..fc69159 --- /dev/null +++ b/knowledge_base/job_with_custom_sys_path/config/prod.json @@ -0,0 +1 @@ +[ "this is my production config" ] diff --git a/knowledge_base/job_with_custom_sys_path/config/test.json b/knowledge_base/job_with_custom_sys_path/config/test.json new file mode 100644 index 0000000..6eedd39 --- /dev/null +++ b/knowledge_base/job_with_custom_sys_path/config/test.json @@ -0,0 +1 @@ +[ "this is my test config" ] diff --git a/knowledge_base/job_with_custom_sys_path/databricks.yml b/knowledge_base/job_with_custom_sys_path/databricks.yml new file mode 100644 index 0000000..0b99963 --- /dev/null +++ b/knowledge_base/job_with_custom_sys_path/databricks.yml @@ -0,0 +1,20 @@ +bundle: + name: job_with_custom_sys_path + +include: + - ./resources/*.job.yml + +workspace: + host: https://e2-dogfood.staging.cloud.databricks.com + +targets: + dev: + default: true + mode: development + + prod: + mode: production + + # Production mode requires explicit configuration of the identity to use to run the job. 
+ run_as: + user_name: "${workspace.current_user.userName}" diff --git a/knowledge_base/job_with_custom_sys_path/my_custom_library/__init__.py b/knowledge_base/job_with_custom_sys_path/my_custom_library/__init__.py new file mode 100644 index 0000000..4b39b6f --- /dev/null +++ b/knowledge_base/job_with_custom_sys_path/my_custom_library/__init__.py @@ -0,0 +1,5 @@ +from .loader import load_configuration + +__all__ = [ + "load_configuration", +] diff --git a/knowledge_base/job_with_custom_sys_path/my_custom_library/loader.py b/knowledge_base/job_with_custom_sys_path/my_custom_library/loader.py new file mode 100644 index 0000000..dfb9f80 --- /dev/null +++ b/knowledge_base/job_with_custom_sys_path/my_custom_library/loader.py @@ -0,0 +1,15 @@ +import json +from os import path + +from my_custom_library import parameters + + +def load_configuration() -> object: + """ + Load and return the parsed JSON from "config/<bundle target>.json" under the bundle file path. + """ + config_file_path = path.join( + parameters.bundle_file_path(), "config", f"{parameters.bundle_target()}.json" + ) + with open(config_file_path, "r", encoding="utf-8") as file: + return json.load(file) diff --git a/knowledge_base/job_with_custom_sys_path/my_custom_library/parameters.py b/knowledge_base/job_with_custom_sys_path/my_custom_library/parameters.py new file mode 100644 index 0000000..08d0a02 --- /dev/null +++ b/knowledge_base/job_with_custom_sys_path/my_custom_library/parameters.py @@ -0,0 +1,31 @@ +from functools import cache + + +@cache +def bundle_file_path() -> str: + """ + Return the bundle file path. + + This function expects a job parameter called "bundle_file_path" to be set. + + It is mocked during testing. + + The dbutils import is done inside the function so it is omitted when run locally. + """ + from databricks.sdk.runtime import dbutils + return dbutils.widgets.get("bundle_file_path") + + +@cache +def bundle_target() -> str: + """ + Return the bundle target. + + This function expects a job parameter called "bundle_target" to be set.
+ + It is mocked during testing. + + The dbutils import is done inside the function so it is omitted when run locally. + """ + from databricks.sdk.runtime import dbutils + return dbutils.widgets.get("bundle_target") diff --git a/knowledge_base/job_with_custom_sys_path/requirements.txt b/knowledge_base/job_with_custom_sys_path/requirements.txt new file mode 100644 index 0000000..61eb408 --- /dev/null +++ b/knowledge_base/job_with_custom_sys_path/requirements.txt @@ -0,0 +1,2 @@ +databricks-sdk +pytest diff --git a/knowledge_base/job_with_custom_sys_path/resources/print_bundle_configuration.job.yml b/knowledge_base/job_with_custom_sys_path/resources/print_bundle_configuration.job.yml new file mode 100644 index 0000000..fb663d0 --- /dev/null +++ b/knowledge_base/job_with_custom_sys_path/resources/print_bundle_configuration.job.yml @@ -0,0 +1,18 @@ +resources: + jobs: + print_bundle_configuration: + name: Example to demonstrate job parameterization + + parameters: + - # The bundle deployment's root file path in the workspace. + name: "bundle_file_path" + default: "${workspace.file_path}" + + - # The bundle target name (e.g. "dev" or "prod"). + name: "bundle_target" + default: "${bundle.target}" + + tasks: + - task_key: print + notebook_task: + notebook_path: ../src/print.ipynb diff --git a/knowledge_base/job_with_custom_sys_path/src/print.ipynb b/knowledge_base/job_with_custom_sys_path/src/print.ipynb new file mode 100644 index 0000000..50e0901 --- /dev/null +++ b/knowledge_base/job_with_custom_sys_path/src/print.ipynb @@ -0,0 +1,49 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The below cell retrieves the path to this bundle's deployment file path,\n", + "and adds it to the Python path." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "from databricks.sdk.runtime import dbutils\n", + "bundle_file_path = dbutils.widgets.get(\"bundle_file_path\")\n", + "sys.path.append(bundle_file_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from my_custom_library import load_configuration\n", + "from pprint import pprint\n", + "\n", + "pprint(load_configuration())" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/knowledge_base/job_with_custom_sys_path/tests/test_load_configuration.py b/knowledge_base/job_with_custom_sys_path/tests/test_load_configuration.py new file mode 100644 index 0000000..cb159f2 --- /dev/null +++ b/knowledge_base/job_with_custom_sys_path/tests/test_load_configuration.py @@ -0,0 +1,34 @@ +from os import path + +import my_custom_library +import my_custom_library.parameters + + +def mock_bundle_file_path(monkeypatch): + def mock(): + return path.join(path.dirname(__file__), "..") + + monkeypatch.setattr( + my_custom_library.parameters, + "bundle_file_path", + mock, + ) + + +def mock_bundle_target(monkeypatch): + def mock(): + return "test" + + monkeypatch.setattr( + my_custom_library.parameters, + "bundle_target", + mock, + ) + + +def test_load_configuration(monkeypatch): + mock_bundle_file_path(monkeypatch) + mock_bundle_target(monkeypatch) + + configuration = my_custom_library.load_configuration() + assert configuration == ["this is my test config"]