From fbd8a92c01b2419c9da21366f0e0a5e455a52dc6 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Sun, 17 Nov 2024 21:35:04 +0100 Subject: [PATCH 01/18] Add initial data-engineering template --- contrib/README.md | 43 ++++++++++ contrib/templates/README.md | 10 +++ contrib/templates/data-engineering/README.md | 10 +++ .../databricks_template_schema.json | 46 +++++++++++ .../{{.pipeline_name}}/explorations/README.md | 4 + .../explorations/exploration.ipynb.tmpl | 51 ++++++++++++ .../assets/{{.pipeline_name}}/main.py | 3 + .../{{.pipeline_name}}/sources/dev/taxis.py | 10 +++ .../{{.pipeline_name}}/sources/prod/taxis.py | 8 ++ .../tests/taxi_stats_test.py | 7 ++ .../transformations/__init__.py | 9 +++ .../transformations/taxi_stats.py | 23 ++++++ .../{{.pipeline_name}}.job.yml.tmpl | 24 ++++++ .../{{.pipeline_name}}.pipeline.yml.tmpl | 17 ++++ .../databricks_template_schema.json | 10 +++ .../job/databricks_template_schema.json | 10 +++ .../base/databricks_template_schema.json | 46 +++++++++++ .../.vscode/__builtins__.pyi | 3 + .../{{.project_name}}/.vscode/extensions.json | 7 ++ .../.vscode/settings.json.tmpl | 22 ++++++ .../template/{{.project_name}}/README.md.tmpl | 79 +++++++++++++++++++ .../{{.project_name}}/assets/README.md | 4 + .../template/{{.project_name}}/conftest.py | 36 +++++++++ .../{{.project_name}}/databricks.yml.tmpl | 50 ++++++++++++ .../template/{{.project_name}}/pyproject.toml | 22 ++++++ .../{{.project_name}}/scripts/add_asset.py | 46 +++++++++++ .../{{.project_name}}/scripts/test.py | 15 ++++ 27 files changed, 615 insertions(+) create mode 100644 contrib/README.md create mode 100644 contrib/templates/README.md create mode 100644 contrib/templates/data-engineering/README.md create mode 100644 contrib/templates/data-engineering/assets/etl-pipeline/databricks_template_schema.json create mode 100644 contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/explorations/README.md create mode 100644 
contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/explorations/exploration.ipynb.tmpl create mode 100644 contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/main.py create mode 100644 contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/sources/dev/taxis.py create mode 100644 contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/sources/prod/taxis.py create mode 100644 contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/tests/taxi_stats_test.py create mode 100644 contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/transformations/__init__.py create mode 100644 contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/transformations/taxi_stats.py create mode 100644 contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/{{.pipeline_name}}.job.yml.tmpl create mode 100644 contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/{{.pipeline_name}}.pipeline.yml.tmpl create mode 100644 contrib/templates/data-engineering/assets/ingest-pipeline/databricks_template_schema.json create mode 100644 contrib/templates/data-engineering/assets/job/databricks_template_schema.json create mode 100644 contrib/templates/data-engineering/base/databricks_template_schema.json create mode 100644 contrib/templates/data-engineering/base/template/{{.project_name}}/.vscode/__builtins__.pyi create mode 100644 contrib/templates/data-engineering/base/template/{{.project_name}}/.vscode/extensions.json create mode 100644 contrib/templates/data-engineering/base/template/{{.project_name}}/.vscode/settings.json.tmpl create mode 100644 contrib/templates/data-engineering/base/template/{{.project_name}}/README.md.tmpl create mode 100644 
contrib/templates/data-engineering/base/template/{{.project_name}}/assets/README.md create mode 100644 contrib/templates/data-engineering/base/template/{{.project_name}}/conftest.py create mode 100644 contrib/templates/data-engineering/base/template/{{.project_name}}/databricks.yml.tmpl create mode 100644 contrib/templates/data-engineering/base/template/{{.project_name}}/pyproject.toml create mode 100644 contrib/templates/data-engineering/base/template/{{.project_name}}/scripts/add_asset.py create mode 100644 contrib/templates/data-engineering/base/template/{{.project_name}}/scripts/test.py diff --git a/contrib/README.md b/contrib/README.md new file mode 100644 index 0000000..252d132 --- /dev/null +++ b/contrib/README.md @@ -0,0 +1,43 @@ +# Contrib Directory + +The `contrib` directory contains additional community-contributed examples and resources for Databricks Asset Bundles. These examples may include: + +- Custom configurations and extensions +- Advanced usage patterns +- Tools or utilities for enhancing Databricks Asset Bundles workflows + +## Structure + +Each contribution should be organized into its own subdirectory within `contrib/`. +Templates should go under `contrib/templates/`. For example: + +``` +contrib/ +├── awesome-bundle/ +│ ├── README.md +│ ├── databricks.yml +│ └── ... +└── templates/ + └── awesome-template/ + ├── README.md + ├── databricks_template_schema.json + ├── library/ + │ └── ... + └── template/ + └── ... +``` + +## How to Use Contributions + +To use or explore a contributed example, navigate to its subdirectory and follow the instructions in its `README.md` file. Each example should provide details on setup, configuration, and usage. + +## Contributing + +If you would like to add your own examples or resources, please: +1. Create a new directory under `contrib/` with a descriptive name. +2. Include a `README.md` file explaining the contribution. +3. Ensure that any necessary configuration files, scripts, or dependencies are included. 
+ +For more information on Databricks Asset Bundles, see: +- [Public Preview Announcement](https://www.databricks.com/blog/announcing-public-preview-databricks-asset-bundles-apply-software-development-best-practices) +- [Databricks Asset Bundles Documentation](https://docs.databricks.com/dev-tools/bundles/index.html) \ No newline at end of file diff --git a/contrib/templates/README.md b/contrib/templates/README.md new file mode 100644 index 0000000..38d1654 --- /dev/null +++ b/contrib/templates/README.md @@ -0,0 +1,10 @@ +# Contrib/Templates directory + +This directory contains community-contributed templates. + +See https://github.com/databricks/bundle-examples/blob/main/contrib/README.md for more +information about community contributions. + +Looking to contribute? See https://github.com/databricks/cli/tree/main/libs/template/templates +for inspiration. These are the standard templates that are included with the +Databricks CLI. diff --git a/contrib/templates/data-engineering/README.md b/contrib/templates/data-engineering/README.md new file mode 100644 index 0000000..e1f892b --- /dev/null +++ b/contrib/templates/data-engineering/README.md @@ -0,0 +1,10 @@ +# data-engineering template + +This template introduces a new structure for organizing data-engineering +assets in DABs. 
+ +Install it using + +``` +databricks bundle init https://github.com/databricks/bundle-examples/tree/main/contrib/templates/data-engineering/base +``` diff --git a/contrib/templates/data-engineering/assets/etl-pipeline/databricks_template_schema.json b/contrib/templates/data-engineering/assets/etl-pipeline/databricks_template_schema.json new file mode 100644 index 0000000..774c5cb --- /dev/null +++ b/contrib/templates/data-engineering/assets/etl-pipeline/databricks_template_schema.json @@ -0,0 +1,46 @@ +{ + "welcome_message": "\nWelcome to the data-engineering pipeline template!", + "properties": { + "pipeline_name": { + "type": "string", + "description": "\nPlease provide the name of the pipeline to generate.\npipeline_name", + "default": "etl_pipeline", + "order": 1 + }, + "format": { + "type": "string", + "description": "\nPlease select the format to use to define this pipeline.\nformat", + "order": 2, + "enum": [ + "python files", + "sql files", + "notebooks" + ], + "default": "python files" + }, + "only_python_files_supported": { + "skip_prompt_if": { + "properties": { + "format": { + "pattern": "python files" + } + } + }, + "default": "ignored", + "type": "string", + "description": "{{fail Only Python files are supported in this template at this time.}}", + "order": 3 + }, + "include_job": { + "type": "string", + "description": "\nWould you like to include a job that automatically triggers this pipeline?\nThis trigger will only be enabled for production deployments.\ninclude_job", + "order": 4, + "enum": [ + "yes", + "no" + ], + "default": "yes" + } + }, + "success_message": "\n\n🪠 New pipeline definition generated under 'assets/{{.pipeline_name}}'!" 
+} \ No newline at end of file diff --git a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/explorations/README.md b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/explorations/README.md new file mode 100644 index 0000000..e6cfb81 --- /dev/null +++ b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/explorations/README.md @@ -0,0 +1,4 @@ +# scratch + +This folder is reserved for personal, exploratory notebooks. +By default these are not committed to Git, as 'scratch' is listed in .gitignore. diff --git a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/explorations/exploration.ipynb.tmpl b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/explorations/exploration.ipynb.tmpl new file mode 100644 index 0000000..560703c --- /dev/null +++ b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/explorations/exploration.ipynb.tmpl @@ -0,0 +1,51 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "6bca260b-13d1-448f-8082-30b60a85c9ae", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('../transformations')\n", + "\n", + "\n", + "spark = SparkSession.builder.getOrCreate()\n", + "spark.sql('SELECT * FROM taxi_stats').show()" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "ipynb-notebook", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": 
"3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/main.py b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/main.py new file mode 100644 index 0000000..72d3f5c --- /dev/null +++ b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/main.py @@ -0,0 +1,3 @@ +# This is the entry point for the {{.pipeline_name}} pipeline. +# It makes sure all transformations in the transformations directory are included. +import transformations \ No newline at end of file diff --git a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/sources/dev/taxis.py b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/sources/dev/taxis.py new file mode 100644 index 0000000..ed2a28c --- /dev/null +++ b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/sources/dev/taxis.py @@ -0,0 +1,10 @@ +import dlt +from pyspark.sql import SparkSession, DataFrame + + +@dlt.view( + comment="Small set of taxis for development (uses LIMIT 10)" +) +def taxis() -> DataFrame: + spark = SparkSession.builder.getOrCreate() + return spark.sql("SELECT * FROM samples.nyctaxi.trips LIMIT 10") \ No newline at end of file diff --git a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/sources/prod/taxis.py b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/sources/prod/taxis.py new file mode 100644 index 0000000..6c22c80 --- /dev/null +++ b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/sources/prod/taxis.py @@ -0,0 +1,8 @@ +import dlt +from pyspark.sql import SparkSession, DataFrame + + +@dlt.view +def taxis() -> DataFrame: + spark = SparkSession.builder.getOrCreate() + return spark.sql("SELECT * 
FROM samples.nyctaxi.trips") \ No newline at end of file diff --git a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/tests/taxi_stats_test.py b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/tests/taxi_stats_test.py new file mode 100644 index 0000000..35daa25 --- /dev/null +++ b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/tests/taxi_stats_test.py @@ -0,0 +1,7 @@ +from sources.dev.taxis import taxis +from transformations import taxi_stats + + +def test_taxi_stats(): + result = taxi_stats.filter_taxis(taxis()) + assert len(result.collect()) > 5 diff --git a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/transformations/__init__.py b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/transformations/__init__.py new file mode 100644 index 0000000..26b7072 --- /dev/null +++ b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/transformations/__init__.py @@ -0,0 +1,9 @@ +# __init__.py defines the 'transformations' Python package +import importlib +import pkgutil + + +# Import all modules in the package except those starting with '_', like '__init__.py' +for _, module_name, _ in pkgutil.iter_modules(__path__): + if not module_name.startswith("_"): + importlib.import_module(f"{__name__}.{module_name}") \ No newline at end of file diff --git a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/transformations/taxi_stats.py b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/transformations/taxi_stats.py new file mode 100644 index 0000000..7c979fb --- /dev/null +++ b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/transformations/taxi_stats.py @@ -0,0 +1,23 @@ +import dlt +from pyspark.sql.functions import 
to_date, count +from pyspark.sql import DataFrame + + +@dlt.table( + comment="Daily statistics of NYC Taxi trips" +) +def taxi_stats() -> DataFrame: + """ Read from the 'taxis' view from etl_pipeline/sources. """ + taxis = dlt.read("taxis") + + return filter_taxis(taxis) + + +def filter_taxis(taxis: DataFrame) -> DataFrame: + """ Group by date and calculate the number of trips. """ + return ( + taxis + .withColumn("pickup_date", to_date("tpep_pickup_datetime")) + .groupBy("pickup_date") + .agg(count("*").alias("number_of_trips")) + ) \ No newline at end of file diff --git a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/{{.pipeline_name}}.job.yml.tmpl b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/{{.pipeline_name}}.job.yml.tmpl new file mode 100644 index 0000000..a7af118 --- /dev/null +++ b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/{{.pipeline_name}}.job.yml.tmpl @@ -0,0 +1,24 @@ +# The job that triggers {{.pipeline_name}}. 
+resources: + jobs: + {{.pipeline_name}}_job: + name: {{.pipeline_name}}_job + + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS + + {{- if not is_service_principal}} + + email_notifications: + on_failure: + - {{user_name}} + + {{- end}} + + tasks: + - task_key: refresh_pipeline + pipeline_task: + pipeline_id: ${resources.pipelines.{{.pipeline_name}}.id} \ No newline at end of file diff --git a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/{{.pipeline_name}}.pipeline.yml.tmpl b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/{{.pipeline_name}}.pipeline.yml.tmpl new file mode 100644 index 0000000..a1fba4b --- /dev/null +++ b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/{{.pipeline_name}}.pipeline.yml.tmpl @@ -0,0 +1,17 @@ +resources: + pipelines: + {{.pipeline_name}}: + name: {{.pipeline_name}} + serverless: true + {{- if or (eq default_catalog "") (eq default_catalog "hive_metastore")}} + ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog: + # catalog: catalog_name + {{- else}} + catalog: {{default_catalog}} + {{- end}} + target: {{.pipeline_name}}_${bundle.environment} + libraries: + - file: + path: sources/${bundle.target}/*.py + - file: + path: main.py \ No newline at end of file diff --git a/contrib/templates/data-engineering/assets/ingest-pipeline/databricks_template_schema.json b/contrib/templates/data-engineering/assets/ingest-pipeline/databricks_template_schema.json new file mode 100644 index 0000000..df21996 --- /dev/null +++ b/contrib/templates/data-engineering/assets/ingest-pipeline/databricks_template_schema.json @@ -0,0 +1,10 @@ +{ + "welcome_message": "\nWelcome to the data-engineering ingest-pipeline template!", + "properties": { + "pipeline_name": { + "type": 
"string", + "description": "\n{{fail \"The ingest-pipeline is not yet implemented.\"}}", + "order": 3 + } + } +} diff --git a/contrib/templates/data-engineering/assets/job/databricks_template_schema.json b/contrib/templates/data-engineering/assets/job/databricks_template_schema.json new file mode 100644 index 0000000..afcf5b6 --- /dev/null +++ b/contrib/templates/data-engineering/assets/job/databricks_template_schema.json @@ -0,0 +1,10 @@ +{ + "welcome_message": "\nWelcome to the data-engineering job resource template!", + "properties": { + "pipeline_name": { + "type": "string", + "description": "\n{{fail \"The ingest-pipeline is not yet implemented.\"}}", + "order": 3 + } + } +} diff --git a/contrib/templates/data-engineering/base/databricks_template_schema.json b/contrib/templates/data-engineering/base/databricks_template_schema.json new file mode 100644 index 0000000..debf4e1 --- /dev/null +++ b/contrib/templates/data-engineering/base/databricks_template_schema.json @@ -0,0 +1,46 @@ +{ + "welcome_message": "\nWelcome to the pipeline-folders template for Databricks Asset Bundles!", + "properties": { + "project_name": { + "type": "string", + "default": "my_data_project", + "description": "Please provide the following details to tailor the template to your preferences.\n\nUnique name for this project\nproject_name", + "order": 1, + "pattern": "^[A-Za-z0-9_]+$", + "pattern_match_failure_message": "Name must consist of letters, numbers, and underscores." + }, + "default_catalog": { + "type": "string", + "default": "{{default_catalog}}", + "pattern": "^\\w*$", + "pattern_match_failure_message": "Invalid catalog name.", + "description": "\nPlease provide an initial catalog{{if eq (default_catalog) \"\"}} (leave blank when not using Unity Catalog){{end}}.\ndefault_catalog", + "order": 2 + }, + "personal_schemas": { + "type": "string", + "description": "\nWould you like to use a personal schema for each user working on this project? 
(e.g., 'catalog.{{short_name}}')\npersonal_schemas", + "enum": [ + "yes, use a schema based on the current user name during development", + "no, use a shared schema during development" + ], + "order": 3 + }, + "shared_schema": { + "skip_prompt_if": { + "properties": { + "personal_schemas": { + "const": "yes, use a schema based on the current user name during development" + } + } + }, + "type": "string", + "default": "default", + "pattern": "^\\w+$", + "pattern_match_failure_message": "Invalid schema name.", + "description": "\nPlease provide an initial schema during development.\ndefault_schema", + "order": 4 + } + }, + "success_message": "\n\nYour new project has been created in the '{{.project_name}}' directory!\n\nTo add an example asset to your project, use\n\n $ databricks bundle init https://github.com/databricks/bundle-examples/tree/main/contrib/templates/data-engineering/assets/etl-pipeline\n\nRefer to the README.md file for full \"getting started\" instructions!" +} \ No newline at end of file diff --git a/contrib/templates/data-engineering/base/template/{{.project_name}}/.vscode/__builtins__.pyi b/contrib/templates/data-engineering/base/template/{{.project_name}}/.vscode/__builtins__.pyi new file mode 100644 index 0000000..0edd518 --- /dev/null +++ b/contrib/templates/data-engineering/base/template/{{.project_name}}/.vscode/__builtins__.pyi @@ -0,0 +1,3 @@ +# Typings for Pylance in Visual Studio Code +# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md +from databricks.sdk.runtime import * diff --git a/contrib/templates/data-engineering/base/template/{{.project_name}}/.vscode/extensions.json b/contrib/templates/data-engineering/base/template/{{.project_name}}/.vscode/extensions.json new file mode 100644 index 0000000..5d15eba --- /dev/null +++ b/contrib/templates/data-engineering/base/template/{{.project_name}}/.vscode/extensions.json @@ -0,0 +1,7 @@ +{ + "recommendations": [ + "databricks.databricks", + "ms-python.vscode-pylance", + 
"redhat.vscode-yaml" + ] +} diff --git a/contrib/templates/data-engineering/base/template/{{.project_name}}/.vscode/settings.json.tmpl b/contrib/templates/data-engineering/base/template/{{.project_name}}/.vscode/settings.json.tmpl new file mode 100644 index 0000000..380587b --- /dev/null +++ b/contrib/templates/data-engineering/base/template/{{.project_name}}/.vscode/settings.json.tmpl @@ -0,0 +1,22 @@ +{ + "python.analysis.stubPath": ".vscode", + "databricks.python.envFile": "${workspaceFolder}/.env", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + {{- /* Unfortunately extraPaths doesn't support globs!! See: https://github.com/microsoft/pylance-release/issues/973 */ -}} + "python.analysis.extraPaths": ["assets/etl_pipeline"], + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + }, + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter", + "editor.formatOnSave": true, + }, +} diff --git a/contrib/templates/data-engineering/base/template/{{.project_name}}/README.md.tmpl b/contrib/templates/data-engineering/base/template/{{.project_name}}/README.md.tmpl new file mode 100644 index 0000000..e83e65c --- /dev/null +++ b/contrib/templates/data-engineering/base/template/{{.project_name}}/README.md.tmpl @@ -0,0 +1,79 @@ +# {{.project_name}} + +The '{{.project_name}}' project was generated by using the data-engineering template. + +## Setup + +1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html + +2. Authenticate to your Databricks workspace, if you have not done so already: + ``` + $ databricks auth login + ``` + +3. 
We recommend the UV package manager to install project dependencies. It's a drop-in replacement for `pip`. + See https://docs.astral.sh/uv/getting-started/installation/ for full installation instructions, + or run: + ``` + $ pip install uv + ``` + +4. Install all project dependencies: + ``` + $ uv sync + ``` + + See the "Running unit tests" section below for more on testing. + +5. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from + https://docs.databricks.com/dev-tools/vscode-ext.html. Or the PyCharm plugin from + https://www.databricks.com/blog/announcing-pycharm-integration-databricks. + +## Adding assets such as pipelines and jobs + +By default, the data-engineering template does not include any assets. + +1. To add an asset, run the `add-asset` script: + ``` + $ uv run add-asset + ``` + +2. Optionally, run all tests on serverless compute after adding an asset: + ``` + $ uv run test + ``` + +## Deploying assets + +1. To deploy a development copy of this project, type: + ``` + $ databricks bundle deploy --target dev + ``` + (Note that "dev" is the default target, so the `--target` parameter + is optional here.) + +2. Similarly, to deploy a production copy, type: + ``` + $ databricks bundle deploy --target prod + ``` + +3. Use the "summary" command to review everything that was deployed: + ``` + $ databricks bundle summary + ``` + +4. To run a job or pipeline, use the "run" command: + ``` + $ databricks bundle run + ``` + +## Running unit tests + +1. Run tests on a serverless environment using: + ``` + $ uv run test + ``` + +2. 
Optionally, to run unit tests in a different environment, such as on a cluster, + please refer to the documentation of DB connect at + https://docs.databricks.com/en/dev-tools/databricks-connect/python/install.html diff --git a/contrib/templates/data-engineering/base/template/{{.project_name}}/assets/README.md b/contrib/templates/data-engineering/base/template/{{.project_name}}/assets/README.md new file mode 100644 index 0000000..f6c8907 --- /dev/null +++ b/contrib/templates/data-engineering/base/template/{{.project_name}}/assets/README.md @@ -0,0 +1,4 @@ +This folder is reserved for Databricks Asset Bundles definitions. + +New jobs and pipelines should follow conventions from the 'data-engineering' template. +See https://github.com/databricks/bundle-examples/blob/main/contrib/templates/data-engineering/README.md. diff --git a/contrib/templates/data-engineering/base/template/{{.project_name}}/conftest.py b/contrib/templates/data-engineering/base/template/{{.project_name}}/conftest.py new file mode 100644 index 0000000..4a49bb1 --- /dev/null +++ b/contrib/templates/data-engineering/base/template/{{.project_name}}/conftest.py @@ -0,0 +1,36 @@ +# conftest.py is used to configure pytest +import os +import sys +import dlt +import pathlib +import pytest +import warnings +from pyspark.sql import SparkSession +from databricks.connect import DatabricksSession + +# Dynamically find and add all `assets/*` directories to `sys.path` +for path in pathlib.Path("assets").glob("*"): + resolved_path = str(path.resolve()) + if resolved_path not in sys.path: + sys.path.append(resolved_path) + +# Work around issues in older databricks-connect +SparkSession.builder = DatabricksSession.builder +os.environ.pop("SPARK_REMOTE", None) + +# Make dlt.views in 'sources/dev' available for tests +warnings.filterwarnings( + "ignore", + message="This is a stub that only contains the interfaces to Delta Live Tables.*", + category=UserWarning, +) +dlt.enable_local_execution() +dlt.view = lambda func=None, 
*args, **kwargs: func or (lambda f: f) + + +# Provide a 'spark' fixture for tests and make sure the session is eagerly initialized +@pytest.fixture(scope="session", autouse=True) +def spark() -> SparkSession: + if hasattr(DatabricksSession.builder, "validateSession"): + return DatabricksSession.builder.validateSession().getOrCreate() + return DatabricksSession.builder.getOrCreate() diff --git a/contrib/templates/data-engineering/base/template/{{.project_name}}/databricks.yml.tmpl b/contrib/templates/data-engineering/base/template/{{.project_name}}/databricks.yml.tmpl new file mode 100644 index 0000000..420dded --- /dev/null +++ b/contrib/templates/data-engineering/base/template/{{.project_name}}/databricks.yml.tmpl @@ -0,0 +1,50 @@ +# This is a Databricks asset bundle definition for {{.project_name}}. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: {{.project_name}} + +include: + - assets/*.yml + - assets/*/*.yml + +variables: + catalog: + description: The catalog to use + schema: + description: The schema to use + +{{- $dev_schema := .shared_schema }} +{{- $prod_schema := .shared_schema }} +{{- if (regexp "^yes").MatchString .personal_schemas}} + {{- $dev_schema = "${workspace.current_user.short_name}"}} + {{- $prod_schema = "default"}} +{{- end}} + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + mode: development + default: true + workspace: + host: {{workspace_host}} + variables: + catalog: {{.default_catalog}} + schema: {{$dev_schema}} + + prod: + mode: production + workspace: + host: {{workspace_host}} + # We explicitly specify /Workspace/Users/{{user_name}} to make sure we only have a single copy. 
+ root_path: /Workspace/Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} + permissions: + - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} + level: CAN_MANAGE + run_as: + {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} + variables: + catalog: {{.default_catalog}} + schema: {{$prod_schema}} diff --git a/contrib/templates/data-engineering/base/template/{{.project_name}}/pyproject.toml b/contrib/templates/data-engineering/base/template/{{.project_name}}/pyproject.toml new file mode 100644 index 0000000..b04668c --- /dev/null +++ b/contrib/templates/data-engineering/base/template/{{.project_name}}/pyproject.toml @@ -0,0 +1,22 @@ +[project] +name = "my_data_project" +version = "0.1.0" +description = "Databricks ETL pipeline project" +requires-python = ">=3.10" +dependencies = [ + "databricks-dlt", + "pytest", + "setuptools", + "wheel", + "databricks-connect==15.1.*", +] + +[project.scripts] +add-asset = "scripts.add_asset:main" +test = "scripts.test:main" + +[tool.uv] +package = true + +[tool.setuptools.packages.find] +include = ["scripts"] \ No newline at end of file diff --git a/contrib/templates/data-engineering/base/template/{{.project_name}}/scripts/add_asset.py b/contrib/templates/data-engineering/base/template/{{.project_name}}/scripts/add_asset.py new file mode 100644 index 0000000..34c0f13 --- /dev/null +++ b/contrib/templates/data-engineering/base/template/{{.project_name}}/scripts/add_asset.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +# +# add_asset.py is used to initialize a new asset from the data-engineering template. 
+# +import sys +import subprocess +from typing import Literal + +VALID_ASSETS = ["etl-pipeline", "job", "ingest-pipeline"] +AssetType = Literal["etl-pipeline", "job", "ingest-pipeline"] + + +def init_bundle(asset_type: AssetType) -> None: + cmd = f"databricks bundle init ~/projects/bundle-examples/contrib/templates/data-engineering/assets/{asset_type}" + subprocess.run(cmd, shell=True) + + +def show_menu() -> AssetType: + print("\nSelect asset type to initialize:") + for i, asset in enumerate(VALID_ASSETS, 1): + print(f"{i}. {asset}") + + while True: + try: + choice = int(input("\nEnter number (1-3): ")) + if 1 <= choice <= len(VALID_ASSETS): + return VALID_ASSETS[choice - 1] + print("Invalid choice. Please try again.") + except ValueError: + print("Please enter a number.") + + +def main(): + if len(sys.argv) > 1: + asset_type = sys.argv[1] + if asset_type not in VALID_ASSETS: + print(f"Error: Asset type must be one of {VALID_ASSETS}") + sys.exit(1) + else: + asset_type = show_menu() + + init_bundle(asset_type) + + +if __name__ == "__main__": + main() diff --git a/contrib/templates/data-engineering/base/template/{{.project_name}}/scripts/test.py b/contrib/templates/data-engineering/base/template/{{.project_name}}/scripts/test.py new file mode 100644 index 0000000..62d8dd3 --- /dev/null +++ b/contrib/templates/data-engineering/base/template/{{.project_name}}/scripts/test.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 +# +# test.py runs the unit tests for this project using pytest. 
+# +import os +import subprocess + + +def main(): + os.environ["DATABRICKS_SERVERLESS_COMPUTE_ID"] = "auto" + subprocess.run(["pytest"], check=True) + + +if __name__ == "__main__": + main() From 54762a7b13e5d8c47c6061f3216e636b6dc371ad Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Mon, 18 Nov 2024 09:27:20 +0100 Subject: [PATCH 02/18] Fix init calls --- contrib/templates/data-engineering/README.md | 2 +- .../data-engineering/base/databricks_template_schema.json | 2 +- .../base/template/{{.project_name}}/README.md.tmpl | 7 +++++++ .../base/template/{{.project_name}}/scripts/add_asset.py | 2 +- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/contrib/templates/data-engineering/README.md b/contrib/templates/data-engineering/README.md index e1f892b..c59d450 100644 --- a/contrib/templates/data-engineering/README.md +++ b/contrib/templates/data-engineering/README.md @@ -6,5 +6,5 @@ assets in DABs. Install it using ``` -databricks bundle init https://github.com/databricks/bundle-examples/tree/main/contrib/templates/data-engineering/base +databricks bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/data-engineering/base ``` diff --git a/contrib/templates/data-engineering/base/databricks_template_schema.json b/contrib/templates/data-engineering/base/databricks_template_schema.json index debf4e1..018f5ad 100644 --- a/contrib/templates/data-engineering/base/databricks_template_schema.json +++ b/contrib/templates/data-engineering/base/databricks_template_schema.json @@ -42,5 +42,5 @@ "order": 4 } }, - "success_message": "\n\nYour new project has been created in the '{{.project_name}}' directory!\n\nTo add an example asset to your project, use\n\n $ databricks bundle init https://github.com/databricks/bundle-examples/tree/main/contrib/templates/data-engineering/assets/etl-pipeline\n\nRefer to the README.md file for full \"getting started\" instructions!" 
+ "success_message": "\n\nYour new project has been created in the '{{.project_name}}' directory!\n\nTo add an example asset to your project, use\n\n $ cd ${{.project_name}}\n $ databricks bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/data-engineering/assets/etl-pipeline\n\nRefer to the README.md file for full \"getting started\" instructions!" } \ No newline at end of file diff --git a/contrib/templates/data-engineering/base/template/{{.project_name}}/README.md.tmpl b/contrib/templates/data-engineering/base/template/{{.project_name}}/README.md.tmpl index e83e65c..cadd25c 100644 --- a/contrib/templates/data-engineering/base/template/{{.project_name}}/README.md.tmpl +++ b/contrib/templates/data-engineering/base/template/{{.project_name}}/README.md.tmpl @@ -38,6 +38,13 @@ By default, the data-engineering template does not include any assets. $ uv run add-asset ``` + or, if you don't use UV, use + + ``` + $ export TYPE=etl-pipeline + $ databricks bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/data-engineering/assets/$TYPE + ``` + 2. 
Optionally, run all tests on serverless compute after adding an asset: ``` $ uv run test diff --git a/contrib/templates/data-engineering/base/template/{{.project_name}}/scripts/add_asset.py b/contrib/templates/data-engineering/base/template/{{.project_name}}/scripts/add_asset.py index 34c0f13..931db61 100644 --- a/contrib/templates/data-engineering/base/template/{{.project_name}}/scripts/add_asset.py +++ b/contrib/templates/data-engineering/base/template/{{.project_name}}/scripts/add_asset.py @@ -11,7 +11,7 @@ def init_bundle(asset_type: AssetType) -> None: - cmd = f"databricks bundle init ~/projects/bundle-examples/contrib/templates/data-engineering/assets/{asset_type}" + cmd = f"databricks bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/data-engineering/assets/{asset_type}" subprocess.run(cmd, shell=True) From 596ece0174b405e4276d11e97ca9580a47b6b11c Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Mon, 18 Nov 2024 11:50:45 +0100 Subject: [PATCH 03/18] Add --branch parameters for now --- contrib/templates/data-engineering/README.md | 2 +- .../data-engineering/base/databricks_template_schema.json | 2 +- .../base/template/{{.project_name}}/scripts/add_asset.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/contrib/templates/data-engineering/README.md b/contrib/templates/data-engineering/README.md index c59d450..ec5206c 100644 --- a/contrib/templates/data-engineering/README.md +++ b/contrib/templates/data-engineering/README.md @@ -6,5 +6,5 @@ assets in DABs. 
Install it using ``` -databricks bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/data-engineering/base +databricks bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/data-engineering/base --branch data-engineering ``` diff --git a/contrib/templates/data-engineering/base/databricks_template_schema.json b/contrib/templates/data-engineering/base/databricks_template_schema.json index 018f5ad..c94c0fb 100644 --- a/contrib/templates/data-engineering/base/databricks_template_schema.json +++ b/contrib/templates/data-engineering/base/databricks_template_schema.json @@ -42,5 +42,5 @@ "order": 4 } }, - "success_message": "\n\nYour new project has been created in the '{{.project_name}}' directory!\n\nTo add an example asset to your project, use\n\n $ cd ${{.project_name}}\n $ databricks bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/data-engineering/assets/etl-pipeline\n\nRefer to the README.md file for full \"getting started\" instructions!" + "success_message": "\n\nYour new project has been created in the '{{.project_name}}' directory!\n\nTo add an example asset to your project, use\n\n $ cd ${{.project_name}}\n $ databricks bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/data-engineering/assets/etl-pipeline --branch data-engineering\n\nRefer to the README.md file for full \"getting started\" instructions!" 
} \ No newline at end of file diff --git a/contrib/templates/data-engineering/base/template/{{.project_name}}/scripts/add_asset.py b/contrib/templates/data-engineering/base/template/{{.project_name}}/scripts/add_asset.py index 931db61..80cac32 100644 --- a/contrib/templates/data-engineering/base/template/{{.project_name}}/scripts/add_asset.py +++ b/contrib/templates/data-engineering/base/template/{{.project_name}}/scripts/add_asset.py @@ -11,7 +11,7 @@ def init_bundle(asset_type: AssetType) -> None: - cmd = f"databricks bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/data-engineering/assets/{asset_type}" + cmd = f"databricks bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/data-engineering/assets/{asset_type} --branch data-engineering" subprocess.run(cmd, shell=True) From 448c4f3b6319eb23ab4fb78cd7b0c093023a4863 Mon Sep 17 00:00:00 2001 From: "Lennart Kats (databricks)" Date: Mon, 18 Nov 2024 13:42:53 +0100 Subject: [PATCH 04/18] Update contrib/templates/data-engineering/base/databricks_template_schema.json Co-authored-by: Pieter Noordhuis --- .../data-engineering/base/databricks_template_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/templates/data-engineering/base/databricks_template_schema.json b/contrib/templates/data-engineering/base/databricks_template_schema.json index c94c0fb..6769871 100644 --- a/contrib/templates/data-engineering/base/databricks_template_schema.json +++ b/contrib/templates/data-engineering/base/databricks_template_schema.json @@ -42,5 +42,5 @@ "order": 4 } }, - "success_message": "\n\nYour new project has been created in the '{{.project_name}}' directory!\n\nTo add an example asset to your project, use\n\n $ cd ${{.project_name}}\n $ databricks bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/data-engineering/assets/etl-pipeline --branch data-engineering\n\nRefer to the 
README.md file for full \"getting started\" instructions!" + "success_message": "\n\nYour new project has been created in the '{{.project_name}}' directory!\n\nTo add an example asset to your project, use\n\n $ cd {{.project_name}}\n $ databricks bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/data-engineering/assets/etl-pipeline --branch data-engineering\n\nRefer to the README.md file for full \"getting started\" instructions!" } \ No newline at end of file From 5493ac42568015caa5371ec5dc07a4a09a2cdf8e Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Thu, 21 Nov 2024 10:15:49 +0100 Subject: [PATCH 05/18] Use vars for catalogs/schemas --- .../{{.pipeline_name}}/{{.pipeline_name}}.pipeline.yml.tmpl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/{{.pipeline_name}}.pipeline.yml.tmpl b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/{{.pipeline_name}}.pipeline.yml.tmpl index a1fba4b..eeedcbe 100644 --- a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/{{.pipeline_name}}.pipeline.yml.tmpl +++ b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/{{.pipeline_name}}.pipeline.yml.tmpl @@ -5,11 +5,11 @@ resources: serverless: true {{- if or (eq default_catalog "") (eq default_catalog "hive_metastore")}} ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog: - # catalog: catalog_name + # catalog: ${var.catalog} {{- else}} - catalog: {{default_catalog}} + catalog: ${var.catalog} {{- end}} - target: {{.pipeline_name}}_${bundle.environment} + target: ${var.schema} libraries: - file: path: sources/${bundle.target}/*.py From e15ceb1c6b30ffda859038af4892386537c6b46f Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Fri, 22 Nov 2024 10:23:30 +0100 Subject: [PATCH 06/18] 
Update based on feedback --- contrib/README.md | 4 ++-- .../assets/etl-pipeline/databricks_template_schema.json | 2 +- .../assets/{{.pipeline_name}}/explorations/README.md | 4 ++-- .../etl-pipeline/template/assets/{{.pipeline_name}}/main.py | 3 ++- .../{{.pipeline_name}}/{{.pipeline_name}}.job.yml.tmpl | 3 +-- .../base/template/{{.project_name}}/conftest.py | 5 +++-- .../base/template/{{.project_name}}/databricks.yml.tmpl | 5 ++++- .../base/template/{{.project_name}}/pyproject.toml | 2 +- 8 files changed, 16 insertions(+), 12 deletions(-) diff --git a/contrib/README.md b/contrib/README.md index 252d132..31f7d01 100644 --- a/contrib/README.md +++ b/contrib/README.md @@ -39,5 +39,5 @@ If you would like to add your own examples or resources, please: 3. Ensure that any necessary configuration files, scripts, or dependencies are included. For more information on Databricks Asset Bundles, see: -- [Public Preview Announcement](https://www.databricks.com/blog/announcing-public-preview-databricks-asset-bundles-apply-software-development-best-practices) -- [Databricks Asset Bundles Documentation](https://docs.databricks.com/dev-tools/bundles/index.html) \ No newline at end of file +- The launch blog post at https://www.databricks.com/blog/announcing-general-availability-databricks-asset-bundles +- The docs at https://docs.databricks.com/dev-tools/bundles/index.html diff --git a/contrib/templates/data-engineering/assets/etl-pipeline/databricks_template_schema.json b/contrib/templates/data-engineering/assets/etl-pipeline/databricks_template_schema.json index 774c5cb..a49171a 100644 --- a/contrib/templates/data-engineering/assets/etl-pipeline/databricks_template_schema.json +++ b/contrib/templates/data-engineering/assets/etl-pipeline/databricks_template_schema.json @@ -28,7 +28,7 @@ }, "default": "ignored", "type": "string", - "description": "{{fail Only Python files are supported in this template at this time.}}", + "description": "{{fail \"Only Python files are supported in 
this template at this time.\"}}", "order": 3 }, "include_job": { diff --git a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/explorations/README.md b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/explorations/README.md index e6cfb81..7292d7f 100644 --- a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/explorations/README.md +++ b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/explorations/README.md @@ -1,4 +1,4 @@ -# scratch +# explorations This folder is reserved for personal, exploratory notebooks. -By default these are not committed to Git, as 'scratch' is listed in .gitignore. +By default these are not committed to Git, as 'explorations' is listed in .gitignore. diff --git a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/main.py b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/main.py index 72d3f5c..ad6b5f8 100644 --- a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/main.py +++ b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/main.py @@ -1,3 +1,4 @@ # This is the entry point for the {{.pipeline_name}} pipeline. # It makes sure all transformations in the transformations directory are included. 
-import transformations \ No newline at end of file +import transformations +__all__ = ["transformations"] \ No newline at end of file diff --git a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/{{.pipeline_name}}.job.yml.tmpl b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/{{.pipeline_name}}.job.yml.tmpl index a7af118..a75b746 100644 --- a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/{{.pipeline_name}}.job.yml.tmpl +++ b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/{{.pipeline_name}}.job.yml.tmpl @@ -13,8 +13,7 @@ resources: {{- if not is_service_principal}} email_notifications: - on_failure: - - {{user_name}} + on_failure: ${var.notifications} {{- end}} diff --git a/contrib/templates/data-engineering/base/template/{{.project_name}}/conftest.py b/contrib/templates/data-engineering/base/template/{{.project_name}}/conftest.py index 4a49bb1..2602fb5 100644 --- a/contrib/templates/data-engineering/base/template/{{.project_name}}/conftest.py +++ b/contrib/templates/data-engineering/base/template/{{.project_name}}/conftest.py @@ -1,4 +1,5 @@ -# conftest.py is used to configure pytest +# conftest.py is used to configure pytest. +# This file is in the root since it affects all tests through this bundle. 
import os import sys import dlt @@ -9,7 +10,7 @@ from databricks.connect import DatabricksSession # Dynamically find and add all `assets/*` directories to `sys.path` -for path in pathlib.Path("assets").glob("*"): +for path in pathlib.Path(pathlib.Path(__file__).parent / "assets").glob("*"): resolved_path = str(path.resolve()) if resolved_path not in sys.path: sys.path.append(resolved_path) diff --git a/contrib/templates/data-engineering/base/template/{{.project_name}}/databricks.yml.tmpl b/contrib/templates/data-engineering/base/template/{{.project_name}}/databricks.yml.tmpl index 420dded..d988fcc 100644 --- a/contrib/templates/data-engineering/base/template/{{.project_name}}/databricks.yml.tmpl +++ b/contrib/templates/data-engineering/base/template/{{.project_name}}/databricks.yml.tmpl @@ -12,6 +12,8 @@ variables: description: The catalog to use schema: description: The schema to use + notifications: + description: The email addresses to use for failure notifications {{- $dev_schema := .shared_schema }} {{- $prod_schema := .shared_schema }} @@ -33,7 +35,7 @@ targets: variables: catalog: {{.default_catalog}} schema: {{$dev_schema}} - + notifications: [] prod: mode: production workspace: @@ -48,3 +50,4 @@ targets: variables: catalog: {{.default_catalog}} schema: {{$prod_schema}} + notifications: [{{user_name}}] \ No newline at end of file diff --git a/contrib/templates/data-engineering/base/template/{{.project_name}}/pyproject.toml b/contrib/templates/data-engineering/base/template/{{.project_name}}/pyproject.toml index b04668c..25cc305 100644 --- a/contrib/templates/data-engineering/base/template/{{.project_name}}/pyproject.toml +++ b/contrib/templates/data-engineering/base/template/{{.project_name}}/pyproject.toml @@ -2,7 +2,7 @@ name = "my_data_project" version = "0.1.0" description = "Databricks ETL pipeline project" -requires-python = ">=3.10" +requires-python = "==3.11.*" dependencies = [ "databricks-dlt", "pytest", From 
9b87093316fcdb07bb1e071efd6903100abeedf8 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Fri, 22 Nov 2024 10:27:33 +0100 Subject: [PATCH 07/18] Fix newline --- .../base/template/{{.project_name}}/.vscode/settings.json.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/templates/data-engineering/base/template/{{.project_name}}/.vscode/settings.json.tmpl b/contrib/templates/data-engineering/base/template/{{.project_name}}/.vscode/settings.json.tmpl index 380587b..2f753e8 100644 --- a/contrib/templates/data-engineering/base/template/{{.project_name}}/.vscode/settings.json.tmpl +++ b/contrib/templates/data-engineering/base/template/{{.project_name}}/.vscode/settings.json.tmpl @@ -8,7 +8,7 @@ ], "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true, - {{- /* Unfortunately extraPaths doesn't support globs!! See: https://github.com/microsoft/pylance-release/issues/973 */ -}} + {{- /* Unfortunately extraPaths doesn't support globs!! See: https://github.com/microsoft/pylance-release/issues/973 */}} "python.analysis.extraPaths": ["assets/etl_pipeline"], "files.exclude": { "**/*.egg-info": true, From 3141102d6fe5e5ec4feb22c8a71241c74619c686 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Sun, 24 Nov 2024 17:21:09 +0100 Subject: [PATCH 08/18] Move template one folder up --- contrib/templates/data-engineering/README.md | 5 ++++- .../{base => }/databricks_template_schema.json | 0 .../template/{{.project_name}}/.vscode/__builtins__.pyi | 0 .../template/{{.project_name}}/.vscode/extensions.json | 0 .../template/{{.project_name}}/.vscode/settings.json.tmpl | 0 .../{base => }/template/{{.project_name}}/README.md.tmpl | 0 .../{base => }/template/{{.project_name}}/assets/README.md | 0 .../{base => }/template/{{.project_name}}/conftest.py | 0 .../template/{{.project_name}}/databricks.yml.tmpl | 0 .../{base => }/template/{{.project_name}}/pyproject.toml | 2 +- .../template/{{.project_name}}/scripts/add_asset.py | 0 .../{base => 
}/template/{{.project_name}}/scripts/test.py | 0 12 files changed, 5 insertions(+), 2 deletions(-) rename contrib/templates/data-engineering/{base => }/databricks_template_schema.json (100%) rename contrib/templates/data-engineering/{base => }/template/{{.project_name}}/.vscode/__builtins__.pyi (100%) rename contrib/templates/data-engineering/{base => }/template/{{.project_name}}/.vscode/extensions.json (100%) rename contrib/templates/data-engineering/{base => }/template/{{.project_name}}/.vscode/settings.json.tmpl (100%) rename contrib/templates/data-engineering/{base => }/template/{{.project_name}}/README.md.tmpl (100%) rename contrib/templates/data-engineering/{base => }/template/{{.project_name}}/assets/README.md (100%) rename contrib/templates/data-engineering/{base => }/template/{{.project_name}}/conftest.py (100%) rename contrib/templates/data-engineering/{base => }/template/{{.project_name}}/databricks.yml.tmpl (100%) rename contrib/templates/data-engineering/{base => }/template/{{.project_name}}/pyproject.toml (92%) rename contrib/templates/data-engineering/{base => }/template/{{.project_name}}/scripts/add_asset.py (100%) rename contrib/templates/data-engineering/{base => }/template/{{.project_name}}/scripts/test.py (100%) diff --git a/contrib/templates/data-engineering/README.md b/contrib/templates/data-engineering/README.md index ec5206c..19df04b 100644 --- a/contrib/templates/data-engineering/README.md +++ b/contrib/templates/data-engineering/README.md @@ -6,5 +6,8 @@ assets in DABs. Install it using ``` -databricks bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/data-engineering/base --branch data-engineering +databricks bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/data-engineering --branch data-engineering ``` + +Note that by default this template doesn't come with any assets such as jobs or pipelines. 
+Follow the instructions in the template setup and README to add them. \ No newline at end of file diff --git a/contrib/templates/data-engineering/base/databricks_template_schema.json b/contrib/templates/data-engineering/databricks_template_schema.json similarity index 100% rename from contrib/templates/data-engineering/base/databricks_template_schema.json rename to contrib/templates/data-engineering/databricks_template_schema.json diff --git a/contrib/templates/data-engineering/base/template/{{.project_name}}/.vscode/__builtins__.pyi b/contrib/templates/data-engineering/template/{{.project_name}}/.vscode/__builtins__.pyi similarity index 100% rename from contrib/templates/data-engineering/base/template/{{.project_name}}/.vscode/__builtins__.pyi rename to contrib/templates/data-engineering/template/{{.project_name}}/.vscode/__builtins__.pyi diff --git a/contrib/templates/data-engineering/base/template/{{.project_name}}/.vscode/extensions.json b/contrib/templates/data-engineering/template/{{.project_name}}/.vscode/extensions.json similarity index 100% rename from contrib/templates/data-engineering/base/template/{{.project_name}}/.vscode/extensions.json rename to contrib/templates/data-engineering/template/{{.project_name}}/.vscode/extensions.json diff --git a/contrib/templates/data-engineering/base/template/{{.project_name}}/.vscode/settings.json.tmpl b/contrib/templates/data-engineering/template/{{.project_name}}/.vscode/settings.json.tmpl similarity index 100% rename from contrib/templates/data-engineering/base/template/{{.project_name}}/.vscode/settings.json.tmpl rename to contrib/templates/data-engineering/template/{{.project_name}}/.vscode/settings.json.tmpl diff --git a/contrib/templates/data-engineering/base/template/{{.project_name}}/README.md.tmpl b/contrib/templates/data-engineering/template/{{.project_name}}/README.md.tmpl similarity index 100% rename from contrib/templates/data-engineering/base/template/{{.project_name}}/README.md.tmpl rename to 
contrib/templates/data-engineering/template/{{.project_name}}/README.md.tmpl diff --git a/contrib/templates/data-engineering/base/template/{{.project_name}}/assets/README.md b/contrib/templates/data-engineering/template/{{.project_name}}/assets/README.md similarity index 100% rename from contrib/templates/data-engineering/base/template/{{.project_name}}/assets/README.md rename to contrib/templates/data-engineering/template/{{.project_name}}/assets/README.md diff --git a/contrib/templates/data-engineering/base/template/{{.project_name}}/conftest.py b/contrib/templates/data-engineering/template/{{.project_name}}/conftest.py similarity index 100% rename from contrib/templates/data-engineering/base/template/{{.project_name}}/conftest.py rename to contrib/templates/data-engineering/template/{{.project_name}}/conftest.py diff --git a/contrib/templates/data-engineering/base/template/{{.project_name}}/databricks.yml.tmpl b/contrib/templates/data-engineering/template/{{.project_name}}/databricks.yml.tmpl similarity index 100% rename from contrib/templates/data-engineering/base/template/{{.project_name}}/databricks.yml.tmpl rename to contrib/templates/data-engineering/template/{{.project_name}}/databricks.yml.tmpl diff --git a/contrib/templates/data-engineering/base/template/{{.project_name}}/pyproject.toml b/contrib/templates/data-engineering/template/{{.project_name}}/pyproject.toml similarity index 92% rename from contrib/templates/data-engineering/base/template/{{.project_name}}/pyproject.toml rename to contrib/templates/data-engineering/template/{{.project_name}}/pyproject.toml index 25cc305..909faaa 100644 --- a/contrib/templates/data-engineering/base/template/{{.project_name}}/pyproject.toml +++ b/contrib/templates/data-engineering/template/{{.project_name}}/pyproject.toml @@ -2,7 +2,7 @@ name = "my_data_project" version = "0.1.0" description = "Databricks ETL pipeline project" -requires-python = "==3.11.*" +requires-python = "==3.10.*" dependencies = [ 
"databricks-dlt", "pytest", diff --git a/contrib/templates/data-engineering/base/template/{{.project_name}}/scripts/add_asset.py b/contrib/templates/data-engineering/template/{{.project_name}}/scripts/add_asset.py similarity index 100% rename from contrib/templates/data-engineering/base/template/{{.project_name}}/scripts/add_asset.py rename to contrib/templates/data-engineering/template/{{.project_name}}/scripts/add_asset.py diff --git a/contrib/templates/data-engineering/base/template/{{.project_name}}/scripts/test.py b/contrib/templates/data-engineering/template/{{.project_name}}/scripts/test.py similarity index 100% rename from contrib/templates/data-engineering/base/template/{{.project_name}}/scripts/test.py rename to contrib/templates/data-engineering/template/{{.project_name}}/scripts/test.py From eca8f28f92caca0b72553ee3237a0b401990527e Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Mon, 25 Nov 2024 10:51:57 +0100 Subject: [PATCH 09/18] Add workaround for databricks-dlt conflict --- .../template/{{.project_name}}/pyproject.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/contrib/templates/data-engineering/template/{{.project_name}}/pyproject.toml b/contrib/templates/data-engineering/template/{{.project_name}}/pyproject.toml index 909faaa..a498f86 100644 --- a/contrib/templates/data-engineering/template/{{.project_name}}/pyproject.toml +++ b/contrib/templates/data-engineering/template/{{.project_name}}/pyproject.toml @@ -17,6 +17,11 @@ test = "scripts.test:main" [tool.uv] package = true +override-dependencies = [ + # work around conflict with older databricks-dlt libraries + # pyspark is already installed by databricks-connect + "pyspark; sys_platform == 'never'", +] [tool.setuptools.packages.find] include = ["scripts"] \ No newline at end of file From 80a2e025d77b70a95eb3c5cb9208a2af28bfb43e Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Thu, 28 Nov 2024 12:35:01 +0100 Subject: [PATCH 10/18] Update based on reviewer feedback --- 
.vscode/settings.json | 22 +++++++++++++++++++ .../explorations/exploration.ipynb.tmpl | 1 + .../assets/{{.pipeline_name}}/main.py | 3 ++- .../databricks_template_schema.json | 4 ++-- .../job/databricks_template_schema.json | 4 ++-- .../template/{{.project_name}}/conftest.py | 5 ++++- .../template/{{.project_name}}/pyproject.toml | 2 -- 7 files changed, 33 insertions(+), 8 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..02d2535 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,22 @@ +{ + "python.analysis.stubPath": ".vscode", + "databricks.python.envFile": "${workspaceFolder}/.env", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + }, + "debugpy.debugJustMyCode": false, + "editor.formatOnSave": true, + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter", + "editor.formatOnSave": true, + } +} \ No newline at end of file diff --git a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/explorations/exploration.ipynb.tmpl b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/explorations/exploration.ipynb.tmpl index 560703c..14b9859 100644 --- a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/explorations/exploration.ipynb.tmpl +++ b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/explorations/exploration.ipynb.tmpl @@ -19,6 +19,7 @@ "source": [ "import sys\n", 
"sys.path.append('../transformations')\n", + "from transformations import taxi_stats\n", "\n", "\n", "spark = SparkSession.builder.getOrCreate()\n", diff --git a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/main.py b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/main.py index ad6b5f8..67f4c4c 100644 --- a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/main.py +++ b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/main.py @@ -1,4 +1,5 @@ # This is the entry point for the {{.pipeline_name}} pipeline. # It makes sure all transformations in the transformations directory are included. import transformations -__all__ = ["transformations"] \ No newline at end of file + +__all__ = ["transformations"] diff --git a/contrib/templates/data-engineering/assets/ingest-pipeline/databricks_template_schema.json b/contrib/templates/data-engineering/assets/ingest-pipeline/databricks_template_schema.json index df21996..0f7dddd 100644 --- a/contrib/templates/data-engineering/assets/ingest-pipeline/databricks_template_schema.json +++ b/contrib/templates/data-engineering/assets/ingest-pipeline/databricks_template_schema.json @@ -3,8 +3,8 @@ "properties": { "pipeline_name": { "type": "string", - "description": "\n{{fail \"The ingest-pipeline is not yet implemented.\"}}", + "description": "\n{{fail \"The ingest-pipeline template is not yet implemented.\"}}", "order": 3 } } -} +} \ No newline at end of file diff --git a/contrib/templates/data-engineering/assets/job/databricks_template_schema.json b/contrib/templates/data-engineering/assets/job/databricks_template_schema.json index afcf5b6..5e0d4b9 100644 --- a/contrib/templates/data-engineering/assets/job/databricks_template_schema.json +++ b/contrib/templates/data-engineering/assets/job/databricks_template_schema.json @@ -3,8 +3,8 @@ "properties": { "pipeline_name": { 
"type": "string", - "description": "\n{{fail \"The ingest-pipeline is not yet implemented.\"}}", + "description": "\n{{fail \"The job template is not yet implemented.\"}}", "order": 3 } } -} +} \ No newline at end of file diff --git a/contrib/templates/data-engineering/template/{{.project_name}}/conftest.py b/contrib/templates/data-engineering/template/{{.project_name}}/conftest.py index 2602fb5..2b7f5db 100644 --- a/contrib/templates/data-engineering/template/{{.project_name}}/conftest.py +++ b/contrib/templates/data-engineering/template/{{.project_name}}/conftest.py @@ -1,5 +1,7 @@ # conftest.py is used to configure pytest. # This file is in the root since it affects all tests through this bundle. +# It makes sure all 'assets/*' directories are added to `sys.path` so that +# tests can import them. import os import sys import dlt @@ -15,7 +17,8 @@ if resolved_path not in sys.path: sys.path.append(resolved_path) -# Work around issues in older databricks-connect +# For older databricks-connect, work around issues importing SparkSession +# and errors when SPARK_REMOTE is set. 
SparkSession.builder = DatabricksSession.builder os.environ.pop("SPARK_REMOTE", None) diff --git a/contrib/templates/data-engineering/template/{{.project_name}}/pyproject.toml b/contrib/templates/data-engineering/template/{{.project_name}}/pyproject.toml index a498f86..8e98865 100644 --- a/contrib/templates/data-engineering/template/{{.project_name}}/pyproject.toml +++ b/contrib/templates/data-engineering/template/{{.project_name}}/pyproject.toml @@ -6,8 +6,6 @@ requires-python = "==3.10.*" dependencies = [ "databricks-dlt", "pytest", - "setuptools", - "wheel", "databricks-connect==15.1.*", ] From d2ea026ea0ae1b9afe7a39a53b56a7bb3f8e9497 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Thu, 28 Nov 2024 12:37:26 +0100 Subject: [PATCH 11/18] Remove workaround for older databricks-dlt --- .../template/{{.project_name}}/pyproject.toml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/contrib/templates/data-engineering/template/{{.project_name}}/pyproject.toml b/contrib/templates/data-engineering/template/{{.project_name}}/pyproject.toml index 8e98865..966b1ab 100644 --- a/contrib/templates/data-engineering/template/{{.project_name}}/pyproject.toml +++ b/contrib/templates/data-engineering/template/{{.project_name}}/pyproject.toml @@ -15,11 +15,6 @@ test = "scripts.test:main" [tool.uv] package = true -override-dependencies = [ - # work around conflict with older databricks-dlt libraries - # pyspark is already installed by databricks-connect - "pyspark; sys_platform == 'never'", -] [tool.setuptools.packages.find] include = ["scripts"] \ No newline at end of file From 56cb31701789613aa241d89dda8630dc8594c169 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Thu, 28 Nov 2024 12:39:30 +0100 Subject: [PATCH 12/18] Extend description --- .../template/{{.project_name}}/scripts/test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/contrib/templates/data-engineering/template/{{.project_name}}/scripts/test.py 
b/contrib/templates/data-engineering/template/{{.project_name}}/scripts/test.py
index 62d8dd3..4748c81 100644
--- a/contrib/templates/data-engineering/template/{{.project_name}}/scripts/test.py
+++ b/contrib/templates/data-engineering/template/{{.project_name}}/scripts/test.py
@@ -1,6 +1,8 @@
 #!/usr/bin/env python3
 #
-# test.py runs the unit tests for this project using pytest.
+# test.py runs the unit tests for this project using pytest and serverless compute.
+# To use a different form of compute, instead use 'uv run pytest' or
+# use your IDE's testing panel. When using VS Code, consider using the Databricks extension.
 #
 import os
 import subprocess

From 93cc57ab6865d03e465113a14014a2526bc0aa35 Mon Sep 17 00:00:00 2001
From: Lennart Kats
Date: Thu, 28 Nov 2024 13:54:06 +0100
Subject: [PATCH 13/18] Update .gitignore

---
 .../template/{{.project_name}}/.gitignore | 8 ++++++++
 1 file changed, 8 insertions(+)
 create mode 100644 contrib/templates/data-engineering/template/{{.project_name}}/.gitignore

diff --git a/contrib/templates/data-engineering/template/{{.project_name}}/.gitignore b/contrib/templates/data-engineering/template/{{.project_name}}/.gitignore
new file mode 100644
index 0000000..f6a3b5f
--- /dev/null
+++ b/contrib/templates/data-engineering/template/{{.project_name}}/.gitignore
@@ -0,0 +1,8 @@
+.databricks/
+build/
+dist/
+__pycache__/
+*.egg-info
+.venv/
+**/explorations/**
+!**/explorations/README.md

From 6a615bb7fee69ae8545640382408a71c5030f81d Mon Sep 17 00:00:00 2001
From: Lennart Kats
Date: Sat, 30 Nov 2024 10:45:26 +0100
Subject: [PATCH 14/18] Rework package structure

---
 .../{{.pipeline_name}}/{main.py => __init__.py} | 0
 .../explorations/exploration.ipynb.tmpl | 4 ++--
 .../{{.pipeline_name}}/sources/dev/taxis.py | 10 ++++------
 .../{{.pipeline_name}}/sources/prod/taxis.py | 6 +++---
 .../{{.pipeline_name}}/tests/taxi_stats_test.py | 4 ++--
 .../transformations/__init__.py | 2 +-
 .../transformations/taxi_stats.py | 15 ++++++---------
.../{{.pipeline_name}}.pipeline.yml.tmpl | 2 +- 8 files changed, 19 insertions(+), 24 deletions(-) rename contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/{main.py => __init__.py} (100%) diff --git a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/main.py b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/__init__.py similarity index 100% rename from contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/main.py rename to contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/__init__.py diff --git a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/explorations/exploration.ipynb.tmpl b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/explorations/exploration.ipynb.tmpl index 14b9859..ef1f017 100644 --- a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/explorations/exploration.ipynb.tmpl +++ b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/explorations/exploration.ipynb.tmpl @@ -18,8 +18,8 @@ "outputs": [], "source": [ "import sys\n", - "sys.path.append('../transformations')\n", - "from transformations import taxi_stats\n", + "sys.path.append('..')\n", + "from {{.pipeline_name}}.transformations import taxi_stats\n", "\n", "\n", "spark = SparkSession.builder.getOrCreate()\n", diff --git a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/sources/dev/taxis.py b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/sources/dev/taxis.py index ed2a28c..1fba2e2 100644 --- a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/sources/dev/taxis.py +++ 
b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/sources/dev/taxis.py @@ -1,10 +1,8 @@ import dlt -from pyspark.sql import SparkSession, DataFrame +from pyspark.sql import DataFrame +from databricks.sdk.runtime import spark -@dlt.view( - comment="Small set of taxis for development (uses LIMIT 10)" -) +@dlt.view(comment="Small set of taxis for development (uses LIMIT 10)") def taxis() -> DataFrame: - spark = SparkSession.builder.getOrCreate() - return spark.sql("SELECT * FROM samples.nyctaxi.trips LIMIT 10") \ No newline at end of file + return spark.sql("SELECT * FROM samples.nyctaxi.trips LIMIT 10") diff --git a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/sources/prod/taxis.py b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/sources/prod/taxis.py index 6c22c80..15ce56a 100644 --- a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/sources/prod/taxis.py +++ b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/sources/prod/taxis.py @@ -1,8 +1,8 @@ import dlt -from pyspark.sql import SparkSession, DataFrame +from pyspark.sql import DataFrame +from databricks.sdk.runtime import spark @dlt.view def taxis() -> DataFrame: - spark = SparkSession.builder.getOrCreate() - return spark.sql("SELECT * FROM samples.nyctaxi.trips") \ No newline at end of file + return spark.sql("SELECT * FROM samples.nyctaxi.trips") diff --git a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/tests/taxi_stats_test.py b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/tests/taxi_stats_test.py index 35daa25..b0c4449 100644 --- a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/tests/taxi_stats_test.py +++ 
b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/tests/taxi_stats_test.py @@ -1,5 +1,5 @@ -from sources.dev.taxis import taxis -from transformations import taxi_stats +from ..sources.dev.taxis import taxis +from ..transformations import taxi_stats def test_taxi_stats(): diff --git a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/transformations/__init__.py b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/transformations/__init__.py index 26b7072..80577db 100644 --- a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/transformations/__init__.py +++ b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/transformations/__init__.py @@ -6,4 +6,4 @@ # Import all modules in the package except those starting with '_', like '__init__.py' for _, module_name, _ in pkgutil.iter_modules(__path__): if not module_name.startswith("_"): - importlib.import_module(f"{__name__}.{module_name}") \ No newline at end of file + importlib.import_module(f"{__name__}.{module_name}") diff --git a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/transformations/taxi_stats.py b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/transformations/taxi_stats.py index 7c979fb..5c5dcd9 100644 --- a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/transformations/taxi_stats.py +++ b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/transformations/taxi_stats.py @@ -3,21 +3,18 @@ from pyspark.sql import DataFrame -@dlt.table( - comment="Daily statistics of NYC Taxi trips" -) +@dlt.table(comment="Daily statistics of NYC Taxi trips") def taxi_stats() -> DataFrame: - """ Read from the 'taxis' view from etl_pipeline/sources. 
""" + """Read from the 'taxis' view from etl_pipeline/sources.""" taxis = dlt.read("taxis") - + return filter_taxis(taxis) def filter_taxis(taxis: DataFrame) -> DataFrame: - """ Group by date and calculate the number of trips. """ + """Group by date and calculate the number of trips.""" return ( - taxis - .withColumn("pickup_date", to_date("tpep_pickup_datetime")) + taxis.withColumn("pickup_date", to_date("tpep_pickup_datetime")) .groupBy("pickup_date") .agg(count("*").alias("number_of_trips")) - ) \ No newline at end of file + ) diff --git a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/{{.pipeline_name}}.pipeline.yml.tmpl b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/{{.pipeline_name}}.pipeline.yml.tmpl index eeedcbe..86890fd 100644 --- a/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/{{.pipeline_name}}.pipeline.yml.tmpl +++ b/contrib/templates/data-engineering/assets/etl-pipeline/template/assets/{{.pipeline_name}}/{{.pipeline_name}}.pipeline.yml.tmpl @@ -14,4 +14,4 @@ resources: - file: path: sources/${bundle.target}/*.py - file: - path: main.py \ No newline at end of file + path: __init__.py From c447107b79fde7d8b2ccad4f5ee2aebcf0c0c791 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Sat, 30 Nov 2024 10:57:36 +0100 Subject: [PATCH 15/18] Remove settings.json from the present PR --- .vscode/settings.json | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 02d2535..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "python.analysis.stubPath": ".vscode", - "databricks.python.envFile": "${workspaceFolder}/.env", - "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", - 
"jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", - "python.testing.pytestArgs": [ - "." - ], - "python.testing.unittestEnabled": false, - "python.testing.pytestEnabled": true, - "files.exclude": { - "**/*.egg-info": true, - "**/__pycache__": true, - ".pytest_cache": true, - }, - "debugpy.debugJustMyCode": false, - "editor.formatOnSave": true, - "[python]": { - "editor.defaultFormatter": "ms-python.black-formatter", - "editor.formatOnSave": true, - } -} \ No newline at end of file From bed1101e2cb84e7c94c0d0c8cc45ae05b3c15633 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Sun, 1 Dec 2024 14:59:02 +0100 Subject: [PATCH 16/18] Fix template name --- .vscode/settings.json | 22 +++++++++++++++++++ .../databricks_template_schema.json | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..02d2535 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,22 @@ +{ + "python.analysis.stubPath": ".vscode", + "databricks.python.envFile": "${workspaceFolder}/.env", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." 
+ ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + }, + "debugpy.debugJustMyCode": false, + "editor.formatOnSave": true, + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter", + "editor.formatOnSave": true, + } +} \ No newline at end of file diff --git a/contrib/templates/data-engineering/databricks_template_schema.json b/contrib/templates/data-engineering/databricks_template_schema.json index 6769871..2d3048e 100644 --- a/contrib/templates/data-engineering/databricks_template_schema.json +++ b/contrib/templates/data-engineering/databricks_template_schema.json @@ -1,5 +1,5 @@ { - "welcome_message": "\nWelcome to the pipeline-folders template for Databricks Asset Bundles!", + "welcome_message": "\nWelcome to the data-engineering template for Databricks Asset Bundles!", "properties": { "project_name": { "type": "string", From 56093bd79df94515cd572350591e3196d8d6da5b Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Mon, 2 Dec 2024 09:16:24 +0100 Subject: [PATCH 17/18] Remove settings.json from this PR --- .vscode/settings.json | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 02d2535..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "python.analysis.stubPath": ".vscode", - "databricks.python.envFile": "${workspaceFolder}/.env", - "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", - "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", - "python.testing.pytestArgs": [ - "." 
- ], - "python.testing.unittestEnabled": false, - "python.testing.pytestEnabled": true, - "files.exclude": { - "**/*.egg-info": true, - "**/__pycache__": true, - ".pytest_cache": true, - }, - "debugpy.debugJustMyCode": false, - "editor.formatOnSave": true, - "[python]": { - "editor.defaultFormatter": "ms-python.black-formatter", - "editor.formatOnSave": true, - } -} \ No newline at end of file From aab5cc5c52962acccdbc00f04fd07219dd14fac9 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Mon, 2 Dec 2024 09:23:47 +0100 Subject: [PATCH 18/18] Revert "Add --branch parameters for now" This reverts commit 596ece0174b405e4276d11e97ca9580a47b6b11c. --- contrib/templates/data-engineering/README.md | 2 +- .../templates/data-engineering/databricks_template_schema.json | 2 +- .../template/{{.project_name}}/scripts/add_asset.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/contrib/templates/data-engineering/README.md b/contrib/templates/data-engineering/README.md index 19df04b..c1ed1c6 100644 --- a/contrib/templates/data-engineering/README.md +++ b/contrib/templates/data-engineering/README.md @@ -6,7 +6,7 @@ assets in DABs. Install it using ``` -databricks bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/data-engineering --branch data-engineering +databricks bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/data-engineering ``` Note that by default this template doesn't come with any assets such as jobs or pipelines. 
diff --git a/contrib/templates/data-engineering/databricks_template_schema.json b/contrib/templates/data-engineering/databricks_template_schema.json index 2d3048e..575488f 100644 --- a/contrib/templates/data-engineering/databricks_template_schema.json +++ b/contrib/templates/data-engineering/databricks_template_schema.json @@ -42,5 +42,5 @@ "order": 4 } }, - "success_message": "\n\nYour new project has been created in the '{{.project_name}}' directory!\n\nTo add an example asset to your project, use\n\n $ cd {{.project_name}}\n $ databricks bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/data-engineering/assets/etl-pipeline --branch data-engineering\n\nRefer to the README.md file for full \"getting started\" instructions!" + "success_message": "\n\nYour new project has been created in the '{{.project_name}}' directory!\n\nTo add an example asset to your project, use\n\n $ cd {{.project_name}}\n $ databricks bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/data-engineering/assets/etl-pipeline\n\nRefer to the README.md file for full \"getting started\" instructions!" 
} \ No newline at end of file diff --git a/contrib/templates/data-engineering/template/{{.project_name}}/scripts/add_asset.py b/contrib/templates/data-engineering/template/{{.project_name}}/scripts/add_asset.py index 80cac32..931db61 100644 --- a/contrib/templates/data-engineering/template/{{.project_name}}/scripts/add_asset.py +++ b/contrib/templates/data-engineering/template/{{.project_name}}/scripts/add_asset.py @@ -11,7 +11,7 @@ def init_bundle(asset_type: AssetType) -> None: - cmd = f"databricks bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/data-engineering/assets/{asset_type} --branch data-engineering" + cmd = f"databricks bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/data-engineering/assets/{asset_type}" subprocess.run(cmd, shell=True)