Configure Airflow tasks using dbt model meta #1339

Merged: 17 commits, Dec 17, 2024
4 changes: 4 additions & 0 deletions cosmos/airflow/graph.py
@@ -98,6 +98,7 @@ def create_test_task_metadata(
extra_context = {}

task_owner = ""
airflow_task_config = {}
if test_indirect_selection != TestIndirectSelection.EAGER:
task_args["indirect_selection"] = test_indirect_selection.value
if node is not None:
@@ -110,6 +111,7 @@

extra_context = {"dbt_node_config": node.context_dict}
task_owner = node.owner
airflow_task_config = node.airflow_task_config

elif render_config is not None: # TestBehavior.AFTER_ALL
task_args["select"] = render_config.select
@@ -119,6 +121,7 @@
return TaskMetadata(
id=test_task_name,
owner=task_owner,
airflow_task_config=airflow_task_config,
operator_class=calculate_operator_class(
execution_mode=execution_mode,
dbt_class="DbtTest",
@@ -191,6 +194,7 @@ def create_task_metadata(
task_metadata = TaskMetadata(
id=task_id,
owner=node.owner,
airflow_task_config=node.airflow_task_config,
operator_class=calculate_operator_class(
execution_mode=execution_mode, dbt_class=dbt_resource_to_class[node.resource_type]
),
3 changes: 3 additions & 0 deletions cosmos/core/airflow.py
@@ -32,6 +32,9 @@ def get_airflow_task(task: Task, dag: DAG, task_group: TaskGroup | None = None)
if task.owner != "":
task_kwargs["owner"] = task.owner

for k, v in task.airflow_task_config.items():
task_kwargs[k] = v

airflow_task = Operator(
task_id=task.id,
dag=dag,
1 change: 1 addition & 0 deletions cosmos/core/graph/entities.py
@@ -58,6 +58,7 @@ class Task(CosmosEntity):
"""

owner: str = ""
airflow_task_config: Dict[str, Any] = field(default_factory=dict)
operator_class: str = "airflow.operators.empty.EmptyOperator"
arguments: Dict[str, Any] = field(default_factory=dict)
extra_context: Dict[str, Any] = field(default_factory=dict)
18 changes: 17 additions & 1 deletion cosmos/dbt/graph.py
@@ -13,7 +13,7 @@
from functools import cached_property
from pathlib import Path
from subprocess import PIPE, Popen
from typing import TYPE_CHECKING, Any, Optional
from typing import TYPE_CHECKING, Any, Dict, Optional

from airflow.models import Variable

@@ -67,6 +67,22 @@ class DbtNode:
has_freshness: bool = False
has_test: bool = False

@property
def airflow_task_config(self) -> Dict[str, Any]:
"""
This property extends the dbt project with Airflow-specific metadata declared in the dbt YAML configuration.
Since dbt projects are independent of Airflow, placing Airflow-specific settings under the `cosmos` key of the `meta` field in the dbt YAML
allows the corresponding Airflow task to use them at execution time.

Examples: pool, pool_slots, queue, ...
Returns:
Dict[str, Any]: Airflow task configuration extracted from the node's `meta.cosmos` entry, or an empty dict if none is defined.
"""
airflow_task_config = self.config.get("meta", {}).get("cosmos")
if isinstance(airflow_task_config, dict):
return airflow_task_config
return {}

@property
def resource_name(self) -> str:
"""
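
For illustration only (not part of the PR's diff): a minimal sketch of what the new airflow_task_config property returns, using a hypothetical node config dict rather than the project's test fixtures.

# Hypothetical dbt node config as parsed from the manifest; only a dict under
# meta.cosmos is treated as Airflow task configuration.
config = {"materialized": "table", "meta": {"cosmos": {"pool": "ml_pool", "pool_slots": 2}}}

# Mirrors the logic of DbtNode.airflow_task_config introduced in this PR.
airflow_task_config = config.get("meta", {}).get("cosmos")
if not isinstance(airflow_task_config, dict):
    airflow_task_config = {}

print(airflow_task_config)  # {'pool': 'ml_pool', 'pool_slots': 2}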
23 changes: 23 additions & 0 deletions docs/getting_started/custom-airflow-properties.rst
@@ -0,0 +1,23 @@
Airflow Configuration Overrides with Astronomer Cosmos
======================================================

**Astronomer Cosmos** allows you to override Airflow task configuration for each dbt task (dbt operator) through the ``meta`` field of the dbt model's YAML file.

Sample dbt Model YAML
---------------------

.. code-block:: yaml

   version: 2
   models:
     - name: name
       description: description
       meta:
         cosmos:
           pool: abcd

Explanation
-----------

By adding Airflow configurations under the ``cosmos`` key of the ``meta`` field, you can set independent Airflow configuration for each task.
For example, in the YAML above, the ``pool`` setting is applied only to that specific dbt task.

This approach allows for more granular control over Airflow settings per task within your dbt model definitions.
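
As a sketch only (not part of this docs page): the snippet below mirrors how entries under meta.cosmos are merged into the operator keyword arguments by the loop added to cosmos/core/airflow.py above; the values shown are hypothetical.

# Values as they would be read from the model's meta.cosmos block (hypothetical).
airflow_task_config = {"pool": "abcd", "queue": "ml_queue"}

task_kwargs = {}
for k, v in airflow_task_config.items():  # same merge performed in get_airflow_task()
    task_kwargs[k] = v

# task_kwargs is then passed to the dbt operator constructor, so the resulting
# Airflow task runs in pool "abcd" on queue "ml_queue".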
43 changes: 41 additions & 2 deletions tests/airflow/test_graph.py
@@ -62,7 +62,7 @@
depends_on=[parent_node.unique_id],
file_path=SAMPLE_PROJ_PATH / "gen3/models/child.sql",
tags=["nightly"],
config={"materialized": "table"},
config={"materialized": "table", "meta": {"cosmos": {"queue": "custom_queue"}}},
)

child2_node = DbtNode(
@@ -71,7 +71,7 @@
depends_on=[parent_node.unique_id],
file_path=SAMPLE_PROJ_PATH / "gen3/models/child2_v2.sql",
tags=["nightly"],
config={"materialized": "table"},
config={"materialized": "table", "meta": {"cosmos": {"pool": "custom_pool"}}},
)

sample_nodes_list = [parent_seed, parent_node, test_parent_node, child_node, child2_node]
@@ -707,3 +707,42 @@ def test_owner(dbt_extra_config, expected_owner):

assert len(output.leaves) == 1
assert output.leaves[0].owner == expected_owner


def test_custom_meta():
    with DAG("test-id", start_date=datetime(2022, 1, 1)) as dag:
        task_args = {
            "project_dir": SAMPLE_PROJ_PATH,
            "conn_id": "fake_conn",
            "profile_config": ProfileConfig(
                profile_name="default",
                target_name="default",
                profile_mapping=PostgresUserPasswordProfileMapping(
                    conn_id="fake_conn",
                    profile_args={"schema": "public"},
                ),
            ),
        }
        build_airflow_graph(
            nodes=sample_nodes,
            dag=dag,
            execution_mode=ExecutionMode.LOCAL,
            test_indirect_selection=TestIndirectSelection.EAGER,
            task_args=task_args,
            render_config=RenderConfig(
                test_behavior=TestBehavior.AFTER_EACH,
                source_rendering_behavior=SOURCE_RENDERING_BEHAVIOR,
            ),
            dbt_project_name="astro_shop",
        )
        # test custom meta (queue, pool)
        for task in dag.tasks:
            if task.task_id == "child2_v2_run":
                assert task.pool == "custom_pool"
            else:
                assert task.pool == "default_pool"

            if task.task_id == "child_run":
                assert task.queue == "custom_queue"
            else:
                assert task.queue == "default"