Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
5c794b8
Make dag_version_id in TI non-nullable
ephraimbuddy May 20, 2025
203536d
fixup! Make dag_version_id in TI non-nullable
ephraimbuddy May 20, 2025
21918b0
Fix tests for some providers
ephraimbuddy May 21, 2025
83681f6
fixup! Fix tests for some providers
ephraimbuddy May 21, 2025
4675b20
fixup! fixup! Fix tests for some providers
ephraimbuddy May 21, 2025
9be5b3b
more test fixes
ephraimbuddy May 23, 2025
fb18650
fixup! more test fixes
ephraimbuddy May 24, 2025
ff25459
fixup! fixup! more test fixes
ephraimbuddy May 25, 2025
4e3a9c6
Update more tests
ephraimbuddy May 25, 2025
050cb50
fixup! Update more tests
ephraimbuddy May 25, 2025
b721be8
fixup! fixup! Update more tests
ephraimbuddy May 27, 2025
b5bd21e
fixup! fixup! fixup! Update more tests
ephraimbuddy May 27, 2025
bde9818
fixup! fixup! fixup! fixup! Update more tests
ephraimbuddy May 27, 2025
ccd5257
fixup! fixup! fixup! fixup! fixup! Update more tests
ephraimbuddy May 27, 2025
5f3f29a
Add compat for MockedTaskInstance
ephraimbuddy May 28, 2025
8357ecf
fixup! Add compat for MockedTaskInstance
ephraimbuddy May 28, 2025
08336cd
fixup! fixup! Add compat for MockedTaskInstance
ephraimbuddy May 28, 2025
b208906
fix system test
ephraimbuddy May 28, 2025
6e5497d
fixup! fix system test
ephraimbuddy May 29, 2025
538c6ce
A fix for kube integration tests
ephraimbuddy May 29, 2025
448d58c
Update the expected TIs from test multiple versions number file
ephraimbuddy May 30, 2025
ec42bf9
fix incorrect imports
ephraimbuddy May 30, 2025
7c7c03d
fix time delta async test
ephraimbuddy May 30, 2025
86aa622
Remove non working kube test fix
ephraimbuddy May 30, 2025
b78cc59
Fix task sdk test
ephraimbuddy May 30, 2025
d5fcb1d
Add cadwyn migration and fix some tests
ephraimbuddy May 30, 2025
192c69e
fix some provider tests
ephraimbuddy May 30, 2025
b24ef32
Add data migration before migrating dag_version_id
ephraimbuddy Jun 2, 2025
9ccfd0b
Update test, TaskInstanceResponse and fix assert dag_version in task_…
ephraimbuddy Jun 3, 2025
7e58b23
Resolve conflicts
ephraimbuddy Jul 8, 2025
27e07ae
Fix test
ephraimbuddy Jul 1, 2025
b461d67
Fix more tests
ephraimbuddy Jul 1, 2025
15bda9d
fixup! fixup! Fix more tests
ephraimbuddy Jul 2, 2025
4bc2e2c
Fix bad rebase
ephraimbuddy Jul 2, 2025
2890710
update fab www-hash
ephraimbuddy Jul 3, 2025
c6afa74
Fix conflicts
ephraimbuddy Jul 8, 2025
f3c6444
Fix MappedTaskInstancedata
ephraimbuddy Jul 8, 2025
743e188
add dag_version_id to a new test
ephraimbuddy Jul 8, 2025
43428f4
Fix fab www hash
ephraimbuddy Jul 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion airflow-core/docs/img/airflow_erd.sha256
Original file line number Diff line number Diff line change
@@ -1 +1 @@
72b7bb2d4e109d8f786d229e68c83d2b6f7442e48a6617ea3d47842f1bfa33eb
e0de73aab81a28995b99be21dd25c8ca31c4e0f4a5a0a26df8aff412e5067fd5
3 changes: 2 additions & 1 deletion airflow-core/docs/img/airflow_erd.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 3 additions & 1 deletion airflow-core/docs/migrations-ref.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ Here's the list of all the Database Migrations that are executed via when you ru
+-------------------------+------------------+-------------------+--------------------------------------------------------------+
| Revision ID | Revises ID | Airflow Version | Description |
+=========================+==================+===================+==============================================================+
| ``ffdb0566c7c0`` (head) | ``66a7743fe20e`` | ``3.1.0`` | Add dag_favorite table. |
| ``5d3072c51bac`` (head) | ``ffdb0566c7c0`` | ``3.1.0`` | Make dag_version_id non-nullable in TaskInstance. |
+-------------------------+------------------+-------------------+--------------------------------------------------------------+
| ``ffdb0566c7c0`` | ``66a7743fe20e`` | ``3.1.0`` | Add dag_favorite table. |
+-------------------------+------------------+-------------------+--------------------------------------------------------------+
| ``66a7743fe20e`` | ``583e80dfcef4`` | ``3.1.0`` | Add triggering user to dag_run. |
+-------------------------+------------------+-------------------+--------------------------------------------------------------+
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class TaskInstanceResponse(BaseModel):
id: str
task_id: str
dag_id: str
dag_version: DagVersionResponse
run_id: str = Field(alias="dag_run_id")
map_index: int
logical_date: datetime | None
Expand Down Expand Up @@ -76,7 +77,6 @@ class TaskInstanceResponse(BaseModel):
)
trigger: TriggerResponse | None
queued_by_job: JobResponse | None = Field(alias="triggerer_job")
dag_version: DagVersionResponse | None


class TaskInstanceCollectionResponse(BaseModel):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10377,6 +10377,8 @@ components:
dag_id:
type: string
title: Dag Id
dag_version:
$ref: '#/components/schemas/DagVersionResponse'
dag_run_id:
type: string
title: Dag Run Id
Expand Down Expand Up @@ -10504,15 +10506,12 @@ components:
anyOf:
- $ref: '#/components/schemas/JobResponse'
- type: 'null'
dag_version:
anyOf:
- $ref: '#/components/schemas/DagVersionResponse'
- type: 'null'
type: object
required:
- id
- task_id
- dag_id
- dag_version
- dag_run_id
- map_index
- logical_date
Expand Down Expand Up @@ -10541,7 +10540,6 @@ components:
- rendered_map_index
- trigger
- triggerer_job
- dag_version
title: TaskInstanceResponse
description: TaskInstance serializer for responses.
TaskInstanceState:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,7 @@ class TaskInstance(BaseModel):
dag_id: str
run_id: str
try_number: int
dag_version_id: uuid.UUID
map_index: int = -1
hostname: str | None = None
context_carrier: dict | None = None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@

from airflow.api_fastapi.execution_api.versions.v2025_04_28 import AddRenderedMapIndexField
from airflow.api_fastapi.execution_api.versions.v2025_05_20 import DowngradeUpstreamMapIndexes
from airflow.api_fastapi.execution_api.versions.v2025_08_10 import AddDagVersionIdField

bundle = VersionBundle(
HeadVersion(),
Version("2025-08-10", AddDagVersionIdField),
Version("2025-05-20", DowngradeUpstreamMapIndexes),
Version("2025-04-28", AddRenderedMapIndexField),
Version("2025-04-11"),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

from __future__ import annotations

from cadwyn import VersionChange, schema

from airflow.api_fastapi.execution_api.datamodels.taskinstance import TaskInstance


class AddDagVersionIdField(VersionChange):
    """Add the `dag_version_id` field to the TaskInstance model."""

    # The class docstring doubles as the description of this version change,
    # so the two always stay in sync.
    description = __doc__

    # When converting to the previous API version, `dag_version_id` is treated
    # as a field that did not exist on TaskInstance.
    instructions_to_migrate_to_previous_version = (
        schema(TaskInstance).field("dag_version_id").didnt_exist,
    )
9 changes: 8 additions & 1 deletion airflow-core/src/airflow/cli/commands/task_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from airflow.exceptions import AirflowConfigException, DagRunNotFound, TaskInstanceNotFound
from airflow.models import TaskInstance
from airflow.models.dag import DAG as SchedulerDAG, _get_or_create_dagrun
from airflow.models.dag_version import DagVersion
from airflow.models.dagrun import DagRun
from airflow.sdk.definitions.dag import DAG, _run_task
from airflow.sdk.definitions.param import ParamsDict
Expand Down Expand Up @@ -200,7 +201,13 @@ def _get_ti(
f"run_id or logical_date of {logical_date_or_run_id!r} not found"
)
# TODO: Validate map_index is in range?
ti = TaskInstance(task, run_id=dag_run.run_id, map_index=map_index)
dag_version = DagVersion.get_latest_version(dag.dag_id, session=session)
if not dag_version:
# TODO: Remove this once DagVersion.get_latest_version is guaranteed to return a DagVersion/raise
raise ValueError(
f"Cannot create TaskInstance for {dag.dag_id} because the Dag is not serialized."
)
ti = TaskInstance(task, run_id=dag_run.run_id, map_index=map_index, dag_version_id=dag_version.id)
if dag_run in session:
session.add(ti)
ti.dag_run = dag_run
Expand Down
2 changes: 1 addition & 1 deletion airflow-core/src/airflow/executors/workloads.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class TaskInstance(BaseModel):
"""Schema for TaskInstance with minimal required fields needed for Executors and Task SDK."""

id: uuid.UUID

dag_version_id: uuid.UUID
task_id: str
dag_id: str
run_id: str
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""
Make dag_version_id non-nullable in TaskInstance.

Revision ID: 5d3072c51bac
Revises: ffdb0566c7c0
Create Date: 2025-05-20 10:38:25.635779

"""

from __future__ import annotations

import sqlalchemy as sa
from alembic import op
from sqlalchemy_utils import UUIDType

# revision identifiers, used by Alembic.
# Identifier of this migration (the "Revision ID" in the module docstring).
revision = "5d3072c51bac"
# Parent migration this one is applied on top of ("Revises" in the module docstring).
down_revision = "ffdb0566c7c0"
# No branch label and no extra dependency are declared for this migration.
branch_labels = None
depends_on = None
# Airflow release this migration is associated with.
airflow_version = "3.1.0"


def upgrade():
    """
    Backfill ``task_instance.dag_version_id`` and then forbid NULLs in that column.

    Every task instance whose ``dag_version_id`` is NULL is pointed at the most
    recently created ``dag_version`` row (by ``created_at``) that has the same
    ``dag_id``. Afterwards the column is altered to be non-nullable.
    """
    # PostgreSQL variant: DISTINCT ON keeps one row per dag_id, and the
    # ORDER BY makes that row the newest one. SQL text is kept verbatim.
    postgres_backfill = """
UPDATE task_instance
SET dag_version_id = latest_versions.id
FROM (
SELECT DISTINCT ON (dag_id) dag_id, id
FROM dag_version
ORDER BY dag_id, created_at DESC
) latest_versions
WHERE task_instance.dag_id = latest_versions.dag_id
AND task_instance.dag_version_id IS NULL
"""
    # Variant for every other dialect: rank versions per dag_id with
    # ROW_NUMBER() and pick rank 1 in a correlated subquery.
    generic_backfill = """
UPDATE task_instance
SET dag_version_id = (
SELECT id FROM (
SELECT id, dag_id,
ROW_NUMBER() OVER (PARTITION BY dag_id ORDER BY created_at DESC) as rn
FROM dag_version
) ranked_versions
WHERE ranked_versions.dag_id = task_instance.dag_id
AND ranked_versions.rn = 1
)
WHERE task_instance.dag_version_id IS NULL
"""

    dialect_name = op.get_bind().dialect.name
    backfill_sql = postgres_backfill if dialect_name == "postgresql" else generic_backfill

    # The data fix must run first: the NOT NULL change below is applied to the
    # same column that the statement above fills in.
    op.execute(sa.text(backfill_sql))

    with op.batch_alter_table("task_instance", schema=None) as task_instance_table:
        # Only nullability changes; the column type is restated as existing_type.
        task_instance_table.alter_column(
            "dag_version_id",
            existing_type=UUIDType(binary=False),
            nullable=False,
        )


def downgrade():
    """
    Allow NULLs in ``task_instance.dag_version_id`` again.

    Only the nullability of the column is reverted; existing values are left untouched.
    """
    with op.batch_alter_table("task_instance", schema=None) as task_instance_table:
        task_instance_table.alter_column(
            "dag_version_id",
            existing_type=UUIDType(binary=False),
            nullable=True,
        )
5 changes: 4 additions & 1 deletion airflow-core/src/airflow/models/dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,9 @@ def _create_orm_dagrun(
select(DagModel.bundle_version).where(DagModel.dag_id == dag.dag_id),
)
dag_version = DagVersion.get_latest_version(dag.dag_id, session=session)
if not dag_version:
raise AirflowException(f"Cannot create DagRun for DAG {dag.dag_id} because the dag is not serialized")

run = DagRun(
dag_id=dag.dag_id,
run_id=run_id,
Expand All @@ -270,7 +273,7 @@ def _create_orm_dagrun(
run.dag = dag
# create the associated task instances
# state is None at the moment of creation
run.verify_integrity(session=session, dag_version_id=dag_version.id if dag_version else None)
run.verify_integrity(session=session, dag_version_id=dag_version.id)
return run


Expand Down
22 changes: 13 additions & 9 deletions airflow-core/src/airflow/models/dagrun.py
Original file line number Diff line number Diff line change
Expand Up @@ -1452,7 +1452,11 @@ def _expand_mapped_task_if_needed(ti: TI) -> Iterable[TI] | None:
# It's enough to revise map index once per task id,
# checking the map index for each mapped task significantly slows down scheduling
if schedulable.task.task_id not in revised_map_index_task_ids:
ready_tis.extend(self._revise_map_indexes_if_mapped(schedulable.task, session=session))
ready_tis.extend(
self._revise_map_indexes_if_mapped(
schedulable.task, dag_version_id=schedulable.dag_version_id, session=session
)
)
revised_map_index_task_ids.add(schedulable.task.task_id)
ready_tis.append(schedulable)

Expand Down Expand Up @@ -1555,9 +1559,7 @@ def _emit_duration_stats_for_finished_state(self):
Stats.timing(f"dagrun.duration.{self.state}", **timer_params)

@provide_session
def verify_integrity(
self, *, session: Session = NEW_SESSION, dag_version_id: UUIDType | None = None
) -> None:
def verify_integrity(self, *, session: Session = NEW_SESSION, dag_version_id: UUIDType) -> None:
"""
Verify the DagRun by checking for removed tasks or tasks that are not in the database yet.

Expand Down Expand Up @@ -1687,7 +1689,7 @@ def _get_task_creator(
created_counts: dict[str, int],
ti_mutation_hook: Callable,
hook_is_noop: Literal[True],
dag_version_id: UUIDType | None,
dag_version_id: UUIDType,
) -> Callable[[Operator, Iterable[int]], Iterator[dict[str, Any]]]: ...

@overload
Expand All @@ -1696,15 +1698,15 @@ def _get_task_creator(
created_counts: dict[str, int],
ti_mutation_hook: Callable,
hook_is_noop: Literal[False],
dag_version_id: UUIDType | None,
dag_version_id: UUIDType,
) -> Callable[[Operator, Iterable[int]], Iterator[TI]]: ...

def _get_task_creator(
self,
created_counts: dict[str, int],
ti_mutation_hook: Callable,
hook_is_noop: Literal[True, False],
dag_version_id: UUIDType | None,
dag_version_id: UUIDType,
) -> Callable[[Operator, Iterable[int]], Iterator[dict[str, Any]] | Iterator[TI]]:
"""
Get the task creator function.
Expand Down Expand Up @@ -1815,7 +1817,9 @@ def _create_task_instances(
# TODO[HA]: We probably need to savepoint this so we can keep the transaction alive.
session.rollback()

def _revise_map_indexes_if_mapped(self, task: Operator, *, session: Session) -> Iterator[TI]:
def _revise_map_indexes_if_mapped(
self, task: Operator, *, dag_version_id: UUIDType, session: Session
) -> Iterator[TI]:
"""
Check if task increased or reduced in length and handle appropriately.

Expand Down Expand Up @@ -1861,7 +1865,7 @@ def _revise_map_indexes_if_mapped(self, task: Operator, *, session: Session) ->
for index in range(total_length):
if index in existing_indexes:
continue
ti = TI(task, run_id=self.run_id, map_index=index, state=None)
ti = TI(task, run_id=self.run_id, map_index=index, state=None, dag_version_id=dag_version_id)
self.log.debug("Expanding TIs upserted %s", ti)
task_instance_mutation_hook(ti)
ti = session.merge(ti)
Expand Down
Loading