diff --git a/airflow-core/src/airflow/models/serialized_dag.py b/airflow-core/src/airflow/models/serialized_dag.py
index 71722b54adee6..684cea847791b 100644
--- a/airflow-core/src/airflow/models/serialized_dag.py
+++ b/airflow-core/src/airflow/models/serialized_dag.py
@@ -350,7 +350,15 @@ def __repr__(self) -> str:
     def hash(cls, dag_data):
         """Hash the data to get the dag_hash."""
         dag_data = cls._sort_serialized_dag_dict(dag_data)
-        data_json = json.dumps(dag_data, sort_keys=True).encode("utf-8")
+        # Leave fileloc out of the hash so that changes to fileloc
+        # do not affect it. In 3.0+, a combination of bundle_path and
+        # relative fileloc more correctly determines the dag file location.
+        # Build new dicts instead of popping from a shallow copy:
+        # ``dict.copy()`` shares the nested "dag" dict, so a pop on the copy
+        # would also strip fileloc from the dict it was copied from.
+        dag_without_fileloc = {k: v for k, v in dag_data["dag"].items() if k != "fileloc"}
+        data_ = {**dag_data, "dag": dag_without_fileloc}
+        data_json = json.dumps(data_, sort_keys=True).encode("utf-8")
         return md5(data_json).hexdigest()
 
     @classmethod
diff --git a/airflow-core/tests/unit/models/test_serialized_dag.py b/airflow-core/tests/unit/models/test_serialized_dag.py
index d30d71d0b02cb..f6277038dd96c 100644
--- a/airflow-core/tests/unit/models/test_serialized_dag.py
+++ b/airflow-core/tests/unit/models/test_serialized_dag.py
@@ -522,3 +522,35 @@ def test_new_dag_version_created_when_bundle_name_changes_and_hash_unchanged(sel
 
         # There should now be two versions of the DAG
         assert session.query(DagVersion).count() == 2
+
+    def test_hash_method_removes_fileloc_and_remains_consistent(self):
+        """Test that the hash method removes fileloc before hashing."""
+        test_data = {
+            "__version": 1,
+            "dag": {
+                "fileloc": "/path/to/dag.py",
+                "dag_id": "test_dag",
+                "tasks": {
+                    "task1": {"task_id": "task1"},
+                },
+            },
+        }
+
+        hash_with_fileloc = SDM.hash(test_data)
+
+        # Modify only the top-level dag.fileloc path (simulating file location changes)
+        test_data["dag"]["fileloc"] = "/different/path/to/dag.py"
+
+        # Get hash with different top-level fileloc (should be the same)
+        hash_with_different_fileloc = SDM.hash(test_data)
+
+        # Hashes should be identical since top-level dag.fileloc is removed before hashing
+        assert hash_with_fileloc == hash_with_different_fileloc
+
+        # Verify that the original data still has fileloc (method shouldn't modify original)
+        assert "fileloc" in test_data["dag"]
+        assert test_data["dag"]["fileloc"] == "/different/path/to/dag.py"
+
+        # A change to any field other than fileloc must still change the hash
+        test_data["dag"]["dag_id"] = "another_dag"
+        assert SDM.hash(test_data) != hash_with_fileloc