Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions airflow-core/newsfragments/57069.significant.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
Git provider: Remove '.git' folder from versions in GitDagBundle

A new option (``prune_dotgit_folder``) has been added to the GitDagBundle to remove the ``.git`` folder
from versioned bundles by default to reduce disk usage; set ``prune_dotgit_folder=False`` to keep
repo metadata in the dag bundle's versions folders.

* Types of change

* [ ] Dag changes
* [ ] Config changes
* [ ] API changes
* [ ] CLI changes
* [x] Behaviour changes
* [ ] Plugin changes
* [ ] Dependency changes
* [ ] Code interface changes
1 change: 1 addition & 0 deletions providers/git/docs/bundles/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ Example of using the GitDagBundle:
"subdir": "dags",
"tracking_ref": "main",
"refresh_interval": 3600,
"prune_dotgit_folder": true
}
}
]'
10 changes: 10 additions & 0 deletions providers/git/src/airflow/providers/git/bundles/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@ class GitDagBundle(BaseDagBundle):
:param subdir: Subdirectory within the repository where the DAGs are stored (Optional)
:param git_conn_id: Connection ID for SSH/token based connection to the repository (Optional)
:param repo_url: Explicit Git repository URL to override the connection's host. (Optional)
:param prune_dotgit_folder: Remove .git folder from the versions after cloning.

The per-version clone is not a full "git" copy (it makes use of git's `--local` ability
to share the object directory via hard links), but if you have a lot of current versions
running, or an especially large git repo, leaving this as True will save some disk space
at the expense of `git` operations not working in the bundle that Tasks run from.
"""

supports_versioning = True
Expand All @@ -56,6 +62,7 @@ def __init__(
subdir: str | None = None,
git_conn_id: str | None = None,
repo_url: str | None = None,
prune_dotgit_folder: bool = True,
**kwargs,
) -> None:
super().__init__(**kwargs)
Expand All @@ -68,6 +75,7 @@ def __init__(
self.repo_path = self.base_dir / "tracking_repo"
self.git_conn_id = git_conn_id
self.repo_url = repo_url
self.prune_dotgit_folder = prune_dotgit_folder

self._log = log.bind(
bundle_name=self.name,
Expand Down Expand Up @@ -115,6 +123,8 @@ def _initialize(self):
self.repo.remotes.origin.fetch()
self.repo.head.set_reference(str(self.repo.commit(self.version)))
self.repo.head.reset(index=True, working_tree=True)
if self.prune_dotgit_folder:
shutil.rmtree(self.repo_path / ".git")
else:
self.refresh()
self.repo.close()
Expand Down
43 changes: 43 additions & 0 deletions providers/git/tests/unit/git/bundles/test_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ def test_get_specific_version(self, mock_githook, git_repo):
git_conn_id=CONN_HTTPS,
version=starting_commit.hexsha,
tracking_ref=GIT_DEFAULT_BRANCH,
prune_dotgit_folder=False,
)
bundle.initialize()

Expand Down Expand Up @@ -196,6 +197,7 @@ def test_get_tag_version(self, mock_githook, git_repo):
git_conn_id=CONN_HTTPS,
version="test",
tracking_ref=GIT_DEFAULT_BRANCH,
prune_dotgit_folder=False,
)
bundle.initialize()
assert bundle.get_current_version() == starting_commit.hexsha
Expand Down Expand Up @@ -225,6 +227,47 @@ def test_get_latest(self, mock_githook, git_repo):

assert_repo_is_closed(bundle)

@mock.patch("airflow.providers.git.bundles.git.GitHook")
def test_removes_git_dir_for_versioned_bundle_by_default(self, mock_githook, git_repo):
    """A versioned bundle built with default options has no ``.git`` directory once initialized."""
    origin_path, origin_repo = git_repo
    mock_githook.return_value.repo_url = origin_path
    pinned_sha = origin_repo.head.commit.hexsha

    bundle = GitDagBundle(
        name="test",
        git_conn_id=CONN_HTTPS,
        version=pinned_sha,
        tracking_ref=GIT_DEFAULT_BRANCH,
    )
    bundle.initialize()

    dotgit_dir = bundle.repo_path / ".git"
    assert not dotgit_dir.exists()

    # The checked-out files themselves must still be present after pruning.
    top_level_files = {entry.name for entry in bundle.path.iterdir() if entry.is_file()}
    assert {"test_dag.py"} == top_level_files

    assert_repo_is_closed(bundle)

@mock.patch("airflow.providers.git.bundles.git.GitHook")
def test_keeps_git_dir_when_disabled(self, mock_githook, git_repo):
    """With ``prune_dotgit_folder=False`` the versioned bundle keeps its ``.git`` directory."""
    origin_path, origin_repo = git_repo
    mock_githook.return_value.repo_url = origin_path
    pinned_sha = origin_repo.head.commit.hexsha

    bundle = GitDagBundle(
        name="test",
        git_conn_id=CONN_HTTPS,
        version=pinned_sha,
        tracking_ref=GIT_DEFAULT_BRANCH,
        prune_dotgit_folder=False,
    )
    bundle.initialize()

    dotgit_dir = bundle.repo_path / ".git"
    assert dotgit_dir.exists()
    # This test also checks that the current version can still be read back.
    assert bundle.get_current_version() == pinned_sha

    assert_repo_is_closed(bundle)

@pytest.mark.parametrize(
"amend",
[
Expand Down
Loading