From aee4a2ea924557305828a4b5be529b617313b26a Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Wed, 9 Jul 2025 16:46:46 -0700 Subject: [PATCH] kpd: add optional mirror support We always clone a full repository. This is counterproductive and wasteful. Allow users to specify that they are using kpd to help test patches for a git tree for which we should expect a mirror under a target mirror path. Optionally, we also allow users to clarify that their target git tree is a linux clone, and in such cases we can always fall back to looking for a mirror path with the "linux.git" name. So for example, all kdevops enterprise deployments can easily profit from this as kdevops has support to mirror all target git trees it supports under /mirror/ through an NFS export for clients. And so small thin guests can be used for kpd instances, which can leverage this NFS export. This allows kpd to be run on smaller guests with lower storage needs. This should allow more than one kpd instance to run on small guests too. Generated-by: ChatGPT Codex Signed-off-by: Luis Chamberlain --- README.md | 15 ++++++++ configs/kpd.json | 4 +- kernel_patches_daemon/branch_worker.py | 25 +++++++++++- kernel_patches_daemon/config.py | 4 ++ kernel_patches_daemon/github_sync.py | 2 + tests/test_branch_worker.py | 53 ++++++++++++++++++++++++++ tests/test_config.py | 11 ++++++ tests/test_github_sync.py | 14 +++++++ 8 files changed, 126 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 028bfff..e767392 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,21 @@ poetry install poetry run python -m unittest ``` +### Mirror setup + +To make more efficient use of network bandwidth, consider having a mirror of your target git tree +under /mirror/ or something like that and set the "mirror_dir" configuration attribute to the +path where git trees can be found. + +If your git tree is a linux clone, set "linux_clone" to true.
In that case, if your +target repo's exact basename is not in the mirror path, for example {{ mirror_dir }}/linux-subsystem.git, +then the extra fallback path of {{ mirror_dir }}/linux.git will be used as a reference target. + +A reference target mirror path is only used if it exists. The mirror takes effect by leveraging +the git clone --reference option when cloning. Using this can save considerable bandwidth and +space, allowing kpd to run on thin guests in a corporate environment with, for example, an NFS +mount for local git trees on a network. + ## Running ``` poetry run python -m kernel_patches_daemon --config --label-color configs/labels.json diff --git a/configs/kpd.json b/configs/kpd.json index 13fcb92..194c9ab 100644 --- a/configs/kpd.json +++ b/configs/kpd.json @@ -41,5 +41,7 @@ "github_oauth_token": "" } }, - "base_directory": "/tmp/repos" + "base_directory": "/tmp/repos", + "mirror_dir": "/mirror/", + "linux_clone": true } diff --git a/kernel_patches_daemon/branch_worker.py b/kernel_patches_daemon/branch_worker.py index 80d801e..f045d21 100644 --- a/kernel_patches_daemon/branch_worker.py +++ b/kernel_patches_daemon/branch_worker.py @@ -547,6 +547,8 @@ def __init__( app_auth: Optional[Auth.AppInstallationAuth] = None, email: Optional[EmailConfig] = None, http_retries: Optional[int] = None, + linux_clone: bool = False, + mirror_dir: Optional[str] = None, ) -> None: super().__init__( repo_url=repo_url, @@ -559,6 +561,8 @@ def __init__( self.email = email self.log_extractor = log_extractor + self.mirror_dir = mirror_dir + self.linux_clone = linux_clone self.ci_repo_url = ci_repo_url self.ci_repo_dir = _uniq_tmp_folder(ci_repo_url, ci_branch, base_directory) self.ci_branch = ci_branch @@ -682,9 +686,28 @@ def do_sync(self) -> None: def full_sync(self, path: str, url: str, branch: str) -> git.Repo: logging.info(f"Doing full clone from {redact_url(url)}, branch: {branch}") + multi_opts: Optional[List[str]] = None + if self.mirror_dir: + upstream_name = 
os.path.basename(self.upstream_url) + reference_path = os.path.join(self.mirror_dir, upstream_name) + fallback = None + if self.linux_clone: + fallback = os.path.join(self.mirror_dir, "linux.git") + if ( + not os.path.exists(reference_path) + and fallback + and os.path.exists(fallback) + ): + reference_path = fallback + if os.path.exists(reference_path): + multi_opts = ["--reference", reference_path] + with HistogramMetricTimer(git_clone_duration, {"branch": branch}): shutil.rmtree(path, ignore_errors=True) - repo = git.Repo.clone_from(url, path) + if multi_opts: + repo = git.Repo.clone_from(url, path, multi_options=multi_opts) + else: + repo = git.Repo.clone_from(url, path) _reset_repo(repo, f"origin/{branch}") git_clone_counter.add(1, {"branch": branch}) diff --git a/kernel_patches_daemon/config.py b/kernel_patches_daemon/config.py index b1f6bd6..564bdb2 100644 --- a/kernel_patches_daemon/config.py +++ b/kernel_patches_daemon/config.py @@ -171,6 +171,8 @@ class KPDConfig: branches: Dict[str, BranchConfig] tag_to_branch_mapping: Dict[str, List[str]] base_directory: str + mirror_dir: Optional[str] = None + linux_clone: bool = False @classmethod def from_json(cls, json: Dict) -> "KPDConfig": @@ -203,6 +205,8 @@ def from_json(cls, json: Dict) -> "KPDConfig": for name, json_config in json["branches"].items() }, base_directory=json["base_directory"], + mirror_dir=json.get("mirror_dir"), + linux_clone=json.get("linux_clone", False), ) @classmethod diff --git a/kernel_patches_daemon/github_sync.py b/kernel_patches_daemon/github_sync.py index 3dd6e09..11bcd87 100644 --- a/kernel_patches_daemon/github_sync.py +++ b/kernel_patches_daemon/github_sync.py @@ -114,6 +114,8 @@ def __init__( ci_branch=branch_config.ci_branch, log_extractor=_log_extractor_from_project(kpd_config.patchwork.project), base_directory=kpd_config.base_directory, + mirror_dir=kpd_config.mirror_dir, + linux_clone=kpd_config.linux_clone, http_retries=http_retries, 
github_oauth_token=branch_config.github_oauth_token, app_auth=github_app_auth_from_branch_config(branch_config), diff --git a/tests/test_branch_worker.py b/tests/test_branch_worker.py index f4c7396..907bc9e 100644 --- a/tests/test_branch_worker.py +++ b/tests/test_branch_worker.py @@ -68,6 +68,7 @@ TEST_CI_REPO_URL = f"https://user:pass@127.0.0.1/ci-org/{TEST_CI_REPO}" TEST_CI_BRANCH = "test_ci_branch" TEST_BASE_DIRECTORY = "/repos" +TEST_MIRROR_DIRECTORY = "/mirror" TEST_BRANCH = "test-branch" TEST_CONFIG: Dict[str, Any] = { "version": 2, @@ -124,6 +125,8 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: "ci_branch": TEST_CI_BRANCH, "log_extractor": DefaultGithubLogExtractor(), "base_directory": TEST_BASE_DIRECTORY, + "mirror_dir": None, + "linux_clone": False, } presets.update(kwargs) @@ -464,6 +467,56 @@ def test_fetch_repo_path_exists_git_exception(self) -> None: self._bw.fetch_repo(*fetch_params) fr.assert_called_once_with(*fetch_params) + def test_full_sync_with_mirror_dir(self) -> None: + bw = BranchWorkerMock(mirror_dir=TEST_MIRROR_DIRECTORY) + reference = os.path.join( + TEST_MIRROR_DIRECTORY, os.path.basename(TEST_UPSTREAM_REPO_URL) + ) + with ( + patch("kernel_patches_daemon.branch_worker.os.path.exists") as exists, + patch("kernel_patches_daemon.branch_worker.shutil.rmtree") as rm, + ): + exists.side_effect = lambda p: p == reference + bw.upstream_url = TEST_UPSTREAM_REPO_URL + bw.full_sync("somepath", "giturl", "branch") + self._git_repo_mock.clone_from.assert_called_once_with( + "giturl", + "somepath", + multi_options=["--reference", reference], + ) + + def test_full_sync_with_linux_mirror_fallback(self) -> None: + bw = BranchWorkerMock(mirror_dir=TEST_MIRROR_DIRECTORY, linux_clone=True) + fallback = os.path.join(TEST_MIRROR_DIRECTORY, "linux.git") + with ( + patch("kernel_patches_daemon.branch_worker.os.path.exists") as exists, + patch("kernel_patches_daemon.branch_worker.shutil.rmtree") as rm, + ): + exists.side_effect = lambda p: p == 
fallback + bw.upstream_url = TEST_UPSTREAM_REPO_URL + bw.full_sync("somepath", "giturl", "branch") + self._git_repo_mock.clone_from.assert_called_once_with( + "giturl", + "somepath", + multi_options=["--reference", fallback], + ) + + def test_full_sync_without_linux_mirror_fallback(self) -> None: + bw = BranchWorkerMock(mirror_dir=TEST_MIRROR_DIRECTORY, linux_clone=False) + fallback = os.path.join(TEST_MIRROR_DIRECTORY, "linux.git") + with ( + patch("kernel_patches_daemon.branch_worker.os.path.exists") as exists, + patch("kernel_patches_daemon.branch_worker.shutil.rmtree") as rm, + ): + exists.side_effect = lambda p: p == fallback + bw.upstream_url = TEST_UPSTREAM_REPO_URL + bw.full_sync("somepath", "giturl", "branch") + # Without linux_clone we should not use the linux.git fallback + self._git_repo_mock.clone_from.assert_called_once_with( + "giturl", + "somepath", + ) + def test_expire_branches(self) -> None: """Only the branch that matches pattern and is expired should be deleted""" not_expired_time = datetime.fromtimestamp(3 * BRANCH_TTL) diff --git a/tests/test_config.py b/tests/test_config.py index 8d79fa3..c9cb025 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -208,5 +208,16 @@ def test_valid(self) -> None: ), }, base_directory="/repos", + mirror_dir=None, + linux_clone=False, ) self.assertEqual(config, expected_config) + + def test_linux_clone_enabled(self) -> None: + kpd_config_json = read_fixture("fixtures/kpd_config.json") + kpd_config_json["linux_clone"] = True + + with patch("builtins.open", mock_open(read_data="TEST_KEY_FILE_CONTENT")): + config = KPDConfig.from_json(kpd_config_json) + + self.assertTrue(config.linux_clone) diff --git a/tests/test_github_sync.py b/tests/test_github_sync.py index 789dbf4..95a75d8 100644 --- a/tests/test_github_sync.py +++ b/tests/test_github_sync.py @@ -126,6 +126,20 @@ class TestCase: gh.workers[TEST_BRANCH].ci_repo_dir.startswith(case.prefix), ) + def test_init_with_mirror_dir(self) -> None: + config = 
copy.copy(TEST_CONFIG) + config["mirror_dir"] = "/mirror" + kpd_config = KPDConfig.from_json(config) + gh = GithubSyncMock(kpd_config=kpd_config) + self.assertEqual("/mirror", gh.workers[TEST_BRANCH].mirror_dir) + + def test_init_with_linux_clone(self) -> None: + config = copy.copy(TEST_CONFIG) + config["linux_clone"] = True + kpd_config = KPDConfig.from_json(config) + gh = GithubSyncMock(kpd_config=kpd_config) + self.assertTrue(gh.workers[TEST_BRANCH].linux_clone) + def test_close_existing_prs_for_series(self) -> None: matching_pr_mock = MagicMock() matching_pr_mock.title = "matching"