Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions backend/infrahub/core/schema/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -774,10 +774,15 @@ def purge_inactive_branches(self, active_branches: list[str]) -> list[str]:
"""Return non active branches that were purged."""

hashes_to_keep: set[str] = set()
branch_processed: set[str] = set()
for active_branch in active_branches:
if branch := self._branches.get(active_branch):
nodes = branch.get_all(include_internal=True, duplicate=False)
hashes_to_keep.update([node.get_hash() for node in nodes.values()])
branch_hash = self._branch_hash_by_name.get(active_branch)
if not branch_hash or branch_hash not in branch_processed:
if branch_hash:
branch_processed.add(branch_hash)
if branch := self._branches.get(active_branch):
nodes = branch.get_all(include_internal=True, duplicate=False)
hashes_to_keep.update([node.get_hash() for node in nodes.values()])

removed_branches: list[str] = []
for branch_name in list(self._branches.keys()):
Expand Down
13 changes: 13 additions & 0 deletions backend/infrahub/git/models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from pydantic import BaseModel, ConfigDict, Field

from infrahub.context import InfrahubContext
from infrahub.core.node import Node
from infrahub.core.protocols import CoreReadOnlyRepository, CoreRepository
from infrahub.message_bus.types import ProposedChangeBranchDiff


Expand Down Expand Up @@ -201,11 +203,22 @@ class RepositoryBranchInfo(BaseModel):


class RepositoryData(BaseModel):
model_config = ConfigDict(arbitrary_types_allowed=True)

repository_id: str = Field(..., description="Id of the repository")
repository_name: str = Field(..., description="Name of the repository")
repository: CoreRepository | CoreReadOnlyRepository | Node = Field(
..., description="InfrahubNode representing a Repository"
)
branches: dict[str, str] = Field(
...,
description="Dictionary with the name of the branch as the key and the active commit id as the value",
)

branch_info: dict[str, RepositoryBranchInfo] = Field(default_factory=dict)

def get_staging_branch(self) -> str | None:
for branch, info in self.branch_info.items():
if info.internal_status == "staging":
return branch
return None
42 changes: 23 additions & 19 deletions backend/infrahub/git/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
UserCheckDefinitionData,
)
from .repository import InfrahubReadOnlyRepository, InfrahubRepository, get_initialized_repo
from .utils import fetch_artifact_definition_targets, fetch_check_definition_targets
from .utils import fetch_artifact_definition_targets, fetch_check_definition_targets, get_repositories_commit_per_branch


@flow(
Expand Down Expand Up @@ -195,13 +195,17 @@ async def sync_git_repo_with_origin_and_tag_on_failure(
@flow(name="git_repositories_sync", flow_run_name="Sync Git Repositories")
async def sync_remote_repositories() -> None:
log = get_run_logger()
db = await get_database()

client = get_client()

branches = await client.branch.all()
repositories = await client.get_list_repositories(branches=branches, kind=InfrahubKind.REPOSITORY)
async with db.start_session() as dbs:
repositories = await get_repositories_commit_per_branch(db=dbs, kind=InfrahubKind.REPOSITORY)

for repo_name, repository_data in repositories.items():
repository: CoreRepository = repository_data.repository

active_internal_status = RepositoryInternalStatus.ACTIVE.value
default_internal_status = repository_data.branch_info[registry.default_branch].internal_status
staging_branch = None
Expand All @@ -215,12 +219,12 @@ async def sync_remote_repositories() -> None:
init_failed = False
try:
repo = await InfrahubRepository.init(
id=repository_data.repository.id,
name=repository_data.repository.name.value,
location=repository_data.repository.location.value,
id=repository.id,
name=repository.name.value,
location=repository.location.value,
client=client,
internal_status=active_internal_status,
default_branch_name=repository_data.repository.default_branch.value,
default_branch_name=repository.default_branch.value,
)
except RepositoryError as exc:
get_logger().error(str(exc))
Expand All @@ -229,12 +233,12 @@ async def sync_remote_repositories() -> None:
if init_failed:
try:
repo = await InfrahubRepository.new(
id=repository_data.repository.id,
name=repository_data.repository.name.value,
location=repository_data.repository.location.value,
id=repository.id,
name=repository.name.value,
location=repository.location.value,
client=client,
internal_status=active_internal_status,
default_branch_name=repository_data.repository.default_branch.value,
default_branch_name=repository.default_branch.value,
)
await repo.import_objects_from_files( # type: ignore[call-overload]
git_branch_name=registry.default_branch, infrahub_branch_name=infrahub_branch
Expand All @@ -246,22 +250,22 @@ async def sync_remote_repositories() -> None:
try:
await sync_git_repo_with_origin_and_tag_on_failure(
client=client,
repository_id=repository_data.repository.id,
repository_name=repository_data.repository.name.value,
repository_location=repository_data.repository.location.value,
repository_id=repository.id,
repository_name=repository.name.value,
repository_location=repository.location.value,
internal_status=active_internal_status,
default_branch_name=repository_data.repository.default_branch.value,
operational_status=repository_data.repository.operational_status.value,
default_branch_name=repository.default_branch.value,
operational_status=repository.operational_status.value,
staging_branch=staging_branch,
infrahub_branch=infrahub_branch,
)
# Tell workers to fetch to stay in sync
message = messages.RefreshGitFetch(
meta=Meta(initiator_id=WORKER_IDENTITY, request_id=get_log_data().get("request_id", "")),
location=repository_data.repository.location.value,
repository_id=repository_data.repository.id,
repository_name=repository_data.repository.name.value,
repository_kind=repository_data.repository.get_kind(),
location=repository.location.value,
repository_id=repository.id,
repository_name=repository.name.value,
repository_kind=repository.get_kind(),
infrahub_branch_name=infrahub_branch,
infrahub_branch_id=branches[infrahub_branch].id,
)
Expand Down
25 changes: 16 additions & 9 deletions backend/infrahub/git/utils.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,32 @@
import re
from collections import defaultdict
from typing import TYPE_CHECKING, Any
from typing import Any

from infrahub_sdk import InfrahubClient
from infrahub_sdk.node import RelationshipManager
from infrahub_sdk.protocols import CoreArtifactDefinition, CoreCheckDefinition, CoreGroup
from infrahub_sdk.protocols import (
CoreArtifactDefinition,
CoreCheckDefinition,
CoreGroup,
CoreReadOnlyRepository,
CoreRepository,
)
from infrahub_sdk.types import Order

from infrahub.core import registry
from infrahub.core.constants import InfrahubKind
from infrahub.core.manager import NodeManager
from infrahub.database import InfrahubDatabase
from infrahub.generators.models import ProposedChangeGeneratorDefinition
from infrahub.graphql.models import OrderModel

from .. import config
from .models import RepositoryBranchInfo, RepositoryData

if TYPE_CHECKING:
from infrahub.core.protocols import CoreGenericRepository


async def get_repositories_commit_per_branch(
db: InfrahubDatabase,
kind: str = InfrahubKind.GENERICREPOSITORY,
) -> dict[str, RepositoryData]:
"""Get a list of all repositories and their commit on each branches.

Expand All @@ -33,11 +38,12 @@ async def get_repositories_commit_per_branch(
repositories: dict[str, RepositoryData] = {}

for branch in list(registry.branch.values()):
repos: list[CoreGenericRepository] = await NodeManager.query(
repos: list[CoreRepository | CoreReadOnlyRepository] = await NodeManager.query(
db=db,
branch=branch,
fields={"id": None, "name": None, "commit": None, "internal_status": None},
schema=InfrahubKind.GENERICREPOSITORY,
fields={"id": None, "name": None, "commit": None, "internal_status": None, "location": None, "ref": None},
schema=kind,
order=OrderModel(disable=True),
)

for repository in repos:
Expand All @@ -46,10 +52,11 @@ async def get_repositories_commit_per_branch(
repositories[repo_name] = RepositoryData(
repository_id=repository.get_id(),
repository_name=repo_name,
repository=repository,
branches={},
)

repositories[repo_name].branches[branch.name] = repository.commit.value # type: ignore[attr-defined]
repositories[repo_name].branches[branch.name] = repository.commit.value
repositories[repo_name].branch_info[branch.name] = RepositoryBranchInfo(
internal_status=repository.internal_status.value
)
Expand Down
12 changes: 8 additions & 4 deletions backend/tests/unit/git/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,15 @@ async def test_get_repositories_commit_per_branch_main(
repositories = await get_repositories_commit_per_branch(db=db)
assert list(repositories.keys()) == ["repo01", "repo02"]

assert repositories["repo01"].model_dump() == {
assert repositories["repo01"].repository.id == repository_01.id
assert repositories["repo01"].model_dump(exclude=["repository"]) == {
"repository_id": repository_01.id,
"repository_name": "repo01",
"branches": {"main": "commit01", "-global-": "commit01"},
"branch_info": {"main": {"internal_status": "inactive"}, "-global-": {"internal_status": "inactive"}},
}
assert repositories["repo02"].model_dump() == {
assert repositories["repo02"].repository.id == repository_02.id
assert repositories["repo02"].model_dump(exclude=["repository"]) == {
"repository_id": repository_02.id,
"repository_name": "repo02",
"branches": {"main": "commit02", "-global-": None},
Expand All @@ -61,7 +63,8 @@ async def test_get_repositories_commit_per_branch_branches(
repositories = await get_repositories_commit_per_branch(db=db)
assert list(repositories.keys()) == ["repo01", "repo02"]

assert repositories["repo01"].model_dump() == {
assert repositories["repo01"].repository.id == repository_01.id
assert repositories["repo01"].model_dump(exclude=["repository"]) == {
"repository_id": repository_01.id,
"repository_name": "repo01",
"branches": {
Expand All @@ -77,7 +80,8 @@ async def test_get_repositories_commit_per_branch_branches(
"main": {"internal_status": "inactive"},
},
}
assert repositories["repo02"].model_dump() == {
assert repositories["repo02"].repository.id == repository_02.id
assert repositories["repo02"].model_dump(exclude=["repository"]) == {
"repository_id": repository_02.id,
"repository_name": "repo02",
"branches": {
Expand Down
1 change: 1 addition & 0 deletions changelog/+d9659fb5.fixed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improve branch creation and repository sync performance when having a lot of branches.
Loading