Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Integration][GitLab] - Improve on GitOps push events #1028

Merged
merged 11 commits into from
Sep 18, 2024
8 changes: 8 additions & 0 deletions integrations/gitlab/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@ this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm

<!-- towncrier release notes start -->

0.1.121 (2024-09-17)
====================

### Improvements

- Improved on the way the integration handles GitOps push events by using only files that have been changed in the push even rather than fetching the entire repository tree (0.1.121)


0.1.120 (2024-09-17)
====================

Expand Down
8 changes: 6 additions & 2 deletions integrations/gitlab/gitlab_integration/core/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,23 @@

from port_ocean.core.models import Entity

from gitlab_integration.core.async_fetcher import AsyncFetcher

FILE_PROPERTY_PREFIX = "file://"
SEARCH_PROPERTY_PREFIX = "search://"
JSON_SUFFIX = ".json"


def generate_entity_from_port_yaml(
async def generate_entity_from_port_yaml(
raw_entity: Entity, project: Project, ref: str
) -> Entity:
properties = {}
for key, value in raw_entity.properties.items():
if isinstance(value, str) and value.startswith(FILE_PROPERTY_PREFIX):
file_meta = Path(value.replace(FILE_PROPERTY_PREFIX, ""))
gitlab_file = project.files.get(file_path=str(file_meta), ref=ref)
gitlab_file = await AsyncFetcher.fetch_single(
project.files.get, str(file_meta), ref
)

if file_meta.suffix == JSON_SUFFIX:
properties[key] = json.loads(gitlab_file.decode().decode("utf-8"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ async def _notify(self, event_id: str, body: dict[str, Any]) -> None:
)

if not observers:
logger.debug(
logger.info(
f"event: {event_id} has no matching handler. the handlers available are for events: {self._observers.keys()}"
)

Expand Down
143 changes: 131 additions & 12 deletions integrations/gitlab/gitlab_integration/events/hooks/push.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import typing
from typing import Any
from enum import StrEnum

from loguru import logger
from gitlab.v4.objects import Project

from gitlab_integration.core.utils import generate_ref
from gitlab_integration.core.utils import generate_ref, does_pattern_apply
from gitlab_integration.events.hooks.base import ProjectHandler
from gitlab_integration.git_integration import GitlabPortAppConfig
from gitlab_integration.utils import ObjectKind
Expand All @@ -14,36 +15,84 @@
from port_ocean.context.ocean import ocean


class FileAction(StrEnum):
DELETED = "deleted"
ADDED = "added"
MODIFIED = "modified"


class PushHook(ProjectHandler):
events = ["Push Hook"]
system_events = ["push"]

async def _on_hook(self, body: dict[str, Any], gitlab_project: Project) -> None:
before, after, ref = body.get("before"), body.get("after"), body.get("ref")
commit_before, commit_after, ref = (
body.get("before"),
body.get("after"),
body.get("ref"),
)

if before is None or after is None or ref is None:
if commit_before is None or commit_after is None or ref is None:
raise ValueError(
"Invalid push hook. Missing one or more of the required fields (before, after, ref)"
)

added_files = [
added_file
for commit in body.get("commits", [])
for added_file in commit.get("added", [])
]
modified_files = [
modified_file
for commit in body.get("commits", [])
for modified_file in commit.get("modified", [])
]

removed_files = [
removed_file
for commit in body.get("commits", [])
for removed_file in commit.get("removed", [])
]

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe worth using the same verb names as in the push hook.
instead of diverging between removed and deleted to use the same removed key that way in the commit.get( you could use the consts that you've created

config: GitlabPortAppConfig = typing.cast(
GitlabPortAppConfig, event.port_app_config
)

branch = config.branch or gitlab_project.default_branch

if generate_ref(branch) == ref:
entities_before, entities_after = (
await self.gitlab_service.get_entities_diff(
gitlab_project, config.spec_path, before, after, branch
)
)
spec_path = config.spec_path
if not isinstance(spec_path, list):
spec_path = [spec_path]

# update the entities diff found in the `config.spec_path` file the user configured
await ocean.update_diff(
{"before": entities_before, "after": entities_after},
UserAgentType.gitops,
await self._process_files(
gitlab_project,
removed_files,
spec_path,
commit_before,
"",
branch,
FileAction.DELETED,
)
await self._process_files(
gitlab_project,
added_files,
spec_path,
"",
commit_after,
branch,
FileAction.ADDED,
)
await self._process_files(
gitlab_project,
modified_files,
spec_path,
commit_before,
commit_after,
branch,
FileAction.MODIFIED,
)

# update information regarding the project as well
logger.info(
f"Updating project information after push hook for project {gitlab_project.path_with_namespace}"
Expand All @@ -52,8 +101,78 @@ async def _on_hook(self, body: dict[str, Any], gitlab_project: Project) -> None:
gitlab_project
)
await ocean.register_raw(ObjectKind.PROJECT, [enriched_project])

else:
logger.debug(
f"Skipping push hook for project {gitlab_project.path_with_namespace} because the ref {ref} "
f"does not match the branch {branch}"
)

async def _process_files(
self,
gitlab_project: Project,
files: list[str],
spec_path: list[str],
commit_before: str,
commit_after: str,
branch: str,
file_action: FileAction,
) -> None:
if not files:
return
logger.info(
f"Processing {file_action} files {files} for project {gitlab_project.path_with_namespace}"
)

for file in files:
try:
if does_pattern_apply(spec_path, file):
logger.info(
f"Found file {file} in spec_path {spec_path} pattern, processing its entity diff"
)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder why would we want to pass all the files that have been added / removed / modified to this function and to this log.

it can be very spammy and irrelevant.
I would rather move this outside of this method, to where we filter the each type of changed files.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've removed it from the loop. i see we are already logging the file info in the method where we get the entities from the commit

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Screenshot 2024-09-18 at 12 16 37 PM So far I don't think it's spammy, we only log the relevant info that needs to be displayed. And this will be crucial for debugging


Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

redundant

if file_action == FileAction.DELETED:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

match, case here will be prettier imo

entities_before = (
await self.gitlab_service._get_entities_by_commit(
gitlab_project, file, commit_before, branch
)
)
await ocean.update_diff(
{"before": entities_before, "after": []},
UserAgentType.gitops,
)

elif file_action == FileAction.ADDED:
entities_after = (
await self.gitlab_service._get_entities_by_commit(
gitlab_project, file, commit_after, branch
)
)
await ocean.update_diff(
{"before": [], "after": entities_after},
UserAgentType.gitops,
)

elif file_action == FileAction.MODIFIED:
entities_before = (
await self.gitlab_service._get_entities_by_commit(
gitlab_project, file, commit_before, branch
)
)
entities_after = (
await self.gitlab_service._get_entities_by_commit(
gitlab_project, file, commit_after, branch
)
)
await ocean.update_diff(
{"before": entities_before, "after": entities_after},
UserAgentType.gitops,
)
else:
logger.info(
f"Skipping file {file} as it does not match the spec_path pattern {spec_path}"
)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should be debug I think 🤔

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and should be moved outside of the method

except Exception as e:
logger.error(
f"Error processing file {file} in action {file_action}: {str(e)}"
)
25 changes: 13 additions & 12 deletions integrations/gitlab/gitlab_integration/gitlab_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,11 +166,14 @@ async def search_files_in_project(
if files_with_content:
yield files_with_content

def _get_entities_from_git(
self, project: Project, file_name: str, sha: str, ref: str
async def _get_entities_from_git(
self, project: Project, file_path: str | List[str], sha: str, ref: str
) -> List[Entity]:
try:
file_content = project.files.get(file_path=file_name, ref=sha)
file_content = await AsyncFetcher.fetch_single(
project.files.get, file_path, sha
)

entities = yaml.safe_load(file_content.decode())
raw_entities = [
Entity(**entity_data)
Expand All @@ -179,29 +182,27 @@ def _get_entities_from_git(
)
]
return [
generate_entity_from_port_yaml(entity_data, project, ref)
await generate_entity_from_port_yaml(entity_data, project, ref)
for entity_data in raw_entities
]
except ParserError as exec:
logger.error(
f"Failed to parse gitops entities from gitlab project {project.path_with_namespace},z file {file_name}."
f"Failed to parse gitops entities from gitlab project {project.path_with_namespace},z file {file_path}."
f"\n {exec}"
)
except Exception:
logger.error(
f"Failed to get gitops entities from gitlab project {project.path_with_namespace}, file {file_name}"
f"Failed to get gitops entities from gitlab project {project.path_with_namespace}, file {file_path}"
)
return []

async def _get_entities_by_commit(
self, project: Project, spec: str | List["str"], commit: str, ref: str
) -> List[Entity]:
spec_paths = await self.get_all_file_paths(project, spec, commit)
return [
entity
for path in spec_paths
for entity in self._get_entities_from_git(project, path, commit, ref)
]
logger.info(
f"Getting entities for project {project.path_with_namespace} in path {spec} at commit {commit} and ref {ref}"
)
return await self._get_entities_from_git(project, spec, commit, ref)

def should_run_for_path(self, path: str) -> bool:
return any(does_pattern_apply(mapping, path) for mapping in self.group_mapping)
Expand Down
4 changes: 2 additions & 2 deletions integrations/gitlab/gitlab_integration/ocean.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ async def handle_webhook_request(group_id: str, request: Request) -> dict[str, A
event_id = f"{request.headers.get('X-Gitlab-Event')}:{group_id}"
with logger.contextualize(event_id=event_id):
try:
logger.debug(f"Received webhook event {event_id} from Gitlab")
logger.info(f"Received webhook event {event_id} from Gitlab")
body = await request.json()
await event_handler.notify(event_id, body)
return {"ok": True}
Expand All @@ -50,7 +50,7 @@ async def handle_system_webhook_request(request: Request) -> dict[str, Any]:
# some system hooks have event_type instead of event_name in the body, such as merge_request events
event_name: str = str(body.get("event_name") or body.get("event_type"))
with logger.contextualize(event_name=event_name):
logger.debug(f"Received system webhook event {event_name} from Gitlab")
logger.info(f"Received system webhook event {event_name} from Gitlab")
await system_event_handler.notify(event_name, body)

return {"ok": True}
Expand Down
2 changes: 1 addition & 1 deletion integrations/gitlab/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "gitlab"
version = "0.1.120"
version = "0.1.121"
description = "Gitlab integration for Port using Port-Ocean Framework"
authors = ["Yair Siman-Tov <yair@getport.io>"]

Expand Down
Loading