Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Integration][GitLab] - Improve on GitOps push events #1028

Merged
merged 11 commits into from
Sep 18, 2024
8 changes: 8 additions & 0 deletions integrations/gitlab/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@ this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm

<!-- towncrier release notes start -->

0.1.121 (2024-09-17)
====================

### Improvements

- Improved on the way the integration handles GitOps push events by using only files that have been changed in the push even rather than fetching the entire repository tree (0.1.121)


0.1.120 (2024-09-17)
====================

Expand Down
8 changes: 6 additions & 2 deletions integrations/gitlab/gitlab_integration/core/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,23 @@

from port_ocean.core.models import Entity

from gitlab_integration.core.async_fetcher import AsyncFetcher

FILE_PROPERTY_PREFIX = "file://"
SEARCH_PROPERTY_PREFIX = "search://"
JSON_SUFFIX = ".json"


def generate_entity_from_port_yaml(
async def generate_entity_from_port_yaml(
raw_entity: Entity, project: Project, ref: str
) -> Entity:
properties = {}
for key, value in raw_entity.properties.items():
if isinstance(value, str) and value.startswith(FILE_PROPERTY_PREFIX):
file_meta = Path(value.replace(FILE_PROPERTY_PREFIX, ""))
gitlab_file = project.files.get(file_path=str(file_meta), ref=ref)
gitlab_file = await AsyncFetcher.fetch_single(
project.files.get, str(file_meta), ref
)

if file_meta.suffix == JSON_SUFFIX:
properties[key] = json.loads(gitlab_file.decode().decode("utf-8"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,16 @@ async def _start_event_processor(self) -> None:
logger.info(f"Started {self.__class__.__name__} worker")
while True:
event_ctx, event_id, body = await self.webhook_tasks_queue.get()
logger.debug(f"Retrieved event: {event_id} from Queue, notifying observers")
try:
async with event_context(
"gitlab_http_event_async_worker", parent_override=event_ctx
):
await self._notify(event_id, body)
except Exception as e:
logger.error(
f"Error notifying observers for event: {event_id}, error: {e}"
)
finally:
self.webhook_tasks_queue.task_done()

Expand All @@ -44,6 +49,7 @@ async def _notify(self, event_id: str, body: dict[str, Any]) -> None:
pass

async def notify(self, event_id: str, body: dict[str, Any]) -> None:
logger.debug(f"Received event: {event_id}, putting it in Queue for processing")
await self.webhook_tasks_queue.put(
(
deepcopy(current_event_context),
Expand Down Expand Up @@ -71,7 +77,7 @@ async def _notify(self, event_id: str, body: dict[str, Any]) -> None:
)

if not observers:
logger.debug(
logger.info(
f"event: {event_id} has no matching handler. the handlers available are for events: {self._observers.keys()}"
)

Expand Down
9 changes: 7 additions & 2 deletions integrations/gitlab/gitlab_integration/events/hooks/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,13 @@ async def on_hook(self, event: str, body: dict[str, Any]) -> None:
logger.info(
f"Handling hook {event} for project {project.path_with_namespace}"
)
await self._on_hook(body, project)
logger.info(f"Finished handling {event}")
try:
await self._on_hook(body, project)
logger.info(f"Finished handling {event}")
except Exception as e:
logger.error(
f"Error handling hook {event} for project {project.path_with_namespace}. Error: {e}"
)
else:
logger.info(
f"Project {body['project']['id']} was filtered for event {event}. Skipping..."
Expand Down
149 changes: 137 additions & 12 deletions integrations/gitlab/gitlab_integration/events/hooks/push.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import typing
from typing import Any
from enum import StrEnum

from loguru import logger
from gitlab.v4.objects import Project

from gitlab_integration.core.utils import generate_ref
from gitlab_integration.core.utils import generate_ref, does_pattern_apply
from gitlab_integration.events.hooks.base import ProjectHandler
from gitlab_integration.git_integration import GitlabPortAppConfig
from gitlab_integration.utils import ObjectKind
Expand All @@ -14,36 +15,84 @@
from port_ocean.context.ocean import ocean


class FileAction(StrEnum):
REMOVED = "removed"
ADDED = "added"
MODIFIED = "modified"


class PushHook(ProjectHandler):
events = ["Push Hook"]
system_events = ["push"]

async def _on_hook(self, body: dict[str, Any], gitlab_project: Project) -> None:
before, after, ref = body.get("before"), body.get("after"), body.get("ref")
commit_before, commit_after, ref = (
body.get("before"),
body.get("after"),
body.get("ref"),
)

if before is None or after is None or ref is None:
if commit_before is None or commit_after is None or ref is None:
raise ValueError(
"Invalid push hook. Missing one or more of the required fields (before, after, ref)"
)

added_files = [
added_file
for commit in body.get("commits", [])
for added_file in commit.get(FileAction.ADDED, [])
]
modified_files = [
modified_file
for commit in body.get("commits", [])
for modified_file in commit.get(FileAction.MODIFIED, [])
]

removed_files = [
removed_file
for commit in body.get("commits", [])
for removed_file in commit.get(FileAction.REMOVED, [])
]

config: GitlabPortAppConfig = typing.cast(
GitlabPortAppConfig, event.port_app_config
)

branch = config.branch or gitlab_project.default_branch

if generate_ref(branch) == ref:
entities_before, entities_after = (
await self.gitlab_service.get_entities_diff(
gitlab_project, config.spec_path, before, after, branch
)
)
spec_path = config.spec_path
if not isinstance(spec_path, list):
spec_path = [spec_path]

# update the entities diff found in the `config.spec_path` file the user configured
await ocean.update_diff(
{"before": entities_before, "after": entities_after},
UserAgentType.gitops,
await self._process_files(
gitlab_project,
removed_files,
spec_path,
commit_before,
"",
branch,
FileAction.REMOVED,
)
await self._process_files(
gitlab_project,
added_files,
spec_path,
"",
commit_after,
branch,
FileAction.ADDED,
)
await self._process_files(
gitlab_project,
modified_files,
spec_path,
commit_before,
commit_after,
branch,
FileAction.MODIFIED,
)

# update information regarding the project as well
logger.info(
f"Updating project information after push hook for project {gitlab_project.path_with_namespace}"
Expand All @@ -52,8 +101,84 @@ async def _on_hook(self, body: dict[str, Any], gitlab_project: Project) -> None:
gitlab_project
)
await ocean.register_raw(ObjectKind.PROJECT, [enriched_project])

else:
logger.debug(
f"Skipping push hook for project {gitlab_project.path_with_namespace} because the ref {ref} "
f"does not match the branch {branch}"
)

async def _process_files(
self,
gitlab_project: Project,
files: list[str],
spec_path: list[str],
commit_before: str,
commit_after: str,
branch: str,
file_action: FileAction,
) -> None:
if not files:
return
logger.info(
f"Processing {file_action} files {files} for project {gitlab_project.path_with_namespace}"
)
matching_files = [file for file in files if does_pattern_apply(spec_path, file)]

if not matching_files:
logger.info("No matching files found for mapping")
logger.debug(f"Files {files} didn't match {spec_path} patten")
return
else:
logger.info(
f"While processing {file_action} Found {len(matching_files)} that matches {spec_path}, matching files: {matching_files}"
)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

redundant

for file in matching_files:
try:
match file_action:
case FileAction.REMOVED:
entities_before = (
await self.gitlab_service._get_entities_by_commit(
gitlab_project, file, commit_before, branch
)
)
await ocean.update_diff(
{"before": entities_before, "after": []},
UserAgentType.gitops,
)

case FileAction.ADDED:
entities_after = (
await self.gitlab_service._get_entities_by_commit(
gitlab_project, file, commit_after, branch
)
)
await ocean.update_diff(
{"before": [], "after": entities_after},
UserAgentType.gitops,
)

case FileAction.MODIFIED:
entities_before = (
await self.gitlab_service._get_entities_by_commit(
gitlab_project, file, commit_before, branch
)
)
entities_after = (
await self.gitlab_service._get_entities_by_commit(
gitlab_project, file, commit_after, branch
)
)
await ocean.update_diff(
{"before": entities_before, "after": entities_after},
UserAgentType.gitops,
)
except Exception as e:
logger.error(
f"Error processing file {file} in action {file_action}: {str(e)}"
)
skipped_files = set(files) - set(matching_files)
logger.debug(
f"Skipped {len(skipped_files)} files as they didn't match {spec_path} Skipped files: {skipped_files}"
)
25 changes: 13 additions & 12 deletions integrations/gitlab/gitlab_integration/gitlab_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,11 +166,14 @@ async def search_files_in_project(
if files_with_content:
yield files_with_content

def _get_entities_from_git(
self, project: Project, file_name: str, sha: str, ref: str
async def _get_entities_from_git(
self, project: Project, file_path: str | List[str], sha: str, ref: str
) -> List[Entity]:
try:
file_content = project.files.get(file_path=file_name, ref=sha)
file_content = await AsyncFetcher.fetch_single(
project.files.get, file_path, sha
)

entities = yaml.safe_load(file_content.decode())
raw_entities = [
Entity(**entity_data)
Expand All @@ -179,29 +182,27 @@ def _get_entities_from_git(
)
]
return [
generate_entity_from_port_yaml(entity_data, project, ref)
await generate_entity_from_port_yaml(entity_data, project, ref)
for entity_data in raw_entities
]
except ParserError as exec:
logger.error(
f"Failed to parse gitops entities from gitlab project {project.path_with_namespace},z file {file_name}."
f"Failed to parse gitops entities from gitlab project {project.path_with_namespace},z file {file_path}."
f"\n {exec}"
)
except Exception:
logger.error(
f"Failed to get gitops entities from gitlab project {project.path_with_namespace}, file {file_name}"
f"Failed to get gitops entities from gitlab project {project.path_with_namespace}, file {file_path}"
)
return []

async def _get_entities_by_commit(
self, project: Project, spec: str | List["str"], commit: str, ref: str
) -> List[Entity]:
spec_paths = await self.get_all_file_paths(project, spec, commit)
return [
entity
for path in spec_paths
for entity in self._get_entities_from_git(project, path, commit, ref)
]
logger.info(
f"Getting entities for project {project.path_with_namespace} in path {spec} at commit {commit} and ref {ref}"
)
return await self._get_entities_from_git(project, spec, commit, ref)

def should_run_for_path(self, path: str) -> bool:
return any(does_pattern_apply(mapping, path) for mapping in self.group_mapping)
Expand Down
4 changes: 2 additions & 2 deletions integrations/gitlab/gitlab_integration/ocean.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ async def handle_webhook_request(group_id: str, request: Request) -> dict[str, A
event_id = f"{request.headers.get('X-Gitlab-Event')}:{group_id}"
with logger.contextualize(event_id=event_id):
try:
logger.debug(f"Received webhook event {event_id} from Gitlab")
logger.info(f"Received webhook event {event_id} from Gitlab")
body = await request.json()
await event_handler.notify(event_id, body)
return {"ok": True}
Expand All @@ -50,7 +50,7 @@ async def handle_system_webhook_request(request: Request) -> dict[str, Any]:
# some system hooks have event_type instead of event_name in the body, such as merge_request events
event_name: str = str(body.get("event_name") or body.get("event_type"))
with logger.contextualize(event_name=event_name):
logger.debug(f"Received system webhook event {event_name} from Gitlab")
logger.info(f"Received system webhook event {event_name} from Gitlab")
await system_event_handler.notify(event_name, body)

return {"ok": True}
Expand Down
2 changes: 1 addition & 1 deletion integrations/gitlab/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "gitlab"
version = "0.1.120"
version = "0.1.121"
description = "Gitlab integration for Port using Port-Ocean Framework"
authors = ["Yair Siman-Tov <yair@getport.io>"]

Expand Down
Loading