Skip to content

Commit

Permalink
unrevert #32806 (#32847)
Browse files Browse the repository at this point in the history
Co-authored-by: postamar <postamar@users.noreply.github.com>
  • Loading branch information
postamar and postamar authored Nov 28, 2023
1 parent 71a84f0 commit 3126204
Show file tree
Hide file tree
Showing 12 changed files with 99 additions and 117 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/connectors_nightly_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
- name: Test connectors
uses: ./.github/actions/run-dagger-pipeline
with:
context: "nightly_builds"
context: "master"
docker_hub_password: ${{ secrets.DOCKER_HUB_PASSWORD }}
docker_hub_username: ${{ secrets.DOCKER_HUB_USERNAME }}
gcp_gsm_credentials: ${{ secrets.GCP_GSM_CREDENTIALS }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/connectors_weekly_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
- name: Test connectors
uses: ./.github/actions/run-dagger-pipeline
with:
context: "nightly_builds"
context: "master"
ci_job_key: "weekly_alpha_test"
docker_hub_password: ${{ secrets.DOCKER_HUB_PASSWORD }}
docker_hub_username: ${{ secrets.DOCKER_HUB_USERNAME }}
Expand Down
5 changes: 4 additions & 1 deletion airbyte-ci/connectors/pipelines/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,10 @@ This command runs the Python tests for an airbyte-ci poetry package.

## Changelog
| Version | PR | Description |
| ------- | ---------------------------------------------------------- | --------------------------------------------------------------------------------------------------------- |
|---------| ---------------------------------------------------------- |-----------------------------------------------------------------------------------------------------------|
| 2.7.3 | [#32847](https://github.com/airbytehq/airbyte/pull/32847) | Improve --modified behaviour for pull requests. |
| 2.7.2 | [#32839](https://github.com/airbytehq/airbyte/pull/32839) | Revert changes in v2.7.1. |
| 2.7.1 | [#32806](https://github.com/airbytehq/airbyte/pull/32806) | Improve --modified behaviour for pull requests. |
| 2.7.0 | [#31930](https://github.com/airbytehq/airbyte/pull/31930) | Merge airbyte-ci-internal into airbyte-ci |
| 2.6.0 | [#31831](https://github.com/airbytehq/airbyte/pull/31831) | Add `airbyte-ci format` commands, remove connector-specific formatting check |
| 2.5.9 | [#32427](https://github.com/airbytehq/airbyte/pull/32427) | Re-enable caching for source-postgres |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@
from pipelines import main_logger
from pipelines.cli.click_decorators import click_append_to_context_object, click_ignore_unused_kwargs, click_merge_args_into_context_obj
from pipelines.cli.lazy_group import LazyGroup
from pipelines.consts import CIContext
from pipelines.helpers.connectors.modifed import ConnectorWithModifiedFiles, get_connector_modified_files, get_modified_connectors
from pipelines.helpers.git import get_modified_files_in_branch, get_modified_files_in_commit
from pipelines.helpers.utils import transform_strs_to_paths

ALL_CONNECTORS = get_all_connectors_in_repo()

Expand Down Expand Up @@ -236,14 +239,41 @@ async def connectors(
"""Group all the connectors-ci command."""
validate_environment(ctx.obj["is_local"])

modified_files = []
if ctx.obj["modified"] or ctx.obj["metadata_changes_only"]:
modified_files = transform_strs_to_paths(
await get_modified_files(
ctx.obj["git_branch"],
ctx.obj["git_revision"],
ctx.obj["diffed_branch"],
ctx.obj["is_local"],
ctx.obj["ci_context"],
)
)

ctx.obj["selected_connectors_with_modified_files"] = get_selected_connectors_with_modified_files(
ctx.obj["names"],
ctx.obj["support_levels"],
ctx.obj["languages"],
ctx.obj["modified"],
ctx.obj["metadata_changes_only"],
ctx.obj["metadata_query"],
ctx.obj["modified_files"],
set(modified_files),
ctx.obj["enable_dependency_scanning"],
)
log_selected_connectors(ctx.obj["selected_connectors_with_modified_files"])


async def get_modified_files(git_branch: str, git_revision: str, diffed_branch: str, is_local: bool, ci_context: CIContext) -> Set[str]:
    """Get the set of modified files for the current git branch and CI context.

    Two strategies are used:

    * Head commit only: when the CI context is MASTER, or when it is MANUAL and the branch is "master".
      The head commit on master should be the merge commit of the latest merged pull request as we squash
      commits on merge. Pipelines like "publish on merge" are triggered on each new commit on master.
    * Whole branch: in every other case, including pull request contexts, the modified files are the ones
      reported for the current branch compared to `diffed_branch`. This is also the case we encounter when
      running the pipeline locally, on a local branch, or manually on GHA with a workflow dispatch event.

    Args:
        git_branch (str): The current git branch name.
        git_revision (str): The current git revision.
        diffed_branch (str): The branch the current branch is compared to when diffing a whole branch.
        is_local (bool): Whether the pipeline runs locally; forwarded to the git helpers.
        ci_context (CIContext): The CI context the pipeline is running in.

    Returns:
        Set[str]: The paths of the modified files, as strings.
    """
    # Identity checks are kept on purpose: only genuine CIContext members select the head-commit strategy.
    if ci_context is CIContext.MASTER or (ci_context is CIContext.MANUAL and git_branch == "master"):
        return await get_modified_files_in_commit(git_branch, git_revision, is_local)
    return await get_modified_files_in_branch(git_branch, git_revision, diffed_branch, is_local)
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def validate(self) -> StepResult:
async def _run(self) -> StepResult:
if not self.should_run:
return StepResult(self, status=StepStatus.SKIPPED, stdout="No modified files required a version bump.")
if self.context.ci_context in [CIContext.MASTER, CIContext.NIGHTLY_BUILDS]:
if self.context.ci_context == CIContext.MASTER:
return StepResult(self, status=StepStatus.SKIPPED, stdout="Version check are not running in master context.")
try:
return self.validate()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def format_java_container(dagger_client: dagger.Client) -> dagger.Container:
"yum install -y findutils", # gradle requires xargs, which is shipped in findutils.
"yum clean all",
],
env_vars={"RUN_IN_AIRBYTE_CI": "1"},
)


Expand Down
50 changes: 3 additions & 47 deletions airbyte-ci/connectors/pipelines/pipelines/cli/airbyte_ci.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import os
import sys
from pathlib import Path
from typing import List, Optional
from typing import Optional

import asyncclick as click
import docker
Expand All @@ -22,14 +22,8 @@
from pipelines.cli.telemetry import click_track_command
from pipelines.consts import DAGGER_WRAP_ENV_VAR_NAME, LOCAL_PIPELINE_PACKAGE_PATH, CIContext
from pipelines.helpers import github
from pipelines.helpers.git import (
get_current_git_branch,
get_current_git_revision,
get_modified_files_in_branch,
get_modified_files_in_commit,
get_modified_files_in_pull_request,
)
from pipelines.helpers.utils import get_current_epoch_time, transform_strs_to_paths
from pipelines.helpers.git import get_current_git_branch, get_current_git_revision
from pipelines.helpers.utils import get_current_epoch_time

# HELPERS

Expand Down Expand Up @@ -142,30 +136,6 @@ def set_working_directory_to_root() -> None:
os.chdir(working_dir)


async def get_modified_files(
    git_branch: str, git_revision: str, diffed_branch: str, is_local: bool, ci_context: CIContext, pull_request: PullRequest
) -> List[str]:
    """Resolve the modified files for the current run, based on the CI context.

    * MASTER and NIGHTLY_BUILDS contexts, as well as MANUAL runs on the "master" branch, only look at the head commit.
      The head commit on master should be the merge commit of the latest merged pull request as we squash commits on merge.
      Pipelines like "publish on merge" are triggered on each new commit on master.
    * A PULL_REQUEST context with a known pull request returns the files of that pull request, without using git diff.
    * Anything else returns the files modified in the current branch. This is the case we encounter when running
      the pipeline locally, on a local branch, or manually on GHA with a workflow dispatch event.
    """
    only_head_commit = (
        ci_context is CIContext.MASTER
        or ci_context is CIContext.NIGHTLY_BUILDS
        or (ci_context is CIContext.MANUAL and git_branch == "master")
    )
    if only_head_commit:
        return await get_modified_files_in_commit(git_branch, git_revision, is_local)

    has_pull_request = pull_request is not None
    if ci_context is CIContext.PULL_REQUEST and has_pull_request:
        return get_modified_files_in_pull_request(pull_request)

    # Manual runs on a non-master branch and every remaining context diff the whole branch.
    return await get_modified_files_in_branch(git_branch, git_revision, diffed_branch, is_local)


def log_git_info(ctx: click.Context):
main_logger.info("Running airbyte-ci in CI mode.")
main_logger.info(f"CI Context: {ctx.obj['ci_context']}")
Expand All @@ -176,7 +146,6 @@ def log_git_info(ctx: click.Context):
main_logger.info(f"GitHub Workflow Run URL: {ctx.obj['gha_workflow_run_url']}")
main_logger.info(f"Pull Request Number: {ctx.obj['pull_request_number']}")
main_logger.info(f"Pipeline Start Timestamp: {ctx.obj['pipeline_start_timestamp']}")
main_logger.info(f"Modified Files: {ctx.obj['modified_files']}")


def _get_gha_workflow_run_url(ctx: click.Context) -> Optional[str]:
Expand Down Expand Up @@ -244,18 +213,6 @@ def is_current_process_wrapped_by_dagger_run() -> bool:
return called_with_dagger_run


async def get_modified_files_str(ctx: click.Context):
    """Compute the modified files from the values stored on the click context and convert them with transform_strs_to_paths."""
    # Keys are listed in the positional order expected by get_modified_files.
    argument_keys = ("git_branch", "git_revision", "diffed_branch", "is_local", "ci_context", "pull_request")
    arguments = [ctx.obj[key] for key in argument_keys]
    found_files = await get_modified_files(*arguments)
    return transform_strs_to_paths(found_files)


# COMMANDS


Expand Down Expand Up @@ -304,7 +261,6 @@ async def get_modified_files_str(ctx: click.Context):
@click_append_to_context_object("is_ci", lambda ctx: not ctx.obj["is_local"])
@click_append_to_context_object("gha_workflow_run_url", _get_gha_workflow_run_url)
@click_append_to_context_object("pull_request", _get_pull_request)
@click_append_to_context_object("modified_files", get_modified_files_str)
@click.pass_context
@click_ignore_unused_kwargs
async def airbyte_ci(ctx: click.Context): # noqa D103
Expand Down
1 change: 0 additions & 1 deletion airbyte-ci/connectors/pipelines/pipelines/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ class CIContext(str, Enum):

MANUAL = "manual"
PULL_REQUEST = "pull_request"
NIGHTLY_BUILDS = "nightly_builds"
MASTER = "master"

def __str__(self) -> str:
Expand Down
38 changes: 38 additions & 0 deletions airbyte-ci/connectors/pipelines/pipelines/dagger/containers/git.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.

from typing import Optional

from dagger import Client, Container
from pipelines.helpers.utils import AIRBYTE_REPO_URL


async def checked_out_git_container(
    dagger_client: Client,
    current_git_branch: str,
    current_git_revision: str,
    diffed_branch: Optional[str] = None,
) -> Container:
    """Build a git-based container with the current branch checked out.

    The container starts from the `alpine/git:latest` image, initializes an empty repository in `/repo`,
    adds the Airbyte repository as the `origin` remote (fetching it while tracking the current and
    diffed branches) and checks out `origin/<current_git_branch>`.

    Args:
        dagger_client (Client): The dagger client used to build the container.
        current_git_branch (str): The branch to check out. A leading "origin/" is stripped.
        current_git_revision (str): The current git revision, exposed as the CACHEBUSTER environment variable.
        diffed_branch (Optional[str]): A second branch to track on the remote. A leading "origin/" is stripped.
            Defaults to the current branch when not provided.

    Returns:
        Container: The awaited container, with the current branch checked out.
    """
    current_git_branch = current_git_branch.removeprefix("origin/")
    # From here on diffed_branch is always a string, so no further None handling is needed below.
    diffed_branch = current_git_branch if diffed_branch is None else diffed_branch.removeprefix("origin/")
    return await (
        dagger_client.container()
        .from_("alpine/git:latest")
        .with_workdir("/repo")
        .with_exec(["init"])
        # NOTE(review): presumably set so that cached layers are not reused across revisions - confirm.
        .with_env_variable("CACHEBUSTER", current_git_revision)
        .with_exec(
            [
                "remote",
                "add",
                "--fetch",
                "--track",
                current_git_branch,
                "--track",
                diffed_branch,
                "origin",
                AIRBYTE_REPO_URL,
            ]
        )
        .with_exec(["checkout", "-t", f"origin/{current_git_branch}"])
    )
71 changes: 13 additions & 58 deletions airbyte-ci/connectors/pipelines/pipelines/helpers/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
#

import functools
from typing import List, Set
from typing import Set

import git
from dagger import Connection
from github import PullRequest
from pipelines.helpers.utils import AIRBYTE_REPO_URL, DAGGER_CONFIG, DIFF_FILTER
from pipelines.dagger.containers.git import checked_out_git_container
from pipelines.helpers.utils import DAGGER_CONFIG, DIFF_FILTER


def get_current_git_revision() -> str: # noqa D103
Expand All @@ -24,38 +24,17 @@ async def get_modified_files_in_branch_remote(
) -> Set[str]:
"""Use git diff to spot the modified files on the remote branch."""
async with Connection(DAGGER_CONFIG) as dagger_client:
modified_files = await (
dagger_client.container()
.from_("alpine/git:latest")
.with_workdir("/repo")
.with_exec(["init"])
.with_env_variable("CACHEBUSTER", current_git_revision)
.with_exec(
[
"remote",
"add",
"--fetch",
"--track",
diffed_branch.split("/")[-1],
"--track",
current_git_branch,
"origin",
AIRBYTE_REPO_URL,
]
)
.with_exec(["checkout", "-t", f"origin/{current_git_branch}"])
.with_exec(["diff", f"--diff-filter={DIFF_FILTER}", "--name-only", f"{diffed_branch}...{current_git_revision}"])
.stdout()
)
container = await checked_out_git_container(dagger_client, current_git_branch, current_git_revision, diffed_branch)
modified_files = await container.with_exec(
["diff", f"--diff-filter={DIFF_FILTER}", "--name-only", f"{diffed_branch}...{current_git_branch}"]
).stdout()
return set(modified_files.split("\n"))


def get_modified_files_in_branch_local(current_git_revision: str, diffed_branch: str = "master") -> Set[str]:
"""Use git diff and git status to spot the modified files on the local branch."""
def get_modified_files_local(current_git_revision: str, diffed: str = "master") -> Set[str]:
"""Use git diff and git status to spot the modified files in the local repo."""
airbyte_repo = git.Repo()
modified_files = airbyte_repo.git.diff(
f"--diff-filter={DIFF_FILTER}", "--name-only", f"{diffed_branch}...{current_git_revision}"
).split("\n")
modified_files = airbyte_repo.git.diff(f"--diff-filter={DIFF_FILTER}", "--name-only", f"{diffed}...{current_git_revision}").split("\n")
status_output = airbyte_repo.git.status("--porcelain")
for not_committed_change in status_output.split("\n"):
file_path = not_committed_change.strip().split(" ")[-1]
Expand All @@ -69,34 +48,15 @@ async def get_modified_files_in_branch(
) -> Set[str]:
"""Retrieve the list of modified files on the branch."""
if is_local:
return get_modified_files_in_branch_local(current_git_revision, diffed_branch)
return get_modified_files_local(current_git_revision, diffed_branch)
else:
return await get_modified_files_in_branch_remote(current_git_branch, current_git_revision, diffed_branch)


async def get_modified_files_in_commit_remote(current_git_branch: str, current_git_revision: str) -> Set[str]:
async with Connection(DAGGER_CONFIG) as dagger_client:
modified_files = await (
dagger_client.container()
.from_("alpine/git:latest")
.with_workdir("/repo")
.with_exec(["init"])
.with_env_variable("CACHEBUSTER", current_git_revision)
.with_exec(
[
"remote",
"add",
"--fetch",
"--track",
current_git_branch,
"origin",
AIRBYTE_REPO_URL,
]
)
.with_exec(["checkout", "-t", f"origin/{current_git_branch}"])
.with_exec(["diff-tree", "--no-commit-id", "--name-only", current_git_revision, "-r"])
.stdout()
)
container = await checked_out_git_container(dagger_client, current_git_branch, current_git_revision)
modified_files = await container.with_exec(["diff-tree", "--no-commit-id", "--name-only", current_git_revision, "-r"]).stdout()
return set(modified_files.split("\n"))


Expand All @@ -113,11 +73,6 @@ async def get_modified_files_in_commit(current_git_branch: str, current_git_revi
return await get_modified_files_in_commit_remote(current_git_branch, current_git_revision)


def get_modified_files_in_pull_request(pull_request: PullRequest) -> List[str]:
"""Retrieve the list of modified files in a pull request."""
return [f.filename for f in pull_request.get_files()]


@functools.cache
def get_git_repo() -> git.Repo:
"""Retrieve the git repo."""
Expand Down
10 changes: 5 additions & 5 deletions airbyte-ci/connectors/pipelines/pipelines/helpers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import unicodedata
from io import TextIOWrapper
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable, List, Optional, Tuple
from typing import TYPE_CHECKING, Any, Callable, List, Optional, Set, Tuple

import anyio
import asyncer
Expand Down Expand Up @@ -308,16 +308,16 @@ def sh_dash_c(lines: List[str]) -> List[str]:
return ["sh", "-c", " && ".join(["set -o xtrace"] + lines)]


def transform_strs_to_paths(str_paths: List[str]) -> List[Path]:
"""Transform a list of string paths to a list of Path objects.
def transform_strs_to_paths(str_paths: Set[str]) -> List[Path]:
"""Transform a set of string paths to a sorted list of Path objects.
Args:
str_paths (List[str]): A list of string paths.
str_paths (Set[str]): A set of string paths.
Returns:
List[Path]: A list of Path objects.
"""
return [Path(str_path) for str_path in str_paths]
return sorted([Path(str_path) for str_path in str_paths])


def fail_if_missing_docker_hub_creds(ctx: click.Context):
Expand Down
2 changes: 1 addition & 1 deletion airbyte-ci/connectors/pipelines/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "pipelines"
version = "2.7.2"
version = "2.7.3"
description = "Package maintained by the connector operations team to perform CI for connectors' pipelines"
authors = ["Airbyte <contact@airbyte.io>"]

Expand Down

0 comments on commit 3126204

Please sign in to comment.