diff --git a/.github/workflows/connector_integration_test_single_dagger.yml b/.github/workflows/connector_integration_test_single_dagger.yml
index 8131c866b645..37dc4bc33f59 100644
--- a/.github/workflows/connector_integration_test_single_dagger.yml
+++ b/.github/workflows/connector_integration_test_single_dagger.yml
@@ -64,10 +64,7 @@ jobs:
         env:
           _EXPERIMENTAL_DAGGER_CLOUD_TOKEN: "p.eyJ1IjogIjFiZjEwMmRjLWYyZmQtNDVhNi1iNzM1LTgxNzI1NGFkZDU2ZiIsICJpZCI6ICJlNjk3YzZiYy0yMDhiLTRlMTktODBjZC0yNjIyNGI3ZDBjMDEifQ.hT6eMOYt3KZgNoVGNYI3_v4CC-s19z8uQsBkGrBhU3k"
           GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }}
-          AWS_ACCESS_KEY_ID: ${{ secrets.STATUS_API_AWS_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.STATUS_API_AWS_SECRET_ACCESS_KEY }}
-          AWS_DEFAULT_REGION: "us-east-2"
-          TEST_REPORTS_BUCKET_NAME: "airbyte-connector-build-status"
+          CI_REPORT_BUCKET_NAME: "airbyte-ci-reports"
           CI_GITHUB_ACCESS_TOKEN: ${{ secrets.GH_PAT_MAINTENANCE_OCTAVIA }}
           CI_GIT_BRANCH: ${{ steps.extract_branch.outputs.branch }}
           CI_GIT_REVISION: ${{ github.sha }}
@@ -88,10 +85,7 @@ jobs:
         env:
           _EXPERIMENTAL_DAGGER_CLOUD_TOKEN: "p.eyJ1IjogIjFiZjEwMmRjLWYyZmQtNDVhNi1iNzM1LTgxNzI1NGFkZDU2ZiIsICJpZCI6ICJlNjk3YzZiYy0yMDhiLTRlMTktODBjZC0yNjIyNGI3ZDBjMDEifQ.hT6eMOYt3KZgNoVGNYI3_v4CC-s19z8uQsBkGrBhU3k"
           GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }}
-          AWS_ACCESS_KEY_ID: ${{ secrets.STATUS_API_AWS_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.STATUS_API_AWS_SECRET_ACCESS_KEY }}
-          AWS_DEFAULT_REGION: "us-east-2"
-          TEST_REPORTS_BUCKET_NAME: "airbyte-connector-build-status"
+          CI_REPORT_BUCKET_NAME: "airbyte-ci-reports"
           CI_GITHUB_ACCESS_TOKEN: ${{ secrets.GH_PAT_MAINTENANCE_OCTAVIA }}
           CI_GIT_BRANCH: ${{ github.head_ref }}
           CI_GIT_REVISION: ${{ github.event.pull_request.head.sha }}
diff --git a/.github/workflows/connectors_nightly_build.yml b/.github/workflows/connectors_nightly_build.yml
index 13adf092923c..43f1b9810ce4 100644
--- a/.github/workflows/connectors_nightly_build.yml
+++ b/.github/workflows/connectors_nightly_build.yml
@@ -61,11 +61,8 @@ jobs:
         env:
           _EXPERIMENTAL_DAGGER_CLOUD_TOKEN: "p.eyJ1IjogIjFiZjEwMmRjLWYyZmQtNDVhNi1iNzM1LTgxNzI1NGFkZDU2ZiIsICJpZCI6ICJlNjk3YzZiYy0yMDhiLTRlMTktODBjZC0yNjIyNGI3ZDBjMDEifQ.hT6eMOYt3KZgNoVGNYI3_v4CC-s19z8uQsBkGrBhU3k"
           GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }}
-          AWS_ACCESS_KEY_ID: ${{ secrets.STATUS_API_AWS_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.STATUS_API_AWS_SECRET_ACCESS_KEY }}
-          AWS_DEFAULT_REGION: "us-east-2"
-          TEST_REPORTS_BUCKET_NAME: "airbyte-connector-build-status"
+          CI_REPORT_BUCKET_NAME: "airbyte-ci-reports"
           CI_GITHUB_ACCESS_TOKEN: ${{ secrets.GH_PAT_MAINTENANCE_OCTAVIA }}
           CI_GIT_BRANCH: ${{ steps.extract_branch.outputs.branch }}
           CI_CONTEXT: "nightly_builds"
-          CI_PIPELINE_START_TIMESTAMP: ${{ steps.get-start-timestamp.outputs.start-timestamp }}
+          CI_PIPELINE_START_TIMESTAMP: ${{ steps.get-start-timestamp.outputs.start-timestamp }}
\ No newline at end of file
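The two workflow diffs above replace the four AWS report variables with a single `CI_REPORT_BUCKET_NAME`. For reference, a minimal self-contained sketch (hypothetical command name) of how a click option falls back to such an environment variable, mirroring the `--ci-report-bucket-name` option added to `airbyte_ci.py` further down:

```python
import click
from click.testing import CliRunner

@click.command()
@click.option("--ci-report-bucket-name", envvar="CI_REPORT_BUCKET_NAME", type=str)
def dump_bucket(ci_report_bucket_name: str):
    # click falls back to the envvar when the flag is not passed explicitly.
    click.echo(ci_report_bucket_name)

runner = CliRunner()
result = runner.invoke(dump_bucket, [], env={"CI_REPORT_BUCKET_NAME": "airbyte-ci-reports"})
assert result.output.strip() == "airbyte-ci-reports"
```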
diff --git a/.github/workflows/publish_connectors.yml b/.github/workflows/publish_connectors.yml
index e5202ae11fdb..8367ec862672 100644
--- a/.github/workflows/publish_connectors.yml
+++ b/.github/workflows/publish_connectors.yml
@@ -19,9 +19,6 @@ jobs:
     name: Publish connectors
     runs-on: large-runner
     env:
-      AWS_ACCESS_KEY_ID: ${{ secrets.STATUS_API_AWS_ACCESS_KEY_ID }}
-      AWS_DEFAULT_REGION: "us-east-2"
-      AWS_SECRET_ACCESS_KEY: ${{ secrets.STATUS_API_AWS_SECRET_ACCESS_KEY }}
      CI_GITHUB_ACCESS_TOKEN: ${{ secrets.GH_PAT_MAINTENANCE_OCTAVIA }}
      DOCKER_HUB_PASSWORD: ${{ secrets.DOCKER_HUB_PASSWORD }}
      DOCKER_HUB_USERNAME: ${{ secrets.DOCKER_HUB_USERNAME }}
@@ -31,7 +28,7 @@ jobs:
      METADATA_SERVICE_BUCKET_NAME: prod-airbyte-cloud-connector-metadata-service
      SPEC_CACHE_BUCKET_NAME: io-airbyte-cloud-spec-cache
      SPEC_CACHE_GCS_CREDENTIALS: ${{ secrets.SPEC_CACHE_SERVICE_ACCOUNT_KEY_PUBLISH }}
-      TEST_REPORTS_BUCKET_NAME: "airbyte-connector-build-status"
+      CI_REPORT_BUCKET_NAME: "airbyte-ci-reports"
      SLACK_WEBHOOK: ${{ secrets.PUBLISH_ON_MERGE_SLACK_WEBHOOK }}
    steps:
      - name: Checkout Airbyte
diff --git a/tools/ci_connector_ops/ci_connector_ops/pipelines/actions/remote_storage.py b/tools/ci_connector_ops/ci_connector_ops/pipelines/actions/remote_storage.py
index d4d22735f670..c0d04a702fd6 100644
--- a/tools/ci_connector_ops/ci_connector_ops/pipelines/actions/remote_storage.py
+++ b/tools/ci_connector_ops/ci_connector_ops/pipelines/actions/remote_storage.py
@@ -54,7 +54,7 @@ async def upload_to_gcs(
     """Upload a local file to GCS using the AWS CLI docker image and running aws s3 cp command.
     Args:
         dagger_client (Client): The dagger client.
-        file_to_upload_path (File): The dagger File to upload.
+        file_to_upload (File): The dagger File to upload.
         key (str): The key that will be written on the S3 bucket.
         bucket (str): The S3 bucket name.
         gcs_credentials (Secret): The dagger secret holding the credentials to get and upload the targeted GCS bucket.
diff --git a/tools/ci_connector_ops/ci_connector_ops/pipelines/bases.py b/tools/ci_connector_ops/ci_connector_ops/pipelines/bases.py
index 82a021b35bf0..903450bd7593 100644
--- a/tools/ci_connector_ops/ci_connector_ops/pipelines/bases.py
+++ b/tools/ci_connector_ops/ci_connector_ops/pipelines/bases.py
@@ -39,6 +39,9 @@ class CIContext(str, Enum):
     NIGHTLY_BUILDS = "nightly_builds"
     MASTER = "master"
 
+    def __str__(self) -> str:
+        return self.value
+
 
 class StepStatus(Enum):
     """An Enum to characterize the success, failure or skipping of a Step."""
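`CIContext` values end up interpolated into log lines and report paths (for example via `"/".join(map(str, ...))` in the report-prefix rendering below), so the `__str__` override ensures the raw value is rendered rather than the default `CIContext.PULL_REQUEST` member representation. A standalone illustration, not the module itself:

```python
from enum import Enum

class CIContext(str, Enum):
    PULL_REQUEST = "pull_request"

    def __str__(self) -> str:
        return self.value

# With the override, plain str() and string joins render the raw value:
assert str(CIContext.PULL_REQUEST) == "pull_request"
assert "/".join(map(str, [CIContext.PULL_REQUEST])) == "pull_request"
# Without it, str() would yield "CIContext.PULL_REQUEST".
```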
ctx.obj["is_local"] = is_local @@ -85,6 +92,7 @@ def airbyte_ci( f"https://github.com/airbytehq/airbyte/actions/runs/{gha_workflow_run_id}" if gha_workflow_run_id else None ) ctx.obj["ci_context"] = ci_context + ctx.obj["ci_report_bucket_name"] = ci_report_bucket_name ctx.obj["pipeline_start_timestamp"] = pipeline_start_timestamp if pull_request_number and ci_github_access_token: @@ -97,6 +105,7 @@ def airbyte_ci( if not is_local: click.echo("Running airbyte-ci in CI mode.") click.echo(f"CI Context: {ci_context}") + click.echo(f"CI Report Bucket Name: {ci_report_bucket_name}") click.echo(f"Git Branch: {git_branch}") click.echo(f"Git Revision: {git_revision}") click.echo(f"GitHub Workflow Run ID: {gha_workflow_run_id}") diff --git a/tools/ci_connector_ops/ci_connector_ops/pipelines/commands/groups/connectors.py b/tools/ci_connector_ops/ci_connector_ops/pipelines/commands/groups/connectors.py index 78319fd4e90e..d972c0ef3bbf 100644 --- a/tools/ci_connector_ops/ci_connector_ops/pipelines/commands/groups/connectors.py +++ b/tools/ci_connector_ops/ci_connector_ops/pipelines/commands/groups/connectors.py @@ -40,10 +40,7 @@ def validate_environment(is_local: bool, use_remote_secrets: bool): else: required_env_vars_for_ci = [ "GCP_GSM_CREDENTIALS", - "AWS_ACCESS_KEY_ID", - "AWS_SECRET_ACCESS_KEY", - "AWS_DEFAULT_REGION", - "TEST_REPORTS_BUCKET_NAME", + "CI_REPORT_BUCKET_NAME", "CI_GITHUB_ACCESS_TOKEN", ] for required_env_var in required_env_vars_for_ci: @@ -55,11 +52,55 @@ def validate_environment(is_local: bool, use_remote_secrets: bool): ) +def render_report_output_prefix(ctx: click.Context) -> str: + """Render the report output prefix for any command in the Connector CLI. + + The goal is to standardize the output of all logs and reports generated by the CLI + related to a specific command, and to a specific CI context. + + Note: We cannot hoist this higher in the command hierarchy because only one level of + subcommands are available at the time the context is created. + """ + + git_branch = ctx.obj["git_branch"] + git_revision = ctx.obj["git_revision"] + pipeline_start_timestamp = ctx.obj["pipeline_start_timestamp"] + ci_context = ctx.obj["ci_context"] + sanitized_branch = git_branch.replace("/", "_") + + # get the command name for the current context, if a group then prepend the parent command name + invoked_subcommand = ctx.invoked_subcommand + parent_command_path = ctx.command_path.replace(" ", "/") if ctx.command_path else None + cmd = f"{parent_command_path}/{invoked_subcommand}" if parent_command_path else invoked_subcommand + + path_values = [ + cmd, + ci_context, + sanitized_branch, + pipeline_start_timestamp, + git_revision, + ] + + # check all values are defined + if None in path_values: + raise ValueError(f"Missing value required to render the report output prefix: {path_values}") + + # join all values with a slash, and convert all values to string + return "/".join(map(str, path_values)) + + # COMMANDS @click.group(help="Commands related to connectors and connector acceptance tests.") @click.option("--use-remote-secrets", default=True) # specific to connectors +@click.option( + "--ci-gcs-credentials", + help="The service account to use during CI.", + type=click.STRING, + required=False, # Not required for pre-release or local pipelines + envvar="GCP_GSM_CREDENTIALS", +) @click.option( "--name", "names", multiple=True, help="Only test a specific connector. Use its technical name. 
@@ -82,7 +123,8 @@ def validate_environment(is_local: bool, use_remote_secrets: bool):
 @click.pass_context
 def connectors(
     ctx: click.Context,
-    use_remote_secrets: str,
+    use_remote_secrets: bool,
+    ci_gcs_credentials: str,
     names: Tuple[str],
     languages: Tuple[ConnectorLanguage],
     release_stages: Tuple[str],
@@ -95,12 +137,14 @@
     ctx.ensure_object(dict)
     ctx.obj["use_remote_secrets"] = use_remote_secrets
+    ctx.obj["ci_gcs_credentials"] = ci_gcs_credentials
     ctx.obj["connector_names"] = names
     ctx.obj["connector_languages"] = languages
     ctx.obj["release_states"] = release_stages
     ctx.obj["modified"] = modified
     ctx.obj["concurrency"] = concurrency
     ctx.obj["execute_timeout"] = execute_timeout
+    ctx.obj["report_output_prefix"] = render_report_output_prefix(ctx)
 
     all_connectors = get_all_released_connectors()
@@ -165,12 +209,14 @@ def test(
             git_branch=ctx.obj["git_branch"],
             git_revision=ctx.obj["git_revision"],
             modified_files=modified_files,
-            s3_report_key="python-poc/tests/history/",
+            test_report_bucket=ctx.obj["ci_report_bucket_name"],
+            report_output_prefix=ctx.obj["report_output_prefix"],
             use_remote_secrets=ctx.obj["use_remote_secrets"],
             gha_workflow_run_url=ctx.obj.get("gha_workflow_run_url"),
             pipeline_start_timestamp=ctx.obj.get("pipeline_start_timestamp"),
             ci_context=ctx.obj.get("ci_context"),
             pull_request=ctx.obj.get("pull_request"),
+            ci_gcs_credentials=ctx.obj["ci_gcs_credentials"],
         )
         for connector, modified_files in ctx.obj["selected_connectors_and_files"].items()
     ]
@@ -205,11 +251,13 @@ def build(ctx: click.Context) -> bool:
             git_branch=ctx.obj["git_branch"],
             git_revision=ctx.obj["git_revision"],
             modified_files=modified_files,
-            s3_report_key="python-poc/build/history/",
+            test_report_bucket=ctx.obj["ci_report_bucket_name"],
+            report_output_prefix=ctx.obj["report_output_prefix"],
             use_remote_secrets=ctx.obj["use_remote_secrets"],
             gha_workflow_run_url=ctx.obj.get("gha_workflow_run_url"),
             pipeline_start_timestamp=ctx.obj.get("pipeline_start_timestamp"),
             ci_context=ctx.obj.get("ci_context"),
+            ci_gcs_credentials=ctx.obj["ci_gcs_credentials"],
         )
         for connector, modified_files in ctx.obj["selected_connectors_and_files"].items()
     ]
@@ -299,6 +347,7 @@ def publish(
     ctx.obj["spec_cache_bucket_name"] = spec_cache_bucket_name
     ctx.obj["metadata_service_bucket_name"] = metadata_service_bucket_name
     ctx.obj["metadata_service_gcs_credentials"] = metadata_service_gcs_credentials
+    validate_publish_options(pre_release, ctx.obj)
 
     if ctx.obj["is_local"]:
         click.confirm(
@@ -317,23 +366,26 @@ def publish(
     publish_connector_contexts = reorder_contexts(
         [
             PublishConnectorContext(
-                connector,
-                pre_release,
-                modified_files,
-                spec_cache_gcs_credentials,
-                spec_cache_bucket_name,
-                metadata_service_gcs_credentials,
-                metadata_service_bucket_name,
-                docker_hub_username,
-                docker_hub_password,
-                slack_webhook,
-                slack_channel,
-                ctx.obj["is_local"],
-                ctx.obj["git_branch"],
-                ctx.obj["git_revision"],
+                connector=connector,
+                pre_release=pre_release,
+                modified_files=modified_files,
+                spec_cache_gcs_credentials=spec_cache_gcs_credentials,
+                spec_cache_bucket_name=spec_cache_bucket_name,
+                metadata_service_gcs_credentials=metadata_service_gcs_credentials,
+                metadata_bucket_name=metadata_service_bucket_name,
+                docker_hub_username=docker_hub_username,
+                docker_hub_password=docker_hub_password,
+                slack_webhook=slack_webhook,
+                reporting_slack_channel=slack_channel,
+                test_report_bucket=ctx.obj["ci_report_bucket_name"],
+                report_output_prefix=ctx.obj["report_output_prefix"],
+                is_local=ctx.obj["is_local"],
+                git_branch=ctx.obj["git_branch"],
+                git_revision=ctx.obj["git_revision"],
                 gha_workflow_run_url=ctx.obj.get("gha_workflow_run_url"),
                 pipeline_start_timestamp=ctx.obj.get("pipeline_start_timestamp"),
                 ci_context=ctx.obj.get("ci_context"),
+                ci_gcs_credentials=ctx.obj["ci_gcs_credentials"],
                 pull_request=ctx.obj.get("pull_request"),
             )
             for connector, modified_files in selected_connectors_and_files.items()
diff --git a/tools/ci_connector_ops/ci_connector_ops/pipelines/contexts.py b/tools/ci_connector_ops/ci_connector_ops/pipelines/contexts.py
index 30d6a680a9e4..156da00b8dca 100644
--- a/tools/ci_connector_ops/ci_connector_ops/pipelines/contexts.py
+++ b/tools/ci_connector_ops/ci_connector_ops/pipelines/contexts.py
@@ -268,8 +268,10 @@ def __init__(
         git_branch: bool,
         git_revision: bool,
         modified_files: List[str],
-        s3_report_key: str,
+        test_report_bucket: str,
+        report_output_prefix: str,
         use_remote_secrets: bool = True,
+        ci_gcs_credentials: Optional[str] = None,
         connector_acceptance_test_image: Optional[str] = DEFAULT_CONNECTOR_ACCEPTANCE_TEST_IMAGE,
         gha_workflow_run_url: Optional[str] = None,
         pipeline_start_timestamp: Optional[int] = None,
@@ -286,7 +288,7 @@ def __init__(
             git_branch (str): The current git branch name.
             git_revision (str): The current git revision, commit hash.
             modified_files (List[str]): The list of modified files in the current git branch.
-            s3_report_key (str): The S3 key to upload the test report to.
+            report_output_prefix (str): The path prefix under which the test report is uploaded.
             use_remote_secrets (bool, optional): Whether to download secrets for GSM or use the local secrets. Defaults to True.
             connector_acceptance_test_image (Optional[str], optional): The image to use to run connector acceptance tests. Defaults to DEFAULT_CONNECTOR_ACCEPTANCE_TEST_IMAGE.
             gha_workflow_run_url (Optional[str], optional): URL to the github action workflow run. Only valid for CI run. Defaults to None.
@@ -302,10 +304,13 @@ def __init__(
         self.use_remote_secrets = use_remote_secrets
         self.connector_acceptance_test_image = connector_acceptance_test_image
         self.modified_files = modified_files
-        self.s3_report_key = s3_report_key
+        self.test_report_bucket = test_report_bucket
+        self.report_output_prefix = report_output_prefix
         self._secrets_dir = None
         self._updated_secrets_dir = None
         self.cdk_version = None
+        self.ci_gcs_credentials = sanitize_gcs_credentials(ci_gcs_credentials) if ci_gcs_credentials else None
+
         super().__init__(
             pipeline_name=pipeline_name,
             is_local=is_local,
@@ -359,6 +364,11 @@ def metadata(self) -> dict:
     @property
     def docker_image_from_metadata(self) -> str:
         return f"{self.metadata['dockerRepository']}:{self.metadata['dockerImageTag']}"
 
+    @property
+    def ci_gcs_credentials_secret(self) -> Secret:
+        # TODO (ben): Update this to be in use ANYWHERE we use a service account.
+        return self.dagger_client.set_secret("ci_gcs_credentials", self.ci_gcs_credentials)
+
     def get_connector_dir(self, exclude=None, include=None) -> Directory:
         """Get the connector under test source code directory.
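`ci_gcs_credentials_secret` wraps the raw credentials string in a Dagger `Secret`, which containers can consume without the plaintext landing in logs or the layer cache. A minimal sketch of that pattern, assuming a reachable Dagger engine, a placeholder (non-functional) credentials payload, and the dagger-io Python SDK API as of this PR's era:

```python
import anyio
import dagger

async def main():
    async with dagger.Connection() as client:
        # set_secret registers the plaintext with the engine and returns a Secret handle.
        credentials = client.set_secret("ci_gcs_credentials", '{"type": "service_account"}')
        stdout = await (
            client.container()
            .from_("alpine:3.18")
            # The secret is only materialized inside the container at runtime.
            .with_secret_variable("GCS_CREDENTIALS", credentials)
            .with_exec(["sh", "-c", 'test -n "$GCS_CREDENTIALS" && echo secret injected'])
            .stdout()
        )
        print(stdout)

anyio.run(main)
```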
@@ -404,24 +414,37 @@ async def __aexit__(
         local_reports_path_root = "tools/ci_connector_ops/pipeline_reports/"
         connector_name = self.report.pipeline_context.connector.technical_name
         connector_version = self.report.pipeline_context.connector.version
-        git_revision = self.report.pipeline_context.git_revision
-        git_branch = self.report.pipeline_context.git_branch.replace("/", "_")
-        suffix = f"{connector_name}/{git_branch}/{connector_version}/{git_revision}.json"
-        local_report_path = Path(local_reports_path_root + suffix)
+
+        suffix = f"{connector_name}/{connector_version}/output.json"
+        file_path_key = f"{self.report_output_prefix}/{suffix}"
+
+        local_report_path = Path(local_reports_path_root + file_path_key)
+
         await local_report_path.parents[0].mkdir(parents=True, exist_ok=True)
         await local_report_path.write_text(self.report.to_json())
+
         if self.report.should_be_saved:
-            s3_key = self.s3_report_key + suffix
-            report_upload_exit_code = await remote_storage.upload_to_s3(
-                self.dagger_client, str(local_report_path), s3_key, os.environ["TEST_REPORTS_BUCKET_NAME"]
+            local_report_dagger_file = (
+                self.dagger_client.host().directory(".", include=[str(local_report_path)]).file(str(local_report_path))
+            )
+            report_upload_exit_code, _stdout, _stderr = await remote_storage.upload_to_gcs(
+                dagger_client=self.dagger_client,
+                file_to_upload=local_report_dagger_file,
+                key=file_path_key,
+                bucket=self.test_report_bucket,
+                gcs_credentials=self.ci_gcs_credentials_secret,
             )
             if report_upload_exit_code != 0:
-                self.logger.error("Uploading the report to S3 failed.")
+                self.logger.error(f"Uploading the report to GCS bucket {self.test_report_bucket} failed.")
+
         if self.report.should_be_commented_on_pr:
             self.report.post_comment_on_pr()
+
         await asyncify(update_commit_status_check)(**self.github_commit_status)
+
         if self.should_send_slack_message:
             await asyncify(send_message_to_webhook)(self.create_slack_message(), self.reporting_slack_channel, self.slack_webhook)
+
+        # Suppress the exception, if any
         return True
@@ -443,12 +466,15 @@ def __init__(
         docker_hub_password: str,
         slack_webhook: str,
         reporting_slack_channel: str,
+        test_report_bucket: str,
+        report_output_prefix: str,
         is_local: bool,
         git_branch: bool,
         git_revision: bool,
         gha_workflow_run_url: Optional[str] = None,
         pipeline_start_timestamp: Optional[int] = None,
         ci_context: Optional[str] = None,
+        ci_gcs_credentials: str = None,
         pull_request: PullRequest = None,
     ):
         self.pre_release = pre_release
@@ -466,7 +492,8 @@ def __init__(
             pipeline_name=pipeline_name,
             connector=connector,
             modified_files=modified_files,
-            s3_report_key="python-poc/publish/history/",
+            report_output_prefix=report_output_prefix,
+            test_report_bucket=test_report_bucket,
             is_local=is_local,
             git_branch=git_branch,
             git_revision=git_revision,
@@ -475,6 +502,7 @@
             ci_context=ci_context,
             slack_webhook=slack_webhook,
             reporting_slack_channel=reporting_slack_channel,
+            ci_gcs_credentials=ci_gcs_credentials,
         )
 
     @property
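Putting the pieces together: combined with the prefix rendered earlier, the report for a given connector now lands under a fully qualified per-run key instead of the old flat `python-poc/.../history/` paths. Illustrative values only:

```python
report_output_prefix = "airbyte-ci/connectors/test/pull_request/feat_new-report-paths/1680000000/abc123"
connector_name, connector_version = "source-pokeapi", "0.1.5"

suffix = f"{connector_name}/{connector_version}/output.json"
file_path_key = f"{report_output_prefix}/{suffix}"  # the key used for the GCS upload

assert file_path_key == (
    "airbyte-ci/connectors/test/pull_request/feat_new-report-paths/"
    "1680000000/abc123/source-pokeapi/0.1.5/output.json"
)
```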