diff --git a/airbyte-ci/connectors/metadata_service/lib/metadata_service/commands.py b/airbyte-ci/connectors/metadata_service/lib/metadata_service/commands.py index 594e6fec56e5..22da931f06cf 100644 --- a/airbyte-ci/connectors/metadata_service/lib/metadata_service/commands.py +++ b/airbyte-ci/connectors/metadata_service/lib/metadata_service/commands.py @@ -5,7 +5,7 @@ import click from metadata_service.gcs_upload import upload_metadata_to_gcs, MetadataUploadInfo -from metadata_service.validators.metadata_validator import PRE_UPLOAD_VALIDATORS, validate_and_load +from metadata_service.validators.metadata_validator import PRE_UPLOAD_VALIDATORS, validate_and_load, ValidatorOptions from metadata_service.constants import METADATA_FILE_NAME from pydantic import ValidationError @@ -54,9 +54,9 @@ def validate(file_path: pathlib.Path): @click.option("--prerelease", type=click.STRING, required=False, default=None, help="The prerelease tag of the connector.") def upload(metadata_file_path: pathlib.Path, bucket_name: str, prerelease: str): metadata_file_path = metadata_file_path if not metadata_file_path.is_dir() else metadata_file_path / METADATA_FILE_NAME - + validator_opts = ValidatorOptions(prerelease_tag=prerelease) try: - upload_info = upload_metadata_to_gcs(bucket_name, metadata_file_path, prerelease) + upload_info = upload_metadata_to_gcs(bucket_name, metadata_file_path, validator_opts) log_metadata_upload_info(upload_info) except (ValidationError, FileNotFoundError) as e: click.secho(f"The metadata file could not be uploaded: {str(e)}", color="red") diff --git a/airbyte-ci/connectors/metadata_service/lib/metadata_service/gcs_upload.py b/airbyte-ci/connectors/metadata_service/lib/metadata_service/gcs_upload.py index a1de6c5ad1b9..e771047d771c 100644 --- a/airbyte-ci/connectors/metadata_service/lib/metadata_service/gcs_upload.py +++ b/airbyte-ci/connectors/metadata_service/lib/metadata_service/gcs_upload.py @@ -15,7 +15,7 @@ from google.oauth2 import service_account 
from metadata_service.constants import METADATA_FILE_NAME, METADATA_FOLDER, ICON_FILE_NAME -from metadata_service.validators.metadata_validator import POST_UPLOAD_VALIDATORS, validate_and_load +from metadata_service.validators.metadata_validator import POST_UPLOAD_VALIDATORS, validate_and_load, ValidatorOptions from metadata_service.models.transform import to_json_sanitized_dict from metadata_service.models.generated.ConnectorMetadataDefinitionV0 import ConnectorMetadataDefinitionV0 @@ -122,8 +122,8 @@ def _icon_upload(metadata: ConnectorMetadataDefinitionV0, bucket: storage.bucket return upload_file_if_changed(local_icon_path, bucket, latest_icon_path) -def create_prerelease_metadata_file(metadata_file_path: Path, prerelease_tag: str) -> Path: - metadata, error = validate_and_load(metadata_file_path, []) +def create_prerelease_metadata_file(metadata_file_path: Path, validator_opts: ValidatorOptions) -> Path: + metadata, error = validate_and_load(metadata_file_path, [], validator_opts) if metadata is None: raise ValueError(f"Metadata file {metadata_file_path} is invalid for uploading: {error}") @@ -131,13 +131,13 @@ def create_prerelease_metadata_file(metadata_file_path: Path, prerelease_tag: st # this includes metadata.data.dockerImageTag, metadata.data.registries[].dockerImageTag # where registries is a dictionary of registry name to registry object metadata_dict = to_json_sanitized_dict(metadata, exclude_none=True) - metadata_dict["data"]["dockerImageTag"] = prerelease_tag + metadata_dict["data"]["dockerImageTag"] = validator_opts.prerelease_tag for registry in get(metadata_dict, "data.registries", {}).values(): if "dockerImageTag" in registry: - registry["dockerImageTag"] = prerelease_tag + registry["dockerImageTag"] = validator_opts.prerelease_tag # write metadata to yaml file in system tmp folder - tmp_metadata_file_path = Path("/tmp") / metadata.data.dockerRepository / prerelease_tag / METADATA_FILE_NAME + tmp_metadata_file_path = Path("/tmp") / 
metadata.data.dockerRepository / validator_opts.prerelease_tag / METADATA_FILE_NAME tmp_metadata_file_path.parent.mkdir(parents=True, exist_ok=True) with open(tmp_metadata_file_path, "w") as f: yaml.dump(metadata_dict, f) @@ -145,7 +145,7 @@ def create_prerelease_metadata_file(metadata_file_path: Path, prerelease_tag: st return tmp_metadata_file_path -def upload_metadata_to_gcs(bucket_name: str, metadata_file_path: Path, prerelease: Optional[str] = None) -> MetadataUploadInfo: +def upload_metadata_to_gcs(bucket_name: str, metadata_file_path: Path, validator_opts: ValidatorOptions = ValidatorOptions()) -> MetadataUploadInfo: """Upload a metadata file to a GCS bucket. If the per 'version' key already exists it won't be overwritten. @@ -155,14 +155,14 @@ def upload_metadata_to_gcs(bucket_name: str, metadata_file_path: Path, prereleas bucket_name (str): Name of the GCS bucket to which the metadata file will be uploade. metadata_file_path (Path): Path to the metadata file. service_account_file_path (Path): Path to the JSON file with the service account allowed to read and write on the bucket. - prerelease (Optional[str]): Whether the connector is a prerelease or not. + validator_opts (ValidatorOptions): Validator options; its prerelease_tag field holds the connector's prerelease tag, or None for a normal release. Returns: Tuple[bool, str]: Whether the metadata file was uploaded and its blob id. 
""" - if prerelease: - metadata_file_path = create_prerelease_metadata_file(metadata_file_path, prerelease) + if validator_opts.prerelease_tag: + metadata_file_path = create_prerelease_metadata_file(metadata_file_path, validator_opts) - metadata, error = validate_and_load(metadata_file_path, POST_UPLOAD_VALIDATORS) + metadata, error = validate_and_load(metadata_file_path, POST_UPLOAD_VALIDATORS, validator_opts) if metadata is None: raise ValueError(f"Metadata file {metadata_file_path} is invalid for uploading: {error}") @@ -175,7 +175,7 @@ def upload_metadata_to_gcs(bucket_name: str, metadata_file_path: Path, prereleas icon_uploaded, icon_blob_id = _icon_upload(metadata, bucket, metadata_file_path) version_uploaded, version_blob_id = _version_upload(metadata, bucket, metadata_file_path) - if not prerelease: + if not validator_opts.prerelease_tag: latest_uploaded, latest_blob_id = _latest_upload(metadata, bucket, metadata_file_path) else: latest_uploaded, latest_blob_id = False, None diff --git a/airbyte-ci/connectors/metadata_service/lib/metadata_service/validators/metadata_validator.py b/airbyte-ci/connectors/metadata_service/lib/metadata_service/validators/metadata_validator.py index c6a2249c8539..772bc5dd0bb0 100644 --- a/airbyte-ci/connectors/metadata_service/lib/metadata_service/validators/metadata_validator.py +++ b/airbyte-ci/connectors/metadata_service/lib/metadata_service/validators/metadata_validator.py @@ -1,15 +1,22 @@ -import re import semver import pathlib import yaml + +from dataclasses import dataclass from pydantic import ValidationError from metadata_service.models.generated.ConnectorMetadataDefinitionV0 import ConnectorMetadataDefinitionV0 from typing import Optional, Tuple, Union, List, Callable from metadata_service.docker_hub import is_image_on_docker_hub from pydash.objects import get + +@dataclass(frozen=True) +class ValidatorOptions: + prerelease_tag: Optional[str] = None + + ValidationResult = Tuple[bool, Optional[Union[ValidationError, 
str]]] -Validator = Callable[[ConnectorMetadataDefinitionV0], ValidationResult] +Validator = Callable[[ConnectorMetadataDefinitionV0, ValidatorOptions], ValidationResult] # TODO: Remove these when each of these connectors ship any new version ALREADY_ON_MAJOR_VERSION_EXCEPTIONS = [ @@ -26,7 +33,9 @@ ] -def validate_metadata_images_in_dockerhub(metadata_definition: ConnectorMetadataDefinitionV0) -> ValidationResult: +def validate_metadata_images_in_dockerhub( + metadata_definition: ConnectorMetadataDefinitionV0, validator_opts: ValidatorOptions +) -> ValidationResult: metadata_definition_dict = metadata_definition.dict() base_docker_image = get(metadata_definition_dict, "data.dockerRepository") base_docker_version = get(metadata_definition_dict, "data.dockerImageTag") @@ -48,7 +57,9 @@ def validate_metadata_images_in_dockerhub(metadata_definition: ConnectorMetadata (cloud_docker_image, cloud_docker_version), (normalization_docker_image, normalization_docker_version), ] - possible_docker_images.extend([(base_docker_image, version) for version in breaking_change_versions]) + + if not validator_opts.prerelease_tag: + possible_docker_images.extend([(base_docker_image, version) for version in breaking_change_versions]) # Filter out tuples with None and remove duplicates images_to_check = list(set(filter(lambda x: None not in x, possible_docker_images))) @@ -61,7 +72,9 @@ def validate_metadata_images_in_dockerhub(metadata_definition: ConnectorMetadata return True, None -def validate_at_least_one_language_tag(metadata_definition: ConnectorMetadataDefinitionV0) -> ValidationResult: +def validate_at_least_one_language_tag( + metadata_definition: ConnectorMetadataDefinitionV0, _validator_opts: ValidatorOptions +) -> ValidationResult: """Ensure that there is at least one tag in the data.tags field that matches language:.""" tags = get(metadata_definition, "data.tags", []) if not any([tag.startswith("language:") for tag in tags]): @@ -70,7 +83,9 @@ def 
validate_at_least_one_language_tag(metadata_definition: ConnectorMetadataDef return True, None -def validate_all_tags_are_keyvalue_pairs(metadata_definition: ConnectorMetadataDefinitionV0) -> ValidationResult: +def validate_all_tags_are_keyvalue_pairs( + metadata_definition: ConnectorMetadataDefinitionV0, _validator_opts: ValidatorOptions +) -> ValidationResult: """Ensure that all tags are of the form :.""" tags = get(metadata_definition, "data.tags", []) for tag in tags: @@ -86,7 +101,9 @@ def is_major_version(version: str) -> bool: return semver_version.minor == 0 and semver_version.patch == 0 -def validate_major_version_bump_has_breaking_change_entry(metadata_definition: ConnectorMetadataDefinitionV0) -> ValidationResult: +def validate_major_version_bump_has_breaking_change_entry( + metadata_definition: ConnectorMetadataDefinitionV0, _validator_opts: ValidatorOptions +) -> ValidationResult: """Ensure that if the major version is incremented, there is a breaking change entry for that version.""" metadata_definition_dict = metadata_definition.dict() image_tag = get(metadata_definition_dict, "data.dockerImageTag") @@ -128,7 +145,9 @@ def validate_major_version_bump_has_breaking_change_entry(metadata_definition: C def validate_and_load( - file_path: pathlib.Path, validators_to_run: List[Validator] + file_path: pathlib.Path, + validators_to_run: List[Validator], + validator_opts: ValidatorOptions = ValidatorOptions(), ) -> Tuple[Optional[ConnectorMetadataDefinitionV0], Optional[ValidationError]]: """Load a metadata file from a path (runs jsonschema validation) and run optional extra validators. @@ -136,7 +155,6 @@ def validate_and_load( If the metadata file is valid, metadata_model will be populated. Otherwise, error_message will be populated with a string describing the error. 
""" - try: # Load the metadata file - this implicitly runs jsonschema validation metadata = yaml.safe_load(file_path.read_text()) @@ -145,7 +163,7 @@ def validate_and_load( return None, f"Validation error: {e}" for validator in validators_to_run: - is_valid, error = validator(metadata_model) + is_valid, error = validator(metadata_model, validator_opts) if not is_valid: return None, f"Validation error: {error}" diff --git a/airbyte-ci/connectors/metadata_service/lib/tests/fixtures/metadata_validate/invalid/metadata_no_extra_data.yaml b/airbyte-ci/connectors/metadata_service/lib/tests/fixtures/metadata_validate/valid/metadata_extra_data.yaml similarity index 100% rename from airbyte-ci/connectors/metadata_service/lib/tests/fixtures/metadata_validate/invalid/metadata_no_extra_data.yaml rename to airbyte-ci/connectors/metadata_service/lib/tests/fixtures/metadata_validate/valid/metadata_extra_data.yaml diff --git a/airbyte-ci/connectors/metadata_service/lib/tests/test_commands.py b/airbyte-ci/connectors/metadata_service/lib/tests/test_commands.py index 294c87b6bdf7..07b07836fdec 100644 --- a/airbyte-ci/connectors/metadata_service/lib/tests/test_commands.py +++ b/airbyte-ci/connectors/metadata_service/lib/tests/test_commands.py @@ -2,10 +2,12 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # import pytest +import pathlib from click.testing import CliRunner from metadata_service import commands from metadata_service.gcs_upload import MetadataUploadInfo +from metadata_service.validators.metadata_validator import ValidatorOptions from pydantic import BaseModel, ValidationError, error_wrappers @@ -98,6 +100,27 @@ def test_upload(mocker, valid_metadata_yaml_files, latest_uploaded, version_uplo # We exit with 5 status code to share with the CI pipeline that the upload was skipped. 
assert result.exit_code == 5 +def test_upload_prerelease(mocker, valid_metadata_yaml_files): + runner = CliRunner() + mocker.patch.object(commands.click, "secho") + mocker.patch.object(commands, "upload_metadata_to_gcs") + + prerelease_tag = "0.3.0-dev.6d33165120" + bucket = "my-bucket" + metadata_file_path = valid_metadata_yaml_files[0] + validator_opts = ValidatorOptions(prerelease_tag=prerelease_tag) + + upload_info = mock_metadata_upload_info(False, True, False, metadata_file_path) + commands.upload_metadata_to_gcs.return_value = upload_info + result = runner.invoke( + commands.upload, [metadata_file_path, bucket, "--prerelease", prerelease_tag] + ) # Using valid_metadata_yaml_files[0] as SA because it exists... + + commands.upload_metadata_to_gcs.assert_has_calls( + [mocker.call(bucket, pathlib.Path(metadata_file_path), validator_opts)] + ) + assert result.exit_code == 0 + @pytest.mark.parametrize( "error, handled", diff --git a/airbyte-ci/connectors/metadata_service/lib/tests/test_gcs_upload.py b/airbyte-ci/connectors/metadata_service/lib/tests/test_gcs_upload.py index 212d0da20678..a3cc6d7e7c69 100644 --- a/airbyte-ci/connectors/metadata_service/lib/tests/test_gcs_upload.py +++ b/airbyte-ci/connectors/metadata_service/lib/tests/test_gcs_upload.py @@ -9,6 +9,7 @@ from pydash.objects import get from metadata_service import gcs_upload +from metadata_service.validators.metadata_validator import ValidatorOptions from metadata_service.models.generated.ConnectorMetadataDefinitionV0 import ConnectorMetadataDefinitionV0 from metadata_service.constants import METADATA_FILE_NAME from metadata_service.models.transform import to_json_sanitized_dict @@ -185,7 +186,7 @@ def test_upload_metadata_to_gcs_with_prerelease(mocker, valid_metadata_upload_fi gcs_upload.upload_metadata_to_gcs( "my_bucket", metadata_file_path, - prerelease_image_tag, + ValidatorOptions(prerelease_tag=prerelease_image_tag), ) gcs_upload._latest_upload.assert_not_called() diff --git 
a/airbyte-ci/connectors/metadata_service/orchestrator/orchestrator/hacks.py b/airbyte-ci/connectors/metadata_service/orchestrator/orchestrator/hacks.py index 06d4c9e530b0..24799dbec4a5 100644 --- a/airbyte-ci/connectors/metadata_service/orchestrator/orchestrator/hacks.py +++ b/airbyte-ci/connectors/metadata_service/orchestrator/orchestrator/hacks.py @@ -11,12 +11,17 @@ PolymorphicRegistryEntry = Union[ConnectorRegistrySourceDefinition, ConnectorRegistryDestinationDefinition] -def _is_docker_repository_overridden(metadata_entry: LatestMetadataEntry, registry_entry: PolymorphicRegistryEntry,) -> bool: + +def _is_docker_repository_overridden( + metadata_entry: LatestMetadataEntry, + registry_entry: PolymorphicRegistryEntry, +) -> bool: """Check if the docker repository is overridden in the registry entry.""" registry_entry_docker_repository = registry_entry.dockerRepository metadata_docker_repository = metadata_entry.metadata_definition.data.dockerRepository return registry_entry_docker_repository != metadata_docker_repository + def _get_version_specific_registry_entry_file_path(registry_entry, registry_name): """Get the file path for the version specific registry entry file.""" docker_reposiory = registry_entry.dockerRepository @@ -26,11 +31,15 @@ def _get_version_specific_registry_entry_file_path(registry_entry, registry_name registry_entry_file_path = assumed_metadata_file_path.replace(METADATA_FILE_NAME, registry_name) return registry_entry_file_path + def _check_for_invalid_write_path(write_path: str): """Check if the write path is valid.""" if "latest" in write_path: - raise ValueError("Cannot write to a path that contains 'latest'. That is reserved for the latest metadata file and its direct transformations") + raise ValueError( + "Cannot write to a path that contains 'latest'. 
That is reserved for the latest metadata file and its direct transformations" + ) + def write_registry_to_overrode_file_paths( registry_entry: PolymorphicRegistryEntry, @@ -72,7 +81,8 @@ def write_registry_to_overrode_file_paths( overrode_registry_entry_version_write_path = _get_version_specific_registry_entry_file_path(registry_entry, registry_name) _check_for_invalid_write_path(overrode_registry_entry_version_write_path) logger.info(f"Writing registry entry to {overrode_registry_entry_version_write_path}") - file_handle = registry_directory_manager.write_data(registry_entry_json.encode("utf-8"), ext="json", key=overrode_registry_entry_version_write_path) + file_handle = registry_directory_manager.write_data( + registry_entry_json.encode("utf-8"), ext="json", key=overrode_registry_entry_version_write_path + ) logger.info(f"Successfully wrote registry entry to {file_handle.public_url}") return file_handle - diff --git a/airbyte-ci/connectors/pipelines/pipelines/pipelines/metadata.py b/airbyte-ci/connectors/pipelines/pipelines/pipelines/metadata.py index a222385f4005..ad97646fb07b 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/pipelines/metadata.py +++ b/airbyte-ci/connectors/pipelines/pipelines/pipelines/metadata.py @@ -63,7 +63,6 @@ async def _run(self) -> StepResult: class MetadataUpload(PoetryRun): - # When the metadata service exits with this code, it means the metadata is valid but the upload was skipped because the metadata is already uploaded skipped_exit_code = 5