
Metadata: skip breaking change validation on prerelease (#29017)
* skip breaking change validation

* Move ValidatorOpts higher in call

* Add prerelease test

* Fix test
bnchrch authored Aug 3, 2023
1 parent: 6985547, commit: 0fc2a35
Showing 8 changed files with 82 additions and 31 deletions.
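
The change in a nutshell: the prerelease tag, previously passed around as a bare string, now travels in a frozen ValidatorOptions dataclass that every validator receives, and the DockerHub image check consults it to skip the breaking-change version tags. The likely rationale: a prerelease can introduce a releases.breakingChanges entry for a version whose image only exists under a -dev tag so far, so checking DockerHub for the final tag would fail spuriously. A condensed sketch of the gating pattern (ValidatorOptions matches the diff below; docker_images_to_check is an illustrative distillation, not a function from the commit):

    from dataclasses import dataclass
    from typing import List, Optional, Tuple


    @dataclass(frozen=True)
    class ValidatorOptions:
        prerelease_tag: Optional[str] = None


    def docker_images_to_check(
        base_image: str, base_version: str, breaking_change_versions: List[str], opts: ValidatorOptions
    ) -> List[Tuple[str, str]]:
        # Always verify the image/tag pair being published.
        images = [(base_image, base_version)]
        # On a prerelease, skip the breaking-change tags: those versions may
        # not have been pushed to DockerHub yet.
        if not opts.prerelease_tag:
            images.extend((base_image, v) for v in breaking_change_versions)
        return images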
==============================
@@ -5,7 +5,7 @@

 import click
 from metadata_service.gcs_upload import upload_metadata_to_gcs, MetadataUploadInfo
-from metadata_service.validators.metadata_validator import PRE_UPLOAD_VALIDATORS, validate_and_load
+from metadata_service.validators.metadata_validator import PRE_UPLOAD_VALIDATORS, validate_and_load, ValidatorOptions
 from metadata_service.constants import METADATA_FILE_NAME
 from pydantic import ValidationError

@@ -54,9 +54,9 @@ def validate(file_path: pathlib.Path):
 @click.option("--prerelease", type=click.STRING, required=False, default=None, help="The prerelease tag of the connector.")
 def upload(metadata_file_path: pathlib.Path, bucket_name: str, prerelease: str):
     metadata_file_path = metadata_file_path if not metadata_file_path.is_dir() else metadata_file_path / METADATA_FILE_NAME

+    validator_opts = ValidatorOptions(prerelease_tag=prerelease)
     try:
-        upload_info = upload_metadata_to_gcs(bucket_name, metadata_file_path, prerelease)
+        upload_info = upload_metadata_to_gcs(bucket_name, metadata_file_path, validator_opts)
         log_metadata_upload_info(upload_info)
     except (ValidationError, FileNotFoundError) as e:
         click.secho(f"The metadata file could not be uploaded: {str(e)}", color="red")
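
For context, the upload command would be invoked roughly like this during a prerelease publish; a sketch assuming the CLI is exposed as metadata_service (the entry-point name and the tag are illustrative, only the --prerelease flag comes from this diff):

    metadata_service upload path/to/metadata.yaml my-bucket --prerelease 0.3.0-dev.6d33165120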
==============================
@@ -15,7 +15,7 @@
 from google.oauth2 import service_account

 from metadata_service.constants import METADATA_FILE_NAME, METADATA_FOLDER, ICON_FILE_NAME
-from metadata_service.validators.metadata_validator import POST_UPLOAD_VALIDATORS, validate_and_load
+from metadata_service.validators.metadata_validator import POST_UPLOAD_VALIDATORS, validate_and_load, ValidatorOptions
 from metadata_service.models.transform import to_json_sanitized_dict
 from metadata_service.models.generated.ConnectorMetadataDefinitionV0 import ConnectorMetadataDefinitionV0

@@ -122,30 +122,30 @@ def _icon_upload(metadata: ConnectorMetadataDefinitionV0, bucket: storage.bucket
     return upload_file_if_changed(local_icon_path, bucket, latest_icon_path)


-def create_prerelease_metadata_file(metadata_file_path: Path, prerelease_tag: str) -> Path:
-    metadata, error = validate_and_load(metadata_file_path, [])
+def create_prerelease_metadata_file(metadata_file_path: Path, validator_opts: ValidatorOptions) -> Path:
+    metadata, error = validate_and_load(metadata_file_path, [], validator_opts)
     if metadata is None:
         raise ValueError(f"Metadata file {metadata_file_path} is invalid for uploading: {error}")

     # replace any dockerImageTag references with the actual tag
     # this includes metadata.data.dockerImageTag, metadata.data.registries[].dockerImageTag
     # where registries is a dictionary of registry name to registry object
     metadata_dict = to_json_sanitized_dict(metadata, exclude_none=True)
-    metadata_dict["data"]["dockerImageTag"] = prerelease_tag
+    metadata_dict["data"]["dockerImageTag"] = validator_opts.prerelease_tag
     for registry in get(metadata_dict, "data.registries", {}).values():
         if "dockerImageTag" in registry:
-            registry["dockerImageTag"] = prerelease_tag
+            registry["dockerImageTag"] = validator_opts.prerelease_tag

     # write metadata to yaml file in system tmp folder
-    tmp_metadata_file_path = Path("/tmp") / metadata.data.dockerRepository / prerelease_tag / METADATA_FILE_NAME
+    tmp_metadata_file_path = Path("/tmp") / metadata.data.dockerRepository / validator_opts.prerelease_tag / METADATA_FILE_NAME
     tmp_metadata_file_path.parent.mkdir(parents=True, exist_ok=True)
     with open(tmp_metadata_file_path, "w") as f:
         yaml.dump(metadata_dict, f)

     return tmp_metadata_file_path


-def upload_metadata_to_gcs(bucket_name: str, metadata_file_path: Path, prerelease: Optional[str] = None) -> MetadataUploadInfo:
+def upload_metadata_to_gcs(bucket_name: str, metadata_file_path: Path, validator_opts: ValidatorOptions = ValidatorOptions()) -> MetadataUploadInfo:
     """Upload a metadata file to a GCS bucket.
     If the per-version key already exists it won't be overwritten.
@@ -155,14 +155,14 @@ def upload_metadata_to_gcs(bucket_name: str, metadata_file_path: Path, prereleas
     Args:
         bucket_name (str): Name of the GCS bucket to which the metadata file will be uploaded.
         metadata_file_path (Path): Path to the metadata file.
         service_account_file_path (Path): Path to the JSON file with the service account allowed to read and write on the bucket.
-        prerelease (Optional[str]): Whether the connector is a prerelease or not.
+        validator_opts (ValidatorOptions): Options for the validators; a set prerelease_tag marks the upload as a prerelease.
     Returns:
         Tuple[bool, str]: Whether the metadata file was uploaded and its blob id.
     """
-    if prerelease:
-        metadata_file_path = create_prerelease_metadata_file(metadata_file_path, prerelease)
+    if validator_opts.prerelease_tag:
+        metadata_file_path = create_prerelease_metadata_file(metadata_file_path, validator_opts)

-    metadata, error = validate_and_load(metadata_file_path, POST_UPLOAD_VALIDATORS)
+    metadata, error = validate_and_load(metadata_file_path, POST_UPLOAD_VALIDATORS, validator_opts)

     if metadata is None:
         raise ValueError(f"Metadata file {metadata_file_path} is invalid for uploading: {error}")
@@ -175,7 +175,7 @@ def upload_metadata_to_gcs(bucket_name: str, metadata_file_path: Path, prereleas
     icon_uploaded, icon_blob_id = _icon_upload(metadata, bucket, metadata_file_path)

     version_uploaded, version_blob_id = _version_upload(metadata, bucket, metadata_file_path)
-    if not prerelease:
+    if not validator_opts.prerelease_tag:
         latest_uploaded, latest_blob_id = _latest_upload(metadata, bucket, metadata_file_path)
     else:
         latest_uploaded, latest_blob_id = False, None
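
Putting the new surface together, a hedged sketch of a prerelease upload (bucket name and tag are illustrative). Per create_prerelease_metadata_file above, the rewritten file lands at /tmp/<dockerRepository>/<prerelease_tag>/metadata.yaml (METADATA_FILE_NAME presumably resolves to metadata.yaml), and the latest pointer is left untouched because _latest_upload is skipped for prereleases:

    from pathlib import Path

    from metadata_service.gcs_upload import upload_metadata_to_gcs
    from metadata_service.validators.metadata_validator import ValidatorOptions

    # A set prerelease_tag routes the upload through the tmp rewrite and
    # skips both the breaking-change DockerHub checks and the latest upload.
    opts = ValidatorOptions(prerelease_tag="0.3.0-dev.6d33165120")
    upload_info = upload_metadata_to_gcs("my-bucket", Path("metadata.yaml"), opts)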
==============================
@@ -1,15 +1,22 @@
 import re
 import semver
 import pathlib
 import yaml

+from dataclasses import dataclass
 from pydantic import ValidationError
 from metadata_service.models.generated.ConnectorMetadataDefinitionV0 import ConnectorMetadataDefinitionV0
 from typing import Optional, Tuple, Union, List, Callable
 from metadata_service.docker_hub import is_image_on_docker_hub
 from pydash.objects import get


+@dataclass(frozen=True)
+class ValidatorOptions:
+    prerelease_tag: Optional[str] = None
+
+
 ValidationResult = Tuple[bool, Optional[Union[ValidationError, str]]]
-Validator = Callable[[ConnectorMetadataDefinitionV0], ValidationResult]
+Validator = Callable[[ConnectorMetadataDefinitionV0, ValidatorOptions], ValidationResult]

# TODO: Remove these when each of these connectors ship any new version
ALREADY_ON_MAJOR_VERSION_EXCEPTIONS = [
@@ -26,7 +33,9 @@
]


-def validate_metadata_images_in_dockerhub(metadata_definition: ConnectorMetadataDefinitionV0) -> ValidationResult:
+def validate_metadata_images_in_dockerhub(
+    metadata_definition: ConnectorMetadataDefinitionV0, validator_opts: ValidatorOptions
+) -> ValidationResult:
     metadata_definition_dict = metadata_definition.dict()
     base_docker_image = get(metadata_definition_dict, "data.dockerRepository")
     base_docker_version = get(metadata_definition_dict, "data.dockerImageTag")
@@ -48,7 +57,9 @@ def validate_metadata_images_in_dockerhub(metadata_definition: ConnectorMetadata
         (cloud_docker_image, cloud_docker_version),
         (normalization_docker_image, normalization_docker_version),
     ]
-    possible_docker_images.extend([(base_docker_image, version) for version in breaking_change_versions])
+
+    if not validator_opts.prerelease_tag:
+        possible_docker_images.extend([(base_docker_image, version) for version in breaking_change_versions])

     # Filter out tuples with None and remove duplicates
     images_to_check = list(set(filter(lambda x: None not in x, possible_docker_images)))
@@ -61,7 +72,9 @@ def validate_metadata_images_in_dockerhub(metadata_definition: ConnectorMetadata
     return True, None


-def validate_at_least_one_language_tag(metadata_definition: ConnectorMetadataDefinitionV0) -> ValidationResult:
+def validate_at_least_one_language_tag(
+    metadata_definition: ConnectorMetadataDefinitionV0, _validator_opts: ValidatorOptions
+) -> ValidationResult:
     """Ensure that there is at least one tag in the data.tags field that matches language:<LANG>."""
     tags = get(metadata_definition, "data.tags", [])
     if not any([tag.startswith("language:") for tag in tags]):
@@ -70,7 +83,9 @@ def validate_at_least_one_language_tag(metadata_definition: ConnectorMetadataDef
     return True, None


-def validate_all_tags_are_keyvalue_pairs(metadata_definition: ConnectorMetadataDefinitionV0) -> ValidationResult:
+def validate_all_tags_are_keyvalue_pairs(
+    metadata_definition: ConnectorMetadataDefinitionV0, _validator_opts: ValidatorOptions
+) -> ValidationResult:
     """Ensure that all tags are of the form <KEY>:<VALUE>."""
     tags = get(metadata_definition, "data.tags", [])
     for tag in tags:
@@ -86,7 +101,9 @@ def is_major_version(version: str) -> bool:
     return semver_version.minor == 0 and semver_version.patch == 0


-def validate_major_version_bump_has_breaking_change_entry(metadata_definition: ConnectorMetadataDefinitionV0) -> ValidationResult:
+def validate_major_version_bump_has_breaking_change_entry(
+    metadata_definition: ConnectorMetadataDefinitionV0, _validator_opts: ValidatorOptions
+) -> ValidationResult:
     """Ensure that if the major version is incremented, there is a breaking change entry for that version."""
     metadata_definition_dict = metadata_definition.dict()
     image_tag = get(metadata_definition_dict, "data.dockerImageTag")
@@ -128,15 +145,16 @@ def validate_major_version_bump_has_breaking_change_entry(metadata_definition: C


 def validate_and_load(
-    file_path: pathlib.Path, validators_to_run: List[Validator]
+    file_path: pathlib.Path,
+    validators_to_run: List[Validator],
+    validator_opts: ValidatorOptions = ValidatorOptions(),
 ) -> Tuple[Optional[ConnectorMetadataDefinitionV0], Optional[ValidationError]]:
     """Load a metadata file from a path (runs jsonschema validation) and run optional extra validators.
     Returns a tuple of (metadata_model, error_message).
     If the metadata file is valid, metadata_model will be populated.
     Otherwise, error_message will be populated with a string describing the error.
     """

     try:
         # Load the metadata file - this implicitly runs jsonschema validation
         metadata = yaml.safe_load(file_path.read_text())
@@ -145,7 +163,7 @@ def validate_and_load(
         return None, f"Validation error: {e}"

     for validator in validators_to_run:
-        is_valid, error = validator(metadata_model)
+        is_valid, error = validator(metadata_model, validator_opts)
         if not is_valid:
             return None, f"Validation error: {error}"

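Since a Validator is now a callable taking (metadata, options), a custom validator follows this shape; a sketch in which validate_docker_repository_namespace is hypothetical, while the imports and the validate_and_load call match the signatures in this diff:

    import pathlib

    from metadata_service.models.generated.ConnectorMetadataDefinitionV0 import ConnectorMetadataDefinitionV0
    from metadata_service.validators.metadata_validator import (
        ValidationResult,
        ValidatorOptions,
        validate_and_load,
    )


    def validate_docker_repository_namespace(
        metadata_definition: ConnectorMetadataDefinitionV0, _validator_opts: ValidatorOptions
    ) -> ValidationResult:
        """Hypothetical validator: require the airbyte/ docker namespace."""
        repository = metadata_definition.data.dockerRepository
        if not repository.startswith("airbyte/"):
            return False, f"dockerRepository {repository} is not in the airbyte namespace"
        return True, None


    metadata, error = validate_and_load(
        pathlib.Path("metadata.yaml"),
        [validate_docker_repository_namespace],
        ValidatorOptions(prerelease_tag="0.3.0-dev.6d33165120"),
    )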
==============================
airbyte-ci/connectors/metadata_service/lib/tests/test_commands.py (23 additions, 0 deletions)
@@ -2,10 +2,12 @@
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #
 import pytest
+import pathlib

 from click.testing import CliRunner
 from metadata_service import commands
 from metadata_service.gcs_upload import MetadataUploadInfo
+from metadata_service.validators.metadata_validator import ValidatorOptions
 from pydantic import BaseModel, ValidationError, error_wrappers


@@ -98,6 +100,27 @@ def test_upload(mocker, valid_metadata_yaml_files, latest_uploaded, version_uplo
     # We exit with 5 status code to share with the CI pipeline that the upload was skipped.
     assert result.exit_code == 5

+
+def test_upload_prerelease(mocker, valid_metadata_yaml_files):
+    runner = CliRunner()
+    mocker.patch.object(commands.click, "secho")
+    mocker.patch.object(commands, "upload_metadata_to_gcs")
+
+    prerelease_tag = "0.3.0-dev.6d33165120"
+    bucket = "my-bucket"
+    metadata_file_path = valid_metadata_yaml_files[0]
+    validator_opts = ValidatorOptions(prerelease_tag=prerelease_tag)
+
+    upload_info = mock_metadata_upload_info(False, True, False, metadata_file_path)
+    commands.upload_metadata_to_gcs.return_value = upload_info
+    result = runner.invoke(
+        commands.upload, [metadata_file_path, bucket, "--prerelease", prerelease_tag]
+    )  # Using valid_metadata_yaml_files[0] as SA because it exists...
+
+    commands.upload_metadata_to_gcs.assert_has_calls(
+        [mocker.call(bucket, pathlib.Path(metadata_file_path), validator_opts)]
+    )
+    assert result.exit_code == 0


 @pytest.mark.parametrize(
     "error, handled",
==============================
@@ -9,6 +9,7 @@
 from pydash.objects import get

 from metadata_service import gcs_upload
+from metadata_service.validators.metadata_validator import ValidatorOptions
 from metadata_service.models.generated.ConnectorMetadataDefinitionV0 import ConnectorMetadataDefinitionV0
 from metadata_service.constants import METADATA_FILE_NAME
 from metadata_service.models.transform import to_json_sanitized_dict
@@ -185,7 +186,7 @@ def test_upload_metadata_to_gcs_with_prerelease(mocker, valid_metadata_upload_fi
     gcs_upload.upload_metadata_to_gcs(
         "my_bucket",
         metadata_file_path,
-        prerelease_image_tag,
+        ValidatorOptions(prerelease_tag=prerelease_image_tag),
     )

     gcs_upload._latest_upload.assert_not_called()
==============================
@@ -11,12 +11,17 @@

 PolymorphicRegistryEntry = Union[ConnectorRegistrySourceDefinition, ConnectorRegistryDestinationDefinition]

-def _is_docker_repository_overridden(metadata_entry: LatestMetadataEntry, registry_entry: PolymorphicRegistryEntry,) -> bool:
+
+def _is_docker_repository_overridden(
+    metadata_entry: LatestMetadataEntry,
+    registry_entry: PolymorphicRegistryEntry,
+) -> bool:
     """Check if the docker repository is overridden in the registry entry."""
     registry_entry_docker_repository = registry_entry.dockerRepository
     metadata_docker_repository = metadata_entry.metadata_definition.data.dockerRepository
     return registry_entry_docker_repository != metadata_docker_repository

+
 def _get_version_specific_registry_entry_file_path(registry_entry, registry_name):
     """Get the file path for the version specific registry entry file."""
     docker_reposiory = registry_entry.dockerRepository
@@ -26,11 +31,15 @@ def _get_version_specific_registry_entry_file_path(registry_entry, registry_name
     registry_entry_file_path = assumed_metadata_file_path.replace(METADATA_FILE_NAME, registry_name)
     return registry_entry_file_path

+
 def _check_for_invalid_write_path(write_path: str):
     """Check if the write path is valid."""

     if "latest" in write_path:
-        raise ValueError("Cannot write to a path that contains 'latest'. That is reserved for the latest metadata file and its direct transformations")
+        raise ValueError(
+            "Cannot write to a path that contains 'latest'. That is reserved for the latest metadata file and its direct transformations"
+        )


 def write_registry_to_overrode_file_paths(
     registry_entry: PolymorphicRegistryEntry,
@@ -72,7 +81,8 @@ def write_registry_to_overrode_file_paths(
     overrode_registry_entry_version_write_path = _get_version_specific_registry_entry_file_path(registry_entry, registry_name)
     _check_for_invalid_write_path(overrode_registry_entry_version_write_path)
     logger.info(f"Writing registry entry to {overrode_registry_entry_version_write_path}")
-    file_handle = registry_directory_manager.write_data(registry_entry_json.encode("utf-8"), ext="json", key=overrode_registry_entry_version_write_path)
+    file_handle = registry_directory_manager.write_data(
+        registry_entry_json.encode("utf-8"), ext="json", key=overrode_registry_entry_version_write_path
+    )
     logger.info(f"Successfully wrote registry entry to {file_handle.public_url}")
     return file_handle

==============================
@@ -63,7 +63,6 @@ async def _run(self) -> StepResult:


 class MetadataUpload(PoetryRun):
-
     # When the metadata service exits with this code, it means the metadata is valid but the upload was skipped because the metadata is already uploaded
     skipped_exit_code = 5

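The skipped exit code lets a caller distinguish "already uploaded" from a real failure. A sketch of how a CI wrapper might branch on it; the command line is illustrative, only the exit-code-5 convention comes from this diff:

    import subprocess

    result = subprocess.run(["poetry", "run", "metadata_service", "upload", "metadata.yaml", "my-bucket"])
    if result.returncode == 0:
        print("metadata uploaded")
    elif result.returncode == 5:
        # Valid metadata, upload skipped because this version is already uploaded.
        print("upload skipped")
    else:
        raise RuntimeError(f"metadata upload failed with exit code {result.returncode}")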
