From 9cb4fbfc211825e155a231ec49baa4d7a3695c28 Mon Sep 17 00:00:00 2001 From: Augustin Date: Tue, 29 Nov 2022 11:52:11 +0100 Subject: [PATCH] SAT: stricter integer validation (#19820) --- .../bases/source-acceptance-test/CHANGELOG.md | 2 ++ .../bases/source-acceptance-test/Dockerfile | 2 +- .../source_acceptance_test/utils/asserts.py | 17 +++++++++++------ .../unit_tests/test_asserts.py | 12 ++++++++++-- 4 files changed, 24 insertions(+), 9 deletions(-) diff --git a/airbyte-integrations/bases/source-acceptance-test/CHANGELOG.md b/airbyte-integrations/bases/source-acceptance-test/CHANGELOG.md index ce9ad120f3e6..3309a34985f2 100644 --- a/airbyte-integrations/bases/source-acceptance-test/CHANGELOG.md +++ b/airbyte-integrations/bases/source-acceptance-test/CHANGELOG.md @@ -1,5 +1,7 @@ # Changelog +## 0.2.20 +Stricter integer field schema validation. [#19820](https://github.com/airbytehq/airbyte/pull/19820). ## 0.2.19 Test for exposed secrets: const values can not hold secrets. [#19465](https://github.com/airbytehq/airbyte/pull/19465). diff --git a/airbyte-integrations/bases/source-acceptance-test/Dockerfile b/airbyte-integrations/bases/source-acceptance-test/Dockerfile index 660a56ca4a98..bc039a5c191b 100644 --- a/airbyte-integrations/bases/source-acceptance-test/Dockerfile +++ b/airbyte-integrations/bases/source-acceptance-test/Dockerfile @@ -33,7 +33,7 @@ COPY pytest.ini setup.py ./ COPY source_acceptance_test ./source_acceptance_test RUN pip install . -LABEL io.airbyte.version=0.2.19 +LABEL io.airbyte.version=0.2.20 LABEL io.airbyte.name=airbyte/source-acceptance-test ENTRYPOINT ["python", "-m", "pytest", "-p", "source_acceptance_test.plugin", "-r", "fEsx"] diff --git a/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/utils/asserts.py b/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/utils/asserts.py index 5202c5e8abba..64e0680c9acd 100644 --- a/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/utils/asserts.py +++ b/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/utils/asserts.py @@ -9,7 +9,7 @@ import pendulum from airbyte_cdk.models import AirbyteRecordMessage, ConfiguredAirbyteCatalog -from jsonschema import Draft7Validator, FormatChecker, FormatError, ValidationError +from jsonschema import Draft7Validator, FormatChecker, FormatError, ValidationError, validators # fmt: off timestamp_regex = re.compile((r"^\d{4}-\d?\d-\d?\d" # date @@ -18,6 +18,11 @@ r".*$")) # timezone # fmt: on +# In Json schema, numbers with a zero fractional part are considered integers. E.G. 1.0 is considered a valid integer +# For stricter type validation we don't want to keep this behavior. We want to consider integers in the Pythonic way. +strict_integer_type_checker = Draft7Validator.TYPE_CHECKER.redefine("integer", lambda _, value: isinstance(value, int)) +Draft7ValidatorWithStrictInteger = validators.extend(Draft7Validator, type_checker=strict_integer_type_checker) + class CustomFormatChecker(FormatChecker): @staticmethod @@ -45,14 +50,14 @@ def verify_records_schema( """Check records against their schemas from the catalog, yield error messages. Only first record with error will be yielded for each stream. """ - validators = {} + stream_validators = {} for stream in catalog.streams: - validators[stream.stream.name] = Draft7Validator(stream.stream.json_schema, format_checker=CustomFormatChecker()) - + stream_validators[stream.stream.name] = Draft7ValidatorWithStrictInteger( + stream.stream.json_schema, format_checker=CustomFormatChecker() + ) stream_errors = defaultdict(dict) - for record in records: - validator = validators.get(record.stream) + validator = stream_validators.get(record.stream) if not validator: logging.error(f"Record from the {record.stream} stream that is not in the catalog.") continue diff --git a/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_asserts.py b/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_asserts.py index 46d495abbbb0..19211c47eda9 100644 --- a/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_asserts.py +++ b/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_asserts.py @@ -20,6 +20,7 @@ def record_schema_fixture(): "properties": { "text_or_null": {"type": ["null", "string"]}, "number_or_null": {"type": ["null", "number"]}, + "integer_or_null": {"type": ["null", "integer"]}, "text": {"type": ["string"]}, "number": {"type": ["number"]}, }, @@ -66,6 +67,8 @@ def test_verify_records_schema(configured_catalog: ConfiguredAirbyteCatalog): "text": "text", "number": "text", # wrong format }, + {"text_or_null": None, "number_or_null": None, "text": "text", "number": 10.3, "integer": 1}, + {"text_or_null": None, "number_or_null": None, "text": "text", "number": 10.3, "integer_or_null": 1.0}, # wrong format ] records = [AirbyteRecordMessage(stream="my_stream", data=record, emitted_at=0) for record in records] @@ -75,8 +78,13 @@ def test_verify_records_schema(configured_catalog: ConfiguredAirbyteCatalog): assert "my_stream" in streams_with_errors assert len(streams_with_errors) == 1, "only one stream" - assert len(streams_with_errors["my_stream"]) == 3, "only first error for each field" - assert errors == ["123 is not of type 'null', 'string'", "'text' is not of type 'number'", "None is not of type 'string'"] + assert len(streams_with_errors["my_stream"]) == 4, "only first error for each field" + assert errors == [ + "123 is not of type 'null', 'string'", + "'text' is not of type 'number'", + "None is not of type 'string'", + "1.0 is not of type 'null', 'integer'", + ] @pytest.mark.parametrize(