diff --git a/airbyte/_executors/python.py b/airbyte/_executors/python.py
index c520afae..c6578c0d 100644
--- a/airbyte/_executors/python.py
+++ b/airbyte/_executors/python.py
@@ -10,7 +10,7 @@
 from typing import TYPE_CHECKING, Literal
 
 from overrides import overrides
-from rich import print
+from rich import print  # noqa: A004  # Allow shadowing the built-in
 
 from airbyte import exceptions as exc
 from airbyte._executors.base import Executor
diff --git a/airbyte/_executors/util.py b/airbyte/_executors/util.py
index 4086a446..9a0990db 100644
--- a/airbyte/_executors/util.py
+++ b/airbyte/_executors/util.py
@@ -8,7 +8,7 @@
 import requests
 import yaml
 from requests import HTTPError
-from rich import print
+from rich import print  # noqa: A004  # Allow shadowing the built-in
 
 from airbyte import exceptions as exc
 from airbyte._executors.declarative import DeclarativeExecutor
diff --git a/airbyte/_processors/sql/postgres.py b/airbyte/_processors/sql/postgres.py
index 16cd5703..ffa9eba2 100644
--- a/airbyte/_processors/sql/postgres.py
+++ b/airbyte/_processors/sql/postgres.py
@@ -3,8 +3,11 @@
 
 from __future__ import annotations
 
+import functools
+
 from overrides import overrides
 
+from airbyte._util.name_normalizers import LowerCaseNormalizer
 from airbyte._writers.jsonl import JsonlWriter
 from airbyte.secrets.base import SecretString
 from airbyte.shared.sql_processor import SqlConfig, SqlProcessorBase
@@ -35,6 +38,24 @@ def get_database_name(self) -> str:
         return self.database
 
 
+class PostgresNormalizer(LowerCaseNormalizer):
+    """A name normalizer for Postgres.
+
+    Postgres has specific identifier length limits:
+    - Table names are limited to 63 characters.
+    - Column names are limited to 63 characters.
+
+    The Postgres normalizer inherits from the default LowerCaseNormalizer class, and
+    additionally truncates column and table names to 63 characters.
+    """
+
+    @staticmethod
+    @functools.cache
+    def normalize(name: str) -> str:
+        """Normalize the name, truncating to 63 characters."""
+        return LowerCaseNormalizer.normalize(name)[:63]
+
+
 class PostgresSqlProcessor(SqlProcessorBase):
     """A Postgres implementation of the cache.
@@ -49,3 +70,6 @@ class PostgresSqlProcessor(SqlProcessorBase):
     supports_merge_insert = False
     file_writer_class = JsonlWriter
     sql_config: PostgresConfig
+
+    normalizer = PostgresNormalizer
+    """A Postgres-specific name normalizer for table and column names."""
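The new `PostgresNormalizer` is pinned down by the unit tests added at the bottom of this diff. A minimal sketch of that behavior (the `long_name` value is illustrative):

```python
from airbyte._processors.sql.postgres import PostgresNormalizer
from airbyte._util.name_normalizers import LowerCaseNormalizer

long_name = "c" * 70  # Illustrative name, longer than Postgres's 63-char identifier limit

# The default normalizer lowercases but does not truncate:
assert LowerCaseNormalizer.normalize(long_name) == "c" * 70

# The Postgres normalizer truncates the normalized name to 63 characters:
assert PostgresNormalizer.normalize(long_name) == "c" * 63

# Casing and special-character handling are inherited from LowerCaseNormalizer:
assert PostgresNormalizer.normalize("Test.String") == "test_string"
```

Because `normalize` is wrapped in `functools.cache`, repeated normalizations of the same name reuse the cached result.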
diff --git a/airbyte/_util/temp_files.py b/airbyte/_util/temp_files.py
index e1e1e91f..93834d17 100644
--- a/airbyte/_util/temp_files.py
+++ b/airbyte/_util/temp_files.py
@@ -23,7 +23,7 @@ def as_temp_files(files_contents: list[dict | str]) -> Generator[list[str], Any,
     try:
         for content in files_contents:
             use_json = isinstance(content, dict)
-            temp_file = tempfile.NamedTemporaryFile(
+            temp_file = tempfile.NamedTemporaryFile(  # noqa: SIM115  # Avoiding context manager
                 mode="w+t",
                 delete=False,
                 encoding="utf-8",
diff --git a/airbyte/_writers/jsonl.py b/airbyte/_writers/jsonl.py
index 39bf198b..1f5d1b4b 100644
--- a/airbyte/_writers/jsonl.py
+++ b/airbyte/_writers/jsonl.py
@@ -35,7 +35,7 @@ def _open_new_file(
         """Open a new file for writing."""
         return cast(
             IO[str],
-            gzip.open(
+            gzip.open(  # noqa: SIM115  # Avoiding context manager
                 file_path,
                 mode="wt",
                 encoding="utf-8",
diff --git a/airbyte/cloud/workspaces.py b/airbyte/cloud/workspaces.py
index 48b0c427..6bd52a24 100644
--- a/airbyte/cloud/workspaces.py
+++ b/airbyte/cloud/workspaces.py
@@ -215,10 +215,10 @@ def _deploy_connection(
         source_id: str
         if isinstance(source, Source):
             selected_streams = selected_streams or source.get_selected_streams()
-            if source._deployed_source_id:  # noqa: SLF001
-                source_id = source._deployed_source_id  # noqa: SLF001
-            else:
-                source_id = self._deploy_source(source)
+            source_id = (
+                source._deployed_source_id  # noqa: SLF001  # Access to non-public API
+                or self._deploy_source(source)
+            )
         else:
             source_id = source
         if not selected_streams:
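The `workspaces.py` change above collapses an `if`/`else` into a single `or` expression. This relies on Python's short-circuit evaluation: `a or b` returns `a` when it is truthy and only evaluates `b` otherwise, so the deploy call runs only when no deployed ID exists. A standalone sketch with hypothetical stand-in names:

```python
def deploy_source_stub() -> str:
    """Hypothetical stand-in for self._deploy_source(source)."""
    print("Deploying source...")
    return "new-source-id"

deployed_source_id: str | None = None  # Stand-in for source._deployed_source_id
source_id = deployed_source_id or deploy_source_stub()  # Deploys; -> "new-source-id"

deployed_source_id = "existing-source-id"
source_id = deployed_source_id or deploy_source_stub()  # No deploy; -> "existing-source-id"
```

Behavior is unchanged from the old `if source._deployed_source_id:` branch, since both test truthiness rather than `is not None`.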
diff --git a/airbyte/secrets/config.py b/airbyte/secrets/config.py
index 20629f23..f793043d 100644
--- a/airbyte/secrets/config.py
+++ b/airbyte/secrets/config.py
@@ -77,6 +77,6 @@ def disable_secret_source(source: SecretManager | SecretSourceEnum) -> None:
         return
 
     # Else, remove by name
-    for s in _SECRETS_SOURCES:
+    for s in list(_SECRETS_SOURCES):
         if s.name == str(source):
             _SECRETS_SOURCES.remove(s)
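Iterating over a snapshot matters here because calling `list.remove()` inside a `for` loop over the same list shifts the remaining elements and skips the next one. A minimal demonstration (the source names are made up); note that `list(...)` already returns a new list, so no extra `.copy()` is needed:

```python
sources = ["env", "dotenv", "dotenv", "gcp"]

for s in sources:  # Bug: mutating the list we are iterating
    if s == "dotenv":
        sources.remove(s)
print(sources)  # ['env', 'dotenv', 'gcp'] -- the second 'dotenv' was skipped

sources = ["env", "dotenv", "dotenv", "gcp"]
for s in list(sources):  # Iterate a snapshot; mutate the original safely
    if s == "dotenv":
        sources.remove(s)
print(sources)  # ['env', 'gcp']
```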
diff --git a/airbyte/shared/sql_processor.py b/airbyte/shared/sql_processor.py
index e53896c5..39cd9b51 100644
--- a/airbyte/shared/sql_processor.py
+++ b/airbyte/shared/sql_processor.py
@@ -497,8 +497,20 @@ def _get_temp_table_name(
         batch_id: str | None = None,  # ULID of the batch
     ) -> str:
         """Return a new (unique) temporary table name."""
-        batch_id = batch_id or str(ulid.ULID())
-        return self.normalizer.normalize(f"{stream_name}_{batch_id}")
+        if not batch_id:
+            batch_id = str(ulid.ULID())
+
+        # Use the first 6 and last 3 characters of the ULID. This gives good uniqueness while
+        # limiting the table name suffix to 10 characters, including the underscore.
+        suffix = (
+            f"{batch_id[:6]}{batch_id[-3:]}"
+            if len(batch_id) > 9  # noqa: PLR2004  # Allow magic int value
+            else batch_id
+        )
+
+        # Note: The normalizer may truncate the table name if the database has a name length limit.
+        # For instance, the Postgres normalizer will enforce a 63-character limit on table names.
+        return self.normalizer.normalize(f"{stream_name}_{suffix}")
 
     def _fully_qualified(
         self,
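A worked example of the suffix derivation, using the well-known example ULID (any 26-character ULID string behaves the same; the stream name is hypothetical):

```python
batch_id = "01ARZ3NDEKTSV4RRFFQ69G5FAV"  # Example ULID: 10-char timestamp + 16-char randomness

suffix = (
    f"{batch_id[:6]}{batch_id[-3:]}"
    if len(batch_id) > 9
    else batch_id
)
assert suffix == "01ARZ3FAV"  # 9 chars: time-ordered prefix + random tail

table_name = f"my_stream_{suffix}"  # Suffix adds 10 characters, including the underscore
```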
hash = "sha256:50e30b437cebef547bd5c3edf9ce81343e5dd7c737cb36ccb4fe83573f3d392e"}, + {file = "ruff-0.6.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c44536df7b93a587de690e124b89bd47306fddd59398a0fb12afd6133c7b3818"}, + {file = "ruff-0.6.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ea086601b22dc5e7693a78f3fcfc460cceabfdf3bdc36dc898792aba48fbad6"}, + {file = "ruff-0.6.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b52387d3289ccd227b62102c24714ed75fbba0b16ecc69a923a37e3b5e0aaaa"}, + {file = "ruff-0.6.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:0308610470fcc82969082fc83c76c0d362f562e2f0cdab0586516f03a4e06ec6"}, + {file = "ruff-0.6.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:803b96dea21795a6c9d5bfa9e96127cc9c31a1987802ca68f35e5c95aed3fc0d"}, + {file = "ruff-0.6.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:66dbfea86b663baab8fcae56c59f190caba9398df1488164e2df53e216248baa"}, + {file = "ruff-0.6.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:34d5efad480193c046c86608dbba2bccdc1c5fd11950fb271f8086e0c763a5d1"}, + {file = "ruff-0.6.4-py3-none-win32.whl", hash = "sha256:f0f8968feea5ce3777c0d8365653d5e91c40c31a81d95824ba61d871a11b8523"}, + {file = "ruff-0.6.4-py3-none-win_amd64.whl", hash = "sha256:549daccee5227282289390b0222d0fbee0275d1db6d514550d65420053021a58"}, + {file = "ruff-0.6.4-py3-none-win_arm64.whl", hash = "sha256:ac4b75e898ed189b3708c9ab3fc70b79a433219e1e87193b4f2b77251d058d14"}, + {file = "ruff-0.6.4.tar.gz", hash = "sha256:ac3b5bfbee99973f80aa1b7cbd1c9cbce200883bdd067300c22a6cc1c7fba212"}, ] [[package]] @@ -3665,4 +3666,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = ">=3.10,<4.0" -content-hash = "4c69f134b7ab00e30c39c905ec65b853d832be2bfd9141e2fe86bcba0e81070f" +content-hash = "6be92e6d85a72b9a9745638df95bc8138a22ccd3762f36b21e705bbe247966bc" diff --git a/pyproject.toml b/pyproject.toml index 2fb655a6..94193f97 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,7 +59,7 @@ pdoc = "^14.3.0" pytest = "^8.2.0" pytest-docker = "^3.1.1" pytest-mypy = "^0.10.3" -ruff = "0.4.1" +ruff = "^0.6.4" types-jsonschema = "^4.20.0.0" types-requests = "2.31.0.4" freezegun = "^1.4.0" @@ -188,7 +188,7 @@ ignore = [ "S", # flake8-bandit (noisy, security related) "SIM910", # Allow "None" as second argument to Dict.get(). "Explicit is better than implicit." "TD002", # Require author for TODOs - "TRIO", # flake8-trio (opinionated, noisy) + "ASYNC1", # flake8-trio (opinionated, noisy) "INP001", # Dir 'examples' is part of an implicit namespace package. Add an __init__.py. # TODO: Consider re-enabling these before release: diff --git a/tests/integration_tests/test_source_test_fixture.py b/tests/integration_tests/test_source_test_fixture.py index ffc65e5f..9993d5ea 100644 --- a/tests/integration_tests/test_source_test_fixture.py +++ b/tests/integration_tests/test_source_test_fixture.py @@ -130,6 +130,15 @@ def expected_test_stream_data() -> dict[str, list[dict[str, str | int]]]: }, ], "always-empty-stream": [], + "primary-key-with-dot": [ + # Expect field names lowercase, with '.' 
diff --git a/airbyte/types.py b/airbyte/types.py
index 45820c4b..382f5135 100644
--- a/airbyte/types.py
+++ b/airbyte/types.py
@@ -1,3 +1,4 @@
+# noqa: A005  # Allow shadowing the built-in 'types' module
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 
 """Type conversion methods for SQL Caches."""
@@ -7,7 +8,7 @@
 from typing import cast
 
 import sqlalchemy
-from rich import print
+from rich import print  # noqa: A004  # Allow shadowing the built-in
 
 
 # Compare to documentation here: https://docs.airbyte.com/understanding-airbyte/supported-data-types
diff --git a/airbyte/validate.py b/airbyte/validate.py
index ee58fd00..2c47f0df 100644
--- a/airbyte/validate.py
+++ b/airbyte/validate.py
@@ -15,7 +15,7 @@
 from pathlib import Path
 
 import yaml
-from rich import print
+from rich import print  # noqa: A004  # Allow shadowing the built-in
 
 import airbyte as ab
 from airbyte import exceptions as exc
diff --git a/poetry.lock b/poetry.lock
index 2c55f50c..379d1785 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -3049,28 +3049,29 @@ pyasn1 = ">=0.1.3"
 
 [[package]]
 name = "ruff"
-version = "0.4.1"
+version = "0.6.4"
 description = "An extremely fast Python linter and code formatter, written in Rust."
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "ruff-0.4.1-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:2d9ef6231e3fbdc0b8c72404a1a0c46fd0dcea84efca83beb4681c318ea6a953"},
-    {file = "ruff-0.4.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:9485f54a7189e6f7433e0058cf8581bee45c31a25cd69009d2a040d1bd4bfaef"},
-    {file = "ruff-0.4.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2921ac03ce1383e360e8a95442ffb0d757a6a7ddd9a5be68561a671e0e5807e"},
-    {file = "ruff-0.4.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eec8d185fe193ad053eda3a6be23069e0c8ba8c5d20bc5ace6e3b9e37d246d3f"},
-    {file = "ruff-0.4.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:baa27d9d72a94574d250f42b7640b3bd2edc4c58ac8ac2778a8c82374bb27984"},
-    {file = "ruff-0.4.1-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:f1ee41580bff1a651339eb3337c20c12f4037f6110a36ae4a2d864c52e5ef954"},
-    {file = "ruff-0.4.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0926cefb57fc5fced629603fbd1a23d458b25418681d96823992ba975f050c2b"},
-    {file = "ruff-0.4.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2c6e37f2e3cd74496a74af9a4fa67b547ab3ca137688c484749189bf3a686ceb"},
-    {file = "ruff-0.4.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:efd703a5975ac1998c2cc5e9494e13b28f31e66c616b0a76e206de2562e0843c"},
-    {file = "ruff-0.4.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:b92f03b4aa9fa23e1799b40f15f8b95cdc418782a567d6c43def65e1bbb7f1cf"},
-    {file = "ruff-0.4.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:1c859f294f8633889e7d77de228b203eb0e9a03071b72b5989d89a0cf98ee262"},
-    {file = "ruff-0.4.1-py3-none-musllinux_1_2_i686.whl", hash = "sha256:b34510141e393519a47f2d7b8216fec747ea1f2c81e85f076e9f2910588d4b64"},
-    {file = "ruff-0.4.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:6e68d248ed688b9d69fd4d18737edcbb79c98b251bba5a2b031ce2470224bdf9"},
-    {file = "ruff-0.4.1-py3-none-win32.whl", hash = "sha256:b90506f3d6d1f41f43f9b7b5ff845aeefabed6d2494307bc7b178360a8805252"},
-    {file = "ruff-0.4.1-py3-none-win_amd64.whl", hash = "sha256:c7d391e5936af5c9e252743d767c564670dc3889aff460d35c518ee76e4b26d7"},
-    {file = "ruff-0.4.1-py3-none-win_arm64.whl", hash = "sha256:a1eaf03d87e6a7cd5e661d36d8c6e874693cb9bc3049d110bc9a97b350680c43"},
-    {file = "ruff-0.4.1.tar.gz", hash = "sha256:d592116cdbb65f8b1b7e2a2b48297eb865f6bdc20641879aa9d7b9c11d86db79"},
+    {file = "ruff-0.6.4-py3-none-linux_armv6l.whl", hash = "sha256:c4b153fc152af51855458e79e835fb6b933032921756cec9af7d0ba2aa01a258"},
+    {file = "ruff-0.6.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:bedff9e4f004dad5f7f76a9d39c4ca98af526c9b1695068198b3bda8c085ef60"},
+    {file = "ruff-0.6.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d02a4127a86de23002e694d7ff19f905c51e338c72d8e09b56bfb60e1681724f"},
+    {file = "ruff-0.6.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7862f42fc1a4aca1ea3ffe8a11f67819d183a5693b228f0bb3a531f5e40336fc"},
+    {file = "ruff-0.6.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eebe4ff1967c838a1a9618a5a59a3b0a00406f8d7eefee97c70411fefc353617"},
+    {file = "ruff-0.6.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:932063a03bac394866683e15710c25b8690ccdca1cf192b9a98260332ca93408"},
+    {file = "ruff-0.6.4-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:50e30b437cebef547bd5c3edf9ce81343e5dd7c737cb36ccb4fe83573f3d392e"},
+    {file = "ruff-0.6.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c44536df7b93a587de690e124b89bd47306fddd59398a0fb12afd6133c7b3818"},
+    {file = "ruff-0.6.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ea086601b22dc5e7693a78f3fcfc460cceabfdf3bdc36dc898792aba48fbad6"},
+    {file = "ruff-0.6.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b52387d3289ccd227b62102c24714ed75fbba0b16ecc69a923a37e3b5e0aaaa"},
+    {file = "ruff-0.6.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:0308610470fcc82969082fc83c76c0d362f562e2f0cdab0586516f03a4e06ec6"},
+    {file = "ruff-0.6.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:803b96dea21795a6c9d5bfa9e96127cc9c31a1987802ca68f35e5c95aed3fc0d"},
+    {file = "ruff-0.6.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:66dbfea86b663baab8fcae56c59f190caba9398df1488164e2df53e216248baa"},
+    {file = "ruff-0.6.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:34d5efad480193c046c86608dbba2bccdc1c5fd11950fb271f8086e0c763a5d1"},
+    {file = "ruff-0.6.4-py3-none-win32.whl", hash = "sha256:f0f8968feea5ce3777c0d8365653d5e91c40c31a81d95824ba61d871a11b8523"},
+    {file = "ruff-0.6.4-py3-none-win_amd64.whl", hash = "sha256:549daccee5227282289390b0222d0fbee0275d1db6d514550d65420053021a58"},
+    {file = "ruff-0.6.4-py3-none-win_arm64.whl", hash = "sha256:ac4b75e898ed189b3708c9ab3fc70b79a433219e1e87193b4f2b77251d058d14"},
+    {file = "ruff-0.6.4.tar.gz", hash = "sha256:ac3b5bfbee99973f80aa1b7cbd1c9cbce200883bdd067300c22a6cc1c7fba212"},
 ]
 
 [[package]]
@@ -3665,4 +3666,4 @@ files = [
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.10,<4.0"
-content-hash = "4c69f134b7ab00e30c39c905ec65b853d832be2bfd9141e2fe86bcba0e81070f"
+content-hash = "6be92e6d85a72b9a9745638df95bc8138a22ccd3762f36b21e705bbe247966bc"
diff --git a/pyproject.toml b/pyproject.toml
index 2fb655a6..94193f97 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -59,7 +59,7 @@ pdoc = "^14.3.0"
 pytest = "^8.2.0"
 pytest-docker = "^3.1.1"
 pytest-mypy = "^0.10.3"
-ruff = "0.4.1"
+ruff = "^0.6.4"
 types-jsonschema = "^4.20.0.0"
 types-requests = "2.31.0.4"
 freezegun = "^1.4.0"
@@ -188,7 +188,7 @@ ignore = [
     "S",      # flake8-bandit (noisy, security related)
     "SIM910", # Allow "None" as second argument to Dict.get(). "Explicit is better than implicit."
     "TD002",  # Require author for TODOs
-    "TRIO",   # flake8-trio (opinionated, noisy)
+    "ASYNC1", # flake8-async, formerly flake8-trio (opinionated, noisy)
     "INP001", # Dir 'examples' is part of an implicit namespace package. Add an __init__.py.
 
     # TODO: Consider re-enabling these before release:
diff --git a/tests/integration_tests/test_source_test_fixture.py b/tests/integration_tests/test_source_test_fixture.py
index ffc65e5f..9993d5ea 100644
--- a/tests/integration_tests/test_source_test_fixture.py
+++ b/tests/integration_tests/test_source_test_fixture.py
@@ -130,6 +130,15 @@ def expected_test_stream_data() -> dict[str, list[dict[str, str | int]]]:
         },
     ],
     "always-empty-stream": [],
+    "primary-key-with-dot": [
+        # Expect field names lowercase, with '.' replaced by '_':
+        {
+            "table1_column1": "value1",
+            "table1_column2": 1,
+            "table1_empty_column": None,
+            "table1_big_number": 1234567890123456,
+        }
+    ],
 }
 
 
@@ -325,7 +334,7 @@ def test_file_write_and_cleanup() -> None:
 
-    # There are three streams, but only two of them have data:
+    # There are four streams, but only three of them have data:
     assert (
-        len(list(Path(temp_dir_2).glob("*.jsonl.gz"))) == 2
+        len(list(Path(temp_dir_2).glob("*.jsonl.gz"))) == 3
     ), "Expected files to exist"
 
     with suppress(Exception):
@@ -342,7 +351,9 @@ def test_sync_to_duckdb(
 
     result: ReadResult = source.read(cache)
 
-    assert result.processed_records == 3
+    assert result.processed_records == sum(
+        len(stream_data) for stream_data in expected_test_stream_data.values()
+    )
     assert_data_matches_cache(expected_test_stream_data, cache)
 
 
@@ -350,13 +361,18 @@ def test_read_result_mapping():
     source = ab.get_source("source-test", config={"apiKey": "test"})
     source.select_all_streams()
     result: ReadResult = source.read(ab.new_local_cache())
-    assert len(result) == 3
+    assert len(result) == 4
     assert isinstance(result, Mapping)
     assert "stream1" in result
     assert "stream2" in result
     assert "always-empty-stream" in result
     assert "stream3" not in result
-    assert result.keys() == {"stream1", "stream2", "always-empty-stream"}
+    assert result.keys() == {
+        "stream1",
+        "stream2",
+        "always-empty-stream",
+        "primary-key-with-dot",
+    }
 
 
 def test_dataset_list_and_len(expected_test_stream_data):
@@ -381,7 +397,12 @@
     assert "stream2" in result
     assert "always-empty-stream" in result
     assert "stream3" not in result
-    assert result.keys() == {"stream1", "stream2", "always-empty-stream"}
+    assert result.keys() == {
+        "stream1",
+        "stream2",
+        "always-empty-stream",
+        "primary-key-with-dot",
+    }
 
 
 def test_read_from_cache(
@@ -456,6 +477,10 @@ def test_merge_streams_in_cache(
     """
     Test that we can extend a cache with new streams
     """
+    expected_test_stream_data.pop(
+        "primary-key-with-dot"
+    )  # Stream not needed for this test.
+
     cache_name = str(ulid.ULID())
     source = ab.get_source("source-test", config={"apiKey": "test"})
     cache = ab.new_local_cache(cache_name)
@@ -552,7 +577,7 @@ def test_sync_with_merge_to_duckdb(
     result: ReadResult = source.read(cache)
     result: ReadResult = source.read(cache)
 
-    assert result.processed_records == 3
+    assert result.processed_records == 4
     for stream_name, expected_data in expected_test_stream_data.items():
         if len(cache[stream_name]) > 0:
             pd.testing.assert_frame_equal(
@@ -713,7 +738,10 @@ def test_lazy_dataset_from_source(
     for stream_name in source.get_available_streams():
         assert isinstance(stream_name, str)
 
-    lazy_dataset: LazyDataset = source.get_records(stream_name)
+    lazy_dataset: LazyDataset = source.get_records(
+        stream_name,
+        normalize_field_names=True,
+    )
     assert isinstance(lazy_dataset, LazyDataset)
 
     list_data = list(lazy_dataset)
@@ -756,7 +784,9 @@ def test_sync_with_merge_to_postgres(
     result: ReadResult = source.read(new_postgres_cache, write_strategy="merge")
     result: ReadResult = source.read(new_postgres_cache, write_strategy="merge")
 
-    assert result.processed_records == 3
+    assert result.processed_records == sum(
+        len(stream_data) for stream_data in expected_test_stream_data.values()
+    )
     assert_data_matches_cache(
         expected_test_stream_data=expected_test_stream_data,
         cache=new_postgres_cache,
@@ -780,7 +810,9 @@ def test_sync_to_postgres(
 
     result: ReadResult = source.read(new_postgres_cache)
 
-    assert result.processed_records == 3
+    assert result.processed_records == sum(
+        len(stream_data) for stream_data in expected_test_stream_data.values()
+    )
     for stream_name, expected_data in expected_test_stream_data.items():
         if len(new_postgres_cache[stream_name]) > 0:
             pd.testing.assert_frame_equal(
@@ -804,7 +836,9 @@ def test_sync_to_snowflake(
 
     result: ReadResult = source.read(new_snowflake_cache)
 
-    assert result.processed_records == 3
+    assert result.processed_records == sum(
+        len(stream_data) for stream_data in expected_test_stream_data.values()
+    )
     for stream_name, expected_data in expected_test_stream_data.items():
         if len(new_snowflake_cache[stream_name]) > 0:
             pd.testing.assert_frame_equal(
("some.col", "some_col", False), + ("_airbyte_meta", "_airbyte_meta", False, LowerCaseNormalizer), + ("Test_String", "test_string", False, LowerCaseNormalizer), + ("ANOTHER-TEST", "another_test", False, LowerCaseNormalizer), + ("another.test", "another_test", False, LowerCaseNormalizer), + ("sales(%)", "sales___", False, LowerCaseNormalizer), + ("something_-_-_-_else", "something_______else", False, LowerCaseNormalizer), + ("sales (%)", "sales____", False, LowerCaseNormalizer), + ("sales-%", "sales__", False, LowerCaseNormalizer), + ("sales(#)", "sales___", False, LowerCaseNormalizer), + ("sales (#)", "sales____", False, LowerCaseNormalizer), + ("sales--(#)", "sales_____", False, LowerCaseNormalizer), + ("sales-#", "sales__", False, LowerCaseNormalizer), + ("+1", "_1", False, LowerCaseNormalizer), + ("1", "_1", False, LowerCaseNormalizer), + ("2", "_2", False, LowerCaseNormalizer), + ("3", "_3", False, LowerCaseNormalizer), + ("-1", "_1", False, LowerCaseNormalizer), + ("+#$", "", True, LowerCaseNormalizer), + ("+", "", True, LowerCaseNormalizer), + ("", "", True, LowerCaseNormalizer), + ("*", "", True, LowerCaseNormalizer), + ("!@$", "", True, LowerCaseNormalizer), + ("some.col", "some_col", False, LowerCaseNormalizer), + # Check that the default normalizer doesn't truncate: + ("a" * 60, "a" * 60, False, LowerCaseNormalizer), + ("a" * 70, "a" * 70, False, LowerCaseNormalizer), + ("a" * 100, "a" * 100, False, LowerCaseNormalizer), + # Check that postgres normalizer truncates to 63 characters: + ("a" * 60, "a" * 60, False, PostgresNormalizer), + ("a" * 70, "a" * 63, False, PostgresNormalizer), + ("a" * 100, "a" * 63, False, PostgresNormalizer), + # Check that postgres also properly inherits special characters and casing: + ("Test.String", "test_string", False, PostgresNormalizer), ], ) def test_lower_case_normalizer( raw_value, expected_result, should_raise, + normalizer_class, ): - normalizer = LowerCaseNormalizer() - + normalizer = normalizer_class() if should_raise: with pytest.raises(exc.PyAirbyteNameNormalizationError): assert normalizer.normalize(raw_value) == expected_result