From e7db5794dfd036bac6ec0f5b8921625490902afa Mon Sep 17 00:00:00 2001 From: Felix Wang Date: Wed, 26 Jan 2022 23:18:15 -0800 Subject: [PATCH 1/2] Raise error for Redshift table names with >127 characters Signed-off-by: Felix Wang --- sdk/python/feast/errors.py | 7 +++++++ sdk/python/feast/infra/utils/aws_utils.py | 18 ++++++++++++++---- .../feature_repos/repo_configuration.py | 2 +- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/sdk/python/feast/errors.py b/sdk/python/feast/errors.py index 8a4f365b44..3fc8c7571e 100644 --- a/sdk/python/feast/errors.py +++ b/sdk/python/feast/errors.py @@ -243,6 +243,13 @@ def __init__(self, details): super().__init__(f"Redshift SQL Query failed to finish. Details: {details}") +class RedshiftTableNameTooLong(Exception): + def __init__(self, table_name: str): + super().__init__( + f"Redshift table names have a maximum length of 127 characters, but the table name {table_name} has length {len(table_name)} characters." + ) + + class EntityTimestampInferenceException(Exception): def __init__(self, expected_column_name: str): super().__init__( diff --git a/sdk/python/feast/infra/utils/aws_utils.py b/sdk/python/feast/infra/utils/aws_utils.py index 6211c75e37..b25454ca6a 100644 --- a/sdk/python/feast/infra/utils/aws_utils.py +++ b/sdk/python/feast/infra/utils/aws_utils.py @@ -15,7 +15,11 @@ wait_exponential, ) -from feast.errors import RedshiftCredentialsError, RedshiftQueryError +from feast.errors import ( + RedshiftCredentialsError, + RedshiftQueryError, + RedshiftTableNameTooLong, +) from feast.type_map import pa_to_redshift_value_type try: @@ -28,6 +32,9 @@ raise FeastExtrasDependencyImportError("aws", str(e)) +REDSHIFT_TABLE_NAME_MAX_LENGTH = 127 + + def get_redshift_data_client(aws_region: str): """ Get the Redshift Data API Service client for the given AWS region. @@ -184,7 +191,7 @@ def upload_df_to_redshift( iam_role: str, table_name: str, df: pd.DataFrame, -) -> None: +): """Uploads a Pandas DataFrame to Redshift as a new table. The caller is responsible for deleting the table when no longer necessary. @@ -208,9 +215,12 @@ def upload_df_to_redshift( table_name: The name of the new Redshift table where we copy the dataframe df: The Pandas DataFrame to upload - Returns: None - + Raises: + RedshiftTableNameTooLong: The specified table name is too long. """ + if len(table_name) > REDSHIFT_TABLE_NAME_MAX_LENGTH: + raise RedshiftTableNameTooLong(table_name) + bucket, key = get_bucket_and_key(s3_path) # Drop the index so that we dont have unnecessary columns diff --git a/sdk/python/tests/integration/feature_repos/repo_configuration.py b/sdk/python/tests/integration/feature_repos/repo_configuration.py index e1f4f0317c..f66a92c9d6 100644 --- a/sdk/python/tests/integration/feature_repos/repo_configuration.py +++ b/sdk/python/tests/integration/feature_repos/repo_configuration.py @@ -258,7 +258,7 @@ def construct_test_environment( worker_id: str = "worker_id", ) -> Environment: - _uuid = str(uuid.uuid4()).replace("-", "")[:8] + _uuid = str(uuid.uuid4()).replace("-", "")[:6] run_id = os.getenv("GITHUB_RUN_ID", default=None) run_id = f"gh_run_{run_id}_{_uuid}" if run_id else _uuid From b400483d75b2144d4d88020d223b2166dee26552 Mon Sep 17 00:00:00 2001 From: Felix Wang Date: Wed, 26 Jan 2022 23:19:18 -0800 Subject: [PATCH 2/2] Fix lint error Signed-off-by: Felix Wang --- sdk/python/tests/integration/registration/test_cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/python/tests/integration/registration/test_cli.py b/sdk/python/tests/integration/registration/test_cli.py index 2cf5ccd672..bba12056ce 100644 --- a/sdk/python/tests/integration/registration/test_cli.py +++ b/sdk/python/tests/integration/registration/test_cli.py @@ -174,10 +174,10 @@ def test_nullable_online_store(test_nullable_online_store) -> None: with tempfile.TemporaryDirectory() as repo_dir_name: try: + repo_path = Path(repo_dir_name) feature_store_yaml = make_feature_store_yaml( - project, test_nullable_online_store, repo_dir_name + project, test_nullable_online_store, repo_path ) - repo_path = Path(repo_dir_name) repo_config = repo_path / "feature_store.yaml"