From eb32133f546d1f6e740900010bd9eb16e46e9fe7 Mon Sep 17 00:00:00 2001 From: Antonio Rivero Date: Thu, 3 Nov 2022 09:30:11 -0300 Subject: [PATCH 1/5] BigQuery: - Add new method to parse unknown exception in BigQuery engines. This method is going to shed some light so we can show correct messages later using our regex --- superset/db_engine_specs/base.py | 12 ++++++ superset/db_engine_specs/bigquery.py | 4 ++ .../db_engine_specs/test_bigquery.py | 37 +++++++++++++++++++ 3 files changed, 53 insertions(+) diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py index 55a026541ade3..22e52d3057a12 100644 --- a/superset/db_engine_specs/base.py +++ b/superset/db_engine_specs/base.py @@ -430,6 +430,15 @@ def get_dbapi_exception_mapping(cls) -> Dict[Type[Exception], Type[Exception]]: """ return {} + @classmethod + def parse_error_exception(cls, exception_message: str) -> str: + """ + Each engine can implement and converge its own specific parser method + + :return: A parsed string off the original exception + """ + return exception_message + @classmethod def get_dbapi_mapped_exception(cls, exception: Exception) -> Exception: """ @@ -443,6 +452,9 @@ def get_dbapi_mapped_exception(cls, exception: Exception) -> Exception: """ new_exception = cls.get_dbapi_exception_mapping().get(type(exception)) if not new_exception: + # We are interested in just BigQuery exceptions + if cls.engine == "bigquery": + logger.error(cls.parse_error_exception(str(exception)), exc_info=True) return exception return new_exception(str(exception)) diff --git a/superset/db_engine_specs/bigquery.py b/superset/db_engine_specs/bigquery.py index ffeff31b17787..e5d99a72bafe9 100644 --- a/superset/db_engine_specs/bigquery.py +++ b/superset/db_engine_specs/bigquery.py @@ -578,3 +578,7 @@ def _get_fields(cls, cols: List[Dict[str, Any]]) -> List[Any]: "author__name" and "author__email", respectively. """ return [column(c["name"]).label(c["name"].replace(".", "__")) for c in cols] + + @classmethod + def parse_error_exception(cls, exception_message: str) -> str: + return exception_message.rsplit("\n")[0] diff --git a/tests/unit_tests/db_engine_specs/test_bigquery.py b/tests/unit_tests/db_engine_specs/test_bigquery.py index 8c33489faf598..86ac06d476b88 100644 --- a/tests/unit_tests/db_engine_specs/test_bigquery.py +++ b/tests/unit_tests/db_engine_specs/test_bigquery.py @@ -244,3 +244,40 @@ def test_mask_encrypted_extra_when_empty() -> None: from superset.db_engine_specs.bigquery import BigQueryEngineSpec assert BigQueryEngineSpec.mask_encrypted_extra(None) is None + + +def test_parse_error_message() -> None: + """ + Test that we parse a received message and just extract the useful information. + + Example errors: + 400 Syntax error: Expected "(" or keyword UNNEST but got "@" at [4:80] + + (job ID: 60c732c3-4b4e-4da6-9988-18ba20d389ee) + + -----Query Job SQL Follows----- + + | . | . | . | . | . | . | . | . | . | . | . | . | . | . | . | + 1:SELECT count(*) AS `count_1` + 2:FROM (SELECT `bigquery-public-data.census_bureau_acs.blockgroup_2010_5yr`.`geo_id` AS `bigquery_public_data_census_bureau_acs_blockgroup_2010_5yr_geo_id` + 3:FROM `bigquery-public-data.census_bureau_acs.blockgroup_2010_5yr` + 4:WHERE `bigquery-public-data.census_bureau_acs.blockgroup_2010_5yr`.`geo_id` IN @`geo_id_1`) AS `anon_1` + | . | . | . | . | . | . | . | . | . | . | . | . | . | . | . | + + + bigquery error: 400 Table \"case_detail_all_suites\" must be qualified with a dataset (e.g. dataset.table). + + (job ID: ddf30b05-44e8-4fbf-aa29-40bfccaed886) + -----Query Job SQL Follows----- + | . | . | . |\n 1:select * from case_detail_all_suites\n 2:LIMIT 1001\n | . | . | . | + """ + from superset.db_engine_specs.bigquery import BigQueryEngineSpec + + message = 'bigquery error: 400 Table "case_detail_all_suites" must be qualified with a dataset (e.g. dataset.table).\n\n(job ID: ddf30b05-44e8-4fbf-aa29-40bfccaed886)\n\n -----Query Job SQL Follows----- \n\n | . | . | . |\n 1:select * from case_detail_all_suites\n 2:LIMIT 1001\n | . | . | . |' + message_2 = '400 Syntax error: Expected "(" or keyword UNNEST but got "@" at [4:80]\n\n(job ID: 60c732c3-4b4e-4da6-9988-18ba20d389ee)\n\n -----Query Job SQL Follows----- \n\n | . | . | . | . | . | . | . | . | . | . | . | . | . | . | . |\n\n 1:SELECT count(*) AS `count_1` \n\n 2:FROM (SELECT `bigquery-public-data.census_bureau_acs.blockgroup_2010_5yr`.`geo_id` AS `bigquery_public_data_census_bureau_acs_blockgroup_2010_5yr_geo_id` \n\n 3:FROM `bigquery-public-data.census_bureau_acs.blockgroup_2010_5yr` \n\n 4:WHERE `bigquery-public-data.census_bureau_acs.blockgroup_2010_5yr`.`geo_id` IN @`geo_id_1`) AS `anon_1`' + expected_result = 'bigquery error: 400 Table "case_detail_all_suites" must be qualified with a dataset (e.g. dataset.table).' + expected_result_2 = ( + '400 Syntax error: Expected "(" or keyword UNNEST but got "@" at [4:80]' + ) + assert BigQueryEngineSpec.parse_error_exception(message) == expected_result + assert BigQueryEngineSpec.parse_error_exception(message_2) == expected_result_2 From fcf0bf88736ed2ab378da6cc8d3c9de43a3e565b Mon Sep 17 00:00:00 2001 From: Antonio Rivero Date: Thu, 3 Nov 2022 14:31:09 -0300 Subject: [PATCH 2/5] BigQuery: - Use new EngineType enum instead of raw string - Add better exception handling for new parse_error_exception method in case anything fails during the parsing - Add test to back the exception handling --- superset/db_engine_specs/base.py | 4 ++-- superset/db_engine_specs/bigquery.py | 7 +++++- superset/utils/core.py | 4 ++++ .../db_engine_specs/test_bigquery.py | 22 +++++++++++++++++++ 4 files changed, 34 insertions(+), 3 deletions(-) diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py index 22e52d3057a12..2baba04847032 100644 --- a/superset/db_engine_specs/base.py +++ b/superset/db_engine_specs/base.py @@ -64,7 +64,7 @@ from superset.sql_parse import ParsedQuery, Table from superset.superset_typing import ResultSetColumnType from superset.utils import core as utils -from superset.utils.core import ColumnSpec, GenericDataType, get_username +from superset.utils.core import ColumnSpec, EngineType, GenericDataType, get_username from superset.utils.hashing import md5_sha_from_str from superset.utils.network import is_hostname_valid, is_port_open @@ -453,7 +453,7 @@ def get_dbapi_mapped_exception(cls, exception: Exception) -> Exception: new_exception = cls.get_dbapi_exception_mapping().get(type(exception)) if not new_exception: # We are interested in just BigQuery exceptions - if cls.engine == "bigquery": + if cls.engine == EngineType.BIGQUERY: logger.error(cls.parse_error_exception(str(exception)), exc_info=True) return exception return new_exception(str(exception)) diff --git a/superset/db_engine_specs/bigquery.py b/superset/db_engine_specs/bigquery.py index e5d99a72bafe9..a11d7397f0e5e 100644 --- a/superset/db_engine_specs/bigquery.py +++ b/superset/db_engine_specs/bigquery.py @@ -581,4 +581,9 @@ def _get_fields(cls, cols: List[Dict[str, Any]]) -> List[Any]: @classmethod def parse_error_exception(cls, exception_message: str) -> str: - return exception_message.rsplit("\n")[0] + try: + return exception_message.rsplit("\n")[0] + except Exception: # pylint: disable=broad-except + # If for some reason we get an exception, for example, no new line + # We will return the original exception_message + return exception_message diff --git a/superset/utils/core.py b/superset/utils/core.py index cd992250ee7d6..ff10cd9ca0fc8 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -190,6 +190,10 @@ class DatasourceType(str, Enum): VIEW = "view" +class EngineType(str, Enum): + BIGQUERY = "bigquery" + + class HeaderDataType(TypedDict): notification_format: str owners: List[int] diff --git a/tests/unit_tests/db_engine_specs/test_bigquery.py b/tests/unit_tests/db_engine_specs/test_bigquery.py index 86ac06d476b88..3466606a61885 100644 --- a/tests/unit_tests/db_engine_specs/test_bigquery.py +++ b/tests/unit_tests/db_engine_specs/test_bigquery.py @@ -281,3 +281,25 @@ def test_parse_error_message() -> None: ) assert BigQueryEngineSpec.parse_error_exception(message) == expected_result assert BigQueryEngineSpec.parse_error_exception(message_2) == expected_result_2 + + +def test_parse_error_raises_exception() -> None: + """ + Test that we handle any exception we might get from calling the parse_error_exception method. + + Example errors: + 400 Syntax error: Expected "(" or keyword UNNEST but got "@" at [4:80] + bigquery error: 400 Table \"case_detail_all_suites\" must be qualified with a dataset (e.g. dataset.table). + """ + from superset.db_engine_specs.bigquery import BigQueryEngineSpec + + message = 'bigquery error: 400 Table "case_detail_all_suites" must be qualified with a dataset (e.g. dataset.table).' + message_2 = '400 Syntax error: Expected "(" or keyword UNNEST but got "@" at [4:80]' + message_3 = "6" + expected_result = 'bigquery error: 400 Table "case_detail_all_suites" must be qualified with a dataset (e.g. dataset.table).' + expected_result_2 = ( + '400 Syntax error: Expected "(" or keyword UNNEST but got "@" at [4:80]' + ) + assert BigQueryEngineSpec.parse_error_exception(message) == expected_result + assert BigQueryEngineSpec.parse_error_exception(message_2) == expected_result_2 + assert BigQueryEngineSpec.parse_error_exception(message_3) == "6" From 7cadba9bbb50c4e3f196577305807e9b3752e969 Mon Sep 17 00:00:00 2001 From: Antonio Rivero Date: Mon, 7 Nov 2022 16:27:38 -0300 Subject: [PATCH 3/5] BigQuery: - parse_error_exception returns an exception with the parsed message, because of that we can skip the compare with Bigquery and instead just make use of the existing logger upstream (sql_lab) --- superset/db_engine_specs/base.py | 13 +++++------ superset/db_engine_specs/bigquery.py | 10 +++++---- superset/utils/core.py | 4 ---- .../db_engine_specs/test_bigquery.py | 22 ++++++++++++++----- 4 files changed, 28 insertions(+), 21 deletions(-) diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py index 2baba04847032..cd1651648c58a 100644 --- a/superset/db_engine_specs/base.py +++ b/superset/db_engine_specs/base.py @@ -64,7 +64,7 @@ from superset.sql_parse import ParsedQuery, Table from superset.superset_typing import ResultSetColumnType from superset.utils import core as utils -from superset.utils.core import ColumnSpec, EngineType, GenericDataType, get_username +from superset.utils.core import ColumnSpec, GenericDataType, get_username from superset.utils.hashing import md5_sha_from_str from superset.utils.network import is_hostname_valid, is_port_open @@ -431,13 +431,13 @@ def get_dbapi_exception_mapping(cls) -> Dict[Type[Exception], Type[Exception]]: return {} @classmethod - def parse_error_exception(cls, exception_message: str) -> str: + def parse_error_exception(cls, exception: Exception) -> Exception: """ Each engine can implement and converge its own specific parser method - :return: A parsed string off the original exception + :return: An Exception with a parsed string off the original exception """ - return exception_message + return exception @classmethod def get_dbapi_mapped_exception(cls, exception: Exception) -> Exception: @@ -452,10 +452,7 @@ def get_dbapi_mapped_exception(cls, exception: Exception) -> Exception: """ new_exception = cls.get_dbapi_exception_mapping().get(type(exception)) if not new_exception: - # We are interested in just BigQuery exceptions - if cls.engine == EngineType.BIGQUERY: - logger.error(cls.parse_error_exception(str(exception)), exc_info=True) - return exception + return cls.parse_error_exception(exception) return new_exception(str(exception)) @classmethod diff --git a/superset/db_engine_specs/bigquery.py b/superset/db_engine_specs/bigquery.py index a11d7397f0e5e..6f10d75fcd45a 100644 --- a/superset/db_engine_specs/bigquery.py +++ b/superset/db_engine_specs/bigquery.py @@ -580,10 +580,12 @@ def _get_fields(cls, cols: List[Dict[str, Any]]) -> List[Any]: return [column(c["name"]).label(c["name"].replace(".", "__")) for c in cols] @classmethod - def parse_error_exception(cls, exception_message: str) -> str: + def parse_error_exception(cls, exception: Exception) -> Exception: try: - return exception_message.rsplit("\n")[0] + return Exception( + str(exception).rsplit("\n")[0] # pylint: disable=use-maxsplit-arg + ) except Exception: # pylint: disable=broad-except # If for some reason we get an exception, for example, no new line - # We will return the original exception_message - return exception_message + # We will return the original exception + return exception diff --git a/superset/utils/core.py b/superset/utils/core.py index ff10cd9ca0fc8..cd992250ee7d6 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -190,10 +190,6 @@ class DatasourceType(str, Enum): VIEW = "view" -class EngineType(str, Enum): - BIGQUERY = "bigquery" - - class HeaderDataType(TypedDict): notification_format: str owners: List[int] diff --git a/tests/unit_tests/db_engine_specs/test_bigquery.py b/tests/unit_tests/db_engine_specs/test_bigquery.py index 3466606a61885..ec6aaff3ba93d 100644 --- a/tests/unit_tests/db_engine_specs/test_bigquery.py +++ b/tests/unit_tests/db_engine_specs/test_bigquery.py @@ -279,8 +279,14 @@ def test_parse_error_message() -> None: expected_result_2 = ( '400 Syntax error: Expected "(" or keyword UNNEST but got "@" at [4:80]' ) - assert BigQueryEngineSpec.parse_error_exception(message) == expected_result - assert BigQueryEngineSpec.parse_error_exception(message_2) == expected_result_2 + assert ( + str(BigQueryEngineSpec.parse_error_exception(Exception(message))) + == expected_result + ) + assert ( + str(BigQueryEngineSpec.parse_error_exception(Exception(message_2))) + == expected_result_2 + ) def test_parse_error_raises_exception() -> None: @@ -300,6 +306,12 @@ def test_parse_error_raises_exception() -> None: expected_result_2 = ( '400 Syntax error: Expected "(" or keyword UNNEST but got "@" at [4:80]' ) - assert BigQueryEngineSpec.parse_error_exception(message) == expected_result - assert BigQueryEngineSpec.parse_error_exception(message_2) == expected_result_2 - assert BigQueryEngineSpec.parse_error_exception(message_3) == "6" + assert ( + str(BigQueryEngineSpec.parse_error_exception(Exception(message))) + == expected_result + ) + assert ( + str(BigQueryEngineSpec.parse_error_exception(Exception(message_2))) + == expected_result_2 + ) + assert str(BigQueryEngineSpec.parse_error_exception(Exception(message_3))) == "6" From 5c8bf8e86a59c37f822aa3442ce69b1b8322a0ad Mon Sep 17 00:00:00 2001 From: Antonio Rivero Date: Mon, 7 Nov 2022 19:27:12 -0300 Subject: [PATCH 4/5] BigQuery: - Update our method and tests so bigquery error: portion doesn't appear in the message --- superset/db_engine_specs/bigquery.py | 5 ++- .../db_engine_specs/test_bigquery.py | 38 ++----------------- 2 files changed, 8 insertions(+), 35 deletions(-) diff --git a/superset/db_engine_specs/bigquery.py b/superset/db_engine_specs/bigquery.py index 6f10d75fcd45a..3a93f0031bb1d 100644 --- a/superset/db_engine_specs/bigquery.py +++ b/superset/db_engine_specs/bigquery.py @@ -583,7 +583,10 @@ def _get_fields(cls, cols: List[Dict[str, Any]]) -> List[Any]: def parse_error_exception(cls, exception: Exception) -> Exception: try: return Exception( - str(exception).rsplit("\n")[0] # pylint: disable=use-maxsplit-arg + str(exception) # pylint: disable=use-maxsplit-arg + .rsplit("\n")[0] + .rsplit(":")[1] + .strip() ) except Exception: # pylint: disable=broad-except # If for some reason we get an exception, for example, no new line diff --git a/tests/unit_tests/db_engine_specs/test_bigquery.py b/tests/unit_tests/db_engine_specs/test_bigquery.py index ec6aaff3ba93d..4a5c741531dd7 100644 --- a/tests/unit_tests/db_engine_specs/test_bigquery.py +++ b/tests/unit_tests/db_engine_specs/test_bigquery.py @@ -251,20 +251,6 @@ def test_parse_error_message() -> None: Test that we parse a received message and just extract the useful information. Example errors: - 400 Syntax error: Expected "(" or keyword UNNEST but got "@" at [4:80] - - (job ID: 60c732c3-4b4e-4da6-9988-18ba20d389ee) - - -----Query Job SQL Follows----- - - | . | . | . | . | . | . | . | . | . | . | . | . | . | . | . | - 1:SELECT count(*) AS `count_1` - 2:FROM (SELECT `bigquery-public-data.census_bureau_acs.blockgroup_2010_5yr`.`geo_id` AS `bigquery_public_data_census_bureau_acs_blockgroup_2010_5yr_geo_id` - 3:FROM `bigquery-public-data.census_bureau_acs.blockgroup_2010_5yr` - 4:WHERE `bigquery-public-data.census_bureau_acs.blockgroup_2010_5yr`.`geo_id` IN @`geo_id_1`) AS `anon_1` - | . | . | . | . | . | . | . | . | . | . | . | . | . | . | . | - - bigquery error: 400 Table \"case_detail_all_suites\" must be qualified with a dataset (e.g. dataset.table). (job ID: ddf30b05-44e8-4fbf-aa29-40bfccaed886) @@ -274,19 +260,11 @@ def test_parse_error_message() -> None: from superset.db_engine_specs.bigquery import BigQueryEngineSpec message = 'bigquery error: 400 Table "case_detail_all_suites" must be qualified with a dataset (e.g. dataset.table).\n\n(job ID: ddf30b05-44e8-4fbf-aa29-40bfccaed886)\n\n -----Query Job SQL Follows----- \n\n | . | . | . |\n 1:select * from case_detail_all_suites\n 2:LIMIT 1001\n | . | . | . |' - message_2 = '400 Syntax error: Expected "(" or keyword UNNEST but got "@" at [4:80]\n\n(job ID: 60c732c3-4b4e-4da6-9988-18ba20d389ee)\n\n -----Query Job SQL Follows----- \n\n | . | . | . | . | . | . | . | . | . | . | . | . | . | . | . |\n\n 1:SELECT count(*) AS `count_1` \n\n 2:FROM (SELECT `bigquery-public-data.census_bureau_acs.blockgroup_2010_5yr`.`geo_id` AS `bigquery_public_data_census_bureau_acs_blockgroup_2010_5yr_geo_id` \n\n 3:FROM `bigquery-public-data.census_bureau_acs.blockgroup_2010_5yr` \n\n 4:WHERE `bigquery-public-data.census_bureau_acs.blockgroup_2010_5yr`.`geo_id` IN @`geo_id_1`) AS `anon_1`' - expected_result = 'bigquery error: 400 Table "case_detail_all_suites" must be qualified with a dataset (e.g. dataset.table).' - expected_result_2 = ( - '400 Syntax error: Expected "(" or keyword UNNEST but got "@" at [4:80]' - ) + expected_result = '400 Table "case_detail_all_suites" must be qualified with a dataset (e.g. dataset.table).' assert ( str(BigQueryEngineSpec.parse_error_exception(Exception(message))) == expected_result ) - assert ( - str(BigQueryEngineSpec.parse_error_exception(Exception(message_2))) - == expected_result_2 - ) def test_parse_error_raises_exception() -> None: @@ -300,18 +278,10 @@ def test_parse_error_raises_exception() -> None: from superset.db_engine_specs.bigquery import BigQueryEngineSpec message = 'bigquery error: 400 Table "case_detail_all_suites" must be qualified with a dataset (e.g. dataset.table).' - message_2 = '400 Syntax error: Expected "(" or keyword UNNEST but got "@" at [4:80]' - message_3 = "6" - expected_result = 'bigquery error: 400 Table "case_detail_all_suites" must be qualified with a dataset (e.g. dataset.table).' - expected_result_2 = ( - '400 Syntax error: Expected "(" or keyword UNNEST but got "@" at [4:80]' - ) + message_2 = "6" + expected_result = '400 Table "case_detail_all_suites" must be qualified with a dataset (e.g. dataset.table).' assert ( str(BigQueryEngineSpec.parse_error_exception(Exception(message))) == expected_result ) - assert ( - str(BigQueryEngineSpec.parse_error_exception(Exception(message_2))) - == expected_result_2 - ) - assert str(BigQueryEngineSpec.parse_error_exception(Exception(message_3))) == "6" + assert str(BigQueryEngineSpec.parse_error_exception(Exception(message_2))) == "6" From afa72000745fc696b058eb827b8f7963149dd144 Mon Sep 17 00:00:00 2001 From: Antonio Rivero Date: Tue, 8 Nov 2022 22:27:28 -0300 Subject: [PATCH 5/5] BigQuery: - Use splitlines instead and avoid ignoring pylint rules --- superset/db_engine_specs/bigquery.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/superset/db_engine_specs/bigquery.py b/superset/db_engine_specs/bigquery.py index 3a93f0031bb1d..373fc2f747375 100644 --- a/superset/db_engine_specs/bigquery.py +++ b/superset/db_engine_specs/bigquery.py @@ -582,12 +582,7 @@ def _get_fields(cls, cols: List[Dict[str, Any]]) -> List[Any]: @classmethod def parse_error_exception(cls, exception: Exception) -> Exception: try: - return Exception( - str(exception) # pylint: disable=use-maxsplit-arg - .rsplit("\n")[0] - .rsplit(":")[1] - .strip() - ) + return Exception(str(exception).splitlines()[0].rsplit(":")[1].strip()) except Exception: # pylint: disable=broad-except # If for some reason we get an exception, for example, no new line # We will return the original exception