From 8f0b338b07ec6f0aac53bc043413387d36c0f86e Mon Sep 17 00:00:00 2001 From: HemangChothani Date: Thu, 19 Nov 2020 10:54:35 +0530 Subject: [PATCH 1/7] feat: add support of BIGNUMERIC --- google/cloud/bigquery/_helpers.py | 2 ++ google/cloud/bigquery/_pandas_helpers.py | 6 ++++++ google/cloud/bigquery/dbapi/_helpers.py | 11 ++++++++++- google/cloud/bigquery/dbapi/types.py | 2 +- google/cloud/bigquery/query.py | 8 ++++---- google/cloud/bigquery/schema.py | 1 + 6 files changed, 24 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index b59b3d794..4bf920672 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -188,6 +188,7 @@ def _record_from_json(value, field): "FLOAT": _float_from_json, "FLOAT64": _float_from_json, "NUMERIC": _decimal_from_json, + "BIGNUMERIC": _decimal_from_json, "BOOLEAN": _bool_from_json, "BOOL": _bool_from_json, "STRING": _string_from_json, @@ -350,6 +351,7 @@ def _time_to_json(value): "FLOAT": _float_to_json, "FLOAT64": _float_to_json, "NUMERIC": _decimal_to_json, + "BIGNUMERIC": _decimal_to_json, "BOOLEAN": _bool_to_json, "BOOL": _bool_to_json, "BYTES": _bytes_to_json, diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 7774ce26b..36f3ba2af 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -81,6 +81,10 @@ def pyarrow_numeric(): return pyarrow.decimal128(38, 9) +def pyarrow_bignumeric(): + return pyarrow.decimal256(76, 38) + + def pyarrow_time(): return pyarrow.time64("us") @@ -104,6 +108,7 @@ def pyarrow_timestamp(): "INT64": pyarrow.int64, "INTEGER": pyarrow.int64, "NUMERIC": pyarrow_numeric, + "BIGNUMERIC": pyarrow_bignumeric, "STRING": pyarrow.string, "TIME": pyarrow_time, "TIMESTAMP": pyarrow_timestamp, @@ -132,6 +137,7 @@ def pyarrow_timestamp(): pyarrow.decimal128(38, scale=9).id: "NUMERIC", # The exact decimal's scale and precision 
are not important, as only # the type ID matters, and it's the same for all decimal128 instances. + pyarrow.decimal256(76, scale=38).id: "BIGNUMERIC", } else: # pragma: NO COVER diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index fdf4e17c3..95574a8b7 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -21,6 +21,11 @@ import six +try: + import pyarrow +except ImportError: # pragma: NO COVER + pyarrow = None + from google.cloud import bigquery from google.cloud.bigquery import table from google.cloud.bigquery.dbapi import exceptions @@ -186,7 +191,11 @@ def bigquery_scalar_type(value): elif isinstance(value, numbers.Real): return "FLOAT64" elif isinstance(value, decimal.Decimal): - return "NUMERIC" + scalar_object = pyarrow.scalar(value) + if isinstance(scalar_object, pyarrow.Decimal128Scalar): + return "NUMERIC" + else: + return "BIGNUMERIC" elif isinstance(value, six.text_type): return "STRING" elif isinstance(value, six.binary_type): diff --git a/google/cloud/bigquery/dbapi/types.py b/google/cloud/bigquery/dbapi/types.py index 14917820c..20eca9b00 100644 --- a/google/cloud/bigquery/dbapi/types.py +++ b/google/cloud/bigquery/dbapi/types.py @@ -78,7 +78,7 @@ def __eq__(self, other): STRING = "STRING" BINARY = _DBAPITypeObject("BYTES", "RECORD", "STRUCT") NUMBER = _DBAPITypeObject( - "INTEGER", "INT64", "FLOAT", "FLOAT64", "NUMERIC", "BOOLEAN", "BOOL" + "INTEGER", "INT64", "FLOAT", "FLOAT64", "NUMERIC", "BIGNUMERIC", "BOOLEAN", "BOOL" ) DATETIME = _DBAPITypeObject("TIMESTAMP", "DATE", "TIME", "DATETIME") ROWID = "ROWID" diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index f2ed6337e..ecec73e99 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -83,7 +83,7 @@ class ScalarQueryParameter(_AbstractQueryParameter): type_ (str): Name of parameter type. 
One of 'STRING', 'INT64', - 'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or + 'FLOAT64', 'NUMERIC', 'BIGNUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]): @@ -102,7 +102,7 @@ def positional(cls, type_, value): Args: type_ (str): Name of parameter type. One of 'STRING', 'INT64', - 'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or + 'FLOAT64', 'NUMERIC', 'BIGNUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]): @@ -186,7 +186,7 @@ class ArrayQueryParameter(_AbstractQueryParameter): array_type (str): Name of type of array elements. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'NUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + `'FLOAT64'`, `'NUMERIC'`, `'BIGNUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. values (List[appropriate scalar type]): The parameter array values. """ @@ -203,7 +203,7 @@ def positional(cls, array_type, values): Args: array_type (str): Name of type of array elements. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'NUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + `'FLOAT64'`, `'NUMERIC'`, `'BIGNUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. values (List[appropriate scalar type]): The parameter array values. 
diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 8ae0a3a85..a110400e6 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -32,6 +32,7 @@ "FLOAT": types.StandardSqlDataType.TypeKind.FLOAT64, "FLOAT64": types.StandardSqlDataType.TypeKind.FLOAT64, "NUMERIC": types.StandardSqlDataType.TypeKind.NUMERIC, + "BIGNUMERIC": types.StandardSqlDataType.TypeKind.BIGNUMERIC, "BOOLEAN": types.StandardSqlDataType.TypeKind.BOOL, "BOOL": types.StandardSqlDataType.TypeKind.BOOL, "GEOGRAPHY": types.StandardSqlDataType.TypeKind.GEOGRAPHY, From 7be79b23011cc323b5fc8ecba3943200b60130f4 Mon Sep 17 00:00:00 2001 From: HemangChothani Date: Tue, 22 Dec 2020 19:20:12 +0530 Subject: [PATCH 2/7] feat: add BIGNUMERIC support --- tests/unit/test__pandas_helpers.py | 8 ++++++++ tests/unit/test_dbapi__helpers.py | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index ef0c40e1a..4a1a969c7 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -433,6 +433,14 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", ], ), + ( + "BIGNUMERIC", + [ + decimal.Decimal("-1.123456789012345678901234567890"), + None, + decimal.Decimal("3.141592653589793238462643383279"), + ], + ), ], ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index 08dd6dcfa..72c8f5525 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -41,6 +41,10 @@ def test_scalar_to_query_parameter(self): (-123456789, "INT64"), (1.25, "FLOAT64"), (decimal.Decimal("1.25"), "NUMERIC"), + ( + decimal.Decimal("1.1234567890123456789012345678901234567890"), + "BIGNUMERIC", + ), (b"I am some bytes", "BYTES"), (u"I am a string", "STRING"), (datetime.date(2017, 4, 1), 
"DATE"), From fcfcd04ed86aaab14ee426af6c5fd59ad1afe573 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 17 Feb 2021 18:56:15 +0100 Subject: [PATCH 3/7] Add bignumeric_type extra --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index ea2df4843..08935e53e 100644 --- a/setup.py +++ b/setup.py @@ -48,6 +48,7 @@ "pyarrow >= 1.0.0, < 4.0dev", ], "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 4.0dev",], + "bignumeric_type": ["pyarrow >= 3.0.0, < 4.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ "opentelemetry-api==0.11b0", From 21ae1238d7ebabcc34fbeacdcb0e53f52e86a7e1 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 18 Feb 2021 13:44:52 +0100 Subject: [PATCH 4/7] Add additional BIGNUMERIC tests --- tests/system/test_client.py | 29 +++++++ tests/unit/test__pandas_helpers.py | 126 ++++++++++++++++++----------- tests/unit/test_dbapi__helpers.py | 1 + tests/unit/test_query.py | 10 +++ 4 files changed, 118 insertions(+), 48 deletions(-) diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 60c3b3fa8..371b1b522 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -887,6 +887,7 @@ def test_load_table_from_dataframe_w_nulls(self): bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("bignum_col", "BIGNUMERIC"), bigquery.SchemaField("str_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), @@ -912,6 +913,7 @@ def test_load_table_from_dataframe_w_nulls(self): ("geo_col", nulls), ("int_col", nulls), ("num_col", nulls), + ("bignum_col", nulls), ("str_col", nulls), ("time_col", nulls), ("ts_col", nulls), @@ -999,6 +1001,7 @@ def test_load_table_from_dataframe_w_explicit_schema(self): bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), bigquery.SchemaField("num_col", "NUMERIC"), + 
bigquery.SchemaField("bignum_col", "BIGNUMERIC"), bigquery.SchemaField("str_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), @@ -1046,6 +1049,14 @@ def test_load_table_from_dataframe_w_explicit_schema(self): decimal.Decimal("99999999999999999999999999999.999999999"), ], ), + ( + "bignum_col", + [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + None, + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ], + ), ("str_col", [u"abc", None, u"def"]), ( "time_col", @@ -1172,6 +1183,7 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv(self): bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("bignum_col", "BIGNUMERIC"), bigquery.SchemaField("str_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), @@ -1210,6 +1222,14 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv(self): decimal.Decimal("99999999999999999999999999999.999999999"), ], ), + ( + "bignum_col", + [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + None, + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ], + ), ("str_col", [u"abc", None, u"def"]), ( "time_col", @@ -2157,6 +2177,10 @@ def test_query_w_query_params(self): pi_numeric_param = ScalarQueryParameter( name="pi_numeric_param", type_="NUMERIC", value=pi_numeric ) + bignum = decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)) + bignum_param = ScalarQueryParameter( + name="bignum_param", type_="BIGNUMERIC", value=bignum + ) truthy = True truthy_param = ScalarQueryParameter(name="truthy", type_="BOOL", value=truthy) beef = b"DEADBEEF" @@ -2237,6 +2261,11 @@ def test_query_w_query_params(self): "expected": pi_numeric, "query_parameters": [pi_numeric_param], }, + { + "sql": "SELECT @bignum_param", + "expected": bignum, + "query_parameters": [bignum_param], + }, { "sql": 
"SELECT @truthy", "expected": truthy, diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 4a1a969c7..c8c616cf8 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -70,6 +70,15 @@ def is_numeric(type_): )(type_) +def is_bignumeric(type_): + # See: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#numeric-type + return all_( + pyarrow.types.is_decimal, + lambda type_: type_.precision == 76, + lambda type_: type_.scale == 38, + )(type_) + + def is_timestamp(type_): # See: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#timestamp-type return all_( @@ -120,6 +129,7 @@ def test_all_(): ("FLOAT", "NULLABLE", pyarrow.types.is_float64), ("FLOAT64", "NULLABLE", pyarrow.types.is_float64), ("NUMERIC", "NULLABLE", is_numeric), + ("BIGNUMERIC", "NULLABLE", is_bignumeric), ("BOOLEAN", "NULLABLE", pyarrow.types.is_boolean), ("BOOL", "NULLABLE", pyarrow.types.is_boolean), ("TIMESTAMP", "NULLABLE", is_timestamp), @@ -198,6 +208,11 @@ def test_all_(): "REPEATED", all_(pyarrow.types.is_list, lambda type_: is_numeric(type_.value_type)), ), + ( + "BIGNUMERIC", + "REPEATED", + all_(pyarrow.types.is_list, lambda type_: is_bignumeric(type_.value_type)), + ), ( "BOOLEAN", "REPEATED", @@ -270,13 +285,14 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): schema.SchemaField("field05", "FLOAT"), schema.SchemaField("field06", "FLOAT64"), schema.SchemaField("field07", "NUMERIC"), - schema.SchemaField("field08", "BOOLEAN"), - schema.SchemaField("field09", "BOOL"), - schema.SchemaField("field10", "TIMESTAMP"), - schema.SchemaField("field11", "DATE"), - schema.SchemaField("field12", "TIME"), - schema.SchemaField("field13", "DATETIME"), - schema.SchemaField("field14", "GEOGRAPHY"), + schema.SchemaField("field08", "BIGNUMERIC"), + schema.SchemaField("field09", "BOOLEAN"), + schema.SchemaField("field10", "BOOL"), + schema.SchemaField("field11", 
"TIMESTAMP"), + schema.SchemaField("field12", "DATE"), + schema.SchemaField("field13", "TIME"), + schema.SchemaField("field14", "DATETIME"), + schema.SchemaField("field15", "GEOGRAPHY"), ) field = schema.SchemaField("ignored_name", bq_type, mode="NULLABLE", fields=fields) actual = module_under_test.bq_to_arrow_data_type(field) @@ -289,13 +305,14 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): pyarrow.field("field05", pyarrow.float64()), pyarrow.field("field06", pyarrow.float64()), pyarrow.field("field07", module_under_test.pyarrow_numeric()), - pyarrow.field("field08", pyarrow.bool_()), + pyarrow.field("field08", module_under_test.pyarrow_bignumeric()), pyarrow.field("field09", pyarrow.bool_()), - pyarrow.field("field10", module_under_test.pyarrow_timestamp()), - pyarrow.field("field11", pyarrow.date32()), - pyarrow.field("field12", module_under_test.pyarrow_time()), - pyarrow.field("field13", module_under_test.pyarrow_datetime()), - pyarrow.field("field14", pyarrow.string()), + pyarrow.field("field10", pyarrow.bool_()), + pyarrow.field("field11", module_under_test.pyarrow_timestamp()), + pyarrow.field("field12", pyarrow.date32()), + pyarrow.field("field13", module_under_test.pyarrow_time()), + pyarrow.field("field14", module_under_test.pyarrow_datetime()), + pyarrow.field("field15", pyarrow.string()), ) ) assert pyarrow.types.is_struct(actual) @@ -314,13 +331,14 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): schema.SchemaField("field05", "FLOAT"), schema.SchemaField("field06", "FLOAT64"), schema.SchemaField("field07", "NUMERIC"), - schema.SchemaField("field08", "BOOLEAN"), - schema.SchemaField("field09", "BOOL"), - schema.SchemaField("field10", "TIMESTAMP"), - schema.SchemaField("field11", "DATE"), - schema.SchemaField("field12", "TIME"), - schema.SchemaField("field13", "DATETIME"), - schema.SchemaField("field14", "GEOGRAPHY"), + schema.SchemaField("field08", "BIGNUMERIC"), + schema.SchemaField("field09", 
"BOOLEAN"), + schema.SchemaField("field10", "BOOL"), + schema.SchemaField("field11", "TIMESTAMP"), + schema.SchemaField("field12", "DATE"), + schema.SchemaField("field13", "TIME"), + schema.SchemaField("field14", "DATETIME"), + schema.SchemaField("field15", "GEOGRAPHY"), ) field = schema.SchemaField("ignored_name", bq_type, mode="REPEATED", fields=fields) actual = module_under_test.bq_to_arrow_data_type(field) @@ -333,13 +351,14 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): pyarrow.field("field05", pyarrow.float64()), pyarrow.field("field06", pyarrow.float64()), pyarrow.field("field07", module_under_test.pyarrow_numeric()), - pyarrow.field("field08", pyarrow.bool_()), + pyarrow.field("field08", module_under_test.pyarrow_bignumeric()), pyarrow.field("field09", pyarrow.bool_()), - pyarrow.field("field10", module_under_test.pyarrow_timestamp()), - pyarrow.field("field11", pyarrow.date32()), - pyarrow.field("field12", module_under_test.pyarrow_time()), - pyarrow.field("field13", module_under_test.pyarrow_datetime()), - pyarrow.field("field14", pyarrow.string()), + pyarrow.field("field10", pyarrow.bool_()), + pyarrow.field("field11", module_under_test.pyarrow_timestamp()), + pyarrow.field("field12", pyarrow.date32()), + pyarrow.field("field13", module_under_test.pyarrow_time()), + pyarrow.field("field14", module_under_test.pyarrow_datetime()), + pyarrow.field("field15", pyarrow.string()), ) ) assert pyarrow.types.is_list(actual) @@ -385,6 +404,15 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): decimal.Decimal("999.123456789"), ], ), + ( + "BIGNUMERIC", + [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + None, + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + decimal.Decimal("3.141592653589793238462643383279"), + ], + ), ("BOOLEAN", [True, None, False, None]), ("BOOL", [False, None, True, None]), ( @@ -433,14 +461,6 @@ def 
test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", ], ), - ( - "BIGNUMERIC", - [ - decimal.Decimal("-1.123456789012345678901234567890"), - None, - decimal.Decimal("3.141592653589793238462643383279"), - ], - ), ], ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @@ -849,13 +869,14 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): schema.SchemaField("field05", "FLOAT", mode="REQUIRED"), schema.SchemaField("field06", "FLOAT64", mode="REQUIRED"), schema.SchemaField("field07", "NUMERIC", mode="REQUIRED"), - schema.SchemaField("field08", "BOOLEAN", mode="REQUIRED"), - schema.SchemaField("field09", "BOOL", mode="REQUIRED"), - schema.SchemaField("field10", "TIMESTAMP", mode="REQUIRED"), - schema.SchemaField("field11", "DATE", mode="REQUIRED"), - schema.SchemaField("field12", "TIME", mode="REQUIRED"), - schema.SchemaField("field13", "DATETIME", mode="REQUIRED"), - schema.SchemaField("field14", "GEOGRAPHY", mode="REQUIRED"), + schema.SchemaField("field08", "BIGNUMERIC", mode="REQUIRED"), + schema.SchemaField("field09", "BOOLEAN", mode="REQUIRED"), + schema.SchemaField("field10", "BOOL", mode="REQUIRED"), + schema.SchemaField("field11", "TIMESTAMP", mode="REQUIRED"), + schema.SchemaField("field12", "DATE", mode="REQUIRED"), + schema.SchemaField("field13", "TIME", mode="REQUIRED"), + schema.SchemaField("field14", "DATETIME", mode="REQUIRED"), + schema.SchemaField("field15", "GEOGRAPHY", mode="REQUIRED"), ) dataframe = pandas.DataFrame( { @@ -866,19 +887,23 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): "field05": [1.25, 9.75], "field06": [-1.75, -3.5], "field07": [decimal.Decimal("1.2345"), decimal.Decimal("6.7891")], - "field08": [True, False], - "field09": [False, True], - "field10": [ + "field08": [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ], + "field09": [True, False], 
+ "field10": [False, True], + "field11": [ datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), datetime.datetime(2012, 12, 21, 9, 7, 42, tzinfo=pytz.utc), ], - "field11": [datetime.date(9999, 12, 31), datetime.date(1970, 1, 1)], - "field12": [datetime.time(23, 59, 59, 999999), datetime.time(12, 0, 0)], - "field13": [ + "field12": [datetime.date(9999, 12, 31), datetime.date(1970, 1, 1)], + "field13": [datetime.time(23, 59, 59, 999999), datetime.time(12, 0, 0)], + "field14": [ datetime.datetime(1970, 1, 1, 0, 0, 0), datetime.datetime(2012, 12, 21, 9, 7, 42), ], - "field14": [ + "field15": [ "POINT(30 10)", "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", ], @@ -1097,6 +1122,7 @@ def test_augment_schema_type_detection_succeeds(module_under_test): "bytes_field": b"some bytes", "string_field": u"some characters", "numeric_field": decimal.Decimal("123.456"), + "bignumeric_field": decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), } ] ) @@ -1116,6 +1142,7 @@ def test_augment_schema_type_detection_succeeds(module_under_test): schema.SchemaField("bytes_field", field_type=None, mode="NULLABLE"), schema.SchemaField("string_field", field_type=None, mode="NULLABLE"), schema.SchemaField("numeric_field", field_type=None, mode="NULLABLE"), + schema.SchemaField("bignumeric_field", field_type=None, mode="NULLABLE"), ) with warnings.catch_warnings(record=True) as warned: @@ -1138,6 +1165,9 @@ def test_augment_schema_type_detection_succeeds(module_under_test): schema.SchemaField("bytes_field", field_type="BYTES", mode="NULLABLE"), schema.SchemaField("string_field", field_type="STRING", mode="NULLABLE"), schema.SchemaField("numeric_field", field_type="NUMERIC", mode="NULLABLE"), + schema.SchemaField( + "bignumeric_field", field_type="BIGNUMERIC", mode="NULLABLE" + ), ) by_name = operator.attrgetter("name") assert sorted(augmented_schema, key=by_name) == sorted(expected_schema, key=by_name) diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index 
2fd0f56f2..fa940f4ea 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -84,6 +84,7 @@ def test_array_to_query_parameter_valid_argument(self): ([123, -456, 0], "INT64"), ([1.25, 2.50], "FLOAT64"), ([decimal.Decimal("1.25")], "NUMERIC"), + ([decimal.Decimal("{d38}.{d38}".format(d38="9" * 38))], "BIGNUMERIC"), ([b"foo", b"bar"], "BYTES"), ([u"foo", u"bar"], "STRING"), ([datetime.date(2017, 4, 1), datetime.date(2018, 4, 1)], "DATE"), diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index cf268daf1..ae2c29d09 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -166,6 +166,16 @@ def test_to_api_repr_w_numeric(self): param = klass.positional(type_="NUMERIC", value="123456789.123456789") self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_w_bignumeric(self): + big_num_string = "{d38}.{d38}".format(d38="9" * 38) + EXPECTED = { + "parameterType": {"type": "BIGNUMERIC"}, + "parameterValue": {"value": big_num_string}, + } + klass = self._get_target_class() + param = klass.positional(type_="BIGNUMERIC", value=big_num_string) + self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_w_bool(self): EXPECTED = { "parameterType": {"type": "BOOL"}, From 5a2fc40e14fb848e06d7c4a474b8de0b8f809710 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 18 Feb 2021 14:25:04 +0100 Subject: [PATCH 5/7] Prevent import time error if no BIGNUMERIC support --- google/cloud/bigquery/_pandas_helpers.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index e1357157a..b6526ab20 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -107,7 +107,6 @@ def pyarrow_timestamp(): "INT64": pyarrow.int64, "INTEGER": pyarrow.int64, "NUMERIC": pyarrow_numeric, - "BIGNUMERIC": pyarrow_bignumeric, "STRING": pyarrow.string, "TIME": pyarrow_time, 
"TIMESTAMP": pyarrow_timestamp, @@ -134,10 +133,13 @@ def pyarrow_timestamp(): pyarrow.binary().id: "BYTES", pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() pyarrow.decimal128(38, scale=9).id: "NUMERIC", + } + + if int(pyarrow.__version__.split(".")[0]) >= 3: + BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric # The exact decimal's scale and precision are not important, as only # the type ID matters, and it's the same for all decimal128 instances. - pyarrow.decimal256(76, scale=38).id: "BIGNUMERIC", - } + ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" else: # pragma: NO COVER BQ_TO_ARROW_SCALARS = {} # pragma: NO COVER From 88a5303638cde66277fa81a3f5494632d2ba1f2f Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 19 Feb 2021 09:17:14 +0100 Subject: [PATCH 6/7] Add/improve a few comments --- google/cloud/bigquery/_pandas_helpers.py | 3 ++- google/cloud/bigquery/dbapi/_helpers.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index b6526ab20..029cb5bca 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -132,13 +132,14 @@ def pyarrow_timestamp(): pyarrow.date64().id: "DATETIME", # because millisecond resolution pyarrow.binary().id: "BYTES", pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() + # The exact scale and precision don't matter, see below. pyarrow.decimal128(38, scale=9).id: "NUMERIC", } if int(pyarrow.__version__.split(".")[0]) >= 3: BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric # The exact decimal's scale and precision are not important, as only - # the type ID matters, and it's the same for all decimal128 instances. + # the type ID matters, and it's the same for all decimal256 instances. 
ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" else: # pragma: NO COVER diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index 0b47377bf..6b36d6e43 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -189,6 +189,7 @@ def bigquery_scalar_type(value): elif isinstance(value, numbers.Real): return "FLOAT64" elif isinstance(value, decimal.Decimal): + # We check for NUMERIC before BIGNUMERIC in order to support pyarrow < 3.0. scalar_object = pyarrow.scalar(value) if isinstance(scalar_object, pyarrow.Decimal128Scalar): return "NUMERIC" From 9af432707b5134d5c1eef471cb9fa210b76fba45 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 19 Feb 2021 09:35:02 +0100 Subject: [PATCH 7/7] Add feature flag for BIGNUMERIC suppport --- google/cloud/bigquery/_pandas_helpers.py | 7 +- setup.py | 1 + tests/system/test_client.py | 159 ++++++++++---------- tests/unit/test__pandas_helpers.py | 180 +++++++++++++---------- tests/unit/test_dbapi__helpers.py | 19 ++- 5 files changed, 208 insertions(+), 158 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 029cb5bca..7ad416e08 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -20,6 +20,7 @@ import queue import warnings +from packaging import version try: import pandas @@ -136,15 +137,19 @@ def pyarrow_timestamp(): pyarrow.decimal128(38, scale=9).id: "NUMERIC", } - if int(pyarrow.__version__.split(".")[0]) >= 3: + if version.parse(pyarrow.__version__) >= version.parse("3.0.0"): BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric # The exact decimal's scale and precision are not important, as only # the type ID matters, and it's the same for all decimal256 instances. 
ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" + _BIGNUMERIC_SUPPORT = True + else: + _BIGNUMERIC_SUPPORT = False else: # pragma: NO COVER BQ_TO_ARROW_SCALARS = {} # pragma: NO COVER ARROW_SCALAR_IDS_TO_BQ = {} # pragma: NO_COVER + _BIGNUMERIC_SUPPORT = False # pragma: NO COVER def bq_to_arrow_struct_data_type(field): diff --git a/setup.py b/setup.py index 08935e53e..31b6a3ff7 100644 --- a/setup.py +++ b/setup.py @@ -33,6 +33,7 @@ "proto-plus >= 1.10.0", "google-cloud-core >= 1.4.1, < 2.0dev", "google-resumable-media >= 0.6.0, < 2.0dev", + "packaging >= 14.3", "protobuf >= 3.12.0", ] extras = { diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 371b1b522..684a42c30 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -65,6 +65,7 @@ from google.api_core.iam import Policy from google.cloud import bigquery from google.cloud import bigquery_v2 +from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table @@ -887,11 +888,13 @@ def test_load_table_from_dataframe_w_nulls(self): bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), bigquery.SchemaField("num_col", "NUMERIC"), - bigquery.SchemaField("bignum_col", "BIGNUMERIC"), bigquery.SchemaField("str_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), ) + if _BIGNUMERIC_SUPPORT: + scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) + table_schema = scalars_schema + ( # TODO: Array columns can't be read due to NULLABLE versus REPEATED # mode mismatch. 
See: @@ -903,22 +906,22 @@ def test_load_table_from_dataframe_w_nulls(self): ) num_rows = 100 nulls = [None] * num_rows - df_data = collections.OrderedDict( - [ - ("bool_col", nulls), - ("bytes_col", nulls), - ("date_col", nulls), - ("dt_col", nulls), - ("float_col", nulls), - ("geo_col", nulls), - ("int_col", nulls), - ("num_col", nulls), - ("bignum_col", nulls), - ("str_col", nulls), - ("time_col", nulls), - ("ts_col", nulls), - ] - ) + df_data = [ + ("bool_col", nulls), + ("bytes_col", nulls), + ("date_col", nulls), + ("dt_col", nulls), + ("float_col", nulls), + ("geo_col", nulls), + ("int_col", nulls), + ("num_col", nulls), + ("str_col", nulls), + ("time_col", nulls), + ("ts_col", nulls), + ] + if _BIGNUMERIC_SUPPORT: + df_data.append(("bignum_col", nulls)) + df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) dataset_id = _make_dataset_id("bq_load_test") @@ -1001,11 +1004,13 @@ def test_load_table_from_dataframe_w_explicit_schema(self): bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), bigquery.SchemaField("num_col", "NUMERIC"), - bigquery.SchemaField("bignum_col", "BIGNUMERIC"), bigquery.SchemaField("str_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), ) + if _BIGNUMERIC_SUPPORT: + scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) + table_schema = scalars_schema + ( # TODO: Array columns can't be read due to NULLABLE versus REPEATED # mode mismatch. 
See: @@ -1015,40 +1020,55 @@ def test_load_table_from_dataframe_w_explicit_schema(self): # https://jira.apache.org/jira/browse/ARROW-2587 # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema), ) - df_data = collections.OrderedDict( - [ - ("bool_col", [True, None, False]), - ("bytes_col", [b"abc", None, b"def"]), - ( - "date_col", - [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)], - ), - # ( - # "dt_col", - # [ - # datetime.datetime(1, 1, 1, 0, 0, 0), - # None, - # datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), - # ], - # ), - ("float_col", [float("-inf"), float("nan"), float("inf")]), - ( - "geo_col", - [ - "POINT(30 10)", - None, - "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", - ], - ), - ("int_col", [-9223372036854775808, None, 9223372036854775807]), - ( - "num_col", - [ - decimal.Decimal("-99999999999999999999999999999.999999999"), - None, - decimal.Decimal("99999999999999999999999999999.999999999"), - ], - ), + + df_data = [ + ("bool_col", [True, None, False]), + ("bytes_col", [b"abc", None, b"def"]), + ("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)]), + # ( + # "dt_col", + # [ + # datetime.datetime(1, 1, 1, 0, 0, 0), + # None, + # datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), + # ], + # ), + ("float_col", [float("-inf"), float("nan"), float("inf")]), + ( + "geo_col", + [ + "POINT(30 10)", + None, + "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", + ], + ), + ("int_col", [-9223372036854775808, None, 9223372036854775807]), + ( + "num_col", + [ + decimal.Decimal("-99999999999999999999999999999.999999999"), + None, + decimal.Decimal("99999999999999999999999999999.999999999"), + ], + ), + ("str_col", [u"abc", None, u"def"]), + ( + "time_col", + [datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 999999)], + ), + ( + "ts_col", + [ + datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + None, + datetime.datetime( + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc + ), + ], + ), + ] + if 
_BIGNUMERIC_SUPPORT: + df_data.append( ( "bignum_col", [ @@ -1056,24 +1076,9 @@ def test_load_table_from_dataframe_w_explicit_schema(self): None, decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), ], - ), - ("str_col", [u"abc", None, u"def"]), - ( - "time_col", - [datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 999999)], - ), - ( - "ts_col", - [ - datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), - None, - datetime.datetime( - 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc - ), - ], - ), - ] - ) + ) + ) + df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) dataset_id = _make_dataset_id("bq_load_test") @@ -2261,11 +2266,6 @@ def test_query_w_query_params(self): "expected": pi_numeric, "query_parameters": [pi_numeric_param], }, - { - "sql": "SELECT @bignum_param", - "expected": bignum, - "query_parameters": [bignum_param], - }, { "sql": "SELECT @truthy", "expected": truthy, @@ -2331,6 +2331,15 @@ def test_query_w_query_params(self): "query_parameters": [with_friends_param], }, ] + if _BIGNUMERIC_SUPPORT: + examples.append( + { + "sql": "SELECT @bignum_param", + "expected": bignum, + "query_parameters": [bignum_param], + } + ) + for example in examples: jconfig = QueryJobConfig() jconfig.query_parameters = example["query_parameters"] diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index c8c616cf8..abd725820 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -39,6 +39,12 @@ from google import api_core from google.cloud.bigquery import schema +from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT + + +skip_if_no_bignumeric = pytest.mark.skipif( + not _BIGNUMERIC_SUPPORT, reason="BIGNUMERIC support requires pyarrow>=3.0.0", +) @pytest.fixture @@ -129,7 +135,9 @@ def test_all_(): ("FLOAT", "NULLABLE", pyarrow.types.is_float64), ("FLOAT64", "NULLABLE", pyarrow.types.is_float64), ("NUMERIC", 
"NULLABLE", is_numeric), - ("BIGNUMERIC", "NULLABLE", is_bignumeric), + pytest.param( + "BIGNUMERIC", "NULLABLE", is_bignumeric, marks=skip_if_no_bignumeric, + ), ("BOOLEAN", "NULLABLE", pyarrow.types.is_boolean), ("BOOL", "NULLABLE", pyarrow.types.is_boolean), ("TIMESTAMP", "NULLABLE", is_timestamp), @@ -208,10 +216,11 @@ def test_all_(): "REPEATED", all_(pyarrow.types.is_list, lambda type_: is_numeric(type_.value_type)), ), - ( + pytest.param( "BIGNUMERIC", "REPEATED", all_(pyarrow.types.is_list, lambda type_: is_bignumeric(type_.value_type)), + marks=skip_if_no_bignumeric, ), ( "BOOLEAN", @@ -285,7 +294,6 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): schema.SchemaField("field05", "FLOAT"), schema.SchemaField("field06", "FLOAT64"), schema.SchemaField("field07", "NUMERIC"), - schema.SchemaField("field08", "BIGNUMERIC"), schema.SchemaField("field09", "BOOLEAN"), schema.SchemaField("field10", "BOOL"), schema.SchemaField("field11", "TIMESTAMP"), @@ -294,27 +302,33 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): schema.SchemaField("field14", "DATETIME"), schema.SchemaField("field15", "GEOGRAPHY"), ) + + if _BIGNUMERIC_SUPPORT: + fields += (schema.SchemaField("field08", "BIGNUMERIC"),) + field = schema.SchemaField("ignored_name", bq_type, mode="NULLABLE", fields=fields) actual = module_under_test.bq_to_arrow_data_type(field) - expected = pyarrow.struct( - ( - pyarrow.field("field01", pyarrow.string()), - pyarrow.field("field02", pyarrow.binary()), - pyarrow.field("field03", pyarrow.int64()), - pyarrow.field("field04", pyarrow.int64()), - pyarrow.field("field05", pyarrow.float64()), - pyarrow.field("field06", pyarrow.float64()), - pyarrow.field("field07", module_under_test.pyarrow_numeric()), - pyarrow.field("field08", module_under_test.pyarrow_bignumeric()), - pyarrow.field("field09", pyarrow.bool_()), - pyarrow.field("field10", pyarrow.bool_()), - pyarrow.field("field11", module_under_test.pyarrow_timestamp()), - 
pyarrow.field("field12", pyarrow.date32()), - pyarrow.field("field13", module_under_test.pyarrow_time()), - pyarrow.field("field14", module_under_test.pyarrow_datetime()), - pyarrow.field("field15", pyarrow.string()), - ) + + expected = ( + pyarrow.field("field01", pyarrow.string()), + pyarrow.field("field02", pyarrow.binary()), + pyarrow.field("field03", pyarrow.int64()), + pyarrow.field("field04", pyarrow.int64()), + pyarrow.field("field05", pyarrow.float64()), + pyarrow.field("field06", pyarrow.float64()), + pyarrow.field("field07", module_under_test.pyarrow_numeric()), + pyarrow.field("field09", pyarrow.bool_()), + pyarrow.field("field10", pyarrow.bool_()), + pyarrow.field("field11", module_under_test.pyarrow_timestamp()), + pyarrow.field("field12", pyarrow.date32()), + pyarrow.field("field13", module_under_test.pyarrow_time()), + pyarrow.field("field14", module_under_test.pyarrow_datetime()), + pyarrow.field("field15", pyarrow.string()), ) + if _BIGNUMERIC_SUPPORT: + expected += (pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),) + expected = pyarrow.struct(expected) + assert pyarrow.types.is_struct(actual) assert actual.num_fields == len(fields) assert actual.equals(expected) @@ -331,7 +345,6 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): schema.SchemaField("field05", "FLOAT"), schema.SchemaField("field06", "FLOAT64"), schema.SchemaField("field07", "NUMERIC"), - schema.SchemaField("field08", "BIGNUMERIC"), schema.SchemaField("field09", "BOOLEAN"), schema.SchemaField("field10", "BOOL"), schema.SchemaField("field11", "TIMESTAMP"), @@ -340,27 +353,33 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): schema.SchemaField("field14", "DATETIME"), schema.SchemaField("field15", "GEOGRAPHY"), ) + + if _BIGNUMERIC_SUPPORT: + fields += (schema.SchemaField("field08", "BIGNUMERIC"),) + field = schema.SchemaField("ignored_name", bq_type, mode="REPEATED", fields=fields) actual = 
module_under_test.bq_to_arrow_data_type(field) - expected_value_type = pyarrow.struct( - ( - pyarrow.field("field01", pyarrow.string()), - pyarrow.field("field02", pyarrow.binary()), - pyarrow.field("field03", pyarrow.int64()), - pyarrow.field("field04", pyarrow.int64()), - pyarrow.field("field05", pyarrow.float64()), - pyarrow.field("field06", pyarrow.float64()), - pyarrow.field("field07", module_under_test.pyarrow_numeric()), - pyarrow.field("field08", module_under_test.pyarrow_bignumeric()), - pyarrow.field("field09", pyarrow.bool_()), - pyarrow.field("field10", pyarrow.bool_()), - pyarrow.field("field11", module_under_test.pyarrow_timestamp()), - pyarrow.field("field12", pyarrow.date32()), - pyarrow.field("field13", module_under_test.pyarrow_time()), - pyarrow.field("field14", module_under_test.pyarrow_datetime()), - pyarrow.field("field15", pyarrow.string()), - ) + + expected = ( + pyarrow.field("field01", pyarrow.string()), + pyarrow.field("field02", pyarrow.binary()), + pyarrow.field("field03", pyarrow.int64()), + pyarrow.field("field04", pyarrow.int64()), + pyarrow.field("field05", pyarrow.float64()), + pyarrow.field("field06", pyarrow.float64()), + pyarrow.field("field07", module_under_test.pyarrow_numeric()), + pyarrow.field("field09", pyarrow.bool_()), + pyarrow.field("field10", pyarrow.bool_()), + pyarrow.field("field11", module_under_test.pyarrow_timestamp()), + pyarrow.field("field12", pyarrow.date32()), + pyarrow.field("field13", module_under_test.pyarrow_time()), + pyarrow.field("field14", module_under_test.pyarrow_datetime()), + pyarrow.field("field15", pyarrow.string()), ) + if _BIGNUMERIC_SUPPORT: + expected += (pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),) + expected_value_type = pyarrow.struct(expected) + assert pyarrow.types.is_list(actual) assert pyarrow.types.is_struct(actual.value_type) assert actual.value_type.num_fields == len(fields) @@ -404,7 +423,7 @@ def 
test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): decimal.Decimal("999.123456789"), ], ), - ( + pytest.param( "BIGNUMERIC", [ decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), @@ -412,6 +431,7 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), decimal.Decimal("3.141592653589793238462643383279"), ], + marks=skip_if_no_bignumeric, ), ("BOOLEAN", [True, None, False, None]), ("BOOL", [False, None, True, None]), @@ -869,7 +889,6 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): schema.SchemaField("field05", "FLOAT", mode="REQUIRED"), schema.SchemaField("field06", "FLOAT64", mode="REQUIRED"), schema.SchemaField("field07", "NUMERIC", mode="REQUIRED"), - schema.SchemaField("field08", "BIGNUMERIC", mode="REQUIRED"), schema.SchemaField("field09", "BOOLEAN", mode="REQUIRED"), schema.SchemaField("field10", "BOOL", mode="REQUIRED"), schema.SchemaField("field11", "TIMESTAMP", mode="REQUIRED"), @@ -878,37 +897,37 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): schema.SchemaField("field14", "DATETIME", mode="REQUIRED"), schema.SchemaField("field15", "GEOGRAPHY", mode="REQUIRED"), ) - dataframe = pandas.DataFrame( - { - "field01": ["hello", "world"], - "field02": [b"abd", b"efg"], - "field03": [1, 2], - "field04": [3, 4], - "field05": [1.25, 9.75], - "field06": [-1.75, -3.5], - "field07": [decimal.Decimal("1.2345"), decimal.Decimal("6.7891")], - "field08": [ - decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), - decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), - ], - "field09": [True, False], - "field10": [False, True], - "field11": [ - datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), - datetime.datetime(2012, 12, 21, 9, 7, 42, tzinfo=pytz.utc), - ], - "field12": [datetime.date(9999, 12, 31), datetime.date(1970, 1, 1)], - "field13": [datetime.time(23, 59, 59, 999999), datetime.time(12, 0, 0)], - "field14": [ - 
datetime.datetime(1970, 1, 1, 0, 0, 0), - datetime.datetime(2012, 12, 21, 9, 7, 42), - ], - "field15": [ - "POINT(30 10)", - "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", - ], - } - ) + if _BIGNUMERIC_SUPPORT: + bq_schema += (schema.SchemaField("field08", "BIGNUMERIC", mode="REQUIRED"),) + + data = { + "field01": ["hello", "world"], + "field02": [b"abd", b"efg"], + "field03": [1, 2], + "field04": [3, 4], + "field05": [1.25, 9.75], + "field06": [-1.75, -3.5], + "field07": [decimal.Decimal("1.2345"), decimal.Decimal("6.7891")], + "field09": [True, False], + "field10": [False, True], + "field11": [ + datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + datetime.datetime(2012, 12, 21, 9, 7, 42, tzinfo=pytz.utc), + ], + "field12": [datetime.date(9999, 12, 31), datetime.date(1970, 1, 1)], + "field13": [datetime.time(23, 59, 59, 999999), datetime.time(12, 0, 0)], + "field14": [ + datetime.datetime(1970, 1, 1, 0, 0, 0), + datetime.datetime(2012, 12, 21, 9, 7, 42), + ], + "field15": ["POINT(30 10)", "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))"], + } + if _BIGNUMERIC_SUPPORT: + data["field08"] = [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ] + dataframe = pandas.DataFrame(data) arrow_table = module_under_test.dataframe_to_arrow(dataframe, bq_schema) arrow_schema = arrow_table.schema @@ -1142,8 +1161,11 @@ def test_augment_schema_type_detection_succeeds(module_under_test): schema.SchemaField("bytes_field", field_type=None, mode="NULLABLE"), schema.SchemaField("string_field", field_type=None, mode="NULLABLE"), schema.SchemaField("numeric_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("bignumeric_field", field_type=None, mode="NULLABLE"), ) + if _BIGNUMERIC_SUPPORT: + current_schema += ( + schema.SchemaField("bignumeric_field", field_type=None, mode="NULLABLE"), + ) with warnings.catch_warnings(record=True) as warned: augmented_schema = module_under_test.augment_schema(dataframe, 
current_schema) @@ -1165,10 +1187,14 @@ def test_augment_schema_type_detection_succeeds(module_under_test): schema.SchemaField("bytes_field", field_type="BYTES", mode="NULLABLE"), schema.SchemaField("string_field", field_type="STRING", mode="NULLABLE"), schema.SchemaField("numeric_field", field_type="NUMERIC", mode="NULLABLE"), - schema.SchemaField( - "bignumeric_field", field_type="BIGNUMERIC", mode="NULLABLE" - ), ) + if _BIGNUMERIC_SUPPORT: + expected_schema += ( + schema.SchemaField( + "bignumeric_field", field_type="BIGNUMERIC", mode="NULLABLE" + ), + ) + by_name = operator.attrgetter("name") assert sorted(augmented_schema, key=by_name) == sorted(expected_schema, key=by_name) diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index fa940f4ea..c28c014d4 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -25,6 +25,7 @@ import google.cloud._helpers from google.cloud.bigquery import table +from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.dbapi import exceptions from tests.unit.helpers import _to_pyarrow @@ -39,10 +40,6 @@ def test_scalar_to_query_parameter(self): (-123456789, "INT64"), (1.25, "FLOAT64"), (decimal.Decimal("1.25"), "NUMERIC"), - ( - decimal.Decimal("1.1234567890123456789012345678901234567890"), - "BIGNUMERIC", - ), (b"I am some bytes", "BYTES"), (u"I am a string", "STRING"), (datetime.date(2017, 4, 1), "DATE"), @@ -55,6 +52,14 @@ def test_scalar_to_query_parameter(self): "TIMESTAMP", ), ] + if _BIGNUMERIC_SUPPORT: + expected_types.append( + ( + decimal.Decimal("1.1234567890123456789012345678901234567890"), + "BIGNUMERIC", + ) + ) + for value, expected_type in expected_types: msg = "value: {} expected_type: {}".format(value, expected_type) parameter = _helpers.scalar_to_query_parameter(value) @@ -84,7 +89,6 @@ def test_array_to_query_parameter_valid_argument(self): ([123, -456, 0], 
"INT64"), ([1.25, 2.50], "FLOAT64"), ([decimal.Decimal("1.25")], "NUMERIC"), - ([decimal.Decimal("{d38}.{d38}".format(d38="9" * 38))], "BIGNUMERIC"), ([b"foo", b"bar"], "BYTES"), ([u"foo", u"bar"], "STRING"), ([datetime.date(2017, 4, 1), datetime.date(2018, 4, 1)], "DATE"), @@ -109,6 +113,11 @@ def test_array_to_query_parameter_valid_argument(self): ), ] + if _BIGNUMERIC_SUPPORT: + expected_types.append( + ([decimal.Decimal("{d38}.{d38}".format(d38="9" * 38))], "BIGNUMERIC") + ) + for values, expected_type in expected_types: msg = "value: {} expected_type: {}".format(values, expected_type) parameter = _helpers.array_to_query_parameter(values)