diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py
index 53db9511c0..20d51266c1 100644
--- a/google/cloud/bigquery/_pandas_helpers.py
+++ b/google/cloud/bigquery/_pandas_helpers.py
@@ -509,6 +509,10 @@ def augment_schema(dataframe, current_bq_schema):
         else:
             detected_mode = field.mode
             detected_type = _pyarrow_helpers.arrow_scalar_ids_to_bq(arrow_table.type.id)
+            if detected_type == "NUMERIC" and (
+                arrow_table.type.precision > 38 or arrow_table.type.scale > 9
+            ):
+                detected_type = "BIGNUMERIC"
 
         if detected_type is None:
             unknown_type_fields.append(field)
diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py
index d470bd9fd0..8a31a81431 100644
--- a/tests/unit/test_client.py
+++ b/tests/unit/test_client.py
@@ -8745,6 +8745,62 @@ def test_load_table_from_dataframe_with_csv_source_format(self):
         sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
         assert sent_config.source_format == job.SourceFormat.CSV
 
+    @unittest.skipIf(
+        pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION,
+        "Only `pandas version >=1.0.0` supported",
+    )
+    @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
+    def test_load_table_from_dataframe_w_higher_scale_decimal128_datatype(self):
+        from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+        from google.cloud.bigquery import job
+        from google.cloud.bigquery.schema import SchemaField
+        from decimal import Decimal
+
+        client = self._make_client()
+        dataframe = pandas.DataFrame(
+            {
+                "x": [
+                    Decimal("0.12345678901234560000000000000000000000"),
+                    Decimal("01234567890123456789012345678901234567.1234567891"),
+                ]
+            }
+        )
+        load_patch = mock.patch(
+            "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
+        )
+
+        get_table_patch = mock.patch(
+            "google.cloud.bigquery.client.Client.get_table",
+            autospec=True,
+            return_value=mock.Mock(schema=[SchemaField("x", "BIGNUMERIC", "NULLABLE")]),
+        )
+
+        with load_patch as load_table_from_file, get_table_patch:
+            client.load_table_from_dataframe(
+                dataframe, self.TABLE_REF, location=self.LOCATION
+            )
+
+        load_table_from_file.assert_called_once_with(
+            client,
+            mock.ANY,
+            self.TABLE_REF,
+            num_retries=_DEFAULT_NUM_RETRIES,
+            rewind=True,
+            size=mock.ANY,
+            job_id=mock.ANY,
+            job_id_prefix=None,
+            location=self.LOCATION,
+            project=None,
+            job_config=mock.ANY,
+            timeout=DEFAULT_TIMEOUT,
+        )
+
+        sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
+        assert sent_config.source_format == job.SourceFormat.PARQUET
+        assert tuple(sent_config.schema) == (
+            SchemaField("x", "BIGNUMERIC", "NULLABLE", None),
+        )
+
     def test_load_table_from_json_basic_use(self):
         from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
         from google.cloud.bigquery import job
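
Aside (not part of the patch): a minimal standalone sketch of the rule the first hunk adds, assuming pyarrow's default decimal type inference. BigQuery's NUMERIC type caps out at precision 38 and scale 9, so a pyarrow decimal128 column whose inferred type exceeds either bound has to be mapped to BIGNUMERIC, even though a decimal128 type id otherwise resolves to NUMERIC.

    # Illustrative only; mirrors the patched branch in augment_schema.
    from decimal import Decimal

    import pyarrow

    arr = pyarrow.array([Decimal("1.2345678901")])  # inferred as decimal128(11, 10)
    detected_type = "NUMERIC"  # what a decimal128 type id maps to by default
    if detected_type == "NUMERIC" and (arr.type.precision > 38 or arr.type.scale > 9):
        # scale 10 exceeds NUMERIC's maximum scale of 9
        detected_type = "BIGNUMERIC"
    print(detected_type)  # BIGNUMERIC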