Skip to content

Commit 9f10541

Browse files

Commit authored: build: update google-cloud-bigquery to 3.31.0 to support JSON when allow_large_results=False (#1547)

1 parent 0a4e245 commit 9f10541

File tree

8 files changed

+44
-123
lines changed

8 files changed

+44
-123
lines changed

setup.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
"google-auth >=2.15.0,<3.0dev",
4242
"google-cloud-bigtable >=2.24.0",
4343
"google-cloud-pubsub >=2.21.4",
44-
"google-cloud-bigquery[bqstorage,pandas] >=3.18.0",
44+
"google-cloud-bigquery[bqstorage,pandas] >=3.31.0",
4545
"google-cloud-functions >=1.12.0",
4646
"google-cloud-bigquery-connection >=1.12.0",
4747
"google-cloud-iam >=2.12.1",
@@ -51,7 +51,7 @@
5151
"jellyfish >=0.8.9,<1.1.2",
5252
"numpy >=1.24.0",
5353
"pandas >=1.5.3",
54-
"pandas-gbq >=0.26.0",
54+
"pandas-gbq >=0.26.1",
5555
"pyarrow >=15.0.2",
5656
"pydata-google-auth >=1.8.2",
5757
"requests >=2.27.1",

testing/constraints-3.9.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ geopandas==0.12.2
66
google-auth==2.15.0
77
google-cloud-bigtable==2.24.0
88
google-cloud-pubsub==2.21.4
9-
google-cloud-bigquery==3.18.0
9+
google-cloud-bigquery==3.31.0
1010
google-cloud-functions==1.12.0
1111
google-cloud-bigquery-connection==1.12.0
1212
google-cloud-iam==2.12.1
@@ -15,7 +15,7 @@ google-cloud-storage==2.0.0
1515
jellyfish==0.8.9
1616
numpy==1.24.0
1717
pandas==1.5.3
18-
pandas-gbq==0.26.0
18+
pandas-gbq==0.26.1
1919
pyarrow==15.0.2
2020
pydata-google-auth==1.8.2
2121
requests==2.27.1

tests/system/small/bigquery/test_json.py

+11-55
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,7 @@ def test_json_set_at_json_path(json_path, expected_json):
3636
actual = bbq.json_set(s, json_path_value_pairs=[(json_path, 10)])
3737
expected = bpd.Series(expected_json, dtype=dtypes.JSON_DTYPE)
3838

39-
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
40-
pd.testing.assert_series_equal(
41-
actual.to_pandas(allow_large_results=True),
42-
expected.to_pandas(allow_large_results=True),
43-
)
39+
pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
4440

4541

4642
@pytest.mark.parametrize(
@@ -60,11 +56,7 @@ def test_json_set_at_json_value_type(json_value, expected_json):
6056
actual = bbq.json_set(s, json_path_value_pairs=[("$.a.b", json_value)])
6157
expected = bpd.Series(expected_json, dtype=dtypes.JSON_DTYPE)
6258

63-
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
64-
pd.testing.assert_series_equal(
65-
actual.to_pandas(allow_large_results=True),
66-
expected.to_pandas(allow_large_results=True),
67-
)
59+
pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
6860

6961

7062
def test_json_set_w_more_pairs():
@@ -77,11 +69,7 @@ def test_json_set_w_more_pairs():
7769
expected_json = ['{"a": 3, "b": 2}', '{"a": 4, "b": 2}', '{"a": 5, "b": 2, "c": 1}']
7870
expected = bpd.Series(expected_json, dtype=dtypes.JSON_DTYPE)
7971

80-
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
81-
pd.testing.assert_series_equal(
82-
actual.to_pandas(allow_large_results=True),
83-
expected.to_pandas(allow_large_results=True),
84-
)
72+
pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
8573

8674

8775
def test_json_set_w_invalid_value_type():
@@ -114,11 +102,7 @@ def test_json_extract_from_json():
114102
actual = bbq.json_extract(s, "$.a.b")
115103
expected = bpd.Series(["[1, 2]", None, "0"], dtype=dtypes.JSON_DTYPE)
116104

117-
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
118-
pd.testing.assert_series_equal(
119-
actual.to_pandas(allow_large_results=True),
120-
expected.to_pandas(allow_large_results=True),
121-
)
105+
pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
122106

123107

124108
def test_json_extract_from_string():
@@ -129,11 +113,7 @@ def test_json_extract_from_string():
129113
actual = bbq.json_extract(s, "$.a.b")
130114
expected = bpd.Series(["[1,2]", None, "0"], dtype=pd.StringDtype(storage="pyarrow"))
131115

132-
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
133-
pd.testing.assert_series_equal(
134-
actual.to_pandas(allow_large_results=True),
135-
expected.to_pandas(allow_large_results=True),
136-
)
116+
pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
137117

138118

139119
def test_json_extract_w_invalid_series_type():
@@ -165,11 +145,7 @@ def test_json_extract_array_from_json():
165145
expected.index.name = None
166146
expected.name = None
167147

168-
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
169-
pd.testing.assert_series_equal(
170-
actual.to_pandas(allow_large_results=True),
171-
expected.to_pandas(allow_large_results=True),
172-
)
148+
pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
173149

174150

175151
def test_json_extract_array_from_json_strings():
@@ -183,11 +159,7 @@ def test_json_extract_array_from_json_strings():
183159
dtype=pd.ArrowDtype(pa.list_(pa.string())),
184160
)
185161

186-
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
187-
pd.testing.assert_series_equal(
188-
actual.to_pandas(allow_large_results=True),
189-
expected.to_pandas(allow_large_results=True),
190-
)
162+
pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
191163

192164

193165
def test_json_extract_array_from_json_array_strings():
@@ -201,11 +173,7 @@ def test_json_extract_array_from_json_array_strings():
201173
dtype=pd.ArrowDtype(pa.list_(pa.string())),
202174
)
203175

204-
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
205-
pd.testing.assert_series_equal(
206-
actual.to_pandas(allow_large_results=True),
207-
expected.to_pandas(allow_large_results=True),
208-
)
176+
pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
209177

210178

211179
def test_json_extract_array_w_invalid_series_type():
@@ -219,35 +187,23 @@ def test_json_extract_string_array_from_json_strings():
219187
actual = bbq.json_extract_string_array(s, "$.a")
220188
expected = bpd.Series([["ab", "2", "3 xy"], [], ["4", "5"]])
221189

222-
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
223-
pd.testing.assert_series_equal(
224-
actual.to_pandas(allow_large_results=True),
225-
expected.to_pandas(allow_large_results=True),
226-
)
190+
pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
227191

228192

229193
def test_json_extract_string_array_from_array_strings():
230194
s = bpd.Series(["[1, 2, 3]", "[]", "[4,5]"])
231195
actual = bbq.json_extract_string_array(s)
232196
expected = bpd.Series([["1", "2", "3"], [], ["4", "5"]])
233197

234-
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
235-
pd.testing.assert_series_equal(
236-
actual.to_pandas(allow_large_results=True),
237-
expected.to_pandas(allow_large_results=True),
238-
)
198+
pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
239199

240200

241201
def test_json_extract_string_array_as_float_array_from_array_strings():
242202
s = bpd.Series(["[1, 2.5, 3]", "[]", "[4,5]"])
243203
actual = bbq.json_extract_string_array(s, value_dtype=dtypes.FLOAT_DTYPE)
244204
expected = bpd.Series([[1, 2.5, 3], [], [4, 5]])
245205

246-
# TODO(b/401630655): JSON is not compatible with allow_large_results=False
247-
pd.testing.assert_series_equal(
248-
actual.to_pandas(allow_large_results=True),
249-
expected.to_pandas(allow_large_results=True),
250-
)
206+
pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
251207

252208

253209
def test_json_extract_string_array_w_invalid_series_type():

tests/system/small/blob/test_properties.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,7 @@ def test_blob_version(images_mm_df: bpd.DataFrame):
5555

5656

5757
def test_blob_metadata(images_mm_df: bpd.DataFrame):
58-
# allow_large_result=False incompatible with json b/401630655
59-
with bigframes.option_context(
60-
"bigquery.allow_large_results", True, "experiments.blob", True
61-
):
58+
with bigframes.option_context("experiments.blob", True):
6259
actual = images_mm_df["blob_col"].blob.metadata().to_pandas()
6360
expected = pd.Series(
6461
[

tests/system/small/ml/test_llm.py

-7
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,6 @@
2424
from tests.system import utils
2525

2626

27-
# Until b/401630655 is resolved, ML apis return json, not compatible with allow_large_results=False
28-
@pytest.fixture(scope="module", autouse=True)
29-
def always_create_table():
30-
with bigframes.option_context("bigquery.allow_large_results", True):
31-
yield
32-
33-
3427
@pytest.mark.parametrize(
3528
"model_name",
3629
("text-embedding-005", "text-embedding-004", "text-multilingual-embedding-002"),

tests/system/small/test_dataframe.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -4607,13 +4607,12 @@ def test_df_drop_duplicates(scalars_df_index, scalars_pandas_df_index, keep, sub
46074607
],
46084608
)
46094609
def test_df_drop_duplicates_w_json(json_df, keep):
4610-
bf_df = json_df.drop_duplicates(keep=keep).to_pandas(allow_large_results=True)
4610+
bf_df = json_df.drop_duplicates(keep=keep).to_pandas()
46114611

46124612
# drop_duplicates relies on pa.compute.dictionary_encode, which is incompatible
46134613
# with Arrow string extension types. Temporary conversion to standard Pandas
46144614
# strings is required.
4615-
# allow_large_results=True for b/401630655
4616-
json_pandas_df = json_df.to_pandas(allow_large_results=True)
4615+
json_pandas_df = json_df.to_pandas()
46174616
json_pandas_df["json_col"] = json_pandas_df["json_col"].astype(
46184617
pd.StringDtype(storage="pyarrow")
46194618
)

tests/system/small/test_series.py

+16-19
Original file line numberDiff line numberDiff line change
@@ -322,24 +322,22 @@ def test_series_construct_local_unordered_has_sequential_index(unordered_session
322322

323323

324324
def test_series_construct_w_dtype_for_json():
325-
# Until b/401630655 is resolved, json, not compatible with allow_large_results=False
326-
with bigframes.option_context("bigquery.allow_large_results", True):
327-
data = [
328-
"1",
329-
'"str"',
330-
"false",
331-
'["a", {"b": 1}, null]',
332-
None,
333-
'{"a": {"b": [1, 2, 3], "c": true}}',
334-
]
335-
s = bigframes.pandas.Series(data, dtype=dtypes.JSON_DTYPE)
325+
data = [
326+
"1",
327+
'"str"',
328+
"false",
329+
'["a", {"b": 1}, null]',
330+
None,
331+
'{"a": {"b": [1, 2, 3], "c": true}}',
332+
]
333+
s = bigframes.pandas.Series(data, dtype=dtypes.JSON_DTYPE)
336334

337-
assert s[0] == "1"
338-
assert s[1] == '"str"'
339-
assert s[2] == "false"
340-
assert s[3] == '["a",{"b":1},null]'
341-
assert pd.isna(s[4])
342-
assert s[5] == '{"a":{"b":[1,2,3],"c":true}}'
335+
assert s[0] == "1"
336+
assert s[1] == '"str"'
337+
assert s[2] == "false"
338+
assert s[3] == '["a",{"b":1},null]'
339+
assert pd.isna(s[4])
340+
assert s[5] == '{"a":{"b":[1,2,3],"c":true}}'
343341

344342

345343
def test_series_keys(scalars_dfs):
@@ -402,8 +400,7 @@ def test_get_column(scalars_dfs, col_name, expected_dtype):
402400

403401
def test_get_column_w_json(json_df, json_pandas_df):
404402
series = json_df["json_col"]
405-
# Until b/401630655 is resolved, json not compatible with allow_large_results=False
406-
series_pandas = series.to_pandas(allow_large_results=True)
403+
series_pandas = series.to_pandas()
407404
assert series.dtype == pd.ArrowDtype(db_dtypes.JSONArrowType())
408405
assert series_pandas.shape[0] == json_pandas_df.shape[0]
409406

0 commit comments

Comments (0)