Skip to content

Commit 5ec3cc0

Browse files
authored
feat: support pa.json_(pa.string()) in struct/list if available (#2180)
1 parent 68723bc commit 5ec3cc0

File tree

2 files changed

+45
-0
lines changed

2 files changed

+45
-0
lines changed

bigframes/dtypes.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,12 @@ def is_struct_like(type_: ExpressionType) -> bool:
340340
)
341341

342342

343+
def is_json_arrow_type(type_: pa.DataType) -> bool:
    """Report whether ``type_`` is one of the recognized Arrow JSON types.

    Args:
        type_: The Arrow data type to inspect.

    Returns:
        True when ``type_`` is an instance of ``db_dtypes.JSONArrowType``, or
        an instance of ``pa.JsonType`` if the installed pyarrow module exposes
        that attribute; False otherwise.
    """
    if isinstance(type_, db_dtypes.JSONArrowType):
        return True
    # ``pa.JsonType`` is only consulted when the pyarrow module provides it.
    if not hasattr(pa, "JsonType"):
        return False
    return isinstance(type_, pa.JsonType)
347+
348+
343349
def is_json_like(type_: ExpressionType) -> bool:
    """Report whether ``type_`` equals ``JSON_DTYPE`` or ``STRING_DTYPE``."""
    matches_json = type_ == JSON_DTYPE
    # Including JSON string
    return matches_json or type_ == STRING_DTYPE
345351

@@ -510,6 +516,10 @@ def arrow_dtype_to_bigframes_dtype(
510516
if arrow_dtype == pa.null():
511517
return DEFAULT_DTYPE
512518

519+
# Allow both db_dtypes.JSONArrowType() and pa.json_(pa.string())
520+
if is_json_arrow_type(arrow_dtype):
521+
return JSON_DTYPE
522+
513523
# No other types matched.
514524
raise TypeError(
515525
f"Unexpected Arrow data type {arrow_dtype}. {constants.FEEDBACK_LINK}"

tests/system/small/test_series.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,41 @@ def test_series_construct_w_json_dtype(json_type):
353353
assert s[5] == '{"a":{"b":[1,2,3],"c":true}}'
354354

355355

356+
def test_series_construct_w_nested_json_dtype():
    """Construct Series whose dtype nests a JSON Arrow type in list<struct>.

    The same string-typed Arrow array is wrapped first with
    ``db_dtypes.JSONArrowType()`` and then, when pyarrow exposes ``JsonType``,
    with ``pa.json_(pa.string())``; the two resulting Series are expected to
    convert to equal pandas Series.
    """
    rows = [
        [{"key": "1"}],
        [{"key": None}],
        [{"key": '["1","3","5"]'}],
        [{"key": '{"a":1,"b":["x","y"],"c":{"x":[],"z":false}}'}],
    ]
    string_nested_type = pa.list_(pa.struct([("key", pa.string())]))
    pa_array = pa.array(rows, type=string_nested_type)

    def make_series(json_arrow_type):
        # Wrap the shared Arrow array with a list<struct<key: JSON>> dtype.
        return bigframes.pandas.Series(
            pd.arrays.ArrowExtensionArray(pa_array),  # type: ignore
            dtype=pd.ArrowDtype(
                pa.list_(pa.struct([("key", json_arrow_type)])),
            ),
        )

    s = make_series(db_dtypes.JSONArrowType())

    assert s[0][0]["key"] == "1"
    assert not s[1][0]["key"]
    assert s[2][0]["key"] == '["1","3","5"]'
    assert s[3][0]["key"] == '{"a":1,"b":["x","y"],"c":{"x":[],"z":false}}'

    # Test with pyarrow.json_(pa.string()) if available.
    if hasattr(pa, "JsonType"):
        s2 = make_series(pa.json_(pa.string()))

        pd.testing.assert_series_equal(s.to_pandas(), s2.to_pandas())
389+
390+
356391
def test_series_keys(scalars_dfs):
357392
scalars_df, scalars_pandas_df = scalars_dfs
358393
bf_result = scalars_df["int64_col"].keys().to_pandas()

0 commit comments

Comments
 (0)