Skip to content

Commit b4cfcd9

Browse files
committed
unit-test for some functionalities
1 parent 8800b6b commit b4cfcd9

File tree

2 files changed

+67
-27
lines changed

2 files changed

+67
-27
lines changed

Diff for: db_dtypes/json.py

+5-15
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
from __future__ import annotations
1616

1717
import json
18-
import typing
1918

2019
import numpy as np
2120
import pandas as pd
@@ -69,10 +68,10 @@ def construct_array_type(cls):
6968
"""Return the array type associated with this dtype."""
7069
return JSONArray
7170

72-
@staticmethod
73-
def __from_arrow__(array: typing.Union[pa.Array, pa.ChunkedArray]) -> JSONArray:
74-
"""Convert to JSONArray from an Arrow array."""
75-
return JSONArray(array)
71+
# @staticmethod
72+
# def __from_arrow__(array: typing.Union[pa.Array, pa.ChunkedArray]) -> JSONArray:
73+
# """Convert to JSONArray from an Arrow array."""
74+
# return JSONArray(array)
7675

7776

7877
class JSONArray(arrays.ArrowExtensionArray):
@@ -143,18 +142,9 @@ def _box_pa_array(
143142
@classmethod
144143
def _from_sequence(cls, scalars, *, dtype=None, copy=False):
145144
"""Construct a new ExtensionArray from a sequence of scalars."""
146-
result = []
147-
for scalar in scalars:
148-
result.append(JSONArray._serialize_json(scalar))
145+
result = [JSONArray._serialize_json(scalar) for scalar in scalars]
149146
return cls(pa.array(result, type=pa.string(), from_pandas=True))
150147

151-
@classmethod
152-
def _from_sequence_of_strings(
153-
cls, strings, *, dtype, copy: bool = False
154-
) -> JSONArray:
155-
"""Construct a new ExtensionArray from a sequence of strings."""
156-
return cls._from_sequence(strings, dtype=dtype, copy=copy)
157-
158148
@classmethod
159149
def _concat_same_type(cls, to_concat) -> JSONArray:
160150
"""Concatenate multiple JSONArray."""

Diff for: tests/unit/test_json.py

+62-12
Original file line numberDiff line numberDiff line change
@@ -13,22 +13,72 @@
1313
# limitations under the License.
1414

1515

16-
import datetime as dt
17-
from typing import Optional
16+
import json
1817

19-
import pandas
20-
import pandas.api.extensions
18+
import pandas as pd
2119
import pandas.testing
22-
import pyarrow
2320
import pytest
2421

25-
import packaging.version
26-
2722
import db_dtypes
2823

29-
is_supported_version = packaging.version.Version(pandas.__version__) >= packaging.version.Version("1.5.0")
24+
# Check for minimum Pandas version.
25+
pytest.importorskip("pandas", minversion="1.5.0")
26+
27+
28+
# Python data types mirroring all standard JSON types:
29+
# https://json-schema.org/understanding-json-schema/reference/type
30+
JSON_DATA = {
31+
"boolean": True,
32+
"int": 100,
33+
"float": 0.98,
34+
"string": "hello world",
35+
"array": [0.1, 0.2],
36+
"dict": {
37+
"null_field": None,
38+
"order": {
39+
"items": ["book", "pen", "computer"],
40+
"total": 15.99,
41+
"address": {"street": "123 Main St", "city": "Anytown"},
42+
},
43+
},
44+
"null": None,
45+
}
46+
47+
48+
def test_get_items():
    """Check that indexing a JSONArray yields each original Python value.

    The array is built from every value in ``JSON_DATA``. Each element is
    compared with its source value, except the ``"null"`` entry, which is
    expected to be reported as missing by ``pd.isna``.
    """
    data = db_dtypes.JSONArray._from_sequence(JSON_DATA.values())
    # Iterate key/value pairs directly; ``index`` avoids shadowing builtin ``id``.
    for index, (key, expected) in enumerate(JSON_DATA.items()):
        if key == "null":
            assert pd.isna(data[index])
        else:
            assert data[index] == expected
55+
56+
57+
def test_get_items_unbox_object():
    """Check that a nested JSON object is returned as a navigable mapping.

    A single-element array is built from ``JSON_DATA["dict"]``; the element is
    then inspected at several nesting depths, and a lookup of a key that was
    never stored must raise ``KeyError``.
    """
    array = db_dtypes.JSONArray._from_sequence([JSON_DATA["dict"]])

    # Top level holds exactly the two keys "null_field" and "order".
    assert len(array[0]) == 2
    assert array[0]["null_field"] is None

    # Nested object and nested list contents.
    assert array[0]["order"]["address"]["city"] == "Anytown"
    order_items = array[0]["order"]["items"]
    assert len(order_items) == 3
    assert array[0]["order"]["items"][0] == "book"

    with pytest.raises(KeyError):
        array[0]["unknown"]
68+
69+
70+
def test_to_numpy():
    """Check the values produced by ``Series.to_numpy`` on a JSONArray.

    Each non-null element is expected to equal the JSON text of its source
    value serialized with sorted keys; the ``"null"`` entry is expected to be
    reported as missing by ``pd.isna``.
    """
    s = pd.Series(db_dtypes.JSONArray._from_sequence(JSON_DATA.values()))
    data = s.to_numpy()
    # Iterate key/value pairs directly; ``index`` avoids shadowing builtin ``id``.
    for index, (key, value) in enumerate(JSON_DATA.items()):
        if key == "null":
            assert pd.isna(data[index])
        else:
            assert data[index] == json.dumps(value, sort_keys=True)
78+
3079

31-
@pytest.mark.skipif(not is_supported_version, reason="requires Pandas 1.5.0 and above")
32-
def test_constructor_from_sequence():
33-
json_obj = [0, "str", {"a": 0, "b": 1}]
34-
data = db_dtypes.JSONArray._from_sequence(json_obj)
80+
def test_deterministic_json_serialization():
    """Check that key order does not affect membership in a JSONArray.

    The array stores one dict; a second dict with the same items inserted in
    the opposite order must still be found by the ``in`` operator.
    """
    stored = {"a": 0, "b": 1}
    reordered = {"b": 1, "a": 0}

    array = db_dtypes.JSONArray._from_sequence([stored])

    assert reordered in array

0 commit comments

Comments
 (0)