Skip to content

Commit 5301dcb

Browse files
Merge pull request #2955 from plotly/orjson_encoding
JSON encoding refactor and orjson encoding
2 parents 5ab8da3 + 1fbfa0d commit 5301dcb

23 files changed

+706
-143
lines changed

doc/requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,4 @@ umap-learn==0.5.1
3131
pooch
3232
wget
3333
nbconvert==5.6.1
34+
orjson

packages/python/plotly/_plotly_utils/basevalidators.py

+35-44
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def to_scalar_or_list(v):
5353
return v
5454

5555

56-
def copy_to_readonly_numpy_array(v, kind=None, force_numeric=False):
56+
def copy_to_readonly_numpy_array_or_list(v, kind=None, force_numeric=False):
5757
"""
5858
Convert an array-like value into a read-only numpy array
5959
@@ -89,7 +89,13 @@ def copy_to_readonly_numpy_array(v, kind=None, force_numeric=False):
8989

9090
# u: unsigned int, i: signed int, f: float
9191
numeric_kinds = {"u", "i", "f"}
92-
kind_default_dtypes = {"u": "uint32", "i": "int32", "f": "float64", "O": "object"}
92+
kind_default_dtypes = {
93+
"u": "uint32",
94+
"i": "int32",
95+
"f": "float64",
96+
"O": "object",
97+
"U": "U",
98+
}
9399

94100
# Handle pandas Series and Index objects
95101
if pd and isinstance(v, (pd.Series, pd.Index)):
@@ -113,18 +119,12 @@ def copy_to_readonly_numpy_array(v, kind=None, force_numeric=False):
113119
if not isinstance(v, np.ndarray):
114120
# v has its own logic on how to convert itself into a numpy array
115121
if is_numpy_convertable(v):
116-
return copy_to_readonly_numpy_array(
122+
return copy_to_readonly_numpy_array_or_list(
117123
np.array(v), kind=kind, force_numeric=force_numeric
118124
)
119125
else:
120126
# v is not homogenous array
121-
v_list = [to_scalar_or_list(e) for e in v]
122-
123-
# Lookup dtype for requested kind, if any
124-
dtype = kind_default_dtypes.get(first_kind, None)
125-
126-
# construct new array from list
127-
new_v = np.array(v_list, order="C", dtype=dtype)
127+
return [to_scalar_or_list(e) for e in v]
128128
elif v.dtype.kind in numeric_kinds:
129129
# v is a homogenous numeric array
130130
if kind and v.dtype.kind not in kind:
@@ -135,6 +135,12 @@ def copy_to_readonly_numpy_array(v, kind=None, force_numeric=False):
135135
else:
136136
# Either no kind was requested or requested kind is satisfied
137137
new_v = np.ascontiguousarray(v.copy())
138+
elif v.dtype.kind == "O":
139+
if kind:
140+
dtype = kind_default_dtypes.get(first_kind, None)
141+
return np.array(v, dtype=dtype)
142+
else:
143+
return v.tolist()
138144
else:
139145
# v is a non-numeric homogenous array
140146
new_v = v.copy()
@@ -149,12 +155,12 @@ def copy_to_readonly_numpy_array(v, kind=None, force_numeric=False):
149155
if "U" not in kind:
150156
# Force non-numeric arrays to have object type
151157
# --------------------------------------------
152-
# Here we make sure that non-numeric arrays have the object
153-
# datatype. This works around cases like np.array([1, 2, '3']) where
158+
# Here we make sure that non-numeric arrays become lists.
159+
# This works around cases like np.array([1, 2, '3']) where
154160
# numpy converts the integers to strings and returns array of dtype
155161
# '<U21'
156162
if new_v.dtype.kind not in ["u", "i", "f", "O", "M"]:
157-
new_v = np.array(v, dtype="object")
163+
return v.tolist()
158164

159165
# Set new array to be read-only
160166
# -----------------------------
@@ -191,7 +197,7 @@ def is_homogeneous_array(v):
191197
if v_numpy.shape == ():
192198
return False
193199
else:
194-
return True
200+
return True # v_numpy.dtype.kind in ["u", "i", "f", "M", "U"]
195201
return False
196202

197203

@@ -393,7 +399,7 @@ def validate_coerce(self, v):
393399
# Pass None through
394400
pass
395401
elif is_homogeneous_array(v):
396-
v = copy_to_readonly_numpy_array(v)
402+
v = copy_to_readonly_numpy_array_or_list(v)
397403
elif is_simple_array(v):
398404
v = to_scalar_or_list(v)
399405
else:
@@ -598,7 +604,7 @@ def validate_coerce(self, v):
598604
self.raise_invalid_elements(invalid_els[:10])
599605

600606
if is_homogeneous_array(v):
601-
v = copy_to_readonly_numpy_array(v)
607+
v = copy_to_readonly_numpy_array_or_list(v)
602608
else:
603609
v = to_scalar_or_list(v)
604610
else:
@@ -754,7 +760,7 @@ def validate_coerce(self, v):
754760
elif self.array_ok and is_homogeneous_array(v):
755761
np = get_module("numpy")
756762
try:
757-
v_array = copy_to_readonly_numpy_array(v, force_numeric=True)
763+
v_array = copy_to_readonly_numpy_array_or_list(v, force_numeric=True)
758764
except (ValueError, TypeError, OverflowError):
759765
self.raise_invalid_val(v)
760766

@@ -881,7 +887,7 @@ def validate_coerce(self, v):
881887
pass
882888
elif self.array_ok and is_homogeneous_array(v):
883889
np = get_module("numpy")
884-
v_array = copy_to_readonly_numpy_array(
890+
v_array = copy_to_readonly_numpy_array_or_list(
885891
v, kind=("i", "u"), force_numeric=True
886892
)
887893

@@ -1042,26 +1048,7 @@ def validate_coerce(self, v):
10421048
if invalid_els:
10431049
self.raise_invalid_elements(invalid_els)
10441050

1045-
if is_homogeneous_array(v):
1046-
np = get_module("numpy")
1047-
1048-
# If not strict, let numpy cast elements to strings
1049-
v = copy_to_readonly_numpy_array(v, kind="U")
1050-
1051-
# Check no_blank
1052-
if self.no_blank:
1053-
invalid_els = v[v == ""][:10].tolist()
1054-
if invalid_els:
1055-
self.raise_invalid_elements(invalid_els)
1056-
1057-
# Check values
1058-
if self.values:
1059-
invalid_inds = np.logical_not(np.isin(v, self.values))
1060-
invalid_els = v[invalid_inds][:10].tolist()
1061-
if invalid_els:
1062-
self.raise_invalid_elements(invalid_els)
1063-
1064-
elif is_simple_array(v):
1051+
if is_simple_array(v) or is_homogeneous_array(v):
10651052
if not self.strict:
10661053
v = [StringValidator.to_str_or_unicode_or_none(e) for e in v]
10671054

@@ -1338,8 +1325,12 @@ def validate_coerce(self, v, should_raise=True):
13381325
# Pass None through
13391326
pass
13401327
elif self.array_ok and is_homogeneous_array(v):
1341-
v = copy_to_readonly_numpy_array(v)
1342-
if self.numbers_allowed() and v.dtype.kind in ["u", "i", "f"]:
1328+
v = copy_to_readonly_numpy_array_or_list(v)
1329+
if (
1330+
not isinstance(v, list)
1331+
and self.numbers_allowed()
1332+
and v.dtype.kind in ["u", "i", "f"]
1333+
):
13431334
# Numbers are allowed and we have an array of numbers.
13441335
# All good
13451336
pass
@@ -1353,9 +1344,9 @@ def validate_coerce(self, v, should_raise=True):
13531344

13541345
# ### Check that elements have valid colors types ###
13551346
elif self.numbers_allowed() or invalid_els:
1356-
v = copy_to_readonly_numpy_array(validated_v, kind="O")
1347+
v = copy_to_readonly_numpy_array_or_list(validated_v, kind="O")
13571348
else:
1358-
v = copy_to_readonly_numpy_array(validated_v, kind="U")
1349+
v = copy_to_readonly_numpy_array_or_list(validated_v, kind="U")
13591350
elif self.array_ok and is_simple_array(v):
13601351
validated_v = [self.validate_coerce(e, should_raise=False) for e in v]
13611352

@@ -1870,7 +1861,7 @@ def validate_coerce(self, v):
18701861
self.raise_invalid_elements(invalid_els)
18711862

18721863
if is_homogeneous_array(v):
1873-
v = copy_to_readonly_numpy_array(validated_v, kind="U")
1864+
v = copy_to_readonly_numpy_array_or_list(validated_v, kind="U")
18741865
else:
18751866
v = to_scalar_or_list(v)
18761867
else:
@@ -1918,7 +1909,7 @@ def validate_coerce(self, v):
19181909
# Pass None through
19191910
pass
19201911
elif self.array_ok and is_homogeneous_array(v):
1921-
v = copy_to_readonly_numpy_array(v, kind="O")
1912+
v = copy_to_readonly_numpy_array_or_list(v, kind="O")
19221913
elif self.array_ok and is_simple_array(v):
19231914
v = to_scalar_or_list(v)
19241915
return v

packages/python/plotly/_plotly_utils/tests/validators/test_dataarray_validator.py

+16-2
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,29 @@ def test_validator_acceptance_simple(val, validator):
3232

3333

3434
@pytest.mark.parametrize(
35-
"val",
36-
[np.array([2, 3, 4]), pd.Series(["a", "b", "c"]), np.array([[1, 2, 3], [4, 5, 6]])],
35+
"val", [np.array([2, 3, 4]), np.array([[1, 2, 3], [4, 5, 6]])],
3736
)
3837
def test_validator_acceptance_homogeneous(val, validator):
3938
coerce_val = validator.validate_coerce(val)
4039
assert isinstance(coerce_val, np.ndarray)
4140
assert np.array_equal(validator.present(coerce_val), val)
4241

4342

43+
# Accept object array as list
44+
@pytest.mark.parametrize(
45+
"val",
46+
[
47+
["A", "B", "C"],
48+
np.array(["A", "B", "C"], dtype="object"),
49+
pd.Series(["a", "b", "c"]),
50+
],
51+
)
52+
def test_validator_accept_object_array_as_list(val, validator):
53+
coerce_val = validator.validate_coerce(val)
54+
assert isinstance(coerce_val, list)
55+
assert coerce_val == list(val)
56+
57+
4458
# ### Rejection ###
4559
@pytest.mark.parametrize("val", ["Hello", 23, set(), {}])
4660
def test_rejection(val, validator):

packages/python/plotly/_plotly_utils/tests/validators/test_enumerated_validator.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ def test_rejection_by_element_aok(val, validator_aok):
126126
[],
127127
["bar12"],
128128
("foo", "bar012", "baz"),
129-
np.array([]),
129+
np.array([], dtype="object"),
130130
np.array(["bar12"]),
131131
np.array(["foo", "bar012", "baz"]),
132132
],
@@ -135,7 +135,7 @@ def test_acceptance_aok(val, validator_aok_re):
135135
# Values should be accepted and returned unchanged
136136
coerce_val = validator_aok_re.validate_coerce(val)
137137
if isinstance(val, (np.ndarray, pd.Series)):
138-
assert np.array_equal(coerce_val, np.array(val, dtype=coerce_val.dtype))
138+
assert coerce_val == list(np.array(val))
139139
elif isinstance(val, (list, tuple)):
140140
assert validator_aok_re.present(coerce_val) == tuple(val)
141141
else:

packages/python/plotly/_plotly_utils/tests/validators/test_pandas_series_input.py

+8-20
Original file line numberDiff line numberDiff line change
@@ -149,13 +149,10 @@ def test_color_validator_object(color_validator, color_object_pandas):
149149
res = color_validator.validate_coerce(color_object_pandas)
150150

151151
# Check type
152-
assert isinstance(res, np.ndarray)
153-
154-
# Check dtype
155-
assert res.dtype == "object"
152+
assert isinstance(res, list)
156153

157154
# Check values
158-
np.testing.assert_array_equal(res, color_object_pandas)
155+
assert res == color_object_pandas.tolist()
159156

160157

161158
def test_color_validator_categorical(color_validator, color_categorical_pandas):
@@ -164,13 +161,10 @@ def test_color_validator_categorical(color_validator, color_categorical_pandas):
164161

165162
# Check type
166163
assert color_categorical_pandas.dtype == "category"
167-
assert isinstance(res, np.ndarray)
168-
169-
# Check dtype
170-
assert res.dtype == "object"
164+
assert isinstance(res, list)
171165

172166
# Check values
173-
np.testing.assert_array_equal(res, np.array(color_categorical_pandas))
167+
assert res == color_categorical_pandas.tolist()
174168

175169

176170
def test_data_array_validator_dates_series(
@@ -180,13 +174,10 @@ def test_data_array_validator_dates_series(
180174
res = data_array_validator.validate_coerce(datetime_pandas)
181175

182176
# Check type
183-
assert isinstance(res, np.ndarray)
184-
185-
# Check dtype
186-
assert res.dtype == "object"
177+
assert isinstance(res, list)
187178

188179
# Check values
189-
np.testing.assert_array_equal(res, dates_array)
180+
assert res == dates_array.tolist()
190181

191182

192183
def test_data_array_validator_dates_dataframe(
@@ -197,10 +188,7 @@ def test_data_array_validator_dates_dataframe(
197188
res = data_array_validator.validate_coerce(df)
198189

199190
# Check type
200-
assert isinstance(res, np.ndarray)
201-
202-
# Check dtype
203-
assert res.dtype == "object"
191+
assert isinstance(res, list)
204192

205193
# Check values
206-
np.testing.assert_array_equal(res, dates_array.reshape(len(dates_array), 1))
194+
assert res == dates_array.reshape(len(dates_array), 1).tolist()

packages/python/plotly/_plotly_utils/tests/validators/test_string_validator.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -138,8 +138,7 @@ def test_acceptance_aok_scalars(val, validator_aok):
138138
def test_acceptance_aok_list(val, validator_aok):
139139
coerce_val = validator_aok.validate_coerce(val)
140140
if isinstance(val, np.ndarray):
141-
assert isinstance(coerce_val, np.ndarray)
142-
assert np.array_equal(coerce_val, np.array(val, dtype=coerce_val.dtype))
141+
assert coerce_val == val.tolist()
143142
elif isinstance(val, list):
144143
assert validator_aok.present(val) == tuple(val)
145144
else:
@@ -178,9 +177,7 @@ def test_rejection_aok_values(val, validator_aok_values):
178177
)
179178
def test_acceptance_no_blanks_aok(val, validator_no_blanks_aok):
180179
coerce_val = validator_no_blanks_aok.validate_coerce(val)
181-
if isinstance(val, np.ndarray):
182-
assert np.array_equal(coerce_val, np.array(val, dtype=coerce_val.dtype))
183-
elif isinstance(val, list):
180+
if isinstance(val, (list, np.ndarray)):
184181
assert validator_no_blanks_aok.present(coerce_val) == tuple(val)
185182
else:
186183
assert coerce_val == val

packages/python/plotly/_plotly_utils/tests/validators/test_xarray_input.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -126,10 +126,7 @@ def test_color_validator_object(color_validator, color_object_xarray):
126126
res = color_validator.validate_coerce(color_object_xarray)
127127

128128
# Check type
129-
assert isinstance(res, np.ndarray)
130-
131-
# Check dtype
132-
assert res.dtype == "object"
129+
assert isinstance(res, list)
133130

134131
# Check values
135-
np.testing.assert_array_equal(res, color_object_xarray)
132+
assert res == list(color_object_xarray)

packages/python/plotly/_plotly_utils/utils.py

+2
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,10 @@ def encode(self, o):
6161
# We catch false positive cases (e.g. strings such as titles, labels etc.)
6262
# but this is ok since the intention is to skip the decoding / reencoding
6363
# step when it's completely safe
64+
6465
if not ("NaN" in encoded_o or "Infinity" in encoded_o):
6566
return encoded_o
67+
6668
# now:
6769
# 1. `loads` to switch Infinity, -Infinity, NaN to None
6870
# 2. `dumps` again so you get 'null' instead of extended JSON

packages/python/plotly/plotly/basedatatypes.py

+15
Original file line numberDiff line numberDiff line change
@@ -3297,6 +3297,7 @@ def to_dict(self):
32973297
# Frame key is only added if there are any frames
32983298
res = {"data": data, "layout": layout}
32993299
frames = deepcopy([frame._props for frame in self._frame_objs])
3300+
33003301
if frames:
33013302
res["frames"] = frames
33023303

@@ -3413,6 +3414,13 @@ def to_json(self, *args, **kwargs):
34133414
remove_uids: bool (default True)
34143415
True if trace UIDs should be omitted from the JSON representation
34153416
3417+
engine: str (default None)
3418+
The JSON encoding engine to use. One of:
3419+
- "json" for an encoder based on the built-in Python json module
3420+
- "orjson" for a fast encoder that requires the orjson package
3421+
If not specified, the default encoder is set to the current value of
3422+
plotly.io.json.config.default_encoder.
3423+
34163424
Returns
34173425
-------
34183426
str
@@ -3469,6 +3477,13 @@ def write_json(self, *args, **kwargs):
34693477
remove_uids: bool (default True)
34703478
True if trace UIDs should be omitted from the JSON representation
34713479
3480+
engine: str (default None)
3481+
The JSON encoding engine to use. One of:
3482+
- "json" for an encoder based on the built-in Python json module
3483+
- "orjson" for a fast encoder that requires the orjson package
3484+
If not specified, the default encoder is set to the current value of
3485+
plotly.io.json.config.default_encoder.
3486+
34723487
Returns
34733488
-------
34743489
None

0 commit comments

Comments
 (0)