From 763bd7a1df49b4206606d2e81ebeda1f7c14de43 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Fri, 26 May 2023 21:07:18 -0400 Subject: [PATCH 01/28] Adding logic to throw a deprecation warning when a literal json string is passed to read_json --- doc/source/whatsnew/v2.1.0.rst | 2 ++ pandas/io/json/_json.py | 12 +++++++++--- pandas/tests/io/json/test_readlines.py | 10 ++++++++++ 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 2c5263f447951..cfefd69689147 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -269,8 +269,10 @@ Deprecations - Deprecated behavior of :func:`assert_series_equal` and :func:`assert_frame_equal` considering NA-like values (e.g. ``NaN`` vs ``None`` as equivalent) (:issue:`52081`) - Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`) - Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`) +- Deprecated literal json input to :func:`read_json`. Moving forward the method only accepts file-like objects (:issue:`53330`) - + .. --------------------------------------------------------------------------- .. _whatsnew_210.performance: diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 5c2fba814375f..0b7d16f4e2044 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -1,5 +1,5 @@ from __future__ import annotations - +import warnings from abc import ( ABC, abstractmethod, @@ -20,7 +20,7 @@ ) import numpy as np - +from pandas.util._exceptions import find_stack_level from pandas._libs import lib from pandas._libs.json import ( dumps, @@ -925,7 +925,13 @@ def _get_data_from_filepath(self, filepath_or_buffer): and not file_exists(filepath_or_buffer) ): raise FileNotFoundError(f"File {filepath_or_buffer} does not exist") - + else: + warnings.warn("Passing literal json to 'read_json' is deprecated and " + "will be removed in a future version. To read from a " + "literal string, wrap it in a 'StringIO' object.", + FutureWarning, + stacklevel=find_stack_level(), + ) return filepath_or_buffer def _combine_lines(self, lines) -> str: diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index 7d7614bc93845..5c19dffad70ff 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -15,6 +15,16 @@ from pandas.io.json._json import JsonReader +def test_json_deprecation(): + expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) + warning_msg = "Passing literal json to 'read_json' is deprecated and "\ + "will be removed in a future version. To read from a "\ + "literal string, wrap it in a 'StringIO' object." + + with tm.assert_produces_warning(FutureWarning, match=warning_msg): + result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True) + tm.assert_frame_equal(result, expected) + @pytest.fixture def lines_json_df(): df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) From ffc47a6fafd3fe3bb24f9000feb64a12323b1b83 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Fri, 26 May 2023 21:10:18 -0400 Subject: [PATCH 02/28] Adding logic to throw a deprecation warning when a literal json string is passed to read_json --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/io/json/_json.py | 19 +++++++++++-------- pandas/tests/io/json/test_readlines.py | 9 ++++++--- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index cfefd69689147..16a3e98c6bcea 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -268,8 +268,8 @@ Deprecations - Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`) - Deprecated behavior of :func:`assert_series_equal` and :func:`assert_frame_equal` considering NA-like values (e.g. ``NaN`` vs ``None`` as equivalent) (:issue:`52081`) - Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`) -- Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`) - Deprecated literal json input to :func:`read_json`. Moving forward the method only accepts file-like objects (:issue:`53330`) +- Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`) - diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 0b7d16f4e2044..10853d1ef06a8 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -1,5 +1,5 @@ from __future__ import annotations -import warnings + from abc import ( ABC, abstractmethod, @@ -18,9 +18,10 @@ TypeVar, overload, ) +import warnings import numpy as np -from pandas.util._exceptions import find_stack_level + from pandas._libs import lib from pandas._libs.json import ( dumps, @@ -30,6 +31,7 @@ from pandas.compat._optional import import_optional_dependency from pandas.errors import AbstractMethodError from pandas.util._decorators import doc +from pandas.util._exceptions import find_stack_level from pandas.util._validators import check_dtype_backend from pandas.core.dtypes.common import ensure_str @@ -926,12 +928,13 @@ def _get_data_from_filepath(self, filepath_or_buffer): ): raise FileNotFoundError(f"File {filepath_or_buffer} does not exist") else: - warnings.warn("Passing literal json to 'read_json' is deprecated and " - "will be removed in a future version. To read from a " - "literal string, wrap it in a 'StringIO' object.", - FutureWarning, - stacklevel=find_stack_level(), - ) + warnings.warn( + "Passing literal json to 'read_json' is deprecated and " + "will be removed in a future version. To read from a " + "literal string, wrap it in a 'StringIO' object.", + FutureWarning, + stacklevel=find_stack_level(), + ) return filepath_or_buffer def _combine_lines(self, lines) -> str: diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index 5c19dffad70ff..200a31996684d 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -17,14 +17,17 @@ def test_json_deprecation(): expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) - warning_msg = "Passing literal json to 'read_json' is deprecated and "\ - "will be removed in a future version. To read from a "\ - "literal string, wrap it in a 'StringIO' object." + warning_msg = ( + "Passing literal json to 'read_json' is deprecated and " + "will be removed in a future version. To read from a " + "literal string, wrap it in a 'StringIO' object." + ) with tm.assert_produces_warning(FutureWarning, match=warning_msg): result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True) tm.assert_frame_equal(result, expected) + @pytest.fixture def lines_json_df(): df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) From b23e54891dbcf16019be64c4ceb5156ee8b70eb1 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Fri, 26 May 2023 21:14:27 -0400 Subject: [PATCH 03/28] Updating documentation and adding PR num to unit test --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/tests/io/json/test_readlines.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 16a3e98c6bcea..820786c8d77c6 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -268,7 +268,7 @@ Deprecations - Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`) - Deprecated behavior of :func:`assert_series_equal` and :func:`assert_frame_equal` considering NA-like values (e.g. ``NaN`` vs ``None`` as equivalent) (:issue:`52081`) - Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`) -- Deprecated literal json input to :func:`read_json`. Moving forward the method only accepts file-like objects (:issue:`53330`) +- Deprecated literal json input to :func:`read_json`. Moving forward the method only accepts file-like objects (:issue:`53409`) - Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`) - diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index 200a31996684d..24038e5c66585 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -16,6 +16,7 @@ def test_json_deprecation(): + # PR 53409 expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) warning_msg = ( "Passing literal json to 'read_json' is deprecated and " From 639a212930b10ab86868d6e273882c0f0ec35b3d Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Fri, 26 May 2023 21:26:47 -0400 Subject: [PATCH 04/28] Adding a deprecation warning to the user guide --- doc/source/user_guide/io.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 90a8bd868b60b..9bd5ac8c088b0 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -2072,6 +2072,10 @@ is ``None``. To explicitly force ``Series`` parsing, pass ``typ=series`` * ``engine``: Either ``"ujson"``, the built-in JSON parser, or ``"pyarrow"`` which dispatches to pyarrow's ``pyarrow.json.read_json``. The ``"pyarrow"`` is only available when ``lines=True`` +.. warning:: + + Passing json literal strings will be deprecated in a future release of pandas. + The parser will raise one of ``ValueError/TypeError/AssertionError`` if the JSON is not parseable. If a non-default ``orient`` was used when encoding to JSON be sure to pass the same From 57c96dfe403fbd9b55250afbec24bb74b5a0a186 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Fri, 26 May 2023 23:08:32 -0400 Subject: [PATCH 05/28] Updating unit tests to check for FutureWarning --- pandas/tests/io/json/test_pandas.py | 213 ++++++++++++++++--------- pandas/tests/io/json/test_readlines.py | 54 +++++-- 2 files changed, 173 insertions(+), 94 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index a966ad1dabcaa..4aa4721c44b0d 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -29,6 +29,14 @@ ) +def generateDepMsg(): + return ( + "Passing literal json to 'read_json' is deprecated and " + "will be removed in a future version. To read from a " + "literal string, wrap it in a 'StringIO' object." + ) + + def assert_json_roundtrip_equal(result, expected, orient): if orient in ("records", "values"): expected = expected.reset_index(drop=True) @@ -74,7 +82,8 @@ def test_frame_double_encoded_labels(self, orient): columns=["a \\ b", "y / z"], ) - result = read_json(df.to_json(orient=orient), orient=orient) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(df.to_json(orient=orient), orient=orient) expected = df.copy() assert_json_roundtrip_equal(result, expected, orient) @@ -82,7 +91,8 @@ def test_frame_double_encoded_labels(self, orient): @pytest.mark.parametrize("orient", ["split", "records", "values"]) def test_frame_non_unique_index(self, orient): df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 1], columns=["x", "y"]) - result = read_json(df.to_json(orient=orient), orient=orient) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(df.to_json(orient=orient), orient=orient) expected = df.copy() assert_json_roundtrip_equal(result, expected, orient) @@ -107,9 +117,10 @@ def test_frame_non_unique_index_raises(self, orient): def test_frame_non_unique_columns(self, orient, data): df = DataFrame(data, index=[1, 2], columns=["x", "x"]) - result = read_json( - df.to_json(orient=orient), orient=orient, convert_dates=["x"] - ) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json( + df.to_json(orient=orient), orient=orient, convert_dates=["x"] + ) if orient == "values": expected = DataFrame(data) if expected.iloc[:, 0].dtype == "datetime64[ns]": @@ -139,7 +150,10 @@ def test_frame_default_orient(self, float_frame): @pytest.mark.parametrize("convert_axes", [True, False]) def test_roundtrip_simple(self, orient, convert_axes, dtype, float_frame): data = float_frame.to_json(orient=orient) - result = read_json(data, orient=orient, convert_axes=convert_axes, dtype=dtype) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json( + data, orient=orient, convert_axes=convert_axes, dtype=dtype + ) expected = float_frame @@ -149,7 +163,10 @@ def test_roundtrip_simple(self, orient, convert_axes, dtype, float_frame): @pytest.mark.parametrize("convert_axes", [True, False]) def test_roundtrip_intframe(self, orient, convert_axes, dtype, int_frame): data = int_frame.to_json(orient=orient) - result = read_json(data, orient=orient, convert_axes=convert_axes, dtype=dtype) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json( + data, orient=orient, convert_axes=convert_axes, dtype=dtype + ) expected = int_frame assert_json_roundtrip_equal(result, expected, orient) @@ -164,7 +181,10 @@ def test_roundtrip_str_axes(self, orient, convert_axes, dtype): ) data = df.to_json(orient=orient) - result = read_json(data, orient=orient, convert_axes=convert_axes, dtype=dtype) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json( + data, orient=orient, convert_axes=convert_axes, dtype=dtype + ) expected = df.copy() if not dtype: @@ -198,8 +218,8 @@ def test_roundtrip_categorical( ) data = categorical_frame.to_json(orient=orient) - - result = read_json(data, orient=orient, convert_axes=convert_axes) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(data, orient=orient, convert_axes=convert_axes) expected = categorical_frame.copy() expected.index = expected.index.astype(str) # Categorical not preserved @@ -210,7 +230,8 @@ def test_roundtrip_categorical( def test_roundtrip_empty(self, orient, convert_axes): empty_frame = DataFrame() data = empty_frame.to_json(orient=orient) - result = read_json(data, orient=orient, convert_axes=convert_axes) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(data, orient=orient, convert_axes=convert_axes) if orient == "split": idx = pd.Index([], dtype=(float if convert_axes else object)) expected = DataFrame(index=idx, columns=idx) @@ -225,7 +246,8 @@ def test_roundtrip_empty(self, orient, convert_axes): def test_roundtrip_timestamp(self, orient, convert_axes, datetime_frame): # TODO: improve coverage with date_format parameter data = datetime_frame.to_json(orient=orient) - result = read_json(data, orient=orient, convert_axes=convert_axes) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(data, orient=orient, convert_axes=convert_axes) expected = datetime_frame.copy() if not convert_axes: # one off for ts handling @@ -251,7 +273,8 @@ def test_roundtrip_mixed(self, orient, convert_axes): df = DataFrame(data=values, index=index) data = df.to_json(orient=orient) - result = read_json(data, orient=orient, convert_axes=convert_axes) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(data, orient=orient, convert_axes=convert_axes) expected = df.copy() expected = expected.assign(**expected.select_dtypes("number").astype(np.int64)) @@ -276,7 +299,8 @@ def test_roundtrip_multiindex(self, columns): columns=pd.MultiIndex.from_arrays(columns), ) - result = read_json(df.to_json(orient="split"), orient="split") + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(df.to_json(orient="split"), orient="split") tm.assert_frame_equal(result, df) @@ -322,28 +346,31 @@ def test_frame_from_json_bad_data_raises(self, data, msg, orient): @pytest.mark.parametrize("convert_axes", [True, False]) def test_frame_from_json_missing_data(self, orient, convert_axes, dtype): num_df = DataFrame([[1, 2], [4, 5, 6]]) - result = read_json( - num_df.to_json(orient=orient), - orient=orient, - convert_axes=convert_axes, - dtype=dtype, - ) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json( + num_df.to_json(orient=orient), + orient=orient, + convert_axes=convert_axes, + dtype=dtype, + ) assert np.isnan(result.iloc[0, 2]) obj_df = DataFrame([["1", "2"], ["4", "5", "6"]]) - result = read_json( - obj_df.to_json(orient=orient), - orient=orient, - convert_axes=convert_axes, - dtype=dtype, - ) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json( + obj_df.to_json(orient=orient), + orient=orient, + convert_axes=convert_axes, + dtype=dtype, + ) assert np.isnan(result.iloc[0, 2]) @pytest.mark.parametrize("dtype", [True, False]) def test_frame_read_json_dtype_missing_value(self, dtype): # GH28501 Parse missing values using read_json with dtype=False # to NaN instead of None - result = read_json("[null]", dtype=dtype) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json("[null]", dtype=dtype) expected = DataFrame([np.nan]) tm.assert_frame_equal(result, expected) @@ -355,7 +382,9 @@ def test_frame_infinity(self, inf, dtype): # deserialisation df = DataFrame([[1, 2], [4, 5, 6]]) df.loc[0, 2] = inf - result = read_json(df.to_json(), dtype=dtype) + + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(df.to_json(), dtype=dtype) assert np.isnan(result.iloc[0, 2]) @pytest.mark.skipif(not IS64, reason="not compliant on 32-bit, xref #15865") @@ -384,9 +413,13 @@ def test_frame_to_json_except(self): def test_frame_empty(self): df = DataFrame(columns=["jim", "joe"]) assert not df._is_mixed_type - tm.assert_frame_equal( - read_json(df.to_json(), dtype=dict(df.dtypes)), df, check_index_type=False - ) + + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + tm.assert_frame_equal( + read_json(df.to_json(), dtype=dict(df.dtypes)), + df, + check_index_type=False, + ) # GH 7445 result = DataFrame({"test": []}, index=[]).to_json(orient="columns") expected = '{"test":{}}' @@ -397,9 +430,12 @@ def test_frame_empty_mixedtype(self): df = DataFrame(columns=["jim", "joe"]) df["joe"] = df["joe"].astype("i8") assert df._is_mixed_type - tm.assert_frame_equal( - read_json(df.to_json(), dtype=dict(df.dtypes)), df, check_index_type=False - ) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + tm.assert_frame_equal( + read_json(df.to_json(), dtype=dict(df.dtypes)), + df, + check_index_type=False, + ) def test_frame_mixedtype_orient(self): # GH10289 vals = [ @@ -418,17 +454,20 @@ def test_frame_mixedtype_orient(self): # GH10289 for orient in ["split", "index", "columns"]: inp = df.to_json(orient=orient) - left = read_json(inp, orient=orient, convert_axes=False) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + left = read_json(inp, orient=orient, convert_axes=False) tm.assert_frame_equal(left, right) right.index = pd.RangeIndex(len(df)) inp = df.to_json(orient="records") - left = read_json(inp, orient="records", convert_axes=False) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + left = read_json(inp, orient="records", convert_axes=False) tm.assert_frame_equal(left, right) right.columns = pd.RangeIndex(df.shape[1]) inp = df.to_json(orient="values") - left = read_json(inp, orient="values", convert_axes=False) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + left = read_json(inp, orient="values", convert_axes=False) tm.assert_frame_equal(left, right) def test_v12_compat(self, datapath): @@ -546,8 +585,8 @@ def test_blocks_compat_GH9037(self): # JSON deserialisation always creates unicode strings df_mixed.columns = df_mixed.columns.astype("unicode") - - df_roundtrip = read_json(df_mixed.to_json(orient="split"), orient="split") + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + df_roundtrip = read_json(df_mixed.to_json(orient="split"), orient="split") tm.assert_frame_equal( df_mixed, df_roundtrip, @@ -608,12 +647,13 @@ def test_series_non_unique_index(self): with pytest.raises(ValueError, match=msg): s.to_json(orient="index") - tm.assert_series_equal( - s, read_json(s.to_json(orient="split"), orient="split", typ="series") - ) - unserialized = read_json( - s.to_json(orient="records"), orient="records", typ="series" - ) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + tm.assert_series_equal( + s, read_json(s.to_json(orient="split"), orient="split", typ="series") + ) + unserialized = read_json( + s.to_json(orient="records"), orient="records", typ="series" + ) tm.assert_numpy_array_equal(s.values, unserialized.values) def test_series_default_orient(self, string_series): @@ -621,7 +661,8 @@ def test_series_default_orient(self, string_series): def test_series_roundtrip_simple(self, orient, string_series): data = string_series.to_json(orient=orient) - result = read_json(data, typ="series", orient=orient) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(data, typ="series", orient=orient) expected = string_series if orient in ("values", "records"): @@ -634,7 +675,8 @@ def test_series_roundtrip_simple(self, orient, string_series): @pytest.mark.parametrize("dtype", [False, None]) def test_series_roundtrip_object(self, orient, dtype, object_series): data = object_series.to_json(orient=orient) - result = read_json(data, typ="series", orient=orient, dtype=dtype) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(data, typ="series", orient=orient, dtype=dtype) expected = object_series if orient in ("values", "records"): @@ -647,7 +689,8 @@ def test_series_roundtrip_object(self, orient, dtype, object_series): def test_series_roundtrip_empty(self, orient): empty_series = Series([], index=[], dtype=np.float64) data = empty_series.to_json(orient=orient) - result = read_json(data, typ="series", orient=orient) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(data, typ="series", orient=orient) expected = empty_series.reset_index(drop=True) if orient in ("split"): @@ -657,7 +700,8 @@ def test_series_roundtrip_empty(self, orient): def test_series_roundtrip_timeseries(self, orient, datetime_series): data = datetime_series.to_json(orient=orient) - result = read_json(data, typ="series", orient=orient) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(data, typ="series", orient=orient) expected = datetime_series if orient in ("values", "records"): @@ -671,7 +715,8 @@ def test_series_roundtrip_timeseries(self, orient, datetime_series): def test_series_roundtrip_numeric(self, orient, dtype): s = Series(range(6), index=["a", "b", "c", "d", "e", "f"]) data = s.to_json(orient=orient) - result = read_json(data, typ="series", orient=orient) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(data, typ="series", orient=orient) expected = s.copy() if orient in ("values", "records"): @@ -687,13 +732,15 @@ def test_series_to_json_except(self): def test_series_from_json_precise_float(self): s = Series([4.56, 4.56, 4.56]) - result = read_json(s.to_json(), typ="series", precise_float=True) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(s.to_json(), typ="series", precise_float=True) tm.assert_series_equal(result, s, check_index_type=False) def test_series_with_dtype(self): # GH 21986 s = Series([4.56, 4.56, 4.56]) - result = read_json(s.to_json(), typ="series", dtype=np.int64) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(s.to_json(), typ="series", dtype=np.int64) expected = Series([4] * 3) tm.assert_series_equal(result, expected) @@ -707,44 +754,51 @@ def test_series_with_dtype(self): def test_series_with_dtype_datetime(self, dtype, expected): s = Series(["2000-01-01"], dtype="datetime64[ns]") data = s.to_json() - result = read_json(data, typ="series", dtype=dtype) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(data, typ="series", dtype=dtype) tm.assert_series_equal(result, expected) def test_frame_from_json_precise_float(self): df = DataFrame([[4.56, 4.56, 4.56], [4.56, 4.56, 4.56]]) - result = read_json(df.to_json(), precise_float=True) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(df.to_json(), precise_float=True) tm.assert_frame_equal(result, df) def test_typ(self): s = Series(range(6), index=["a", "b", "c", "d", "e", "f"], dtype="int64") - result = read_json(s.to_json(), typ=None) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(s.to_json(), typ=None) tm.assert_series_equal(result, s) def test_reconstruction_index(self): df = DataFrame([[1, 2, 3], [4, 5, 6]]) - result = read_json(df.to_json()) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(df.to_json()) - tm.assert_frame_equal(result, df) + tm.assert_frame_equal(result, df) - df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["A", "B", "C"]) - result = read_json(df.to_json()) + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["A", "B", "C"]) + result = read_json(df.to_json()) tm.assert_frame_equal(result, df) def test_path(self, float_frame, int_frame, datetime_frame): with tm.ensure_clean("test.json") as path: - for df in [float_frame, int_frame, datetime_frame]: - df.to_json(path) - read_json(path) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + for df in [float_frame, int_frame, datetime_frame]: + df.to_json(path) + read_json(path) def test_axis_dates(self, datetime_series, datetime_frame): # frame json = datetime_frame.to_json() - result = read_json(json) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(json) tm.assert_frame_equal(result, datetime_frame) # series json = datetime_series.to_json() - result = read_json(json, typ="series") + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(json, typ="series") tm.assert_series_equal(result, datetime_series, check_names=False) assert result.name is None @@ -753,23 +807,26 @@ def test_convert_dates(self, datetime_series, datetime_frame): df = datetime_frame df["date"] = Timestamp("20130101").as_unit("ns") - json = df.to_json() - result = read_json(json) - tm.assert_frame_equal(result, df) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + json = df.to_json() + result = read_json(json) + tm.assert_frame_equal(result, df) - df["foo"] = 1.0 - json = df.to_json(date_unit="ns") + df["foo"] = 1.0 + json = df.to_json(date_unit="ns") - result = read_json(json, convert_dates=False) - expected = df.copy() - expected["date"] = expected["date"].values.view("i8") - expected["foo"] = expected["foo"].astype("int64") - tm.assert_frame_equal(result, expected) + result = read_json(json, convert_dates=False) + expected = df.copy() + expected["date"] = expected["date"].values.view("i8") + expected["foo"] = expected["foo"].astype("int64") + tm.assert_frame_equal(result, expected) - # series - ts = Series(Timestamp("20130101").as_unit("ns"), index=datetime_series.index) - json = ts.to_json() - result = read_json(json, typ="series") + # series + ts = Series( + Timestamp("20130101").as_unit("ns"), index=datetime_series.index + ) + json = ts.to_json() + result = read_json(json, typ="series") tm.assert_series_equal(result, ts) @pytest.mark.parametrize("date_format", ["epoch", "iso"]) diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index 24038e5c66585..c78d3bcfb013d 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -15,16 +15,19 @@ from pandas.io.json._json import JsonReader -def test_json_deprecation(): - # PR 53409 - expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) - warning_msg = ( +def generateDepMsg(): + return ( "Passing literal json to 'read_json' is deprecated and " "will be removed in a future version. To read from a " "literal string, wrap it in a 'StringIO' object." ) - with tm.assert_produces_warning(FutureWarning, match=warning_msg): + +def test_json_deprecation(): + # PR 53409 + expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) + + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True) tm.assert_frame_equal(result, expected) @@ -37,7 +40,8 @@ def lines_json_df(): def test_read_jsonl(): # GH9180 - result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True) expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) tm.assert_frame_equal(result, expected) @@ -64,7 +68,12 @@ def test_read_datetime(request, engine): columns=["accounts", "date", "name"], ) json_line = df.to_json(lines=True, orient="records") - result = read_json(json_line, engine=engine) + + if engine == "pyarrow": + result = read_json(json_line, engine=engine) + else: + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(json_line, engine=engine) expected = DataFrame( [[1, "2020-03-05", "hector"], [2, "2020-04-08T09:58:49+00:00", "hector"]], columns=["accounts", "date", "name"], @@ -85,7 +94,8 @@ def test_read_jsonl_unicode_chars(): # simulate string json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n' - result = read_json(json, lines=True) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(json, lines=True) expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"]) tm.assert_frame_equal(result, expected) @@ -101,14 +111,16 @@ def test_to_jsonl(): result = df.to_json(orient="records", lines=True) expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}\n' assert result == expected - tm.assert_frame_equal(read_json(result, lines=True), df) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + tm.assert_frame_equal(read_json(result, lines=True), df) # GH15096: escaped characters in columns and data df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], columns=["a\\", "b"]) result = df.to_json(orient="records", lines=True) expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}\n' assert result == expected - tm.assert_frame_equal(read_json(result, lines=True), df) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + tm.assert_frame_equal(read_json(result, lines=True), df) def test_to_jsonl_count_new_lines(): @@ -270,7 +282,8 @@ def test_readjson_chunks_multiple_empty_lines(chunksize): {"A":3,"B":6} """ orig = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - test = read_json(j, lines=True, chunksize=chunksize) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + test = read_json(j, lines=True, chunksize=chunksize) if chunksize is not None: with test: test = pd.concat(test) @@ -304,7 +317,8 @@ def test_readjson_nrows(nrows, engine): {"a": 3, "b": 4} {"a": 5, "b": 6} {"a": 7, "b": 8}""" - result = read_json(jsonl, lines=True, nrows=nrows) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(jsonl, lines=True, nrows=nrows) expected = DataFrame({"a": [1, 3, 5, 7], "b": [2, 4, 6, 8]}).iloc[:nrows] tm.assert_frame_equal(result, expected) @@ -325,10 +339,18 @@ def test_readjson_nrows_chunks(request, nrows, chunksize, engine): {"a": 3, "b": 4} {"a": 5, "b": 6} {"a": 7, "b": 8}""" - with read_json( - jsonl, lines=True, nrows=nrows, chunksize=chunksize, engine=engine - ) as reader: - chunked = pd.concat(reader) + + if engine != "pyarrow": + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + with read_json( + jsonl, lines=True, nrows=nrows, chunksize=chunksize, engine=engine + ) as reader: + chunked = pd.concat(reader) + else: + with read_json( + jsonl, lines=True, nrows=nrows, chunksize=chunksize, engine=engine + ) as reader: + chunked = pd.concat(reader) expected = DataFrame({"a": [1, 3, 5, 7], "b": [2, 4, 6, 8]}).iloc[:nrows] tm.assert_frame_equal(chunked, expected) From 93c47ea33b601b13f4663ece391f4406581c10d1 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Sat, 27 May 2023 12:15:28 -0400 Subject: [PATCH 06/28] Fixing unit tests --- pandas/tests/io/json/test_compression.py | 20 +- .../tests/io/json/test_deprecated_kwargs.py | 10 +- .../tests/io/json/test_json_table_schema.py | 23 ++- .../json/test_json_table_schema_ext_dtype.py | 14 +- pandas/tests/io/json/test_pandas.py | 171 ++++++++++-------- 5 files changed, 155 insertions(+), 83 deletions(-) diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py index 143d2431d4147..427eefe787652 100644 --- a/pandas/tests/io/json/test_compression.py +++ b/pandas/tests/io/json/test_compression.py @@ -9,6 +9,14 @@ from pandas.tests.io.test_compression import _compression_to_extension +def generateDepMsg(): + return ( + "Passing literal json to 'read_json' is deprecated and " + "will be removed in a future version. To read from a " + "literal string, wrap it in a 'StringIO' object." + ) + + def test_compression_roundtrip(compression): df = pd.DataFrame( [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], @@ -23,7 +31,8 @@ def test_compression_roundtrip(compression): # explicitly ensure file was compressed. with tm.decompress_file(path, compression) as fh: result = fh.read().decode("utf8") - tm.assert_frame_equal(df, pd.read_json(result)) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + tm.assert_frame_equal(df, pd.read_json(result)) def test_read_zipped_json(datapath): @@ -56,7 +65,8 @@ def test_with_s3_url(compression, s3_resource, s3so): def test_lines_with_compression(compression): with tm.ensure_clean() as path: - df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}') + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}') df.to_json(path, orient="records", lines=True, compression=compression) roundtripped_df = pd.read_json(path, lines=True, compression=compression) tm.assert_frame_equal(df, roundtripped_df) @@ -64,7 +74,8 @@ def test_lines_with_compression(compression): def test_chunksize_with_compression(compression): with tm.ensure_clean() as path: - df = pd.read_json('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}') + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + df = pd.read_json('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}') df.to_json(path, orient="records", lines=True, compression=compression) with pd.read_json( @@ -75,7 +86,8 @@ def test_chunksize_with_compression(compression): def test_write_unsupported_compression_type(): - df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}') + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}') with tm.ensure_clean() as path: msg = "Unrecognized compression type: unsupported" with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/io/json/test_deprecated_kwargs.py b/pandas/tests/io/json/test_deprecated_kwargs.py index 7e3296db75323..29711d2c92769 100644 --- a/pandas/tests/io/json/test_deprecated_kwargs.py +++ b/pandas/tests/io/json/test_deprecated_kwargs.py @@ -8,9 +8,17 @@ from pandas.io.json import read_json +def generateDepMsg(): + return ( + "Passing literal json to 'read_json' is deprecated and " + "will be removed in a future version. To read from a " + "literal string, wrap it in a 'StringIO' object." + ) + + def test_good_kwargs(): df = pd.DataFrame({"A": [2, 4, 6], "B": [3, 6, 9]}, index=[0, 1, 2]) - with tm.assert_produces_warning(None): + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): tm.assert_frame_equal(df, read_json(df.to_json(orient="split"), orient="split")) tm.assert_frame_equal( df, read_json(df.to_json(orient="columns"), orient="columns") diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 48ab0f1be8c4a..8c36e22af02b8 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -24,6 +24,14 @@ ) +def generateDepMsg(): + return ( + "Passing literal json to 'read_json' is deprecated and " + "will be removed in a future version. To read from a " + "literal string, wrap it in a 'StringIO' object." + ) + + @pytest.fixture def df_schema(): return DataFrame( @@ -254,7 +262,8 @@ def test_read_json_from_to_json_results(self): "name_en": {"row_0": "Hakata Dolls Matsuo"}, } ) - result1 = pd.read_json(df.to_json()) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result1 = pd.read_json(df.to_json()) result2 = DataFrame.from_dict(json.loads(df.to_json())) tm.assert_frame_equal(result1, df) tm.assert_frame_equal(result2, df) @@ -795,7 +804,8 @@ def test_comprehensive(self): ) out = df.to_json(orient="table") - result = pd.read_json(out, orient="table") + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = pd.read_json(out, orient="table") tm.assert_frame_equal(df, result) @pytest.mark.parametrize( @@ -811,7 +821,8 @@ def test_multiindex(self, index_names): ) df.index.names = index_names out = df.to_json(orient="table") - result = pd.read_json(out, orient="table") + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = pd.read_json(out, orient="table") tm.assert_frame_equal(df, result) def test_empty_frame_roundtrip(self): @@ -819,7 +830,8 @@ def test_empty_frame_roundtrip(self): df = DataFrame(columns=["a", "b", "c"]) expected = df.copy() out = df.to_json(orient="table") - result = pd.read_json(out, orient="table") + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = pd.read_json(out, orient="table") tm.assert_frame_equal(expected, result) def test_read_json_orient_table_old_schema_version(self): @@ -841,5 +853,6 @@ def test_read_json_orient_table_old_schema_version(self): } """ expected = DataFrame({"a": [1, 2.0, "s"]}) - result = pd.read_json(df_json, orient="table") + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = pd.read_json(df_json, orient="table") tm.assert_frame_equal(expected, result) diff --git a/pandas/tests/io/json/test_json_table_schema_ext_dtype.py b/pandas/tests/io/json/test_json_table_schema_ext_dtype.py index 75845148f6581..27c145b35cf41 100644 --- a/pandas/tests/io/json/test_json_table_schema_ext_dtype.py +++ b/pandas/tests/io/json/test_json_table_schema_ext_dtype.py @@ -33,6 +33,14 @@ ) +def generateDepMsg(): + return ( + "Passing literal json to 'read_json' is deprecated and " + "will be removed in a future version. To read from a " + "literal string, wrap it in a 'StringIO' object." + ) + + class TestBuildSchema: def test_build_table_schema(self): df = DataFrame( @@ -287,7 +295,8 @@ def test_json_ext_dtype_reading_roundtrip(self): ) expected = df.copy() data_json = df.to_json(orient="table", indent=4) - result = read_json(data_json, orient="table") + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(data_json, orient="table") tm.assert_frame_equal(result, expected) def test_json_ext_dtype_reading(self): @@ -311,6 +320,7 @@ def test_json_ext_dtype_reading(self): } ] }""" - result = read_json(data_json, orient="table") + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(data_json, orient="table") expected = DataFrame({"a": Series([2, NA], dtype="Int64")}) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 4aa4721c44b0d..62461a0dbfc5e 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -783,10 +783,9 @@ def test_reconstruction_index(self): def test_path(self, float_frame, int_frame, datetime_frame): with tm.ensure_clean("test.json") as path: - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - for df in [float_frame, int_frame, datetime_frame]: - df.to_json(path) - read_json(path) + for df in [float_frame, int_frame, datetime_frame]: + df.to_json(path) + read_json(path) def test_axis_dates(self, datetime_series, datetime_frame): # frame @@ -872,7 +871,8 @@ def test_convert_dates_infer(self, infer_word): expected = DataFrame( [[1, Timestamp("2002-11-08")], [2, pd.NaT]], columns=["id", infer_word] ) - result = read_json(dumps(data))[["id", infer_word]] + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(dumps(data))[["id", infer_word]] tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -895,7 +895,8 @@ def test_date_format_frame(self, date, date_unit, datetime_frame): json = df.to_json(date_format="iso", date_unit=date_unit) else: json = df.to_json(date_format="iso") - result = read_json(json) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(json) expected = df.copy() tm.assert_frame_equal(result, expected) @@ -923,7 +924,8 @@ def test_date_format_series(self, date, date_unit, datetime_series): json = ts.to_json(date_format="iso", date_unit=date_unit) else: json = ts.to_json(date_format="iso") - result = read_json(json, typ="series") + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(json, typ="series") expected = ts.copy() tm.assert_series_equal(result, expected) @@ -945,12 +947,13 @@ def test_date_unit(self, unit, datetime_frame): json = df.to_json(date_format="epoch", date_unit=unit) # force date unit - result = read_json(json, date_unit=unit) - tm.assert_frame_equal(result, df) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(json, date_unit=unit) + tm.assert_frame_equal(result, df) - # detect date unit - result = read_json(json, date_unit=None) - tm.assert_frame_equal(result, df) + # detect date unit + result = read_json(json, date_unit=None) + tm.assert_frame_equal(result, df) def test_weird_nested_json(self): # this used to core dump the parser @@ -971,8 +974,8 @@ def test_weird_nested_json(self): ] } }""" - - read_json(s) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + read_json(s) def test_doc_example(self): dfj2 = DataFrame(np.random.randn(5, 2), columns=list("AB")) @@ -982,7 +985,8 @@ def test_doc_example(self): dfj2.index = pd.date_range("20130101", periods=5) json = dfj2.to_json() - result = read_json(json, dtype={"ints": np.int64, "bools": np.bool_}) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(json, dtype={"ints": np.int64, "bools": np.bool_}) tm.assert_frame_equal(result, result) def test_round_trip_exception(self, datapath): @@ -990,7 +994,8 @@ def test_round_trip_exception(self, datapath): path = datapath("io", "json", "data", "teams.csv") df = pd.read_csv(path) s = df.to_json() - result = read_json(s) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(s) res = result.reindex(index=df.index, columns=df.columns) res = res.fillna(np.nan, downcast=False) tm.assert_frame_equal(res, df) @@ -1019,17 +1024,18 @@ def test_timedelta(self): ser = Series([timedelta(23), timedelta(seconds=5)]) assert ser.dtype == "timedelta64[ns]" - result = read_json(ser.to_json(), typ="series").apply(converter) - tm.assert_series_equal(result, ser) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(ser.to_json(), typ="series").apply(converter) + tm.assert_series_equal(result, ser) - ser = Series([timedelta(23), timedelta(seconds=5)], index=pd.Index([0, 1])) - assert ser.dtype == "timedelta64[ns]" - result = read_json(ser.to_json(), typ="series").apply(converter) - tm.assert_series_equal(result, ser) + ser = Series([timedelta(23), timedelta(seconds=5)], index=pd.Index([0, 1])) + assert ser.dtype == "timedelta64[ns]" + result = read_json(ser.to_json(), typ="series").apply(converter) + tm.assert_series_equal(result, ser) - frame = DataFrame([timedelta(23), timedelta(seconds=5)]) - assert frame[0].dtype == "timedelta64[ns]" - tm.assert_frame_equal(frame, read_json(frame.to_json()).apply(converter)) + frame = DataFrame([timedelta(23), timedelta(seconds=5)]) + assert frame[0].dtype == "timedelta64[ns]" + tm.assert_frame_equal(frame, read_json(frame.to_json()).apply(converter)) def test_timedelta2(self): frame = DataFrame( @@ -1039,8 +1045,8 @@ def test_timedelta2(self): "c": pd.date_range(start="20130101", periods=2), } ) - - result = read_json(frame.to_json(date_unit="ns")) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(frame.to_json(date_unit="ns")) result["a"] = pd.to_timedelta(result.a, unit="ns") result["c"] = pd.to_datetime(result.c) tm.assert_frame_equal(frame, result) @@ -1053,7 +1059,8 @@ def test_mixed_timedelta_datetime(self): expected = DataFrame( {"a": [pd.Timedelta(td).as_unit("ns")._value, ts.as_unit("ns")._value]} ) - result = read_json(frame.to_json(date_unit="ns"), dtype={"a": "int64"}) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(frame.to_json(date_unit="ns"), dtype={"a": "int64"}) tm.assert_frame_equal(result, expected, check_index_type=False) @pytest.mark.parametrize("as_object", [True, False]) @@ -1083,7 +1090,8 @@ def test_default_handler(self): value = object() frame = DataFrame({"a": [7, value]}) expected = DataFrame({"a": [7, str(value)]}) - result = read_json(frame.to_json(default_handler=str)) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(frame.to_json(default_handler=str)) tm.assert_frame_equal(expected, result, check_index_type=False) def test_default_handler_indirect(self): @@ -1255,7 +1263,8 @@ def test_tz_range_is_naive(self): def test_read_inline_jsonl(self): # GH9180 - result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True) expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) tm.assert_frame_equal(result, expected) @@ -1283,18 +1292,23 @@ def test_read_jsonl_unicode_chars(self): # GH15132: non-ascii unicode characters # \u201d == RIGHT DOUBLE QUOTATION MARK - # simulate file handle - json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n' - json = StringIO(json) - result = read_json(json, lines=True) - expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"]) - tm.assert_frame_equal(result, expected) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + # simulate file handle + json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n' + json = StringIO(json) + result = read_json(json, lines=True) + expected = DataFrame( + [["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"] + ) + tm.assert_frame_equal(result, expected) - # simulate string - json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n' - result = read_json(json, lines=True) - expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"]) - tm.assert_frame_equal(result, expected) + # simulate string + json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n' + result = read_json(json, lines=True) + expected = DataFrame( + [["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"] + ) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("bigNum", [sys.maxsize + 1, -(sys.maxsize + 2)]) def test_to_json_large_numbers(self, bigNum): @@ -1346,14 +1360,16 @@ def test_to_jsonl(self): result = df.to_json(orient="records", lines=True) expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}\n' assert result == expected - tm.assert_frame_equal(read_json(result, lines=True), df) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + tm.assert_frame_equal(read_json(result, lines=True), df) # GH15096: escaped characters in columns and data df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], columns=["a\\", "b"]) result = df.to_json(orient="records", lines=True) expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}\n' assert result == expected - tm.assert_frame_equal(read_json(result, lines=True), df) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + tm.assert_frame_equal(read_json(result, lines=True), df) # TODO: there is a near-identical test for pytables; can we share? @pytest.mark.xfail(reason="GH#13774 encoding kwarg not supported", raises=TypeError) @@ -1384,7 +1400,8 @@ def test_latin_encoding(self): def roundtrip(s, encoding="latin-1"): with tm.ensure_clean("test.json") as path: s.to_json(path, encoding=encoding) - retr = read_json(path, encoding=encoding) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + retr = read_json(path, encoding=encoding) tm.assert_series_equal(s, retr, check_categorical=False) for s in examples: @@ -1408,14 +1425,16 @@ def test_from_json_to_json_table_index_and_columns(self, index, columns): # GH25433 GH25435 expected = DataFrame([[1, 2], [3, 4]], index=index, columns=columns) dfjson = expected.to_json(orient="table") - result = read_json(dfjson, orient="table") + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(dfjson, orient="table") tm.assert_frame_equal(result, expected) def test_from_json_to_json_table_dtypes(self): # GH21345 expected = DataFrame({"a": [1, 2], "b": [3.0, 4.0], "c": ["5", "6"]}) dfjson = expected.to_json(orient="table") - result = read_json(dfjson, orient="table") + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(dfjson, orient="table") tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("orient", ["split", "records", "index", "columns"]) @@ -1434,18 +1453,19 @@ def test_to_json_from_json_columns_dtypes(self, orient): } ) dfjson = expected.to_json(orient=orient) - result = read_json( - dfjson, - orient=orient, - dtype={ - "Integer": "int64", - "Float": "float64", - "Object": "object", - "Bool": "bool", - "Category": "category", - "Datetime": "datetime64[ns]", - }, - ) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json( + dfjson, + orient=orient, + dtype={ + "Integer": "int64", + "Float": "float64", + "Object": "object", + "Bool": "bool", + "Category": "category", + "Datetime": "datetime64[ns]", + }, + ) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("dtype", [True, {"b": int, "c": int}]) @@ -1462,7 +1482,8 @@ def test_read_json_table_empty_axes_dtype(self, orient): # GH28558 expected = DataFrame() - result = read_json("{}", orient=orient, convert_axes=True) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json("{}", orient=orient, convert_axes=True) tm.assert_index_equal(result.index, expected.index) tm.assert_index_equal(result.columns, expected.columns) @@ -1576,14 +1597,16 @@ def test_index_false_from_json_to_json(self, orient, index): # Test index=False in from_json to_json expected = DataFrame({"a": [1, 2], "b": [3, 4]}) dfjson = expected.to_json(orient=orient, index=index) - result = read_json(dfjson, orient=orient) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(dfjson, orient=orient) tm.assert_frame_equal(result, expected) def test_read_timezone_information(self): # GH 25546 - result = read_json( - '{"2019-01-01T11:00:00.000Z":88}', typ="series", orient="index" - ) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json( + '{"2019-01-01T11:00:00.000Z":88}', typ="series", orient="index" + ) expected = Series([88], index=DatetimeIndex(["2019-01-01 11:00:00"], tz="UTC")) tm.assert_series_equal(result, expected) @@ -1598,7 +1621,8 @@ def test_read_timezone_information(self): ) def test_read_json_with_url_value(self, url): # GH 36271 - result = read_json(f'{{"url":{{"0":"{url}"}}}}') + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(f'{{"url":{{"0":"{url}"}}}}') expected = DataFrame({"url": [url]}) tm.assert_frame_equal(result, expected) @@ -1804,7 +1828,8 @@ def test_json_negative_indent_raises(self): def test_emca_262_nan_inf_support(self): # GH 12213 data = '["a", NaN, "NaN", Infinity, "Infinity", -Infinity, "-Infinity"]' - result = read_json(data) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(data) expected = DataFrame( ["a", None, "NaN", np.inf, "Infinity", -np.inf, "-Infinity"] ) @@ -1814,7 +1839,8 @@ def test_frame_int_overflow(self): # GH 30320 encoded_json = json.dumps([{"col": "31900441201190696999"}, {"col": "Text"}]) expected = DataFrame({"col": ["31900441201190696999", "Text"]}) - result = read_json(encoded_json) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(encoded_json) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -1859,7 +1885,8 @@ def test_json_pandas_nulls(self, nulls_fixture, request): def test_readjson_bool_series(self): # GH31464 - result = read_json("[true, true, false]", typ="series") + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json("[true, true, false]", typ="series") expected = Series([True, True, False]) tm.assert_series_equal(result, expected) @@ -1975,7 +2002,8 @@ def test_read_json_dtype_backend(self, string_storage, dtype_backend, orient): out = df.to_json(orient=orient) with pd.option_context("mode.string_storage", string_storage): - result = read_json(out, dtype_backend=dtype_backend, orient=orient) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json(out, dtype_backend=dtype_backend, orient=orient) expected = DataFrame( { @@ -2013,9 +2041,10 @@ def test_read_json_nullable_series(self, string_storage, dtype_backend, orient): out = ser.to_json(orient=orient) with pd.option_context("mode.string_storage", string_storage): - result = read_json( - out, dtype_backend=dtype_backend, orient=orient, typ="series" - ) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + result = read_json( + out, dtype_backend=dtype_backend, orient=orient, typ="series" + ) expected = Series([1, np.nan, 3], dtype="Int64") From f45c0f230c16dffe6b4adcb50c6a6a2dcbef65c3 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Sat, 27 May 2023 14:00:27 -0400 Subject: [PATCH 07/28] Fixing unit tests --- pandas/tests/io/json/test_compression.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py index 427eefe787652..30cf9d147c78f 100644 --- a/pandas/tests/io/json/test_compression.py +++ b/pandas/tests/io/json/test_compression.py @@ -57,9 +57,10 @@ def test_with_s3_url(compression, s3_resource, s3so): with open(path, "rb") as f: s3_resource.Bucket("pandas-test").put_object(Key="test-1", Body=f) - roundtripped_df = pd.read_json( - "s3://pandas-test/test-1", compression=compression, storage_options=s3so - ) + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + roundtripped_df = pd.read_json( + "s3://pandas-test/test-1", compression=compression, storage_options=s3so + ) tm.assert_frame_equal(df, roundtripped_df) From d62aa6dd34fa201efabf44fb79c08bac4a417bc3 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Sat, 27 May 2023 16:01:52 -0400 Subject: [PATCH 08/28] Fixing unit tests --- pandas/tests/io/json/test_compression.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py index 30cf9d147c78f..4f7506c827c5a 100644 --- a/pandas/tests/io/json/test_compression.py +++ b/pandas/tests/io/json/test_compression.py @@ -49,8 +49,8 @@ def test_read_zipped_json(datapath): @pytest.mark.single_cpu def test_with_s3_url(compression, s3_resource, s3so): # Bucket "pandas-test" created in tests/io/conftest.py - - df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}') + with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}') with tm.ensure_clean() as path: df.to_json(path, compression=compression) From 5938434d7c253398e9e8404ba9dcfe24556dc255 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Sat, 27 May 2023 17:20:38 -0400 Subject: [PATCH 09/28] Fixing unit tests --- pandas/tests/io/json/test_compression.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py index 4f7506c827c5a..52e1ffbca8264 100644 --- a/pandas/tests/io/json/test_compression.py +++ b/pandas/tests/io/json/test_compression.py @@ -57,10 +57,9 @@ def test_with_s3_url(compression, s3_resource, s3so): with open(path, "rb") as f: s3_resource.Bucket("pandas-test").put_object(Key="test-1", Body=f) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - roundtripped_df = pd.read_json( - "s3://pandas-test/test-1", compression=compression, storage_options=s3so - ) + roundtripped_df = pd.read_json( + "s3://pandas-test/test-1", compression=compression, storage_options=s3so + ) tm.assert_frame_equal(df, roundtripped_df) From e5e3b09c342d3162726dddec2a7a15d8d686e527 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Tue, 30 May 2023 14:44:40 -0400 Subject: [PATCH 10/28] Fixing documentation errors in PR feedback --- doc/source/user_guide/io.rst | 3 --- doc/source/whatsnew/v2.1.0.rst | 3 +-- pandas/io/json/_json.py | 5 +++++ 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 9bd5ac8c088b0..6fde9f413c0c4 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -2072,9 +2072,6 @@ is ``None``. To explicitly force ``Series`` parsing, pass ``typ=series`` * ``engine``: Either ``"ujson"``, the built-in JSON parser, or ``"pyarrow"`` which dispatches to pyarrow's ``pyarrow.json.read_json``. The ``"pyarrow"`` is only available when ``lines=True`` -.. warning:: - - Passing json literal strings will be deprecated in a future release of pandas. The parser will raise one of ``ValueError/TypeError/AssertionError`` if the JSON is not parseable. diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 820786c8d77c6..78444a5b70777 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -268,11 +268,10 @@ Deprecations - Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`) - Deprecated behavior of :func:`assert_series_equal` and :func:`assert_frame_equal` considering NA-like values (e.g. ``NaN`` vs ``None`` as equivalent) (:issue:`52081`) - Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`) -- Deprecated literal json input to :func:`read_json`. Moving forward the method only accepts file-like objects (:issue:`53409`) +- Deprecated literal json input to :func:`read_json` (:issue:`53409`) - Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`) - - .. --------------------------------------------------------------------------- .. _whatsnew_210.performance: diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 10853d1ef06a8..c3a9a599e439c 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -537,6 +537,11 @@ def read_json( By file-like object, we refer to objects with a ``read()`` method, such as a file handle (e.g. via builtin ``open`` function) or ``StringIO``. + + .. deprecated:: 2.1.0 + Passing json literal strings will be deprecated in a future release of + pandas. + orient : str, optional Indication of expected JSON string format. Compatible JSON strings can be produced by ``to_json()`` with a From 097b3f2390cc84e573acef595f65f3e4cd938c90 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Tue, 30 May 2023 16:06:50 -0400 Subject: [PATCH 11/28] Fixing documentation errors in PR feedback --- doc/source/user_guide/io.rst | 1 - pandas/io/json/_json.py | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 6fde9f413c0c4..90a8bd868b60b 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -2072,7 +2072,6 @@ is ``None``. To explicitly force ``Series`` parsing, pass ``typ=series`` * ``engine``: Either ``"ujson"``, the built-in JSON parser, or ``"pyarrow"`` which dispatches to pyarrow's ``pyarrow.json.read_json``. The ``"pyarrow"`` is only available when ``lines=True`` - The parser will raise one of ``ValueError/TypeError/AssertionError`` if the JSON is not parseable. If a non-default ``orient`` was used when encoding to JSON be sure to pass the same diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index c3a9a599e439c..3eb9b12fad181 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -539,8 +539,7 @@ def read_json( or ``StringIO``. .. deprecated:: 2.1.0 - Passing json literal strings will be deprecated in a future release of - pandas. + Passing json literal strings is deprecated. orient : str, optional Indication of expected JSON string format. From c9a1a1a16b7d19277af19f978b42bfa5989d9bd7 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Tue, 30 May 2023 20:02:55 -0400 Subject: [PATCH 12/28] Updating unit tests to use StringIO rather than catch FutureWarning --- pandas/tests/io/json/test_compression.py | 29 ++++++----------- .../tests/io/json/test_deprecated_kwargs.py | 22 +++++-------- .../tests/io/json/test_json_table_schema.py | 31 ++++++------------- .../json/test_json_table_schema_ext_dtype.py | 15 ++------- 4 files changed, 31 insertions(+), 66 deletions(-) diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py index 52e1ffbca8264..18a815a58ee3c 100644 --- a/pandas/tests/io/json/test_compression.py +++ b/pandas/tests/io/json/test_compression.py @@ -1,4 +1,7 @@ -from io import BytesIO +from io import ( + BytesIO, + StringIO, +) import pytest @@ -9,14 +12,6 @@ from pandas.tests.io.test_compression import _compression_to_extension -def generateDepMsg(): - return ( - "Passing literal json to 'read_json' is deprecated and " - "will be removed in a future version. To read from a " - "literal string, wrap it in a 'StringIO' object." - ) - - def test_compression_roundtrip(compression): df = pd.DataFrame( [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], @@ -31,8 +26,8 @@ def test_compression_roundtrip(compression): # explicitly ensure file was compressed. with tm.decompress_file(path, compression) as fh: result = fh.read().decode("utf8") - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - tm.assert_frame_equal(df, pd.read_json(result)) + data = StringIO(result) + tm.assert_frame_equal(df, pd.read_json(data)) def test_read_zipped_json(datapath): @@ -49,8 +44,7 @@ def test_read_zipped_json(datapath): @pytest.mark.single_cpu def test_with_s3_url(compression, s3_resource, s3so): # Bucket "pandas-test" created in tests/io/conftest.py - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}') + df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}')) with tm.ensure_clean() as path: df.to_json(path, compression=compression) @@ -65,8 +59,7 @@ def test_with_s3_url(compression, s3_resource, s3so): def test_lines_with_compression(compression): with tm.ensure_clean() as path: - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}') + df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}')) df.to_json(path, orient="records", lines=True, compression=compression) roundtripped_df = pd.read_json(path, lines=True, compression=compression) tm.assert_frame_equal(df, roundtripped_df) @@ -74,8 +67,7 @@ def test_lines_with_compression(compression): def test_chunksize_with_compression(compression): with tm.ensure_clean() as path: - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - df = pd.read_json('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}') + df = pd.read_json(StringIO('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}')) df.to_json(path, orient="records", lines=True, compression=compression) with pd.read_json( @@ -86,8 +78,7 @@ def test_chunksize_with_compression(compression): def test_write_unsupported_compression_type(): - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}') + df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}')) with tm.ensure_clean() as path: msg = "Unrecognized compression type: unsupported" with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/io/json/test_deprecated_kwargs.py b/pandas/tests/io/json/test_deprecated_kwargs.py index 29711d2c92769..89bcd27446a09 100644 --- a/pandas/tests/io/json/test_deprecated_kwargs.py +++ b/pandas/tests/io/json/test_deprecated_kwargs.py @@ -1,6 +1,7 @@ """ Tests for the deprecated keyword arguments for `read_json`. """ +from io import StringIO import pandas as pd import pandas._testing as tm @@ -8,19 +9,12 @@ from pandas.io.json import read_json -def generateDepMsg(): - return ( - "Passing literal json to 'read_json' is deprecated and " - "will be removed in a future version. To read from a " - "literal string, wrap it in a 'StringIO' object." - ) - - def test_good_kwargs(): df = pd.DataFrame({"A": [2, 4, 6], "B": [3, 6, 9]}, index=[0, 1, 2]) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - tm.assert_frame_equal(df, read_json(df.to_json(orient="split"), orient="split")) - tm.assert_frame_equal( - df, read_json(df.to_json(orient="columns"), orient="columns") - ) - tm.assert_frame_equal(df, read_json(df.to_json(orient="index"), orient="index")) + + data1 = StringIO(df.to_json(orient="split")) + tm.assert_frame_equal(df, read_json(data1, orient="split")) + data2 = StringIO(df.to_json(orient="columns")) + tm.assert_frame_equal(df, read_json(data2, orient="columns")) + data3 = StringIO(df.to_json(orient="index")) + tm.assert_frame_equal(df, read_json(data3, orient="index")) diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 8c36e22af02b8..25b0e4a9f1de9 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -1,5 +1,6 @@ """Tests for Table Schema integration.""" from collections import OrderedDict +from io import StringIO import json import numpy as np @@ -24,14 +25,6 @@ ) -def generateDepMsg(): - return ( - "Passing literal json to 'read_json' is deprecated and " - "will be removed in a future version. To read from a " - "literal string, wrap it in a 'StringIO' object." - ) - - @pytest.fixture def df_schema(): return DataFrame( @@ -262,8 +255,8 @@ def test_read_json_from_to_json_results(self): "name_en": {"row_0": "Hakata Dolls Matsuo"}, } ) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result1 = pd.read_json(df.to_json()) + + result1 = pd.read_json(StringIO(df.to_json())) result2 = DataFrame.from_dict(json.loads(df.to_json())) tm.assert_frame_equal(result1, df) tm.assert_frame_equal(result2, df) @@ -803,9 +796,8 @@ def test_comprehensive(self): index=pd.Index(range(4), name="idx"), ) - out = df.to_json(orient="table") - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = pd.read_json(out, orient="table") + out = StringIO(df.to_json(orient="table")) + result = pd.read_json(out, orient="table") tm.assert_frame_equal(df, result) @pytest.mark.parametrize( @@ -820,18 +812,16 @@ def test_multiindex(self, index_names): columns=["Aussprache", "Griechisch", "Args"], ) df.index.names = index_names - out = df.to_json(orient="table") - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = pd.read_json(out, orient="table") + out = StringIO(df.to_json(orient="table")) + result = pd.read_json(out, orient="table") tm.assert_frame_equal(df, result) def test_empty_frame_roundtrip(self): # GH 21287 df = DataFrame(columns=["a", "b", "c"]) expected = df.copy() - out = df.to_json(orient="table") - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = pd.read_json(out, orient="table") + out = StringIO(df.to_json(orient="table")) + result = pd.read_json(out, orient="table") tm.assert_frame_equal(expected, result) def test_read_json_orient_table_old_schema_version(self): @@ -853,6 +843,5 @@ def test_read_json_orient_table_old_schema_version(self): } """ expected = DataFrame({"a": [1, 2.0, "s"]}) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = pd.read_json(df_json, orient="table") + result = pd.read_json(StringIO(df_json), orient="table") tm.assert_frame_equal(expected, result) diff --git a/pandas/tests/io/json/test_json_table_schema_ext_dtype.py b/pandas/tests/io/json/test_json_table_schema_ext_dtype.py index 27c145b35cf41..b7bb057bc538e 100644 --- a/pandas/tests/io/json/test_json_table_schema_ext_dtype.py +++ b/pandas/tests/io/json/test_json_table_schema_ext_dtype.py @@ -3,6 +3,7 @@ from collections import OrderedDict import datetime as dt import decimal +from io import StringIO import json import pytest @@ -33,14 +34,6 @@ ) -def generateDepMsg(): - return ( - "Passing literal json to 'read_json' is deprecated and " - "will be removed in a future version. To read from a " - "literal string, wrap it in a 'StringIO' object." - ) - - class TestBuildSchema: def test_build_table_schema(self): df = DataFrame( @@ -295,8 +288,7 @@ def test_json_ext_dtype_reading_roundtrip(self): ) expected = df.copy() data_json = df.to_json(orient="table", indent=4) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(data_json, orient="table") + result = read_json(StringIO(data_json), orient="table") tm.assert_frame_equal(result, expected) def test_json_ext_dtype_reading(self): @@ -320,7 +312,6 @@ def test_json_ext_dtype_reading(self): } ] }""" - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(data_json, orient="table") + result = read_json(StringIO(data_json), orient="table") expected = DataFrame({"a": Series([2, NA], dtype="Int64")}) tm.assert_frame_equal(result, expected) From df64bf1a32103b034c79568ccba50b43912ebe36 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Tue, 30 May 2023 21:09:00 -0400 Subject: [PATCH 13/28] Finishing updating unit tests to use StringIO rather than catch FutureWarning --- pandas/tests/io/json/test_pandas.py | 440 +++++++++++-------------- pandas/tests/io/json/test_readlines.py | 43 +-- 2 files changed, 205 insertions(+), 278 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 62461a0dbfc5e..773caaf934137 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -29,14 +29,6 @@ ) -def generateDepMsg(): - return ( - "Passing literal json to 'read_json' is deprecated and " - "will be removed in a future version. To read from a " - "literal string, wrap it in a 'StringIO' object." - ) - - def assert_json_roundtrip_equal(result, expected, orient): if orient in ("records", "values"): expected = expected.reset_index(drop=True) @@ -82,17 +74,16 @@ def test_frame_double_encoded_labels(self, orient): columns=["a \\ b", "y / z"], ) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(df.to_json(orient=orient), orient=orient) + data = StringIO(df.to_json(orient=orient)) + result = read_json(data, orient=orient) expected = df.copy() - assert_json_roundtrip_equal(result, expected, orient) @pytest.mark.parametrize("orient", ["split", "records", "values"]) def test_frame_non_unique_index(self, orient): df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 1], columns=["x", "y"]) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(df.to_json(orient=orient), orient=orient) + data = StringIO(df.to_json(orient=orient)) + result = read_json(data, orient=orient) expected = df.copy() assert_json_roundtrip_equal(result, expected, orient) @@ -117,10 +108,9 @@ def test_frame_non_unique_index_raises(self, orient): def test_frame_non_unique_columns(self, orient, data): df = DataFrame(data, index=[1, 2], columns=["x", "x"]) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json( - df.to_json(orient=orient), orient=orient, convert_dates=["x"] - ) + result = read_json( + StringIO(df.to_json(orient=orient)), orient=orient, convert_dates=["x"] + ) if orient == "values": expected = DataFrame(data) if expected.iloc[:, 0].dtype == "datetime64[ns]": @@ -149,11 +139,8 @@ def test_frame_default_orient(self, float_frame): @pytest.mark.parametrize("dtype", [False, float]) @pytest.mark.parametrize("convert_axes", [True, False]) def test_roundtrip_simple(self, orient, convert_axes, dtype, float_frame): - data = float_frame.to_json(orient=orient) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json( - data, orient=orient, convert_axes=convert_axes, dtype=dtype - ) + data = StringIO(float_frame.to_json(orient=orient)) + result = read_json(data, orient=orient, convert_axes=convert_axes, dtype=dtype) expected = float_frame @@ -162,11 +149,8 @@ def test_roundtrip_simple(self, orient, convert_axes, dtype, float_frame): @pytest.mark.parametrize("dtype", [False, np.int64]) @pytest.mark.parametrize("convert_axes", [True, False]) def test_roundtrip_intframe(self, orient, convert_axes, dtype, int_frame): - data = int_frame.to_json(orient=orient) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json( - data, orient=orient, convert_axes=convert_axes, dtype=dtype - ) + data = StringIO(int_frame.to_json(orient=orient)) + result = read_json(data, orient=orient, convert_axes=convert_axes, dtype=dtype) expected = int_frame assert_json_roundtrip_equal(result, expected, orient) @@ -180,11 +164,8 @@ def test_roundtrip_str_axes(self, orient, convert_axes, dtype): dtype=dtype, ) - data = df.to_json(orient=orient) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json( - data, orient=orient, convert_axes=convert_axes, dtype=dtype - ) + data = StringIO(df.to_json(orient=orient)) + result = read_json(data, orient=orient, convert_axes=convert_axes, dtype=dtype) expected = df.copy() if not dtype: @@ -217,9 +198,8 @@ def test_roundtrip_categorical( ) ) - data = categorical_frame.to_json(orient=orient) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(data, orient=orient, convert_axes=convert_axes) + data = StringIO(categorical_frame.to_json(orient=orient)) + result = read_json(data, orient=orient, convert_axes=convert_axes) expected = categorical_frame.copy() expected.index = expected.index.astype(str) # Categorical not preserved @@ -229,9 +209,8 @@ def test_roundtrip_categorical( @pytest.mark.parametrize("convert_axes", [True, False]) def test_roundtrip_empty(self, orient, convert_axes): empty_frame = DataFrame() - data = empty_frame.to_json(orient=orient) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(data, orient=orient, convert_axes=convert_axes) + data = StringIO(empty_frame.to_json(orient=orient)) + result = read_json(data, orient=orient, convert_axes=convert_axes) if orient == "split": idx = pd.Index([], dtype=(float if convert_axes else object)) expected = DataFrame(index=idx, columns=idx) @@ -245,9 +224,8 @@ def test_roundtrip_empty(self, orient, convert_axes): @pytest.mark.parametrize("convert_axes", [True, False]) def test_roundtrip_timestamp(self, orient, convert_axes, datetime_frame): # TODO: improve coverage with date_format parameter - data = datetime_frame.to_json(orient=orient) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(data, orient=orient, convert_axes=convert_axes) + data = StringIO(datetime_frame.to_json(orient=orient)) + result = read_json(data, orient=orient, convert_axes=convert_axes) expected = datetime_frame.copy() if not convert_axes: # one off for ts handling @@ -272,9 +250,8 @@ def test_roundtrip_mixed(self, orient, convert_axes): df = DataFrame(data=values, index=index) - data = df.to_json(orient=orient) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(data, orient=orient, convert_axes=convert_axes) + data = StringIO(df.to_json(orient=orient)) + result = read_json(data, orient=orient, convert_axes=convert_axes) expected = df.copy() expected = expected.assign(**expected.select_dtypes("number").astype(np.int64)) @@ -298,10 +275,8 @@ def test_roundtrip_multiindex(self, columns): [[1, 2], [3, 4]], columns=pd.MultiIndex.from_arrays(columns), ) - - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(df.to_json(orient="split"), orient="split") - + data = StringIO(df.to_json(orient="split")) + result = read_json(data, orient="split") tm.assert_frame_equal(result, df) @pytest.mark.parametrize( @@ -346,31 +321,29 @@ def test_frame_from_json_bad_data_raises(self, data, msg, orient): @pytest.mark.parametrize("convert_axes", [True, False]) def test_frame_from_json_missing_data(self, orient, convert_axes, dtype): num_df = DataFrame([[1, 2], [4, 5, 6]]) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json( - num_df.to_json(orient=orient), - orient=orient, - convert_axes=convert_axes, - dtype=dtype, - ) + + result = read_json( + StringIO(num_df.to_json(orient=orient)), + orient=orient, + convert_axes=convert_axes, + dtype=dtype, + ) assert np.isnan(result.iloc[0, 2]) obj_df = DataFrame([["1", "2"], ["4", "5", "6"]]) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json( - obj_df.to_json(orient=orient), - orient=orient, - convert_axes=convert_axes, - dtype=dtype, - ) + result = read_json( + StringIO(obj_df.to_json(orient=orient)), + orient=orient, + convert_axes=convert_axes, + dtype=dtype, + ) assert np.isnan(result.iloc[0, 2]) @pytest.mark.parametrize("dtype", [True, False]) def test_frame_read_json_dtype_missing_value(self, dtype): # GH28501 Parse missing values using read_json with dtype=False # to NaN instead of None - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json("[null]", dtype=dtype) + result = read_json(StringIO("[null]"), dtype=dtype) expected = DataFrame([np.nan]) tm.assert_frame_equal(result, expected) @@ -383,8 +356,8 @@ def test_frame_infinity(self, inf, dtype): df = DataFrame([[1, 2], [4, 5, 6]]) df.loc[0, 2] = inf - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(df.to_json(), dtype=dtype) + data = StringIO(df.to_json()) + result = read_json(data, dtype=dtype) assert np.isnan(result.iloc[0, 2]) @pytest.mark.skipif(not IS64, reason="not compliant on 32-bit, xref #15865") @@ -414,12 +387,12 @@ def test_frame_empty(self): df = DataFrame(columns=["jim", "joe"]) assert not df._is_mixed_type - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - tm.assert_frame_equal( - read_json(df.to_json(), dtype=dict(df.dtypes)), - df, - check_index_type=False, - ) + data = StringIO(df.to_json()) + tm.assert_frame_equal( + read_json(data, dtype=dict(df.dtypes)), + df, + check_index_type=False, + ) # GH 7445 result = DataFrame({"test": []}, index=[]).to_json(orient="columns") expected = '{"test":{}}' @@ -430,12 +403,12 @@ def test_frame_empty_mixedtype(self): df = DataFrame(columns=["jim", "joe"]) df["joe"] = df["joe"].astype("i8") assert df._is_mixed_type - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - tm.assert_frame_equal( - read_json(df.to_json(), dtype=dict(df.dtypes)), - df, - check_index_type=False, - ) + data = df.to_json() + tm.assert_frame_equal( + read_json(StringIO(data), dtype=dict(df.dtypes)), + df, + check_index_type=False, + ) def test_frame_mixedtype_orient(self): # GH10289 vals = [ @@ -453,21 +426,18 @@ def test_frame_mixedtype_orient(self): # GH10289 right = df.copy() for orient in ["split", "index", "columns"]: - inp = df.to_json(orient=orient) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - left = read_json(inp, orient=orient, convert_axes=False) + inp = StringIO(df.to_json(orient=orient)) + left = read_json(inp, orient=orient, convert_axes=False) tm.assert_frame_equal(left, right) right.index = pd.RangeIndex(len(df)) - inp = df.to_json(orient="records") - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - left = read_json(inp, orient="records", convert_axes=False) + inp = StringIO(df.to_json(orient="records")) + left = read_json(inp, orient="records", convert_axes=False) tm.assert_frame_equal(left, right) right.columns = pd.RangeIndex(df.shape[1]) - inp = df.to_json(orient="values") - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - left = read_json(inp, orient="values", convert_axes=False) + inp = StringIO(df.to_json(orient="values")) + left = read_json(inp, orient="values", convert_axes=False) tm.assert_frame_equal(left, right) def test_v12_compat(self, datapath): @@ -585,8 +555,8 @@ def test_blocks_compat_GH9037(self): # JSON deserialisation always creates unicode strings df_mixed.columns = df_mixed.columns.astype("unicode") - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - df_roundtrip = read_json(df_mixed.to_json(orient="split"), orient="split") + data = StringIO(df_mixed.to_json(orient="split")) + df_roundtrip = read_json(data, orient="split") tm.assert_frame_equal( df_mixed, df_roundtrip, @@ -647,22 +617,23 @@ def test_series_non_unique_index(self): with pytest.raises(ValueError, match=msg): s.to_json(orient="index") - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - tm.assert_series_equal( - s, read_json(s.to_json(orient="split"), orient="split", typ="series") - ) - unserialized = read_json( - s.to_json(orient="records"), orient="records", typ="series" - ) + tm.assert_series_equal( + s, + read_json( + StringIO(s.to_json(orient="split")), orient="split", typ="series" + ), + ) + unserialized = read_json( + StringIO(s.to_json(orient="records")), orient="records", typ="series" + ) tm.assert_numpy_array_equal(s.values, unserialized.values) def test_series_default_orient(self, string_series): assert string_series.to_json() == string_series.to_json(orient="index") def test_series_roundtrip_simple(self, orient, string_series): - data = string_series.to_json(orient=orient) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(data, typ="series", orient=orient) + data = StringIO(string_series.to_json(orient=orient)) + result = read_json(data, typ="series", orient=orient) expected = string_series if orient in ("values", "records"): @@ -674,9 +645,8 @@ def test_series_roundtrip_simple(self, orient, string_series): @pytest.mark.parametrize("dtype", [False, None]) def test_series_roundtrip_object(self, orient, dtype, object_series): - data = object_series.to_json(orient=orient) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(data, typ="series", orient=orient, dtype=dtype) + data = StringIO(object_series.to_json(orient=orient)) + result = read_json(data, typ="series", orient=orient, dtype=dtype) expected = object_series if orient in ("values", "records"): @@ -688,9 +658,8 @@ def test_series_roundtrip_object(self, orient, dtype, object_series): def test_series_roundtrip_empty(self, orient): empty_series = Series([], index=[], dtype=np.float64) - data = empty_series.to_json(orient=orient) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(data, typ="series", orient=orient) + data = StringIO(empty_series.to_json(orient=orient)) + result = read_json(data, typ="series", orient=orient) expected = empty_series.reset_index(drop=True) if orient in ("split"): @@ -699,9 +668,8 @@ def test_series_roundtrip_empty(self, orient): tm.assert_series_equal(result, expected) def test_series_roundtrip_timeseries(self, orient, datetime_series): - data = datetime_series.to_json(orient=orient) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(data, typ="series", orient=orient) + data = StringIO(datetime_series.to_json(orient=orient)) + result = read_json(data, typ="series", orient=orient) expected = datetime_series if orient in ("values", "records"): @@ -714,9 +682,8 @@ def test_series_roundtrip_timeseries(self, orient, datetime_series): @pytest.mark.parametrize("dtype", [np.float64, int]) def test_series_roundtrip_numeric(self, orient, dtype): s = Series(range(6), index=["a", "b", "c", "d", "e", "f"]) - data = s.to_json(orient=orient) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(data, typ="series", orient=orient) + data = StringIO(s.to_json(orient=orient)) + result = read_json(data, typ="series", orient=orient) expected = s.copy() if orient in ("values", "records"): @@ -732,15 +699,13 @@ def test_series_to_json_except(self): def test_series_from_json_precise_float(self): s = Series([4.56, 4.56, 4.56]) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(s.to_json(), typ="series", precise_float=True) + result = read_json(StringIO(s.to_json()), typ="series", precise_float=True) tm.assert_series_equal(result, s, check_index_type=False) def test_series_with_dtype(self): # GH 21986 s = Series([4.56, 4.56, 4.56]) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(s.to_json(), typ="series", dtype=np.int64) + result = read_json(StringIO(s.to_json()), typ="series", dtype=np.int64) expected = Series([4] * 3) tm.assert_series_equal(result, expected) @@ -753,32 +718,27 @@ def test_series_with_dtype(self): ) def test_series_with_dtype_datetime(self, dtype, expected): s = Series(["2000-01-01"], dtype="datetime64[ns]") - data = s.to_json() - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(data, typ="series", dtype=dtype) + data = StringIO(s.to_json()) + result = read_json(data, typ="series", dtype=dtype) tm.assert_series_equal(result, expected) def test_frame_from_json_precise_float(self): df = DataFrame([[4.56, 4.56, 4.56], [4.56, 4.56, 4.56]]) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(df.to_json(), precise_float=True) + result = read_json(StringIO(df.to_json()), precise_float=True) tm.assert_frame_equal(result, df) def test_typ(self): s = Series(range(6), index=["a", "b", "c", "d", "e", "f"], dtype="int64") - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(s.to_json(), typ=None) + result = read_json(StringIO(s.to_json()), typ=None) tm.assert_series_equal(result, s) def test_reconstruction_index(self): df = DataFrame([[1, 2, 3], [4, 5, 6]]) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(df.to_json()) - - tm.assert_frame_equal(result, df) + result = read_json(StringIO(df.to_json())) + tm.assert_frame_equal(result, df) - df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["A", "B", "C"]) - result = read_json(df.to_json()) + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["A", "B", "C"]) + result = read_json(StringIO(df.to_json())) tm.assert_frame_equal(result, df) def test_path(self, float_frame, int_frame, datetime_frame): @@ -789,15 +749,13 @@ def test_path(self, float_frame, int_frame, datetime_frame): def test_axis_dates(self, datetime_series, datetime_frame): # frame - json = datetime_frame.to_json() - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(json) + json = StringIO(datetime_frame.to_json()) + result = read_json(json) tm.assert_frame_equal(result, datetime_frame) # series - json = datetime_series.to_json() - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(json, typ="series") + json = StringIO(datetime_series.to_json()) + result = read_json(json, typ="series") tm.assert_series_equal(result, datetime_series, check_names=False) assert result.name is None @@ -806,26 +764,23 @@ def test_convert_dates(self, datetime_series, datetime_frame): df = datetime_frame df["date"] = Timestamp("20130101").as_unit("ns") - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - json = df.to_json() - result = read_json(json) - tm.assert_frame_equal(result, df) + json = StringIO(df.to_json()) + result = read_json(json) + tm.assert_frame_equal(result, df) - df["foo"] = 1.0 - json = df.to_json(date_unit="ns") + df["foo"] = 1.0 + json = StringIO(df.to_json(date_unit="ns")) - result = read_json(json, convert_dates=False) - expected = df.copy() - expected["date"] = expected["date"].values.view("i8") - expected["foo"] = expected["foo"].astype("int64") - tm.assert_frame_equal(result, expected) + result = read_json(json, convert_dates=False) + expected = df.copy() + expected["date"] = expected["date"].values.view("i8") + expected["foo"] = expected["foo"].astype("int64") + tm.assert_frame_equal(result, expected) - # series - ts = Series( - Timestamp("20130101").as_unit("ns"), index=datetime_series.index - ) - json = ts.to_json() - result = read_json(json, typ="series") + # series + ts = Series(Timestamp("20130101").as_unit("ns"), index=datetime_series.index) + json = StringIO(ts.to_json()) + result = read_json(json, typ="series") tm.assert_series_equal(result, ts) @pytest.mark.parametrize("date_format", ["epoch", "iso"]) @@ -871,8 +826,8 @@ def test_convert_dates_infer(self, infer_word): expected = DataFrame( [[1, Timestamp("2002-11-08")], [2, pd.NaT]], columns=["id", infer_word] ) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(dumps(data))[["id", infer_word]] + + result = read_json(StringIO(dumps(data)))[["id", infer_word]] tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -895,8 +850,8 @@ def test_date_format_frame(self, date, date_unit, datetime_frame): json = df.to_json(date_format="iso", date_unit=date_unit) else: json = df.to_json(date_format="iso") - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(json) + + result = read_json(StringIO(json)) expected = df.copy() tm.assert_frame_equal(result, expected) @@ -924,8 +879,8 @@ def test_date_format_series(self, date, date_unit, datetime_series): json = ts.to_json(date_format="iso", date_unit=date_unit) else: json = ts.to_json(date_format="iso") - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(json, typ="series") + + result = read_json(StringIO(json), typ="series") expected = ts.copy() tm.assert_series_equal(result, expected) @@ -947,13 +902,12 @@ def test_date_unit(self, unit, datetime_frame): json = df.to_json(date_format="epoch", date_unit=unit) # force date unit - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(json, date_unit=unit) - tm.assert_frame_equal(result, df) + result = read_json(StringIO(json), date_unit=unit) + tm.assert_frame_equal(result, df) - # detect date unit - result = read_json(json, date_unit=None) - tm.assert_frame_equal(result, df) + # detect date unit + result = read_json(StringIO(json), date_unit=None) + tm.assert_frame_equal(result, df) def test_weird_nested_json(self): # this used to core dump the parser @@ -974,8 +928,7 @@ def test_weird_nested_json(self): ] } }""" - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - read_json(s) + read_json(StringIO(s)) def test_doc_example(self): dfj2 = DataFrame(np.random.randn(5, 2), columns=list("AB")) @@ -984,9 +937,8 @@ def test_doc_example(self): dfj2["bools"] = True dfj2.index = pd.date_range("20130101", periods=5) - json = dfj2.to_json() - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(json, dtype={"ints": np.int64, "bools": np.bool_}) + json = StringIO(dfj2.to_json()) + result = read_json(json, dtype={"ints": np.int64, "bools": np.bool_}) tm.assert_frame_equal(result, result) def test_round_trip_exception(self, datapath): @@ -994,8 +946,8 @@ def test_round_trip_exception(self, datapath): path = datapath("io", "json", "data", "teams.csv") df = pd.read_csv(path) s = df.to_json() - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(s) + + result = read_json(StringIO(s)) res = result.reindex(index=df.index, columns=df.columns) res = res.fillna(np.nan, downcast=False) tm.assert_frame_equal(res, df) @@ -1024,18 +976,19 @@ def test_timedelta(self): ser = Series([timedelta(23), timedelta(seconds=5)]) assert ser.dtype == "timedelta64[ns]" - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(ser.to_json(), typ="series").apply(converter) - tm.assert_series_equal(result, ser) + result = read_json(StringIO(ser.to_json()), typ="series").apply(converter) + tm.assert_series_equal(result, ser) - ser = Series([timedelta(23), timedelta(seconds=5)], index=pd.Index([0, 1])) - assert ser.dtype == "timedelta64[ns]" - result = read_json(ser.to_json(), typ="series").apply(converter) - tm.assert_series_equal(result, ser) + ser = Series([timedelta(23), timedelta(seconds=5)], index=pd.Index([0, 1])) + assert ser.dtype == "timedelta64[ns]" + result = read_json(StringIO(ser.to_json()), typ="series").apply(converter) + tm.assert_series_equal(result, ser) - frame = DataFrame([timedelta(23), timedelta(seconds=5)]) - assert frame[0].dtype == "timedelta64[ns]" - tm.assert_frame_equal(frame, read_json(frame.to_json()).apply(converter)) + frame = DataFrame([timedelta(23), timedelta(seconds=5)]) + assert frame[0].dtype == "timedelta64[ns]" + tm.assert_frame_equal( + frame, read_json(StringIO(frame.to_json())).apply(converter) + ) def test_timedelta2(self): frame = DataFrame( @@ -1045,8 +998,8 @@ def test_timedelta2(self): "c": pd.date_range(start="20130101", periods=2), } ) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(frame.to_json(date_unit="ns")) + data = StringIO(frame.to_json(date_unit="ns")) + result = read_json(data) result["a"] = pd.to_timedelta(result.a, unit="ns") result["c"] = pd.to_datetime(result.c) tm.assert_frame_equal(frame, result) @@ -1059,8 +1012,8 @@ def test_mixed_timedelta_datetime(self): expected = DataFrame( {"a": [pd.Timedelta(td).as_unit("ns")._value, ts.as_unit("ns")._value]} ) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(frame.to_json(date_unit="ns"), dtype={"a": "int64"}) + data = StringIO(frame.to_json(date_unit="ns")) + result = read_json(data, dtype={"a": "int64"}) tm.assert_frame_equal(result, expected, check_index_type=False) @pytest.mark.parametrize("as_object", [True, False]) @@ -1090,8 +1043,7 @@ def test_default_handler(self): value = object() frame = DataFrame({"a": [7, value]}) expected = DataFrame({"a": [7, str(value)]}) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(frame.to_json(default_handler=str)) + result = read_json(StringIO(frame.to_json(default_handler=str))) tm.assert_frame_equal(expected, result, check_index_type=False) def test_default_handler_indirect(self): @@ -1263,8 +1215,8 @@ def test_tz_range_is_naive(self): def test_read_inline_jsonl(self): # GH9180 - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True) + + result = read_json(StringIO('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n'), lines=True) expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) tm.assert_frame_equal(result, expected) @@ -1292,23 +1244,18 @@ def test_read_jsonl_unicode_chars(self): # GH15132: non-ascii unicode characters # \u201d == RIGHT DOUBLE QUOTATION MARK - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - # simulate file handle - json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n' - json = StringIO(json) - result = read_json(json, lines=True) - expected = DataFrame( - [["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"] - ) - tm.assert_frame_equal(result, expected) + # simulate file handle + json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n' + json = StringIO(json) + result = read_json(json, lines=True) + expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) - # simulate string - json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n' - result = read_json(json, lines=True) - expected = DataFrame( - [["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"] - ) - tm.assert_frame_equal(result, expected) + # simulate string + json = StringIO('{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n') + result = read_json(json, lines=True) + expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("bigNum", [sys.maxsize + 1, -(sys.maxsize + 2)]) def test_to_json_large_numbers(self, bigNum): @@ -1360,16 +1307,15 @@ def test_to_jsonl(self): result = df.to_json(orient="records", lines=True) expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}\n' assert result == expected - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - tm.assert_frame_equal(read_json(result, lines=True), df) + tm.assert_frame_equal(read_json(StringIO(result), lines=True), df) # GH15096: escaped characters in columns and data df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], columns=["a\\", "b"]) result = df.to_json(orient="records", lines=True) expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}\n' assert result == expected - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - tm.assert_frame_equal(read_json(result, lines=True), df) + + tm.assert_frame_equal(read_json(StringIO(result), lines=True), df) # TODO: there is a near-identical test for pytables; can we share? @pytest.mark.xfail(reason="GH#13774 encoding kwarg not supported", raises=TypeError) @@ -1400,8 +1346,7 @@ def test_latin_encoding(self): def roundtrip(s, encoding="latin-1"): with tm.ensure_clean("test.json") as path: s.to_json(path, encoding=encoding) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - retr = read_json(path, encoding=encoding) + retr = read_json(StringIO(path), encoding=encoding) tm.assert_series_equal(s, retr, check_categorical=False) for s in examples: @@ -1425,16 +1370,15 @@ def test_from_json_to_json_table_index_and_columns(self, index, columns): # GH25433 GH25435 expected = DataFrame([[1, 2], [3, 4]], index=index, columns=columns) dfjson = expected.to_json(orient="table") - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(dfjson, orient="table") + + result = read_json(StringIO(dfjson), orient="table") tm.assert_frame_equal(result, expected) def test_from_json_to_json_table_dtypes(self): # GH21345 expected = DataFrame({"a": [1, 2], "b": [3.0, 4.0], "c": ["5", "6"]}) dfjson = expected.to_json(orient="table") - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(dfjson, orient="table") + result = read_json(StringIO(dfjson), orient="table") tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("orient", ["split", "records", "index", "columns"]) @@ -1453,19 +1397,19 @@ def test_to_json_from_json_columns_dtypes(self, orient): } ) dfjson = expected.to_json(orient=orient) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json( - dfjson, - orient=orient, - dtype={ - "Integer": "int64", - "Float": "float64", - "Object": "object", - "Bool": "bool", - "Category": "category", - "Datetime": "datetime64[ns]", - }, - ) + + result = read_json( + StringIO(dfjson), + orient=orient, + dtype={ + "Integer": "int64", + "Float": "float64", + "Object": "object", + "Bool": "bool", + "Category": "category", + "Datetime": "datetime64[ns]", + }, + ) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("dtype", [True, {"b": int, "c": int}]) @@ -1482,9 +1426,7 @@ def test_read_json_table_empty_axes_dtype(self, orient): # GH28558 expected = DataFrame() - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json("{}", orient=orient, convert_axes=True) - + result = read_json(StringIO("{}"), orient=orient, convert_axes=True) tm.assert_index_equal(result.index, expected.index) tm.assert_index_equal(result.columns, expected.columns) @@ -1597,16 +1539,14 @@ def test_index_false_from_json_to_json(self, orient, index): # Test index=False in from_json to_json expected = DataFrame({"a": [1, 2], "b": [3, 4]}) dfjson = expected.to_json(orient=orient, index=index) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(dfjson, orient=orient) + result = read_json(StringIO(dfjson), orient=orient) tm.assert_frame_equal(result, expected) def test_read_timezone_information(self): # GH 25546 - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json( - '{"2019-01-01T11:00:00.000Z":88}', typ="series", orient="index" - ) + result = read_json( + StringIO('{"2019-01-01T11:00:00.000Z":88}'), typ="series", orient="index" + ) expected = Series([88], index=DatetimeIndex(["2019-01-01 11:00:00"], tz="UTC")) tm.assert_series_equal(result, expected) @@ -1621,8 +1561,7 @@ def test_read_timezone_information(self): ) def test_read_json_with_url_value(self, url): # GH 36271 - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(f'{{"url":{{"0":"{url}"}}}}') + result = read_json(StringIO(f'{{"url":{{"0":"{url}"}}}}')) expected = DataFrame({"url": [url]}) tm.assert_frame_equal(result, expected) @@ -1827,9 +1766,10 @@ def test_json_negative_indent_raises(self): def test_emca_262_nan_inf_support(self): # GH 12213 - data = '["a", NaN, "NaN", Infinity, "Infinity", -Infinity, "-Infinity"]' - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(data) + data = StringIO( + '["a", NaN, "NaN", Infinity, "Infinity", -Infinity, "-Infinity"]' + ) + result = read_json(data) expected = DataFrame( ["a", None, "NaN", np.inf, "Infinity", -np.inf, "-Infinity"] ) @@ -1839,8 +1779,7 @@ def test_frame_int_overflow(self): # GH 30320 encoded_json = json.dumps([{"col": "31900441201190696999"}, {"col": "Text"}]) expected = DataFrame({"col": ["31900441201190696999", "Text"]}) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(encoded_json) + result = read_json(StringIO(encoded_json)) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -1885,8 +1824,7 @@ def test_json_pandas_nulls(self, nulls_fixture, request): def test_readjson_bool_series(self): # GH31464 - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json("[true, true, false]", typ="series") + result = read_json(StringIO("[true, true, false]"), typ="series") expected = Series([True, True, False]) tm.assert_series_equal(result, expected) @@ -2002,8 +1940,9 @@ def test_read_json_dtype_backend(self, string_storage, dtype_backend, orient): out = df.to_json(orient=orient) with pd.option_context("mode.string_storage", string_storage): - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(out, dtype_backend=dtype_backend, orient=orient) + result = read_json( + StringIO(out), dtype_backend=dtype_backend, orient=orient + ) expected = DataFrame( { @@ -2041,10 +1980,9 @@ def test_read_json_nullable_series(self, string_storage, dtype_backend, orient): out = ser.to_json(orient=orient) with pd.option_context("mode.string_storage", string_storage): - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json( - out, dtype_backend=dtype_backend, orient=orient, typ="series" - ) + result = read_json( + StringIO(out), dtype_backend=dtype_backend, orient=orient, typ="series" + ) expected = Series([1, np.nan, 3], dtype="Int64") diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index c78d3bcfb013d..951843d50f1bb 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -15,19 +15,16 @@ from pandas.io.json._json import JsonReader -def generateDepMsg(): - return ( +def test_json_deprecation(): + # PR 53409 + expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) + msg = ( "Passing literal json to 'read_json' is deprecated and " "will be removed in a future version. To read from a " "literal string, wrap it in a 'StringIO' object." ) - -def test_json_deprecation(): - # PR 53409 - expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) - - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): + with tm.assert_produces_warning(FutureWarning, match=msg): result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True) tm.assert_frame_equal(result, expected) @@ -40,8 +37,7 @@ def lines_json_df(): def test_read_jsonl(): # GH9180 - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True) + result = read_json(StringIO('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n'), lines=True) expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) tm.assert_frame_equal(result, expected) @@ -72,8 +68,7 @@ def test_read_datetime(request, engine): if engine == "pyarrow": result = read_json(json_line, engine=engine) else: - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(json_line, engine=engine) + result = read_json(StringIO(json_line), engine=engine) expected = DataFrame( [[1, "2020-03-05", "hector"], [2, "2020-04-08T09:58:49+00:00", "hector"]], columns=["accounts", "date", "name"], @@ -94,8 +89,7 @@ def test_read_jsonl_unicode_chars(): # simulate string json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n' - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(json, lines=True) + result = read_json(StringIO(json), lines=True) expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"]) tm.assert_frame_equal(result, expected) @@ -111,16 +105,14 @@ def test_to_jsonl(): result = df.to_json(orient="records", lines=True) expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}\n' assert result == expected - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - tm.assert_frame_equal(read_json(result, lines=True), df) + tm.assert_frame_equal(read_json(StringIO(result), lines=True), df) # GH15096: escaped characters in columns and data df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], columns=["a\\", "b"]) result = df.to_json(orient="records", lines=True) expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}\n' assert result == expected - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - tm.assert_frame_equal(read_json(result, lines=True), df) + tm.assert_frame_equal(read_json(StringIO(result), lines=True), df) def test_to_jsonl_count_new_lines(): @@ -282,8 +274,7 @@ def test_readjson_chunks_multiple_empty_lines(chunksize): {"A":3,"B":6} """ orig = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - test = read_json(j, lines=True, chunksize=chunksize) + test = read_json(StringIO(j), lines=True, chunksize=chunksize) if chunksize is not None: with test: test = pd.concat(test) @@ -317,8 +308,7 @@ def test_readjson_nrows(nrows, engine): {"a": 3, "b": 4} {"a": 5, "b": 6} {"a": 7, "b": 8}""" - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - result = read_json(jsonl, lines=True, nrows=nrows) + result = read_json(StringIO(jsonl), lines=True, nrows=nrows) expected = DataFrame({"a": [1, 3, 5, 7], "b": [2, 4, 6, 8]}).iloc[:nrows] tm.assert_frame_equal(result, expected) @@ -341,11 +331,10 @@ def test_readjson_nrows_chunks(request, nrows, chunksize, engine): {"a": 7, "b": 8}""" if engine != "pyarrow": - with tm.assert_produces_warning(FutureWarning, match=generateDepMsg()): - with read_json( - jsonl, lines=True, nrows=nrows, chunksize=chunksize, engine=engine - ) as reader: - chunked = pd.concat(reader) + with read_json( + StringIO(jsonl), lines=True, nrows=nrows, chunksize=chunksize, engine=engine + ) as reader: + chunked = pd.concat(reader) else: with read_json( jsonl, lines=True, nrows=nrows, chunksize=chunksize, engine=engine From ee427c93b55f6f35ab146cead35fb22fb5177845 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Fri, 2 Jun 2023 13:40:34 -0400 Subject: [PATCH 14/28] Fixing indendation errors in unit tests. Moved one unit test to another file. --- pandas/tests/io/json/test_compression.py | 2 +- pandas/tests/io/json/test_deprecated_kwargs.py | 13 +++++++------ pandas/tests/io/json/test_pandas.py | 14 ++++++++++++++ pandas/tests/io/json/test_readlines.py | 14 -------------- 4 files changed, 22 insertions(+), 21 deletions(-) diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py index 18a815a58ee3c..fca57c3786e5d 100644 --- a/pandas/tests/io/json/test_compression.py +++ b/pandas/tests/io/json/test_compression.py @@ -27,7 +27,7 @@ def test_compression_roundtrip(compression): with tm.decompress_file(path, compression) as fh: result = fh.read().decode("utf8") data = StringIO(result) - tm.assert_frame_equal(df, pd.read_json(data)) + tm.assert_frame_equal(df, pd.read_json(data)) def test_read_zipped_json(datapath): diff --git a/pandas/tests/io/json/test_deprecated_kwargs.py b/pandas/tests/io/json/test_deprecated_kwargs.py index 89bcd27446a09..cc88fc3ba1826 100644 --- a/pandas/tests/io/json/test_deprecated_kwargs.py +++ b/pandas/tests/io/json/test_deprecated_kwargs.py @@ -12,9 +12,10 @@ def test_good_kwargs(): df = pd.DataFrame({"A": [2, 4, 6], "B": [3, 6, 9]}, index=[0, 1, 2]) - data1 = StringIO(df.to_json(orient="split")) - tm.assert_frame_equal(df, read_json(data1, orient="split")) - data2 = StringIO(df.to_json(orient="columns")) - tm.assert_frame_equal(df, read_json(data2, orient="columns")) - data3 = StringIO(df.to_json(orient="index")) - tm.assert_frame_equal(df, read_json(data3, orient="index")) + with tm.assert_produces_warning(None): + data1 = StringIO(df.to_json(orient="split")) + tm.assert_frame_equal(df, read_json(data1, orient="split")) + data2 = StringIO(df.to_json(orient="columns")) + tm.assert_frame_equal(df, read_json(data2, orient="columns")) + data3 = StringIO(df.to_json(orient="index")) + tm.assert_frame_equal(df, read_json(data3, orient="index")) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 773caaf934137..7febc6634ec39 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -29,6 +29,20 @@ ) +def test_json_deprecation(): + # PR 53409 + expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) + msg = ( + "Passing literal json to 'read_json' is deprecated and " + "will be removed in a future version. To read from a " + "literal string, wrap it in a 'StringIO' object." + ) + + with tm.assert_produces_warning(FutureWarning, match=msg): + result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True) + tm.assert_frame_equal(result, expected) + + def assert_json_roundtrip_equal(result, expected, orient): if orient in ("records", "values"): expected = expected.reset_index(drop=True) diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index 951843d50f1bb..ab9551d298c59 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -15,20 +15,6 @@ from pandas.io.json._json import JsonReader -def test_json_deprecation(): - # PR 53409 - expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) - msg = ( - "Passing literal json to 'read_json' is deprecated and " - "will be removed in a future version. To read from a " - "literal string, wrap it in a 'StringIO' object." - ) - - with tm.assert_produces_warning(FutureWarning, match=msg): - result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True) - tm.assert_frame_equal(result, expected) - - @pytest.fixture def lines_json_df(): df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) From 7fb9d7c3bd3e3d93be4ab1ecd7377505f8272024 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Fri, 2 Jun 2023 15:48:53 -0400 Subject: [PATCH 15/28] Updating unit test name --- pandas/tests/io/json/test_pandas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 7febc6634ec39..de97b80ed15db 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -29,7 +29,7 @@ ) -def test_json_deprecation(): +def test_literal_json_deprecation(): # PR 53409 expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) msg = ( From 89a8c627616878694e88bb2374d590f71d6a3c6d Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Mon, 5 Jun 2023 19:09:18 -0400 Subject: [PATCH 16/28] Adding additional checks to unit tests --- pandas/io/json/_json.py | 8 ++++++++ pandas/tests/io/json/test_pandas.py | 31 +++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 3eb9b12fad181..fc418b58ec295 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -866,6 +866,14 @@ def __init__( self.nrows = validate_integer("nrows", self.nrows, 0) if not self.lines: raise ValueError("nrows can only be passed if lines=True") + if not self.lines and "\n" in filepath_or_buffer: + warnings.warn( + "Passing literal json to 'read_json' is deprecated and " + "will be removed in a future version. To read from a " + "literal string, wrap it in a 'StringIO' object.", + FutureWarning, + stacklevel=find_stack_level(), + ) if self.engine == "pyarrow": if not self.lines: raise ValueError( diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index de97b80ed15db..48b6870743d8b 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -32,16 +32,47 @@ def test_literal_json_deprecation(): # PR 53409 expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) + + jsonl = """{"a": 1, "b": 2} + {"a": 3, "b": 4} + {"a": 5, "b": 6} + {"a": 7, "b": 8}""" + msg = ( "Passing literal json to 'read_json' is deprecated and " "will be removed in a future version. To read from a " "literal string, wrap it in a 'StringIO' object." ) + with tm.assert_produces_warning(FutureWarning, match=msg): + try: + read_json(jsonl, lines=False) + except ValueError: + pass + + with tm.assert_produces_warning(FutureWarning, match=msg): + read_json(expected.to_json(), lines=False) + with tm.assert_produces_warning(FutureWarning, match=msg): result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True) tm.assert_frame_equal(result, expected) + with tm.assert_produces_warning(FutureWarning, match=msg): + try: + result = read_json( + '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}\n', + lines=False, + ) + except ValueError: + pass + + with tm.assert_produces_warning(FutureWarning, match=msg): + try: + result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=False) + except ValueError: + pass + tm.assert_frame_equal(result, expected) + def assert_json_roundtrip_equal(result, expected, orient): if orient in ("records", "values"): From 83d94e65a7f76562d078f005a21984f513b67734 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Mon, 5 Jun 2023 20:53:58 -0400 Subject: [PATCH 17/28] Fixing unit tests --- pandas/io/json/_json.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index fc418b58ec295..4b65ddc84ea66 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -866,7 +866,11 @@ def __init__( self.nrows = validate_integer("nrows", self.nrows, 0) if not self.lines: raise ValueError("nrows can only be passed if lines=True") - if not self.lines and "\n" in filepath_or_buffer: + if ( + isinstance(filepath_or_buffer, str) + and not self.lines + and "\n" in filepath_or_buffer + ): warnings.warn( "Passing literal json to 'read_json' is deprecated and " "will be removed in a future version. To read from a " From 81b7ab28d263f78032b26cdc6ef07ffabd860614 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Tue, 6 Jun 2023 23:11:41 -0400 Subject: [PATCH 18/28] Fixing unit tests --- pandas/tests/io/json/test_readlines.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index ab9551d298c59..54f4980b1e4e3 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -52,7 +52,7 @@ def test_read_datetime(request, engine): json_line = df.to_json(lines=True, orient="records") if engine == "pyarrow": - result = read_json(json_line, engine=engine) + result = read_json(StringIO(json_line), engine=engine) else: result = read_json(StringIO(json_line), engine=engine) expected = DataFrame( From 7d2a80aa3ce80c6773eabc930bd4fded1a7f1a50 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Mon, 12 Jun 2023 16:10:23 -0400 Subject: [PATCH 19/28] Updating whatsnew documentation per reviewer recommendations. --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 96324356875b3..bde93335f8827 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -277,7 +277,7 @@ Deprecations - Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`) - Deprecated behavior of :func:`assert_series_equal` and :func:`assert_frame_equal` considering NA-like values (e.g. ``NaN`` vs ``None`` as equivalent) (:issue:`52081`) - Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`) -- Deprecated literal json input to :func:`read_json` (:issue:`53409`) +- Deprecated literal json input to :func:`read_json`. Wrap literal json string input in ``io.StringIO`` instead. (:issue:`53409`) - Deprecated option "mode.use_inf_as_na", convert inf entries to ``NaN`` before instead (:issue:`51684`) - Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`) From bf2e6869f0c34989f8862d0bd6d6eb90b38d7d4f Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Tue, 13 Jun 2023 18:08:38 -0400 Subject: [PATCH 20/28] Fixing failing code tests --- pandas/io/json/_json.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 4b65ddc84ea66..67b64563dfb03 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -715,7 +715,7 @@ def read_json( "data":[["a","b"],["c","d"]]\ }}\ ' - >>> pd.read_json(_, orient='split') + >>> pd.read_json(StringIO(_), orient='split') col 1 col 2 row 1 a b row 2 c d @@ -725,7 +725,7 @@ def read_json( >>> df.to_json(orient='index') '{{"row 1":{{"col 1":"a","col 2":"b"}},"row 2":{{"col 1":"c","col 2":"d"}}}}' - >>> pd.read_json(_, orient='index') + >>> pd.read_json(StringIO(_), orient='index') col 1 col 2 row 1 a b row 2 c d @@ -735,7 +735,7 @@ def read_json( >>> df.to_json(orient='records') '[{{"col 1":"a","col 2":"b"}},{{"col 1":"c","col 2":"d"}}]' - >>> pd.read_json(_, orient='records') + >>> pd.read_json(StringIO(_), orient='records') col 1 col 2 0 a b 1 c d From c55ea185a596d4c391612ebd033bfa18371b5722 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Tue, 13 Jun 2023 19:07:12 -0400 Subject: [PATCH 21/28] Fixing failing code tests --- pandas/io/json/_json.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 67b64563dfb03..35cd0689a0cef 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -715,7 +715,7 @@ def read_json( "data":[["a","b"],["c","d"]]\ }}\ ' - >>> pd.read_json(StringIO(_), orient='split') + >>> pd.read_json(io.StringIO(_), orient='split') col 1 col 2 row 1 a b row 2 c d @@ -724,8 +724,8 @@ def read_json( >>> df.to_json(orient='index') '{{"row 1":{{"col 1":"a","col 2":"b"}},"row 2":{{"col 1":"c","col 2":"d"}}}}' - - >>> pd.read_json(StringIO(_), orient='index') +git config pull.rebase false + >>> pd.read_json(io.StringIO(_), orient='index') col 1 col 2 row 1 a b row 2 c d @@ -735,7 +735,7 @@ def read_json( >>> df.to_json(orient='records') '[{{"col 1":"a","col 2":"b"}},{{"col 1":"c","col 2":"d"}}]' - >>> pd.read_json(StringIO(_), orient='records') + >>> pd.read_json(io.StringIO(_), orient='records') col 1 col 2 0 a b 1 c d From 85ce639c35e2b1303881ecdd192edab7dec26fbd Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Tue, 13 Jun 2023 19:12:22 -0400 Subject: [PATCH 22/28] Adding import to doc string example --- pandas/io/json/_json.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 35cd0689a0cef..b1a94501944f8 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -701,6 +701,7 @@ def read_json( Examples -------- + >>> from io import StringIO >>> df = pd.DataFrame([['a', 'b'], ['c', 'd']], ... index=['row 1', 'row 2'], ... columns=['col 1', 'col 2']) From 0773ef02579ea891ed0f53b4060bc0136de11026 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Tue, 13 Jun 2023 19:43:46 -0400 Subject: [PATCH 23/28] Fixing documentation formatting error --- pandas/io/json/_json.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index b1a94501944f8..d528d6edc4183 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -725,7 +725,7 @@ def read_json( >>> df.to_json(orient='index') '{{"row 1":{{"col 1":"a","col 2":"b"}},"row 2":{{"col 1":"c","col 2":"d"}}}}' -git config pull.rebase false + >>> pd.read_json(io.StringIO(_), orient='index') col 1 col 2 row 1 a b From 89180d38fcff47e586f991d4d978091306075acb Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Tue, 13 Jun 2023 19:54:03 -0400 Subject: [PATCH 24/28] Fixing documentation formatting error --- pandas/io/json/_json.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index d528d6edc4183..eaeaedfdddfcb 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -716,7 +716,7 @@ def read_json( "data":[["a","b"],["c","d"]]\ }}\ ' - >>> pd.read_json(io.StringIO(_), orient='split') + >>> pd.read_json(StringIO(_), orient='split') col 1 col 2 row 1 a b row 2 c d @@ -726,7 +726,7 @@ def read_json( >>> df.to_json(orient='index') '{{"row 1":{{"col 1":"a","col 2":"b"}},"row 2":{{"col 1":"c","col 2":"d"}}}}' - >>> pd.read_json(io.StringIO(_), orient='index') + >>> pd.read_json(StringIO(_), orient='index') col 1 col 2 row 1 a b row 2 c d @@ -736,7 +736,7 @@ def read_json( >>> df.to_json(orient='records') '[{{"col 1":"a","col 2":"b"}},{{"col 1":"c","col 2":"d"}}]' - >>> pd.read_json(io.StringIO(_), orient='records') + >>> pd.read_json(StringIO(_), orient='records') col 1 col 2 0 a b 1 c d From 543b725254a33a76623d43de5b05eed1122f463a Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Wed, 14 Jun 2023 13:19:19 -0400 Subject: [PATCH 25/28] Fixing documentation error after fixing merge conflict --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 3720b71ddd5e2..eaca68822ee5d 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -286,8 +286,8 @@ Deprecations - Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`) - Deprecated behavior of :func:`assert_series_equal` and :func:`assert_frame_equal` considering NA-like values (e.g. ``NaN`` vs ``None`` as equivalent) (:issue:`52081`) - Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`) -- Deprecated literal json input to :func:`read_json`. Wrap literal json string input in ``io.StringIO`` instead. (:issue:`53409`) - Deprecated falling back to filling when ``value`` is not specified in :meth:`DataFrame.replace` and :meth:`Series.replace` with non-dict-like ``to_replace`` (:issue:`33302`) +- Deprecated literal json input to :func:`read_json`. Wrap literal json string input in ``io.StringIO`` instead. (:issue:`53409`) - Deprecated option "mode.use_inf_as_na", convert inf entries to ``NaN`` before instead (:issue:`51684`) - Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`) - Deprecated the "method" and "limit" keywords on :meth:`Series.fillna`, :meth:`DataFrame.fillna`, :meth:`SeriesGroupBy.fillna`, :meth:`DataFrameGroupBy.fillna`, and :meth:`Resampler.fillna`, use ``obj.bfill()`` or ``obj.ffill()`` instead (:issue:`53394`) From b11a80ce983557d810ca3f64bbe691564bb8e17b Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Wed, 14 Jun 2023 13:46:31 -0400 Subject: [PATCH 26/28] Fixing formatting errors in whatsnew file --- doc/source/whatsnew/v1.5.0.rst | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index badf3f0f68627..d4cd3cba88126 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -474,19 +474,21 @@ upon serialization. (Related issue :issue:`12997`) .. code-block:: ipython - In [4]: a.to_json(date_format='iso') - Out[4]: '{"2020-12-28T00:00:00.000Z":0,"2020-12-28T01:00:00.000Z":1,"2020-12-28T02:00:00.000Z":2}' + In[4]: from io import StringIO - In [5]: pd.read_json(a.to_json(date_format='iso'), typ="series").index == a.index - Out[5]: array([False, False, False]) + In [5]: a.to_json(date_format='iso') + Out[5]: '{"2020-12-28T00:00:00.000Z":0,"2020-12-28T01:00:00.000Z":1,"2020-12-28T02:00:00.000Z":2}' + + In [6]: pd.read_json(StringIO(a.to_json(date_format='iso')), typ="series").index == a.index + Out[6]: array([False, False, False]) *New Behavior* .. ipython:: python - + from io import StringIO a.to_json(date_format='iso') # Roundtripping now works - pd.read_json(a.to_json(date_format='iso'), typ="series").index == a.index + pd.read_json(StringIO(a.to_json(date_format='iso')), typ="series").index == a.index .. _whatsnew_150.notable_bug_fixes.groupby_value_counts_categorical: From 4c29f5fc8dfcfe02c2c1281453e3440df7053451 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Wed, 14 Jun 2023 14:50:46 -0400 Subject: [PATCH 27/28] Updating formatting errors in documentation --- doc/source/whatsnew/v1.5.0.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index d4cd3cba88126..9653226b96196 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -474,7 +474,7 @@ upon serialization. (Related issue :issue:`12997`) .. code-block:: ipython - In[4]: from io import StringIO + In [4]: from io import StringIO In [5]: a.to_json(date_format='iso') Out[5]: '{"2020-12-28T00:00:00.000Z":0,"2020-12-28T01:00:00.000Z":1,"2020-12-28T02:00:00.000Z":2}' @@ -485,6 +485,7 @@ upon serialization. (Related issue :issue:`12997`) *New Behavior* .. ipython:: python + from io import StringIO a.to_json(date_format='iso') # Roundtripping now works From eee3b3d6254e598d498b6992c0c0a5e62dd810f1 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Wed, 14 Jun 2023 18:34:35 -0400 Subject: [PATCH 28/28] Updating formatting errors in documentation --- doc/source/user_guide/io.rst | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 90a8bd868b60b..84a78ace8d7c7 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -2111,7 +2111,8 @@ Reading from a JSON string: .. ipython:: python - pd.read_json(json) + from io import StringIO + pd.read_json(StringIO(json)) Reading from a file: @@ -2135,6 +2136,7 @@ Preserve string indices: .. ipython:: python + from io import StringIO si = pd.DataFrame( np.zeros((4, 4)), columns=list(range(4)), index=[str(i) for i in range(4)] ) @@ -2143,7 +2145,7 @@ Preserve string indices: si.columns json = si.to_json() - sij = pd.read_json(json, convert_axes=False) + sij = pd.read_json(StringIO(json), convert_axes=False) sij sij.index sij.columns @@ -2152,18 +2154,19 @@ Dates written in nanoseconds need to be read back in nanoseconds: .. ipython:: python + from io import StringIO json = dfj2.to_json(date_unit="ns") # Try to parse timestamps as milliseconds -> Won't Work - dfju = pd.read_json(json, date_unit="ms") + dfju = pd.read_json(StringIO(json), date_unit="ms") dfju # Let pandas detect the correct precision - dfju = pd.read_json(json) + dfju = pd.read_json(StringIO(json)) dfju # Or specify that all timestamps are in nanoseconds - dfju = pd.read_json(json, date_unit="ns") + dfju = pd.read_json(StringIO(json), date_unit="ns") dfju By setting the ``dtype_backend`` argument you can control the default dtypes used for the resulting DataFrame. @@ -2251,11 +2254,12 @@ For line-delimited json files, pandas can also return an iterator which reads in .. ipython:: python + from io import StringIO jsonl = """ {"a": 1, "b": 2} {"a": 3, "b": 4} """ - df = pd.read_json(jsonl, lines=True) + df = pd.read_json(StringIO(jsonl), lines=True) df df.to_json(orient="records", lines=True)