Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -832,6 +832,7 @@ I/O
- Bug in :func:`read_json` where default encoding was not set to ``utf-8`` (:issue:`29565`)
- Bug in :class:`PythonParser` where str and bytes were being mixed when dealing with the decimal field (:issue:`29650`)
- :meth:`read_gbq` now accepts ``progress_bar_type`` to display progress bar while the data downloads. (:issue:`29857`)
- Bug in :func:`pandas.io.json.json_normalize` where a missing value in the location specified by ``record_path`` would raise a ``TypeError`` (:issue:`30148`)

Plotting
^^^^^^^^
Expand Down
16 changes: 13 additions & 3 deletions pandas/io/json/_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@

from collections import defaultdict
import copy
from typing import DefaultDict, Dict, List, Optional, Union
from typing import Any, DefaultDict, Dict, Iterable, List, Optional, Union

import numpy as np

from pandas._libs.writers import convert_json_to_lines
from pandas.util._decorators import deprecate

import pandas as pd
from pandas import DataFrame


Expand Down Expand Up @@ -229,14 +230,23 @@ def _json_normalize(
Returns normalized data with columns prefixed with the given string.
"""

def _pull_field(js, spec):
result = js
def _pull_field(js: Dict[str, Any], spec: Union[List, str]) -> Iterable:
result = js # type: ignore
if isinstance(spec, list):
for field in spec:
result = result[field]
else:
result = result[spec]

if not isinstance(result, Iterable):
if pd.isnull(result):
result = [] # type: ignore
else:
raise TypeError(
f"{js} has non iterable value {result} for path {spec}. "
"Must be iterable or null."
)

return result

if isinstance(data, list) and not data:
Expand Down
24 changes: 24 additions & 0 deletions pandas/tests/io/json/test_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,30 @@ def test_nested_flattening_consistent(self):
# They should be the same.
tm.assert_frame_equal(df1, df2)

def test_nonetype_record_path(self, nulls_fixture):
    # see gh-30148
    # A null value at the record_path location should not raise TypeError;
    # only the record that actually holds a list shows up in the result.
    result = json_normalize(
        [
            {"state": "Texas", "info": nulls_fixture},
            {"state": "Florida", "info": [{"i": 2}]},
        ],
        record_path=["info"],
    )
    expected = DataFrame({"i": 2}, index=[0])
    # Both sides are DataFrames, so use the frame-specific assertion.
    tm.assert_frame_equal(result, expected)

def test_non_interable_record_path_errors(self):
    # see gh-30148
    # A non-null, non-iterable value at the record_path location must
    # raise a TypeError.
    import re

    test_input = {"state": "Texas", "info": 1}
    test_path = "info"
    msg = (
        f"{test_input} has non iterable value 1 for path {test_path}. "
        "Must be iterable or null."
    )
    # ``match`` is applied as a regular expression; the message contains
    # braces and dots, so escape it to compare the text literally.
    with pytest.raises(TypeError, match=re.escape(msg)):
        json_normalize([test_input], record_path=[test_path])


class TestNestedToRecord:
def test_flat_stays_flat(self):
Expand Down