Skip to content

Commit

Permalink
Merge pull request #61 from baolsen/fix_delimiters_in_keys
Browse files Browse the repository at this point in the history
Replace separators found within keys
  • Loading branch information
amirziai authored Feb 3, 2020
2 parents dcf0078 + 569cc84 commit 1236f75
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 37 deletions.
20 changes: 10 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@ flatten(dic)

Results:
```python
{'a': '1',
'b': '2',
'c_0_d_0': '2',
'c_0_d_1': '3',
'c_0_d_2': '4',
'c_0_e_0_f': '1',
'c_0_e_0_g': '2'}
{'a': 1,
'b': 2,
'c_0_d_0': 2,
'c_0_d_1': 3,
'c_0_d_2': 4,
'c_0_e_0_f': 1,
'c_0_e_0_g': 2}
```

### Usage with Pandas
Expand All @@ -51,9 +51,9 @@ dic_flattened = (flatten(d) for d in dic)
```
which creates an array of flattened objects:
```python
[{'a': '1', 'b': '2', 'c_d': '3', 'c_e': '4'},
{'a': '0.5', 'c_d': '3.2'},
{'a': '0.8', 'b': '1.8'}]
[{'a': 1, 'b': 2, 'c_d': 3, 'c_e': 4},
{'a': 0.5, 'c_d': 3.2},
{'a': 0.8, 'b': 1.8}]
```
Finally you can use ```pd.DataFrame``` to capture the flattened array:
```python
Expand Down
81 changes: 58 additions & 23 deletions flatten_json/__init__.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,47 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys
import copy
import json
import re
import sys
from math import isnan

import six

from flatten_json.util import check_if_numbers_are_consecutive

try:
# 3.8 and up
from collections.abc import Iterable
except ImportError:
from collections import Iterable

from flatten_json.util import check_if_numbers_are_consecutive
import six
import copy
import re
from math import isnan


def _construct_key(previous_key, separator, new_key):
def _construct_key(previous_key, separator, new_key, replace_separators=None):
"""
Returns the new_key if no previous key exists, otherwise concatenates
previous key, separator, and new_key
:param previous_key:
:param separator:
:param new_key:
:param str replace_separators: Replace separators within keys
:return: a string if previous_key exists and simply passes through the
new_key otherwise
"""
if replace_separators is not None:
new_key = str(new_key).replace(separator, replace_separators)
if previous_key:
return u"{}{}{}".format(previous_key, separator, new_key)
else:
return new_key


def flatten(nested_dict, separator="_", root_keys_to_ignore=set()):
def flatten(
nested_dict,
separator="_",
root_keys_to_ignore=set(),
replace_separators=None):
"""
Flattens a dictionary with nested structure to a dictionary with no
hierarchy
Expand All @@ -44,6 +52,7 @@ def flatten(nested_dict, separator="_", root_keys_to_ignore=set()):
:param nested_dict: dictionary we want to flatten
:param separator: string to separate dictionary keys by
:param root_keys_to_ignore: set of root keys to ignore from flattening
:param str replace_separators: string substituted for any occurrence of
    separator found within a key; None (the default) leaves keys unchanged
:return: flattened dictionary
"""
assert isinstance(nested_dict, dict), "flatten requires a dictionary input"
Expand All @@ -69,12 +78,22 @@ def _flatten(object_, key):
elif isinstance(object_, dict):
for object_key in object_:
if not (not key and object_key in root_keys_to_ignore):
_flatten(object_[object_key], _construct_key(key,
separator,
object_key))
_flatten(
object_[object_key],
_construct_key(
key,
separator,
object_key,
replace_separators=replace_separators))
elif isinstance(object_, (list, set, tuple)):
for index, item in enumerate(object_):
_flatten(item, _construct_key(key, separator, index))
_flatten(
item,
_construct_key(
key,
separator,
index,
replace_separators=replace_separators))
# Anything left take as is
else:
flattened_dict[key] = object_
Expand All @@ -88,7 +107,8 @@ def _flatten(object_, key):

def flatten_preserve_lists(nested_dict, separator="_",
root_keys_to_ignore=set(),
max_list_index=3, max_depth=3):
max_list_index=3, max_depth=3,
replace_separators=None):
"""
Flattens a dictionary with nested structure to a dictionary with no
hierarchy
Expand All @@ -106,6 +126,7 @@ def flatten_preserve_lists(nested_dict, separator="_",
:param root_keys_to_ignore: set of root keys to ignore from flattening
:param max_list_index: maximum list index to process
:param max_depth: maximum nesting depth to process
:param str replace_separators: string substituted for any occurrence of
    separator found within a key; None (the default) leaves keys unchanged
:return: flattened dictionary
"""

Expand Down Expand Up @@ -145,13 +166,23 @@ def _flatten(object_, key):
else:
for object_key in object_:
if not (not key and object_key in root_keys_to_ignore):
_flatten(object_[object_key],
_construct_key(key, separator, object_key)
)
_flatten(
object_[object_key],
_construct_key(
key,
separator,
object_key,
replace_separators=replace_separators))

elif isinstance(object_, list) or isinstance(object_, set):
for index, item in enumerate(object_):
_flatten(item, _construct_key(key, separator, index))
_flatten(
item,
_construct_key(
key,
separator,
index,
replace_separators=replace_separators))

else:
flattened_dict[key] = object_
Expand Down Expand Up @@ -208,11 +239,15 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth_inner):
(str(type(x[1])), len(str(x[1]))),
reverse=False):
if not (not key and object_key in root_keys_to_ignore):
_flatten_low_entropy(object_[object_key],
_construct_key(key,
separator,
object_key),
cur_depth, max_depth_inner)
_flatten_low_entropy(
object_[object_key],
_construct_key(
key,
separator,
object_key,
replace_separators=replace_separators),
cur_depth,
max_depth_inner)

# lists could go into rows, like in a relational database
elif isinstance(object_, list) or isinstance(object_, set):
Expand Down
58 changes: 54 additions & 4 deletions test_flatten.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import unittest
import json
import unittest

from flatten_json import (cli, flatten, flatten_preserve_lists, unflatten,
unflatten_list)
from flatten_json.util import check_if_numbers_are_consecutive

try:
# python2
Expand All @@ -11,9 +15,6 @@
# python3
from io import StringIO

from flatten_json import flatten, flatten_preserve_lists, unflatten, \
unflatten_list, cli
from flatten_json.util import check_if_numbers_are_consecutive


class UnitTests(unittest.TestCase):
Expand Down Expand Up @@ -2195,6 +2196,55 @@ def test_command_line(self):
result = json.loads(output)
self.assertEqual(result, dict(a_b=1))

def test_replace_separators_none(self):
    """Keys keep their embedded separators when no replacement is given."""
    flattened = flatten({'a_with_separator': {'b': [1, 2, 3]}})
    self.assertEqual(
        flattened,
        {
            'a_with_separator_b_0': 1,
            'a_with_separator_b_1': 2,
            'a_with_separator_b_2': 3,
        },
    )

def test_replace_separators_remove(self):
    """An empty replacement strips separators out of the original keys."""
    flattened = flatten(
        {'a_with_separator': {'b': [1, 2, 3]}},
        replace_separators='',
    )
    self.assertEqual(
        flattened,
        {
            'awithseparator_b_0': 1,
            'awithseparator_b_1': 2,
            'awithseparator_b_2': 3,
        },
    )

def test_replace_separators_something(self):
    """Separators inside keys are swapped for the given replacement."""
    flattened = flatten(
        {'a_with_separator': {'b': [1, 2, 3]}},
        replace_separators='.',
    )
    self.assertEqual(
        flattened,
        {
            'a.with.separator_b_0': 1,
            'a.with.separator_b_1': 2,
            'a.with.separator_b_2': 3,
        },
    )

def test_replace_separators_nested(self):
    """Replacement is applied to keys at every nesting level."""
    flattened = flatten(
        {'a_with_separator': {'b_with_separator': [1, 2, 3]}},
        replace_separators='',
    )
    self.assertEqual(
        flattened,
        {
            'awithseparator_bwithseparator_0': 1,
            'awithseparator_bwithseparator_1': 2,
            'awithseparator_bwithseparator_2': 3,
        },
    )



if __name__ == '__main__':
unittest.main()

0 comments on commit 1236f75

Please sign in to comment.