From 616e9228b44c4212e315f43dfa924e76dee1be0d Mon Sep 17 00:00:00 2001 From: Bjorn Olsen Date: Tue, 24 Dec 2019 15:22:20 +0200 Subject: [PATCH] Replace separators found within keys --- README.md | 20 +++++----- flatten_json/__init__.py | 81 ++++++++++++++++++++++++++++------------ test_flatten.py | 58 ++++++++++++++++++++++++++-- 3 files changed, 122 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index 0ab6c02..59cb712 100644 --- a/README.md +++ b/README.md @@ -27,13 +27,13 @@ flatten(dic) Results: ```python -{'a': '1', - 'b': '2', - 'c_0_d_0': '2', - 'c_0_d_1': '3', - 'c_0_d_2': '4', - 'c_0_e_0_f': '1', - 'c_0_e_0_g': '2'} +{'a': 1, + 'b': 2, + 'c_0_d_0': 2, + 'c_0_d_1': 3, + 'c_0_d_2': 4, + 'c_0_e_0_f': 1, + 'c_0_e_0_g': 2} ``` ### Usage with Pandas @@ -51,9 +51,9 @@ dic_flattened = (flatten(d) for d in dic) ``` which creates an array of flattened objects: ```python -[{'a': '1', 'b': '2', 'c_d': '3', 'c_e': '4'}, - {'a': '0.5', 'c_d': '3.2'}, - {'a': '0.8', 'b': '1.8'}] +[{'a': 1, 'b': 2, 'c_d': 3, 'c_e': 4}, + {'a': 0.5, 'c_d': 3.2}, + {'a': 0.8, 'b': 1.8}] ``` Finally you can use ```pd.DataFrame``` to capture the flattened array: ```python diff --git a/flatten_json/__init__.py b/flatten_json/__init__.py index b44c923..1cbdb55 100644 --- a/flatten_json/__init__.py +++ b/flatten_json/__init__.py @@ -1,8 +1,15 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -import sys +import copy import json +import re +import sys +from math import isnan + +import six + +from flatten_json.util import check_if_numbers_are_consecutive try: # 3.8 and up @@ -10,30 +17,31 @@ except ImportError: from collections import Iterable -from flatten_json.util import check_if_numbers_are_consecutive -import six -import copy -import re -from math import isnan - -def _construct_key(previous_key, separator, new_key): +def _construct_key(previous_key, separator, new_key, replace_separators=None): """ Returns the new_key if no previous key exists, otherwise concatenates previous key, separator, and new_key :param previous_key: :param separator: :param new_key: + :param str replace_separators: Replace separators within keys :return: a string if previous_key exists and simply passes through the new_key otherwise """ + if replace_separators is not None: + new_key = str(new_key).replace(separator, replace_separators) if previous_key: return u"{}{}{}".format(previous_key, separator, new_key) else: return new_key -def flatten(nested_dict, separator="_", root_keys_to_ignore=set()): +def flatten( + nested_dict, + separator="_", + root_keys_to_ignore=set(), + replace_separators=None): """ Flattens a dictionary with nested structure to a dictionary with no hierarchy @@ -44,6 +52,7 @@ def flatten(nested_dict, separator="_", root_keys_to_ignore=set()): :param nested_dict: dictionary we want to flatten :param separator: string to separate dictionary keys by :param root_keys_to_ignore: set of root keys to ignore from flattening + :param str replace_separators: Replace separators within keys :return: flattened dictionary """ assert isinstance(nested_dict, dict), "flatten requires a dictionary input" @@ -69,12 +78,22 @@ def _flatten(object_, key): elif isinstance(object_, dict): for object_key in object_: if not (not key and object_key in root_keys_to_ignore): - _flatten(object_[object_key], _construct_key(key, - separator, - object_key)) + _flatten( + object_[object_key], + _construct_key( + key, + separator, + object_key, + replace_separators=replace_separators)) elif isinstance(object_, (list, set, tuple)): for index, item in enumerate(object_): - _flatten(item, _construct_key(key, separator, index)) + _flatten( + item, + _construct_key( + key, + separator, + index, + replace_separators=replace_separators)) # Anything left take as is else: flattened_dict[key] = object_ @@ -88,7 +107,8 @@ def _flatten(object_, key): def flatten_preserve_lists(nested_dict, separator="_", root_keys_to_ignore=set(), - max_list_index=3, max_depth=3): + max_list_index=3, max_depth=3, + replace_separators=None): """ Flattens a dictionary with nested structure to a dictionary with no hierarchy @@ -106,6 +126,7 @@ def flatten_preserve_lists(nested_dict, separator="_", :param root_keys_to_ignore: set of root keys to ignore from flattening :param max_list_index: maximum list index to process :param max_depth: maximum nesting depth to process + :param str replace_separators: Replace separators within keys :return: flattened dictionary """ @@ -145,13 +166,23 @@ def _flatten(object_, key): else: for object_key in object_: if not (not key and object_key in root_keys_to_ignore): - _flatten(object_[object_key], - _construct_key(key, separator, object_key) - ) + _flatten( + object_[object_key], + _construct_key( + key, + separator, + object_key, + replace_separators=replace_separators)) elif isinstance(object_, list) or isinstance(object_, set): for index, item in enumerate(object_): - _flatten(item, _construct_key(key, separator, index)) + _flatten( + item, + _construct_key( + key, + separator, + index, + replace_separators=replace_separators)) else: flattened_dict[key] = object_ @@ -208,11 +239,15 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth_inner): (str(type(x[1])), len(str(x[1]))), reverse=False): if not (not key and object_key in root_keys_to_ignore): - _flatten_low_entropy(object_[object_key], - _construct_key(key, - separator, - object_key), - cur_depth, max_depth_inner) + _flatten_low_entropy( + object_[object_key], + _construct_key( + key, + separator, + object_key, + replace_separators=replace_separators), + cur_depth, + max_depth_inner) # lists could go into rows, like in a relational database elif isinstance(object_, list) or isinstance(object_, set): diff --git a/test_flatten.py b/test_flatten.py index 9211f6e..12ceb98 100644 --- a/test_flatten.py +++ b/test_flatten.py @@ -1,8 +1,12 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -import unittest import json +import unittest + +from flatten_json import (cli, flatten, flatten_preserve_lists, unflatten, + unflatten_list) +from flatten_json.util import check_if_numbers_are_consecutive try: # python2 @@ -11,9 +15,6 @@ # python3 from io import StringIO -from flatten_json import flatten, flatten_preserve_lists, unflatten, \ - unflatten_list, cli -from flatten_json.util import check_if_numbers_are_consecutive class UnitTests(unittest.TestCase): @@ -2179,6 +2180,55 @@ def test_command_line(self): result = json.loads(output) self.assertEqual(result, dict(a_b=1)) + def test_replace_separators_none(self): + dic = { + 'a_with_separator': {'b': [1, 2, 3]}, + } + expected = { + 'a_with_separator_b_0': 1, + 'a_with_separator_b_1': 2, + 'a_with_separator_b_2': 3 + } + actual = flatten(dic) + self.assertEqual(actual, expected) + + def test_replace_separators_remove(self): + dic = { + 'a_with_separator': {'b': [1, 2, 3]}, + } + expected = { + 'awithseparator_b_0': 1, + 'awithseparator_b_1': 2, + 'awithseparator_b_2': 3 + } + actual = flatten(dic, replace_separators='') + self.assertEqual(actual, expected) + + def test_replace_separators_something(self): + dic = { + 'a_with_separator': {'b': [1, 2, 3]}, + } + expected = { + 'a.with.separator_b_0': 1, + 'a.with.separator_b_1': 2, + 'a.with.separator_b_2': 3 + } + actual = flatten(dic, replace_separators='.') + self.assertEqual(actual, expected) + + def test_replace_separators_nested(self): + dic = { + 'a_with_separator': {'b_with_separator': [1, 2, 3]}, + } + expected = { + 'awithseparator_bwithseparator_0': 1, + 'awithseparator_bwithseparator_1': 2, + 'awithseparator_bwithseparator_2': 3 + } + actual = flatten(dic, replace_separators='') + self.assertEqual(actual, expected) + + if __name__ == '__main__': unittest.main()