From 1f8c611adcbcd0a633e26203edbafd45b23bc453 Mon Sep 17 00:00:00 2001 From: kaiaeberli Date: Sat, 2 Mar 2019 10:16:42 +0000 Subject: [PATCH 01/24] Fix unflatten for dataframe with duplicate columns Adding fix for unflattening from pandas dataframe records. See here: https://github.com/amirziai/flatten/issues/40 --- flatten_json.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/flatten_json.py b/flatten_json.py index 4961dd1..ec6b6fe 100644 --- a/flatten_json.py +++ b/flatten_json.py @@ -109,8 +109,17 @@ def _unflatten(dic, keys, value): dic[keys[-1]] = value - for item in flat_dict: - _unflatten(unflattened_dict, item.split(separator), flat_dict[item]) + list_keys = sorted(flat_dict.keys()) + for i, item in enumerate(list_keys): + if i != len(list_keys)-1: + if list_keys[i] not in list_keys[i + 1]: + _unflatten(unflattened_dict, item.split(separator), flat_dict[item]) + else: + pass # if key contained in next key, json will be invalid. + else: + # last element + _unflatten(unflattened_dict, item.split(separator), flat_dict[item]) + return unflattened_dict From 2dd4fdbc3be86b39eba4b80e14cf5ac8b7da9cf5 Mon Sep 17 00:00:00 2001 From: kaiaeberli Date: Sat, 2 Mar 2019 10:40:53 +0000 Subject: [PATCH 02/24] added test for issue 40 added test for https://github.com/amirziai/flatten/issues/40 --- test_flatten.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/test_flatten.py b/test_flatten.py index acbcbe6..b89073b 100644 --- a/test_flatten.py +++ b/test_flatten.py @@ -231,7 +231,22 @@ def test_unflatten_with_list_issue31(self): dic_flatten = flatten(dic) actual = unflatten_list(dic_flatten) self.assertEqual(actual, dic) - + + def test_unflatten_with_df_issue40(self): + """https://github.com/amirziai/flatten/issues/40""" + dic = { + 'a.b': 1, + 'a.b.c': 2, + 'a.b.d': 3, + 'a.e': 4 + } + expected = { + 'a': {'b': {'c': 2, 'd':, 3}}, + 'e': 4 + } + actual = unflatten(dic,'.') + self.assertEqual(actual, expected) + def test_unflatten_with_list_deep(self): dic = {'a': [ {'b': [{'c': [{'a': 5, 'b': {'a': [1, 2, 3]}, 'c': {'x': 3}}]}]}]} From e2524b2d1699c3b0816ec00fd55db536bff1f043 Mon Sep 17 00:00:00 2001 From: kaiaeberli Date: Sat, 2 Mar 2019 10:42:13 +0000 Subject: [PATCH 03/24] fixed comma --- test_flatten.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_flatten.py b/test_flatten.py index b89073b..211bea1 100644 --- a/test_flatten.py +++ b/test_flatten.py @@ -241,7 +241,7 @@ def test_unflatten_with_df_issue40(self): 'a.e': 4 } expected = { - 'a': {'b': {'c': 2, 'd':, 3}}, + 'a': {'b': {'c': 2, 'd': 3}}, 'e': 4 } actual = unflatten(dic,'.') From ec65edbd9c31db70016f6002aca23d255b11b239 Mon Sep 17 00:00:00 2001 From: kaiaeberli Date: Sat, 2 Mar 2019 10:58:28 +0000 Subject: [PATCH 04/24] fixed to use startswidth --- flatten_json.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flatten_json.py b/flatten_json.py index ec6b6fe..d88b8db 100644 --- a/flatten_json.py +++ b/flatten_json.py @@ -112,7 +112,7 @@ def _unflatten(dic, keys, value): list_keys = sorted(flat_dict.keys()) for i, item in enumerate(list_keys): if i != len(list_keys)-1: - if list_keys[i] not in list_keys[i + 1]: + if not list_keys[i+1].startswith(list_keys[i]): _unflatten(unflattened_dict, item.split(separator), flat_dict[item]) else: pass # if key contained in next key, json will be invalid. 
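A minimal sketch of the behaviour patches 01-04 are after, using the issue-40 dictionary from the test added above and the corrected expected value that PATCH 05 settles on; the import matches the one used in test_flatten.py:

    from flatten_json import unflatten

    # Flattened keys as they come out of pandas DataFrame records (issue 40):
    # 'a.b' still appears even though the record also carries the deeper
    # keys 'a.b.c' and 'a.b.d'.
    flat = {
        'a.b': 1,
        'a.b.c': 2,
        'a.b.d': 3,
        'a.e': 4,
    }

    # With the keys sorted, 'a.b' is a prefix of the next key 'a.b.c', so the
    # patched _unflatten skips it; writing it as well would make the
    # resulting JSON invalid.
    assert unflatten(flat, '.') == {'a': {'b': {'c': 2, 'd': 3}, 'e': 4}}
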
From 3ebe6f54ac4dee07871ce76bf51baf523f4d61a0 Mon Sep 17 00:00:00 2001 From: kaiaeberli Date: Sat, 2 Mar 2019 11:16:39 +0000 Subject: [PATCH 05/24] fixed test --- test_flatten.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test_flatten.py b/test_flatten.py index 211bea1..120376e 100644 --- a/test_flatten.py +++ b/test_flatten.py @@ -241,9 +241,10 @@ def test_unflatten_with_df_issue40(self): 'a.e': 4 } expected = { - 'a': {'b': {'c': 2, 'd': 3}}, - 'e': 4 - } + 'a': {'b': {'c': 2, 'd': 3}, + 'e': 4 + } + } actual = unflatten(dic,'.') self.assertEqual(actual, expected) From 815e6400fb103f7c0731a616d8340b00ad560a3c Mon Sep 17 00:00:00 2001 From: kaiaeberli Date: Sat, 2 Mar 2019 11:23:35 +0000 Subject: [PATCH 06/24] fixed whitespace --- test_flatten.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test_flatten.py b/test_flatten.py index 120376e..947c981 100644 --- a/test_flatten.py +++ b/test_flatten.py @@ -244,9 +244,9 @@ def test_unflatten_with_df_issue40(self): 'a': {'b': {'c': 2, 'd': 3}, 'e': 4 } - } + } actual = unflatten(dic,'.') - self.assertEqual(actual, expected) + self.assertEqual(actual, expected) def test_unflatten_with_list_deep(self): dic = {'a': [ From e0d98cfd197dd5c10f77936530c0b85d88e9b690 Mon Sep 17 00:00:00 2001 From: kaiaeberli Date: Sat, 2 Mar 2019 11:27:12 +0000 Subject: [PATCH 07/24] whitespace --- test_flatten.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_flatten.py b/test_flatten.py index 947c981..698a123 100644 --- a/test_flatten.py +++ b/test_flatten.py @@ -247,7 +247,7 @@ def test_unflatten_with_df_issue40(self): } actual = unflatten(dic,'.') self.assertEqual(actual, expected) - + def test_unflatten_with_list_deep(self): dic = {'a': [ {'b': [{'c': [{'a': 5, 'b': {'a': [1, 2, 3]}, 'c': {'x': 3}}]}]}]} From 60eb6ff70d26454ef1a4654309e0ed105838827f Mon Sep 17 00:00:00 2001 From: kaiaeberli Date: Sat, 2 Mar 2019 11:30:22 +0000 Subject: [PATCH 08/24] ws --- test_flatten.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_flatten.py b/test_flatten.py index 698a123..ad04751 100644 --- a/test_flatten.py +++ b/test_flatten.py @@ -231,7 +231,7 @@ def test_unflatten_with_list_issue31(self): dic_flatten = flatten(dic) actual = unflatten_list(dic_flatten) self.assertEqual(actual, dic) - + def test_unflatten_with_df_issue40(self): """https://github.com/amirziai/flatten/issues/40""" dic = { From bcd213cf7bfd4af9c9c0559eb552c5dc25574577 Mon Sep 17 00:00:00 2001 From: kaiaeberli Date: Sat, 2 Mar 2019 13:05:24 +0000 Subject: [PATCH 09/24] whitespace --- test_flatten.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_flatten.py b/test_flatten.py index ad04751..79b797e 100644 --- a/test_flatten.py +++ b/test_flatten.py @@ -245,7 +245,7 @@ def test_unflatten_with_df_issue40(self): 'e': 4 } } - actual = unflatten(dic,'.') + actual = unflatten(dic, '.') self.assertEqual(actual, expected) def test_unflatten_with_list_deep(self): From 9e4d29c0de2a360d476b0de63371004b4bc7de4e Mon Sep 17 00:00:00 2001 From: kaiaeberli Date: Sat, 2 Mar 2019 13:07:46 +0000 Subject: [PATCH 10/24] pep errors --- flatten_json.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/flatten_json.py b/flatten_json.py index d88b8db..fff1f95 100644 --- a/flatten_json.py +++ b/flatten_json.py @@ -113,14 +113,14 @@ def _unflatten(dic, keys, value): for i, item in enumerate(list_keys): if i != len(list_keys)-1: if not list_keys[i+1].startswith(list_keys[i]): - 
_unflatten(unflattened_dict, item.split(separator), flat_dict[item]) + _unflatten(unflattened_dict, item.split(separator), + flat_dict[item]) else: pass # if key contained in next key, json will be invalid. else: # last element - _unflatten(unflattened_dict, item.split(separator), flat_dict[item]) - - + _unflatten(unflattened_dict, item.split(separator), + flat_dict[item]) return unflattened_dict From c86759281360c8532120d6fc8ed68c768593b3b1 Mon Sep 17 00:00:00 2001 From: kaiaeberli Date: Sat, 2 Mar 2019 13:10:50 +0000 Subject: [PATCH 11/24] pep --- flatten_json.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flatten_json.py b/flatten_json.py index fff1f95..fb5e284 100644 --- a/flatten_json.py +++ b/flatten_json.py @@ -113,13 +113,13 @@ def _unflatten(dic, keys, value): for i, item in enumerate(list_keys): if i != len(list_keys)-1: if not list_keys[i+1].startswith(list_keys[i]): - _unflatten(unflattened_dict, item.split(separator), + _unflatten(unflattened_dict, item.split(separator), flat_dict[item]) else: pass # if key contained in next key, json will be invalid. else: # last element - _unflatten(unflattened_dict, item.split(separator), + _unflatten(unflattened_dict, item.split(separator), flat_dict[item]) return unflattened_dict From 6cf29f64e0a08009764a23013ea44d54dba0bbf4 Mon Sep 17 00:00:00 2001 From: kaiaeberli Date: Mon, 4 Mar 2019 14:06:23 +0000 Subject: [PATCH 12/24] added flatten_preserver_lists function --- flatten_json.py | 186 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 186 insertions(+) diff --git a/flatten_json.py b/flatten_json.py index fb5e284..8a77550 100644 --- a/flatten_json.py +++ b/flatten_json.py @@ -11,6 +11,8 @@ from util import check_if_numbers_are_consecutive import six +import copy, re +from math import isnan def _construct_key(previous_key, separator, new_key): @@ -82,6 +84,190 @@ def _flatten(object_, key): flatten_json = flatten +def flatten_preserve_lists(nested_dict, separator="_", root_keys_to_ignore=set(), max_list_index=3, max_depth=3): + """ + Flattens a dictionary with nested structure to a dictionary with no + hierarchy + Consider ignoring keys that you are not interested in to prevent + unnecessary processing + This is specially true for very deep objects + This preserves list structure, and you can specify max_list_index and max_depth to limit processing + + Child elements with only one value inside will be unwrapped and become parent's value. + + :param nested_dict: dictionary we want to flatten + :param separator: string to separate dictionary keys by + :param root_keys_to_ignore: set of root keys to ignore from flattening + :param max_list_index: maximum list index to process. Any index beyond this will be ignored. + :param max_depth: maximum nesting depth to process. Anything further down is written as a string into value. 
+ :return: flattened dictionary + """ + + assert isinstance(nested_dict, dict), "flatten requires a dictionary input" + assert isinstance(separator, str), "separator must be a string" + + # This global dictionary stores the flattened keys and values and is + # ultimately returned + flattened_dict = dict() + + def _flatten(object_, key, cur_depth, max_depth): + """ + For dict, list and set objects_ calls itself on the elements and for + other types assigns the object_ to + the corresponding key in the global flattened_dict + :param object_: object to flatten + :param key: carries the concatenated key for the object_ + :return: None + """ + + # Empty object can't be iterated, take as is + if not object_: + flattened_dict[key] = object_ + + # These object types support iteration + # dict always go into columns + elif isinstance(object_, dict): + first_key = list(object_.keys())[0] + # if only 1 child value, and it's first_key's value not a dict or list, flatten immediately + if len(object_) == 1 and not (isinstance(object_[first_key], dict) or isinstance(object_[first_key], list)): + flattened_dict[key] = object_[first_key] + else: + for object_key in object_: + if not (not key and object_key in root_keys_to_ignore): + _flatten(object_[object_key], _construct_key(key, + separator, + object_key), cur_depth, max_depth) # build the next child key + + elif isinstance(object_, list) or isinstance(object_, set): + for index, item in enumerate(object_): + _flatten(item, _construct_key(key, separator, index), cur_depth, max_depth) + + else: + flattened_dict[key] = object_ + + def _flatten_low_entropy(object_, key, cur_depth, max_depth): + """ + For dict, list and set objects_ calls itself on the elements and for + other types assigns the object_ to + the corresponding key in the global flattened_dict + + :param object_: object to flatten + :param key: carries the concatenated key for the object_ + :return: None + """ + cur_depth = cur_depth + 1 # increase current_depth + debug = 0 + + # write latest child as value if max_depth exceeded + if cur_depth > max_depth: + global_max_record = int(max(list(list_prebuilt_flattened_dict.keys()))) + for d in list_prebuilt_flattened_dict[str(global_max_record)]: + d[key] = object_ + + else: + # Empty object can't be iterated, take as is + if not object_: + global_max_record = int(max(list(list_prebuilt_flattened_dict.keys()))) + for d in list_prebuilt_flattened_dict[str(global_max_record)]: + d[key] = object_ + + # These object types support iteration + # dict always go into columns + elif isinstance(object_, dict): + first_key = list(object_.keys())[0] + # if only 1 child value, and not a dict or list, flatten immediately + if len(object_) == 1 and not (isinstance(object_[first_key], dict) or isinstance(object_[first_key], list)): + global_max_record = int(max(list(list_prebuilt_flattened_dict.keys()))) + + # decide if to update only last (potentially list) of record or all + for d in list_prebuilt_flattened_dict[str(global_max_record)]: + d[key] = object_[first_key] + + else: + for object_key in object_: + if not (not key and object_key in root_keys_to_ignore): + _flatten_low_entropy(object_[object_key], + _construct_key(key, + separator, + object_key), cur_depth, max_depth) # build the next child key + + # lists could go into rows, like in a relational database + elif isinstance(object_, list) or isinstance(object_, set): + if debug: print("\nparent key of list:", key, "| length: ", str(len(object_))) + + # need to remember global list state when we entered 
this recursion + global_max_record_start = int(max(list(list_prebuilt_flattened_dict.keys()))) + entry = copy.deepcopy(list_prebuilt_flattened_dict[str(global_max_record_start)]) + + for index, item in enumerate(object_): + # create new copy of all current records in global list for each element in any list + if debug: print(" list key:", key, " index: " + str(index), "vals: ", item) + + sub = -1 + if isinstance(item, dict): + first_value = list(item.values())[0] + if isinstance(first_value, float): + sub = first_value + + if not isnan(sub) and index < max_list_index: + if index > 0: # start from second element, 1st element is like column + global_max_record = int(max(list(list_prebuilt_flattened_dict.keys()))) + if debug: print("copy entry: ", entry) + + list_prebuilt_flattened_dict[str(global_max_record+1)] = copy.deepcopy(entry) + + if debug: print(" copied max record in global dict") + if debug: print(" len global list last rec: ", len(list_prebuilt_flattened_dict[str(global_max_record)]), + "len list: ", len(list_prebuilt_flattened_dict)) + + # copy only last element(s), by index. Need to index the global list? + # in global list, each record can have several rows. Keep an index. + + # dig in + _flatten_low_entropy(item, key, cur_depth, max_depth) + else: + pass + + list_prebuilt_flattened_dict['0'] = [subel for k, v in list_prebuilt_flattened_dict.items() for idx, subel in enumerate(v)] + + for key in list(list_prebuilt_flattened_dict.keys()): + if key != '0': + del list_prebuilt_flattened_dict[key] + if debug: print("collapsed global list") + + # Anything left take as is, assuming you hit the end of the line. + else: + # in this case, there may be a list of prebuilt_flattened_dict by now, so need to update them all. + global_max_record = int(max(list(list_prebuilt_flattened_dict.keys()))) + + for d in list_prebuilt_flattened_dict[str(global_max_record)]: + d[key] = object_ + + # decrease depth counter + cur_depth -= 1 + + _flatten(nested_dict, None, cur_depth=0, max_depth=max_depth) + + # get unique column names, without the integers + # TODO: potential issue: what if column names have digits naturally? 
+ reskeys = list(flattened_dict.keys()) + unique_integers = list(set([separator+char for key in reskeys for char in key if char.isdigit()])) + regex = '|'.join(unique_integers) + regex += "|" + regex.replace(".", "") + unique_columns = list(set([re.sub("("+regex+")", "", key) for key in reskeys])) + + # create global dict, now with unique column names + prebuilt_flattened_dict = {column: None for column in unique_columns} + # if debug: print("unique columns: ",unique_columns) + + # initialize global record list + list_prebuilt_flattened_dict = {'0': [prebuilt_flattened_dict]} + + _flatten_low_entropy(nested_dict, None, cur_depth=0, max_depth=max_depth) + + return list_prebuilt_flattened_dict['0'] + + def _unflatten_asserts(flat_dict, separator): assert isinstance(flat_dict, dict), "un_flatten requires dictionary input" assert isinstance(separator, six.string_types), "separator must be string" From b76306b9e2a7bbe11f485a67468e721cb1d00a12 Mon Sep 17 00:00:00 2001 From: kaiaeberli Date: Tue, 5 Mar 2019 01:04:40 +0000 Subject: [PATCH 13/24] Update test_flatten.py --- test_flatten.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_flatten.py b/test_flatten.py index 79b797e..df421c2 100644 --- a/test_flatten.py +++ b/test_flatten.py @@ -235,7 +235,7 @@ def test_unflatten_with_list_issue31(self): def test_unflatten_with_df_issue40(self): """https://github.com/amirziai/flatten/issues/40""" dic = { - 'a.b': 1, + 'a.b': float('nan'), 'a.b.c': 2, 'a.b.d': 3, 'a.e': 4 From 3c5c9a7bef761e7e8cea52ae8548e49a83337deb Mon Sep 17 00:00:00 2001 From: kaiaeberli Date: Tue, 5 Mar 2019 19:54:02 +0000 Subject: [PATCH 14/24] pep8 --- flatten_json.py | 116 ++++++++++++++++++++++++++++++------------------ 1 file changed, 72 insertions(+), 44 deletions(-) diff --git a/flatten_json.py b/flatten_json.py index 8a77550..f8d6ef4 100644 --- a/flatten_json.py +++ b/flatten_json.py @@ -84,27 +84,30 @@ def _flatten(object_, key): flatten_json = flatten -def flatten_preserve_lists(nested_dict, separator="_", root_keys_to_ignore=set(), max_list_index=3, max_depth=3): +def flatten_preserve_lists(nested_dict, separator="_" + , root_keys_to_ignore=set(), max_list_index=3, max_depth=3): """ Flattens a dictionary with nested structure to a dictionary with no hierarchy Consider ignoring keys that you are not interested in to prevent unnecessary processing This is specially true for very deep objects - This preserves list structure, and you can specify max_list_index and max_depth to limit processing + This preserves list structure, and + you can specify max_list_index and max_depth to limit processing - Child elements with only one value inside will be unwrapped and become parent's value. + Child elements with only one value inside + will be unwrapped and become parent's value. :param nested_dict: dictionary we want to flatten :param separator: string to separate dictionary keys by :param root_keys_to_ignore: set of root keys to ignore from flattening - :param max_list_index: maximum list index to process. Any index beyond this will be ignored. - :param max_depth: maximum nesting depth to process. Anything further down is written as a string into value. 
+ :param max_list_index: maximum list index to process + :param max_depth: maximum nesting depth to process :return: flattened dictionary """ assert isinstance(nested_dict, dict), "flatten requires a dictionary input" - assert isinstance(separator, str), "separator must be a string" + assert isinstance(separator, six.string_types), "separator must be a string" # This global dictionary stores the flattened keys and values and is # ultimately returned @@ -128,19 +131,24 @@ def _flatten(object_, key, cur_depth, max_depth): # dict always go into columns elif isinstance(object_, dict): first_key = list(object_.keys())[0] - # if only 1 child value, and it's first_key's value not a dict or list, flatten immediately - if len(object_) == 1 and not (isinstance(object_[first_key], dict) or isinstance(object_[first_key], list)): + # if only 1 child value, and child value not a dict or list + # flatten immediately + if len(object_) == 1 + and not (isinstance(object_[first_key], dict) + or isinstance(object_[first_key], list) + ): flattened_dict[key] = object_[first_key] else: for object_key in object_: if not (not key and object_key in root_keys_to_ignore): - _flatten(object_[object_key], _construct_key(key, - separator, - object_key), cur_depth, max_depth) # build the next child key + _flatten(object_[object_key] + , _construct_key(key, separator, object_key) + , cur_depth, max_depth) # build the next child key elif isinstance(object_, list) or isinstance(object_, set): for index, item in enumerate(object_): - _flatten(item, _construct_key(key, separator, index), cur_depth, max_depth) + _flatten(item, _construct_key(key, separator, index) + , cur_depth, max_depth) else: flattened_dict[key] = object_ @@ -160,14 +168,16 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth): # write latest child as value if max_depth exceeded if cur_depth > max_depth: - global_max_record = int(max(list(list_prebuilt_flattened_dict.keys()))) + global_max_record = int(max(list( + list_prebuilt_flattened_dict.keys()))) for d in list_prebuilt_flattened_dict[str(global_max_record)]: d[key] = object_ else: # Empty object can't be iterated, take as is if not object_: - global_max_record = int(max(list(list_prebuilt_flattened_dict.keys()))) + global_max_record = int(max(list( + list_prebuilt_flattened_dict.keys()))) for d in list_prebuilt_flattened_dict[str(global_max_record)]: d[key] = object_ @@ -175,12 +185,18 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth): # dict always go into columns elif isinstance(object_, dict): first_key = list(object_.keys())[0] - # if only 1 child value, and not a dict or list, flatten immediately - if len(object_) == 1 and not (isinstance(object_[first_key], dict) or isinstance(object_[first_key], list)): - global_max_record = int(max(list(list_prebuilt_flattened_dict.keys()))) - - # decide if to update only last (potentially list) of record or all - for d in list_prebuilt_flattened_dict[str(global_max_record)]: + # if only 1 child value, and child value + # not a dict or list, flatten immediately + if len(object_) == 1 + and not (isinstance(object_[first_key], dict) + or isinstance(object_[first_key], list) + ): + global_max_record = int(max(list( + list_prebuilt_flattened_dict.keys()))) + + for d in list_prebuilt_flattened_dict[ + str(global_max_record) + ]: d[key] = object_[first_key] else: @@ -189,19 +205,27 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth): _flatten_low_entropy(object_[object_key], _construct_key(key, separator, - object_key), cur_depth, 
max_depth) # build the next child key + object_key) + , cur_depth, max_depth) # build the next child key # lists could go into rows, like in a relational database elif isinstance(object_, list) or isinstance(object_, set): - if debug: print("\nparent key of list:", key, "| length: ", str(len(object_))) - - # need to remember global list state when we entered this recursion - global_max_record_start = int(max(list(list_prebuilt_flattened_dict.keys()))) - entry = copy.deepcopy(list_prebuilt_flattened_dict[str(global_max_record_start)]) + if debug: print("\nparent key of list:" + , key, "| length: " + , str(len(object_))) + + # need to remember global list state when we entered + # this recursion + global_max_record_start = int(max(list( + list_prebuilt_flattened_dict.keys()))) + entry = copy.deepcopy(list_prebuilt_flattened_dict[ + str(global_max_record_start) + ]) for index, item in enumerate(object_): - # create new copy of all current records in global list for each element in any list - if debug: print(" list key:", key, " index: " + str(index), "vals: ", item) + + if debug: print(" list key:", key + , " index: " + str(index), "vals: ", item) sub = -1 if isinstance(item, dict): @@ -210,25 +234,24 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth): sub = first_value if not isnan(sub) and index < max_list_index: - if index > 0: # start from second element, 1st element is like column - global_max_record = int(max(list(list_prebuilt_flattened_dict.keys()))) + # start from second element, 1st element is like column + if index > 0: + global_max_record = int(max(list( + list_prebuilt_flattened_dict.keys()))) if debug: print("copy entry: ", entry) - list_prebuilt_flattened_dict[str(global_max_record+1)] = copy.deepcopy(entry) - - if debug: print(" copied max record in global dict") - if debug: print(" len global list last rec: ", len(list_prebuilt_flattened_dict[str(global_max_record)]), - "len list: ", len(list_prebuilt_flattened_dict)) - - # copy only last element(s), by index. Need to index the global list? - # in global list, each record can have several rows. Keep an index. + list_prebuilt_flattened_dict[ + str(global_max_record+1) + ] = copy.deepcopy(entry) - # dig in _flatten_low_entropy(item, key, cur_depth, max_depth) else: pass - list_prebuilt_flattened_dict['0'] = [subel for k, v in list_prebuilt_flattened_dict.items() for idx, subel in enumerate(v)] + list_prebuilt_flattened_dict['0'] = + [subel for k, v in + list_prebuilt_flattened_dict.items() + for idx, subel in enumerate(v)] for key in list(list_prebuilt_flattened_dict.keys()): if key != '0': @@ -237,8 +260,11 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth): # Anything left take as is, assuming you hit the end of the line. else: - # in this case, there may be a list of prebuilt_flattened_dict by now, so need to update them all. - global_max_record = int(max(list(list_prebuilt_flattened_dict.keys()))) + # in this case, there may be + # a list of prebuilt_flattened_dict by now + # so need to update them all. + global_max_record = int(max(list( + list_prebuilt_flattened_dict.keys()))) for d in list_prebuilt_flattened_dict[str(global_max_record)]: d[key] = object_ @@ -251,10 +277,12 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth): # get unique column names, without the integers # TODO: potential issue: what if column names have digits naturally? 
reskeys = list(flattened_dict.keys()) - unique_integers = list(set([separator+char for key in reskeys for char in key if char.isdigit()])) + unique_integers = list(set([separator+char for key + in reskeys for char in key if char.isdigit()])) regex = '|'.join(unique_integers) regex += "|" + regex.replace(".", "") - unique_columns = list(set([re.sub("("+regex+")", "", key) for key in reskeys])) + unique_columns = list(set([re.sub("("+regex+")", "", key) + for key in reskeys])) # create global dict, now with unique column names prebuilt_flattened_dict = {column: None for column in unique_columns} From 7b16286824745cdd5bd4d34b368fbb555d703996 Mon Sep 17 00:00:00 2001 From: kaiaeberli Date: Tue, 5 Mar 2019 20:14:30 +0000 Subject: [PATCH 15/24] Update flatten_json.py --- flatten_json.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/flatten_json.py b/flatten_json.py index f8d6ef4..4ae3710 100644 --- a/flatten_json.py +++ b/flatten_json.py @@ -133,8 +133,8 @@ def _flatten(object_, key, cur_depth, max_depth): first_key = list(object_.keys())[0] # if only 1 child value, and child value not a dict or list # flatten immediately - if len(object_) == 1 - and not (isinstance(object_[first_key], dict) + if len(object_) == 1 \ + and not (isinstance(object_[first_key], dict) or isinstance(object_[first_key], list) ): flattened_dict[key] = object_[first_key] @@ -187,8 +187,8 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth): first_key = list(object_.keys())[0] # if only 1 child value, and child value # not a dict or list, flatten immediately - if len(object_) == 1 - and not (isinstance(object_[first_key], dict) + if len(object_) == 1 \ + and not (isinstance(object_[first_key], dict) or isinstance(object_[first_key], list) ): global_max_record = int(max(list( @@ -210,7 +210,8 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth): # lists could go into rows, like in a relational database elif isinstance(object_, list) or isinstance(object_, set): - if debug: print("\nparent key of list:" + if debug: + print("\nparent key of list:" , key, "| length: " , str(len(object_))) @@ -224,7 +225,8 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth): for index, item in enumerate(object_): - if debug: print(" list key:", key + if debug: + print(" list key:", key , " index: " + str(index), "vals: ", item) sub = -1 @@ -238,7 +240,6 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth): if index > 0: global_max_record = int(max(list( list_prebuilt_flattened_dict.keys()))) - if debug: print("copy entry: ", entry) list_prebuilt_flattened_dict[ str(global_max_record+1) @@ -248,15 +249,16 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth): else: pass - list_prebuilt_flattened_dict['0'] = - [subel for k, v in - list_prebuilt_flattened_dict.items() - for idx, subel in enumerate(v)] + list_prebuilt_flattened_dict['0'] = \ + [subel for k, v in + list_prebuilt_flattened_dict.items() + for idx, subel in enumerate(v)] for key in list(list_prebuilt_flattened_dict.keys()): if key != '0': del list_prebuilt_flattened_dict[key] - if debug: print("collapsed global list") + if debug: + print("collapsed global list") # Anything left take as is, assuming you hit the end of the line. 
else: @@ -286,7 +288,6 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth): # create global dict, now with unique column names prebuilt_flattened_dict = {column: None for column in unique_columns} - # if debug: print("unique columns: ",unique_columns) # initialize global record list list_prebuilt_flattened_dict = {'0': [prebuilt_flattened_dict]} From 139e4860b97d504eb7204fa178a69fe940f7ed60 Mon Sep 17 00:00:00 2001 From: kaiaeberli Date: Tue, 5 Mar 2019 20:31:13 +0000 Subject: [PATCH 16/24] Update flatten_json.py --- flatten_json.py | 76 +++++++++++++++++++++++++------------------------ 1 file changed, 39 insertions(+), 37 deletions(-) diff --git a/flatten_json.py b/flatten_json.py index 4ae3710..fbd81d9 100644 --- a/flatten_json.py +++ b/flatten_json.py @@ -3,6 +3,7 @@ import sys import json + try: # 3.8 and up from collections.abc import Iterable @@ -11,7 +12,8 @@ from util import check_if_numbers_are_consecutive import six -import copy, re +import copy +import re from math import isnan @@ -85,17 +87,18 @@ def _flatten(object_, key): def flatten_preserve_lists(nested_dict, separator="_" - , root_keys_to_ignore=set(), max_list_index=3, max_depth=3): + , root_keys_to_ignore=set() + , max_list_index=3, max_depth=3): """ Flattens a dictionary with nested structure to a dictionary with no hierarchy Consider ignoring keys that you are not interested in to prevent unnecessary processing This is specially true for very deep objects - This preserves list structure, and + This preserves list structure, and you can specify max_list_index and max_depth to limit processing - Child elements with only one value inside + Child elements with only one value inside will be unwrapped and become parent's value. :param nested_dict: dictionary we want to flatten @@ -134,21 +137,21 @@ def _flatten(object_, key, cur_depth, max_depth): # if only 1 child value, and child value not a dict or list # flatten immediately if len(object_) == 1 \ - and not (isinstance(object_[first_key], dict) - or isinstance(object_[first_key], list) + and not (isinstance(object_[first_key], dict) + or isinstance(object_[first_key], list) ): flattened_dict[key] = object_[first_key] else: for object_key in object_: if not (not key and object_key in root_keys_to_ignore): _flatten(object_[object_key] - , _construct_key(key, separator, object_key) - , cur_depth, max_depth) # build the next child key - + , _construct_key(key, separator, object_key) + , cur_depth, max_depth) + elif isinstance(object_, list) or isinstance(object_, set): for index, item in enumerate(object_): _flatten(item, _construct_key(key, separator, index) - , cur_depth, max_depth) + , cur_depth, max_depth) else: flattened_dict[key] = object_ @@ -188,9 +191,8 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth): # if only 1 child value, and child value # not a dict or list, flatten immediately if len(object_) == 1 \ - and not (isinstance(object_[first_key], dict) - or isinstance(object_[first_key], list) - ): + and not (isinstance(object_[first_key], dict) + or isinstance(object_[first_key], list)): global_max_record = int(max(list( list_prebuilt_flattened_dict.keys()))) @@ -204,30 +206,30 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth): if not (not key and object_key in root_keys_to_ignore): _flatten_low_entropy(object_[object_key], _construct_key(key, - separator, - object_key) - , cur_depth, max_depth) # build the next child key + separator, + object_key) + , cur_depth, max_depth) # lists could go into rows, like in a relational 
database elif isinstance(object_, list) or isinstance(object_, set): - if debug: + if debug: print("\nparent key of list:" - , key, "| length: " - , str(len(object_))) + , key, "| length: " + , str(len(object_))) # need to remember global list state when we entered # this recursion global_max_record_start = int(max(list( list_prebuilt_flattened_dict.keys()))) entry = copy.deepcopy(list_prebuilt_flattened_dict[ - str(global_max_record_start) - ]) + str(global_max_record_start) + ]) for index, item in enumerate(object_): - - if debug: + + if debug: print(" list key:", key - , " index: " + str(index), "vals: ", item) + , " index: " + str(index), "vals: ", item) sub = -1 if isinstance(item, dict): @@ -242,7 +244,7 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth): list_prebuilt_flattened_dict.keys()))) list_prebuilt_flattened_dict[ - str(global_max_record+1) + str(global_max_record + 1) ] = copy.deepcopy(entry) _flatten_low_entropy(item, key, cur_depth, max_depth) @@ -251,13 +253,13 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth): list_prebuilt_flattened_dict['0'] = \ [subel for k, v in - list_prebuilt_flattened_dict.items() - for idx, subel in enumerate(v)] + list_prebuilt_flattened_dict.items() + for idx, subel in enumerate(v)] for key in list(list_prebuilt_flattened_dict.keys()): if key != '0': del list_prebuilt_flattened_dict[key] - if debug: + if debug: print("collapsed global list") # Anything left take as is, assuming you hit the end of the line. @@ -269,9 +271,9 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth): list_prebuilt_flattened_dict.keys()))) for d in list_prebuilt_flattened_dict[str(global_max_record)]: - d[key] = object_ + d[key] = object_ - # decrease depth counter + # decrease depth counter cur_depth -= 1 _flatten(nested_dict, None, cur_depth=0, max_depth=max_depth) @@ -279,12 +281,12 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth): # get unique column names, without the integers # TODO: potential issue: what if column names have digits naturally? 
reskeys = list(flattened_dict.keys()) - unique_integers = list(set([separator+char for key - in reskeys for char in key if char.isdigit()])) + unique_integers = list(set([separator + char for key + in reskeys for char in key if char.isdigit()])) regex = '|'.join(unique_integers) regex += "|" + regex.replace(".", "") - unique_columns = list(set([re.sub("("+regex+")", "", key) - for key in reskeys])) + unique_columns = list(set([re.sub("(" + regex + ")", "", key) + for key in reskeys])) # create global dict, now with unique column names prebuilt_flattened_dict = {column: None for column in unique_columns} @@ -326,8 +328,8 @@ def _unflatten(dic, keys, value): list_keys = sorted(flat_dict.keys()) for i, item in enumerate(list_keys): - if i != len(list_keys)-1: - if not list_keys[i+1].startswith(list_keys[i]): + if i != len(list_keys) - 1: + if not list_keys[i + 1].startswith(list_keys[i]): _unflatten(unflattened_dict, item.split(separator), flat_dict[item]) else: @@ -372,7 +374,7 @@ def _convert_dict_to_list(object_, parent_object, parent_object_key): keys_len = len(keys) if (keys_len > 0 and sum(keys) == - int(((keys_len - 1) * keys_len) / 2) and keys[0] == 0 and + int(((keys_len - 1) * keys_len) / 2) and keys[0] == 0 and keys[-1] == keys_len - 1 and check_if_numbers_are_consecutive(keys)): From 0c2bd75c55c9c1364e1d9664a49ab22d913b540b Mon Sep 17 00:00:00 2001 From: kaiaeberli Date: Wed, 6 Mar 2019 09:42:07 +0000 Subject: [PATCH 17/24] added flatten_preserve_lists test --- test_flatten.py | 235 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 234 insertions(+), 1 deletion(-) diff --git a/test_flatten.py b/test_flatten.py index df421c2..49aec93 100644 --- a/test_flatten.py +++ b/test_flatten.py @@ -11,7 +11,7 @@ # python3 from io import StringIO -from flatten_json import flatten, unflatten, unflatten_list, cli +from flatten_json import flatten, flatten_preserve_lists, unflatten, unflatten_list, cli from util import check_if_numbers_are_consecutive @@ -247,6 +247,239 @@ def test_unflatten_with_df_issue40(self): } actual = unflatten(dic, '.') self.assertEqual(actual, expected) + + def test_flatten_preserve_lists_issue43(self): + """https://github.com/amirziai/flatten/issues/43""" + dic = { + 'a': {'a': ["x0", "x1", "x2"]}, + 'b': {'b': 'foo', 'c': 'bar'}, + 'c': {'c': [ + {'foo': 2, 'bar': 6, 'baz': ["n1", "n2", "n3", "n1.1", "n2.2"]}, + {'foo': 5, 'bar': 7, 'baz': ["n4", "n5", "n6"]}, + {'foo': float('nan')}, + {'foo': 100}, + ]}, + 'd': {'g': 10} + } + expected = [{'a_a': 'x0', + 'c_c_foo': 2, + 'c_c_baz': 'n1', + 'c_c_bar': 6, + 'b_b': 'foo', + 'd': 10, + 'b_c': 'bar', + 'c_c': None}, + {'a_a': 'x1', + 'c_c_foo': 2, + 'c_c_baz': 'n1', + 'c_c_bar': 6, + 'b_b': 'foo', + 'd': 10, + 'b_c': 'bar', + 'c_c': None}, + {'a_a': 'x2', + 'c_c_foo': 2, + 'c_c_baz': 'n1', + 'c_c_bar': 6, + 'b_b': 'foo', + 'd': 10, + 'b_c': 'bar', + 'c_c': None}, + {'a_a': 'x0', + 'c_c_foo': 2, + 'c_c_baz': 'n2', + 'c_c_bar': 6, + 'b_b': 'foo', + 'd': 10, + 'b_c': 'bar', + 'c_c': None}, + {'a_a': 'x1', + 'c_c_foo': 2, + 'c_c_baz': 'n2', + 'c_c_bar': 6, + 'b_b': 'foo', + 'd': 10, + 'b_c': 'bar', + 'c_c': None}, + {'a_a': 'x2', + 'c_c_foo': 2, + 'c_c_baz': 'n2', + 'c_c_bar': 6, + 'b_b': 'foo', + 'd': 10, + 'b_c': 'bar', + 'c_c': None}, + {'a_a': 'x0', + 'c_c_foo': 2, + 'c_c_baz': 'n3', + 'c_c_bar': 6, + 'b_b': 'foo', + 'd': 10, + 'b_c': 'bar', + 'c_c': None}, + {'a_a': 'x1', + 'c_c_foo': 2, + 'c_c_baz': 'n3', + 'c_c_bar': 6, + 'b_b': 'foo', + 'd': 10, + 'b_c': 'bar', + 'c_c': None}, + {'a_a': 'x2', + 
'c_c_foo': 2, + 'c_c_baz': 'n3', + 'c_c_bar': 6, + 'b_b': 'foo', + 'd': 10, + 'b_c': 'bar', + 'c_c': None}, + {'a_a': 'x0', + 'c_c_foo': 2, + 'c_c_baz': 'n1.1', + 'c_c_bar': 6, + 'b_b': 'foo', + 'd': 10, + 'b_c': 'bar', + 'c_c': None}, + {'a_a': 'x1', + 'c_c_foo': 2, + 'c_c_baz': 'n1.1', + 'c_c_bar': 6, + 'b_b': 'foo', + 'd': 10, + 'b_c': 'bar', + 'c_c': None}, + {'a_a': 'x2', + 'c_c_foo': 2, + 'c_c_baz': 'n1.1', + 'c_c_bar': 6, + 'b_b': 'foo', + 'd': 10, + 'b_c': 'bar', + 'c_c': None}, + {'a_a': 'x0', + 'c_c_foo': 2, + 'c_c_baz': 'n2.2', + 'c_c_bar': 6, + 'b_b': 'foo', + 'd': 10, + 'b_c': 'bar', + 'c_c': None}, + {'a_a': 'x1', + 'c_c_foo': 2, + 'c_c_baz': 'n2.2', + 'c_c_bar': 6, + 'b_b': 'foo', + 'd': 10, + 'b_c': 'bar', + 'c_c': None}, + {'a_a': 'x2', + 'c_c_foo': 2, + 'c_c_baz': 'n2.2', + 'c_c_bar': 6, + 'b_b': 'foo', + 'd': 10, + 'b_c': 'bar', + 'c_c': None}, + {'a_a': 'x0', + 'c_c_foo': 5, + 'c_c_baz': 'n4', + 'c_c_bar': 7, + 'b_b': 'foo', + 'd': 10, + 'b_c': 'bar', + 'c_c': None}, + {'a_a': 'x1', + 'c_c_foo': 5, + 'c_c_baz': 'n4', + 'c_c_bar': 7, + 'b_b': 'foo', + 'd': 10, + 'b_c': 'bar', + 'c_c': None}, + {'a_a': 'x2', + 'c_c_foo': 5, + 'c_c_baz': 'n4', + 'c_c_bar': 7, + 'b_b': 'foo', + 'd': 10, + 'b_c': 'bar', + 'c_c': None}, + {'a_a': 'x0', + 'c_c_foo': 5, + 'c_c_baz': 'n5', + 'c_c_bar': 7, + 'b_b': 'foo', + 'd': 10, + 'b_c': 'bar', + 'c_c': None}, + {'a_a': 'x1', + 'c_c_foo': 5, + 'c_c_baz': 'n5', + 'c_c_bar': 7, + 'b_b': 'foo', + 'd': 10, + 'b_c': 'bar', + 'c_c': None}, + {'a_a': 'x2', + 'c_c_foo': 5, + 'c_c_baz': 'n5', + 'c_c_bar': 7, + 'b_b': 'foo', + 'd': 10, + 'b_c': 'bar', + 'c_c': None}, + {'a_a': 'x0', + 'c_c_foo': 5, + 'c_c_baz': 'n6', + 'c_c_bar': 7, + 'b_b': 'foo', + 'd': 10, + 'b_c': 'bar', + 'c_c': None}, + {'a_a': 'x1', + 'c_c_foo': 5, + 'c_c_baz': 'n6', + 'c_c_bar': 7, + 'b_b': 'foo', + 'd': 10, + 'b_c': 'bar', + 'c_c': None}, + {'a_a': 'x2', + 'c_c_foo': 5, + 'c_c_baz': 'n6', + 'c_c_bar': 7, + 'b_b': 'foo', + 'd': 10, + 'b_c': 'bar', + 'c_c': None}, + {'a_a': 'x0', + 'c_c_foo': None, + 'c_c_baz': None, + 'c_c_bar': None, + 'b_b': 'foo', + 'd': 10, + 'b_c': 'bar', + 'c_c': 100}, + {'a_a': 'x1', + 'c_c_foo': None, + 'c_c_baz': None, + 'c_c_bar': None, + 'b_b': 'foo', + 'd': 10, + 'b_c': 'bar', + 'c_c': 100}, + {'a_a': 'x2', + 'c_c_foo': None, + 'c_c_baz': None, + 'c_c_bar': None, + 'b_b': 'foo', + 'd': 10, + 'b_c': 'bar', + 'c_c': 100}] + + actual = flatten_preserve_lists(dic) + self.assertEqual(actual, expected) def test_unflatten_with_list_deep(self): dic = {'a': [ From ce85c3a93f9ad2d779597afccf8449c8c3e46454 Mon Sep 17 00:00:00 2001 From: kaiaeberli Date: Wed, 6 Mar 2019 09:46:17 +0000 Subject: [PATCH 18/24] Update test_flatten.py --- test_flatten.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_flatten.py b/test_flatten.py index 49aec93..25059ad 100644 --- a/test_flatten.py +++ b/test_flatten.py @@ -478,7 +478,7 @@ def test_flatten_preserve_lists_issue43(self): 'b_c': 'bar', 'c_c': 100}] - actual = flatten_preserve_lists(dic) + actual = flatten_preserve_lists(dic, max_list_index=50, max_depth=10) self.assertEqual(actual, expected) def test_unflatten_with_list_deep(self): From bcbf0c686ca371b1b5309e2c45b33b209e230a60 Mon Sep 17 00:00:00 2001 From: kaiaeberli Date: Thu, 7 Mar 2019 14:37:26 +0000 Subject: [PATCH 19/24] added sorted to make dict python 2 compatible --- flatten_json.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flatten_json.py b/flatten_json.py index fbd81d9..9fdc21b 100644 --- a/flatten_json.py 
+++ b/flatten_json.py @@ -253,10 +253,10 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth): list_prebuilt_flattened_dict['0'] = \ [subel for k, v in - list_prebuilt_flattened_dict.items() + sorted(list_prebuilt_flattened_dict.items()) for idx, subel in enumerate(v)] - for key in list(list_prebuilt_flattened_dict.keys()): + for key in list(sorted(list_prebuilt_flattened_dict.keys())): if key != '0': del list_prebuilt_flattened_dict[key] if debug: From 24ff42bbcfd0c4d26cd15deb69b12f8e46061985 Mon Sep 17 00:00:00 2001 From: kaiaeberli Date: Sun, 10 Mar 2019 15:59:11 +0000 Subject: [PATCH 20/24] fixed sorting for dictionaries, so that simple types are processed before lists --- flatten_json.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flatten_json.py b/flatten_json.py index 9fdc21b..efec225 100644 --- a/flatten_json.py +++ b/flatten_json.py @@ -202,7 +202,7 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth): d[key] = object_[first_key] else: - for object_key in object_: + for object_key, val in sorted(object_.items(), key=lambda x: str(type(x[1])), reverse=False): if not (not key and object_key in root_keys_to_ignore): _flatten_low_entropy(object_[object_key], _construct_key(key, From a71a8a6ba40ab748a166f1a106548f1cb925ea10 Mon Sep 17 00:00:00 2001 From: kaiaeberli Date: Sun, 10 Mar 2019 18:55:42 +0000 Subject: [PATCH 21/24] tie resolution for dictionaries where values are all the same type. Resolving by element length. --- flatten_json.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flatten_json.py b/flatten_json.py index efec225..5bf849c 100644 --- a/flatten_json.py +++ b/flatten_json.py @@ -202,7 +202,7 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth): d[key] = object_[first_key] else: - for object_key, val in sorted(object_.items(), key=lambda x: str(type(x[1])), reverse=False): + for object_key, val in sorted(object_.items(), key=lambda x: (str(type(x[1])), len(str(x[1]))), reverse=False): if not (not key and object_key in root_keys_to_ignore): _flatten_low_entropy(object_[object_key], _construct_key(key, From ba08b9f825fad263b64e6c5b7f7098d23ee87a6e Mon Sep 17 00:00:00 2001 From: kaiaeberli Date: Mon, 11 Mar 2019 10:39:40 +0000 Subject: [PATCH 22/24] added more tests for flatten_preserve_lists for nested lists --- test_flatten.py | 1647 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1647 insertions(+) diff --git a/test_flatten.py b/test_flatten.py index 25059ad..b3a5fe7 100644 --- a/test_flatten.py +++ b/test_flatten.py @@ -248,6 +248,1653 @@ def test_unflatten_with_df_issue40(self): actual = unflatten(dic, '.') self.assertEqual(actual, expected) + def test_flatten_preserve_lists_issue43_other(self): + """https://github.com/amirziai/flatten/issues/43""" + dic = { + 'a': {'a': ["x0", "x1", "x2"]}, + 'b': {'b': 'foo', 'c': 'bar'}, + 'c': {'c': [ + {'foo': 2, 'bar': 6, 'baz': ["n1", "n2", "n3", "n1.1", "n2.2"]}, + {'foo': 5, 'bar': 7, 'baz': ["n4", "n5", "n6"]}, + {'foo': float('nan')}, + {'foo': 100}, + ]}, + 'd': {'g': 10}, + 'f': {'h': 100, 'gar': [ + {"gup": 200, "garp": [ + {"gu": 300, "gat": ["f7", "f8"]}, + {"gu": 800, "gat": ["f9", "f10", "f11"]} + ] + }]} + } + + actual = flatten_preserve_lists(dic, max_depth=100, max_list_index=30) + + expected = [{"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n1", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f7", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x1", + "f_h": 
100, + "c_c": None, + "c_c_baz": "n1", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f7", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n1", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f7", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n1", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f8", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n1", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f8", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n1", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f8", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n1", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f9", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n1", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f9", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n1", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f9", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n1", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f10", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n1", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f10", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n1", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f10", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n1", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f11", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n1", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f11", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n1", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f11", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n2", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f7", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n2", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f7", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n2", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f7", + "c_c_foo": 2, + "d": 10}, + 
{"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n2", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f8", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n2", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f8", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n2", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f8", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n2", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f9", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n2", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f9", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n2", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f9", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n2", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f10", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n2", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f10", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n2", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f10", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n2", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f11", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n2", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f11", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n2", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f11", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n3", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f7", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n3", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f7", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n3", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f7", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n3", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f8", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n3", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f8", + 
"c_c_foo": 2, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n3", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f8", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n3", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f9", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n3", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f9", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n3", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f9", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n3", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f10", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n3", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f10", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n3", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f10", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n3", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f11", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n3", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f11", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n3", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f11", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n1.1", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f7", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n1.1", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f7", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n1.1", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f7", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n1.1", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f8", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n1.1", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f8", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n1.1", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f8", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n1.1", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + 
"c_c_bar": 6, + "f_gar_garp_gat": "f9", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n1.1", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f9", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n1.1", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f9", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n1.1", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f10", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n1.1", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f10", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n1.1", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f10", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n1.1", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f11", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n1.1", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f11", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n1.1", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f11", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n2.2", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f7", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n2.2", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f7", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n2.2", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f7", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n2.2", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f8", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n2.2", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f8", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n2.2", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f8", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n2.2", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f9", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n2.2", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f9", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n2.2", + "b_b": "foo", + 
"b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f9", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n2.2", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f10", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n2.2", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f10", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n2.2", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f10", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n2.2", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f11", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n2.2", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f11", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n2.2", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 6, + "f_gar_garp_gat": "f11", + "c_c_foo": 2, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n4", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f7", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n4", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f7", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n4", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f7", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n4", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f8", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n4", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f8", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n4", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f8", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n4", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f9", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n4", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f9", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n4", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f9", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n4", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f10", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + 
"c_c_baz": "n4", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f10", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n4", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f10", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n4", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f11", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n4", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f11", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n4", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f11", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n5", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f7", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n5", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f7", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n5", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f7", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n5", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f8", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n5", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f8", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n5", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f8", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n5", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f9", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n5", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f9", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n5", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f9", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n5", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f10", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n5", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f10", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n5", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f10", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x0", + 
"f_h": 100, + "c_c": None, + "c_c_baz": "n5", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f11", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n5", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f11", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n5", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f11", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n6", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f7", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n6", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f7", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n6", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f7", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n6", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f8", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n6", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f8", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n6", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f8", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n6", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f9", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n6", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f9", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n6", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f9", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n6", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f10", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n6", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f10", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n6", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f10", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": None, + "c_c_baz": "n6", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f11", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": None, + "c_c_baz": "n6", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f11", + "c_c_foo": 5, + "d": 
10}, + {"a_a": "x2", + "f_h": 100, + "c_c": None, + "c_c_baz": "n6", + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": 7, + "f_gar_garp_gat": "f11", + "c_c_foo": 5, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": 100, + "c_c_baz": None, + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": None, + "f_gar_garp_gat": "f7", + "c_c_foo": None, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": 100, + "c_c_baz": None, + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": None, + "f_gar_garp_gat": "f7", + "c_c_foo": None, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": 100, + "c_c_baz": None, + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": None, + "f_gar_garp_gat": "f7", + "c_c_foo": None, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": 100, + "c_c_baz": None, + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": None, + "f_gar_garp_gat": "f8", + "c_c_foo": None, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": 100, + "c_c_baz": None, + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": None, + "f_gar_garp_gat": "f8", + "c_c_foo": None, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": 100, + "c_c_baz": None, + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 300, + "f_gar_gup": 200, + "c_c_bar": None, + "f_gar_garp_gat": "f8", + "c_c_foo": None, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": 100, + "c_c_baz": None, + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": None, + "f_gar_garp_gat": "f9", + "c_c_foo": None, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": 100, + "c_c_baz": None, + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": None, + "f_gar_garp_gat": "f9", + "c_c_foo": None, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": 100, + "c_c_baz": None, + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": None, + "f_gar_garp_gat": "f9", + "c_c_foo": None, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": 100, + "c_c_baz": None, + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": None, + "f_gar_garp_gat": "f10", + "c_c_foo": None, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": 100, + "c_c_baz": None, + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": None, + "f_gar_garp_gat": "f10", + "c_c_foo": None, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": 100, + "c_c_baz": None, + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": None, + "f_gar_garp_gat": "f10", + "c_c_foo": None, + "d": 10}, + {"a_a": "x0", + "f_h": 100, + "c_c": 100, + "c_c_baz": None, + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": None, + "f_gar_garp_gat": "f11", + "c_c_foo": None, + "d": 10}, + {"a_a": "x1", + "f_h": 100, + "c_c": 100, + "c_c_baz": None, + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": None, + "f_gar_garp_gat": "f11", + "c_c_foo": None, + "d": 10}, + {"a_a": "x2", + "f_h": 100, + "c_c": 100, + "c_c_baz": None, + "b_b": "foo", + "b_c": "bar", + "f_gar_garp_gu": 800, + "f_gar_gup": 200, + "c_c_bar": None, + "f_gar_garp_gat": "f11", + "c_c_foo": None, + "d": 10}] + + + + self.assertEqual(expected, actual) + def test_flatten_preserve_lists_issue43(self): 
"""https://github.com/amirziai/flatten/issues/43""" dic = { From 1ac6062e1ba8b15b72d6f55a665196bedf4ab244 Mon Sep 17 00:00:00 2001 From: kaiaeberli Date: Tue, 12 Mar 2019 10:33:06 +0100 Subject: [PATCH 23/24] pep8 --- flatten_json.py | 54 +++++++++++++++++++++++++++---------------------- test_flatten.py | 21 ++++++++++--------- 2 files changed, 42 insertions(+), 33 deletions(-) diff --git a/flatten_json.py b/flatten_json.py index 5bf849c..99bc945 100644 --- a/flatten_json.py +++ b/flatten_json.py @@ -86,9 +86,9 @@ def _flatten(object_, key): flatten_json = flatten -def flatten_preserve_lists(nested_dict, separator="_" - , root_keys_to_ignore=set() - , max_list_index=3, max_depth=3): +def flatten_preserve_lists(nested_dict, separator="_", + root_keys_to_ignore=set(), + max_list_index=3, max_depth=3): """ Flattens a dictionary with nested structure to a dictionary with no hierarchy @@ -110,13 +110,14 @@ def flatten_preserve_lists(nested_dict, separator="_" """ assert isinstance(nested_dict, dict), "flatten requires a dictionary input" - assert isinstance(separator, six.string_types), "separator must be a string" + assert isinstance(separator, six.string_types), \ + "separator must be a string" # This global dictionary stores the flattened keys and values and is # ultimately returned flattened_dict = dict() - def _flatten(object_, key, cur_depth, max_depth): + def _flatten(object_, key): """ For dict, list and set objects_ calls itself on the elements and for other types assigns the object_ to @@ -139,24 +140,23 @@ def _flatten(object_, key, cur_depth, max_depth): if len(object_) == 1 \ and not (isinstance(object_[first_key], dict) or isinstance(object_[first_key], list) - ): + ): flattened_dict[key] = object_[first_key] else: for object_key in object_: if not (not key and object_key in root_keys_to_ignore): - _flatten(object_[object_key] - , _construct_key(key, separator, object_key) - , cur_depth, max_depth) + _flatten(object_[object_key], + _construct_key(key, separator, object_key) + ) elif isinstance(object_, list) or isinstance(object_, set): for index, item in enumerate(object_): - _flatten(item, _construct_key(key, separator, index) - , cur_depth, max_depth) + _flatten(item, _construct_key(key, separator, index)) else: flattened_dict[key] = object_ - def _flatten_low_entropy(object_, key, cur_depth, max_depth): + def _flatten_low_entropy(object_, key, cur_depth, max_depth_inner): """ For dict, list and set objects_ calls itself on the elements and for other types assigns the object_ to @@ -170,7 +170,7 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth): debug = 0 # write latest child as value if max_depth exceeded - if cur_depth > max_depth: + if cur_depth > max_depth_inner: global_max_record = int(max(list( list_prebuilt_flattened_dict.keys()))) for d in list_prebuilt_flattened_dict[str(global_max_record)]: @@ -202,20 +202,24 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth): d[key] = object_[first_key] else: - for object_key, val in sorted(object_.items(), key=lambda x: (str(type(x[1])), len(str(x[1]))), reverse=False): + for object_key, val in \ + sorted(object_.items(), + key=lambda x: + (str(type(x[1])), len(str(x[1]))), + reverse=False): if not (not key and object_key in root_keys_to_ignore): _flatten_low_entropy(object_[object_key], _construct_key(key, separator, - object_key) - , cur_depth, max_depth) + object_key), + cur_depth, max_depth_inner) # lists could go into rows, like in a relational database elif isinstance(object_, list) or 
        elif isinstance(object_, list) or isinstance(object_, set):
            if debug:
-                print("\nparent key of list:"
-                      , key, "| length: "
-                      , str(len(object_)))
+                print("\nparent key of list:",
+                      key, "| length: ",
+                      str(len(object_)))

                # need to remember global list state when we entered
                # this recursion
                global_max_record_start = int(max(list(
                    list_prebuilt_flattened_dict.keys())))
@@ -228,8 +232,8 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth):
             for index, item in enumerate(object_):
                 if debug:
-                    print(" list key:", key
-                          , " index: " + str(index), "vals: ", item)
+                    print(" list key:", key,
+                          " index: " + str(index), "vals: ", item)

                 sub = -1
                 if isinstance(item, dict):
@@ -247,7 +251,8 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth):
                             str(global_max_record + 1)
                         ] = copy.deepcopy(entry)

-                    _flatten_low_entropy(item, key, cur_depth, max_depth)
+                    _flatten_low_entropy(item, key, cur_depth,
+                                         max_depth_inner)
                 else:
                     pass

@@ -276,7 +281,7 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth):
        # decrease depth counter
        cur_depth -= 1

-    _flatten(nested_dict, None, cur_depth=0, max_depth=max_depth)
+    _flatten(nested_dict, None)

    # get unique column names, without the integers
    # TODO: potential issue: what if column names have digits naturally?
@@ -294,7 +299,8 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth):
    # initialize global record list
    list_prebuilt_flattened_dict = {'0': [prebuilt_flattened_dict]}

-    _flatten_low_entropy(nested_dict, None, cur_depth=0, max_depth=max_depth)
+    _flatten_low_entropy(nested_dict, None, cur_depth=0,
+                         max_depth_inner=max_depth)

    return list_prebuilt_flattened_dict['0']

diff --git a/test_flatten.py b/test_flatten.py
index b3a5fe7..5d56ef1 100644
--- a/test_flatten.py
+++ b/test_flatten.py
@@ -11,7 +11,8 @@
     # python3
     from io import StringIO

-from flatten_json import flatten, flatten_preserve_lists, unflatten, unflatten_list, cli
+from flatten_json import flatten, flatten_preserve_lists, unflatten, \
+    unflatten_list, cli
 from util import check_if_numbers_are_consecutive


@@ -248,13 +249,14 @@ def test_unflatten_with_df_issue40(self):
         actual = unflatten(dic, '.')
         self.assertEqual(actual, expected)

-    def test_flatten_preserve_lists_issue43_other(self):
+    def test_flatten_preserve_lists_issue43_nested(self):
         """https://github.com/amirziai/flatten/issues/43"""
         dic = {
             'a': {'a': ["x0", "x1", "x2"]},
             'b': {'b': 'foo', 'c': 'bar'},
             'c': {'c': [
-                {'foo': 2, 'bar': 6, 'baz': ["n1", "n2", "n3", "n1.1", "n2.2"]},
+                {'foo': 2, 'bar': 6, 'baz':
+                    ["n1", "n2", "n3", "n1.1", "n2.2"]},
                 {'foo': 5, 'bar': 7, 'baz': ["n4", "n5", "n6"]},
                 {'foo': float('nan')},
                 {'foo': 100},
@@ -270,7 +272,8 @@ def test_flatten_preserve_lists_issue43_other(self):

         actual = flatten_preserve_lists(dic, max_depth=100, max_list_index=30)

-        expected = [{"a_a": "x0",
+        expected = [
+            {"a_a": "x0",
              "f_h": 100,
              "c_c": None,
              "c_c_baz": "n1",
@@ -1891,8 +1894,6 @@ def test_flatten_preserve_lists_issue43_other(self):
              "c_c_foo": None,
              "d": 10}]
-
-
         self.assertEqual(expected, actual)

     def test_flatten_preserve_lists_issue43(self):
@@ -1901,14 +1902,16 @@ def test_flatten_preserve_lists_issue43(self):
             'a': {'a': ["x0", "x1", "x2"]},
             'b': {'b': 'foo', 'c': 'bar'},
             'c': {'c': [
-                {'foo': 2, 'bar': 6, 'baz': ["n1", "n2", "n3", "n1.1", "n2.2"]},
+                {'foo': 2, 'bar': 6, 'baz': [
+                    "n1", "n2", "n3", "n1.1", "n2.2"]},
                 {'foo': 5, 'bar': 7, 'baz': ["n4", "n5", "n6"]},
                 {'foo': float('nan')},
                 {'foo': 100},
             ]},
             'd': {'g': 10}
         }
-        expected = [{'a_a': 'x0',
+        expected = [
+            {'a_a': 'x0',
              'c_c_foo': 2,
              'c_c_baz': 'n1',
              'c_c_bar': 6,
              'b_b': 'foo',
              'f_gar_garp_gat': 'f7',
              'f_gar_garp_gu': 300,
              'f_h': 100,
              'f_gar_gup': 200,
              'a_a': 'x0',
@@ -2124,7 +2127,7 @@ def test_flatten_preserve_lists_issue43(self):
              'd': 10,
              'b_c': 'bar',
              'c_c': 100}]
-
+
         actual = flatten_preserve_lists(dic, max_list_index=50, max_depth=10)
         self.assertEqual(actual, expected)
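For context while reading the two pep8 patches, this is roughly how the function they touch is driven by the tests in this series. It is a minimal sketch, not part of the patch series itself; it assumes flatten_json is installed and importable exactly as in test_flatten.py, the input dict is modeled on the issue-43 fixtures, and the printed output is deliberately not asserted, since the exact records are what those fixtures pin down.

    # Illustrative usage sketch (editorial, not part of the patches).
    from flatten_json import flatten, flatten_preserve_lists

    nested = {
        'b': {'b': 'foo', 'c': 'bar'},
        'c': {'c': [{'foo': 2, 'bar': 6}, {'foo': 5, 'bar': 7}]},
        'd': {'g': 10},
    }

    # flatten() encodes list positions in the key names (c_c_0_foo, c_c_1_foo, ...)
    print(flatten(nested))

    # flatten_preserve_lists() instead returns a list of flat records, one per
    # combination of list elements, which is the shape the large expected
    # fixtures in the tests above encode.
    records = flatten_preserve_lists(nested, separator='_',
                                     max_list_index=50, max_depth=10)
    for record in records:
        print(record)

The keyword arguments mirror the refactored signature (separator, root_keys_to_ignore, max_list_index, max_depth); the tests exercise it with max_depth of 10 or 100 and max_list_index of 30 or 50.
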
From 3c868c1e18d9b492f749a7efb7ab020b88d58e9f Mon Sep 17 00:00:00 2001
From: kaiaeberli
Date: Tue, 12 Mar 2019 10:35:36 +0100
Subject: [PATCH 24/24] pep8

---
 flatten_json.py | 6 +++---
 test_flatten.py | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/flatten_json.py b/flatten_json.py
index 99bc945..9072209 100644
--- a/flatten_json.py
+++ b/flatten_json.py
@@ -97,7 +97,7 @@ def flatten_preserve_lists(nested_dict, separator="_",
     This is specially true for very deep objects
     This preserves list structure, and you can specify max_list_index
     and max_depth to limit processing
-
+
     Child elements with only one value inside
     will be unwrapped and become parent's value.
@@ -161,7 +161,7 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth_inner):
         For dict, list and set objects_ calls itself on the elements and for
         other types assigns the object_ to
         the corresponding key in the global flattened_dict
-
+
         :param object_: object to flatten
         :param key: carries the concatenated key for the object_
         :return: None
@@ -221,7 +221,7 @@ def _flatten_low_entropy(object_, key, cur_depth, max_depth_inner):
                           key, "| length: ",
                           str(len(object_)))

-                # need to remember global list state when we entered
+                # need to remember global list state when we entered
                 # this recursion
                 global_max_record_start = int(max(list(
                     list_prebuilt_flattened_dict.keys())))

diff --git a/test_flatten.py b/test_flatten.py
index 5d56ef1..f540022 100644
--- a/test_flatten.py
+++ b/test_flatten.py
@@ -248,7 +248,7 @@ def test_unflatten_with_df_issue40(self):
         }
         actual = unflatten(dic, '.')
         self.assertEqual(actual, expected)
-
+
     def test_flatten_preserve_lists_issue43_nested(self):
         """https://github.com/amirziai/flatten/issues/43"""
         dic = {