Skip to content

Commit

Permalink
adding zip_ordered_iterables
Browse files Browse the repository at this point in the history
  • Loading branch information
seperman committed Sep 1, 2023
1 parent 0cf607d commit 96847f2
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 3 deletions.
7 changes: 5 additions & 2 deletions deepdiff/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ def __init__(self,
ignore_type_in_groups=None,
ignore_type_subclasses=False,
iterable_compare_func=None,
zip_ordered_iterables=False,
log_frequency_in_sec=0,
math_epsilon=None,
max_diffs=None,
Expand All @@ -166,7 +167,7 @@ def __init__(self,
"number_format_notation, exclude_paths, include_paths, exclude_types, exclude_regex_paths, ignore_type_in_groups, "
"ignore_string_type_changes, ignore_numeric_type_changes, ignore_type_subclasses, truncate_datetime, "
"ignore_private_variables, ignore_nan_inequality, number_to_string_func, verbose_level, "
"view, hasher, hashes, max_passes, max_diffs, "
"view, hasher, hashes, max_passes, max_diffs, zip_ordered_iterables, "
"cutoff_distance_for_pairs, cutoff_intersection_for_pairs, log_frequency_in_sec, cache_size, "
"cache_tuning_sample_size, get_deep_distance, group_by, cache_purge_level, "
"math_epsilon, iterable_compare_func, _original_type, "
Expand Down Expand Up @@ -208,6 +209,7 @@ def __init__(self,
self.include_obj_callback_strict = include_obj_callback_strict
self.number_to_string = number_to_string_func or number_to_string
self.iterable_compare_func = iterable_compare_func
self.zip_ordered_iterables = zip_ordered_iterables
self.ignore_private_variables = ignore_private_variables
self.ignore_nan_inequality = ignore_nan_inequality
self.hasher = hasher
Expand Down Expand Up @@ -742,7 +744,8 @@ def _diff_iterable_in_order(self, level, parents_ids=frozenset(), _original_type
child_relationship_class = NonSubscriptableIterableRelationship

if (
isinstance(level.t1, Sequence)
not self.zip_ordered_iterables
and isinstance(level.t1, Sequence)
and isinstance(level.t2, Sequence)
and self._all_values_basic_hashable(level.t1)
and self._all_values_basic_hashable(level.t2)
Expand Down
4 changes: 4 additions & 0 deletions docs/diff_doc.rst
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,10 @@ ignore_encoding_errors: Boolean, default = False
:ref:`ignore_encoding_errors_label` If you want to get away with UnicodeDecodeError without passing explicit character encodings, set this option to True. If you want to make sure the encoding is done properly, keep this as False and instead pass an explicit list of character encodings to be considered via the :ref:`encodings_label` parameter.


zip_ordered_iterables: Boolean, default = False
:ref:`zip_ordered_iterables_label`:
When comparing ordered iterables such as lists, DeepDiff tries to find the smallest difference between the two iterables to report. That means that items in the two lists are not paired individually in the order of appearance in the iterables. Sometimes, that is not the desired behavior. Set this flag to True to make DeepDiff pair and compare the items in the iterables in the order they appear.

iterable_compare_func:
:ref:`iterable_compare_func_label`:
There are times that we want to guide DeepDiff as to what items to compare with other items. In such cases we can pass an iterable_compare_func: a function that DeepDiff calls to compare two items. The function takes three parameters (x, y, level) and should return True if it is a match, False if it is not a match, or raise CannotCompare if it is unable to compare the two.
Expand Down
23 changes: 23 additions & 0 deletions docs/optimizations.rst
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,29 @@ cache_purge_level: int, 0, 1, or 2. default=1
cache_purge_level defines what objects in DeepDiff should be deleted to free the memory once the diff object is calculated. If this value is set to zero, most of the functionality of the diff object is removed and the most memory is released. A value of 1 preserves all the functionalities of the diff object. A value of 2 also preserves the cache and hashes that were calculated during the diff calculations. In most cases the user does not need those objects to remain in the diff, except for investigation purposes.


.. _zip_ordered_iterables_label:

Zip Ordered Iterables
---------------------

zip_ordered_iterables: Boolean, default = False
When comparing ordered iterables such as lists, DeepDiff tries to find the smallest difference between the two iterables to report. That means that items in the two lists are not paired individually in the order of appearance in the iterables. Sometimes, that is not the desired behavior. Set this flag to True to make DeepDiff pair and compare the items in the iterables in the order they appear.


>>> from pprint import pprint
>>> from deepdiff import DeepDiff
>>> t1 = ["a", "b", "d", "e"]
>>> t2 = ["a", "b", "c", "d", "e"]
>>> DeepDiff(t1, t2)
{'iterable_item_added': {'root[2]': 'c'}}

When this flag is set to True and ignore_order=False, diffing will be faster.

>>> diff=DeepDiff(t1, t2, zip_ordered_iterables=True)
>>> pprint(diff)
{'iterable_item_added': {'root[4]': 'e'},
'values_changed': {'root[2]': {'new_value': 'c', 'old_value': 'd'},
'root[3]': {'new_value': 'd', 'old_value': 'e'}}}



Expand Down
55 changes: 54 additions & 1 deletion tests/test_diff_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,60 @@ def test_list_difference4(self):
result = {'iterable_item_added': {'root[2]': 'c'}}
assert result == ddiff

def test_list_difference5(self):
    """Default list diffing reports one insertion and one removal.

    The second list gains 'c' at index 2 and loses the trailing 'g'; the
    expected report contains exactly those two entries and no
    'values_changed' section.
    """
    before = ["a", "b", "d", "e", "f", "g"]
    after = ["a", "b", "c", "d", "e", "f"]
    expected = {
        'iterable_item_added': {'root[2]': 'c'},
        'iterable_item_removed': {'root[5]': 'g'},
    }
    assert expected == DeepDiff(before, after)

def test_list_difference_with_tiny_variations(self):
    """Letters and near-equal floats at the same positions must diff alike.

    First two lists of letters are diffed, both with the default settings
    and with zip_ordered_iterables=True, and both reports must match the
    same position-by-position 'values_changed' result. Then every letter is
    replaced by a float that differs from its neighbours only by a tiny
    amount, and the default diff of the float lists must be the same report
    with the letters swapped for their floats.
    """
    letters_old = ['a', 'b', 'c', 'd']
    letters_new = ['f', 'b', 'a', 'g']

    # Float stand-in for each letter used above.
    float_for = {
        'a': 2.0000000000000027,
        'b': 2.500000000000005,
        'c': 2.000000000000002,
        'd': 3.000000000000001,
        'f': 2.000000000000003,
        'g': 3.0000000000000027,
    }

    expected_letters = {
        'values_changed': {
            'root[0]': {'new_value': 'f', 'old_value': 'a'},
            'root[2]': {'new_value': 'a', 'old_value': 'c'},
            'root[3]': {'new_value': 'g', 'old_value': 'd'},
        }
    }

    default_diff = DeepDiff(letters_old, letters_new)
    assert expected_letters == default_diff

    zipped_diff = DeepDiff(letters_old, letters_new, zip_ordered_iterables=True)
    assert expected_letters == zipped_diff

    # Now we change the characters with numbers with tiny variations
    floats_old = [float_for[letter] for letter in letters_old]
    floats_new = [float_for[letter] for letter in letters_new]
    float_diff = DeepDiff(floats_old, floats_new)

    expected_floats = {
        'values_changed': {
            path: {
                'new_value': float_for[change['new_value']],
                'old_value': float_for[change['old_value']],
            }
            for path, change in expected_letters['values_changed'].items()
        }
    }
    assert expected_floats == float_diff

def test_list_of_booleans(self):
t1 = [False, False, True, True]
t2 = [False, False, False, True]
Expand Down Expand Up @@ -1803,4 +1857,3 @@ class Bar(PydanticBaseModel):
diff = DeepDiff(t1, t2)
expected = {'values_changed': {'root.stuff[0].thing': {'new_value': 2, 'old_value': 1}}}
assert expected == diff

37 changes: 37 additions & 0 deletions tests/test_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,3 +240,40 @@ def test_prefix_or_suffix_diff(self):

expected2 = {'values_changed': {"root['key1'][2]": {'new_value': 'jill', 'old_value': 'jack'}}}
assert expected2 == ddiff2

def test_custom_operator3_small_numbers(self):
    """A tolerance-based custom operator suppresses tiny float differences.

    Without any operator, the two float lists differ at indexes 0, 2 and 3.
    With CustomCompare (relative tolerance 5.5e-5) the diff must be empty,
    both when items are paired through iterable_compare_func and when they
    are paired positionally through zip_ordered_iterables=True.
    """
    x = [2.0000000000000027, 2.500000000000005, 2.000000000000002, 3.000000000000001]
    y = [2.000000000000003, 2.500000000000005, 2.0000000000000027, 3.0000000000000027]
    result = DeepDiff(x, y)
    expected = {
        'values_changed': {
            'root[0]': {'new_value': 2.000000000000003, 'old_value': 2.0000000000000027},
            'root[2]': {'new_value': 2.0000000000000027, 'old_value': 2.000000000000002},
            'root[3]': {'new_value': 3.0000000000000027, 'old_value': 3.000000000000001}}}
    assert expected == result

    class CustomCompare(BaseOperator):
        """Report a pair only when its relative difference exceeds ``tolerance``."""

        def __init__(self, tolerance, types):
            self.tolerance = tolerance
            self.types = types

        def match(self, level) -> bool:
            # Return an explicit bool on both paths; the original fell
            # through and returned None when the type did not match.
            return type(level.t1) in self.types

        def give_up_diffing(self, level, diff_instance) -> bool:
            relative = abs(abs(level.t1 - level.t2) / level.t1)
            # Negated "<=" (rather than ">") so that a NaN ratio is still
            # reported, exactly as the original max()-based check did.
            if not relative <= self.tolerance:
                custom_report = f'relative diff: {relative:.8e}'
                diff_instance.custom_report_result('diff', level, custom_report)
            # Always claim the pair so the built-in comparison is skipped.
            return True

    def compare_func(x, y, level):
        # Treat every candidate pair as a match.
        return True

    operators = [CustomCompare(types=[float], tolerance=5.5e-5)]
    result2 = DeepDiff(x, y, custom_operators=operators, iterable_compare_func=compare_func)
    assert {} == result2

    result3 = DeepDiff(x, y, custom_operators=operators, zip_ordered_iterables=True)
    assert {} == result3, "We should get the same result as result2 when zip_ordered_iterables is True."

0 comments on commit 96847f2

Please sign in to comment.