Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

_diff_text: use repr with escape characters #6702

Closed
52 changes: 47 additions & 5 deletions src/_pytest/assertion/util.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""Utilities for assertion debugging"""
import collections.abc
import itertools
import pprint
import re
from typing import AbstractSet
from typing import Any
from typing import Callable
Expand Down Expand Up @@ -193,6 +195,7 @@ def _diff_text(left: str, right: str, verbose: int = 0) -> List[str]:
characters which are identical to keep the diff minimal.
"""
from difflib import ndiff
from wcwidth import wcwidth

explanation = [] # type: List[str]

Expand Down Expand Up @@ -220,15 +223,54 @@ def _diff_text(left: str, right: str, verbose: int = 0) -> List[str]:
]
left = left[:-i]
right = right[:-i]
keepends = True
if left.isspace() or right.isspace():
left = repr(str(left))
right = repr(str(right))
explanation += ["Strings contain only whitespace, escaping them using repr()"]
explanation += [
line.strip("\n")
for line in ndiff(left.splitlines(keepends), right.splitlines(keepends))
]

left_split = len(left) and re.split("(\r?\n)", left) or []
left_lines = left_split[::2]
right_split = len(right) and re.split("(\r?\n)", right) or []
right_lines = right_split[::2]

if any(
wcwidth(ch) <= 0
for ch in [ch for lines in left_lines + right_lines for ch in lines]
):
left_lines = [repr(x) for x in left_lines]
right_lines = [repr(x) for x in right_lines]
explanation += [
"NOTE: Strings contain non-printable characters. Escaping them using repr()."
]
else:
max_lines = max(len(left_lines), len(right_lines))
left_ends = left_split[1:max_lines:2]
right_ends = right_split[1:max_lines:2]
if left_ends != right_ends:
explanation += [
"NOTE: Strings contain different line-endings. Escaping them using repr()."
]
left_lines = [line for line in left_lines]
right_lines = [line for line in right_lines]

for idx, (left_line, right_line) in enumerate(
itertools.zip_longest(left_lines, right_lines, fillvalue="")
):
try:
left_end = left_ends[idx]
except IndexError:
left_end = ""
try:
right_end = right_ends[idx]
except IndexError:
right_end = ""
if left_end != right_end:
left_lines[idx] += repr(left_end)[1:-1]
right_lines[idx] += repr(right_end)[1:-1]
if not left_end or not right_end:
break

explanation += [line.strip("\n") for line in ndiff(left_lines, right_lines)]
return explanation


Expand Down
78 changes: 69 additions & 9 deletions testing/test_assertion.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,8 +334,14 @@ def test_multiline_text_diff(self):
left = "foo\nspam\nbar"
right = "foo\neggs\nbar"
diff = callequal(left, right)
assert "- spam" in diff
assert "+ eggs" in diff
assert diff == [
r"'foo\nspam\nbar' == 'foo\neggs\nbar'",
# r"NOTE: Strings contain different line-endings. Escaping them using repr().",
r" foo",
r"- spam",
r"+ eggs",
r" bar",
]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a bit confused by this change (and others) - do you consider a newline a non-printable/zero-width character? Why? It seems quite confusing to me to see a multi-line output but also \n, i.e. with newlines represented twice.

Copy link
Contributor Author

@blueyed blueyed Feb 14, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It could maybe be made smarter, though, so that they are not shown when they are not at the end of a line?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For illustration, on master:

    def test_eq_similar_text():
        x ="foo\n1 bar"
>       assert x == "foo\n2 bar"
E       AssertionError: assert 'foo\n1 bar' == 'foo\n2 bar'
E           foo
E         - 1 bar
E         ? ^
E         + 2 bar
E         ? ^

On this branch:

    def test_eq_similar_text():
        x ="foo\n1 bar"
>       assert x == "foo\n2 bar"
E       AssertionError: assert 'foo\n1 bar' == 'foo\n2 bar'
E         NOTE: Strings contain non-printable/zero-width characters. Escaping them using repr().
E           'foo\n'
E         - '1 bar'
E         ?  ^
E         + '2 bar'
E         ?  ^

Is this what you would prefer @The-Compiler ?

    def test_eq_similar_text():
        x ="foo\n1 bar"
>       assert x == "foo\n2 bar"
E       AssertionError: assert 'foo\n1 bar' == 'foo\n2 bar'
E         NOTE: Strings contain non-printable/zero-width characters. Escaping them using repr().
E         - 'foo\n1 bar'
E         ?       ^
E         + 'foo\n2 bar'
E         ?       ^

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For illustration, on master:

    def test_eq_similar_text():
        x ="foo\n1 bar"
>       assert x == "foo\n2 bar"
E       AssertionError: assert 'foo\n1 bar' == 'foo\n2 bar'
E           foo
E         - 1 bar
E         ? ^
E         + 2 bar
E         ? ^

On master it has a different order (which I still find wrong - it should be left-to-right, top-to-bottom, with only +/- swapped, but that is off topic here - maybe the output above was therefore edited manually?):

    def test_eq_similar_text(self):
        x = "foo\n1 bar"
>       assert x == "foo\n2 bar"
E       AssertionError: assert 'foo\n1 bar' == 'foo\n2 bar'
E           foo
E         - 2 bar
E         ? ^
E         + 1 bar
E         ? ^

How about?

    def test_eq_similar_text(self):
        x = "foo\n1 bar\n"
>       assert x == "foo\n2 bar"
E       AssertionError: assert 'foo\n1 bar\n' == 'foo\n2 bar'
E         NOTE: Strings contain different line-endings. Escaping them using repr().
E         - 'foo\n1 bar\n'
E         ?       ^    --
E         + 'foo\n2 bar'
E         ?       ^

However, with longer strings it is useful to split them on newlines, of course.

>       assert x == "foo\n1 bar"
E       AssertionError: assert 'foo\n1 bar\n' == 'foo\n1 bar'
E         NOTE: Strings contain different line-endings. Escaping them using repr().
E           foo
E         - 1 bar\n
E         ?      --
E         + 1 bar
E         -

FWIW it always looked a bit strange to me seeing:

- foo
+ foo
     ^

(it could also be a space etc)

E         - 'foo\n1 bar'
E         ?       ^
E         + 'foo\n2 bar'
E         ?       ^

This could also be triggered via some minimal length (related: blueyed#218, where I split it onto separate lines with a certain length).

Copy link
Member

@The-Compiler The-Compiler Feb 16, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I guess explicit is indeed better than implicit in this case. I agree having the ^ marker pointing to "nothing" is odd, and I remember people being confused about that.


def test_bytes_diff_normal(self):
"""Check special handling for bytes diff (#5260)"""
Expand Down Expand Up @@ -1007,15 +1013,36 @@ def test_many_lines():
# without -vv, truncate the message showing a few diff lines only
result.stdout.fnmatch_lines(
[
"*- 1*",
"*- 3*",
"*- 5*",
"*truncated (%d lines hidden)*use*-vv*" % expected_truncated_lines,
r"> assert a == b",
r"E AssertionError: assert '000000000000...6666666666666' == '000000000000...6666666666666'",
r"E Skipping 91 identical leading characters in diff, use -v to show",
r"E 000000000",
r"E - 1*",
r"E 2*",
r"E - 3*",
r"E 4*",
r"E ",
r"*truncated (%d lines hidden)*use*-vv*" % expected_truncated_lines,
]
)

result = testdir.runpytest("-vv")
result.stdout.fnmatch_lines(["* 6*"])
result.stdout.fnmatch_lines(
[
r"> assert a == b",
r"E AssertionError: assert ('0*0\n'\n * '5*5\n'\n '6*6')"
r" == ('0*0\n'\n '2*2\n'\n '4*4\n'\n '6*6')",
r"E 0*0",
r"E - 1*1",
r"E 2*2",
r"E - 3*3",
r"E 4*4",
r"E - 5*5",
r"E 6*6",
r"",
],
consecutive=True,
)

monkeypatch.setenv("CI", "1")
result = testdir.runpytest()
Expand Down Expand Up @@ -1068,6 +1095,17 @@ def test_reprcompare_whitespaces():
]


def test_reprcompare_zerowidth_and_non_printable():
    """Strings containing non-printable characters are diffed via repr()."""
    # Both operands carry escape characters (\x00 and \x1b), so the expected
    # explanation contains the NOTE line followed by a diff of the repr()
    # forms rather than of the raw text.
    assert callequal("\x00\x1b[31mred", "\x1b[31mgreen") == [
        r"'\x00\x1b[31mred' == '\x1b[31mgreen'",
        r"NOTE: Strings contain non-printable characters. Escaping them using repr().",
        r"- '\x00\x1b[31mred'",
        r"? ---- ^",
        r"+ '\x1b[31mgreen'",
        r"? + ^^",
    ]


def test_pytest_assertrepr_compare_integration(testdir):
testdir.makepyfile(
"""
Expand Down Expand Up @@ -1311,13 +1349,35 @@ def test_diff():
result.stdout.fnmatch_lines(
r"""
*assert 'asdf' == 'asdf\n'
E AssertionError: assert 'asdf' == 'asdf\n'
E NOTE: Strings contain different line-endings. Escaping them using repr().
* - asdf
* + asdf
* ? +
* + asdf\n
* ? ++
"""
)


def test_diff_different_line_endings():
    """Differing line endings are shown escaped in the text diff."""
    # Case 1: the left string has a trailing "\n" that the right one lacks.
    assert callequal("asdf\n", "asdf", verbose=2) == [
        r"'asdf\n' == 'asdf'",
        r"NOTE: Strings contain different line-endings. Escaping them using repr().",
        r"- asdf\n",
        r"? --",
        r"+ asdf",
        r"- ",
    ]

    # Case 2: "\r\n" versus "\n" on the first line; the second line is equal
    # on both sides and is expected to appear unescaped.
    assert callequal("line1\r\nline2", "line1\nline2", verbose=2) == [
        r"'line1\r\nline2' == 'line1\nline2'",
        r"NOTE: Strings contain different line-endings. Escaping them using repr().",
        r"- line1\r\n",
        r"? --",
        r"+ line1\n",
        r" line2",
    ]


@pytest.mark.filterwarnings("default")
def test_assert_tuple_warning(testdir):
msg = "assertion is always true"
Expand Down