diff --git a/src/patch.c b/src/patch.c index cd1ba120f..89bfb0768 100644 --- a/src/patch.c +++ b/src/patch.c @@ -169,25 +169,63 @@ Patch_create_from(PyObject *self, PyObject *args, PyObject *kwds) return wrap_patch(patch, oldblob, newblob); } +PyDoc_STRVAR(Patch_data__doc__, "The raw bytes of the patch's contents."); -PyDoc_STRVAR(Patch_patch__doc__, - "Patch diff string. Can be None in some cases, such as empty commits."); +PyObject * +Patch_data__get__(Patch *self) +{ + git_buf buf = {NULL}; + int err; + PyObject *bytes; + + assert(self->patch); + err = git_patch_to_buf(&buf, self->patch); + if (err < 0) + return Error_set(err); + + bytes = PyBytes_FromStringAndSize(buf.ptr, buf.size); + git_buf_dispose(&buf); + return bytes; +} + +PyDoc_STRVAR(Patch_text__doc__, + "Patch diff string. Can be None in some cases, such as empty commits.\n" + "Note that this decodes the content to Unicode assuming UTF-8 encoding. " + "For non-UTF-8 content that can lead be a lossy, non-reversible process. " + "To access the raw, un-decoded patch, use `patch.data`."); PyObject * -Patch_patch__get__(Patch *self) +Patch_text__get__(Patch *self) { git_buf buf = {NULL}; int err; - PyObject *py_patch; + PyObject *text; assert(self->patch); err = git_patch_to_buf(&buf, self->patch); if (err < 0) return Error_set(err); - py_patch = to_unicode(buf.ptr, NULL, NULL); + text = to_unicode(buf.ptr, NULL, NULL); git_buf_dispose(&buf); - return py_patch; + return text; +} + +PyDoc_STRVAR(Patch_patch__doc__, + "Patch diff string (deprecated -- use Patch.text instead).\n" + "Can be None in some cases, such as empty commits. " + "Note that this decodes the content to Unicode assuming UTF-8 encoding. " + "For non-UTF-8 content that can lead be a lossy, non-reversible process. " + "To access the raw, un-decoded patch, use `patch.data`."); + +PyObject * +Patch_patch__get__(Patch *self) +{ + PyErr_WarnEx(PyExc_DeprecationWarning, + "`Patch.patch` assumes UTF-8 encoding and can have unexpected results " + "on other encodings. If decoded text is needed, use `Patch.text` " + "instead. Otherwise use `Patch.data`.", 1); + return Patch_text__get__(self); } PyDoc_STRVAR(Patch_hunks__doc__, "hunks"); @@ -221,8 +259,10 @@ PyMethodDef Patch_methods[] = { PyGetSetDef Patch_getsetters[] = { GETTER(Patch, delta), - GETTER(Patch, patch), GETTER(Patch, line_stats), + GETTER(Patch, data), + GETTER(Patch, text), + GETTER(Patch, patch), GETTER(Patch, hunks), {NULL} }; diff --git a/test/data/encoding.tar b/test/data/encoding.tar new file mode 100644 index 000000000..63462ea43 Binary files /dev/null and b/test/data/encoding.tar differ diff --git a/test/test_blob.py b/test/test_blob.py index 15b72de8f..ab1d258e6 100644 --- a/test/test_blob.py +++ b/test/test_blob.py @@ -172,19 +172,19 @@ def test_diff_blob_to_buffer(self): def test_diff_blob_to_buffer_patch_patch(self): blob = self.repo[BLOB_SHA] patch = blob.diff_to_buffer("hello world") - assert patch.patch == BLOB_PATCH + assert patch.text == BLOB_PATCH def test_diff_blob_to_buffer_delete(self): blob = self.repo[BLOB_SHA] patch = blob.diff_to_buffer(None) - assert patch.patch == BLOB_PATCH_DELETED + assert patch.text == BLOB_PATCH_DELETED def test_diff_blob_create(self): old = self.repo[self.repo.create_blob(BLOB_CONTENT)] new = self.repo[self.repo.create_blob(BLOB_NEW_CONTENT)] patch = old.diff(new) - assert patch.patch == BLOB_PATCH_2 + assert patch.text == BLOB_PATCH_2 def test_blob_from_repo(self): blob = self.repo[BLOB_SHA] @@ -193,4 +193,4 @@ def test_blob_from_repo(self): blob = self.repo[BLOB_SHA] patch_two = blob.diff_to_buffer(None) - assert patch_one.patch == patch_two.patch + assert patch_one.text == patch_two.text diff --git a/test/test_patch.py b/test/test_patch.py index 0c2ca4074..dc111a301 100644 --- a/test/test_patch.py +++ b/test/test_patch.py @@ -96,7 +96,7 @@ def test_patch_create_from_buffers(self): new_as_path=BLOB_NEW_PATH, ) - assert patch.patch == BLOB_PATCH + assert patch.text == BLOB_PATCH def test_patch_create_from_blobs(self): old_blob = self.repo[BLOB_OLD_SHA] @@ -109,7 +109,7 @@ def test_patch_create_from_blobs(self): new_as_path=BLOB_NEW_PATH, ) - assert patch.patch == BLOB_PATCH2 + assert patch.text == BLOB_PATCH2 def test_patch_create_from_blob_buffer(self): old_blob = self.repo[BLOB_OLD_SHA] @@ -120,7 +120,7 @@ def test_patch_create_from_blob_buffer(self): new_as_path=BLOB_NEW_PATH, ) - assert patch.patch == BLOB_PATCH + assert patch.text == BLOB_PATCH def test_patch_create_from_blob_buffer_add(self): patch = pygit2.Patch.create_from( @@ -130,7 +130,7 @@ def test_patch_create_from_blob_buffer_add(self): new_as_path=BLOB_NEW_PATH, ) - assert patch.patch == BLOB_PATCH_ADDED + assert patch.text == BLOB_PATCH_ADDED def test_patch_create_from_blob_buffer_delete(self): old_blob = self.repo[BLOB_OLD_SHA] @@ -142,7 +142,7 @@ def test_patch_create_from_blob_buffer_delete(self): new_as_path=BLOB_NEW_PATH, ) - assert patch.patch == BLOB_PATCH_DELETED + assert patch.text == BLOB_PATCH_DELETED def test_patch_create_from_bad_old_type_arg(self): with pytest.raises(TypeError): @@ -163,8 +163,8 @@ def test_context_lines(self): new_as_path=BLOB_NEW_PATH, ) - context_count = ( - len([line for line in patch.patch.splitlines() if line.startswith(" ")]) + context_count = len( + [line for line in patch.text.splitlines() if line.startswith(" ")] ) assert context_count != 0 @@ -181,13 +181,12 @@ def test_no_context_lines(self): context_lines=0, ) - context_count = ( - len([line for line in patch.patch.splitlines() if line.startswith(" ")]) + context_count = len( + [line for line in patch.text.splitlines() if line.startswith(" ")] ) assert context_count == 0 - def test_patch_create_blob_blobs(self): old_blob = self.repo[self.repo.create_blob(BLOB_OLD_CONTENT)] new_blob = self.repo[self.repo.create_blob(BLOB_NEW_CONTENT)] @@ -199,7 +198,7 @@ def test_patch_create_blob_blobs(self): new_as_path=BLOB_NEW_PATH, ) - assert patch.patch == BLOB_PATCH + assert patch.text == BLOB_PATCH def test_patch_create_blob_buffer(self): blob = self.repo[self.repo.create_blob(BLOB_OLD_CONTENT)] @@ -210,7 +209,7 @@ def test_patch_create_blob_buffer(self): new_as_path=BLOB_NEW_PATH, ) - assert patch.patch == BLOB_PATCH + assert patch.text == BLOB_PATCH def test_patch_create_blob_delete(self): blob = self.repo[self.repo.create_blob(BLOB_OLD_CONTENT)] @@ -221,7 +220,7 @@ def test_patch_create_blob_delete(self): new_as_path=BLOB_NEW_PATH, ) - assert patch.patch == BLOB_PATCH_DELETED + assert patch.text == BLOB_PATCH_DELETED def test_patch_create_blob_add(self): blob = self.repo[self.repo.create_blob(BLOB_NEW_CONTENT)] @@ -232,7 +231,7 @@ def test_patch_create_blob_add(self): new_as_path=BLOB_NEW_PATH, ) - assert patch.patch == BLOB_PATCH_ADDED + assert patch.text == BLOB_PATCH_ADDED def test_patch_delete_blob(self): blob = self.repo[BLOB_OLD_SHA] @@ -246,7 +245,7 @@ def test_patch_delete_blob(self): # Make sure that even after deleting the blob the patch still has the # necessary references to generate its patch del blob - assert patch.patch == BLOB_PATCH_DELETED + assert patch.text == BLOB_PATCH_DELETED def test_patch_multi_blob(self): blob = self.repo[BLOB_OLD_SHA] @@ -254,16 +253,65 @@ def test_patch_multi_blob(self): blob, None ) - patch_text = patch.patch + patch_text = patch.text blob = self.repo[BLOB_OLD_SHA] patch2 = pygit2.Patch.create_from( blob, None ) - patch_text2 = patch.patch + patch_text2 = patch.text assert patch_text == patch_text2 - assert patch_text == patch.patch - assert patch_text2 == patch2.patch - assert patch.patch == patch2.patch + assert patch_text == patch.text + assert patch_text2 == patch2.text + assert patch.text == patch2.text + + +class PatchEncodingTest(utils.AutoRepoTestCase): + repo_spec = 'tar', 'encoding' + expected_diff = b"""diff --git a/iso-8859-1.txt b/iso-8859-1.txt +index e84e339..201e0c9 100644 +--- a/iso-8859-1.txt ++++ b/iso-8859-1.txt +@@ -1 +1,2 @@ + Kristian H\xf8gsberg ++foo +""" + + def test_patch_from_non_utf8(self): + # blobs encoded in ISO-8859-1 + old_content = b'Kristian H\xf8gsberg\n' + new_content = old_content + b'foo\n' + patch = pygit2.Patch.create_from( + old_content, + new_content, + old_as_path='iso-8859-1.txt', + new_as_path='iso-8859-1.txt', + ) + + self.assertEqual(patch.data, self.expected_diff) + + self.assertEqual( + patch.text, self.expected_diff.decode('utf-8', errors='replace')) + + # `patch.text` corrupted the ISO-8859-1 content as it forced UTF-8 + # decoding, so assert that we cannot get the original content back: + self.assertNotEqual(patch.text.encode('utf-8'), self.expected_diff) + + def test_patch_create_from_blobs(self): + patch = pygit2.Patch.create_from( + self.repo['e84e339ac7fcc823106efa65a6972d7a20016c85'], + self.repo['201e0c908e3d9f526659df3e556c3d06384ef0df'], + old_as_path='iso-8859-1.txt', + new_as_path='iso-8859-1.txt', + ) + + self.assertEqual(patch.data, self.expected_diff) + + self.assertEqual( + patch.text, self.expected_diff.decode('utf-8', errors='replace')) + + # `patch.text` corrupted the ISO-8859-1 content as it forced UTF-8 + # decoding, so assert that we cannot get the original content back: + self.assertNotEqual(patch.text.encode('utf-8'), self.expected_diff)