From 8b07235882d86dc72ab9b196d3db462c3c1eee76 Mon Sep 17 00:00:00 2001 From: Jonathan Robson Date: Mon, 25 Mar 2019 18:02:15 -0500 Subject: [PATCH] Deprecate Patch.patch and add Patch.text and Patch.data This is an attempt to finish up the work in PR #790 originally done by @erikvanzijst. Thanks to him for the initial work. Patch.patch assumes all content to be encoded in UTF-8 and forcefully replaces any non-decodeable sequences. This can lead to corruption for content that either does not conform to any specific encoding altogether, or uses an encoding that is incompatible with, or ambiguous to UTF-8. As discussed in #790, this change deprecates Patch.patch in favor of Patch.text and adds Patch.data, which returns the unmodified, raw bytes. --- src/patch.c | 54 +++++++++++++++++++++---- test/data/encoding.tar | Bin 0 -> 50176 bytes test/test_blob.py | 8 ++-- test/test_patch.py | 88 +++++++++++++++++++++++++++++++---------- 4 files changed, 119 insertions(+), 31 deletions(-) create mode 100644 test/data/encoding.tar diff --git a/src/patch.c b/src/patch.c index cd1ba120f..89bfb0768 100644 --- a/src/patch.c +++ b/src/patch.c @@ -169,25 +169,63 @@ Patch_create_from(PyObject *self, PyObject *args, PyObject *kwds) return wrap_patch(patch, oldblob, newblob); } +PyDoc_STRVAR(Patch_data__doc__, "The raw bytes of the patch's contents."); -PyDoc_STRVAR(Patch_patch__doc__, - "Patch diff string. Can be None in some cases, such as empty commits."); +PyObject * +Patch_data__get__(Patch *self) +{ + git_buf buf = {NULL}; + int err; + PyObject *bytes; + + assert(self->patch); + err = git_patch_to_buf(&buf, self->patch); + if (err < 0) + return Error_set(err); + + bytes = PyBytes_FromStringAndSize(buf.ptr, buf.size); + git_buf_dispose(&buf); + return bytes; +} + +PyDoc_STRVAR(Patch_text__doc__, + "Patch diff string. Can be None in some cases, such as empty commits.\n" + "Note that this decodes the content to Unicode assuming UTF-8 encoding. " + "For non-UTF-8 content that can lead be a lossy, non-reversible process. " + "To access the raw, un-decoded patch, use `patch.data`."); PyObject * -Patch_patch__get__(Patch *self) +Patch_text__get__(Patch *self) { git_buf buf = {NULL}; int err; - PyObject *py_patch; + PyObject *text; assert(self->patch); err = git_patch_to_buf(&buf, self->patch); if (err < 0) return Error_set(err); - py_patch = to_unicode(buf.ptr, NULL, NULL); + text = to_unicode(buf.ptr, NULL, NULL); git_buf_dispose(&buf); - return py_patch; + return text; +} + +PyDoc_STRVAR(Patch_patch__doc__, + "Patch diff string (deprecated -- use Patch.text instead).\n" + "Can be None in some cases, such as empty commits. " + "Note that this decodes the content to Unicode assuming UTF-8 encoding. " + "For non-UTF-8 content that can lead be a lossy, non-reversible process. " + "To access the raw, un-decoded patch, use `patch.data`."); + +PyObject * +Patch_patch__get__(Patch *self) +{ + PyErr_WarnEx(PyExc_DeprecationWarning, + "`Patch.patch` assumes UTF-8 encoding and can have unexpected results " + "on other encodings. If decoded text is needed, use `Patch.text` " + "instead. Otherwise use `Patch.data`.", 1); + return Patch_text__get__(self); } PyDoc_STRVAR(Patch_hunks__doc__, "hunks"); @@ -221,8 +259,10 @@ PyMethodDef Patch_methods[] = { PyGetSetDef Patch_getsetters[] = { GETTER(Patch, delta), - GETTER(Patch, patch), GETTER(Patch, line_stats), + GETTER(Patch, data), + GETTER(Patch, text), + GETTER(Patch, patch), GETTER(Patch, hunks), {NULL} }; diff --git a/test/data/encoding.tar b/test/data/encoding.tar new file mode 100644 index 0000000000000000000000000000000000000000..63462ea434da7d6614ea0deebfcd5db76a931059 GIT binary patch literal 50176 zcmeHQ36LCDd0v|sXQl{;O(={j;c0g9?n*mH&$WlVlBJbw3G1@7GO=~`%=GNGH8VZx zo{_Y+1*9NF0GA=0A-Et=K*HhRaHJ?I1OiC_7cqxPnQ%GAskj13VMw_!IM?_8?{!bl zwKJ=om1NUfmUp)M9sm2^fB*k`V^s21!K^IAht}>Vkw{EtGGd5+XXS5v|}OQB(zi}nI00EwF9X!a^0!vHb65@-EQ9I)EX~ZKAm1Pj8e-p|DS3dv`G92 z|Er7nv*GT4Y{9Ip5fK0%{qFx{GM&Z#r&F11u>a?SZoK_B9VmK9zp z5`*?nR}6Uk&+?pje5m|o#_qG4oC3h!eF48|E=6@M!x0@*bZE|{g;Ks7}$U3gCd&u zf5-MM+Xg>tFZrKHPP+D=EB_OTK>iPYBRY%-sXZg`KPSFu=mjTU)}5MR2L@mVR6Qdf zHS@n&DOv*-{=nJ_A{EI0)#AOM z{@Zr$pF_J#@$ZsbAFaIcMfd*khuznNv)L(GL7oYvfZw!6wvj;x- zu@C&oH(!0t)?e&-={wK<^r?H^q%AFEAKdZThcz`x@az0wpqc-Df#3m7zXJL1*?$St ze+2S>fSb@|s1EyI(+3;`T*3aU_-{5D)PHr^umL~ru>bx<&_HJErT-I|R{cMj)dKxL z5Zw0~B%PQ}WeQW1*=)WzscZUFI-4wL#c3^FG}2SWVmh5IYK3Ada0q*0^6B}$ng3<{ z4>dbwvlcBo3o%D8S4+k!m)1l6YpC{c<3CdVCs8LJ@c$~Y-e33Ob@7~8i93to;qb87 zQW3@(-f}^tAY?X(V*dF>BY%Q}{*pQbQiY;yIF7zx#L#mN6<~-W<(5PQh2%muiip!@ zZBbYiL+JKGy=+u!6FdfI5Ue79rW^wXs4eO>v8X%#Io$Dum`AojLbPbrO9eoQNqF#z zRf*ni*cOIBC4a5%2)&332E?Cs%=$m6SZkB$igfN8bPnrFr$U8R+VPP zC=&FTZ%!mausZ8B)`zAlJ7TDY-iWjgU;-n|qftfAA$7t@6oPvKxe}E&anmXoD$?Mz zNGyz=2T_e*CNwb?at0=)DfL1@=;DC6P%#Qot5}Qz%_S39#XHKwxOl<{+bmi%&ue1k z5^1ymk<$_6D;T=K^97?~*m`L~&5&>pSF7v?N zt+OLzwA-h2Tik%S@s@-0JNE8BwsYI|J@evD;h=rAB1WC~(Zgf0^_vcl$JUR;o0c&n z4r}r1=s0Fu00<&_(h-rPBN#d&A|qNv+wVRXQj0~Dh=u_huBR47IOI?XtGv<0A1HE}p39Lug@9VlKrDWZ9IMaYQ7j~)`C zBkMK#f4nd{&T!rl4l$^ku1Sh*%fk_3`qjc`FSuImB>zDSQQOGrj&VNZzowSbkh2ac*OR{y!yN4j!eAC6(05H6>oov@-45NiYyslAb;+aYJ`mHZ;sOykPc7Q~ok3!_}E zEeRYStH{Uno}kFrZRiAGi93$FU;|!hbO~qCI_&_ftO7i|SXDEx`T!)>npBLfqyAbi zECHl44sl39Jz6o&)J9cfiNyt@#;8*OLID_gz3xzdg%Y!dF;26RwGF3Us*%3n<^;H! zMpWy$lIbjB9!O!T8i#CE&t_@XFi8)&vRRVjVdz^adq;*z`X<9bK_wi5+BNgD5ntSC zBpC@CTvjgM9VhB>mdZH`f*oS1D5yyFI1~fKEm;8xu@L|pASqA+L5uCQ>s3W^1a!fu z>Cg@Z?intc;cy<%LCo_E5p)Ur0NA`azJ5F$+PE=Hzm}iE!YXX&6>4obdY<&s9&8xE zp-q9dg_@Rx3JuKDx!ci1DCmY-d%PDgiew2U43M1;*(iH&BsDKL41aH?Iks86_hbMN zIbiEcCFTnU?-^kr8%9?2WMiGsrG6b1*#o<$PBbe?SD{0#nUoV|0t|AFVx6XF5SN5< za~L>3W>${LKWBNjz`{{bM-8k2vaQvYW?=!cy>e7G^FTW8(M9xjJZu^(qDaKkscOGq z*@@1;Yy>se$o8{wTH%xdWW^%xY7~*=nXsAI@!GS0uvZwhVu>o`63iQ-5#QP&l5F47 ziNH5wdx|g+#FAAfP9>w1$S+4#F(7{0b~2Q#N@?jBbvUNAAjiYSQI`%!?ocdrmP`%U z9>;y%lU=$C@25le2G{xEkk(J*0T@m)qe4ILcgt+B$@phd{3I|qj zZ7hDc5}#1uX-D`K)R!RG4IJnZW8RQ0W5QS;ACUux4#|;6lDxi-V<(S{k4w-%6JzTj zG99ROmzxRockqPKO~BGw)s*OGYy`M+{|>=JgE~Agzis>e{es@`%0i(!l9EN=gR`{Y z=GGCpB21tiQ78;iGyEfSUXiXRU4fS0l2>8K1tqP4Rtv&N=nFb@`IrH- z3G%F9;&fIGG94s=ys!ETc1X8*J-M5QLkmF3H1p5k0Yg(`I&>T;37L8z{{)TRsOp>~wCH;iqz(N!igG`}@{X;Y4!B6Cu!Ym}bu#Lh4 zV2z??vO=YgE^zGEqaXgm?!#oXO-4e+$Wt)GUSge~R#2vyX(lnR?B~?#RTK27$j1kTtph8}9wJ{d z;BwTfKn94%Lly8T9TKdhvOrkm&=i=ONQN1kruNf58-mSX0Kr5yU@&EDv!1i+Oq~c) z6+!8=bFypIu!xF;*ougZMO-g;#FHRCMDoH36c?ek0dEG;9oQS;vE*`CWrVg`PabdZ z!1Ls(5dD%nCK;GU7GgJ{2*eJ<1RGfbr%9$c~#>Tc26-&McT~t-1hlLqzOu!&F(_O^`S!JLp0*+a*Shs1%5yE7an^2flwqniV z$w_$lCK`jXU(~Dc`6NXn{zXO4#608JJ9tKmQUYUS?yp3R3j{V|mr)ya5GF+!739NA zp`ulw{ZI%5WJROVp8ph=a*sHX9e&*3ZJ((Xz0@%>{mY3h)TX%u8c`enARXG!BmB56 z^BB$hC$`F$^d4a{{&5>N4|}++p}$>@L7d|@Zc8|U{YVdAI6q9K|UWqFtq|-A; zFuP8QQJ}bVk^FyPgl461AY}c55#FjAHYT1Y1zoPzVX;GW6A{b6j&k-~f99yfCTXlL zEfh^~3+YE7bc-l9aW?V;tl>!o_!bDeg_<{(=LaU)4va)>EXIQYQpf}5Kwy>QF|H8k zE(*i0`M1gd+eO~JTl`l#EaxNso4{GG_V{lW=f8vc@3kkM=J@Xx&x(}M+(!Nn9)($? zFeU^Hm`KmTkWmssk_wsmh?y*x2Sj|uO#?a*jxaRTEF3;TXZF8X?f`_1WL446$M z@z}C z%1W7~CMqI=}4sMVP?koWY)f(Iq3s2tOM_UdRv*x#OgBQ^`yrpG_z8 zxvY_!))U!;kxA!@Q-$nwa%!pwRg<1hq}>RXG=rK|0O5Aafw`SK`FIzZGzd8x%0xjb z&y=Z4*j>2mBNk@8fj+vEH%*oBQ>H%*49hC|pP~3M(T8VDT*!2pB^np_6{ff+^nt?2T@^0E55~D@&71 zIYfnmbT#n;O3ppq786j6#+ho#0-?~Wq7DGr!pC12hocaT0YpU?VE4&{1)FgAtE$5^ zhb3vF8*ECcGZ6=nb|RcwU<{OU2+@Tmgha$T#DoZ=5phIZb4@rzkz%M)cWOae7|~z2 zY55Zd=vTKXNID5G=9nAC$b5RD}qfsKUD8(DWGW0HrR5_40yG0;&IN}mM;`u`Cnme{- z*REMuDYl3f#ONUredUqEiBSYZPv?8Lc~K9cM#WPb5&?USZKl~yFCwQeNg7;f*@&{pARLhc>I}etTnpIUAnoL*{nkf3kaF zw;ATC!4v@X;D0TVZms{sjSxZo=S9W;oDNVXYPF8&r^UB~zyOlW>=xp2HUy;zsM&+{ zNG7AdwX}BGq?U%&C^gK%;+-8Ou7q`A8`Ai9yJ8fsogtG1bt3dK;vbW|QNTscCxNme z=S~s1^6n&yRlrS@;n;WZz>Z^k_s#FzyXV;4-h+GQ-7L8|iyIMPIZ=5hR}+qy5fMe5 zOxJ#)f`yo?aSAS91pliWvxnQJz#wR&>QcB7*QKDFiy*2-k$;K^J@n$wpd5*cXEZ$H!j({ne7iAIY(Nqj?+b{0lF4{ph%lJs zf?=SG{BJ1+UR@lphx||C>i3rV&ty6o-2ZT1Xrf>H4>E>mg1vuffvn^N010o30#Ja1 zKC?%u(yE-LhUiwKDl?HH8OlaOYM=V}ZXt4qkzLSNbwYf@LHZ(3dDMAyAz_GT)w)f0 z(2}M=9HXUvztLMd=Ul2l^RanKzOD-Hp#DG|HBhGM1iDf~1vo}WRMeJnP`p}^Dr(_3 zc^ZJj6buH!G^M&s=u=csDO)#)S~P7KhXe-dNf9HGHT?h~GKc$|Q8rm~W}q`scuR=W zWzxvCjJ;Z32Y2;C|N5rFcFDjUdT|l23hZz^2TGg{TSV;$CgLIm!iBec{sLfn!Tn{Q zh#FSr$JiWKuu~m}sz;^30xXEiC@rk2YQ&J##1c_AVoU3RB36`X0|ast$Q&WmV?v~R zY&s_*PqW|z1-7Tdp#Y+YCcv305q!ZgxELMw3+3@~Ar|NJct^0&Hdq1SEL|*EmONDf zQ(r0XB0&r5AXUWn^Y7r=P&-2$YlIctm_ypi&DThQ)AvI8K46_)%~3 z;wY04cue6=U|JnefIbI|6L(U?i&Ev7XBe5a&hr3+WMJ`+4Dm!zCnaKn3pP6j?7R&= z9v%)!hyWQ}juED}4I9KPLUBG|gjhc=e1)(V@_fj7k)6+kNjmC(%c(`>UEZrx{0c!m z^gpiu&9vx$O$*NdogeC2ss7iYN06WJUl_~I)ou*x}vpY@7EL2*|k_wSfB zsE^d37iX3Pi8zXlX`*yfZQz`&o<=(IKm5r?By05|pcnsVGi~u7d|ynE|6l#?_8y^M z{_iLM58j8Z-k_X|Dgt~C;i1M#Iz)2e5J{V}k4P>BXk_E2NUk@8Ka%TvxUxuY@4fES zq8tBH)Nb{hFK6t*|4Cf-*`ogwX}bRA-V}pIx;D2!MIhFqqrLc369I=uN~; z;K)iyP9srW20S!6rG)|r>1W6)bXA9~SC-(uA@fXCfLw1a2jgjwJI70S`g~J0JUlu> z%H#HNJ^8~sVJy*? z`p_5psLCzCs9a_R2?ci~v=T`Ky;Dafm@Ws!HqiM!il1OOS|E060K)hL zv`iY2SM1yu5o27@0a#J-fy4ifL#JIN0P>V--7et+4#b6TPbX#}xBB`Chh;i|>U9AF zR(%X>Q=hPAwNw01&UOq>!;}?(qfezmvJ)QC*}M_VE%230SP@RhqBv&

pPZti;8W z8Ps&}30b#$V^Dv8cL1t1!pRwV{$qkeB6N2IC0G;+F=mMCc(C{eD3ltI(_n;y{MAkz zKcnMaM3t=r((#-Q12%w*#hOQ_Q$|dZQ8UiomSgoh_DuT{vtE(H(o&pdI zdjx0oBtmfKgqsmXu@#;wO#03vUeC2vjp>Mh6>!+P2`)WW+(&sCw}vJ}2Mg&Rb2NMt zgsBowYd55m{vV+0&&mo+wA}Z9P9{=q^}pI=p#KM}=(~*8(Ep?;x*GsF$b}QQ2}Q3! zr^#F6{QDx_Z?u9dTpc zJdVG>RBT;>tS;(wcLIh&(X>Tj>>S+=ok(!M~EstkdDxnfTT`e4F{K#W|ytd;Ff8X!IVr&dA4i*BG;P_VrBgiJ2FnSyvk z+9%)<6bwld1#BWer(mwgjt6iq88yFbWXmdKKAf)(Y(f`qBwB${!Sw@?ZA_PRU5nqK z=7Ys(#)_CKG6;;ADkG2`QGMK~l$FN-<;4nYFfQ?3P&gsVlhMU3&7H+af^0xC3*~z7 zK`v$!E>hxZ1>Zqzwvc!k3KJ-{u&8i2G9r67{+5HZEwfH{l%Xb~>#XgQy8FPAgg3}1 zIC|B<`UqiOlEixT_O~4mw-V?wy<3|$%?;*VrYo<|6Gy6}CmDKm^cr%}zbq6a zV-CkXd^y{&D(k057?JM=LsCjRNd}yIq}bO!GVUhSd>uKj7HR*^8eFz;C$9BE-HIg8 z_3PJ*eexKA9|oCshEpl!F=`ee@|$XdL`Oq3*aG+ z0^2q6a#&!E+@U@V4>!i3OBs~U)5=y&CnQ`!A=>4k??7p-i|`41`KoCr zJHhLtw?V-xEvZ(Sf}~-duki3-rlLjWq(+AhOKk7wCt2BHSiS zTo!^V+lu+$y|auo-Sz6&`ZFP_(Z$&f9=BnjIsU`n|2gn4friC{e6HZ{fz4+4Hy*ja z@v?FL+x`BR$>97?+g7c~gMO@0Y9q3`p4PH@K9MjAIeki-D(1C9I)_3Xd}Zr24jyMy zQ>k1!E4JI_3ET)=5x1JRp)B!QgoaMU@OSKX`un;CIt&zpC5hY0GRgE*CXMg<716|G zBB3I_kc;ww^!QB6QE3^=*QFWHl#AR1BDXsa?2TqJQ`1om_IIU*gVa*HQo-iI`Rk`} zB@!+*McE(@A2$x~#=?ECsoz`jSHAdkYN~1RGq`345t;M$jcgtGpY`n^i4EklT>O{G zOG6QOBAZp;|BLVc2)_SukXz7otZw@+&Ec-yR`g=!`#+gV2jBm;A{?J<7Ki^tCMir?M^e zpIR1{Kv4fRxE<&|625l>al4Yk@nIb2iSh*}^g)q9l=4BdF|>;NsRP0AU9uvGZiM1u z8)==74kLbtkOswy={OQ4J+DVL0EOc!AYVp-H*#pO+vnuB!8GpUmp9fS!>CTy(6vNX z5hEyI==MkCnk$YNtNXty1=plD5>gZmp>lNOu*$d@Zdk039kTQz^x>6H>spQOyLj$z zn)ScDZpw*gwfMOY+13;M-?#mrga?_>vdL5?`2NQ>M6SVuF8i-vsQpJUf%ZS81@`|M z>~{N*o%cVPxKR6_Os1LtlY#%=zFBMVr1Sn$xz^w)UgXG?!zQ=JR{rvv+MVEA8dpw9bmObt%@4{_cvBlCWf zo$`O65AXcfRW#K;OpO-zWXItuDkh7w>;}h&gFaKpRV8gJ9nM7&ThEu(&&qq9((;)Mqab- zyLbKJKX3cl&3`>|@{@(!{Hs3kcIS2CM`Js-PE3cdjKBU9Z+zjz?WzB|;x)$ANA}tO zd93`JM{j=BzZ`p@`jLm<`P`?HAA8*s|MTy^`1-*ge|%4Qc4+C*?`&MS>YDGb`^ZgS z{->iyUvkq+qYr)X9nX2KMTDV|0jIk|I7M6nMwuczs?Uap1I`4`@gz( zf8tZeACKKX5&P<{uN`>&$v--J&)aT&+XI&ko%y@a{__JzFw2F(Kxh7k|8MBUWX_nv z_f2Ti8GU*xnNH}*w3f~$rw!dmP2~}(pECLax+ninHuOLH|KR@w?7u+&_l4e7H9B+2 zhVhs0KA6yQxP=*C#dFhFCLVa`)AuDl{q4-RH+*C)%-zjOCP z_wGObHS@Be*Z%YcyH+)C(D~xPKxh6({9iK?`RT-zkt$3VGs$c=GhHaAj7%n*PZbiV z^-LSZM4{N%B6{*ad_K4SkK_Na{}X}z-xqFI+sM`b$y~esfBQS$`}NC)-j@r%d$qF! z{V!Gwbmsr89`CQ(yRT*S;{R0R{wG=g8~Fcy;dKy=;Q!yfrSiqZ^S9sf*bAMZEB^i; zo_qIe*Js6F{`k`G+;{ao>)!L0yI%I_UtRHMUq1MzFZ}%M+0Xso*?Sfq_|u1e5E+f^ zd&A=&`uM;8#h#D;F!$3>nKxZ+%>3ZWZ{5FCp1I+-U-{Vcul$cI9)9HV%VPii#8>Wn z-xqg3@^8QV^7l?&|DKn;^s(pdyZP#ce|-5>fBwDt_ilb!^uc%i@GbL~Kk~Jyzy17g zy>j~U>pt_dZP)$i(d(Xd-xnTv@|HtC`PHp&e}AE%Uv=Z&_ka7b2Y(n{_xOv&hu-)7 zpIvh0uk4R~_~;Acx1JgV9)4ZN=)(V6yqL&M;S`RhCG*KdHV37jNEDJeJ(JQidR|XY z>jiD9KMTDV|0gEB`(NDnAJ>2MH~K1fu1>XUNKqK4_UnRysFo1`6JQ{~K!AY& J11rS9{{tsKf(8Hp literal 0 HcmV?d00001 diff --git a/test/test_blob.py b/test/test_blob.py index 15b72de8f..ab1d258e6 100644 --- a/test/test_blob.py +++ b/test/test_blob.py @@ -172,19 +172,19 @@ def test_diff_blob_to_buffer(self): def test_diff_blob_to_buffer_patch_patch(self): blob = self.repo[BLOB_SHA] patch = blob.diff_to_buffer("hello world") - assert patch.patch == BLOB_PATCH + assert patch.text == BLOB_PATCH def test_diff_blob_to_buffer_delete(self): blob = self.repo[BLOB_SHA] patch = blob.diff_to_buffer(None) - assert patch.patch == BLOB_PATCH_DELETED + assert patch.text == BLOB_PATCH_DELETED def test_diff_blob_create(self): old = self.repo[self.repo.create_blob(BLOB_CONTENT)] new = self.repo[self.repo.create_blob(BLOB_NEW_CONTENT)] patch = old.diff(new) - assert patch.patch == BLOB_PATCH_2 + assert patch.text == BLOB_PATCH_2 def test_blob_from_repo(self): blob = self.repo[BLOB_SHA] @@ -193,4 +193,4 @@ def test_blob_from_repo(self): blob = self.repo[BLOB_SHA] patch_two = blob.diff_to_buffer(None) - assert patch_one.patch == patch_two.patch + assert patch_one.text == patch_two.text diff --git a/test/test_patch.py b/test/test_patch.py index 0c2ca4074..dc111a301 100644 --- a/test/test_patch.py +++ b/test/test_patch.py @@ -96,7 +96,7 @@ def test_patch_create_from_buffers(self): new_as_path=BLOB_NEW_PATH, ) - assert patch.patch == BLOB_PATCH + assert patch.text == BLOB_PATCH def test_patch_create_from_blobs(self): old_blob = self.repo[BLOB_OLD_SHA] @@ -109,7 +109,7 @@ def test_patch_create_from_blobs(self): new_as_path=BLOB_NEW_PATH, ) - assert patch.patch == BLOB_PATCH2 + assert patch.text == BLOB_PATCH2 def test_patch_create_from_blob_buffer(self): old_blob = self.repo[BLOB_OLD_SHA] @@ -120,7 +120,7 @@ def test_patch_create_from_blob_buffer(self): new_as_path=BLOB_NEW_PATH, ) - assert patch.patch == BLOB_PATCH + assert patch.text == BLOB_PATCH def test_patch_create_from_blob_buffer_add(self): patch = pygit2.Patch.create_from( @@ -130,7 +130,7 @@ def test_patch_create_from_blob_buffer_add(self): new_as_path=BLOB_NEW_PATH, ) - assert patch.patch == BLOB_PATCH_ADDED + assert patch.text == BLOB_PATCH_ADDED def test_patch_create_from_blob_buffer_delete(self): old_blob = self.repo[BLOB_OLD_SHA] @@ -142,7 +142,7 @@ def test_patch_create_from_blob_buffer_delete(self): new_as_path=BLOB_NEW_PATH, ) - assert patch.patch == BLOB_PATCH_DELETED + assert patch.text == BLOB_PATCH_DELETED def test_patch_create_from_bad_old_type_arg(self): with pytest.raises(TypeError): @@ -163,8 +163,8 @@ def test_context_lines(self): new_as_path=BLOB_NEW_PATH, ) - context_count = ( - len([line for line in patch.patch.splitlines() if line.startswith(" ")]) + context_count = len( + [line for line in patch.text.splitlines() if line.startswith(" ")] ) assert context_count != 0 @@ -181,13 +181,12 @@ def test_no_context_lines(self): context_lines=0, ) - context_count = ( - len([line for line in patch.patch.splitlines() if line.startswith(" ")]) + context_count = len( + [line for line in patch.text.splitlines() if line.startswith(" ")] ) assert context_count == 0 - def test_patch_create_blob_blobs(self): old_blob = self.repo[self.repo.create_blob(BLOB_OLD_CONTENT)] new_blob = self.repo[self.repo.create_blob(BLOB_NEW_CONTENT)] @@ -199,7 +198,7 @@ def test_patch_create_blob_blobs(self): new_as_path=BLOB_NEW_PATH, ) - assert patch.patch == BLOB_PATCH + assert patch.text == BLOB_PATCH def test_patch_create_blob_buffer(self): blob = self.repo[self.repo.create_blob(BLOB_OLD_CONTENT)] @@ -210,7 +209,7 @@ def test_patch_create_blob_buffer(self): new_as_path=BLOB_NEW_PATH, ) - assert patch.patch == BLOB_PATCH + assert patch.text == BLOB_PATCH def test_patch_create_blob_delete(self): blob = self.repo[self.repo.create_blob(BLOB_OLD_CONTENT)] @@ -221,7 +220,7 @@ def test_patch_create_blob_delete(self): new_as_path=BLOB_NEW_PATH, ) - assert patch.patch == BLOB_PATCH_DELETED + assert patch.text == BLOB_PATCH_DELETED def test_patch_create_blob_add(self): blob = self.repo[self.repo.create_blob(BLOB_NEW_CONTENT)] @@ -232,7 +231,7 @@ def test_patch_create_blob_add(self): new_as_path=BLOB_NEW_PATH, ) - assert patch.patch == BLOB_PATCH_ADDED + assert patch.text == BLOB_PATCH_ADDED def test_patch_delete_blob(self): blob = self.repo[BLOB_OLD_SHA] @@ -246,7 +245,7 @@ def test_patch_delete_blob(self): # Make sure that even after deleting the blob the patch still has the # necessary references to generate its patch del blob - assert patch.patch == BLOB_PATCH_DELETED + assert patch.text == BLOB_PATCH_DELETED def test_patch_multi_blob(self): blob = self.repo[BLOB_OLD_SHA] @@ -254,16 +253,65 @@ def test_patch_multi_blob(self): blob, None ) - patch_text = patch.patch + patch_text = patch.text blob = self.repo[BLOB_OLD_SHA] patch2 = pygit2.Patch.create_from( blob, None ) - patch_text2 = patch.patch + patch_text2 = patch.text assert patch_text == patch_text2 - assert patch_text == patch.patch - assert patch_text2 == patch2.patch - assert patch.patch == patch2.patch + assert patch_text == patch.text + assert patch_text2 == patch2.text + assert patch.text == patch2.text + + +class PatchEncodingTest(utils.AutoRepoTestCase): + repo_spec = 'tar', 'encoding' + expected_diff = b"""diff --git a/iso-8859-1.txt b/iso-8859-1.txt +index e84e339..201e0c9 100644 +--- a/iso-8859-1.txt ++++ b/iso-8859-1.txt +@@ -1 +1,2 @@ + Kristian H\xf8gsberg ++foo +""" + + def test_patch_from_non_utf8(self): + # blobs encoded in ISO-8859-1 + old_content = b'Kristian H\xf8gsberg\n' + new_content = old_content + b'foo\n' + patch = pygit2.Patch.create_from( + old_content, + new_content, + old_as_path='iso-8859-1.txt', + new_as_path='iso-8859-1.txt', + ) + + self.assertEqual(patch.data, self.expected_diff) + + self.assertEqual( + patch.text, self.expected_diff.decode('utf-8', errors='replace')) + + # `patch.text` corrupted the ISO-8859-1 content as it forced UTF-8 + # decoding, so assert that we cannot get the original content back: + self.assertNotEqual(patch.text.encode('utf-8'), self.expected_diff) + + def test_patch_create_from_blobs(self): + patch = pygit2.Patch.create_from( + self.repo['e84e339ac7fcc823106efa65a6972d7a20016c85'], + self.repo['201e0c908e3d9f526659df3e556c3d06384ef0df'], + old_as_path='iso-8859-1.txt', + new_as_path='iso-8859-1.txt', + ) + + self.assertEqual(patch.data, self.expected_diff) + + self.assertEqual( + patch.text, self.expected_diff.decode('utf-8', errors='replace')) + + # `patch.text` corrupted the ISO-8859-1 content as it forced UTF-8 + # decoding, so assert that we cannot get the original content back: + self.assertNotEqual(patch.text.encode('utf-8'), self.expected_diff)