From 8adb1ff37b318351e4569fb19e6b62431592817d Mon Sep 17 00:00:00 2001 From: Eli Wang Date: Fri, 20 Aug 2021 21:22:01 +0800 Subject: [PATCH] Fix reading of a comment with an author in xlsx. Short version: comment with an author 'Eli Wang: \ncomment content' was incorrectly extracted as only 'Eli Wang:' When an comment is created in MS Excel, the author name is automatically prefixed as : '. Roo was not handling this correctly - in such case, the comments xml node may contain multiple `/text/r/t` node instead of only one. The code extracting comments was only extract the first such node. As a result, the MS Excel generated comment "Mr Author: comment content" was extracted as "Mr Author:", for which is the first `/text/r/t` node. Furthermore, the comment in Excel is rich text, which is represented by multiple nodes in xml format. Therefore, extracting multiple nodes in a comment is always expected. --- lib/roo/excelx/comments.rb | 2 +- test/files/comments-with-author.xlsx | Bin 0 -> 12524 bytes test/helpers/test_comments.rb | 11 +++++++++++ 3 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 test/files/comments-with-author.xlsx diff --git a/lib/roo/excelx/comments.rb b/lib/roo/excelx/comments.rb index 65044a94..c37097a5 100644 --- a/lib/roo/excelx/comments.rb +++ b/lib/roo/excelx/comments.rb @@ -13,7 +13,7 @@ def extract_comments return {} unless doc_exists? doc.xpath('//comments/commentList/comment').each_with_object({}) do |comment, hash| - value = (comment.at_xpath('./text/r/t') || comment.at_xpath('./text/t')).text + value = comment.xpath('./text/r/t', './text/t').text hash[::Roo::Utils.ref_to_key(comment['ref'].to_s)] = value end end diff --git a/test/files/comments-with-author.xlsx b/test/files/comments-with-author.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..6377aaf8960fdd0ee12f93878ca6ad55906f47da GIT binary patch literal 12524 zcmeHtWkVcYwl&bWhXBFd-Q8UR1b26LcTdm+cXtgM65QQ2xCD21xSc#RH}f!=_x%BP z>O)ma?bTi9oV8`0{Z1MHjtT+^0u2HJLI_fjgku{53IgH>2?FvO1R6|J*w)6$*v3g$ z$=%M_QJc=q+KM0t9E>6h1Pplpe~WkJ{2FZ0KP!FhETjUNJ7K3_$)1u1Aih01FhOzMh;=oraw?3vFRoHW@c(&%AzcXF@g zycVB7MGGK6^5|63OU7|3g1}oM%w3sch>m`eUK8{Nm&2X;=ASul>LsDc{upKbrA_u4 zH6vtrPBWB+kylG6Xi35yy(~sXu9E>i(kVTmVluaEF*ouw=s1(ln) z-nBd=@y^~Eii*r3LCn5vt?QM`^yTzbvbdBxg-dfZRcS*}wsijrvDoCPNEPxZohla0 z>wKILblzltjUE~GWrLdv&}kvX!}5^NU)ZzuVnKe@3}~ zat0I_>VWtD-#(gRM}e*$zUMfwIdI(DKE}p4j;lFBuu}6HgiL;Ytcxw-irkhzR`{Dh zKV9~lK#~V<{;|TxwZh}BH;@+T${$H7p=(*^aKKg?R~%a zcFH{)XV~5t0d)OdvUa_5LK=#S3eI~5AOW}e9V;iy(n!C}A>~=s6`ZZzEYmINV<4(kl;a6s9-fL-w>5&MO3AaJ$|HJdD}Z>pa2#5qbV3TG|DGUa z#bH0|lwu9Du`+*uEayhlQL`AH6LWftYAd9h5UWk4;`>o>G>$TQBI#(GR8uy)h}+_T zdxajLuC)(dF#wD0Q-|MF{S-fSDXeFV6mpeg=Tn>(V*@tjaLa?Ni6OafIKhqUS6d0g>D^T5t-n9^PP?CjX=o0sE{br3uNt{mxF4ZIGA zm)^%ip*<(C?L|Jwk{T&an)-0KIe2mBeHXDdSa7ESF}>Va^vhTFhYu!ZR+Vge3xh|T zn)~VK2Q%M!>|PBzqm4V(T=BlQ)yBkgvzkD59^q=fT5|14r2I_UT;XlLv)2N(NZH)Ob?P%sOUEOm&`#Obq;NC~fvc+KkYm;&P*Worwa&ErM; zs|bDTjg~k4vaJg)r;w=3DH&zib%NxvLU$-k+XwQrO;9H~xqg-JRuQDHCxnOGOW3$H z12P>ML4(AY%FM#1h=?6={NSSPm|mmqzdE67kEgGv{AelT!9W5KWM)0#8(;|Plv++z z^>shrqrTXh9-O6b@p)W7u57(ozYK3xSC{P-=ZGx9ZGE1sd_E7=&GPlV>^*LMu;$fK zIbC_UOniRmwcYba7AnQ`zHdxi@qRp8Sb5&l^?j;_Y#~1>gUy@>lDJ^x6c*?4ufyza zU?yj_ufdzdP%MOs5_E~&sS3kg?S&iKvlCvYI!*?;Dgy8gWpX?6Mu}FK4fn~dcY)&B zjf7}TQLTQ!E6rIm&k(x|3~vhK?zpM7Id~F&G6#VmvJ2aoP{O_%~Iu854q>* zuB&+Gy7mIL$04ehgFKNaJ@B`83H^)IkY>0;S`5vOeKWGzyUz~K&%(m|^HuxnC7WZ1 zldsd5T*}ZRc;d-!6FviiUz@QQ`%@w&=BuH18h+a0ZjjKMgo3H?ky7@;*?>oJT~SK^ zL?a@OVyq?9ZI&X#DNWbMrc4#QxHOfDtNlT3@1q*-$r#(060~iFh+=+X#hC;^U9I0f z;>^7Jmv%4i(nt%(#I4j~OckeudMzdldvEixhAFZhm;!7O>zH`RFx9m8tWav8WmCaO%NBGohkCSC zXAWi?^O3gkRgNv;T-*p=D;yePYR)ob!oJZfEp zCfX_KHGni-U>r2i8Y@Q}XNsX+>^gNo+!`L4d5bio!tGkqkV@Yj^gv6Szt{6@Dm{J= zlO|XzWbF?!5&|K{r2~cchH9%#Nd0Z#2ftY6PTW;myFR$g_Ttr``3L&G;-{6Kw0Qjv zEGbE65n>XG192+!ARI7S%;QF(B;5mT6%uN2%;=K0swOV;%nnj>N{)J}$A@@f?b8UU zs4RMRZyraz>50tHcS*K;Q*P(ybR&m6mKhswqdaH2yQF=VzutiBB(T5u`jni+FqPpP zU3aoc=Dcy*tMD=y%PCv6r9ygH$0gk*ypcq#1JTU|1{NefZnK9qO6)WaN(2RV+mk^f z8ozyyW#7}i!9tStCvAn? zQbqEM_kD52OHuYZ?8Rhy76Vg9HTd}-a-3A|MYC|C?8RCRXiAQpW{HhM0 zW@rWb;rWH550ObQLjVM z!!$nNtS6t2u&<0{=59#Y)fRj+lIoe`KV!&YQsj(1$I#WyA3@HYKu&&LK@xcyl5oS} z{zOAH95Oiwkq-$PL}S>*V8ZFL#H9I7aDccj4`mru3eG@a+VP8)h1A&u7t9f42D=@E zJ_CTsbPGu^5RRBip7K~iC-bdEP3!pe3!pKP}p@lIp#l*CYnO(vDUk@QW)xuE&bc{lmt6;>wce z%tzYyblJ=)Ma@t6gxF%q>r%wLncW;R&3CRCdpWd79@07rAr+&?#*cb)n0DN6dwij_ zx2uDUm_}T?VXW-iK8lQWhgc*B1k|5Ro4`C$>#wGCO>g6I@Xi`-Kbep!rXkApz~+%26a&; z_=-}@uW_4!3!Yd6h-H{2AZ50cnk3- zGNeBk(e(pK@KX##{RbJ$^c{?i6rCK*ZA^c0f+f0L3Y-yH_QUBMhq!jf((uh zdYNYjDkynbMsEUllK1Fn1zR*)R9?CA;YbVpCZ|B%t?Pc+Pk^UC?UrM`_kM7RI6&hE zCtU~2dN8W%+TB1Y)&+3B<&JZ78#2n^xz#vYK`mOz#>;>uTx4 zEE>PeDSAOpb#ZX(|gZ+DT;^6h3Cj;mQq<{m1@z3?0Af4YP; zpb!Seh#YcE_7qy+x2|nXQvy3e>9(akgYadMiZRn$+a-t59W=*DycMW95)0dNqN zhE`%i;L3sV+2IHr^TlJDnC82tL@anl-*u|RE~>2v%BG@hp^1Ir0i{yZ4T5!_SJxre z+xsL=Lp9j-rEB&u(&Bwu%(!5!h2IymaaCwin5sk`nIbl)VGVf3XG(UfWLQLN`0f|f z;k#99xWlV;(|zhD+v>asHK53w|ActX4;z~agM`oJy^1$|uPbqT8#TSqdG{QI;PUxG z%cpkCxV^P{Wg?qVPJ51QprmOwbd8z{!{u=_@ydQ&m1T zpY3OvG^T1we_9r&6D!OngDWB)g};!uZ5s{2QPSsafU&NJQCHGyisrEasb94L2J}mf zUzdhkXy5C3G2ue_Sq3jIb6TJJh$NR5RlAXHH-2`&{Ef%<>j-A=20w` zW|Eau$_Hm;NQlXJ)g5&p>GT%IVxsq;c!~`vsjp?ct-&S*VQU3`2mP(H9pqVixnO!l zvCdN}?!8ZSQ}or6BZaGMR^$`9q_F~C1IE~$=iJ^NtebA`qg-T7=5;dP9AUHLthfo!&EI5;FefZ;Ce4W?&~2LqIN47)K!nDy_wyuTp6?YCzHELB)4KKm>_D=F${TrKQ5e4 zOHV0>*fCo@t#YIvTK2}HEx^*)js>bwKGI5#=B;ELgxsw|kXnXboI5_D{fdQ&QagS~ zPzBUbOhgTAmf~{LPJrpIam-LJxoOb2!q%#=ag~aJ_KAs2ZgY4GTMHyFnt1Z(K=NU{ zc8+LI>7q!%rS1WYcoe_6`q%-pP-0~_DvppKG>$@QcxUvc+-d(&Y%p;)+vFRT{tAw8 ze^c@cgI6`^O@aW~zB;HzVQ*Xs*6IbUFP}XEiL}$dxbdbF)Ed;~Jl4pog|jqfIhPOD zi`hF%?k5J@OCb^A+Iwsa&#MJ`%46r(rdCB()q}xlw~w+p?Z!Rc#XW4%z&bG)?@r%# zZx%7I&bUV+P#Dm=LW|1)&@VrXm-n=!>980H%I-U8nYV&>+cUNm!a{qG2j^hIf)G2Pppz`OS?zNMI zf@1qkXwLFL%7*6ZaNSLoZW5SBmjV7?F6-qk-0}!6J2}v$eE`bC{~-eX;%Qo}rtJa& zM#wSwAwR*<>$fptmZ*gj8p``iQ`U#9A)}12P)L2*C08rg-*s^0zE)X$#{l>&UvGYH z;Z_$$M9#UQEA&c{2b1;4Ut%k4_2H9kd502fCxvlLP-u(1yR>y)csU-bT?T?87!b}` zu27;HA*$|=X1`}?H>Y@~C)zRk*|-XkKcCCG|86uHO z0)!alE;_zqBiThE*Ny5uu?g=zrkdA|hX$5K$AfrI7o+!DVtVU0hymEg|)1pSU3n{`Pz6f0+pgT zH;!dN_bJ*vBK053uIDmpkNr`JI-1!lHS%FaPerIHx|j0=i8GfkhDV=n5H9MaINYMR ze1GjB{hH_B5v#U_a)4fyqFR|rQGtR^Vu-q4nW>sdp}IyA5{3&@RN5*&%>RLj8-V@D z#Et!EL;$%58Urm11o_`6r=V}y!GQcoO^AOrK4YC_W38`b)WHU8c;IL?%9ga%v6UnEYe03zGj6YDudtuT9&~4b6H57!|unVI1|b%Je3q4RsDu&?^euO zAp`BK=MaH7b*}WmP?mTikykNX!U0eH3sfn{13NQ5mPb8t0mkY^b1aqi0qe4tM_(iE zr4t@D0GlL?3Xsf1XC-~u;pz^{6@BovC46*}xV6?0yt^kJwKfHvhNMt=+7Gzh_dh>1#$qB<0X<^qO|K=cn zQ9=lw95o9DAE>IV2BgzQ1r-~)$#s;pEtje?EHGy@Wai?$m6>-^wHR-=A!##s=5tle?OPF7` zS>Wm)QtLMZX!sIwX^b<*Gk>(QbR_lAnUk0w$PUosA~<6DaF~eFpkg-5QDP%lo_a~C zHO6w@icQ%DP`gE0C<^vUj@c{iz}vrymJ8e_0h`0rSD?P=j=^+2@!ON$YKefBa-g_T zxP5Ine}Fes+28}lN$<_Ehx;g^Q<=~&xzp+tzf7tKPD_g&RQL387Xk>T3hCtmDB zhfP?Nw6zHU3X)$}v2CZ6H+)*d(r`UA_?O#r_7yK?liooMa$1hcvi?4|xESz=+F+*h z2(2IV*BPI>av6fhW(34kR{I%!?}B8~t=qD}GQpuYm=y?t9-+A`3*85Gg3L&2-@?D$ z+0kN9L6W1+K|WCMTV{bcY65hDpF!n!keK%z=BK`baMavBPfWV3{tTvHMYA210dkm2 zTnFxxX24zU-P^A*BOaOt4s>};X5R5lYEoknh=XdY{D7}^u&TK8W5&#sENMYAO{nvL z+yxMBz<56w+JO6rMp`Yw__waoFM@NzFq5vmro zx6&1MtgW!@7K@4!4XQ)8}OJ$<~hp$a=anMR9)W7y37aL)>Hgf!Qpuoc;A z=HN+%e01H6ZB!%0LDBKS>PY{!o~iz{tSbR|JcA>}ktH7aH}2+$%(&=iv0XtdL|bG- zr+eI6`agR1{>EgFP9P6Bf!RDV@D5;+V`yt_4eUO2{FN?sDawQcQ~2gLclbd^QE6Sn zTs>DY!K?_iW5S6Ox$6{tb8A(SRo3|&-`gUM&nnyDErKNLG&5ohL%o4|Y z?%DK*;t?p0ObQ?($>XcSJl|RmYgzgqYXer}jlvnU7`uZD075wfzM%4pJ{+~6vbJB} za1#der&m$P8&{XtRe6zrGBYJ#9nk@yme<&LyJ-sD^;Y=<3f;{xvB?^Rhp}14S)gr% zv*JhoMc@0&jr-2oOiJviGs?CM`83sz+uk6xA|{WT9HX6;BjzxR#*i-zJ?S!tV1t?A z?p0B2r%dk?2N@_TayaI!X6OSbn!c&8pzFV?MA!Wo7m5g_=C!hF+Iu$TO(afdA4}L> z#xkL1u&*r^PQ=6S{T*HGKKuAG58QoJH-Nx(p(T4=s&8_viUYv9$c6EoOrP7*Ga~Hm zLk;2H>zvN?qr-zqZjk&d!P&FuE^m56LgJ1{Z8N|Vd|o%TmDMA_J93KzKiF1`o^Qe+ zFn4Y)@cyB5KymAA<&P82g$m;%3!HFWU^svEua3{q*1`C{o)0)?|2Vv2v~2?kP(zLr z5BM>T(5eniKFk~8H*3U_I__3;gB8xa{REPF*otqlZ&iPQvYZ$i`Z{JhyP}DHtiZ}B zOahq*iNscfG90ye%aUI#qv|cJ3M-0&6L_G?_e-L0C`w8Yxe}+YMr5TS9q6y zVA2h0{9!fDQm=kId5K+0IDlMp>4wVWphw0-HK2jpBwBgbI++ILf!ncG$44OGBn=Vw z$7sKc%k#%ddGz3*o3)AP1kMk;X=9=f%i7-dEtA-6d$Db;U45?%`C%PV#v!uL7hq1| z`#g>Z=|6b`OR~S@LQpVTpep+3p67p#`9H6J=zxAF{da)BcU}I|@aMG^_>=$AgZa|% z@11D>YWM?KI{xRrw3j$9`;mSlk-_}0SoF`%wwJ~)yE%RvlfwTpe%agc65!>=%Wr^2 zp!5uUir?EXFHK*{)W1#pfO|YYO<#)EFA-jKKlDV`>g=JM0r^>{YJ?l`XkD(66z(& z-xJl}7$6`{q#z(K)7F>PFT?w9doPM#|KV@J{-yQbW5sV<5D;ssfBEfyMviyV5I_d~ R?9M|1`2i&18O_gk{{#HUwTl1% literal 0 HcmV?d00001 diff --git a/test/helpers/test_comments.rb b/test/helpers/test_comments.rb index 2e267862..e204b1aa 100644 --- a/test/helpers/test_comments.rb +++ b/test/helpers/test_comments.rb @@ -40,4 +40,15 @@ def test_comments_from_google_sheet_exported_as_xlsx assert_equal expected_comments, oo.comments(oo.sheets.first), "comments error in class #{oo.class}" end end + + def test_excel_comment_with_author + options = { name: "comments-with-author", format: [:excelx] } + expexted_comments = [ + [6, 2, "Eli Wang:\ncomment with author"] + ] + + with_each_spreadsheet(options) do |oo| + assert_equal expexted_comments, oo.comments(oo.sheets.first), "comments error in class #{oo.class}" + end + end end