From b70415dbd65d8886396c11087d97dca20615c82d Mon Sep 17 00:00:00 2001
From: AilinKid <314806019@qq.com>
Date: Tue, 6 Dec 2022 14:30:34 +0800
Subject: [PATCH 01/31] repeat initial draft

Signed-off-by: AilinKid <314806019@qq.com>
---
 a.out                                         | Bin 0 -> 85490 bytes
 dbms/src/Columns/ColumnArray.cpp              |   1 +
 dbms/src/Columns/ColumnNullable.h             |   2 +-
 dbms/src/Columns/ColumnsCommon.cpp            |   2 +-
 dbms/src/Common/COWPtr.h                      |   1 +
 dbms/src/Common/HashTable/HashTable.h         |   4 +-
 dbms/src/Common/TiFlashMetrics.h              |   3 +-
 dbms/src/Core/Block.h                         |   3 +-
 dbms/src/Core/ColumnWithTypeAndName.h         |   2 +
 dbms/src/Core/ColumnsWithTypeAndName.h        |   1 +
 .../RepeatSourceBlockInputStream.cpp          |  34 +++
 .../RepeatSourceBlockInputStream.h            |  45 ++++
 dbms/src/DataStreams/SquashingTransform.cpp   |   9 +-
 .../Debug/MockExecutor/RepeatSourceBinder.cpp |  67 +++++
 .../Debug/MockExecutor/RepeatSourceBinder.h   |  42 ++++
 .../Coprocessor/DAGExpressionAnalyzer.cpp     |  69 ++++-
 .../Flash/Coprocessor/DAGExpressionAnalyzer.h |   4 +
 .../DAGExpressionAnalyzerHelper.cpp           |   7 +-
 dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp  |   8 +
 dbms/src/Flash/Coprocessor/DAGQueryBlock.h    |   4 +-
 .../Coprocessor/DAGQueryBlockInterpreter.cpp  |  59 +++--
 .../Coprocessor/DAGQueryBlockInterpreter.h    |   1 +
 dbms/src/Flash/Coprocessor/DAGUtils.cpp       |   2 +-
 dbms/src/Flash/Coprocessor/InterpreterDAG.cpp |   2 +-
 .../Flash/Coprocessor/InterpreterUtils.cpp    |   2 +-
 .../Coprocessor/JoinInterpreterHelper.cpp     |  19 +-
 .../Coprocessor/collectOutputFieldTypes.cpp   |  31 +++
 dbms/src/Flash/Mpp/MPPHandler.cpp             |   1 +
 dbms/src/Flash/Mpp/MPPTask.cpp                |   4 +
 dbms/src/Flash/Mpp/MPPTaskScheduleEntry.h     |   1 +
 dbms/src/Flash/Mpp/MPPTunnelSet.cpp           |   2 +
 .../Flash/Statistics/traverseExecutors.cpp    |   2 +
 .../src/Flash/tests/gtest_filter_executor.cpp |   9 +
 dbms/src/Flash/tests/gtest_interpreter.cpp    |   7 +
 dbms/src/Interpreters/ExpressionActions.cpp   |  53 +++-
 dbms/src/Interpreters/ExpressionActions.h     |   8 +
 dbms/src/Interpreters/Join.cpp                |  38 ++-
 dbms/src/Interpreters/Join.h                  |   4 +-
 dbms/src/Interpreters/NullableUtils.cpp       |   2 +
 dbms/src/Interpreters/Repeat.cpp              | 231 +++++++++++++++++
 dbms/src/Interpreters/Repeat.h                | 139 ++++++++++
 dbms/src/Interpreters/sortBlock.cpp           |   3 +-
 .../Interpreters/tests/gtest_block_repeat.cpp | 238 ++++++++++++++++++
 dbms/src/TestUtils/FunctionTestUtils.cpp      |   4 +
 dbms/src/TestUtils/executorSerializer.cpp     |  30 +++
 dbms/src/TestUtils/mockExecutor.cpp           |  24 ++
 dbms/src/TestUtils/mockExecutor.h             |   5 +
 tai.cpp                                       |  64 +++++
 tai.h                                         |  14 ++
 49 files changed, 1240 insertions(+), 67 deletions(-)
 create mode 100755 a.out
 create mode 100644 dbms/src/DataStreams/RepeatSourceBlockInputStream.cpp
 create mode 100644 dbms/src/DataStreams/RepeatSourceBlockInputStream.h
 create mode 100644 dbms/src/Debug/MockExecutor/RepeatSourceBinder.cpp
 create mode 100644 dbms/src/Debug/MockExecutor/RepeatSourceBinder.h
 create mode 100644 dbms/src/Interpreters/Repeat.cpp
 create mode 100644 dbms/src/Interpreters/Repeat.h
 create mode 100644 dbms/src/Interpreters/tests/gtest_block_repeat.cpp
 create mode 100644 tai.cpp
 create mode 100644 tai.h

diff --git a/a.out b/a.out
new file mode 100755
index 0000000000000000000000000000000000000000..8aed9644943b125062ea04d62dd9d638ddaf2013
GIT binary patch
literal 85490
(base85-encoded binary payload omitted: a.out is a stray build artifact committed with this draft)
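For orientation — an illustrative example, assuming grouping sets ((s1), (s2)) and 1-based grouping IDs as the
draft's comments describe: the repeat source emits each input row once per grouping set; in the copy for set k,
the grouping-set columns not in set k are overwritten with NULL and the appended groupingID column is set to k:

    input row:            (s1 = "a",  s2 = "b")
    output, set 1 (s1):   (s1 = "a",  s2 = NULL, groupingID = 1)
    output, set 2 (s2):   (s1 = NULL, s2 = "b",  groupingID = 2)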
zbN}PigNsJa{qWI0+_3H5#ZSGk$M@Lmwf}4De=hN?8dUzKr|KEcPro%Gv;T-IHokuQ z)mK0JjR%UR9D8QR=3o5y@+G^gw@qIif5VS<{i)?E&Zkoc-|_U`-~2lH%;Cus-q^8d z+>U=$WDR+v?+;)9-cRnhqcZcwC(k51j{fDl53GLd%xjm$jENm}@a@-64Lde?WBvzU z+x@HUcih-_X7ZsGU;oz(*data); /// Make temporary arrays for each components of Tuple. In the same way as for Nullable. diff --git a/dbms/src/Columns/ColumnNullable.h b/dbms/src/Columns/ColumnNullable.h index 2069f80b42e..d993d918509 100644 --- a/dbms/src/Columns/ColumnNullable.h +++ b/dbms/src/Columns/ColumnNullable.h @@ -31,7 +31,7 @@ using ConstNullMapPtr = const NullMap *; /// over a bitmap because columns are usually stored on disk as compressed /// files. In this regard, using a bitmap instead of a byte map would /// greatly complicate the implementation with little to no benefits. -class ColumnNullable final : public COWPtrHelper +class ColumnNullable final : public COWPtrHelper // nullable 列是怎么形成的,一般是一个普通列,一个伴随 bitmap,这里使用的 byte map 来存的 null mapping 而不是 bits { private: friend class COWPtrHelper; diff --git a/dbms/src/Columns/ColumnsCommon.cpp b/dbms/src/Columns/ColumnsCommon.cpp index e969dc99842..9307587ce6c 100644 --- a/dbms/src/Columns/ColumnsCommon.cpp +++ b/dbms/src/Columns/ColumnsCommon.cpp @@ -291,7 +291,7 @@ void filterArraysImplGeneric( while (filt_pos < filt_end) { - if (*filt_pos) + if (*filt_pos) // 如果是 0 的话,说名该列该行被 filter 了 copy_array(offsets_pos); ++filt_pos; diff --git a/dbms/src/Common/COWPtr.h b/dbms/src/Common/COWPtr.h index 1f6bb8dacbb..b4d39620287 100644 --- a/dbms/src/Common/COWPtr.h +++ b/dbms/src/Common/COWPtr.h @@ -105,6 +105,7 @@ class COWPtr : public boost::intrusive_ref_counter T && operator*() const && { return const_cast::type &&>(*boost::intrusive_ptr::get()); } }; + // 这个地方,COWPtr 继承 counter 之后就自带了 ref count 和 add, release 函数。所以私有类实力化到 T 之后 = IntrusivePtr,里面调用的 add, release 函数就有了,其都是操作 T 继承的 ref count 来操作的 protected: template class mutable_ptr : public IntrusivePtr // NOLINT(readability-identifier-naming) diff --git a/dbms/src/Common/HashTable/HashTable.h b/dbms/src/Common/HashTable/HashTable.h index 2c857b9bc1b..3bc3ab5e56c 100644 --- a/dbms/src/Common/HashTable/HashTable.h +++ b/dbms/src/Common/HashTable/HashTable.h @@ -449,7 +449,7 @@ class HashTable : private boost::noncopyable { while (!buf[place_value].isZero(*this) && !buf[place_value].keyEquals(x, hash_value, *this)) { - place_value = grower.next(place_value); + place_value = grower.next(place_value); // closed hash,线性开放地址寻址法 #ifdef DBMS_HASH_MAP_COUNT_COLLISIONS ++collisions; #endif @@ -694,7 +694,7 @@ class HashTable : private boost::noncopyable * HashMap completely, change all its users to the existing internal * iteration interface, and redefine end() to return LookupResult for * compatibility with std find(). Unfortunately, now is not the time to - * do this. + * do this. 
+     * do this.
+     // implicit type conversion operator
      */
     operator Cell *() const { return nullptr; } // NOLINT(google-explicit-constructor)
 };

diff --git a/dbms/src/Common/TiFlashMetrics.h b/dbms/src/Common/TiFlashMetrics.h
index caf2f1cf300..bf0ec4a9f65 100644
--- a/dbms/src/Common/TiFlashMetrics.h
+++ b/dbms/src/Common/TiFlashMetrics.h
@@ -61,7 +61,8 @@ namespace DB
       F(type_limit, {"type", "limit"}), F(type_join, {"type", "join"}), F(type_exchange_sender, {"type", "exchange_sender"}), \
       F(type_exchange_receiver, {"type", "exchange_receiver"}), F(type_projection, {"type", "projection"}), \
       F(type_partition_ts, {"type", "partition_table_scan"}), \
-      F(type_window, {"type", "window"}), F(type_window_sort, {"type", "window_sort"})) \
+      F(type_window, {"type", "window"}), F(type_window_sort, {"type", "window_sort"}), \
+      F(type_repeat_source, {"type", "repeat_source"})) \
   M(tiflash_coprocessor_request_duration_seconds, "Bucketed histogram of request duration", Histogram, \
       F(type_cop, {{"type", "cop"}}, ExpBuckets{0.001, 2, 20}), \
       F(type_batch, {{"type", "batch"}}, ExpBuckets{0.001, 2, 20}), \

diff --git a/dbms/src/Core/Block.h b/dbms/src/Core/Block.h
index 0d337d6d3e2..3463c47c1bc 100644
--- a/dbms/src/Core/Block.h
+++ b/dbms/src/Core/Block.h
@@ -27,7 +27,7 @@
 
 namespace DB
 {
-/** Container for set of columns for bunch of rows in memory.
+/** Container for set of columns for bunch of rows in memory. // how is the number of rows in here determined?
   * This is unit of data processing.
   * Also contains metadata - data types of columns and their names
   * (either original names from a table, or generated names during temporary calculations).
@@ -39,6 +39,7 @@ class Context;
 class Block
 {
 private:
+    // one piece of data spanning multiple columns
     using Container = ColumnsWithTypeAndName;
     using IndexByName = std::map<String, size_t>;

diff --git a/dbms/src/Core/ColumnWithTypeAndName.h b/dbms/src/Core/ColumnWithTypeAndName.h
index 42a98f795fd..30c4fe8c546 100644
--- a/dbms/src/Core/ColumnWithTypeAndName.h
+++ b/dbms/src/Core/ColumnWithTypeAndName.h
@@ -32,6 +32,8 @@ class WriteBuffer;
 
 struct ColumnWithTypeAndName
 {
+    // IColumn inherits from the intrusive counter to implement shared-ptr semantics; via the two variants of the inheriting helper class, a mutable ptr and an immutable ptr can be converted into each other
+    // ColumnPtr is a base-class pointer
     ColumnPtr column;
     DataTypePtr type;
     String name;

diff --git a/dbms/src/Core/ColumnsWithTypeAndName.h b/dbms/src/Core/ColumnsWithTypeAndName.h
index 61c77cf161e..e7741bbb71e 100644
--- a/dbms/src/Core/ColumnsWithTypeAndName.h
+++ b/dbms/src/Core/ColumnsWithTypeAndName.h
@@ -21,6 +21,7 @@
 
 namespace DB
 {
+// a piece of data composed of multiple columns
 using ColumnsWithTypeAndName = std::vector<ColumnWithTypeAndName>;
 
 }

diff --git a/dbms/src/DataStreams/RepeatSourceBlockInputStream.cpp b/dbms/src/DataStreams/RepeatSourceBlockInputStream.cpp
new file mode 100644
index 00000000000..f5075f9c87d
--- /dev/null
+++ b/dbms/src/DataStreams/RepeatSourceBlockInputStream.cpp
@@ -0,0 +1,34 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
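+
+// This stream wraps its child and runs the repeat-source expression actions over every block it reads (and over
+// the header), which is where the row replication for grouping sets takes place.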
+
+#include <DataStreams/RepeatSourceBlockInputStream.h>
+
+namespace DB
+{
+Block RepeatSourceBlockInputStream::readImpl()
+{
+    Block block = children.back()->read();
+    if (!block)
+        return block;
+    repeat_source_actions->execute(block);
+    return block;
+}
+
+Block RepeatSourceBlockInputStream::getHeader() const
+{
+    Block res = children.back()->getHeader();
+    repeat_source_actions->execute(res);
+    return res;
+}
+
+} // namespace DB

diff --git a/dbms/src/DataStreams/RepeatSourceBlockInputStream.h b/dbms/src/DataStreams/RepeatSourceBlockInputStream.h
new file mode 100644
index 00000000000..eaa223ef824
--- /dev/null
+++ b/dbms/src/DataStreams/RepeatSourceBlockInputStream.h
@@ -0,0 +1,45 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <Core/Block.h>
+#include <DataStreams/IProfilingBlockInputStream.h>
+#include <Interpreters/ExpressionActions.h>
+
+namespace DB
+{
+class RepeatSourceBlockInputStream : public IProfilingBlockInputStream
+{
+    static constexpr auto NAME = "RepeatSource";
+
+public:
+    RepeatSourceBlockInputStream(
+        const BlockInputStreamPtr & input,
+        ExpressionActionsPtr repeat_source_actions_)
+        : repeat_source_actions(repeat_source_actions_)
+    {
+        children.push_back(input);
+    }
+
+    String getName() const override { return NAME; }
+    Block getHeader() const override;
+
+protected:
+    Block readImpl() override;
+
+private:
+    ExpressionActionsPtr repeat_source_actions;
+};
+
+} // namespace DB

diff --git a/dbms/src/DataStreams/SquashingTransform.cpp b/dbms/src/DataStreams/SquashingTransform.cpp
index d018deaed96..1488b688d27 100644
--- a/dbms/src/DataStreams/SquashingTransform.cpp
+++ b/dbms/src/DataStreams/SquashingTransform.cpp
@@ -60,7 +60,7 @@ SquashingTransform::Result SquashingTransform::add(Block && block)
         return Result(std::move(block));
     }
 
-    append(std::move(block));
+    append(std::move(block)); // accumulate blocks into a batch
 
     accumulated_block_rows = accumulated_block.rows();
     accumulated_block_bytes = accumulated_block.bytes();
@@ -93,10 +93,15 @@ void SquashingTransform::append(Block && block)
     {
         MutableColumnPtr mutable_column = (*std::move(accumulated_block.getByPosition(i).column)).mutate();
         mutable_column->insertRangeFrom(*block.getByPosition(i).column, 0, rows);
-        accumulated_block.getByPosition(i).column = std::move(mutable_column);
+        accumulated_block.getByPosition(i).column = std::move(mutable_column); // the append-values operation on a column
     }
 }
 
+// We may need an efficient row-replication operation here. The repeatSource operator first appends the additional
+// column, then replicates the original block's rows n times; on the n-th replica it overwrites the grouping-set
+// columns other than the targeted one with null values, and sets the groupingID column to the constant n.
+//
+// sample_block
 
 bool SquashingTransform::isEnoughSize(size_t rows, size_t bytes) const
 {

diff --git a/dbms/src/Debug/MockExecutor/RepeatSourceBinder.cpp b/dbms/src/Debug/MockExecutor/RepeatSourceBinder.cpp
new file mode 100644
index 00000000000..7633c347282
--- /dev/null
+++ b/dbms/src/Debug/MockExecutor/RepeatSourceBinder.cpp
@@ -0,0 +1,67 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <Debug/MockExecutor/RepeatSourceBinder.h>
+
+namespace DB::mock
+{
+
+bool RepeatSourceBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context)
+{
+    tipb_executor->set_tp(tipb::ExecType::TypeRepeatSource);
+    tipb_executor->set_executor_id(name);
+    tipb::RepeatSource * repeat_source = tipb_executor->mutable_repeat_source();
+    for (const auto & grouping_set : grouping_sets_columns)
+    {
+        auto * gss = repeat_source->add_grouping_sets();
+        for (const auto & grouping_exprs : grouping_set)
+        {
+            auto * ges = gss->add_grouping_exprs();
+            for (const auto & grouping_col : grouping_exprs)
+            {
+                tipb::Expr * add_column = ges->add_grouping_expr();
+                astToPB(children[0]->output_schema, grouping_col, add_column, collator_id, context); // convert the AST column ref into a tipb::Expr column ref
+            }
+        }
+    }
+    auto * children_executor = repeat_source->mutable_child();
+    return children[0]->toTiPBExecutor(children_executor, collator_id, mpp_info, context);
+}
+
+ExecutorBinderPtr compileRepeat(ExecutorBinderPtr input, size_t & executor_index, MockVVecGroupingNameVec grouping_set_columns, std::set<String> in_set)
+{
+    DAGSchema output_schema;
+    for (const auto & field : input->output_schema)
+    {
+        // if the column is in the grouping sets, make it nullable.
+        if (in_set.find(field.first) != in_set.end() && field.second.hasNotNullFlag())
+            output_schema.push_back(toNullableDAGColumnInfo(field));
+        else
+            output_schema.push_back(field);
+    }
+    {
+        tipb::FieldType field_type{};
+        field_type.set_tp(TiDB::TypeLongLong);
+        field_type.set_charset("binary");
+        field_type.set_collate(TiDB::ITiDBCollator::BINARY);
+        field_type.set_flag(0);
+        field_type.set_flen(-1);
+        field_type.set_decimal(-1);
+        output_schema.push_back(std::make_pair("groupingID", TiDB::fieldTypeToColumnInfo(field_type)));
+    }
+    ExecutorBinderPtr repeat_source = std::make_shared<RepeatSourceBinder>(executor_index, output_schema, std::move(grouping_set_columns));
+    repeat_source->children.push_back(input);
+    return repeat_source;
+}
+} // namespace DB::mock
\ No newline at end of file

diff --git a/dbms/src/Debug/MockExecutor/RepeatSourceBinder.h b/dbms/src/Debug/MockExecutor/RepeatSourceBinder.h
new file mode 100644
index 00000000000..473393221cf
--- /dev/null
+++ b/dbms/src/Debug/MockExecutor/RepeatSourceBinder.h
@@ -0,0 +1,42 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
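+
+// This binder mocks the repeat_source executor for tests: it serializes the grouping sets into the tipb executor
+// tree and derives the output schema — grouping-set columns become nullable and a trailing groupingID column is
+// appended (see compileRepeat, declared below).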
+
+#pragma once
+
+#include <Debug/MockExecutor/ExecutorBinder.h>
+
+namespace DB::mock
+{
+using MockGroupingNameVec = std::vector<String>;
+using MockVecGroupingNameVec = std::vector<MockGroupingNameVec>;
+using MockVVecGroupingNameVec = std::vector<MockVecGroupingNameVec>;
+
+class RepeatSourceBinder : public ExecutorBinder
+{
+public:
+    RepeatSourceBinder(size_t & index_, const DAGSchema & output_schema_, MockVVecGroupingNameVec gss)
+        : ExecutorBinder(index_, "repeat_source" + std::to_string(index_), output_schema_)
+        , grouping_sets_columns(gss)
+    {}
+
+    bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) override;
+
+    void columnPrune(std::unordered_set<String> &) override { throw Exception("Should not reach here"); }
+
+private:
+    // for now, every grouping set is a flat list of base columns; make the structure one level more nested once grouping-set merging is enabled.
+    MockVVecGroupingNameVec grouping_sets_columns;
+};
+
+ExecutorBinderPtr compileRepeat(ExecutorBinderPtr input, size_t & executor_index, MockVVecGroupingNameVec grouping_set_columns, std::set<String> set);
+} // namespace DB::mock

diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp
index ec0728bbf58..01315929ff3 100644
--- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp
@@ -35,6 +35,7 @@
 #include
 #include
 #include
+#include <Interpreters/Repeat.h>
 #include
 #include
 #include
@@ -668,6 +669,7 @@ String DAGExpressionAnalyzer::applyFunction(
     const TiDB::TiDBCollatorPtr & collator)
 {
     String result_name = genFuncString(func_name, arg_names, {collator});
+    // nice: this avoids recomputing an identical expression
     if (actions->getSampleBlock().has(result_name))
         return result_name;
     const FunctionBuilderPtr & function_builder = FunctionFactory::instance().get(func_name, context);
@@ -804,6 +806,55 @@ NamesAndTypes DAGExpressionAnalyzer::buildOrderColumns(
     return order_columns;
 }
 
+std::shared_ptr<Repeat> DAGExpressionAnalyzer::buildRepeatGroupingColumns(
+    const tipb::RepeatSource & repeatSource, const ExpressionActionsPtr & actions)
+{
+    GroupingSets group_sets_columns;
+    group_sets_columns.reserve(repeatSource.grouping_sets().size());
+    for (const auto & group_set : repeatSource.grouping_sets())
+    {
+        GroupingSet group_set_columns;
+        group_set_columns.reserve(group_set.grouping_exprs().size());
+        for (const auto & group_exprs : group_set.grouping_exprs())
+        {
+            GroupingColumnNames group_exprs_columns;
+            group_exprs_columns.reserve(group_exprs.grouping_expr().size());
+            for (const auto & group_expr : group_exprs.grouping_expr())
+            {
+                if (group_expr.tp() != tipb::ColumnRef)
+                {
+                    throw TiFlashException("grouping sets expression should be column expr", Errors::Coprocessor::BadRequest);
+                }
+                String cp_name = getActions(group_expr, actions);
+                // TiDB expression computation addresses columns by index offset into the child's chunk schema; convert that to a ClickHouse block column name here.
+                group_exprs_columns.emplace_back(cp_name);
+            }
+            // move here, because the strings were already copied from the input columns.
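+            // (illustration, not code from Repeat.h: for GROUPING SETS ((a), (b)) this builds {{{"a"}}, {{"b"}}} —
+            // one GroupingSet per set, each currently holding a single GroupingColumnNames list)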
+            group_set_columns.emplace_back(std::move(group_exprs_columns));
+        }
+        group_sets_columns.emplace_back(std::move(group_set_columns));
+    }
+    return Repeat::sharedRepeat(group_sets_columns);
+}
+
+ExpressionActionsPtr DAGExpressionAnalyzer::appendRepeatSource(
+    const tipb::RepeatSource & repeatSource, ExpressionActionsChain & chain)
+{
+    auto & last_step = initAndGetLastStep(chain);
+    auto shared_repeat = buildRepeatGroupingColumns(repeatSource, last_step.actions);
+    last_step.actions->add(ExpressionAction::repeatSource(shared_repeat));
+    for (const auto & origin_col : last_step.actions->getSampleBlock().getNamesAndTypesList())
+    {
+        last_step.required_output.push_back(origin_col.name);
+    }
+    // an added column from the REPEAT action.
+    source_columns.emplace_back(Repeat::grouping_identifier_column_name, Repeat::grouping_identifier_column_type);
+    auto before_repeat_source = chain.getLastActions();
+    chain.finalize();
+    chain.clear();
+
+    auto & after_repeat_step = initAndGetLastStep(chain);
+    for (const auto & column : getCurrentInputColumns())
+        after_repeat_step.required_output.push_back(column.name);
+    return before_repeat_source;
+}
+
 std::vector<NameAndTypePair> DAGExpressionAnalyzer::appendOrderBy(
     ExpressionActionsChain & chain,
     const tipb::TopN & topN)
@@ -924,7 +975,7 @@ std::pair<bool, Names> DAGExpressionAnalyzer::buildJoinKey(
     for (int i = 0; i < keys.size(); ++i)
     {
         const auto & key = keys.at(i);
-        bool has_actions = key.tp() != tipb::ExprType::ColumnRef;
+        bool has_actions = key.tp() != tipb::ExprType::ColumnRef; // if the join key is not a column ref, a preceding action has materialized the expression into a column for us
 
         String key_name = getActions(key, actions);
         DataTypePtr current_type = actions->getSampleBlock().getByName(key_name).type;
@@ -989,6 +1040,7 @@ bool DAGExpressionAnalyzer::appendJoinKeyAndJoinFilters(
     ExpressionActionsPtr actions = chain.getLastActions();
 
     bool ret = false;
+    // build the join keys; ClickHouse only outputs one key, so a copy is needed, and if the key is an expression a scalar action must be appended as well
     std::tie(ret, key_names) = buildJoinKey(actions, keys, join_key_types, left, is_right_out_join);
 
     if (!filters.empty())
@@ -997,7 +1049,7 @@ bool DAGExpressionAnalyzer::appendJoinKeyAndJoinFilters(
         std::vector<const tipb::Expr *> filter_vector;
         for (const auto & c : filters)
             filter_vector.push_back(&c);
-        filter_column_name = appendWhere(chain, filter_vector);
+        filter_column_name = appendWhere(chain, filter_vector); // builds the column output by the filter
     }
     /// remove useless columns to avoid duplicate columns
     /// as when compiling the key/filter expression, the origin
@@ -1017,18 +1069,18 @@ bool DAGExpressionAnalyzer::appendJoinKeyAndJoinFilters(
     if (ret)
     {
         std::unordered_set<String> needed_columns;
-        for (const auto & c : getCurrentInputColumns())
+        for (const auto & c : getCurrentInputColumns()) // every incoming column is needed
             needed_columns.insert(c.name);
-        for (const auto & s : key_names)
+        for (const auto & s : key_names) // the newly added key columns are needed too
             needed_columns.insert(s);
-        if (!filter_column_name.empty())
+        if (!filter_column_name.empty()) // as is the per-side filter column added here
             needed_columns.insert(filter_column_name);
 
         const auto & names = actions->getSampleBlock().getNames();
         for (const auto & name : names)
         {
             if (needed_columns.find(name) == needed_columns.end())
-                actions->add(ExpressionAction::removeColumn(name));
+                actions->add(ExpressionAction::removeColumn(name)); // append a follow-up action that prunes the unneeded columns (the final results of these intermediate columns have already been recorded)
         }
     }
     return ret;
@@ -1392,9 +1444,12 @@ String DAGExpressionAnalyzer::getActions(const tipb::Expr & expr, const Expressi
     if (isLiteralExpr(expr))
     {
         Field value = decodeLiteral(expr);
+        // mainly matters for decimals
         DataTypePtr flash_type =
            applyVisitor(FieldToDataType(), value);
         DataTypePtr target_type = inferDataType4Literal(expr);
+        // the unique name of the expression
         ret = exprToString(expr, getCurrentInputColumns()) + "_" + target_type->getName();
+        // if the sample block already has this name, the column already exists
         if (!actions->getSampleBlock().has(ret))
         {
             ColumnWithTypeAndName column;
@@ -1415,10 +1470,12 @@ String DAGExpressionAnalyzer::getActions(const tipb::Expr & expr, const Expressi
     }
     else if (isColumnExpr(expr))
     {
+        // for a column ref, take the name directly from the stream's input columns
         ret = getColumnNameForColumnExpr(expr, getCurrentInputColumns());
     }
     else if (isScalarFunctionExpr(expr))
     {
+        // build a function from the expr and add it into the actions
         ret = DAGExpressionAnalyzerHelper::buildFunction(this, expr, actions);
     }
     else

diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h
index 79b9880ae1a..f1012df4646 100644
--- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h
+++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h
@@ -71,6 +71,10 @@ class DAGExpressionAnalyzer : private boost::noncopyable
         ExpressionActionsChain & chain,
         const std::vector<const tipb::Expr *> & conditions);
 
+    std::shared_ptr<Repeat> buildRepeatGroupingColumns(const tipb::RepeatSource & repeatSource, const ExpressionActionsPtr & actions);
+
+    ExpressionActionsPtr appendRepeatSource(const tipb::RepeatSource & repeatSource, ExpressionActionsChain & chain);
+
     NamesAndTypes buildWindowOrderColumns(const tipb::Sort & window_sort) const;
 
     std::vector<NameAndTypePair> appendOrderBy(

diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp
index b45ade0f7d2..bc805d615c0 100644
--- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp
@@ -150,11 +150,12 @@ String DAGExpressionAnalyzerHelper::buildInFunction(
         DataTypePtr type = inferDataType4Literal(child);
         argument_types.push_back(type);
     }
+    // find the common type
     DataTypePtr resolved_type = getLeastSupertype(argument_types);
     if (!removeNullable(resolved_type)->equals(*removeNullable(argument_types[0])))
     {
         // Need cast left argument
-        key_name = analyzer->appendCast(resolved_type, actions, key_name);
+        key_name = analyzer->appendCast(resolved_type, actions, key_name); // the child's output needs the cast
     }
     analyzer->makeExplicitSet(expr, sample_block, false, key_name);
     argument_names.push_back(key_name);
@@ -401,6 +402,7 @@ String DAGExpressionAnalyzerHelper::buildRegexpFunction(
     return analyzer->applyFunction(func_name, argument_names, actions, collator);
 }
 
+// CASE WHEN functions should go through here
 String DAGExpressionAnalyzerHelper::buildDefaultFunction(
     DAGExpressionAnalyzer * analyzer,
     const tipb::Expr & expr,
@@ -410,8 +412,9 @@ String DAGExpressionAnalyzerHelper::buildDefaultFunction(
     Names argument_names;
     for (const auto & child : expr.children())
     {
+        // if an argument is itself a function, this recursively generates multiple actions (depth-first)
         String name = analyzer->getActions(child, actions);
-        argument_names.push_back(name);
+        argument_names.push_back(name); // once the child function's output is obtained, use it as an argument
     }
     return analyzer->applyFunction(func_name, argument_names, actions, getCollatorFromExpr(expr));
 }

diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp
index 206b59f38e1..9a4a353eeb5 100644
--- a/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp
@@ -46,6 +46,7 @@ bool isSourceNode(const tipb::Executor * root)
 const static String SOURCE_NAME("source");
 const static String SEL_NAME("selection");
 const static String AGG_NAME("aggregation");
+const static String REPEAT_NAME("repeat_source");
 const static String WINDOW_NAME("window");
 const static String WINDOW_SORT_NAME("window_sort");
 const static String HAVING_NAME("having");
@@ -96,6 +97,12 @@ DAGQueryBlock::DAGQueryBlock(const tipb::Executor & root_, QueryBlockIDGenerator
             }
             current = &current->selection().child();
             break;
+        case tipb::ExecType::TypeRepeatSource:
+            GET_METRIC(tiflash_coprocessor_executor_count, type_repeat_source).Increment();
+            assignOrThrowException(&repeat_source, current, REPEAT_NAME);
+            repeat_source_name = current->executor_id();
+            current = &current->repeat_source().child(); // not a leaf node, keep recursing into the child
+            break;
         case tipb::ExecType::TypeStreamAgg:
             RUNTIME_CHECK_MSG(current->aggregation().group_by_size() == 0, STREAM_AGG_ERROR);
         case tipb::ExecType::TypeAggregation:
@@ -134,6 +141,7 @@ DAGQueryBlock::DAGQueryBlock(const tipb::Executor & root_, QueryBlockIDGenerator
 
     assignOrThrowException(&source, current, SOURCE_NAME);
     source_name = current->executor_id();
+    // the source node
     if (current->tp() == tipb::ExecType::TypeJoin)
     {
         if (source->join().children_size() != 2)

diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlock.h b/dbms/src/Flash/Coprocessor/DAGQueryBlock.h
index 297a679d4e9..d18ac84fd90 100644
--- a/dbms/src/Flash/Coprocessor/DAGQueryBlock.h
+++ b/dbms/src/Flash/Coprocessor/DAGQueryBlock.h
@@ -60,12 +60,14 @@ class DAGQueryBlock
     String having_name;
     const tipb::Executor * limit_or_topn = nullptr;
     String limit_or_topn_name;
+    const tipb::Executor * repeat_source = nullptr; // the repeat-source node can only sit directly below the sender
+    String repeat_source_name;
     const tipb::Executor * exchange_sender = nullptr;
     String exchange_sender_name;
     UInt32 id;
     const tipb::Executor * root;
     String qb_column_prefix;
-    std::vector<std::shared_ptr<DAGQueryBlock>> children;
+    std::vector<std::shared_ptr<DAGQueryBlock>> children; // are these children passed in after each DAG operator has been built?
     bool can_restore_pipeline_concurrency = true;
 
     bool isRootQueryBlock() const { return id == 1; };

diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp
index e16b711c8f1..3e4cb641f97 100644
--- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp
@@ -44,6 +44,7 @@
 #include
 #include
 #include
+#include <DataStreams/RepeatSourceBlockInputStream.h>
 #include
 #include
 
@@ -75,6 +76,7 @@ struct AnalysisResult
     ExpressionActionsPtr before_having;
     ExpressionActionsPtr before_order_and_select;
     ExpressionActionsPtr final_projection;
+    ExpressionActionsPtr before_repeat_source;
 
     String filter_column_name;
     String having_column_name;
@@ -131,6 +133,11 @@ AnalysisResult analyzeExpressions(
     if (query_block.limit_or_topn && query_block.limit_or_topn->tp() == tipb::ExecType::TypeTopN)
     {
         res.order_columns = analyzer.appendOrderBy(chain, query_block.limit_or_topn->topn());
+        chain.addStep();
+    }
+
+    if (query_block.repeat_source)
+    {
+        res.before_repeat_source = analyzer.appendRepeatSource(query_block.repeat_source->repeat_source(), chain);
     }
 
     const auto & dag_context = *context.getDAGContext();
@@ -237,6 +244,7 @@ void DAGQueryBlockInterpreter::handleJoin(const tipb::Join & join, DAGPipeline &
     bool is_tiflash_right_join = tiflash_join.isTiFlashRightJoin();
 
     // prepare probe side
+    // prepare the probe side of the join: mainly appending the actions for the join keys and the filter expressions
     auto [probe_side_prepare_actions, probe_key_names, probe_filter_column_name] = JoinInterpreterHelper::prepareJoin(
         context,
         probe_pipeline.firstStream()->getHeader(),
        true,
        is_tiflash_right_join,
         tiflash_join.getProbeConditions());
-    RUNTIME_ASSERT(probe_side_prepare_actions, log, "probe_side_prepare_actions cannot be nullptr");
+    RUNTIME_ASSERT(probe_side_prepare_actions, log, "probe_side_prepare_actions cannot be nullptr"); // after init there is always at least one action
 
     // prepare build side
    // this call goes through the same entry point as above, so it does exactly the same work for the build side
     auto [build_side_prepare_actions, build_key_names, build_filter_column_name] = JoinInterpreterHelper::prepareJoin(
         context,
         build_pipeline.firstStream()->getHeader(),
         is_tiflash_right_join,
         tiflash_join.getBuildConditions());
     RUNTIME_ASSERT(build_side_prepare_actions, log, "build_side_prepare_actions cannot be nullptr");
 
+    // appends the where-columns for the other conditions and the other-eq-from-in conditions
     auto [other_condition_expr, other_filter_column_name, other_eq_filter_from_in_column_name]
         = tiflash_join.genJoinOtherConditionAction(context, left_input_header, right_input_header, probe_side_prepare_actions);
 
     const Settings & settings = context.getSettingsRef();
-    size_t max_block_size_for_cross_join = settings.max_block_size;
+    size_t max_block_size_for_cross_join = settings.max_block_size; // what should happen if the number of repeated result rows exceeds this max?
     fiu_do_on(FailPoints::minimum_block_size_for_cross_join, { max_block_size_for_cross_join = 1; });
 
-    JoinPtr join_ptr = std::make_shared<Join>(
+    JoinPtr join_ptr = std::make_shared<Join>( // make the join
         probe_key_names,
         build_key_names,
         tiflash_join.kind,
@@ -471,7 +481,7 @@ void DAGQueryBlockInterpreter::recordProfileStreams(DAGPipeline & pipeline, cons
 
 void DAGQueryBlockInterpreter::handleExchangeReceiver(DAGPipeline & pipeline)
 {
-    auto exchange_receiver = dagContext().getMPPExchangeReceiver(query_block.source_name);
+    auto exchange_receiver = dagContext().getMPPExchangeReceiver(query_block.source_name); // fetch the source from the registered exchange receivers
     if (unlikely(exchange_receiver == nullptr))
         throw Exception("Can not find exchange receiver for " + query_block.source_name, ErrorCodes::LOGICAL_ERROR);
     // todo choose a more reasonable stream number
@@ -494,14 +504,14 @@ void DAGQueryBlockInterpreter::handleExchangeReceiver(DAGPipeline & pipeline)
             /*stream_id=*/enable_fine_grained_shuffle ? i : 0);
         exchange_receiver_io_input_streams.push_back(stream);
         stream->setExtraInfo(extra_info);
-        pipeline.streams.push_back(stream);
+        pipeline.streams.push_back(stream); // the underlying input stream of each pipeline
     }
     NamesAndTypes source_columns;
     for (const auto & col : pipeline.firstStream()->getHeader())
     {
         source_columns.emplace_back(col.name, col.type);
     }
-    analyzer = std::make_unique<DAGExpressionAnalyzer>(std::move(source_columns), context);
+    analyzer = std::make_unique<DAGExpressionAnalyzer>(std::move(source_columns), context); // the analyzer is initialized here
 }
 
 // for tests, we need to mock ExchangeReceiver blockInputStream as the source stream.
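
(Repeat.cpp itself — 231 lines per the diffstat — is not included in this excerpt. What follows is a minimal,
row-oriented sketch of the replication its comments describe; the names repeatRows, Row, and Cell are hypothetical,
and the real operator works column-wise on Blocks through an ExpressionActions step.)

    #include <algorithm>
    #include <optional>
    #include <string>
    #include <vector>

    using Cell = std::optional<std::string>; // a nullable cell
    using Row = std::vector<Cell>;
    using Rows = std::vector<Row>;

    // sets[k] holds the positions of the columns belonging to grouping set k.
    Rows repeatRows(const Rows & input, const std::vector<std::vector<size_t>> & sets)
    {
        // gather every grouping-set column position, across all sets
        std::vector<size_t> all_grouping_columns;
        for (const auto & s : sets)
            all_grouping_columns.insert(all_grouping_columns.end(), s.begin(), s.end());

        Rows output;
        for (const auto & row : input)
        {
            for (size_t k = 0; k < sets.size(); ++k)
            {
                Row copy = row;
                for (size_t pos : all_grouping_columns)
                {
                    // null-out the grouping-set columns that do not belong to set k
                    bool in_current_set = std::find(sets[k].begin(), sets[k].end(), pos) != sets[k].end();
                    if (!in_current_set)
                        copy[pos] = std::nullopt;
                }
                // the appended groupingID column holds the constant k + 1 for this replica
                copy.emplace_back(std::to_string(k + 1));
                output.push_back(std::move(copy));
            }
        }
        return output;
    }

(Working column-wise instead lets the non-grouping columns be replicated wholesale rather than cell by cell.)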
@@ -517,7 +527,7 @@ void DAGQueryBlockInterpreter::handleProjection(DAGPipeline & pipeline, const ti
 {
     NamesAndTypes input_columns;
     pipeline.streams = input_streams_vec[0];
-    for (auto const & p : pipeline.firstStream()->getHeader().getNamesAndTypesList())
+    for (auto const & p : pipeline.firstStream()->getHeader().getNamesAndTypesList()) // the initial block column names
         input_columns.emplace_back(p.name, p.type);
     DAGExpressionAnalyzer dag_analyzer(std::move(input_columns), context);
     ExpressionActionsChain chain;
@@ -527,12 +537,12 @@ void DAGQueryBlockInterpreter::handleProjection(DAGPipeline & pipeline, const ti
     UniqueNameGenerator unique_name_generator;
     for (const auto & expr : projection.exprs())
     {
-        auto expr_name = dag_analyzer.getActions(expr, last_step.actions);
-        last_step.required_output.emplace_back(expr_name);
+        auto expr_name = dag_analyzer.getActions(expr, last_step.actions); // add the extra column produced by the expr
+        last_step.required_output.emplace_back(expr_name); // and add it to this step's final output columns
         const auto & col = last_step.actions->getSampleBlock().getByName(expr_name);
         String alias = unique_name_generator.toUniqueName(col.name);
         output_columns.emplace_back(alias, col.type);
-        project_cols.emplace_back(col.name, alias);
+        project_cols.emplace_back(col.name, alias); // it suffices to guarantee that the projection output contains no duplicate column names
     }
     executeExpression(pipeline, chain.getLastActions(), log, "before projection");
     executeProject(pipeline, project_cols, "projection");
@@ -570,18 +580,18 @@ void DAGQueryBlockInterpreter::handleWindowOrder(DAGPipeline & pipeline, const t
 }
 
 // To execute a query block, you have to:
-// 1. generate the date stream and push it to pipeline.
+// 1. generate the data stream and push it to pipeline.
 // 2. assign the analyzer
 // 3. construct a final projection, even if it's not necessary. just construct it.
 // Talking about projection, it has the following rules.
 // 1. if the query block does not contain agg, then the final project is the same as the source Executor
-// 2. if the query block contains agg, then the final project is the same as agg Executor
+// 2. if the query block contains agg/repeat, then the final project is the same as agg/repeat Executor
 // 3.
//    if the cop task may contain more than 1 query block, and the current query block is not the root
// query block, then the project should add an alias for each column that needs to be projected, something
// like final_project.emplace_back(col.name, query_block.qb_column_prefix + col.name);
 void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline)
 {
-    if (query_block.source->tp() == tipb::ExecType::TypeJoin)
+    if (query_block.source->tp() == tipb::ExecType::TypeJoin) // check what the underlying source operator is
     {
         SubqueryForSet right_query;
         handleJoin(query_block.source->join(), pipeline, right_query, query_block.source->fine_grained_shuffle_stream_count());
@@ -632,6 +642,7 @@ void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline)
             Errors::Coprocessor::BadRequest);
     }
 
+    // the analyzer is used here: it first takes the most basic source columns as its base, then analyzes each upper (non-leaf) operator of the query block
     auto res = analyzeExpressions(
         context,
         *analyzer,
@@ -676,14 +687,25 @@ void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline)
         recordProfileStreams(pipeline, query_block.limit_or_topn_name);
     }
 
-    // execute final project action
-    executeProject(pipeline, final_project, "final projection");
     // execute limit
     if (query_block.limit_or_topn && query_block.limit_or_topn->tp() == tipb::TypeLimit)
     {
         executeLimit(pipeline);
         recordProfileStreams(pipeline, query_block.limit_or_topn_name);
     }
+
+    // execute the repeat source OP after all filters/limits and so on.
+    // since the repeat source OP has row-replication work to do, placing it after the limit avoids some unnecessary work.
+    // it also goes before the final projection, because the base columns must still be recognizable as grouping-set columns before their aliases are changed.
+    if (res.before_repeat_source)
+    {
+        executeRepeatSource(pipeline, res.before_repeat_source);
+        recordProfileStreams(pipeline, query_block.repeat_source_name);
+    }
+
+    // execute final project action
+    executeProject(pipeline, final_project, "final projection");
+
     restorePipelineConcurrency(pipeline);
 
     // execute exchange_sender
@@ -724,6 +746,13 @@ void DAGQueryBlockInterpreter::executeLimit(DAGPipeline & pipeline)
     }
 }
 
+void DAGQueryBlockInterpreter::executeRepeatSource(DAGPipeline & pipeline, const ExpressionActionsPtr & expr)
+{
+    pipeline.transform([&](auto & stream) {
+        stream = std::make_shared<RepeatSourceBlockInputStream>(stream, expr);
+    });
+}
+
 void DAGQueryBlockInterpreter::handleExchangeSender(DAGPipeline & pipeline)
 {
     RUNTIME_ASSERT(dagContext().isMPPTask() && dagContext().tunnel_set != nullptr, log, "exchange_sender only run in MPP");

diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h
index d2657b5c67a..8b4746bb6a2 100644
--- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h
+++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h
@@ -69,6 +69,7 @@ class DAGQueryBlockInterpreter
     void executeWindowOrder(DAGPipeline & pipeline, SortDescription sort_desc, bool enable_fine_grained_shuffle);
     void executeOrder(DAGPipeline & pipeline, const NamesAndTypes & order_columns);
     void executeLimit(DAGPipeline & pipeline);
+    void executeRepeatSource(DAGPipeline & pipeline, const ExpressionActionsPtr & expr);
     void executeWindow(
         DAGPipeline & pipeline,
         WindowDescription & window_description,

diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp
index 83563c47338..c74b52ed77d 100755
--- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp
@@ -1131,7 +1131,7 @@ Field decodeLiteral(const tipb::Expr & expr)
     }
 }
-String getColumnNameForColumnExpr(const tipb::Expr & expr, const std::vector<NameAndTypePair> & input_col)
+String getColumnNameForColumnExpr(const tipb::Expr & expr, const std::vector<NameAndTypePair> & input_col) // converts the column index carried in the expr into the column name at that position of the input vector
 {
     auto column_index = decodeDAGInt64(expr.val());
     if (column_index < 0 || column_index >= static_cast<Int64>(input_col.size()))

diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp
index 61249f19642..113602a1d82 100644
--- a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp
+++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp
@@ -55,7 +55,7 @@ BlockInputStreams InterpreterDAG::executeQueryBlock(DAGQueryBlock & query_block)
     }
     DAGQueryBlockInterpreter query_block_interpreter(
         context,
-        input_streams_vec,
+        input_streams_vec, // the input sources of the underlying DAG
         query_block,
         max_streams);
     return query_block_interpreter.execute();

diff --git a/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp b/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp
index d2e18a36e00..1e2b102d0c6 100644
--- a/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp
+++ b/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp
@@ -89,7 +89,7 @@ void executeExpression(
 {
     if (expr_actions && !expr_actions->getActions().empty())
     {
-        pipeline.transform([&](auto & stream) {
+        pipeline.transform([&](auto & stream) { // the data stream changes here: it is wrapped with an ExpressionBlockInputStream
             stream = std::make_shared<ExpressionBlockInputStream>(stream, expr_actions, log->identifier());
             stream->setExtraInfo(extra_info);
         });

diff --git a/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.cpp b/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.cpp
index 275042fddb0..6c876078d10 100644
--- a/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.cpp
+++ b/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.cpp
@@ -179,7 +179,7 @@ std::tuple<ExpressionActionsPtr, String, String> doGenJoinOtherConditionAction(
     if (join.other_conditions_size() == 0 && join.other_eq_conditions_from_in_size() == 0)
         return {nullptr, "", ""};
 
-    DAGExpressionAnalyzer dag_analyzer(source_columns, context);
+    DAGExpressionAnalyzer dag_analyzer(source_columns, context); // a fresh DAG analyzer is opened here
     ExpressionActionsChain chain;
 
     String filter_column_for_other_condition;
@@ -190,7 +190,7 @@ std::tuple<ExpressionActionsPtr, String, String> doGenJoinOtherConditionAction(
         {
             condition_vector.push_back(&c);
         }
-        filter_column_for_other_condition = dag_analyzer.appendWhere(chain, condition_vector);
+        filter_column_for_other_condition = dag_analyzer.appendWhere(chain, condition_vector); // the other-condition filter does not affect the existing schema
     }
 
     String filter_column_for_other_eq_condition;
@@ -201,7 +201,7 @@ std::tuple<ExpressionActionsPtr, String, String> doGenJoinOtherConditionAction(
         {
             condition_vector.push_back(&c);
         }
-        filter_column_for_other_eq_condition = dag_analyzer.appendWhere(chain, condition_vector);
+        filter_column_for_other_eq_condition = dag_analyzer.appendWhere(chain, condition_vector); // the other-eq filter does not affect the existing schema either
     }
 
     return {chain.getLastActions(), std::move(filter_column_for_other_condition), std::move(filter_column_for_other_eq_condition)};
@@ -230,7 +230,7 @@ String TiFlashJoin::genMatchHelperName(const Block & header1, const Block & head
     {
         match_helper_name = fmt::format("{}{}", Join::match_helper_prefix, ++i);
     }
-    return match_helper_name;
+    return match_helper_name; // a unique name
 }
 
 NamesAndTypes TiFlashJoin::genColumnsForOtherJoinFilter(
@@ -248,7 +248,8 @@ NamesAndTypes TiFlashJoin::genColumnsForOtherJoinFilter(
         }
         return true;
     };
-    if (unlikely(!is_prepare_actions_valid(build_side_index == 1 ?
 left_input_header : right_input_header, probe_prepare_join_actions)))
+    // assert that every column of the probe side's original block can be found in the probe actions
+    if (unlikely(!is_prepare_actions_valid(build_side_index == 1 ? left_input_header : right_input_header, probe_prepare_join_actions))) // so a ternary can be used right in an argument position
     {
         throw TiFlashException("probe_prepare_join_actions isn't valid", Errors::Coprocessor::Internal);
     }
@@ -295,9 +296,9 @@ NamesAndTypes TiFlashJoin::genColumnsForOtherJoinFilter(
     bool make_nullable = build_side_index == 1
         ? join.join_type() == tipb::JoinType::TypeRightOuterJoin
         : join.join_type() == tipb::JoinType::TypeLeftOuterJoin;
-    append_new_columns(probe_prepare_join_actions->getSampleBlock(), make_nullable);
+    append_new_columns(probe_prepare_join_actions->getSampleBlock(), make_nullable); // the new columns produced by the probe side need to be appended
 
-    return columns_for_other_join_filter;
+    return columns_for_other_join_filter; // nulls are filled following the probe side's 1/0 match result; if the 1/0 comes from the build side, the row is simply filtered out or ignored
 }
 
 NamesAndTypes TiFlashJoin::genJoinOutputColumns(
@@ -334,13 +335,14 @@ std::tuple<ExpressionActionsPtr, String, String> TiFlashJoin::genJoinOtherCondit
     const Block & right_input_header,
     const ExpressionActionsPtr & probe_side_prepare_join) const
 {
+    // append the original columns of both sides plus the columns generated by the probe side
     auto columns_for_other_join_filter
         = genColumnsForOtherJoinFilter(
             left_input_header,
             right_input_header,
             probe_side_prepare_join);
 
-    return doGenJoinOtherConditionAction(context, join, columns_for_other_join_filter);
+    return doGenJoinOtherConditionAction(context, join, columns_for_other_join_filter); // then generate the new actions (columns) from the other conditions
 }
 
 std::tuple<ExpressionActionsPtr, Names, String> prepareJoin(
@@ -359,6 +361,7 @@ std::tuple<ExpressionActionsPtr, Names, String> prepareJoin(
     ExpressionActionsChain chain;
     Names key_names;
     String filter_column_name;
+    // true to its name: append the join keys and one side's join filters
     dag_analyzer.appendJoinKeyAndJoinFilters(chain, keys, join_key_types, key_names, left, is_right_out_join, filters, filter_column_name);
     return {chain.getLastActions(), std::move(key_names), std::move(filter_column_name)};
 }

diff --git a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp
index 86a5edc7406..d2536255a76 100644
--- a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp
+++ b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp
@@ -99,6 +99,35 @@ bool collectForTableScan(std::vector<tipb::FieldType> & output_field_types, cons
     return false;
 }
 
+bool collectForRepeat(std::vector<tipb::FieldType> & out_field_types, const tipb::Executor & executor)
+{
+    auto & out_child_fields = out_field_types;
+    // collect output_field_types of children
+    getChildren(executor).forEach([&out_child_fields](const tipb::Executor & child) {
+        traverseExecutorTree(child, [&out_child_fields](const tipb::Executor & e) { return collectForExecutor(out_child_fields, e); });
+    });
+
+    // executor.repeat_source().grouping_sets().Get(1).grouping_exprs().Get(1).grouping_expr().Get(1).
+    // /// the type of grouping set column is always nullable
+    // auto updated_field_type = field_type;
+    // updated_field_type.set_flag(updated_field_type.flag() & (~static_cast<UInt32>(TiDB::ColumnFlagNotNull)));
+    // output_field_types.push_back(updated_field_type);
+
+    {
+        // for additional groupingID column.
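+        // (groupingID is encoded as a TiDB signed 64-bit integer with binary charset/collation; flen/decimal of -1
+        // mean "unspecified" — mirroring the schema built in compileRepeat in RepeatSourceBinder.cpp)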
diff --git a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp index 86a5edc7406..d2536255a76 100644 --- a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp +++ b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp @@ -99,6 +99,35 @@ bool collectForTableScan(std::vector & output_field_types, cons return false; } +bool collectForRepeat(std::vector & out_field_types, const tipb::Executor & executor) +{ + auto & out_child_fields = out_field_types; + // collect output_field_types of children + getChildren(executor).forEach([&out_child_fields](const tipb::Executor & child) { + traverseExecutorTree(child, [&out_child_fields](const tipb::Executor & e) { return collectForExecutor(out_child_fields, e); }); + }); + +// executor.repeat_source().grouping_sets().Get(1).grouping_exprs().Get(1).grouping_expr().Get(1). +// /// the type of grouping set column is always nullable +// auto updated_field_type = field_type; +// updated_field_type.set_flag(updated_field_type.flag() & (~static_cast(TiDB::ColumnFlagNotNull))); +// output_field_types.push_back(updated_field_type); + + { + // for the additional groupingID column. + tipb::FieldType field_type{}; + field_type.set_tp(TiDB::TypeLongLong); + field_type.set_charset("binary"); + field_type.set_collate(TiDB::ITiDBCollator::BINARY); + field_type.set_flag(0); + field_type.set_flen(-1); + field_type.set_decimal(-1); + out_field_types.push_back(field_type); + } + return false; +} + bool collectForJoin(std::vector & output_field_types, const tipb::Executor & executor) { // collect output_field_types of children @@ -190,6 +219,8 @@ bool collectForExecutor(std::vector & output_field_types, const return collectForTableScan(output_field_types, executor.partition_table_scan()); case tipb::ExecType::TypeJoin: return collectForJoin(output_field_types, executor); + case tipb::ExecType::TypeRepeatSource: + return collectForRepeat(output_field_types, executor); default: return true; } diff --git a/dbms/src/Flash/Mpp/MPPHandler.cpp b/dbms/src/Flash/Mpp/MPPHandler.cpp index 753653ac7b0..14f1d6e5a05 100644 --- a/dbms/src/Flash/Mpp/MPPHandler.cpp +++ b/dbms/src/Flash/Mpp/MPPHandler.cpp @@ -82,6 +82,7 @@ grpc::Status MPPHandler::execute(const ContextPtr & context, mpp::DispatchTaskRe { Stopwatch stopwatch; task = MPPTask::newTask(task_request.meta(), context); + task->prepare(task_request); addRetryRegion(context, response); diff --git a/dbms/src/Flash/Mpp/MPPTask.cpp b/dbms/src/Flash/Mpp/MPPTask.cpp index 4f97a94afd7..c2d5b4ccc94 100644 --- a/dbms/src/Flash/Mpp/MPPTask.cpp +++ b/dbms/src/Flash/Mpp/MPPTask.cpp @@ -149,6 +149,7 @@ void MPPTask::finishWrite() void MPPTask::run() { + // schedule the task on the thread pool and detach it newThreadManager()->scheduleThenDetach(true, "MPPTask", [self = shared_from_this()] { self->runImpl(); }); } @@ -213,6 +214,7 @@ void MPPTask::initExchangeReceivers() if (status != RUNNING) throw Exception("exchange receiver map can not be initialized, because the task is not in running state"); + // receivers work in push mode: this task only reacts after data has been pushed in receiver_set_local->addExchangeReceiver(executor_id, exchange_receiver); } return true; @@ -340,6 +342,7 @@ void MPPTask::prepare(const mpp::DispatchTaskRequest & task_request) void MPPTask::preprocess() { auto start_time = Clock::now(); + // register the exchange receivers initExchangeReceivers(); LOG_DEBUG(log, "init exchange receiver done"); query_executor_holder.set(queryExecute(*context)); @@ -388,6 +391,7 @@ void MPPTask::runImpl() schedule_entry.setNeededThreads(estimateCountOfNewThreads()); LOG_DEBUG(log, "Estimate new thread count of query: {} including tunnel_threads: {}, receiver_threads: {}", schedule_entry.getNeededThreads(), dag_context->tunnel_set->getExternalThreadCnt(), new_thread_count_of_mpp_receiver); + // much like waiting on a channel in golang scheduleOrWait(); LOG_INFO(log, "task starts running"); diff --git a/dbms/src/Flash/Mpp/MPPTaskScheduleEntry.h b/dbms/src/Flash/Mpp/MPPTaskScheduleEntry.h index 60ccb9297c0..bc5522dfdfe 100644 --- a/dbms/src/Flash/Mpp/MPPTaskScheduleEntry.h +++ b/dbms/src/Flash/Mpp/MPPTaskScheduleEntry.h @@ -52,6 +52,7 @@ class MPPTaskScheduleEntry int needed_threads; std::mutex schedule_mu; + // the condition variable used for the schedule wait std::condition_variable schedule_cv; ScheduleState schedule_state; const LoggerPtr log;
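The "waiting on a channel" note above is, concretely, a plain condition-variable wait on the members shown in this header. A minimal sketch of what scheduleOrWait presumably boils down to (the WAITING state name is an assumption, not taken from this patch):

std::unique_lock<std::mutex> lock(schedule_mu);
// block the MPP task until the scheduler flips schedule_state and notifies schedule_cv
schedule_cv.wait(lock, [&] { return schedule_state != ScheduleState::WAITING; });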
diff --git a/dbms/src/Flash/Mpp/MPPTunnelSet.cpp b/dbms/src/Flash/Mpp/MPPTunnelSet.cpp index a308a9717a3..a3ba44127e3 100644 --- a/dbms/src/Flash/Mpp/MPPTunnelSet.cpp +++ b/dbms/src/Flash/Mpp/MPPTunnelSet.cpp @@ -244,9 +244,11 @@ void MPPTunnelSetBase::fineGrainedShuffleWrite( template void MPPTunnelSetBase::registerTunnel(const MPPTaskId & receiver_task_id, const TunnelPtr & tunnel) { + // the tunnel is registered in the map if (receiver_task_id_to_index_map.find(receiver_task_id) != receiver_task_id_to_index_map.end()) throw Exception(fmt::format("the tunnel {} has been registered", tunnel->id())); + // the tunnels themselves are simply kept in a vector; the map stores each tunnel's index receiver_task_id_to_index_map[receiver_task_id] = tunnels.size(); tunnels.push_back(tunnel); if (!tunnel->isLocal() && !tunnel->isAsync()) diff --git a/dbms/src/Flash/Statistics/traverseExecutors.cpp b/dbms/src/Flash/Statistics/traverseExecutors.cpp index dd720920dcd..801002a10a8 100644 --- a/dbms/src/Flash/Statistics/traverseExecutors.cpp +++ b/dbms/src/Flash/Statistics/traverseExecutors.cpp @@ -41,6 +41,8 @@ Children getChildren(const tipb::Executor & executor) return Children{&executor.topn().child()}; case tipb::ExecType::TypeLimit: return Children{&executor.limit().child()}; + case tipb::ExecType::TypeRepeatSource: + return Children{&executor.repeat_source().child()}; case tipb::ExecType::TypeProjection: return Children{&executor.projection().child()}; case tipb::ExecType::TypeExchangeSender: diff --git a/dbms/src/Flash/tests/gtest_filter_executor.cpp b/dbms/src/Flash/tests/gtest_filter_executor.cpp index 3a3b5b16b2c..c577772e46c 100644 --- a/dbms/src/Flash/tests/gtest_filter_executor.cpp +++ b/dbms/src/Flash/tests/gtest_filter_executor.cpp @@ -208,6 +208,15 @@ try request, {toNullableVec({"banana"}), toNullableVec({"banana"})}); + + request = context + .scan("test_db", "test_table") + .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + .build(context); + executeAndAssertColumnsEqual( + request, + {toNullableVec({"banana"}), + toNullableVec({"banana"})}); } CATCH diff --git a/dbms/src/Flash/tests/gtest_interpreter.cpp b/dbms/src/Flash/tests/gtest_interpreter.cpp index e129c5587a5..b5a2cd80b16 100644 --- a/dbms/src/Flash/tests/gtest_interpreter.cpp +++ b/dbms/src/Flash/tests/gtest_interpreter.cpp @@ -50,6 +50,13 @@ class InterpreterExecuteTest : public DB::tests::InterpreterTestUtils TEST_F(InterpreterExecuteTest, SingleQueryBlock) try { + + //auto grouping_sets = MockVecColumnNameVec{MockColumnNameVec{"s1"}, MockColumnNameVec{"s2"}}; + // auto request = context.scan("test_db", "test_table_1").repeat(grouping_sets).build(context); + // { + // ASSERT_BLOCKINPUTSTREAM_EQAUL("", request, 10); + // } + auto request = context.scan("test_db", "test_table_1") .filter(eq(col("s2"), col("s3"))) .aggregation({Max(col("s1"))}, {col("s2"), col("s3")})
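The .repeat(...) argument used in the test above nests three levels deep: grouping sets -> grouping exprs -> column names (see the MockColumnNameVec family of aliases added to mockExecutor.h later in this patch). Spelled out step by step, the same call reads:

// the grouping sets {<s1>, <s2>} from the test above, built level by level
MockColumnNameVec exprs_s1{"s1"};                      // one list of column names
MockVecColumnNameVec set_s1{exprs_s1};                 // one grouping set
MockVecColumnNameVec set_s2{MockColumnNameVec{"s2"}};
MockVVecColumnNameVec grouping_sets{set_s1, set_s2};   // all grouping sets
auto req = context.scan("test_db", "test_table").repeat(grouping_sets).build(context);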
diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index 46547a4f686..e25ae02bf88 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -81,6 +81,7 @@ ExpressionAction ExpressionAction::applyFunction(const FunctionBuilderPtr & func return a; } +// this suits the repeat source well for adding the groupingID column ExpressionAction ExpressionAction::addColumn(const ColumnWithTypeAndName & added_column_) { ExpressionAction a; @@ -135,8 +136,16 @@ ExpressionAction ExpressionAction::ordinaryJoin(std::shared_ptr join return a; } +ExpressionAction ExpressionAction::repeatSource(std::shared_ptr repeat_source_) +{ + ExpressionAction a; + a.type = REPEAT; + a.repeat = repeat_source_; + return a; +} -void ExpressionAction::prepare(Block & sample_block) +void ExpressionAction::prepare(Block & sample_block) // the prepare phase { /** Constant expressions should be evaluated, and put the result in sample_block. */ @@ -169,6 +178,7 @@ void ExpressionAction::prepare(Block & sample_block) new_column.type = result_type; sample_block.insert(std::move(new_column)); + // both the arguments and the result of the execution are columns in the block function->execute(sample_block, arguments, result_position); /// If the result is not a constant, just in case, we will consider the result as unknown. @@ -184,11 +194,12 @@ void ExpressionAction::prepare(Block & sample_block) /// Change the size to 1. if (col.column->empty()) - col.column = col.column->cloneResized(1); + col.column = col.column->cloneResized(1); // a constant column only needs to keep a single value } } else { + // if it cannot be evaluated eagerly, insert an unknown column carrying just the type and name sample_block.insert({nullptr, result_type, result_name}); } @@ -222,24 +233,41 @@ void ExpressionAction::prepare(Block & sample_block) } } - for (const auto & col : columns_added_by_join) + for (const auto & col : columns_added_by_join) // so far the sample block held only the left side's columns; the right side's are added here sample_block.insert(ColumnWithTypeAndName(nullptr, col.type, col.name)); break; } + case REPEAT: + { + // sample_block is only used for schema checks by the following steps; modify it whenever the schema changes during this action. + auto name_set = std::set(); + repeat->getAllGroupSetColumnNames(name_set); + // make the grouping set columns nullable. + for (const auto & col_name : name_set) + { + auto & column_with_name = sample_block.getByName(col_name); + column_with_name.type = makeNullable(column_with_name.type); + if (column_with_name.column != nullptr) + column_with_name.column = makeNullable(column_with_name.column); + } + // fill one more column: groupingID. + sample_block.insert({nullptr, repeat->grouping_identifier_column_type, repeat->grouping_identifier_column_name}); + break; + } + case PROJECT: { Block new_block; - for (auto & projection : projections) + for (auto & projection : projections) // change alias { const std::string & name = projection.first; const std::string & alias = projection.second; ColumnWithTypeAndName column = sample_block.getByName(name); if (!alias.empty()) column.name = alias; - new_block.insert(std::move(column)); + new_block.insert(std::move(column)); // effectively a direct move (a new block is used because some of the old columns may be dropped) } sample_block.swap(new_block); @@ -274,7 +302,7 @@ } -void ExpressionAction::execute(Block & block) const +void ExpressionAction::execute(Block & block) const // the execute phase { if (type == REMOVE_COLUMN || type == COPY_COLUMN) if (!block.has(source_name)) @@ -293,10 +321,10 @@ void ExpressionAction::execute(Block & block) const { if (!block.has(argument_names[i])) throw Exception("Not found column: '" + argument_names[i] + "'", ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); - arguments[i] = block.getPositionByName(argument_names[i]); + arguments[i] = block.getPositionByName(argument_names[i]); // find the column's offset } - size_t num_columns_without_result = block.columns(); + size_t num_columns_without_result = block.columns(); // take the current column count as the result column's offset block.insert({nullptr, result_type, result_name}); function->execute(block, arguments, num_columns_without_result); @@ -313,6 +341,12 @@ void ExpressionAction::execute(Block & block) const break; } + case REPEAT: + { + repeat->replicateAndFillNull(block); // in the execute phase, repeat fills the block in place directly + break; + } + case PROJECT: { Block new_block;
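The prepare/execute annotations above are worth a compact restatement: prepare() runs once against a data-free sample block so that every later step can check schemas, while execute() runs per data block. A hedged sketch of the contract:

// two-phase contract (sketch): schema first, data later
const Block & sample = actions->getSampleBlock(); // shaped by prepare(); columns may be nullptr placeholders
actions->execute(block);                          // the per-block data transformation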
@@ -458,6 +492,7 @@ void ExpressionActions::addImpl(ExpressionAction action, Names & new_names) arguments[i] = sample_block.getByName(action.argument_names[i]); } + // ordinary functions can simply be built with the default creator action.function = action.function_builder->build(arguments, action.collator); action.result_type = action.function->getReturnType(); } @@ -716,7 +751,7 @@ BlockInputStreamPtr ExpressionActions::createStreamWithNonJoinedDataIfFullOrRigh return {}; } -void ExpressionActionsChain::addStep() +void ExpressionActionsChain::addStep() // only prepares the input columns of the newly appended step for the successor { if (steps.empty()) throw Exception("Cannot add action to empty ExpressionActionsChain", ErrorCodes::LOGICAL_ERROR); diff --git a/dbms/src/Interpreters/ExpressionActions.h b/dbms/src/Interpreters/ExpressionActions.h index 68fb35f8048..0a9b9bd99fa 100644 --- a/dbms/src/Interpreters/ExpressionActions.h +++ b/dbms/src/Interpreters/ExpressionActions.h @@ -34,6 +34,7 @@ using NameWithAlias = std::pair; using NamesWithAliases = std::vector; class Join; +class Repeat; class IFunctionBase; using FunctionBasePtr = std::shared_ptr; @@ -65,6 +66,8 @@ struct ExpressionAction /// Reorder and rename the columns, delete the extra ones. The same column names are allowed in the result. PROJECT, + + REPEAT, }; Type type; @@ -90,6 +93,10 @@ struct ExpressionAction /// For PROJECT. NamesWithAliases projections; + /// For REPEAT. + std::shared_ptr repeat; + NamesAndTypesList columns_added_by_repeat; + /// If result_name_ == "", as name "function_name(arguments separated by commas) is used". static ExpressionAction applyFunction( const FunctionBuilderPtr & function_, @@ -103,6 +110,7 @@ struct ExpressionAction static ExpressionAction project(const NamesWithAliases & projected_columns_); static ExpressionAction project(const Names & projected_columns_); static ExpressionAction ordinaryJoin(std::shared_ptr join_, const NamesAndTypesList & columns_added_by_join_); + static ExpressionAction repeatSource(std::shared_ptr repeat_source_); /// Which columns necessary to perform this action. Names getNeededColumns() const;
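Putting the new REPEAT pieces together, the wiring mirrors what DAGExpressionAnalyzer::appendRepeatSource does later in this patch:

// wiring sketch: hand a shared Repeat to the actions chain
auto shared_repeat = Repeat::sharedRepeat(grouping_sets);
actions->add(ExpressionAction::repeatSource(shared_repeat));
// prepare(): grouping-set columns turn nullable and groupingID is appended to the sample block
// execute(): replicateAndFillNull() multiplies the rows and fills in the nulls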
diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index aca814f8501..3cc7ae92874 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -539,7 +539,7 @@ void insertRowToList(Join::RowRefList * list, Join::RowRefList * elem, Block * s { elem->next = list->next; // NOLINT(clang-analyzer-core.NullDereference) list->next = elem; - elem->block = stored_block; + elem->block = stored_block; // a list structure, because "map all" strictness keeps every matching row elem->row_num = index; } @@ -579,7 +579,7 @@ struct Inserter * That is, the former second element, if it was, will be the third, and so on. */ auto elem = reinterpret_cast(pool.alloc(sizeof(MappedType))); - insertRowToList(&emplace_result.getMapped(), elem, stored_block, i); + insertRowToList(&emplace_result.getMapped(), elem, stored_block, i); // the hash table maintains references into the stored block plus row numbers; this list acts as the value for the hash key } } }; @@ -834,7 +834,7 @@ void recordFilteredRows(const Block & block, const String & filter_column, Colum PaddedPODArray & mutable_null_map = static_cast(*mutable_null_map_holder).getData(); const auto & nested_column = column->isColumnNullable() ? static_cast(*column).getNestedColumnPtr() : column; - for (size_t i = 0, size = nested_column->size(); i < size; ++i) + for (size_t i = 0, size = nested_column->size(); i < size; ++i) // if the companion filter column reads as 0 for a row, that row is recorded as null too mutable_null_map[i] |= (!nested_column->getInt(i)); null_map_holder = std::move(mutable_null_map_holder); @@ -861,11 +861,13 @@ void Join::insertFromBlock(const Block & block, size_t stream_index) if (unlikely(!initialized)) throw Exception("Logical error: Join was not initialized", ErrorCodes::LOGICAL_ERROR); + // materialize a block here Block * stored_block = nullptr; { std::lock_guard lk(blocks_lock); total_input_build_rows += block.rows(); blocks.push_back(block); + // a copy of the block is kept stored_block = &blocks.back(); original_blocks.push_back(block); } @@ -1371,9 +1373,9 @@ void Join::handleOtherConditions(Block & block, std::unique_ptr { other_condition_ptr->execute(block); - auto filter_column = ColumnUInt8::create(); + auto filter_column = ColumnUInt8::create(); // create a UInt8 column holding the true/false result auto & filter = filter_column->getData(); - filter.assign(block.rows(), static_cast(1)); + filter.assign(block.rows(), static_cast(1)); // initialize everything to 1, so the merges below act as pure ANDs if (!other_filter_column.empty()) { mergeNullAndFilterResult(block, filter, other_filter_column, false); }
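A minimal model of the row-ref chaining annotated above (shapes modeled on the RowRef/RowRefList definitions in Join.h below; an illustration, not the exact declarations):

struct RowRef { const Block * block; size_t row_num; };
struct RowRefList : RowRef { RowRefList * next = nullptr; };
// insertRowToList() splices the new element in right behind the head, so the head
// object living inside the hash map's mapped value never has to move:
void insertAfterHead(RowRefList * head, RowRefList * elem) { elem->next = head->next; head->next = elem; }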
@@ -1562,6 +1564,7 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr /// Memoize key columns to work with. for (size_t i = 0; i < keys_size; ++i) { + // ColumnPtr derives from intrusive_ptr, so get() yields the raw pointer to the column key_columns[i] = block.getByName(key_names_left[i]).column.get(); if (ColumnPtr converted = key_columns[i]->convertToFullColumnIfConst()) @@ -1574,9 +1577,12 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr /// Keys with NULL value in any column won't join to anything. ColumnPtr null_map_holder; ConstNullMapPtr null_map{}; + // extract the null map attributes of the join key columns extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map); + /// reuse null_map to record the filtered rows, the rows contains NULL or does not /// match the join filter won't join to anything + // this effectively overlays the left filter column's null/false output onto the null map as well recordFilteredRows(block, left_filter_column, null_map_holder, null_map); size_t existing_columns = block.columns(); @@ -1611,12 +1617,12 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr /// Add new columns to the block. size_t num_columns_to_add = sample_block_with_columns_to_add.columns(); MutableColumns added_columns; - added_columns.reserve(num_columns_to_add); + added_columns.reserve(num_columns_to_add); // create the columns that need to be appended std::vector right_table_column_indexes; for (size_t i = 0; i < num_columns_to_add; ++i) { - right_table_column_indexes.push_back(i + existing_columns); + right_table_column_indexes.push_back(i + existing_columns); // record the offsets at which they will be inserted } std::vector right_indexes; @@ -1639,17 +1645,17 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr if (((kind == ASTTableJoin::Kind::Inner || kind == ASTTableJoin::Kind::Right) && strictness == ASTTableJoin::Strictness::Any) || kind == ASTTableJoin::Kind::Anti) - filter = std::make_unique(rows); + filter = std::make_unique(rows); // used to remove elements coming from the right block /// Used with ALL ... JOIN IColumn::Offset current_offset = 0; std::unique_ptr offsets_to_replicate; if (strictness == ASTTableJoin::Strictness::All) - offsets_to_replicate = std::make_unique(rows); + offsets_to_replicate = std::make_unique(rows); // recorded while joining and later used to replicate rows of the left block switch (type) - { + { // after the join, every matched right-side row has been appended to added_columns, and each probe row's replicate offset has been set to the number of joined rows #define M(TYPE) \ case Join::Type::TYPE: \ joinBlockImplType>::Type>( \ @@ -1676,7 +1682,7 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr } FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::random_join_prob_failpoint); for (size_t i = 0; i < num_columns_to_add; ++i) - { + { // insert the added columns into the left-side block const ColumnWithTypeAndName & sample_col = sample_block_with_columns_to_add.getByPosition(i); block.insert(ColumnWithTypeAndName(std::move(added_columns[i]), sample_col.type, sample_col.name)); } @@ -1698,6 +1704,14 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr /// If ALL ... JOIN - we replicate all the columns except the new ones. if (offsets_to_replicate) { + /* + * a, b c, d offset + * 1, y 1, x 2 at this point the right-side columns are already filled, but the left block's rows are not; + * 2, z 1, x the offsets are for the left-side rows, telling each one how many copies to make so it lines up with the right-side rows + * + * 1, y 1, x 2 + * 1, y 1, x + */ for (size_t i = 0; i < existing_columns; ++i) { block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicateRange(probe_process_info.start_row, probe_process_info.end_row, *offsets_to_replicate); } @@ -1719,7 +1733,7 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr if (!other_filter_column.empty() || !other_eq_filter_from_in_column.empty()) { if (!offsets_to_replicate) - throw Exception("Should not reach here, the strictness of join with other condition must be ALL"); + throw Exception("Should not reach here, the strictness of join with other condition must be ALL"); // handle the other conditions handleOtherConditions(block, filter, offsets_to_replicate, right_table_column_indexes); } }
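The offsets diagram above maps directly onto IColumn::replicate, where offsets are cumulative end positions with one entry per source row. A small hedged example:

// row 0 -> 2 copies, row 1 -> 3 copies (cumulative offsets: 2, then 2 + 3 = 5)
auto col = ColumnUInt64::create();
col->insert(Field(static_cast<UInt64>(10)));
col->insert(Field(static_cast<UInt64>(20)));
IColumn::Offsets offsets;
offsets.push_back(2);
offsets.push_back(5);
ColumnPtr replicated = col->replicate(offsets); // yields 10, 10, 20, 20, 20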
diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index abae6268430..d8bfe2afa6e 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -39,7 +39,7 @@ struct ProbeProcessInfo; * JOIN-s could be of nine types: ANY/ALL × LEFT/INNER/RIGHT/FULL, and also CROSS. * * If ANY is specified - then select only one row from the "right" table, (first encountered row), even if there was more matching rows. - * If ALL is specified - usual JOIN, when rows are multiplied by number of matching rows from the "right" table. + * If ALL is specified - usual JOIN, when rows are multiplied by number of matching rows from the "right" table. ALL replicates rows. * ANY is more efficient. * * If INNER is specified - leave only rows that have matching rows from "right" table. @@ -187,7 +187,7 @@ class Join /// Reference to the row in block. struct RowRef { - const Block * block; + const Block * block; // a block pointer plus a row number size_t row_num; RowRef() = default; diff --git a/dbms/src/Interpreters/NullableUtils.cpp b/dbms/src/Interpreters/NullableUtils.cpp index cf8975f8b80..44cb13c0d92 100644 --- a/dbms/src/Interpreters/NullableUtils.cpp +++ b/dbms/src/Interpreters/NullableUtils.cpp @@ -26,6 +26,7 @@ void extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ColumnPtr & nul return; const ColumnNullable & column_nullable = static_cast(*column); + // take the companion byte map and the nested base column out of the nullable column null_map = &column_nullable.getNullMapData(); null_map_holder = column_nullable.getNullMapColumnPtr(); column = &column_nullable.getNestedColumn(); @@ -49,6 +50,7 @@ void extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ColumnPtr & nul PaddedPODArray & mutable_null_map = static_cast(*mutable_null_map_holder).getData(); const PaddedPODArray & other_null_map = column_nullable.getNullMapData(); + // for join key columns, null in any key means null overall, hence the |= here for (size_t i = 0, size = mutable_null_map.size(); i < size; ++i) mutable_null_map[i] |= other_null_map[i]; diff --git a/dbms/src/Interpreters/Repeat.cpp b/dbms/src/Interpreters/Repeat.cpp new file mode 100644 index 00000000000..ff626f36042 --- /dev/null +++ b/dbms/src/Interpreters/Repeat.cpp @@ -0,0 +1,231 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include "DataTypes/DataTypesNumber.h" +#include + +namespace DB +{ + +namespace /// anonymous namespace for storing private function utils. +{ +void convertColumnToNullable(ColumnWithTypeAndName & column) +{ + column.type = makeNullable(column.type); + if (column.column) + column.column = makeNullable(column.column); +} +} + +Repeat::Repeat(const DB::GroupingSets & gss) + : group_sets_names(gss) +{} + +/// for cases like: select count(distinct a), count(distinct b) from t; +/// it will generate 2 grouping sets with <a> and <b>, over which we should +/// repeat one more replica of the source rows from the input block and +/// identify each replica with the grouping id in the appended new column. +/// +/// eg: source block          ==> replicated block (a new groupingID column is appended) +///     <a>   <b>                  <a>    <b>    <groupingID> +///      1     1    target a ---->  1     null        1 +///      2     2    target b ----> null    1          2 +///                 target a ---->  2     null        1 +///                 target b ----> null    2          2 +/// +/// when targeting one specific grouping set, the other grouping set columns should be +/// filled with null values, so that the group by(a,b) operator achieves the equivalent +/// effect of group by(a) and group by(b), since the other grouping set columns have been +/// filled with null values. +/// +/// \param block the source block, replicated and null-filled in place +void Repeat::replicateAndFillNull(Block & block) const +{ + size_t origin_rows = block.rows(); + // make a replicate slice, using it to replicate origin rows.
+ std::unique_ptr offsets_to_replicate; + offsets_to_replicate = std::make_unique(origin_rows); + + // get the replicate offset fixed as group set num. + IColumn::Offset current_offset = 0; + const IColumn::Offset replicate_times_for_one_row = getGroupSetNum(); + + // create a column for grouping id. + auto grouping_id_column = ColumnUInt64::create(); + auto & grouping_id_column_data = grouping_id_column->getData(); + // reserve N times of current block rows size. + grouping_id_column_data.reserve(block.rows() * replicate_times_for_one_row); + + // prepare added mutable grouping id column. + MutableColumns added_grouping_id_column; + added_grouping_id_column.reserve(1); + added_grouping_id_column.push_back(grouping_id_column->getPtr()); + + for (size_t i = 0; i < origin_rows; i++) + { + current_offset += replicate_times_for_one_row; + (*offsets_to_replicate)[i] = current_offset; + + // in the same loop, to fill the grouping id. + for (UInt64 j = 0; j < replicate_times_for_one_row; j++) + { + // start from 1. + Field grouping_id = j + 1; + added_grouping_id_column[0]->insert(grouping_id); + } + } + // todo: for some column overlapping in different grouping set, we should copy the overlapped column as a new column + // and the upper layer OP's computation should be shifted and based on the new one's id. Need a plan side control. + + // replicate the original block rows. + size_t existing_columns = block.columns(); + + if (offsets_to_replicate) + { + for (size_t i = 0; i < existing_columns; ++i) + { + // expand the origin const column, since it may be filled with null value when repeating. + if (block.safeGetByPosition(i).column->isColumnConst()) + block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->convertToFullColumnIfConst(); + + // for every existing column, if the column is a grouping set column, make it nullable. + if (isInGroupSetColumn(block.safeGetByPosition(i).name) && !block.safeGetByPosition(i).column->isColumnNullable()) + { + convertColumnToNullable(block.getByPosition(i)); + } + // replicate it. + block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate); + } + } + + + // after replication, it just copied the same row for N times, we still need to fill corresponding Field with null value. + for (size_t grouping_offset = 0; grouping_offset < replicate_times_for_one_row; grouping_offset++) + { + auto grouping_columns = getGroupSetColumnNamesByOffset(grouping_offset); + // for every grouping col, get the mutated one of them. + for (const auto & grouping_col : grouping_columns) + { + assert(block.getByName(grouping_col).column->isColumnNullable()); + + const auto * nullable_column = typeid_cast(block.getByName(grouping_col).column.get()); + auto origin_size = nullable_column->size(); + // clone the nested column. + MutableColumnPtr new_nested_col = nullable_column->getNestedColumn().cloneResized(origin_size); + // just get mutable new null map. + auto new_null_map = ColumnUInt8::create(); + new_null_map->getData().resize(origin_size); + memcpy(new_null_map->getData().data(), nullable_column->getNullMapData().data(), origin_size * sizeof(nullable_column->getNullMapData()[0])); + + auto cloned_one = ColumnNullable::create(std::move(new_nested_col), std::move(new_null_map)); + + /// travel total rows, and set null values for current grouping set column. 
+ /// basically it looks like: + /// eg: source block          ==> replicated block (a new groupingID column is appended) + ///     <a>   <b>                  <a>    <b>    <groupingID> + ///      1     1    target a ---->  1     null        1 + ///      2     2    target b ----> null    1          2 + ///                 target a ---->  2     null        1 + ///                 target b ----> null    2          2 + /// + /// after the replication is done, the data layout looks like below + /// ==> for one grouping column in one grouping set: + /// -----------------+ locate the target row in every single small group with the same "offset_of_grouping_col" in the set + /// 1 1 1 + replicate_group1: for a the offset is 0, so we should pick and set: + /// 1 1 2 + replicate_group_rows[0].a = null + /// -----------------+ + /// 2 2 1 + replicate_group2: for b the offset is 1, so we should pick and set: + /// 2 2 2 + replicate_group_rows[1].b = null + /// -----------------+ for (size_t i = 0; i < origin_rows; i++) { // for every group of N rows mapped from one original row, fill the corresponding grouping set column with null values according to the offset. // the value is kept only when the offset inside the replicate_group equals the current grouping_offset. // eg: for the case above, with grouping_offset = 0 we only keep offset 0 in each small replicate_group_x and null out the rest. for (UInt64 j = 0; j < replicate_times_for_one_row; j++) { if (j == grouping_offset) { // only keep this column's value for the targeted replica. continue; } // set this column to null for all the other replicas. // todo: since nullable columns always consult the null map first, should we also clean the old data at the same position in the nested column? auto computed_offset = i * replicate_times_for_one_row + j; cloned_one->getNullMapData().data()[computed_offset] = 1; } } block.getByName(grouping_col).column = std::move(cloned_one); } // end of the adjustment for one grouping set's columns (by now, one column per grouping set). } block.insert(ColumnWithTypeAndName(std::move(added_grouping_id_column[0]), grouping_identifier_column_type, grouping_identifier_column_name)); + // the block has been modified in place. +} + +bool Repeat::isInGroupSetColumn(const String & name) const +{ + for (const auto & grouping_set : group_sets_names) + { + // for every grouping set. + for (const auto & grouping_exprs : grouping_set) + { + // for every grouping expr. + for (const auto & grouping_name : grouping_exprs) + { + if (grouping_name == name) + return true; + } + } + } + return false; +} + +const GroupingColumnNames & Repeat::getGroupSetColumnNamesByOffset(size_t offset) const +{ + /// currently there can only be one groupingExprs inside one groupingSet, until the planner supports grouping set merging. + return group_sets_names[offset][0]; +} + +void Repeat::getAllGroupSetColumnNames(std::set & name_set) const +{ + for (const auto & grouping_set : group_sets_names) + { + // for every grouping set. + for (const auto & grouping_exprs : grouping_set) + { + // for every grouping expr. + for (const auto & grouping_name : grouping_exprs) + name_set.insert(grouping_name); + } + } +} + +std::shared_ptr Repeat::sharedRepeat(const GroupingSets & groupingSets) +{ + return std::make_shared(groupingSets); +} + +const std::string Repeat::grouping_identifier_column_name = "groupingID"; +const DataTypePtr Repeat::grouping_identifier_column_type = std::make_shared(); +}
diff --git a/dbms/src/Interpreters/Repeat.h b/dbms/src/Interpreters/Repeat.h new file mode 100644 index 00000000000..7c2c05ab85d --- /dev/null +++ b/dbms/src/Interpreters/Repeat.h @@ -0,0 +1,139 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ +/// groupingSets are formed as { groupingSet, groupingSet... } +/// a groupingSet is formed as { groupingExprs, groupingExprs... } +/// groupingExprs are a slice of expression/column names +/// for a simple case like: select count(distinct a), count(distinct b) from t; +/// we get 2 grouping sets: {[<a>], [<b>]} +/// +/// soon we can support grouping set merging, which could handle a case +/// like: select count(distinct a,b), count(distinct a), count(distinct c) from t, where +/// we still get 2 grouping sets: {[<a,b>, <a>], [<c>]} +/// +/// in the second case, group layouts sharing a common prefix have been merged +/// into one unified set to reduce the underlying data replication/repeat cost. +/// +using GroupingColumnName = ::String; +using GroupingColumnNames = std::vector; +using GroupingSet = std::vector; +using GroupingSets = std::vector;
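To make the nesting concrete, here is the two-set example from the comment above spelled out with these aliases (mirroring how the unit tests later in this patch build them):

// select count(distinct a), count(distinct b) from t  =>  {[<a>], [<b>]}
GroupingSets sets{
    GroupingSet{GroupingColumnNames{"a"}}, // one grouping set holding one expr list
    GroupingSet{GroupingColumnNames{"b"}},
};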
+/** Data structure for the implementation of Repeat. + * + * Repeat is an operator used to replicate lower-layer data source rows to feed different aggregate + * grouping-layout requirements (basically known as grouping sets). + * + * In the current scenario, it is applied to accelerate the computation of multiple distinct aggregates by utilizing + * the computing resources of multiple nodes, via a 3-phase aggregation scheme under MPP mode. + * + * The GroupingSets description is all the Repeat operator itself needs; the length of GroupingSets is the needed + * repeat count (in other words, one grouping set requires one replica of the source rows). Since one grouping + * set column shouldn't let its targeted rows be affected by the other grouping set columns (which will also appear in + * the group by items) when doing the grouping work, we isolate the different grouping set columns by filling them with + * null values while repeating rows. + * + * Here is an example: + * Say we got a query like this: select count(distinct a), count(distinct b) from t. + * + * The downward requirements formed by this query consist of two different grouping sets <a> and <b>, and both of these + * two columns will be in the group by items. Record that here as --- GROUP BY(a,b). + * + * Different group layouts cannot be fed with the same replica of data in shuffling mode, except by gathering + * them all onto a single node, and the latter usually comes with a single-point bottleneck. + * + * That's why the data repeat happens here. Say we got two tuples as below: + * + * <a> <b>           ==> after repeat we got + *  1   1                origin row   1   1 + *  1   2                repeat row   1   1 + *                       origin row   1   2 + *                       repeat row   1   2 + * + * Although we have now repeated/doubled the origin rows, when grouping them together + * with the GROUP BY(a,b) clause (resulting in 2 groups here: (1,1) and (1,2)), we find that we still cannot get the + * right answer for the count-distinct agg on a. + * + * In theory, every origin/repeated row should be targeted at one grouping requirement, which means rows <1> and <3> + * above should be used to feed count(distinct a); but since the value of b in row <3> differs from that in row <1>, + * they end up being divided into different groups. + * + * Coming back to the original goal of feeding count(distinct a): we don't even care what column b held in rows <1> + * and <3>, because the current agg's argument is aimed at column a. Therefore, we fill every non-targeted grouping set + * column in the repeated rows with null values. After that we get the rows below: + * + * <a> <b>           ==> after repeat we got + *  1   1                origin row   1    null  ---> target for grouping set a + *  1   2                repeat row  null   1    ---> target for grouping set b + *                       origin row   1    null  ---> target for grouping set a + *                       repeat row  null   2    ---> target for grouping set b + * + * Then, when grouping them together with the GROUP BY(a,b) clause, rows <1> and <3> land in one group, while rows <2> + * and <4> each form a group of their own. Among them, every distinct agg has its targeted data grouped correctly. The + * GROUP BY(a,b) clause effectively acts as the equivalent of GROUP BY(a, null) for a-targeted rows and GROUP BY(null, b) + * for b-targeted rows. + * + * Over correctly grouped data, the result computation for the distinct aggs is straightforward. By the way, if an origin + * row has some column that doesn't belong to any grouping set, it is simply copied as-is into the repeated rows. + * + */
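A usage sketch distilled from gtest_block_repeat.cpp further down in this patch:

// replicate a block in place for the grouping sets {<a>, <b>}
GroupingSets sets{GroupingSet{GroupingColumnNames{"a"}}, GroupingSet{GroupingColumnNames{"b"}}};
auto repeat = Repeat::sharedRepeat(sets);
repeat->replicateAndFillNull(block);
// block now holds twice the rows, columns a and b are nullable,
// and a non-nullable UInt64 "groupingID" column (values starting at 1) has been appended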
+class Repeat +{ +public: + explicit Repeat(const GroupingSets & gss); + + // replicateAndFillNull is the core functionality the Repeat operator provides. Briefly, it replicates + // the origin rows according to the local grouping sets description, and appends a new column named groupingID + // to indicate which grouping set each row targets. + void replicateAndFillNull(Block & block) const; + + size_t getGroupSetNum() const { return group_sets_names.size(); } + + bool isInGroupSetColumn(const String & name) const; + + const GroupingColumnNames & getGroupSetColumnNamesByOffset(size_t offset) const; + + void getAllGroupSetColumnNames(std::set & name_set) const; + + static std::shared_ptr sharedRepeat(const GroupingSets & groupingSets); + + static const String grouping_identifier_column_name; + + static const DataTypePtr grouping_identifier_column_type; + +private: + GroupingSets group_sets_names; +}; +} // namespace DB \ No newline at end of file diff --git a/dbms/src/Interpreters/sortBlock.cpp b/dbms/src/Interpreters/sortBlock.cpp index 9995329b833..438a14b42bd 100644 --- a/dbms/src/Interpreters/sortBlock.cpp +++ b/dbms/src/Interpreters/sortBlock.cpp @@ -410,6 +410,7 @@ void sortBlock(Block & block, const SortDescription & description, size_t limit) : block.safeGetByPosition(description[0].column_number).column.get(); IColumn::Permutation perm; + // the permutation is a reordering of the row offsets if (NeedCollation(column, description[0])) column->getPermutation(*description[0].collator, reverse, limit, description[0].nulls_direction, perm); else @@ -417,7 +418,7 @@ void sortBlock(Block & block, const SortDescription & description, size_t limit) size_t columns = block.columns(); for (size_t i = 0; i < columns; ++i) - block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->permute(perm, limit); + block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->permute(perm, limit); // reorganize each column's data according to the permuted offsets } else { diff --git a/dbms/src/Interpreters/tests/gtest_block_repeat.cpp b/dbms/src/Interpreters/tests/gtest_block_repeat.cpp new file mode 100644 index 00000000000..b3c1cce713d --- /dev/null +++ b/dbms/src/Interpreters/tests/gtest_block_repeat.cpp @@ -0,0 +1,238 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include + +namespace DB +{ +namespace tests +{ + +class BlockRepeat : public ::testing::Test +{ +public: + using ColStringType = typename TypeTraits::FieldType; + using ColInt64Type = typename TypeTraits::FieldType; + using ColUInt64Type = typename TypeTraits::FieldType; + using ColumnWithString = std::vector; + using ColumnWithInt64 = std::vector; + using ColumnWithUInt64 = std::vector; + + const String single_col_name{"single_col"}; + const ColumnWithString col0_ori{"col0-1 ", "col0-7", "col0-0 ", "col0-3", "col0-4", "col0-6", "col0-2 ", "col0-5"}; + const std::vector col_name{"age", "gender", "country", "region", "zip"}; +}; + +TEST_F(BlockRepeat, Repeat) +try +{ + { + // test the basic block repeat operation.
(two grouping set) + const ColumnsWithTypeAndName + ori_col + = { + toVec(col_name[0], ColumnWithInt64{1, 0, -1}), + toVec(col_name[1], ColumnWithString{"1 ", "1 ", "1 "}), + toVec(col_name[2], ColumnWithString{"1", "2", "3"}), + toVec(col_name[3], ColumnWithUInt64{1, 1, 0}), + }; + // group set, group set + GroupingSet g_gender = GroupingSet{GroupingColumnNames{col_name[1]}}; + GroupingSet g_country = GroupingSet{GroupingColumnNames{col_name[2]}}; + GroupingSets group_sets = GroupingSets{g_gender, g_country}; + Repeat repeat = Repeat(group_sets); + Block block(ori_col); + auto origin_rows = block.rows(); + + repeat.replicateAndFillNull(block); + // assert the col size is added with 1. + ASSERT_EQ(block.getColumns().size(), size_t(5)); + // assert the new col groupingID is appended. + ASSERT_EQ(block.getColumnsWithTypeAndName()[4].name, "groupingID"); + // assert the block size is equal to origin rows * grouping set num. + auto repeat_rows = block.rows(); + auto grouping_set_num = repeat.getGroupSetNum(); + ASSERT_EQ(origin_rows * grouping_set_num, repeat_rows); // 6 + // assert grouping set column are nullable. + ASSERT_EQ(block.getColumns()[0].get()->isColumnNullable(), false); + ASSERT_EQ(block.getColumns()[1].get()->isColumnNullable(), true); + ASSERT_EQ(block.getColumns()[2].get()->isColumnNullable(), true); + ASSERT_EQ(block.getColumns()[3].get()->isColumnNullable(), false); + ASSERT_EQ(block.getColumns()[4].get()->isColumnNullable(), false); + + // assert the rows layout + // "age", "gender", "country", "region", "groupingID" + // ori_col 1 "1 " null 1 1 + // rpt_col 1 null "1" 1 2 + // + // ori_col 0 "1 " null 1 1 + // rpt_col 0 null "2" 1 2 + // + // ori_col -1 "1 " null 0 1 + // rpt_col -1 null "3" 0 2 + + const auto res0 = ColumnWithInt64{1, 1, 0, 0, -1, -1}; + const auto * col_0 = typeid_cast(block.getColumns()[0].get()); + for (int i = 0; i < int(repeat_rows); ++i) + { + ASSERT_EQ(col_0->getElement(i), res0[i]); + } + + const auto res1 = ColumnWithString{"1 ", "null", "1 ", "null", "1 ", "null"}; + const auto * col_1 = typeid_cast(block.getColumns()[1].get()); + const auto * col_1_nest = &static_cast(col_1->getNestedColumn()); + for (int i = 0; i < int(repeat_rows); ++i) + { + if (res1[i] == "null") { + ASSERT_EQ(col_1->isNullAt(i), true); + } else { + ASSERT_EQ(col_1_nest->getDataAt(i), res1[i]); + } + } + + const auto res2 = ColumnWithString{"null", "1", "null", "2", "null", "3"}; + const auto * col_2 = typeid_cast(block.getColumns()[2].get()); + const auto * col_2_nest = &static_cast(col_2->getNestedColumn()); + for (int i = 0; i < int(repeat_rows); ++i) + { + if (res2[i] == "null") { + ASSERT_EQ(col_2->isNullAt(i), true); + } else { + ASSERT_EQ(col_2_nest->getDataAt(i), res2[i]); + } + } + + const auto res3 = ColumnWithUInt64{1, 1,1,1, 0,0}; + const auto * col_3 = typeid_cast(block.getColumns()[3].get()); + for (int i = 0; i < int(repeat_rows); ++i) + { + ASSERT_EQ(col_3->getElement(i), res3[i]); + } + + const auto res4 = ColumnWithUInt64{1, 2, 1, 2, 1, 2}; + const auto * col_4 = typeid_cast(block.getColumns()[4].get()); + for (int i = 0; i < int(repeat_rows); ++i) + { + ASSERT_EQ(col_4->getElement(i), res4[i]); + } + } + { + // test block repeat operation for multi grouping set (triple here) + const ColumnsWithTypeAndName + ori_col + = { + toVec(col_name[0], ColumnWithInt64{1, 0, -1}), + toVec(col_name[1], ColumnWithString{"aaa", "bbb", "ccc"}), + toVec(col_name[2], ColumnWithString{"1", "2", "3"}), + toVec(col_name[3], ColumnWithUInt64{1, 1, 0}), + }; + // group set, 
group set + GroupingSet g_gender = GroupingSet{GroupingColumnNames{col_name[1]}}; + GroupingSet g_country = GroupingSet{GroupingColumnNames{col_name[2]}}; + GroupingSet g_region = GroupingSet{GroupingColumnNames{col_name[3]}}; + GroupingSets group_sets = GroupingSets{g_gender, g_country, g_region}; + Repeat repeat = Repeat(group_sets); + Block block(ori_col); + auto origin_rows = block.rows(); + + repeat.replicateAndFillNull(block); + // assert the col size is added with 1. + ASSERT_EQ(block.getColumns().size(), size_t(5)); + // assert the new col groupingID is appended. + ASSERT_EQ(block.getColumnsWithTypeAndName()[4].name, "groupingID"); + // assert the block size is equal to origin rows * grouping set num. + auto repeat_rows = block.rows(); + auto grouping_set_num = repeat.getGroupSetNum(); + ASSERT_EQ(origin_rows * grouping_set_num, repeat_rows); // 9 + // assert grouping set column are nullable. + ASSERT_EQ(block.getColumns()[0].get()->isColumnNullable(), false); + ASSERT_EQ(block.getColumns()[1].get()->isColumnNullable(), true); + ASSERT_EQ(block.getColumns()[2].get()->isColumnNullable(), true); + ASSERT_EQ(block.getColumns()[3].get()->isColumnNullable(), true); + ASSERT_EQ(block.getColumns()[4].get()->isColumnNullable(), false); + + // assert the rows layout + // "age", "gender", "country", "region", "groupingID" + // ori_col 1 "aaa" null null 1 + // rpt_col 1 null "1" null 2 + // rpt_col 1 null null 1 3 + // + // ori_col 0 "bbb" null null 1 + // rpt_col 0 null "2" null 2 + // rpt_col 0 null null 1 3 + + // ori_col -1 "ccc" null null 1 + // rpt_col -1 null "3" null 2 + // rpt_col -1 null null 0 3 + + const auto res0 = ColumnWithInt64{1, 1, 1, 0, 0, 0, -1, -1, -1}; + const auto * col_0 = typeid_cast(block.getColumns()[0].get()); + for (int i = 0; i < int(repeat_rows); ++i) + { + ASSERT_EQ(col_0->getElement(i), res0[i]); + } + + const auto res1 = ColumnWithString{"aaa", "null", "null", "bbb", "null", "null", "ccc", "null", "null"}; + const auto * col_1 = typeid_cast(block.getColumns()[1].get()); + const auto * col_1_nest = &static_cast(col_1->getNestedColumn()); + for (int i = 0; i < int(repeat_rows); ++i) + { + if (res1[i] == "null") { + ASSERT_EQ(col_1->isNullAt(i), true); + } else { + ASSERT_EQ(col_1_nest->getDataAt(i), res1[i]); + } + } + + const auto res2 = ColumnWithString{"null", "1", "null", "null", "2", "null", "null", "3", "null"}; + const auto * col_2 = typeid_cast(block.getColumns()[2].get()); + const auto * col_2_nest = &static_cast(col_2->getNestedColumn()); + for (int i = 0; i < int(repeat_rows); ++i) + { + if (res2[i] == "null") { + ASSERT_EQ(col_2->isNullAt(i), true); + } else { + ASSERT_EQ(col_2_nest->getDataAt(i), res2[i]); + } + } + + // use UInt64(-1) to represent null. 
+ const auto res3 = ColumnWithUInt64{UInt64(-1), UInt64(-1), 1, UInt64(-1), UInt64(-1), 1, UInt64(-1), UInt64(-1), 0}; + const auto * col_3 = typeid_cast(block.getColumns()[3].get()); + const auto * col_3_nest = &typeid_cast(col_3->getNestedColumn()); + for (int i = 0; i < int(repeat_rows); ++i) + { + if (res3[i] == UInt64(-1)) { + ASSERT_EQ(col_3->isNullAt(i), true); + } else { + ASSERT_EQ(col_3_nest->getElement(i), res3[i]); + } + } + + const auto res4 = ColumnWithUInt64{1, 2, 3, 1, 2, 3, 1, 2, 3}; + const auto * col_4 = typeid_cast(block.getColumns()[4].get()); + for (int i = 0; i < int(repeat_rows); ++i) + { + ASSERT_EQ(col_4->getElement(i), res4[i]); + } + } +} +CATCH + +} // namespace tests +} // namespace DB diff --git a/dbms/src/TestUtils/FunctionTestUtils.cpp b/dbms/src/TestUtils/FunctionTestUtils.cpp index a679bbd9885..f7ab140b2fc 100644 --- a/dbms/src/TestUtils/FunctionTestUtils.cpp +++ b/dbms/src/TestUtils/FunctionTestUtils.cpp @@ -220,7 +220,10 @@ ::testing::AssertionResult columnsEqual( ASSERT_EQUAL(expect_col.column->size(), actual_col.column->size(), fmt::format("Column {} size mismatch", i)); auto type_eq = dataTypeEqual(expected[i].type, actual[i].type); if (!type_eq) + { + std::cout << "type equal false" << std::endl; return type_eq; + } } auto const expected_row_set = columnsToRowSet(expected); @@ -259,6 +262,7 @@ ::testing::AssertionResult columnsEqual( .append("\n"); } buf.append("...\n"); + std::cout<"); + } + buf.fmtAppend("]\n"); +} + void serializeJoin(const String & executor_id, const tipb::Join & join, FmtBuffer & buf) { buf.fmtAppend("{} | {}, {}. left_join_keys: {{", executor_id, getJoinTypeName(join.join_type()), getJoinExecTypeName(join.join_exec_type())); @@ -282,6 +306,9 @@ void ExecutorSerializer::serializeListStruct(const tipb::DAGRequest * dag_reques case tipb::ExecType::TypeLimit: serializeLimit("Limit", executor.limit(), buf); break; + case tipb::ExecType::TypeRepeatSource: + serializeRepeatSource("Repeat", executor.repeat_source(), buf); + break; default: throw TiFlashException("Should not reach here", Errors::Coprocessor::Internal); } @@ -339,6 +366,9 @@ void ExecutorSerializer::serializeTreeStruct(const tipb::Executor & root_executo case tipb::ExecType::TypeWindow: serializeWindow(executor.executor_id(), executor.window(), buf); break; + case tipb::ExecType::TypeRepeatSource: + serializeRepeatSource(executor.executor_id(), executor.repeat_source(), buf); + break; default: throw TiFlashException("Should not reach here", Errors::Coprocessor::Internal); } diff --git a/dbms/src/TestUtils/mockExecutor.cpp b/dbms/src/TestUtils/mockExecutor.cpp index 2a7c820ce55..1220e873a32 100644 --- a/dbms/src/TestUtils/mockExecutor.cpp +++ b/dbms/src/TestUtils/mockExecutor.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -361,6 +362,29 @@ DAGRequestBuilder & DAGRequestBuilder::sort(MockOrderByItemVec order_by_vec, boo return *this; } +DAGRequestBuilder & DAGRequestBuilder::repeat(MockVVecColumnNameVec grouping_set_columns) +{ + assert(root); + auto grouping_sets_ast = mock::MockVVecGroupingNameVec(); + auto grouping_col_collection = std::set(); + for (const auto & grouping_set : grouping_set_columns) { + auto grouping_set_ast = mock::MockVecGroupingNameVec(); + for (const auto &grouping_exprs : grouping_set) { + auto grouping_exprs_ast = mock::MockGroupingNameVec(); + for (const auto &grouping_col : grouping_exprs) + { + auto ast_col_ptr = buildColumn(grouping_col); // string identifier change to ast column ref + 
grouping_exprs_ast.emplace_back(std::move(ast_col_ptr)); + grouping_col_collection.insert(grouping_col); + } + grouping_set_ast.emplace_back(std::move(grouping_exprs_ast)); + } + grouping_sets_ast.emplace_back(std::move(grouping_set_ast)); + } + root = compileRepeat(root, getExecutorIndex(), grouping_sets_ast, grouping_col_collection); + return *this; +} + void MockDAGRequestContext::addMockTable(const String & db, const String & table, const MockColumnInfoVec & mock_column_infos, size_t concurrency_hint) { auto columns = getColumnWithTypeAndName(genNamesAndTypes(mockColumnInfosToTiDBColumnInfos(mock_column_infos), "mock_table_scan")); diff --git a/dbms/src/TestUtils/mockExecutor.h b/dbms/src/TestUtils/mockExecutor.h index 307a034a9ac..df9d162f691 100644 --- a/dbms/src/TestUtils/mockExecutor.h +++ b/dbms/src/TestUtils/mockExecutor.h @@ -38,6 +38,8 @@ using MockOrderByItemVec = std::vector; using MockPartitionByItem = std::pair; using MockPartitionByItemVec = std::vector; using MockColumnNameVec = std::vector; +using MockVecColumnNameVec = std::vector; // for grouping set (every groupingExpr element inside is slice of column) +using MockVVecColumnNameVec = std::vector; // for grouping sets using MockAstVec = std::vector; using MockWindowFrame = mock::MockWindowFrame; @@ -145,6 +147,9 @@ class DAGRequestBuilder DAGRequestBuilder & sort(MockOrderByItem order_by, bool is_partial_sort, uint64_t fine_grained_shuffle_stream_count = 0); DAGRequestBuilder & sort(MockOrderByItemVec order_by_vec, bool is_partial_sort, uint64_t fine_grained_shuffle_stream_count = 0); + // repeat + DAGRequestBuilder & repeat(MockVVecColumnNameVec grouping_set_columns); + void setCollation(Int32 collator_) { properties.collator = convertToTiDBCollation(collator_); } Int32 getCollation() const { return abs(properties.collator); } diff --git a/tai.cpp b/tai.cpp new file mode 100644 index 00000000000..59b2935c6a4 --- /dev/null +++ b/tai.cpp @@ -0,0 +1,64 @@ +// +// Created by arenatlx on 2022/10/27. 
+// + +#include +#include +#include "tai.h" + + + +class HAHA { +public: + template + HAHA & operator=(std::vector && rhs){ + return rhs[0]; + } +private: + int a; +}; + + +class MY { + using Arra1y = std::vector; +public: + HAHA operator[](size_t n) const; +}; + +HAHA MY::operator[](size_t n) const{ + Arra1y a(n); + return a[0]; +} + +struct Test{ + ~Test(){ + std::cout<<"kill test"<(); + auto tmp = new(Test); // 这个地方直接 Test() 还是会析构一次,有点奇怪,只有 new 才行。 + vec.push_back(std::move(*tmp)); + std::cout< v; + v.push_back("aaaa"); + v.push_back("bbbb"); + v[0][0]='1'; + v.push_back(std::move(v[0])); + std::cout< Date: Tue, 6 Dec 2022 20:46:11 +0800 Subject: [PATCH 02/31] fix the test Exeception because of fmtlib can take '{' as escape symbol Signed-off-by: AilinKid <3148019@qq.com> --- .../Coprocessor/DAGExpressionAnalyzer.cpp | 4 +-- .../Coprocessor/collectOutputFieldTypes.cpp | 3 ++- .../src/Flash/tests/gtest_filter_executor.cpp | 6 +++-- dbms/src/TestUtils/ExecutorTestUtils.cpp | 1 + dbms/src/TestUtils/FunctionTestUtils.cpp | 25 +++++++++++++++++++ dbms/src/TestUtils/executorSerializer.cpp | 4 +-- 6 files changed, 36 insertions(+), 7 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index 01315929ff3..6d0162ec09c 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -837,12 +837,12 @@ ExpressionActionsPtr DAGExpressionAnalyzer::appendRepeatSource( const tipb::RepeatSource & repeatSource, ExpressionActionsChain & chain) { auto & last_step = initAndGetLastStep(chain); - auto shared_repeat = buildRepeatGroupingColumns(repeatSource, last_step.actions); - last_step.actions->add(ExpressionAction::repeatSource(shared_repeat)); for (const auto &origin_col : last_step.actions->getSampleBlock().getNamesAndTypesList()) { last_step.required_output.push_back(origin_col.name); } + auto shared_repeat = buildRepeatGroupingColumns(repeatSource, last_step.actions); + last_step.actions->add(ExpressionAction::repeatSource(shared_repeat)); // an added column from REPEAT action. source_columns.emplace_back(Repeat::grouping_identifier_column_name, Repeat::grouping_identifier_column_type); auto before_repeat_source = chain.getLastActions(); diff --git a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp index d2536255a76..b4d3079a9e2 100644 --- a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp +++ b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp @@ -120,7 +120,8 @@ bool collectForRepeat(std::vector &out_field_types, const tipb: field_type.set_tp(TiDB::TypeLongLong); field_type.set_charset("binary"); field_type.set_collate(TiDB::ITiDBCollator::BINARY); - field_type.set_flag(0); + // groupingID column should be Uint64 and NOT NULL. + field_type.set_flag(TiDB::ColumnFlagUnsigned | TiDB::ColumnFlagNotNull); field_type.set_flen(-1); field_type.set_decimal(-1); out_field_types.push_back(field_type); diff --git a/dbms/src/Flash/tests/gtest_filter_executor.cpp b/dbms/src/Flash/tests/gtest_filter_executor.cpp index c577772e46c..4605396b4be 100644 --- a/dbms/src/Flash/tests/gtest_filter_executor.cpp +++ b/dbms/src/Flash/tests/gtest_filter_executor.cpp @@ -213,10 +213,12 @@ try .scan("test_db", "test_table") .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) .build(context); + // test is ok now for non-planner enabled. 
executeAndAssertColumnsEqual( request, - {toNullableVec({"banana"}), - toNullableVec({"banana"})}); + {toNullableVec({"banana", {}, {}, {}, "banana", {}}), + toNullableVec({{}, "apple", {}, {}, {}, "banana"}), + toVec({1,2,1,2,1,2})}); } CATCH diff --git a/dbms/src/TestUtils/ExecutorTestUtils.cpp b/dbms/src/TestUtils/ExecutorTestUtils.cpp index 2810bb033dc..403651569c2 100644 --- a/dbms/src/TestUtils/ExecutorTestUtils.cpp +++ b/dbms/src/TestUtils/ExecutorTestUtils.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include diff --git a/dbms/src/TestUtils/FunctionTestUtils.cpp b/dbms/src/TestUtils/FunctionTestUtils.cpp index f7ab140b2fc..b616c441bf4 100644 --- a/dbms/src/TestUtils/FunctionTestUtils.cpp +++ b/dbms/src/TestUtils/FunctionTestUtils.cpp @@ -193,6 +193,7 @@ std::multiset columnsToRowSet(const ColumnsWithTypeAndName & cols) { for (size_t i = 0, size = col.column->size(); i < size; ++i) { + new (rows[i].place(col_id)) Field((*col.column)[i]); } } @@ -229,6 +230,30 @@ ::testing::AssertionResult columnsEqual( auto const expected_row_set = columnsToRowSet(expected); auto const actual_row_set = columnsToRowSet(actual); + { + auto expect_it = expected_row_set.begin(); + auto actual_it = actual_row_set.begin(); + FmtBuffer buf1; + FmtBuffer buf2; + for (; expect_it != expected_row_set.end(); ++expect_it, ++actual_it) + { + buf1.joinStr( + expect_it->begin(), + expect_it->end(), + [](const auto & v, FmtBuffer & fb) { fb.append(v.toString()); }, + " ") + .append("\n"); + buf2.joinStr( + actual_it->begin(), + actual_it->end(), + [](const auto & v, FmtBuffer & fb) { fb.append(v.toString()); }, + " ") + .append("\n"); + } + auto res1 = buf1.toString(); + auto res2 = buf2.toString(); + } + if (expected_row_set != actual_row_set) { FmtBuffer buf; diff --git a/dbms/src/TestUtils/executorSerializer.cpp b/dbms/src/TestUtils/executorSerializer.cpp index 5d889369f57..33304e298c0 100644 --- a/dbms/src/TestUtils/executorSerializer.cpp +++ b/dbms/src/TestUtils/executorSerializer.cpp @@ -168,7 +168,7 @@ void serializeRepeatSource(const String & executor_id, const tipb::RepeatSource buf.fmtAppend("<"); for (const auto & grouping_exprs : grouping_set.grouping_exprs()) { - buf.fmtAppend("{"); + buf.fmtAppend("{{"); for (auto i = 0; i < grouping_exprs.grouping_expr().size(); i++) { if (i != 0) { @@ -177,7 +177,7 @@ void serializeRepeatSource(const String & executor_id, const tipb::RepeatSource auto expr = grouping_exprs.grouping_expr().Get(i); serializeExpression(expr, buf); } - buf.fmtAppend("}"); + buf.fmtAppend("}}"); } buf.fmtAppend(">"); } From 7f9b656bfdd41a1e2769b1b7d04da6dae44f18b2 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Wed, 7 Dec 2022 18:32:33 +0800 Subject: [PATCH 03/31] add test for tiflash repeat logic Signed-off-by: AilinKid <3148019@qq.com> --- .../Debug/MockExecutor/RepeatSourceBinder.h | 2 +- .../src/Flash/tests/gtest_filter_executor.cpp | 138 +++++++++++++++++- .../Interpreters/tests/gtest_block_repeat.cpp | 31 ++++ 3 files changed, 168 insertions(+), 3 deletions(-) diff --git a/dbms/src/Debug/MockExecutor/RepeatSourceBinder.h b/dbms/src/Debug/MockExecutor/RepeatSourceBinder.h index 473393221cf..ace010048b2 100644 --- a/dbms/src/Debug/MockExecutor/RepeatSourceBinder.h +++ b/dbms/src/Debug/MockExecutor/RepeatSourceBinder.h @@ -25,7 +25,7 @@ class RepeatSourceBinder : public ExecutorBinder { public: RepeatSourceBinder(size_t & index_, const DAGSchema & output_schema_, MockVVecGroupingNameVec gss) - : ExecutorBinder(index_, "repeat_source" + 
+        : ExecutorBinder(index_, "repeat_source_" + std::to_string(index_), output_schema_)
         , grouping_sets_columns(gss)
     {}
 
diff --git a/dbms/src/Flash/tests/gtest_filter_executor.cpp b/dbms/src/Flash/tests/gtest_filter_executor.cpp
index 4605396b4be..68b8c39cca9 100644
--- a/dbms/src/Flash/tests/gtest_filter_executor.cpp
+++ b/dbms/src/Flash/tests/gtest_filter_executor.cpp
@@ -208,17 +208,151 @@ try
         request,
         {toNullableVec<String>({"banana"}),
          toNullableVec<String>({"banana"})});
+}
+CATCH
 
-    request = context
+TEST_F(FilterExecutorTestRunner, RepeatLogical)
+try
+{
+    /// the following tests currently pass with the planner disabled.
+
+    /// case 1
+    auto request = context
                        .scan("test_db", "test_table")
                        .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
                        .build(context);
-    // this test passes now with the planner disabled.
+    /// data flow:
+    ///
+    ///  s1          s2
+    ///  "banana"    "apple"
+    ///  NULL        NULL
+    ///  "banana"    "banana"
+    ///      |
+    ///      v
+    ///  s1          s2          groupingID
+    ///  "banana"    NULL        1
+    ///  NULL        "apple"     2
+    ///  NULL        NULL        1
+    ///  NULL        NULL        2
+    ///  "banana"    NULL        1
+    ///  NULL        "banana"    2
+    ///
     executeAndAssertColumnsEqual(
         request,
         {toNullableVec<String>({"banana", {}, {}, {}, "banana", {}}),
          toNullableVec<String>({{}, "apple", {}, {}, {}, "banana"}),
         toVec<UInt64>({1,2,1,2,1,2})});
+
+    /// case 2
+    request = context
+                  .scan("test_db", "test_table")
+                  .filter(eq(col("s1"), col("s2")))
+                  .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
+                  .build(context);
+    /// data flow:
+    ///
+    ///  s1          s2
+    ///  "banana"    "apple"
+    ///  NULL        NULL
+    ///  "banana"    "banana"
+    ///      |
+    ///      v
+    ///  s1          s2
+    ///  "banana"    "banana"
+    ///      |
+    ///      v
+    ///  s1          s2          groupingID
+    ///  "banana"    NULL        1
+    ///  NULL        "banana"    2
+    ///
+    executeAndAssertColumnsEqual(
+        request,
+        {toNullableVec<String>({"banana", {}}),
+         toNullableVec<String>({{}, "banana"}),
+         toVec<UInt64>({1,2})});
+
+    /// case 3
+    request = context
+                  .scan("test_db", "test_table")
+                  .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
+                  .filter(eq(col("s1"), col("s2")))
+                  .build(context);
+    /// data flow: TiFlash isn't aware of the operator order here, so this filter is still run before repeat, just like the second test case above.
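+    /// (editor's sketch, not part of the original patch) the replication rule all of these
+    /// cases assert, stated once: for every input row and every grouping set i (1-based),
+    /// repeat emits one copy of the row in which every grouping-set column outside set i is
+    /// overwritten with NULL, and appends groupingID = i; roughly:
+    ///
+    ///     for (size_t row = 0; row < input_rows; ++row)
+    ///         for (UInt64 id = 1; id <= group_sets.size(); ++id)
+    ///             emit(row with grouping columns not in group_sets[id - 1] nulled, groupingID = id);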
+    executeAndAssertColumnsEqual(
+        request,
+        {toNullableVec<String>({"banana", {}}),
+         toNullableVec<String>({{}, "banana"}),
+         toVec<UInt64>({1,2})});
+
+    /// case 4
+    auto const_false = lit(Field(static_cast<UInt64>(0)));
+    request = context
+                  .scan("test_db", "test_table")
+                  .filter(const_false) // refuse all rows
+                  .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
+                  .build(context);
+    executeAndAssertColumnsEqual(
+        request,
+        {});
+
+    /// case 5 (test integrated with aggregation)
+    request = context
+                  .scan("test_db", "test_table")
+                  .aggregation({Count(col("s1"))}, {col("s2")})
+                  .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
+                  .build(context);
+    /// data flow:
+    ///
+    ///  s1          s2
+    ///  "banana"    "apple"
+    ///  NULL        NULL
+    ///  "banana"    "banana"
+    ///      |
+    ///      v
+    ///  count(s1)   s2
+    ///  1           "apple"
+    ///  0           NULL
+    ///  1           "banana"
+    ///      |
+    ///      v
+    ///  count(s1)   s2          groupingID
+    ///  1           NULL        1
+    ///  NULL        "apple"     2
+    ///  0           NULL        1
+    ///  NULL        NULL        2
+    ///  1           NULL        1
+    ///  NULL        "banana"    2
+    ///
+    executeAndAssertColumnsEqual(
+        request,
+        {toNullableVec<UInt64>({1, {}, 0, {}, 1,{}}),
+         toNullableVec<String>({{}, "apple", {},{},{}, "banana"}),
+         toVec<UInt64>({1,2,1,2,1,2})});
+
+    /// case 6 (test integrated with aggregation and projection)
+    request = context
+                  .scan("test_db", "test_table")
+                  .aggregation({Count(col("s1"))}, {col("s2")})
+                  .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
+                  .project({"count(s1)"})
+                  .build(context);
+    executeAndAssertColumnsEqual(
+        request,
+        {toNullableVec<UInt64>({1, {}, 0, {}, 1,{}})});
+
+    /// case 7 (test integrated with aggregation, projection and limit)
+    /// note: for now, limit is executed before repeat to reduce unnecessary row replication work.
+//    request = context
+//                  .scan("test_db", "test_table")
+//                  .aggregation({Count(col("s1"))}, {col("s2")})
+//                  .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
+//                  .project({"count(s1)"})
+//                  .limit(2)
+//                  .build(context);
+//    executeAndAssertColumnsEqual(
+//        request,
+//        {toNullableVec<UInt64>({1, {}, 0, {}})});
+
 }
 CATCH
 
diff --git a/dbms/src/Interpreters/tests/gtest_block_repeat.cpp b/dbms/src/Interpreters/tests/gtest_block_repeat.cpp
index b3c1cce713d..21074ff7a95 100644
--- a/dbms/src/Interpreters/tests/gtest_block_repeat.cpp
+++ b/dbms/src/Interpreters/tests/gtest_block_repeat.cpp
@@ -231,6 +231,37 @@ try
             ASSERT_EQ(col_4->getElement(i), res4[i]);
         }
     }
+    {
+        /// test an empty block
+        const ColumnsWithTypeAndName
+            ori_col
+            = {
+                toVec<Int64>(col_name[0], ColumnWithInt64{}), // without data.
+                toVec<String>(col_name[1], ColumnWithString{}),
+                toVec<String>(col_name[2], ColumnWithString{}),
+                toVec<UInt64>(col_name[3], ColumnWithUInt64{}),
+            };
+        // group set <gender>, group set <country>, group set <region>
+        GroupingSet g_gender = GroupingSet{GroupingColumnNames{col_name[1]}};
+        GroupingSet g_country = GroupingSet{GroupingColumnNames{col_name[2]}};
+        GroupingSet g_region = GroupingSet{GroupingColumnNames{col_name[3]}};
+        GroupingSets group_sets = GroupingSets{g_gender, g_country, g_region};
+        Repeat repeat = Repeat(group_sets);
+        Block block(ori_col);
+        auto origin_rows = block.rows();
+
+        repeat.replicateAndFillNull(block);
+        // assert that exactly one column has been added.
+        ASSERT_EQ(block.getColumns().size(), size_t(5));
+        // assert the new col groupingID is appended.
+        ASSERT_EQ(block.getColumnsWithTypeAndName()[4].name, "groupingID");
+        // assert the block size is equal to origin rows * grouping set num.
+        auto repeat_rows = block.rows();
+        auto grouping_set_num = repeat.getGroupSetNum();
+        ASSERT_EQ(origin_rows, 0);
+        ASSERT_EQ(origin_rows * grouping_set_num, repeat_rows); // 0
+        // assert that the grouping set columns are nullable.
+    }
 }
 CATCH
 
From f66f010a0cf8a5b3ad69670a78215da4ad11de5 Mon Sep 17 00:00:00 2001
From: AilinKid <3148019@qq.com>
Date: Tue, 13 Dec 2022 11:47:19 +0800
Subject: [PATCH 04/31] fix the repeat source logical test and add the repeat
 physical planner support

Signed-off-by: AilinKid <3148019@qq.com>
---
 .../RepeatSourceBlockInputStream.cpp          |   5 +
 .../RepeatSourceBlockInputStream.h            |   1 +
 dbms/src/Debug/MockExecutor/AstToPB.cpp       |   2 +-
 .../Debug/MockExecutor/RepeatSourceBinder.cpp |   2 +-
 dbms/src/Flash/Coprocessor/DAGContext.cpp     |   4 +-
 .../Coprocessor/DAGExpressionAnalyzer.cpp     |  15 +-
 dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp  |   2 +-
 .../Coprocessor/collectOutputFieldTypes.cpp   |  20 +-
 dbms/src/Flash/Planner/PhysicalPlan.cpp       |   7 +-
 dbms/src/Flash/Planner/PlanType.h             |   1 +
 .../Flash/Planner/Plans/PhysicalRepeat.cpp    | 116 ++++++
 dbms/src/Flash/Planner/Plans/PhysicalRepeat.h |  57 +++
 .../src/Flash/tests/gtest_repeat_executor.cpp | 387 ++++++++++++++++++
 dbms/src/Interpreters/Repeat.cpp              |  23 +-
 dbms/src/Interpreters/Repeat.h                |   2 +
 dbms/src/TestUtils/mockExecutor.cpp           |   2 +-
 16 files changed, 630 insertions(+), 16 deletions(-)
 create mode 100644 dbms/src/Flash/Planner/Plans/PhysicalRepeat.cpp
 create mode 100644 dbms/src/Flash/Planner/Plans/PhysicalRepeat.h
 create mode 100644 dbms/src/Flash/tests/gtest_repeat_executor.cpp

diff --git a/dbms/src/DataStreams/RepeatSourceBlockInputStream.cpp b/dbms/src/DataStreams/RepeatSourceBlockInputStream.cpp
index f5075f9c87d..af727442b56 100644
--- a/dbms/src/DataStreams/RepeatSourceBlockInputStream.cpp
+++ b/dbms/src/DataStreams/RepeatSourceBlockInputStream.cpp
@@ -31,4 +31,9 @@ Block RepeatSourceBlockInputStream::getHeader() const
     return res;
 }
 
+void RepeatSourceBlockInputStream::appendInfo(FmtBuffer & buffer) const {
+    buffer.fmtAppend(": grouping set ");
+    repeat_source_actions.get()->getActions()[0].repeat->getGroupingSetsDes(buffer);
+}
+
 } // namespace DB
diff --git a/dbms/src/DataStreams/RepeatSourceBlockInputStream.h b/dbms/src/DataStreams/RepeatSourceBlockInputStream.h
index eaa223ef824..d7f9f6db5cc 100644
--- a/dbms/src/DataStreams/RepeatSourceBlockInputStream.h
+++ b/dbms/src/DataStreams/RepeatSourceBlockInputStream.h
@@ -33,6 +33,7 @@ class RepeatSourceBlockInputStream : public IProfilingBlockInputStream
     }
     String getName() const override { return NAME; }
     Block getHeader() const override;
+    void appendInfo(FmtBuffer & buffer) const override;
 
 protected:
     Block readImpl() override;
diff --git a/dbms/src/Debug/MockExecutor/AstToPB.cpp b/dbms/src/Debug/MockExecutor/AstToPB.cpp
index fa58e2e3fc8..8977d8dc279 100644
--- a/dbms/src/Debug/MockExecutor/AstToPB.cpp
+++ b/dbms/src/Debug/MockExecutor/AstToPB.cpp
@@ -447,7 +447,7 @@ void identifierToPB(const DAGSchema & input, ASTIdentifier * id, tipb::Expr * ex
     *(expr->mutable_field_type()) = columnInfoToFieldType((*ft).second);
     expr->mutable_field_type()->set_collate(collator_id);
     WriteBufferFromOwnString ss;
-    encodeDAGInt64(ft - input.begin(), ss);
+    encodeDAGInt64(ft - input.begin(), ss); // use the offset within the child's input schema here, replacing the column ref used by the current executor
     expr->set_val(ss.releaseStr());
 }
 
diff --git a/dbms/src/Debug/MockExecutor/RepeatSourceBinder.cpp b/dbms/src/Debug/MockExecutor/RepeatSourceBinder.cpp
index 7633c347282..928160c8996 100644
--- a/dbms/src/Debug/MockExecutor/RepeatSourceBinder.cpp
+++ b/dbms/src/Debug/MockExecutor/RepeatSourceBinder.cpp
@@ -55,7 +55,7 @@ ExecutorBinderPtr compileRepeat(ExecutorBinderPtr input, size_t & executor_index
         field_type.set_tp(TiDB::TypeLongLong);
         field_type.set_charset("binary");
         field_type.set_collate(TiDB::ITiDBCollator::BINARY);
-        field_type.set_flag(0);
+        field_type.set_flag(TiDB::ColumnFlagUnsigned | TiDB::ColumnFlagNotNull); // should have the NOT NULL flag
         field_type.set_flen(-1);
         field_type.set_decimal(-1);
         output_schema.push_back(std::make_pair("groupingID", TiDB::fieldTypeToColumnInfo(field_type)));
diff --git a/dbms/src/Flash/Coprocessor/DAGContext.cpp b/dbms/src/Flash/Coprocessor/DAGContext.cpp
index b4a9f9ad515..66e64c11b64 100644
--- a/dbms/src/Flash/Coprocessor/DAGContext.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGContext.cpp
@@ -129,10 +129,10 @@ DAGContext::DAGContext(const tipb::DAGRequest & dag_request_, String log_identif
 
 void DAGContext::initOutputInfo()
 {
-    output_field_types = collectOutputFieldTypes(*dag_request);
+    output_field_types = collectOutputFieldTypes(*dag_request); // so these field types correspond to the output schema's field types of one fragment DAG.
     output_offsets.clear();
     result_field_types.clear();
-    for (UInt32 i : dag_request->output_offsets())
+    for (UInt32 i : dag_request->output_offsets()) // these should be the output offsets carried by the fragment's own DAG request
     {
         output_offsets.push_back(i);
         if (unlikely(i >= output_field_types.size()))
diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp
index 6d0162ec09c..70e92acec51 100644
--- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp
@@ -810,6 +810,7 @@ std::shared_ptr<Repeat> DAGExpressionAnalyzer::buildRepeatGroupingColumns(
     const tipb::RepeatSource & repeatSource, const ExpressionActionsPtr & actions)
 {
     GroupingSets group_sets_columns;
+    std::map<String, bool> map_grouping_col;
     group_sets_columns.reserve(repeatSource.grouping_sets().size());
     for (const auto& group_set : repeatSource.grouping_sets()){
         GroupingSet group_set_columns;
@@ -824,13 +825,22 @@ std::shared_ptr<Repeat> DAGExpressionAnalyzer::buildRepeatGroupingColumns(
             String cp_name = getActions(group_expr, actions);
             // tidb expression computation is based on column index offsets into the child's chunk schema; change them to ck block column names here.
             group_exprs_columns.emplace_back(cp_name);
+            map_grouping_col.insert(std::pair(cp_name, true));
         }
         // move here, since the basic strings are copied from the input cols.
         group_set_columns.emplace_back(std::move(group_exprs_columns));
         }
         group_sets_columns.emplace_back(std::move(group_set_columns));
     }
+    // change the original source columns to be nullable, and add a new column for groupingID.
+    for (auto & mutable_one: source_columns)
+    {
+        if (map_grouping_col[mutable_one.name])
+            mutable_one.type = makeNullable(mutable_one.type);
+    }
+    source_columns.emplace_back(Repeat::grouping_identifier_column_name, Repeat::grouping_identifier_column_type);
+    auto shared_repeat = Repeat::sharedRepeat(group_sets_columns);
+    return shared_repeat;
 }
 
 ExpressionActionsPtr DAGExpressionAnalyzer::appendRepeatSource(
@@ -843,8 +853,7 @@ ExpressionActionsPtr DAGExpressionAnalyzer::appendRepeatSource(
     }
     auto shared_repeat = buildRepeatGroupingColumns(repeatSource, last_step.actions);
     last_step.actions->add(ExpressionAction::repeatSource(shared_repeat));
-    // an added column from REPEAT action.
-    source_columns.emplace_back(Repeat::grouping_identifier_column_name, Repeat::grouping_identifier_column_type);
+
     auto before_repeat_source = chain.getLastActions();
     chain.finalize();
     chain.clear();
diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp
index 9a4a353eeb5..3fdd7cec1f1 100644
--- a/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp
@@ -157,7 +157,7 @@ DAGQueryBlock::DAGQueryBlock(const tipb::Executor & root_, QueryBlockIDGenerator
     else if (current->tp() == tipb::ExecType::TypeProjection)
     {
         GET_METRIC(tiflash_coprocessor_executor_count, type_projection).Increment();
-        children.push_back(std::make_shared<DAGQueryBlock>(source->projection().child(), id_generator));
+        children.push_back(std::make_shared<DAGQueryBlock>(source->projection().child(), id_generator)); // treat the executors below this one as a child query block
     }
     else if (current->tp() == tipb::ExecType::TypeTableScan)
    {
diff --git a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp
index b4d3079a9e2..5d15a4d3bc0 100644
--- a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp
+++ b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include <Flash/Coprocessor/DAGCodec.h>
 #include 
 #include 
 #include 
@@ -108,11 +109,20 @@ bool collectForRepeat(std::vector<tipb::FieldType> & out_field_types, const tipb:
         traverseExecutorTree(child, [&out_child_fields](const tipb::Executor & e) { return collectForExecutor(out_child_fields, e); });
     });
 
-// executor.repeat_source().grouping_sets().Get(1).grouping_exprs().Get(1).grouping_expr().Get(1).
-//    /// the type of grouping set column is always nullable
-//    auto updated_field_type = field_type;
-//    updated_field_type.set_flag(updated_field_type.flag() & (~static_cast<UInt32>(TiDB::ColumnFlagNotNull)));
-//    output_field_types.push_back(updated_field_type);
+    // make the child's output columns nullable according to the grouping-set mapping
+    for (const auto & grouping_set : executor.repeat_source().grouping_sets()){
+        for (const auto & grouping_exprs : grouping_set.grouping_exprs()){
+            for (const auto & grouping_col : grouping_exprs.grouping_expr()){
+                // assert that: grouping_col must be the column ref guaranteed by tidb.
+                auto column_index = decodeDAGInt64(grouping_col.val());
+                if (column_index < 0 || column_index >= static_cast<Int64>(out_child_fields.size()))
+                {
+                    throw TiFlashException("Column index out of bound", Errors::Coprocessor::BadRequest);
+                }
+                out_child_fields[column_index].set_flag(out_child_fields[column_index].flag() & (~TiDB::ColumnFlagNotNull));
+            }
+        }
+    }
 
     {
         // for additional groupingID column.
diff --git a/dbms/src/Flash/Planner/PhysicalPlan.cpp b/dbms/src/Flash/Planner/PhysicalPlan.cpp index 8aabeef3885..aafd805481a 100644 --- a/dbms/src/Flash/Planner/PhysicalPlan.cpp +++ b/dbms/src/Flash/Planner/PhysicalPlan.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -36,7 +37,6 @@ #include #include #include -#include #include namespace DB @@ -197,6 +197,11 @@ void PhysicalPlan::build(const String & executor_id, const tipb::Executor * exec pushBack(PhysicalJoin::build(context, executor_id, log, executor->join(), FineGrainedShuffle(executor), left, right)); break; } + case tipb::ExecType::TypeRepeatSource: + { + pushBack(PhysicalRepeat::build(context, executor_id, log, executor->repeat_source(), popBack())); + break; + } default: throw TiFlashException(fmt::format("{} executor is not supported", executor->tp()), Errors::Planner::Unimplemented); } diff --git a/dbms/src/Flash/Planner/PlanType.h b/dbms/src/Flash/Planner/PlanType.h index 8f347716b2d..4c4d6d283d5 100644 --- a/dbms/src/Flash/Planner/PlanType.h +++ b/dbms/src/Flash/Planner/PlanType.h @@ -37,6 +37,7 @@ struct PlanType MockTableScan = 12, Join = 13, GetResult = 14, + Repeat = 15, }; PlanTypeEnum enum_value; diff --git a/dbms/src/Flash/Planner/Plans/PhysicalRepeat.cpp b/dbms/src/Flash/Planner/Plans/PhysicalRepeat.cpp new file mode 100644 index 00000000000..adb2a774354 --- /dev/null +++ b/dbms/src/Flash/Planner/Plans/PhysicalRepeat.cpp @@ -0,0 +1,116 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +PhysicalPlanNodePtr PhysicalRepeat::build( + const Context & context, + const String & executor_id, + const LoggerPtr & log, + const tipb::RepeatSource & repeat_source, + const PhysicalPlanNodePtr & child) +{ + assert(child); + + child->finalize(); + + if (unlikely(repeat_source.grouping_sets().empty())) + { + //should not reach here + throw TiFlashException("Repeat executor without grouping sets", Errors::Planner::BadRequest); + } + + DAGExpressionAnalyzer analyzer{child->getSchema(), context}; + ExpressionActionsPtr before_repeat_actions = PhysicalPlanHelper::newActions(child->getSampleBlock(), context); + + + auto shared_repeat = analyzer.buildRepeatGroupingColumns(repeat_source, before_repeat_actions); + + // construct sample block. + NamesAndTypes repeat_output_columns; + auto child_header = child->getSchema(); + for (const auto & one : child_header) + { + repeat_output_columns.emplace_back(one.name, shared_repeat->isInGroupSetColumn(one.name)? 
makeNullable(one.type): one.type);
+    }
+    repeat_output_columns.emplace_back(shared_repeat->grouping_identifier_column_name, shared_repeat->grouping_identifier_column_type);
+
+    auto physical_repeat = std::make_shared<PhysicalRepeat>(
+        executor_id,
+        repeat_output_columns,
+        log->identifier(),
+        child,
+        shared_repeat,
+        Block(repeat_output_columns));
+
+    return physical_repeat;
+}
+
+
+void PhysicalRepeat::repeatTransform(DAGPipeline & child_pipeline, Context & context)
+{
+    auto repeat_actions = PhysicalPlanHelper::newActions(child_pipeline.firstStream()->getHeader(), context);
+    repeat_actions->add(ExpressionAction::repeatSource(shared_repeat));
+    String repeat_extra_info = fmt::format("repeat source, repeat_executor_id = {}", execId());
+    child_pipeline.transform([&](auto &stream) {
+        stream = std::make_shared<RepeatSourceBlockInputStream>(stream, repeat_actions);
+        stream->setExtraInfo(repeat_extra_info);
+    });
+}
+
+void PhysicalRepeat::transformImpl(DAGPipeline & pipeline, Context & context, size_t max_streams)
+{
+    child->transform(pipeline, context, max_streams);
+    repeatTransform(pipeline, context);
+}
+
+void PhysicalRepeat::finalize(const Names & parent_require)
+{
+    FinalizeHelper::checkSchemaContainsParentRequire(schema, parent_require);
+    Names required_output;
+    required_output.reserve(shared_repeat->getGroupSetNum()); // the grouping set columns should already exist in the child's output schema.
+    auto name_set = std::set<String>();
+    shared_repeat->getAllGroupSetColumnNames(name_set);
+    // append the parent_require columns, except the self-generated groupingID column.
+    for (const auto & one : parent_require)
+    {
+        if (one != Repeat::grouping_identifier_column_name)
+        {
+            name_set.insert(one);
+        }
+    }
+    for (const auto & grouping_name: name_set) {
+        required_output.emplace_back(grouping_name);
+    }
+    child->finalize(required_output);
+}
+
+const Block & PhysicalRepeat::getSampleBlock() const
+{
+    return sample_block;
+}
+} // namespace DB
diff --git a/dbms/src/Flash/Planner/Plans/PhysicalRepeat.h b/dbms/src/Flash/Planner/Plans/PhysicalRepeat.h
new file mode 100644
index 00000000000..5907c7c047e
--- /dev/null
+++ b/dbms/src/Flash/Planner/Plans/PhysicalRepeat.h
@@ -0,0 +1,57 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ +class PhysicalRepeat : public PhysicalUnary +{ +public: + static PhysicalPlanNodePtr build( + const Context & context, + const String & executor_id, + const LoggerPtr & log, + const tipb::RepeatSource & repeat, + const PhysicalPlanNodePtr & child); + + PhysicalRepeat( + const String & executor_id_, + const NamesAndTypes & schema_, + const String & req_id, + const PhysicalPlanNodePtr & child_, + const std::shared_ptr & shared_repeat, + const Block & sample_block_) + : PhysicalUnary(executor_id_, PlanType::Repeat, schema_, req_id, child_) + , shared_repeat(shared_repeat), sample_block(sample_block_){} + + void finalize(const Names & parent_require) override; + + void repeatTransform(DAGPipeline & child_pipeline, Context & context); + + const Block & getSampleBlock() const override; + +private: + void transformImpl(DAGPipeline & pipeline, Context & context, size_t max_streams) override; + std::shared_ptr shared_repeat; + Block sample_block; +}; +} // namespace DB + + diff --git a/dbms/src/Flash/tests/gtest_repeat_executor.cpp b/dbms/src/Flash/tests/gtest_repeat_executor.cpp new file mode 100644 index 00000000000..020e5f19d26 --- /dev/null +++ b/dbms/src/Flash/tests/gtest_repeat_executor.cpp @@ -0,0 +1,387 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include <TestUtils/ExecutorTestUtils.h>
+#include <TestUtils/mockExecutor.h>
+
+namespace DB
+{
+namespace tests
+{
+class RepeatExecutorTestRunner : public DB::tests::ExecutorTest
+{
+public:
+    void initializeContext() override
+    {
+        ExecutorTest::initializeContext();
+        context.addMockTable({"test_db", "test_table"},
+                             {{"s1", TiDB::TP::TypeString}, {"s2", TiDB::TP::TypeString}},
+                             {toNullableVec<String>("s1", {"banana", {}, "banana"}),
+                              toNullableVec<String>("s2", {"apple", {}, "banana"})});
+        context.addExchangeReceiver("exchange1",
+                                    {{"s1", TiDB::TP::TypeString}, {"s2", TiDB::TP::TypeString}},
+                                    {toNullableVec<String>("s1", {"banana", {}, "banana"}),
+                                     toNullableVec<String>("s2", {"apple", {}, "banana"})});
+    }
+};
+
+TEST_F(RepeatExecutorTestRunner, RepeatLogical)
+try
+{
+    /// case 1
+    auto request = context
+                       .scan("test_db", "test_table")
+                       .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
+                       .build(context);
+    /// data flow:
+    ///
+    ///  s1          s2
+    ///  "banana"    "apple"
+    ///  NULL        NULL
+    ///  "banana"    "banana"
+    ///      |
+    ///      v
+    ///  s1          s2          groupingID
+    ///  "banana"    NULL        1
+    ///  NULL        "apple"     2
+    ///  NULL        NULL        1
+    ///  NULL        NULL        2
+    ///  "banana"    NULL        1
+    ///  NULL        "banana"    2
+    ///
+    executeAndAssertColumnsEqual(
+        request,
+        {toNullableVec<String>({"banana", {}, {}, {}, "banana", {}}),
+         toNullableVec<String>({{}, "apple", {}, {}, {}, "banana"}),
+         toVec<UInt64>({1,2,1,2,1,2})});
+
+    /// case 2
+    request = context
+                  .scan("test_db", "test_table")
+                  .filter(eq(col("s1"), col("s2")))
+                  .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
+                  .build(context);
+    /// data flow:
+    ///
+    ///  s1          s2
+    ///  "banana"    "apple"
+    ///  NULL        NULL
+    ///  "banana"    "banana"
+    ///      |
+    ///      v
+    ///  s1          s2
+    ///  "banana"    "banana"
+    ///      |
+    ///      v
+    ///  s1          s2          groupingID
+    ///  "banana"    NULL        1
+    ///  NULL        "banana"    2
+    ///
+    executeAndAssertColumnsEqual(
+        request,
+        {toNullableVec<String>({"banana", {}}),
+         toNullableVec<String>({{}, "banana"}),
+         toVec<UInt64>({1,2})});
+
+    /// case 3: this case is only for non-planner mode.
+    /// request = context
+    ///               .scan("test_db", "test_table")
+    ///               .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
+    ///               .filter(eq(col("s1"), col("s2")))
+    ///               .build(context);
+    /// data flow: TiFlash isn't aware of the operator order here, so this filter is still run before repeat, just like the second test case above.
+    /// since this case only succeeds with the planner disabled, it is commented out, with the expected result kept here as a note.
+    ///
+    /// executeAndAssertColumnsEqual(
+    ///     request,
+    ///     {toNullableVec<String>({"banana", {}}),
+    ///      toNullableVec<String>({{}, "banana"}),
+    ///      toVec<UInt64>({1,2})});
+
+    /// case 4
+    auto const_false = lit(Field(static_cast<UInt64>(0)));
+    request = context
+                  .scan("test_db", "test_table")
+                  .filter(const_false) // refuse all rows
+                  .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
+                  .build(context);
+    executeAndAssertColumnsEqual(
+        request,
+        {});
+
+    /// case 5 (test integrated with aggregation)
+    request = context
+                  .scan("test_db", "test_table")
+                  .aggregation({Count(col("s1"))}, {col("s2")})
+                  .build(context);
+    executeAndAssertColumnsEqual(
+        request,
+        {toVec<UInt64>({1, 0, 1}),
+         toNullableVec<String>({"apple", {}, "banana"}),});
+
+    request = context
+                  .scan("test_db", "test_table")
+                  .aggregation({Count(col("s1"))}, {col("s2")})
+                  .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
+                  .build(context);
+    /// data flow:
+    ///
+    ///  s1          s2
+    ///  "banana"    "apple"
+    ///  NULL        NULL
+    ///  "banana"    "banana"
+    ///      |
+    ///      v
+    ///  count(s1)   s2
+    ///  1           "apple"
+    ///  0           NULL
+    ///  1           "banana"
+    ///      |
+    ///      v
+    ///  count(s1)   s2          groupingID
+    ///  1           NULL        1
+    ///  NULL        "apple"     2
+    ///  0           NULL        1
+    ///  NULL        NULL        2
+    ///  1           NULL        1
+    ///  NULL        "banana"    2
+    ///
+    executeAndAssertColumnsEqual(
+        request,
+        {toNullableVec<UInt64>({1, {}, 0, {}, 1,{}}),
+         toNullableVec<String>({{}, "apple", {},{},{}, "banana"}),
+         toVec<UInt64>({1,2,1,2,1,2})});
+
+    /// case 6 (test integrated with aggregation and projection)
+    request = context
+                  .scan("test_db", "test_table")
+                  .aggregation({Count(col("s1"))}, {col("s2")})
+                  .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
+                  .project({"count(s1)"})
+                  .build(context);
+    executeAndAssertColumnsEqual(
+        request,
+        {toNullableVec<UInt64>({1, {}, 0, {}, 1,{}})});
+
+    /// case 7 (test integrated with aggregation, projection and limit) 1
+    /// note: for now, limit is executed before repeat to reduce unnecessary row replication work.
+    /// request = context
+    ///               .scan("test_db", "test_table")
+    ///               .aggregation({Count(col("s1"))}, {col("s2")})
+    ///               .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
+    ///               .limit(2)
+    ///               .project({"count(s1)"})
+    ///               .build(context);
+    /// data flow:
+    ///
+    ///  s1          s2
+    ///  "banana"    "apple"
+    ///  NULL        NULL        // table scan
+    ///  "banana"    "banana"
+    ///      |
+    ///      v
+    ///  count(s1)   s2
+    ///  1           "apple"     // aggregate
+    ///  0           NULL
+    ///  1           "banana"
+    ///      |
+    ///      v
+    ///  count(s1)   s2          // limit precedes the repeat OP since they are in the same DAG query block.
+    ///  1           "apple"
+    ///  0           NULL
+    ///      |
+    ///      v
+    ///  count(s1)   s2          groupingID  // repeat is always arranged to run after limit, to avoid unnecessary replication in the same DAG query block.
+    ///  1           NULL        1
+    ///  NULL        "apple"     2
+    ///  0           NULL        1
+    ///  NULL        NULL        2
+    ///      |
+    ///      v
+    ///  count(s1)
+    ///  1
+    ///  NULL
+    ///  0
+    ///  NULL
+    ///
+    /// since this case only succeeds with the planner disabled, it is commented out, with the expected result kept here as a note.
+    ///
+    /// executeAndAssertColumnsEqual(
+    ///     request,
+    ///     {toNullableVec<UInt64>({1, {}, 0, {}})});
+
+    /// case 8 (test integrated with aggregation, projection and limit) 2
+    request = context
+                  .scan("test_db", "test_table")
+                  .aggregation({Count(col("s1"))}, {col("s2")})
+                  .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
+                  .project({"count(s1)"})
+                  .topN({{"count(s1)", true}}, 2)
+                  .build(context);
+    /// data flow:
+    ///
+    ///  s1          s2         ---------------+
+    ///  "banana"    "apple"                   |
+    ///  NULL        NULL       // table scan  |
+    ///  "banana"    "banana"                  |
+    ///      |                                 |
+    ///      v                                 |
+    ///  count(s1)   s2                        |
+    ///  1           "apple"    // aggregate   |
+    ///  0           NULL                      |
+    ///  1           "banana"                  |
+    ///      |                  +-------------> Child DAG Query Block
+    ///      v                                 |
+    ///  count(s1)   s2          groupingID    |
+    ///  1           NULL        1  // repeat  |
+    ///  NULL        "apple"     2             |
+    ///  0           NULL        1             |
+    ///  NULL        NULL        2             |
+    ///  1           NULL        1             |
+    ///  NULL        "banana"    2             |
+    ///      |                  ---------------+
+    ///      v                  ---------------+
+    ///  count(s1)                             |
+    ///  1                                     |
+    ///  NULL        // projection             |
+    ///  0                                     |
+    ///  NULL                                  |
+    ///  1                      +-------------> parent DAG Query Block
+    ///  NULL                                  |
+    ///      |                                 |
+    ///      v                                 |
+    ///  count(s1)   // sort (desc)            |
+    ///  1                                     |
+    ///  1                                     |
+    ///  0                                     |
+    ///  NULL                                  |
+    ///  NULL                                  |
+    ///  NULL                                  |
+    ///      |                                 |
+    ///      v                                 |
+    ///  count(s1)   // limit 2                |
+    ///  1                                     |
+    ///  1                                     |
+    ///                         ---------------+
+    ///
+    /// Note: this plan differs from the previous one. Since the projection between repeat and topN is a SOURCE node,
+    /// it isolates the whole DAG into two independent DAG query blocks, and limit and repeat each take a place in one
+    /// of them. So we can't guarantee that the repeat OP runs after limit does, and the unnecessary replication work
+    /// can't be reduced. The DAG query block division is to blame here.
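+    /// (editor's note) concretely: because that projection acts as a SOURCE node, the DAG splits
+    /// into [scan -> agg -> repeat] and [projection -> topN] query blocks, and a limit/topN can
+    /// only be reordered relative to repeat when both sit inside a single query block.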
+    ///
+    executeAndAssertColumnsEqual(
+        request,
+        {toNullableVec<UInt64>({1, 1})});
+
+    /// case 9 (test integrated with receiver and join)
+    request = context
+                  .receive("exchange1")
+                  .join(context.scan("test_db", "test_table").project({"s2"}), tipb::JoinType::TypeInnerJoin, {col("s2")})
+                  .build(context);
+    executeAndAssertColumnsEqual(
+        request,
+        {toNullableVec<String>({"banana", "banana"}),
+         toNullableVec<String>({"apple", "banana"}),
+         toNullableVec<String>({"apple", "banana"})});
+
+    request = context
+                  .receive("exchange1")
+                  .aggregation({Count(col("s1"))}, {col("s2")})
+                  .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
+                  .join(context.scan("test_db", "test_table").project({"s2"}), tipb::JoinType::TypeInnerJoin, {col("s2")})
+                  .project({"count(s1)", "groupingID"})
+                  .topN({{"groupingID", true}}, 2)
+                  .build(context);
+    /// data flow:
+    ///
+    ///  s1          s2         ---------------+
+    ///  "banana"    "apple"                   |
+    ///  NULL        NULL       // table scan  |
+    ///  "banana"    "banana"                  |
+    ///      |                                 |
+    ///      v                                 |
+    ///  count(s1)   s2                        |
+    ///  1           "apple"    // aggregate   |
+    ///  0           NULL                      |
+    ///  1           "banana"                  |
+    ///      |                  +-------------> Child of Child DAG Query Block
+    ///      v                                 |
+    ///  count(s1)   s2          groupingID    |
+    ///  1           NULL        1  // repeat  |
+    ///  NULL        "apple"     2             |
+    ///  0           NULL        1             |
+    ///  NULL        NULL        2             |
+    ///  1           NULL        1             |
+    ///  NULL        "banana"    2             |
+    ///      |                  ---------------+
+    ///      v                  ---------------+
+    ///  count(s1)   s2          groupingID  *  s2          |
+    ///  NULL        "apple"     2              "apple"     // join
+    ///  NULL        "banana"    2              NULL        |
+    ///  "banana"               +-------------> Child DAG Query Block
+    ///      |                                  |
+    ///  NULL        "apple"     2              "apple"     |
+    ///  NULL        "banana"    2              "banana"    |
+    ///      |                  ----------------+
+    ///      v                  ----------------+
+    ///  count(s1)   groupingID  // projection  |
+    ///  NULL        2                          |
+    ///  NULL        2                          |
+    ///      |                  +--------------> Parent DAG Query Block
+    ///      v                                  |
+    ///  count(s1)   groupingID  // topN        |
+    ///  NULL        2                          |
+    ///  NULL        2                          |
+    ///                         ----------------+
+    ///
+    executeAndAssertColumnsEqual(
+        request,
+        {toNullableVec<UInt64>({{}, {}}),
+         toVec<UInt64>({2,2}),});
+
+
+    /// assert the input stream plan format. (under planner-enabled mode)
+    String expected = R"(
+CreatingSets
+ Union: 
+  HashJoinBuild x 10: , join_kind = Inner
+   Expression: 
+    Expression: 
+     Expression: 
+      MockTableScan
+ Union: 
+  Expression x 10: 
+   SharedQuery: 
+    MergeSorting, limit = 2
+     Union: 
+      PartialSorting x 10: limit = 2
+       Expression: 
+        Expression: 
+         HashJoinProbe: 
+          Expression: 
+           RepeatSource: : grouping set [<{count(s1)_collator_46 }><{any(s2)_collator_46 }>]
+            Expression: 
+             SharedQuery: 
+              ParallelAggregating, max_threads: 10, final: true
+               MockExchangeReceiver x 10)";
+    ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10);
+}
+CATCH
+
+/// TODO: more OP combination tests.
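+
+/// (editor's sketch, not part of the original patch) one cheap combination to add next,
+/// reusing the helpers above: project only the self-generated groupingID after repeat, which
+/// keeps the expected rows order-insensitive:
+///
+///     auto request = context
+///                        .scan("test_db", "test_table")
+///                        .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"}}, MockVecColumnNameVec{MockColumnNameVec{"s2"}}})
+///                        .project({"groupingID"})
+///                        .build(context);
+///     executeAndAssertColumnsEqual(request, {toVec<UInt64>({1, 2, 1, 2, 1, 2})});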
+
+} // namespace tests
+} // namespace DB
\ No newline at end of file
diff --git a/dbms/src/Interpreters/Repeat.cpp b/dbms/src/Interpreters/Repeat.cpp
index ff626f36042..16be88e81c0 100644
--- a/dbms/src/Interpreters/Repeat.cpp
+++ b/dbms/src/Interpreters/Repeat.cpp
@@ -35,7 +35,28 @@ void convertColumnToNullable(ColumnWithTypeAndName & column)
 
 Repeat::Repeat(const DB::GroupingSets & gss)
     : group_sets_names(gss){}
 
-
+void Repeat::getGroupingSetsDes(FmtBuffer & buffer) const
+{
+    buffer.fmtAppend("[");
+    for (const auto & grouping_set: group_sets_names)
+    {
+        buffer.fmtAppend("<");
+        for (const auto & grouping_exprs: grouping_set)
+        {
+            buffer.fmtAppend("{{");
+            for (size_t i = 0; i < grouping_exprs.size(); i++)
+            {
+                if (i != 0) {
+                    buffer.fmtAppend(",");
+                }
+                buffer.fmtAppend(grouping_exprs.at(i));
+            }
+            buffer.fmtAppend("}}");
+        }
+        buffer.fmtAppend(">");
+    }
+    buffer.fmtAppend("]");
+}
 
 /// for cases like: select count(distinct a), count(distinct b) from t;
 /// it will generate 2 group sets with <a> and <b>, over which we should
diff --git a/dbms/src/Interpreters/Repeat.h b/dbms/src/Interpreters/Repeat.h
index 7c2c05ab85d..442050f55e7 100644
--- a/dbms/src/Interpreters/Repeat.h
+++ b/dbms/src/Interpreters/Repeat.h
@@ -129,6 +129,8 @@ class Repeat
 
     static std::shared_ptr<Repeat> sharedRepeat(const GroupingSets & groupingSets);
 
+    void getGroupingSetsDes(FmtBuffer & buffer) const;
+
     static const String grouping_identifier_column_name;
     static const DataTypePtr grouping_identifier_column_type;
 
diff --git a/dbms/src/TestUtils/mockExecutor.cpp b/dbms/src/TestUtils/mockExecutor.cpp
index 1220e873a32..e3b02019f15 100644
--- a/dbms/src/TestUtils/mockExecutor.cpp
+++ b/dbms/src/TestUtils/mockExecutor.cpp
@@ -96,7 +96,7 @@ void DAGRequestBuilder::initDAGRequest(tipb::DAGRequest & dag_request)
     else
         dag_request.set_encode_type(tipb::EncodeType::TypeDefault);
 
-    for (size_t i = 0; i < root->output_schema.size(); ++i)
+    for (size_t i = 0; i < root->output_schema.size(); ++i) // set the mock DAG request's output offsets according to the root executor's output schema
         dag_request.add_output_offsets(i);
 }
 
From 4bb8fb92914a983dab2cb49c251332cf0a094365 Mon Sep 17 00:00:00 2001
From: AilinKid <3148019@qq.com>
Date: Mon, 26 Dec 2022 11:44:35 +0800
Subject: [PATCH 05/31] change the name from repeat source to expand and
 rebase master

Signed-off-by: AilinKid <3148019@qq.com>
---
 dbms/src/Common/TiFlashMetrics.h              |   1 +
 ...tStream.cpp => ExpandBlockInputStream.cpp} |  15 +--
 ...InputStream.h => ExpandBlockInputStream.h} |  12 +-
 dbms/src/DataStreams/SquashingTransform.cpp   |   8 +-
 ...epeatSourceBinder.cpp => ExpandBinder.cpp} |  19 +--
 .../{RepeatSourceBinder.h => ExpandBinder.h}  |   6 +-
 dbms/src/Debug/dbgFuncCoprocessor.h           |   1 +
 .../Coprocessor/DAGExpressionAnalyzer.cpp     |  28 ++---
 .../Flash/Coprocessor/DAGExpressionAnalyzer.h |   4 +-
 dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp  |  12 +-
 dbms/src/Flash/Coprocessor/DAGQueryBlock.h    |   4 +-
 .../Coprocessor/DAGQueryBlockInterpreter.cpp  |  22 ++--
 .../Coprocessor/DAGQueryBlockInterpreter.h    |   2 +-
 .../Coprocessor/collectOutputFieldTypes.cpp   |   6 +-
 dbms/src/Flash/Planner/PhysicalPlan.cpp       |   4 +-
 .../Flash/Planner/Plans/PhysicalExpand.cpp    | 116 ++++++++++++++++++
 dbms/src/Flash/Planner/Plans/PhysicalExpand.h |  57 +++++++++
 .../Flash/Planner/Plans/PhysicalRepeat.cpp    |  34 ++---
 dbms/src/Flash/Planner/Plans/PhysicalRepeat.h |  12 +-
 .../src/Flash/Statistics/CommonExecutorImpl.h |  13 ++
 .../ExecutorStatisticsCollector.cpp           |   3 +-
 .../Flash/Statistics/traverseExecutors.cpp    |   4 +-
 .../Interpreters/{Repeat.cpp => Expand.cpp}   |  29 
++--- dbms/src/Interpreters/{Repeat.h => Expand.h} | 42 +++---- dbms/src/Interpreters/ExpressionActions.cpp | 16 +-- dbms/src/Interpreters/ExpressionActions.h | 14 ++- .../Interpreters/tests/gtest_block_repeat.cpp | 2 +- dbms/src/TestUtils/executorSerializer.cpp | 14 +-- 28 files changed, 346 insertions(+), 154 deletions(-) rename dbms/src/DataStreams/{RepeatSourceBlockInputStream.cpp => ExpandBlockInputStream.cpp} (67%) rename dbms/src/DataStreams/{RepeatSourceBlockInputStream.h => ExpandBlockInputStream.h} (77%) rename dbms/src/Debug/MockExecutor/{RepeatSourceBinder.cpp => ExpandBinder.cpp} (77%) rename dbms/src/Debug/MockExecutor/{RepeatSourceBinder.h => ExpandBinder.h} (86%) create mode 100644 dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp create mode 100644 dbms/src/Flash/Planner/Plans/PhysicalExpand.h rename dbms/src/Interpreters/{Repeat.cpp => Expand.cpp} (93%) rename dbms/src/Interpreters/{Repeat.h => Expand.h} (82%) diff --git a/dbms/src/Common/TiFlashMetrics.h b/dbms/src/Common/TiFlashMetrics.h index bf0ec4a9f65..9f3d5f064c8 100644 --- a/dbms/src/Common/TiFlashMetrics.h +++ b/dbms/src/Common/TiFlashMetrics.h @@ -278,6 +278,7 @@ namespace DB F(type_mpp, {{"type", "mpp"}, ComputeLabelHolder::instance().getClusterIdLabel(), ComputeLabelHolder::instance().getProcessIdLabel()}), \ F(type_cop, {{"type", "cop"}, ComputeLabelHolder::instance().getClusterIdLabel(), ComputeLabelHolder::instance().getProcessIdLabel()}), \ F(type_batch, {{"type", "batch"}, ComputeLabelHolder::instance().getClusterIdLabel(), ComputeLabelHolder::instance().getProcessIdLabel()})) \ + // clang-format on /// Buckets with boundaries [start * base^0, start * base^1, ..., start * base^(size-1)] diff --git a/dbms/src/DataStreams/RepeatSourceBlockInputStream.cpp b/dbms/src/DataStreams/ExpandBlockInputStream.cpp similarity index 67% rename from dbms/src/DataStreams/RepeatSourceBlockInputStream.cpp rename to dbms/src/DataStreams/ExpandBlockInputStream.cpp index af727442b56..2f502c3f708 100644 --- a/dbms/src/DataStreams/RepeatSourceBlockInputStream.cpp +++ b/dbms/src/DataStreams/ExpandBlockInputStream.cpp @@ -12,28 +12,29 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include <DataStreams/RepeatSourceBlockInputStream.h>
+#include <DataStreams/ExpandBlockInputStream.h>
+
 
 namespace DB
 {
-Block RepeatSourceBlockInputStream::readImpl()
+Block ExpandBlockInputStream::readImpl()
 {
     Block block = children.back()->read();
     if (!block)
         return block;
-    repeat_source_actions->execute(block);
+    expand_actions->execute(block);
     return block;
 }
 
-Block RepeatSourceBlockInputStream::getHeader() const
+Block ExpandBlockInputStream::getHeader() const
 {
     Block res = children.back()->getHeader();
-    repeat_source_actions->execute(res);
+    expand_actions->execute(res);
     return res;
 }
 
-void RepeatSourceBlockInputStream::appendInfo(FmtBuffer & buffer) const {
+void ExpandBlockInputStream::appendInfo(FmtBuffer & buffer) const {
     buffer.fmtAppend(": grouping set ");
-    repeat_source_actions.get()->getActions()[0].repeat->getGroupingSetsDes(buffer);
+    expand_actions.get()->getActions()[0].expand->getGroupingSetsDes(buffer);
 }
 
 } // namespace DB
diff --git a/dbms/src/DataStreams/RepeatSourceBlockInputStream.h b/dbms/src/DataStreams/ExpandBlockInputStream.h
similarity index 77%
rename from dbms/src/DataStreams/RepeatSourceBlockInputStream.h
rename to dbms/src/DataStreams/ExpandBlockInputStream.h
index d7f9f6db5cc..e502a8c8e1f 100644
--- a/dbms/src/DataStreams/RepeatSourceBlockInputStream.h
+++ b/dbms/src/DataStreams/ExpandBlockInputStream.h
@@ -19,15 +19,15 @@
 
 namespace DB
 {
-class RepeatSourceBlockInputStream : public IProfilingBlockInputStream
+class ExpandBlockInputStream : public IProfilingBlockInputStream
 {
-    static constexpr auto NAME = "RepeatSource";
+    static constexpr auto NAME = "Expand";
 
 public:
-    RepeatSourceBlockInputStream(
+    ExpandBlockInputStream(
         const BlockInputStreamPtr & input,
-        ExpressionActionsPtr repeat_source_actions_)
-        : repeat_source_actions(repeat_source_actions_)
+        ExpressionActionsPtr expand_actions_)
+        : expand_actions(expand_actions_)
     {
         children.push_back(input);
     }
@@ -39,7 +39,7 @@ class RepeatSourceBlockInputStream : public IProfilingBlockInputStream
     Block readImpl() override;
 
 private:
-    ExpressionActionsPtr repeat_source_actions;
+    ExpressionActionsPtr expand_actions;
 };
 
 } // namespace DB
diff --git a/dbms/src/DataStreams/SquashingTransform.cpp b/dbms/src/DataStreams/SquashingTransform.cpp
index 1488b688d27..391cd710c8d 100644
--- a/dbms/src/DataStreams/SquashingTransform.cpp
+++ b/dbms/src/DataStreams/SquashingTransform.cpp
@@ -93,16 +93,10 @@ void SquashingTransform::append(Block && block)
     {
         MutableColumnPtr mutable_column = (*std::move(accumulated_block.getByPosition(i).column)).mutate();
         mutable_column->insertRangeFrom(*block.getByPosition(i).column, 0, rows);
-        accumulated_block.getByPosition(i).column = std::move(mutable_column); // append the values into the column
+        accumulated_block.getByPosition(i).column = std::move(mutable_column);
     }
 }
 
-// we may need an efficient row-replication operation here: the repeatSource operator first appends the additional column, then replicates the
-// original block's data n times; on each replica it sets the grouping-set columns outside the target set to null and fills the grouping ID
-// column with the constant n.
-// -// sample_block - bool SquashingTransform::isEnoughSize(size_t rows, size_t bytes) const { return (!min_block_size_rows && !min_block_size_bytes) diff --git a/dbms/src/Debug/MockExecutor/RepeatSourceBinder.cpp b/dbms/src/Debug/MockExecutor/ExpandBinder.cpp similarity index 77% rename from dbms/src/Debug/MockExecutor/RepeatSourceBinder.cpp rename to dbms/src/Debug/MockExecutor/ExpandBinder.cpp index 928160c8996..9d07a0c58f4 100644 --- a/dbms/src/Debug/MockExecutor/RepeatSourceBinder.cpp +++ b/dbms/src/Debug/MockExecutor/ExpandBinder.cpp @@ -12,19 +12,20 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include +#include namespace DB::mock { -bool RepeatSourceBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) +bool ExpandBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) { - tipb_executor->set_tp(tipb::ExecType::TypeRepeatSource); + tipb_executor->set_tp(tipb::ExecType::TypeExpand); tipb_executor->set_executor_id(name); - tipb::RepeatSource * repeat_source = tipb_executor->mutable_repeat_source(); + tipb::Expand * expand = tipb_executor->mutable_expand(); for (const auto & grouping_set : grouping_sets_columns) { - auto * gss = repeat_source->add_grouping_sets(); + auto * gss = expand->add_grouping_sets(); for (const auto & grouping_exprs : grouping_set) { auto * ges = gss->add_grouping_exprs(); @@ -35,7 +36,7 @@ bool RepeatSourceBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t } } } - auto * children_executor = repeat_source->mutable_child(); + auto * children_executor = expand->mutable_child(); return children[0]->toTiPBExecutor(children_executor, collator_id, mpp_info, context); } @@ -60,8 +61,8 @@ ExecutorBinderPtr compileRepeat(ExecutorBinderPtr input, size_t & executor_index field_type.set_decimal(-1); output_schema.push_back(std::make_pair("groupingID", TiDB::fieldTypeToColumnInfo(field_type))); } - ExecutorBinderPtr repeat_source = std::make_shared(executor_index, output_schema, std::move(grouping_set_columns)); - repeat_source->children.push_back(input); - return repeat_source; + ExecutorBinderPtr expand = std::make_shared(executor_index, output_schema, std::move(grouping_set_columns)); + expand->children.push_back(input); + return expand; } } // namespace DB::mock \ No newline at end of file diff --git a/dbms/src/Debug/MockExecutor/RepeatSourceBinder.h b/dbms/src/Debug/MockExecutor/ExpandBinder.h similarity index 86% rename from dbms/src/Debug/MockExecutor/RepeatSourceBinder.h rename to dbms/src/Debug/MockExecutor/ExpandBinder.h index ace010048b2..752046a4d80 100644 --- a/dbms/src/Debug/MockExecutor/RepeatSourceBinder.h +++ b/dbms/src/Debug/MockExecutor/ExpandBinder.h @@ -21,11 +21,11 @@ using MockGroupingNameVec = std::vector; using MockVecGroupingNameVec = std::vector; using MockVVecGroupingNameVec = std::vector; -class RepeatSourceBinder : public ExecutorBinder +class ExpandBinder : public ExecutorBinder { public: - RepeatSourceBinder(size_t & index_, const DAGSchema & output_schema_, MockVVecGroupingNameVec gss) - : ExecutorBinder(index_, "repeat_source_" + std::to_string(index_), output_schema_) + ExpandBinder(size_t & index_, const DAGSchema & output_schema_, MockVVecGroupingNameVec gss) + : ExecutorBinder(index_, "expand_" + std::to_string(index_), output_schema_) , grouping_sets_columns(gss) {} diff --git 
a/dbms/src/Debug/dbgFuncCoprocessor.h b/dbms/src/Debug/dbgFuncCoprocessor.h index 9a21842fa50..f1b95139f62 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.h +++ b/dbms/src/Debug/dbgFuncCoprocessor.h @@ -15,6 +15,7 @@ #pragma once #include + namespace DB { class Context; diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index 70e92acec51..fe74369bf7a 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -32,10 +32,10 @@ #include #include #include +#include #include #include #include -#include #include #include #include @@ -806,13 +806,13 @@ NamesAndTypes DAGExpressionAnalyzer::buildOrderColumns( return order_columns; } -std::shared_ptr DAGExpressionAnalyzer::buildRepeatGroupingColumns( - const tipb::RepeatSource & repeatSource, const ExpressionActionsPtr & actions) +std::shared_ptr DAGExpressionAnalyzer::buildExpandGroupingColumns( + const tipb::Expand & expand, const ExpressionActionsPtr & actions) { GroupingSets group_sets_columns; std::map map_grouping_col; - group_sets_columns.reserve(repeatSource.grouping_sets().size()); - for (const auto& group_set : repeatSource.grouping_sets()){ + group_sets_columns.reserve(expand.grouping_sets().size()); + for (const auto& group_set : expand.grouping_sets()){ GroupingSet group_set_columns; group_set_columns.reserve(group_set.grouping_exprs().size()); for (const auto &group_exprs : group_set.grouping_exprs()) { @@ -838,30 +838,30 @@ std::shared_ptr DAGExpressionAnalyzer::buildRepeatGroupingColumns( if (map_grouping_col[mutable_one.name]) mutable_one.type = makeNullable(mutable_one.type); } - source_columns.emplace_back(Repeat::grouping_identifier_column_name, Repeat::grouping_identifier_column_type); - auto shared_repeat = Repeat::sharedRepeat(group_sets_columns); - return shared_repeat; + source_columns.emplace_back(Expand::grouping_identifier_column_name, Expand::grouping_identifier_column_type); + auto shared_expand = Expand::sharedExpand(group_sets_columns); + return shared_expand; } -ExpressionActionsPtr DAGExpressionAnalyzer::appendRepeatSource( - const tipb::RepeatSource & repeatSource, ExpressionActionsChain & chain) +ExpressionActionsPtr DAGExpressionAnalyzer::appendExpand( + const tipb::Expand & expand, ExpressionActionsChain & chain) { auto & last_step = initAndGetLastStep(chain); for (const auto &origin_col : last_step.actions->getSampleBlock().getNamesAndTypesList()) { last_step.required_output.push_back(origin_col.name); } - auto shared_repeat = buildRepeatGroupingColumns(repeatSource, last_step.actions); - last_step.actions->add(ExpressionAction::repeatSource(shared_repeat)); + auto shared_expand = buildExpandGroupingColumns(expand, last_step.actions); + last_step.actions->add(ExpressionAction::expandSource(shared_expand)); - auto before_repeat_source = chain.getLastActions(); + auto before_expand = chain.getLastActions(); chain.finalize(); chain.clear(); auto & after_repeat_step = initAndGetLastStep(chain); for (const auto & column : getCurrentInputColumns()) after_repeat_step.required_output.push_back(column.name); - return before_repeat_source; + return before_expand; } std::vector DAGExpressionAnalyzer::appendOrderBy( diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h index f1012df4646..7436841034a 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h @@ 
-71,9 +71,9 @@ class DAGExpressionAnalyzer : private boost::noncopyable
         ExpressionActionsChain & chain,
         const std::vector<const tipb::Expr *> & conditions);
 
-    std::shared_ptr<Repeat> buildRepeatGroupingColumns(const tipb::RepeatSource & repeatSource, const ExpressionActionsPtr & actions);
+    std::shared_ptr<Expand> buildExpandGroupingColumns(const tipb::Expand & expand, const ExpressionActionsPtr & actions);
 
-    ExpressionActionsPtr appendRepeatSource(const tipb::RepeatSource & repeatSource, ExpressionActionsChain & chain);
+    ExpressionActionsPtr appendExpand(const tipb::Expand & expand, ExpressionActionsChain & chain);
 
     NamesAndTypes buildWindowOrderColumns(const tipb::Sort & window_sort) const;
 
diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp
index 3fdd7cec1f1..2f5a28347cd 100644
--- a/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp
@@ -46,7 +46,7 @@ bool isSourceNode(const tipb::Executor * root)
 const static String SOURCE_NAME("source");
 const static String SEL_NAME("selection");
 const static String AGG_NAME("aggregation");
-const static String REPEAT_NAME("repeat_source");
+const static String EXPAND_NAME("expand");
 const static String WINDOW_NAME("window");
 const static String WINDOW_SORT_NAME("window_sort");
 const static String HAVING_NAME("having");
@@ -97,11 +97,11 @@ DAGQueryBlock::DAGQueryBlock(const tipb::Executor & root_, QueryBlockIDGenerator
         }
         current = &current->selection().child();
         break;
-    case tipb::ExecType::TypeRepeatSource:
-        GET_METRIC(tiflash_coprocessor_executor_count, type_repeat_source).Increment();
-        assignOrThrowException(&repeat_source, current, REPEAT_NAME);
-        repeat_source_name = current->executor_id();
-        current = &current->repeat_source().child(); // non-leaf node, keep recursing into the child
+    case tipb::ExecType::TypeExpand:
+        GET_METRIC(tiflash_coprocessor_executor_count, type_expand).Increment();
+        assignOrThrowException(&expand, current, EXPAND_NAME);
+        expand_name = current->executor_id();
+        current = &current->expand().child(); // non-leaf node, keep recursing into the child
         break;
     case tipb::ExecType::TypeStreamAgg:
         RUNTIME_CHECK_MSG(current->aggregation().group_by_size() == 0, STREAM_AGG_ERROR);
diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlock.h b/dbms/src/Flash/Coprocessor/DAGQueryBlock.h
index d18ac84fd90..86cd14c09df 100644
--- a/dbms/src/Flash/Coprocessor/DAGQueryBlock.h
+++ b/dbms/src/Flash/Coprocessor/DAGQueryBlock.h
@@ -60,8 +60,8 @@ class DAGQueryBlock
     String having_name;
     const tipb::Executor * limit_or_topn = nullptr;
     String limit_or_topn_name;
-    const tipb::Executor * repeat_source = nullptr; // repeat source node can only be before sender
-    String repeat_source_name;
+    const tipb::Executor * expand = nullptr; // expand node can only be before sender
+    String expand_name;
     const tipb::Executor * exchange_sender = nullptr;
     String exchange_sender_name;
     UInt32 id;
diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp
index 3e4cb641f97..9889536c48c 100644
--- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -29,6 +30,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -43,8 +46,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
-#include 
 #include 
 #include 
 
@@ -76,7 +80,7 @@ struct AnalysisResult
     ExpressionActionsPtr before_having;
     ExpressionActionsPtr before_order_and_select;
     ExpressionActionsPtr final_projection;
-    ExpressionActionsPtr before_repeat_source;
+    ExpressionActionsPtr before_expand;
 
     String filter_column_name;
     String having_column_name;
@@ -136,8 +140,8 @@ AnalysisResult analyzeExpressions(
         chain.addStep();
     }
 
-    if (query_block.repeat_source) {
-        res.before_repeat_source = analyzer.appendRepeatSource(query_block.repeat_source->repeat_source(), chain);
+    if (query_block.expand) {
+        res.before_expand = analyzer.appendExpand(query_block.expand->expand(), chain);
     }
 
     const auto & dag_context = *context.getDAGContext();
@@ -697,10 +701,10 @@ void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline)
     // execute the repeat source OP after all filters/limits and so on.
     // since the repeat source OP has row replication work to do, placing it after limit reduces some unnecessary burden.
     // and put it before the final projection, because we should recognize some base cols as grouping set cols before their aliases change.
-    if (res.before_repeat_source)
+    if (res.before_expand)
     {
-        executeRepeatSource(pipeline, res.before_repeat_source);
-        recordProfileStreams(pipeline, query_block.repeat_source_name);
+        executeExpandSource(pipeline, res.before_expand);
+        recordProfileStreams(pipeline, query_block.expand_name);
     }
 
     // execute final project action
@@ -746,10 +750,10 @@ void DAGQueryBlockInterpreter::executeLimit(DAGPipeline & pipeline)
     }
 }
 
-void DAGQueryBlockInterpreter::executeRepeatSource(DAGPipeline & pipeline, const ExpressionActionsPtr & expr)
+void DAGQueryBlockInterpreter::executeExpandSource(DAGPipeline & pipeline, const ExpressionActionsPtr & expr)
 {
     pipeline.transform([&](auto &stream) {
-        stream = std::make_shared<RepeatSourceBlockInputStream>(stream, expr);
+        stream = std::make_shared<ExpandBlockInputStream>(stream, expr);
     });
 }
 
diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h
index 8b4746bb6a2..eae5aa34cec 100644
--- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h
+++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h
@@ -69,7 +69,7 @@ class DAGQueryBlockInterpreter
     void executeWindowOrder(DAGPipeline & pipeline, SortDescription sort_desc, bool enable_fine_grained_shuffle);
     void executeOrder(DAGPipeline & pipeline, const NamesAndTypes & order_columns);
     void executeLimit(DAGPipeline & pipeline);
-    void executeRepeatSource(DAGPipeline & pipeline, const ExpressionActionsPtr & expr);
+    void executeExpandSource(DAGPipeline & pipeline, const ExpressionActionsPtr & expr);
     void executeWindow(
         DAGPipeline & pipeline,
         WindowDescription & window_description,
diff --git a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp
index 5d15a4d3bc0..923afd56914 100644
--- a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp
+++ b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp
@@ -109,8 +109,8 @@ bool collectForRepeat(std::vector<tipb::FieldType> & out_field_types, const tipb:
         traverseExecutorTree(child, [&out_child_fields](const tipb::Executor & e) { return collectForExecutor(out_child_fields, e); });
     });
 
-    // make the child's output columns nullable according to the grouping-set mapping
-    for (const auto & grouping_set : executor.repeat_source().grouping_sets()){
+    // make the columns from grouping sets nullable.
+    for (const auto & grouping_set : executor.expand().grouping_sets()){
         for (const auto & grouping_exprs : grouping_set.grouping_exprs()){
             for (const auto & grouping_col : grouping_exprs.grouping_expr()){
                 // assert that: grouping_col must be the column ref guaranteed by tidb.
@@ -230,7 +230,7 @@ bool collectForExecutor(std::vector & output_field_types, const return collectForTableScan(output_field_types, executor.partition_table_scan()); case tipb::ExecType::TypeJoin: return collectForJoin(output_field_types, executor); - case tipb::ExecType::TypeRepeatSource: + case tipb::ExecType::TypeExpand: return collectForRepeat(output_field_types, executor); default: return true; diff --git a/dbms/src/Flash/Planner/PhysicalPlan.cpp b/dbms/src/Flash/Planner/PhysicalPlan.cpp index aafd805481a..42729693433 100644 --- a/dbms/src/Flash/Planner/PhysicalPlan.cpp +++ b/dbms/src/Flash/Planner/PhysicalPlan.cpp @@ -197,9 +197,9 @@ void PhysicalPlan::build(const String & executor_id, const tipb::Executor * exec pushBack(PhysicalJoin::build(context, executor_id, log, executor->join(), FineGrainedShuffle(executor), left, right)); break; } - case tipb::ExecType::TypeRepeatSource: + case tipb::ExecType::TypeExpand: { - pushBack(PhysicalRepeat::build(context, executor_id, log, executor->repeat_source(), popBack())); + pushBack(PhysicalExpand::build(context, executor_id, log, executor->expand(), popBack())); break; } default: diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp new file mode 100644 index 00000000000..51eaaeaa4c3 --- /dev/null +++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp @@ -0,0 +1,116 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +PhysicalPlanNodePtr PhysicalExpand::build( + const Context & context, + const String & executor_id, + const LoggerPtr & log, + const tipb::Expand & expand, + const PhysicalPlanNodePtr & child) +{ + assert(child); + + child->finalize(); + + if (unlikely(expand.grouping_sets().empty())) + { + //should not reach here + throw TiFlashException("Repeat executor without grouping sets", Errors::Planner::BadRequest); + } + + DAGExpressionAnalyzer analyzer{child->getSchema(), context}; + ExpressionActionsPtr before_repeat_actions = PhysicalPlanHelper::newActions(child->getSampleBlock(), context); + + + auto shared_repeat = analyzer.buildExpandGroupingColumns(expand, before_repeat_actions); + + // construct sample block. + NamesAndTypes repeat_output_columns; + auto child_header = child->getSchema(); + for (const auto & one : child_header) + { + repeat_output_columns.emplace_back(one.name, shared_repeat->isInGroupSetColumn(one.name)? 
makeNullable(one.type): one.type);
+    }
+    repeat_output_columns.emplace_back(shared_repeat->grouping_identifier_column_name, shared_repeat->grouping_identifier_column_type);
+
+    auto physical_repeat = std::make_shared<PhysicalExpand>(
+        executor_id,
+        repeat_output_columns,
+        log->identifier(),
+        child,
+        shared_repeat,
+        Block(repeat_output_columns));
+
+    return physical_repeat;
+}
+
+
+void PhysicalExpand::repeatTransform(DAGPipeline & child_pipeline, Context & context)
+{
+    auto repeat_actions = PhysicalPlanHelper::newActions(child_pipeline.firstStream()->getHeader(), context);
+    repeat_actions->add(ExpressionAction::expandSource(shared_expand));
+    String repeat_extra_info = fmt::format("repeat source, repeat_executor_id = {}", execId());
+    child_pipeline.transform([&](auto & stream) {
+        stream = std::make_shared(stream, repeat_actions);
+        stream->setExtraInfo(repeat_extra_info);
+    });
+}
+
+void PhysicalExpand::transformImpl(DAGPipeline & pipeline, Context & context, size_t max_streams)
+{
+    child->transform(pipeline, context, max_streams);
+    repeatTransform(pipeline, context);
+}
+
+void PhysicalExpand::finalize(const Names & parent_require)
+{
+    FinalizeHelper::checkSchemaContainsParentRequire(schema, parent_require);
+    Names required_output;
+    required_output.reserve(shared_expand->getGroupSetNum()); // the grouping set columns must already exist in the child's output schema.
+    auto name_set = std::set<String>();
+    shared_expand->getAllGroupSetColumnNames(name_set);
+    // append the parent_require columns; the parent may also expect the self-generated groupingID.
+    for (const auto & one : parent_require)
+    {
+        if (one != Expand::grouping_identifier_column_name)
+        {
+            name_set.insert(one);
+        }
+    }
+    for (const auto & grouping_name : name_set)
+    {
+        required_output.emplace_back(grouping_name);
+    }
+    child->finalize(required_output);
+}
+
+const Block & PhysicalExpand::getSampleBlock() const
+{
+    return sample_block;
+}
+} // namespace DB
diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.h b/dbms/src/Flash/Planner/Plans/PhysicalExpand.h
new file mode 100644
index 00000000000..a2696affb5b
--- /dev/null
+++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.h
@@ -0,0 +1,57 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ +class PhysicalExpand : public PhysicalUnary +{ +public: + static PhysicalPlanNodePtr build( + const Context & context, + const String & executor_id, + const LoggerPtr & log, + const tipb::Expand & expand, + const PhysicalPlanNodePtr & child); + + PhysicalExpand( + const String & executor_id_, + const NamesAndTypes & schema_, + const String & req_id, + const PhysicalPlanNodePtr & child_, + const std::shared_ptr & shared_expand, + const Block & sample_block_) + : PhysicalUnary(executor_id_, PlanType::Repeat, schema_, req_id, child_) + , shared_expand(shared_expand), sample_block(sample_block_){} + + void finalize(const Names & parent_require) override; + + void repeatTransform(DAGPipeline & child_pipeline, Context & context); + + const Block & getSampleBlock() const override; + +private: + void transformImpl(DAGPipeline & pipeline, Context & context, size_t max_streams) override; + std::shared_ptr shared_expand; + Block sample_block; +}; +} // namespace DB + + diff --git a/dbms/src/Flash/Planner/Plans/PhysicalRepeat.cpp b/dbms/src/Flash/Planner/Plans/PhysicalRepeat.cpp index adb2a774354..51eaaeaa4c3 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalRepeat.cpp +++ b/dbms/src/Flash/Planner/Plans/PhysicalRepeat.cpp @@ -15,30 +15,30 @@ #include #include #include +#include +#include #include #include #include #include -#include +#include #include #include -#include -#include namespace DB { -PhysicalPlanNodePtr PhysicalRepeat::build( +PhysicalPlanNodePtr PhysicalExpand::build( const Context & context, const String & executor_id, const LoggerPtr & log, - const tipb::RepeatSource & repeat_source, + const tipb::Expand & expand, const PhysicalPlanNodePtr & child) { assert(child); child->finalize(); - if (unlikely(repeat_source.grouping_sets().empty())) + if (unlikely(expand.grouping_sets().empty())) { //should not reach here throw TiFlashException("Repeat executor without grouping sets", Errors::Planner::BadRequest); @@ -48,7 +48,7 @@ PhysicalPlanNodePtr PhysicalRepeat::build( ExpressionActionsPtr before_repeat_actions = PhysicalPlanHelper::newActions(child->getSampleBlock(), context); - auto shared_repeat = analyzer.buildRepeatGroupingColumns(repeat_source, before_repeat_actions); + auto shared_repeat = analyzer.buildExpandGroupingColumns(expand, before_repeat_actions); // construct sample block. 
NamesAndTypes repeat_output_columns; @@ -59,7 +59,7 @@ PhysicalPlanNodePtr PhysicalRepeat::build( } repeat_output_columns.emplace_back(shared_repeat->grouping_identifier_column_name, shared_repeat->grouping_identifier_column_type); - auto physical_repeat = std::make_shared( + auto physical_repeat = std::make_shared( executor_id, repeat_output_columns, log->identifier(), @@ -71,34 +71,34 @@ PhysicalPlanNodePtr PhysicalRepeat::build( } -void PhysicalRepeat::repeatTransform(DAGPipeline & child_pipeline, Context & context) +void PhysicalExpand::repeatTransform(DAGPipeline & child_pipeline, Context & context) { auto repeat_actions = PhysicalPlanHelper::newActions(child_pipeline.firstStream()->getHeader(), context); - repeat_actions->add(ExpressionAction::repeatSource(shared_repeat)); + repeat_actions->add(ExpressionAction::expandSource(shared_expand)); String repeat_extra_info = fmt::format("repeat source, repeat_executor_id = {}", execId()); child_pipeline.transform([&](auto &stream) { - stream = std::make_shared(stream, repeat_actions); + stream = std::make_shared(stream, repeat_actions); stream->setExtraInfo(repeat_extra_info); }); } -void PhysicalRepeat::transformImpl(DAGPipeline & pipeline, Context & context, size_t max_streams) +void PhysicalExpand::transformImpl(DAGPipeline & pipeline, Context & context, size_t max_streams) { child->transform(pipeline, context, max_streams); repeatTransform(pipeline, context); } -void PhysicalRepeat::finalize(const Names & parent_require) +void PhysicalExpand::finalize(const Names & parent_require) { FinalizeHelper::checkSchemaContainsParentRequire(schema, parent_require); Names required_output; - required_output.reserve( shared_repeat->getGroupSetNum()); // grouping set column should be existed in the child output schema. + required_output.reserve( shared_expand->getGroupSetNum()); // grouping set column should be existed in the child output schema. auto name_set = std::set(); - shared_repeat->getAllGroupSetColumnNames(name_set); + shared_expand->getAllGroupSetColumnNames(name_set); // append parent_require column it may expect self-filled groupingID. 
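 // Worked illustration (commentary added for clarity, not part of this patch):
 // with grouping sets {a} and {b} and parent_require = {b, groupingID},
 // name_set becomes {a, b}; groupingID is skipped below because Expand
 // generates that column itself, so the child ends up finalized with
 // required_output = {a, b}.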
for (const auto & one : parent_require) { - if (one != Repeat::grouping_identifier_column_name) + if (one != Expand::grouping_identifier_column_name) { name_set.insert(one); } @@ -109,7 +109,7 @@ void PhysicalRepeat::finalize(const Names & parent_require) child->finalize(required_output); } -const Block & PhysicalRepeat::getSampleBlock() const +const Block & PhysicalExpand::getSampleBlock() const { return sample_block; } diff --git a/dbms/src/Flash/Planner/Plans/PhysicalRepeat.h b/dbms/src/Flash/Planner/Plans/PhysicalRepeat.h index 5907c7c047e..a2696affb5b 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalRepeat.h +++ b/dbms/src/Flash/Planner/Plans/PhysicalRepeat.h @@ -21,25 +21,25 @@ namespace DB { -class PhysicalRepeat : public PhysicalUnary +class PhysicalExpand : public PhysicalUnary { public: static PhysicalPlanNodePtr build( const Context & context, const String & executor_id, const LoggerPtr & log, - const tipb::RepeatSource & repeat, + const tipb::Expand & expand, const PhysicalPlanNodePtr & child); - PhysicalRepeat( + PhysicalExpand( const String & executor_id_, const NamesAndTypes & schema_, const String & req_id, const PhysicalPlanNodePtr & child_, - const std::shared_ptr & shared_repeat, + const std::shared_ptr & shared_expand, const Block & sample_block_) : PhysicalUnary(executor_id_, PlanType::Repeat, schema_, req_id, child_) - , shared_repeat(shared_repeat), sample_block(sample_block_){} + , shared_expand(shared_expand), sample_block(sample_block_){} void finalize(const Names & parent_require) override; @@ -49,7 +49,7 @@ class PhysicalRepeat : public PhysicalUnary private: void transformImpl(DAGPipeline & pipeline, Context & context, size_t max_streams) override; - std::shared_ptr shared_repeat; + std::shared_ptr shared_expand; Block sample_block; }; } // namespace DB diff --git a/dbms/src/Flash/Statistics/CommonExecutorImpl.h b/dbms/src/Flash/Statistics/CommonExecutorImpl.h index 404fd1acbd6..56d55ea415f 100644 --- a/dbms/src/Flash/Statistics/CommonExecutorImpl.h +++ b/dbms/src/Flash/Statistics/CommonExecutorImpl.h @@ -58,6 +58,19 @@ struct SortImpl }; using SortStatistics = ExecutorStatistics; +struct ExpandImpl +{ + static constexpr bool has_extra_info = false; + + static constexpr auto type = "Expand"; + + static bool isMatch(const tipb::Executor *executor) + { + return executor->has_expand(); + } +}; +using ExpandStatistics = ExecutorStatistics; + struct FilterImpl { static constexpr bool has_extra_info = false; diff --git a/dbms/src/Flash/Statistics/ExecutorStatisticsCollector.cpp b/dbms/src/Flash/Statistics/ExecutorStatisticsCollector.cpp index 44a72e11381..321599d9050 100644 --- a/dbms/src/Flash/Statistics/ExecutorStatisticsCollector.cpp +++ b/dbms/src/Flash/Statistics/ExecutorStatisticsCollector.cpp @@ -64,7 +64,8 @@ void ExecutorStatisticsCollector::initialize(DAGContext * dag_context_) SortStatistics, TableScanStatistics, TopNStatistics, - WindowStatistics>(executor_id, &executor)) + WindowStatistics, + ExpandStatistics>(executor_id, &executor)) { throw TiFlashException( fmt::format("Unknown executor type, executor_id: {}", executor_id), diff --git a/dbms/src/Flash/Statistics/traverseExecutors.cpp b/dbms/src/Flash/Statistics/traverseExecutors.cpp index 801002a10a8..94abeef3b01 100644 --- a/dbms/src/Flash/Statistics/traverseExecutors.cpp +++ b/dbms/src/Flash/Statistics/traverseExecutors.cpp @@ -41,8 +41,8 @@ Children getChildren(const tipb::Executor & executor) return Children{&executor.topn().child()}; case tipb::ExecType::TypeLimit: return 
Children{&executor.limit().child()}; - case tipb::ExecType::TypeRepeatSource: - return Children{&executor.repeat_source().child()}; + case tipb::ExecType::TypeExpand: + return Children{&executor.expand().child()}; case tipb::ExecType::TypeProjection: return Children{&executor.projection().child()}; case tipb::ExecType::TypeExchangeSender: diff --git a/dbms/src/Interpreters/Repeat.cpp b/dbms/src/Interpreters/Expand.cpp similarity index 93% rename from dbms/src/Interpreters/Repeat.cpp rename to dbms/src/Interpreters/Expand.cpp index 16be88e81c0..7ddbd8c975d 100644 --- a/dbms/src/Interpreters/Repeat.cpp +++ b/dbms/src/Interpreters/Expand.cpp @@ -12,13 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include #include -#include #include -#include "DataTypes/DataTypesNumber.h" +#include +#include #include +#include "DataTypes/DataTypesNumber.h" + namespace DB { @@ -32,10 +33,10 @@ void convertColumnToNullable(ColumnWithTypeAndName & column) } } -Repeat::Repeat(const DB::GroupingSets & gss) +Expand::Expand(const DB::GroupingSets & gss) : group_sets_names(gss){} -void Repeat::getGroupingSetsDes(FmtBuffer & buffer) const +void Expand::getGroupingSetsDes(FmtBuffer & buffer) const { buffer.fmtAppend("["); for (const auto & grouping_set: group_sets_names) @@ -78,7 +79,7 @@ void Repeat::getGroupingSetsDes(FmtBuffer & buffer) const /// \param input the source block /// \return -void Repeat::replicateAndFillNull(Block & block) const +void Expand::replicateAndFillNull(Block & block) const { size_t origin_rows = block.rows(); // make a replicate slice, using it to replicate origin rows. @@ -119,7 +120,7 @@ void Repeat::replicateAndFillNull(Block & block) const // replicate the original block rows. size_t existing_columns = block.columns(); - if (offsets_to_replicate) + if (offsets_to_replicate && offsets_to_replicate->size() > 0) { for (size_t i = 0; i < existing_columns; ++i) { @@ -202,7 +203,7 @@ void Repeat::replicateAndFillNull(Block & block) const // return input from block. } -bool Repeat::isInGroupSetColumn(String name) const{ +bool Expand::isInGroupSetColumn(String name) const{ for(const auto& it1 : group_sets_names) { // for every grouping set. @@ -220,13 +221,13 @@ bool Repeat::isInGroupSetColumn(String name) const{ return false; } -const GroupingColumnNames& Repeat::getGroupSetColumnNamesByOffset(size_t offset) const +const GroupingColumnNames& Expand::getGroupSetColumnNamesByOffset(size_t offset) const { /// currently, there only can be one groupingExprs in one groupingSet before the planner supporting the grouping set merge. 
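 /// Illustration (commentary added for clarity, not part of this patch): for
 /// SELECT count(distinct a), count(distinct b) FROM t the sets are
 /// {{{"a"}}, {{"b"}}}; today each grouping set holds exactly one
 /// GroupingColumnNames entry, so [offset][0] selects that single name list.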
return group_sets_names[offset][0];
 }
 
-void Repeat::getAllGroupSetColumnNames(std::set<String> & name_set) const
+void Expand::getAllGroupSetColumnNames(std::set<String> & name_set) const
 {
     for (const auto & it1 : group_sets_names)
     {
@@ -242,11 +243,11 @@ void Repeat::getAllGroupSetColumnNames(std::set<String> & name_set) const
     }
 }
 
-std::shared_ptr<Repeat> Repeat::sharedRepeat(const GroupingSets & groupingSets)
+std::shared_ptr<Expand> Expand::sharedExpand(const GroupingSets & groupingSets)
 {
-    return std::make_shared<Repeat>(groupingSets);
+    return std::make_shared<Expand>(groupingSets);
 }
 
-const std::string Repeat::grouping_identifier_column_name = "groupingID";
-const DataTypePtr Repeat::grouping_identifier_column_type = std::make_shared<DataTypeUInt64>();
+const std::string Expand::grouping_identifier_column_name = "groupingID";
+const DataTypePtr Expand::grouping_identifier_column_type = std::make_shared<DataTypeUInt64>();
 }
diff --git a/dbms/src/Interpreters/Repeat.h b/dbms/src/Interpreters/Expand.h
similarity index 82%
rename from dbms/src/Interpreters/Repeat.h
rename to dbms/src/Interpreters/Expand.h
index 442050f55e7..d567e58e311 100644
--- a/dbms/src/Interpreters/Repeat.h
+++ b/dbms/src/Interpreters/Expand.h
@@ -43,7 +43,7 @@ namespace DB
 /// we still got 2 grouping sets like: {[, ], []}
 ///
 /// the second case in which the group layout has been merged with the prefix
-/// common group layout into unified one set to reduce the underlying data replication/repeat cost.
+/// common group layout into one unified set to reduce the underlying data replication/expand cost.
 ///
 using GroupingColumnName = ::String;
 using GroupingColumnNames = std::vector<GroupingColumnName>;
@@ -52,19 +52,19 @@ using GroupingSets = std::vector<GroupingSet>;
 
-/** Data structure for implementation of Repeat.
+/** Data structure for implementation of Expand.
  *
- * Repeat is a kind of operator used for replicate low-layer datasource rows to feed different aggregate
+ * Expand is a kind of operator used to replicate low-layer datasource rows to feed different aggregate
  * grouping-layout requirement. (Basically known as grouping sets)
  *
  * For current scenario, it is applied to accelerate the computation of multi distinct aggregates by utilizing
  * multi nodes computing resource in a way of scheming 3-phase aggregation under mpp mode.
  *
- * GroupingSets descriptions are all needed by Repeat operator itself, the length of GroupingSets are the needed
- * repeat number (in other words, one grouping set require one replica of source rows). Since different grouping
+ * GroupingSets descriptions are all needed by the Expand operator itself; the length of GroupingSets is the needed
+ * expand count (in other words, one grouping set requires one replica of the source rows). Since different grouping
 * set column shouldn't let its targeted rows affected by other grouping set columns (which will also be appear in
 * the group by items) when do grouping work, we should isolate different grouping set columns by filling them with
- * null values when repeating rows.
+ * null values when expanding rows.
 *
 * Here is an example:
 * Say we got a query like this: select count(distinct a), count(distinct b) from t.
 *
@@ -75,46 +75,46 @@ using GroupingSets = std::vector<GroupingSet>;
 * Different group layouts are doomed to be unable to be feed with same replica of data in shuffling mode Except
 * gathering them all to the single node. While the latter one is usually accompanied by a single point of bottleneck.
 *
- * That's why data repeat happens here.
+ * That's why data expand happens here.
Say we got two tuple as below: * - * ==> after repeat we got + * ==> after expand we got * 1 1 origin row 1 1 - * 1 2 repeat row 1 1 + * 1 2 expand row 1 1 * origin row 1 2 - * repeat row 1 2 + * expand row 1 2 * - * See what we got now above, although we have already repeated/doubled the origin rows, while when grouping them together + * See what we got now above, although we have already expanded/doubled the origin rows, while when grouping them together * with GROUP BY(a,b) clause (resulting 2 group (1,1),(1,2) here), we found that we still can not get the right answer for * count distinct agg for a. * - * From the theory, every origin/repeated row should be targeted for one group out requirement, which means row<1> and row<3> + * From the theory, every origin/expanded row should be targeted for one group out requirement, which means row<1> and row<3> * about should be used to feed count(distinct a), while since the value of b in row<3> is different from that from row<1>, * that leads them being divided into different group. * * Come back to the origin goal to feed count(distinct a), in which we don't even care about what is was in column b from row<1> * and row<3>, because current agg args is aimed at column a. Therefore, we filled every non-targeted grouping set column in - * repeated row as null value. After that we got as below: + * expanded row as null value. After that we got as below: * - * ==> after repeat we got + * ==> after expand we got * 1 1 origin row 1 null ---> target for grouping set a - * 1 2 repeat row null 1 ---> target for grouping set b + * 1 2 expand row null 1 ---> target for grouping set b * origin row 1 null ---> target for grouping set a - * repeat row null 2 ---> target for grouping set b + * expand row null 2 ---> target for grouping set b * * Then, when grouping them together with GROUP BY(a,b) clause, we got row<1> and row<3> together, and row<2>, row<4> as a * self-group individually. Among them, every distinct agg has their self-targeted data grouped correctly. GROUP BY(a,b) clause * is finally seen/taken as a equivalent group to GROUP BY(a, null) for a-targeted rows, GROUP BY(null, b) for b-targeted rows. * * Over the correct grouped data, the result computation for distinct agg is quite reasonable. By the way, if origin row has some - * column that isn't belong to any grouping set, just let it be copied as it was in repeated row. + * column that isn't belong to any grouping set, just let it be copied as it was in expanded row. * */ -class Repeat +class Expand { public: - explicit Repeat(const GroupingSets & gss); + explicit Expand(const GroupingSets & gss); - // replicateAndFillNull is the basic functionality that Repeat Operator provided. Briefly, it replicates + // replicateAndFillNull is the basic functionality that Expand Operator provided. Briefly, it replicates // origin rows with regard to local grouping sets description, and appending a new column named as groupingID // to illustrate what group this row is targeted for. 
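 // A minimal usage sketch (commentary added for clarity, not part of this
 // patch; assumes a Block `block` with columns a and b built elsewhere):
 //
 //     GroupingSets grouping_sets{{{"a"}}, {{"b"}}}; // two sets: {a} and {b}
 //     auto expand = Expand::sharedExpand(grouping_sets);
 //     expand->replicateAndFillNull(block);
 //
 // Afterwards `block` holds one replica of every origin row per grouping set,
 // with the non-targeted grouping column nulled out and a trailing groupingID
 // column recording which grouping set each row targets.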
void replicateAndFillNull(Block & input) const;
@@ -127,7 +127,7 @@ class Repeat
 
     void getAllGroupSetColumnNames(std::set<String> & name_set) const;
 
-    static std::shared_ptr<Repeat> sharedRepeat(const GroupingSets & groupingSets);
+    static std::shared_ptr<Expand> sharedExpand(const GroupingSets & groupingSets);
 
     void getGroupingSetsDes(FmtBuffer & buffer) const;
 
diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp
index e25ae02bf88..7b89ed431c3 100644
--- a/dbms/src/Interpreters/ExpressionActions.cpp
+++ b/dbms/src/Interpreters/ExpressionActions.cpp
@@ -136,11 +136,11 @@ ExpressionAction ExpressionAction::ordinaryJoin(std::shared_ptr<Join> join
     return a;
 }
 
-ExpressionAction ExpressionAction::repeatSource(std::shared_ptr<Repeat> repeat_source_)
+ExpressionAction ExpressionAction::expandSource(std::shared_ptr<Expand> expand_)
 {
     ExpressionAction a;
-    a.type = REPEAT;
-    a.repeat = repeat_source_;
+    a.type = EXPAND;
+    a.expand = expand_;
     return a;
 }
 
@@ -239,11 +239,11 @@ void ExpressionAction::prepare(Block & sample_block) // the prepare phase
     break;
 }
 
-    case REPEAT:
+    case EXPAND:
     {
         // sample_block is just for schema check followed by later block, modify it if your schema has changed during this action.
         auto name_set = std::set<String>();
-        repeat->getAllGroupSetColumnNames(name_set);
+        expand->getAllGroupSetColumnNames(name_set);
         // make grouping set column to be nullable.
         for (const auto & col_name: name_set) {
             auto & column_with_name = sample_block.getByName(col_name);
@@ -252,7 +252,7 @@ void ExpressionAction::prepare(Block & sample_block) // the prepare phase
             column_with_name.column = makeNullable(column_with_name.column);
         }
         // fill one more column: groupingID.
-        sample_block.insert({nullptr, repeat->grouping_identifier_column_type, repeat->grouping_identifier_column_name});
+        sample_block.insert({nullptr, expand->grouping_identifier_column_type, expand->grouping_identifier_column_name});
         break;
     }
 
@@ -341,9 +341,9 @@ void ExpressionAction::execute(Block & block) const // the execute phase
     break;
 }
 
-    case REPEAT:
+    case EXPAND:
     {
-        repeat->replicateAndFillNull(block); // repeat 的执行阶段直接 fill block 了
+        expand->replicateAndFillNull(block); // expand's execute phase fills the block in place
         break;
     }
 
diff --git a/dbms/src/Interpreters/ExpressionActions.h b/dbms/src/Interpreters/ExpressionActions.h
index 0a9b9bd99fa..e9d98de2106 100644
--- a/dbms/src/Interpreters/ExpressionActions.h
+++ b/dbms/src/Interpreters/ExpressionActions.h
@@ -17,6 +17,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 
@@ -34,7 +36,7 @@ using NameWithAlias = std::pair<String, String>;
 using NamesWithAliases = std::vector<NameWithAlias>;
 
 class Join;
-class Repeat;
+class Expand;
 
 class IFunctionBase;
 using FunctionBasePtr = std::shared_ptr<IFunctionBase>;
 
@@ -67,7 +69,7 @@ struct ExpressionAction
     /// Reorder and rename the columns, delete the extra ones. The same column names are allowed in the result.
     PROJECT,
 
-    REPEAT,
+    EXPAND,
 };
 
 Type type;
 
@@ -93,9 +95,9 @@ struct ExpressionAction
     /// For PROJECT.
     NamesWithAliases projections;
 
-    /// For REPEAT_SOURCE.
-    std::shared_ptr<Repeat> repeat;
-    NamesAndTypesList columns_added_by_repeat;
+    /// For EXPAND.
+    std::shared_ptr<Expand> expand;
+    NamesAndTypesList columns_added_by_expand;
 
     /// If result_name_ == "", as name "function_name(arguments separated by commas) is used".
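 /// Sketch of how an EXPAND action is driven end to end (mirrors
 /// PhysicalExpand::repeatTransform earlier in this patch; illustrative only):
 ///
 ///     auto actions = PhysicalPlanHelper::newActions(header, context);
 ///     actions->add(ExpressionAction::expandSource(shared_expand));
 ///     actions->execute(block); // prepare() made the grouping set columns
 ///                              // nullable and registered groupingID in the
 ///                              // sample block; execute() replicates the rows.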
static ExpressionAction applyFunction( @@ -110,7 +112,7 @@ struct ExpressionAction static ExpressionAction project(const NamesWithAliases & projected_columns_); static ExpressionAction project(const Names & projected_columns_); static ExpressionAction ordinaryJoin(std::shared_ptr join_, const NamesAndTypesList & columns_added_by_join_); - static ExpressionAction repeatSource(std::shared_ptr repeat_source_); + static ExpressionAction expandSource(std::shared_ptr expand_); /// Which columns necessary to perform this action. Names getNeededColumns() const; diff --git a/dbms/src/Interpreters/tests/gtest_block_repeat.cpp b/dbms/src/Interpreters/tests/gtest_block_repeat.cpp index 21074ff7a95..88c6286898e 100644 --- a/dbms/src/Interpreters/tests/gtest_block_repeat.cpp +++ b/dbms/src/Interpreters/tests/gtest_block_repeat.cpp @@ -14,9 +14,9 @@ #include #include +#include #include #include -#include namespace DB { diff --git a/dbms/src/TestUtils/executorSerializer.cpp b/dbms/src/TestUtils/executorSerializer.cpp index 33304e298c0..46f0d3b03a1 100644 --- a/dbms/src/TestUtils/executorSerializer.cpp +++ b/dbms/src/TestUtils/executorSerializer.cpp @@ -160,10 +160,10 @@ void serializeTopN(const String & executor_id, const tipb::TopN & top_n, FmtBuff buf.fmtAppend("}}, limit: {}\n", top_n.limit()); } -void serializeRepeatSource(const String & executor_id, const tipb::RepeatSource & repeat, FmtBuffer & buf) +void serializeExpandSource(const String & executor_id, const tipb::Expand & expand, FmtBuffer & buf) { - buf.fmtAppend("{} | repeat_source_by: [", executor_id); - for (const auto & grouping_set : repeat.grouping_sets()) + buf.fmtAppend("{} | expanded_by: [", executor_id); + for (const auto & grouping_set : expand.grouping_sets()) { buf.fmtAppend("<"); for (const auto & grouping_exprs : grouping_set.grouping_exprs()) @@ -306,8 +306,8 @@ void ExecutorSerializer::serializeListStruct(const tipb::DAGRequest * dag_reques case tipb::ExecType::TypeLimit: serializeLimit("Limit", executor.limit(), buf); break; - case tipb::ExecType::TypeRepeatSource: - serializeRepeatSource("Repeat", executor.repeat_source(), buf); + case tipb::ExecType::TypeExpand: + serializeExpandSource("Repeat", executor.expand(), buf); break; default: throw TiFlashException("Should not reach here", Errors::Coprocessor::Internal); @@ -366,8 +366,8 @@ void ExecutorSerializer::serializeTreeStruct(const tipb::Executor & root_executo case tipb::ExecType::TypeWindow: serializeWindow(executor.executor_id(), executor.window(), buf); break; - case tipb::ExecType::TypeRepeatSource: - serializeRepeatSource(executor.executor_id(), executor.repeat_source(), buf); + case tipb::ExecType::TypeExpand: + serializeExpandSource(executor.executor_id(), executor.expand(), buf); break; default: throw TiFlashException("Should not reach here", Errors::Coprocessor::Internal); From 697e8650b5542b66df13d2c8102fd9f4802f6281 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Mon, 26 Dec 2022 12:05:24 +0800 Subject: [PATCH 06/31] remove useless file Signed-off-by: AilinKid <3148019@qq.com> --- tai.cpp | 64 --------------------------------------------------------- tai.h | 14 ------------- 2 files changed, 78 deletions(-) delete mode 100644 tai.cpp delete mode 100644 tai.h diff --git a/tai.cpp b/tai.cpp deleted file mode 100644 index 59b2935c6a4..00000000000 --- a/tai.cpp +++ /dev/null @@ -1,64 +0,0 @@ -// -// Created by arenatlx on 2022/10/27. 
-// - -#include -#include -#include "tai.h" - - - -class HAHA { -public: - template - HAHA & operator=(std::vector && rhs){ - return rhs[0]; - } -private: - int a; -}; - - -class MY { - using Arra1y = std::vector; -public: - HAHA operator[](size_t n) const; -}; - -HAHA MY::operator[](size_t n) const{ - Arra1y a(n); - return a[0]; -} - -struct Test{ - ~Test(){ - std::cout<<"kill test"<(); - auto tmp = new(Test); // 这个地方直接 Test() 还是会析构一次,有点奇怪,只有 new 才行。 - vec.push_back(std::move(*tmp)); - std::cout< v; - v.push_back("aaaa"); - v.push_back("bbbb"); - v[0][0]='1'; - v.push_back(std::move(v[0])); - std::cout< Date: Mon, 26 Dec 2022 12:15:21 +0800 Subject: [PATCH 07/31] remove debug log Signed-off-by: AilinKid <3148019@qq.com> --- dbms/src/Debug/dbgFuncCoprocessor.h | 1 - .../Coprocessor/DAGExpressionAnalyzer.cpp | 1 - dbms/src/TestUtils/FunctionTestUtils.cpp | 29 ------------------- 3 files changed, 31 deletions(-) diff --git a/dbms/src/Debug/dbgFuncCoprocessor.h b/dbms/src/Debug/dbgFuncCoprocessor.h index f1b95139f62..9a21842fa50 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.h +++ b/dbms/src/Debug/dbgFuncCoprocessor.h @@ -15,7 +15,6 @@ #pragma once #include - namespace DB { class Context; diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index fe74369bf7a..e9a10539378 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -669,7 +669,6 @@ String DAGExpressionAnalyzer::applyFunction( const TiDB::TiDBCollatorPtr & collator) { String result_name = genFuncString(func_name, arg_names, {collator}); - // 啊这个好!可以避免相同表达式的重复计算 if (actions->getSampleBlock().has(result_name)) return result_name; const FunctionBuilderPtr & function_builder = FunctionFactory::instance().get(func_name, context); diff --git a/dbms/src/TestUtils/FunctionTestUtils.cpp b/dbms/src/TestUtils/FunctionTestUtils.cpp index b616c441bf4..a679bbd9885 100644 --- a/dbms/src/TestUtils/FunctionTestUtils.cpp +++ b/dbms/src/TestUtils/FunctionTestUtils.cpp @@ -193,7 +193,6 @@ std::multiset columnsToRowSet(const ColumnsWithTypeAndName & cols) { for (size_t i = 0, size = col.column->size(); i < size; ++i) { - new (rows[i].place(col_id)) Field((*col.column)[i]); } } @@ -221,39 +220,12 @@ ::testing::AssertionResult columnsEqual( ASSERT_EQUAL(expect_col.column->size(), actual_col.column->size(), fmt::format("Column {} size mismatch", i)); auto type_eq = dataTypeEqual(expected[i].type, actual[i].type); if (!type_eq) - { - std::cout << "type equal false" << std::endl; return type_eq; - } } auto const expected_row_set = columnsToRowSet(expected); auto const actual_row_set = columnsToRowSet(actual); - { - auto expect_it = expected_row_set.begin(); - auto actual_it = actual_row_set.begin(); - FmtBuffer buf1; - FmtBuffer buf2; - for (; expect_it != expected_row_set.end(); ++expect_it, ++actual_it) - { - buf1.joinStr( - expect_it->begin(), - expect_it->end(), - [](const auto & v, FmtBuffer & fb) { fb.append(v.toString()); }, - " ") - .append("\n"); - buf2.joinStr( - actual_it->begin(), - actual_it->end(), - [](const auto & v, FmtBuffer & fb) { fb.append(v.toString()); }, - " ") - .append("\n"); - } - auto res1 = buf1.toString(); - auto res2 = buf2.toString(); - } - if (expected_row_set != actual_row_set) { FmtBuffer buf; @@ -287,7 +259,6 @@ ::testing::AssertionResult columnsEqual( .append("\n"); } buf.append("...\n"); - std::cout< Date: Mon, 26 Dec 2022 12:16:23 +0800 Subject: [PATCH 08/31] . 
Signed-off-by: AilinKid <3148019@qq.com>
---
 a.out | Bin 85490 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100755 a.out

diff --git a/a.out b/a.out
deleted file mode 100755
index 8aed9644943b125062ea04d62dd9d638ddaf2013..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 85490
[85490-byte binary blob elided]

From <elided> Mon Sep 17 00:00:00 2001
From: AilinKid <3148019@qq.com>
Date: Wed, 28 Dec 2022 22:35:13 +0800
Subject: [PATCH 09/31] remove chinese comment

Signed-off-by: AilinKid <3148019@qq.com>
---
 dbms/src/Columns/ColumnArray.cpp              |  1 -
 dbms/src/Columns/ColumnNullable.h             |  2 +-
 dbms/src/Columns/ColumnsCommon.cpp            |  2 +-
 dbms/src/Common/COWPtr.h                      |  1 -
 dbms/src/Common/HashTable/HashTable.h         |  4 ++--
 dbms/src/Core/Block.h                         |  5 ++--
 dbms/src/Core/ColumnWithTypeAndName.h         |  2 --
 dbms/src/Core/ColumnsWithTypeAndName.h        |  1 -
 dbms/src/DataStreams/SquashingTransform.cpp   |  2 +-
 dbms/src/Debug/MockExecutor/AstToPB.cpp       |  2 +-
 dbms/src/Debug/MockExecutor/ExpandBinder.cpp  |  2 +-
 dbms/src/Debug/MockExecutor/ExpandBinder.h    |  3 ++-
 dbms/src/Flash/Coprocessor/DAGContext.cpp     |  4 ++--
 .../Coprocessor/DAGExpressionAnalyzer.cpp     | 18 +++++---------
 .../DAGExpressionAnalyzerHelper.cpp           |  8 +++----
 dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp  |  5 ++--
 dbms/src/Flash/Coprocessor/DAGQueryBlock.h    |  4 ++--
 .../Coprocessor/DAGQueryBlockInterpreter.cpp  | 24 ++++++++-----------
 dbms/src/Flash/Coprocessor/DAGUtils.cpp       |  2 +-
 dbms/src/Flash/Coprocessor/InterpreterDAG.cpp |  2 +-
 .../Flash/Coprocessor/InterpreterUtils.cpp    |  2 +-
 .../Coprocessor/JoinInterpreterHelper.cpp     | 19 +++++++--------
 dbms/src/Flash/Mpp/MPPHandler.cpp             |  1 -
 dbms/src/Flash/Mpp/MPPTask.cpp                |  6 +----
 dbms/src/Flash/Mpp/MPPTaskScheduleEntry.h     |  1 -
 dbms/src/Flash/Mpp/MPPTunnelSet.cpp           |  4 +---
 dbms/src/Interpreters/ExpressionActions.cpp   | 22 +++++++----------
 dbms/src/Interpreters/Join.cpp                | 17 ++++++-------
 dbms/src/Interpreters/Join.h                  |  2 +-
 dbms/src/Interpreters/NullableUtils.cpp       |  2 --
 dbms/src/Interpreters/sortBlock.cpp           |  3 +--
 dbms/src/TestUtils/mockExecutor.cpp           |  2 +-
 32 files changed, 68 insertions(+), 107 deletions(-)

diff --git a/dbms/src/Columns/ColumnArray.cpp b/dbms/src/Columns/ColumnArray.cpp
index 00a406402b9..8a5ced0b084 100644
--- a/dbms/src/Columns/ColumnArray.cpp
+++ b/dbms/src/Columns/ColumnArray.cpp
@@ -952,7 +952,6 @@ ColumnPtr ColumnArray::replicateNullable(const Offsets & replicate_offsets) cons
 
 ColumnPtr ColumnArray::replicateTuple(const Offsets & replicate_offsets) const
 {
-    // data 是一个父累指针
     const auto & tuple = static_cast<const ColumnTuple &>(*data);
 
     /// Make temporary arrays for each components of Tuple. In the same way as for Nullable.
diff --git a/dbms/src/Columns/ColumnNullable.h b/dbms/src/Columns/ColumnNullable.h
index d993d918509..2069f80b42e 100644
--- a/dbms/src/Columns/ColumnNullable.h
+++ b/dbms/src/Columns/ColumnNullable.h
@@ -31,7 +31,7 @@ using ConstNullMapPtr = const NullMap *;
 /// over a bitmap because columns are usually stored on disk as compressed
 /// files. In this regard, using a bitmap instead of a byte map would
 /// greatly complicate the implementation with little to no benefits.
-class ColumnNullable final : public COWPtrHelper // nullable 列是怎么形成的,一般是一个普通列,一个伴随 bitmap,这里使用的 byte map 来存的 null mapping 而不是 bits +class ColumnNullable final : public COWPtrHelper { private: friend class COWPtrHelper; diff --git a/dbms/src/Columns/ColumnsCommon.cpp b/dbms/src/Columns/ColumnsCommon.cpp index 9307587ce6c..e969dc99842 100644 --- a/dbms/src/Columns/ColumnsCommon.cpp +++ b/dbms/src/Columns/ColumnsCommon.cpp @@ -291,7 +291,7 @@ void filterArraysImplGeneric( while (filt_pos < filt_end) { - if (*filt_pos) // 如果是 0 的话,说名该列该行被 filter 了 + if (*filt_pos) copy_array(offsets_pos); ++filt_pos; diff --git a/dbms/src/Common/COWPtr.h b/dbms/src/Common/COWPtr.h index b4d39620287..1f6bb8dacbb 100644 --- a/dbms/src/Common/COWPtr.h +++ b/dbms/src/Common/COWPtr.h @@ -105,7 +105,6 @@ class COWPtr : public boost::intrusive_ref_counter T && operator*() const && { return const_cast::type &&>(*boost::intrusive_ptr::get()); } }; - // 这个地方,COWPtr 继承 counter 之后就自带了 ref count 和 add, release 函数。所以私有类实力化到 T 之后 = IntrusivePtr,里面调用的 add, release 函数就有了,其都是操作 T 继承的 ref count 来操作的 protected: template class mutable_ptr : public IntrusivePtr // NOLINT(readability-identifier-naming) diff --git a/dbms/src/Common/HashTable/HashTable.h b/dbms/src/Common/HashTable/HashTable.h index 3bc3ab5e56c..2c857b9bc1b 100644 --- a/dbms/src/Common/HashTable/HashTable.h +++ b/dbms/src/Common/HashTable/HashTable.h @@ -449,7 +449,7 @@ class HashTable : private boost::noncopyable { while (!buf[place_value].isZero(*this) && !buf[place_value].keyEquals(x, hash_value, *this)) { - place_value = grower.next(place_value); // closed hash,线性开放地址寻址法 + place_value = grower.next(place_value); #ifdef DBMS_HASH_MAP_COUNT_COLLISIONS ++collisions; #endif @@ -694,7 +694,7 @@ class HashTable : private boost::noncopyable * HashMap completely, change all its users to the existing internal * iteration interface, and redefine end() to return LookupResult for * compatibility with std find(). Unfortunately, now is not the time to - * do this. // 隐式类型转换操作符 + * do this. */ operator Cell *() const { return nullptr; } // NOLINT(google-explicit-constructor) }; diff --git a/dbms/src/Core/Block.h b/dbms/src/Core/Block.h index 3463c47c1bc..a1af433bbc9 100644 --- a/dbms/src/Core/Block.h +++ b/dbms/src/Core/Block.h @@ -27,7 +27,7 @@ namespace DB { -/** Container for set of columns for bunch of rows in memory. // 怎么区分这里 rows 的大小呢? +/** Container for set of columns for bunch of rows in memory. * This is unit of data processing. * Also contains metadata - data types of columns and their names * (either original names from a table, or generated names during temporary calculations). 
@@ -38,8 +38,7 @@ class Context; class Block { -private: - // 多列的一个数据 +private: using Container = ColumnsWithTypeAndName; using IndexByName = std::map; diff --git a/dbms/src/Core/ColumnWithTypeAndName.h b/dbms/src/Core/ColumnWithTypeAndName.h index 30c4fe8c546..42a98f795fd 100644 --- a/dbms/src/Core/ColumnWithTypeAndName.h +++ b/dbms/src/Core/ColumnWithTypeAndName.h @@ -32,8 +32,6 @@ class WriteBuffer; struct ColumnWithTypeAndName { - // column 继承子 intrusive 实现 share ptr 功能,同归继承类的两个实现,mutable ptr 和 immutable ptr 可以相互转化 - // columnPtr 是一个基类指针 ColumnPtr column; DataTypePtr type; String name; diff --git a/dbms/src/Core/ColumnsWithTypeAndName.h b/dbms/src/Core/ColumnsWithTypeAndName.h index e7741bbb71e..61c77cf161e 100644 --- a/dbms/src/Core/ColumnsWithTypeAndName.h +++ b/dbms/src/Core/ColumnsWithTypeAndName.h @@ -21,7 +21,6 @@ namespace DB { -// 这里是一个多列组合的数据 using ColumnsWithTypeAndName = std::vector; } diff --git a/dbms/src/DataStreams/SquashingTransform.cpp b/dbms/src/DataStreams/SquashingTransform.cpp index 391cd710c8d..2425435d90e 100644 --- a/dbms/src/DataStreams/SquashingTransform.cpp +++ b/dbms/src/DataStreams/SquashingTransform.cpp @@ -60,7 +60,7 @@ SquashingTransform::Result SquashingTransform::add(Block && block) return Result(std::move(block)); } - append(std::move(block)); // 攒批 + append(std::move(block)); accumulated_block_rows = accumulated_block.rows(); accumulated_block_bytes = accumulated_block.bytes(); diff --git a/dbms/src/Debug/MockExecutor/AstToPB.cpp b/dbms/src/Debug/MockExecutor/AstToPB.cpp index 8977d8dc279..fa58e2e3fc8 100644 --- a/dbms/src/Debug/MockExecutor/AstToPB.cpp +++ b/dbms/src/Debug/MockExecutor/AstToPB.cpp @@ -447,7 +447,7 @@ void identifierToPB(const DAGSchema & input, ASTIdentifier * id, tipb::Expr * ex *(expr->mutable_field_type()) = columnInfoToFieldType((*ft).second); expr->mutable_field_type()->set_collate(collator_id); WriteBufferFromOwnString ss; - encodeDAGInt64(ft - input.begin(), ss); // 这个地方使用下面的 child input schema 的 offset,替换当前算子使用的 column ref + encodeDAGInt64(ft - input.begin(), ss); expr->set_val(ss.releaseStr()); } diff --git a/dbms/src/Debug/MockExecutor/ExpandBinder.cpp b/dbms/src/Debug/MockExecutor/ExpandBinder.cpp index 9d07a0c58f4..0eb35b71c62 100644 --- a/dbms/src/Debug/MockExecutor/ExpandBinder.cpp +++ b/dbms/src/Debug/MockExecutor/ExpandBinder.cpp @@ -65,4 +65,4 @@ ExecutorBinderPtr compileRepeat(ExecutorBinderPtr input, size_t & executor_index expand->children.push_back(input); return expand; } -} // namespace DB::mock \ No newline at end of file +} // namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/ExpandBinder.h b/dbms/src/Debug/MockExecutor/ExpandBinder.h index 752046a4d80..d1b4c7d980f 100644 --- a/dbms/src/Debug/MockExecutor/ExpandBinder.h +++ b/dbms/src/Debug/MockExecutor/ExpandBinder.h @@ -12,8 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#pragma once +#include namespace DB::mock { diff --git a/dbms/src/Flash/Coprocessor/DAGContext.cpp b/dbms/src/Flash/Coprocessor/DAGContext.cpp index 66e64c11b64..b4a9f9ad515 100644 --- a/dbms/src/Flash/Coprocessor/DAGContext.cpp +++ b/dbms/src/Flash/Coprocessor/DAGContext.cpp @@ -129,10 +129,10 @@ DAGContext::DAGContext(const tipb::DAGRequest & dag_request_, String log_identif void DAGContext::initOutputInfo() { - output_field_types = collectOutputFieldTypes(*dag_request); //那么 field types 对应的就是一个 fragment DAG 的 output schema's field types. 
+ output_field_types = collectOutputFieldTypes(*dag_request); output_offsets.clear(); result_field_types.clear(); - for (UInt32 i : dag_request->output_offsets()) // 这个地方应该是 fragment dag request 自带的 output offsets + for (UInt32 i : dag_request->output_offsets()) { output_offsets.push_back(i); if (unlikely(i >= output_field_types.size())) diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index e9a10539378..18fd7c507d8 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -983,7 +983,7 @@ std::pair DAGExpressionAnalyzer::buildJoinKey( for (int i = 0; i < keys.size(); ++i) { const auto & key = keys.at(i); - bool has_actions = key.tp() != tipb::ExprType::ColumnRef; // join key 如果不是 column ref 说明是有前序动作帮我把表达式给准备成列 + bool has_actions = key.tp() != tipb::ExprType::ColumnRef; String key_name = getActions(key, actions); DataTypePtr current_type = actions->getSampleBlock().getByName(key_name).type; @@ -1048,7 +1048,6 @@ bool DAGExpressionAnalyzer::appendJoinKeyAndJoinFilters( ExpressionActionsPtr actions = chain.getLastActions(); bool ret = false; - // build join keys,ck 只输出一个 key,需要 copy 一份,如果是表达式,还需要 append scalar 的 action std::tie(ret, key_names) = buildJoinKey(actions, keys, join_key_types, left, is_right_out_join); if (!filters.empty()) @@ -1057,7 +1056,7 @@ bool DAGExpressionAnalyzer::appendJoinKeyAndJoinFilters( std::vector filter_vector; for (const auto & c : filters) filter_vector.push_back(&c); - filter_column_name = appendWhere(chain, filter_vector); // 构建了 filter 输出的列 + filter_column_name = appendWhere(chain, filter_vector); } /// remove useless columns to avoid duplicate columns /// as when compiling the key/filter expression, the origin @@ -1077,18 +1076,18 @@ bool DAGExpressionAnalyzer::appendJoinKeyAndJoinFilters( if (ret) { std::unordered_set needed_columns; - for (const auto & c : getCurrentInputColumns()) // 当前进来的列都要 + for (const auto & c : getCurrentInputColumns()) needed_columns.insert(c.name); - for (const auto & s : key_names) // 当前怎加的 key col 也要 + for (const auto & s : key_names) needed_columns.insert(s); - if (!filter_column_name.empty()) // 当前添加的一侧 filter 的 col 也要 + if (!filter_column_name.empty()) needed_columns.insert(filter_column_name); const auto & names = actions->getSampleBlock().getNames(); for (const auto & name : names) { if (needed_columns.find(name) == needed_columns.end()) - actions->add(ExpressionAction::removeColumn(name)); // 增加后续的 action,裁剪掉不要一些 column 列 (这些 immediate 列的最后的结果已经被我 record 了) + actions->add(ExpressionAction::removeColumn(name)); } } return ret; @@ -1452,12 +1451,9 @@ String DAGExpressionAnalyzer::getActions(const tipb::Expr & expr, const Expressi if (isLiteralExpr(expr)) { Field value = decodeLiteral(expr); - // 主要对 decimal DataTypePtr flash_type = applyVisitor(FieldToDataType(), value); DataTypePtr target_type = inferDataType4Literal(expr); - // 表达式的 uniuqe name ret = exprToString(expr, getCurrentInputColumns()) + "_" + target_type->getName(); - // 表达式如果有这个名字,说明有这列 if (!actions->getSampleBlock().has(ret)) { ColumnWithTypeAndName column; @@ -1478,12 +1474,10 @@ String DAGExpressionAnalyzer::getActions(const tipb::Expr & expr, const Expressi } else if (isColumnExpr(expr)) { - // 如果是 column ref,直接从 stream input column 里面拿到 name ret = getColumnNameForColumnExpr(expr, getCurrentInputColumns()); } else if (isScalarFunctionExpr(expr)) { - // 根据 expr 构造 function 加入到 actions 里面 ret = 
DAGExpressionAnalyzerHelper::buildFunction(this, expr, actions); } else diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp index bc805d615c0..7d7a502beb1 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp @@ -150,12 +150,12 @@ String DAGExpressionAnalyzerHelper::buildInFunction( DataTypePtr type = inferDataType4Literal(child); argument_types.push_back(type); } - // find common type + // find common type DataTypePtr resolved_type = getLeastSupertype(argument_types); if (!removeNullable(resolved_type)->equals(*removeNullable(argument_types[0]))) { // Need cast left argument - key_name = analyzer->appendCast(resolved_type, actions, key_name); // 对于孩子的输出来说,需要 cast + key_name = analyzer->appendCast(resolved_type, actions, key_name); } analyzer->makeExplicitSet(expr, sample_block, false, key_name); argument_names.push_back(key_name); @@ -402,7 +402,6 @@ String DAGExpressionAnalyzerHelper::buildRegexpFunction( return analyzer->applyFunction(func_name, argument_names, actions, collator); } -// case when 函数应该走这里 String DAGExpressionAnalyzerHelper::buildDefaultFunction( DAGExpressionAnalyzer * analyzer, const tipb::Expr & expr, @@ -412,9 +411,8 @@ String DAGExpressionAnalyzerHelper::buildDefaultFunction( Names argument_names; for (const auto & child : expr.children()) { - // 函数参数如果还是函数的,这里需要递归生成多个 actions(深度优先) String name = analyzer->getActions(child, actions); - argument_names.push_back(name); // 拿到孩子的函数输出之后,再将其作为参数 + argument_names.push_back(name); } return analyzer->applyFunction(func_name, argument_names, actions, getCollatorFromExpr(expr)); } diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp index 2f5a28347cd..a2a8f6b90f4 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp @@ -101,7 +101,7 @@ DAGQueryBlock::DAGQueryBlock(const tipb::Executor & root_, QueryBlockIDGenerator GET_METRIC(tiflash_coprocessor_executor_count, type_expand).Increment(); assignOrThrowException(&expand, current, EXPAND_NAME); expand_name = current->executor_id(); - current = ¤t->expand().child(); // 非叶节点,继续孩子递归下去 + current = ¤t->expand().child(); break; case tipb::ExecType::TypeStreamAgg: RUNTIME_CHECK_MSG(current->aggregation().group_by_size() == 0, STREAM_AGG_ERROR); @@ -141,7 +141,6 @@ DAGQueryBlock::DAGQueryBlock(const tipb::Executor & root_, QueryBlockIDGenerator assignOrThrowException(&source, current, SOURCE_NAME); source_name = current->executor_id(); - // source 节点, if (current->tp() == tipb::ExecType::TypeJoin) { if (source->join().children_size() != 2) @@ -157,7 +156,7 @@ DAGQueryBlock::DAGQueryBlock(const tipb::Executor & root_, QueryBlockIDGenerator else if (current->tp() == tipb::ExecType::TypeProjection) { GET_METRIC(tiflash_coprocessor_executor_count, type_projection).Increment(); - children.push_back(std::make_shared(source->projection().child(), id_generator)); // 将之后的算子重新算作 children + children.push_back(std::make_shared(source->projection().child(), id_generator)); } else if (current->tp() == tipb::ExecType::TypeTableScan) { diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlock.h b/dbms/src/Flash/Coprocessor/DAGQueryBlock.h index 86cd14c09df..91dc6c2f439 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlock.h +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlock.h @@ -60,14 +60,14 @@ class DAGQueryBlock String having_name; const 
tipb::Executor * limit_or_topn = nullptr; String limit_or_topn_name; - const tipb::Executor * expand = nullptr; // expand node can only be before sender + const tipb::Executor * expand = nullptr; String expand_name; const tipb::Executor * exchange_sender = nullptr; String exchange_sender_name; UInt32 id; const tipb::Executor * root; String qb_column_prefix; - std::vector> children; // are these children passed in after each dag executor has been built? + std::vector> children; bool can_restore_pipeline_concurrency = true; bool isRootQueryBlock() const { return id == 1; }; diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 9889536c48c..4678b854012 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -248,7 +248,6 @@ void DAGQueryBlockInterpreter::handleJoin(const tipb::Join & join, DAGPipeline & bool is_tiflash_right_join = tiflash_join.isTiFlashRightJoin(); // prepare probe side - // prepare the probe side of the join, mainly appending the actions for the join keys and the filter expressions auto [probe_side_prepare_actions, probe_key_names, probe_filter_column_name] = JoinInterpreterHelper::prepareJoin( context, probe_pipeline.firstStream()->getHeader(), @@ -257,10 +256,9 @@ void DAGQueryBlockInterpreter::handleJoin(const tipb::Join & join, DAGPipeline & true, is_tiflash_right_join, tiflash_join.getProbeConditions()); - RUNTIME_ASSERT(probe_side_prepare_actions, log, "probe_side_prepare_actions cannot be nullptr"); // after init there is always at least one + RUNTIME_ASSERT(probe_side_prepare_actions, log, "probe_side_prepare_actions cannot be nullptr"); // prepare build side - // this call goes through the same entry as above, so it does exactly the same thing auto [build_side_prepare_actions, build_key_names, build_filter_column_name] = JoinInterpreterHelper::prepareJoin( context, build_pipeline.firstStream()->getHeader(), @@ -271,12 +269,11 @@ void DAGQueryBlockInterpreter::handleJoin(const tipb::Join & join, DAGPipeline & tiflash_join.getBuildConditions()); RUNTIME_ASSERT(build_side_prepare_actions, log, "build_side_prepare_actions cannot be nullptr"); - // appends the where columns for the other conditions and the other-eq conditions auto [other_condition_expr, other_filter_column_name, other_eq_filter_from_in_column_name] = tiflash_join.genJoinOtherConditionAction(context, left_input_header, right_input_header, probe_side_prepare_actions); const Settings & settings = context.getSettingsRef(); - size_t max_block_size_for_cross_join = settings.max_block_size; // what if the number of repeated result rows exceeds this max? + size_t max_block_size_for_cross_join = settings.max_block_size; fiu_do_on(FailPoints::minimum_block_size_for_cross_join, { max_block_size_for_cross_join = 1; }); JoinPtr join_ptr = std::make_shared( // make join @@ -485,7 +482,7 @@ void DAGQueryBlockInterpreter::recordProfileStreams(DAGPipeline & pipeline, cons void DAGQueryBlockInterpreter::handleExchangeReceiver(DAGPipeline & pipeline) { - auto exchange_receiver = dagContext().getMPPExchangeReceiver(query_block.source_name); // fetch the source from the registered exchangers + auto exchange_receiver = dagContext().getMPPExchangeReceiver(query_block.source_name); if (unlikely(exchange_receiver == nullptr)) throw Exception("Can not find exchange receiver for " + query_block.source_name, ErrorCodes::LOGICAL_ERROR); // todo choose a more reasonable stream number @@ -508,14 +505,14 @@ void DAGQueryBlockInterpreter::handleExchangeReceiver(DAGPipeline & pipeline) /*stream_id=*/enable_fine_grained_shuffle ? 
i : 0); exchange_receiver_io_input_streams.push_back(stream); stream->setExtraInfo(extra_info); - pipeline.streams.push_back(stream); // the underlying input streams of each pipeline + pipeline.streams.push_back(stream); } NamesAndTypes source_columns; for (const auto & col : pipeline.firstStream()->getHeader()) { source_columns.emplace_back(col.name, col.type); } - analyzer = std::make_unique(std::move(source_columns), context); // the analyzer is initialized here + analyzer = std::make_unique(std::move(source_columns), context); } // for tests, we need to mock ExchangeReceiver blockInputStream as the source stream. @@ -531,7 +528,7 @@ void DAGQueryBlockInterpreter::handleProjection(DAGPipeline & pipeline, const ti { NamesAndTypes input_columns; pipeline.streams = input_streams_vec[0]; - for (auto const & p : pipeline.firstStream()->getHeader().getNamesAndTypesList()) // the initial block column names + for (auto const & p : pipeline.firstStream()->getHeader().getNamesAndTypesList()) input_columns.emplace_back(p.name, p.type); DAGExpressionAnalyzer dag_analyzer(std::move(input_columns), context); ExpressionActionsChain chain; @@ -541,12 +538,12 @@ void DAGQueryBlockInterpreter::handleProjection(DAGPipeline & pipeline, const ti UniqueNameGenerator unique_name_generator; for (const auto & expr : projection.exprs()) { - auto expr_name = dag_analyzer.getActions(expr, last_step.actions); // add the extra column produced by the expr - last_step.required_output.emplace_back(expr_name); // add it to this step's final output columns + auto expr_name = dag_analyzer.getActions(expr, last_step.actions); + last_step.required_output.emplace_back(expr_name); const auto & col = last_step.actions->getSampleBlock().getByName(expr_name); String alias = unique_name_generator.toUniqueName(col.name); output_columns.emplace_back(alias, col.type); - project_cols.emplace_back(col.name, alias); // just make sure the current projection's output columns contain no duplicate names + project_cols.emplace_back(col.name, alias); } executeExpression(pipeline, chain.getLastActions(), log, "before projection"); executeProject(pipeline, project_cols, "projection"); @@ -595,7 +592,7 @@ void DAGQueryBlockInterpreter::handleWindowOrder(DAGPipeline & pipeline, const t // like final_project.emplace_back(col.name, query_block.qb_column_prefix + col.name); void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline) { - if (query_block.source->tp() == tipb::ExecType::TypeJoin) // check what the underlying source executor is + if (query_block.source->tp() == tipb::ExecType::TypeJoin) { SubqueryForSet right_query; handleJoin(query_block.source->join(), pipeline, right_query, query_block.source->fine_grained_shuffle_stream_count()); @@ -646,7 +643,6 @@ void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline) Errors::Coprocessor::BadRequest); } - // the analyzer is used here: it first takes the most basic source column base, then analyzes the upper non-leaf executors of the query block auto res = analyzeExpressions( context, *analyzer, diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index c74b52ed77d..83563c47338 100755 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -1131,7 +1131,7 @@ Field decodeLiteral(const tipb::Expr & expr) } } -String getColumnNameForColumnExpr(const tipb::Expr & expr, const std::vector & input_col) // this maps the index in the expr to the column name in the vector +String getColumnNameForColumnExpr(const tipb::Expr & expr, const std::vector & input_col) { auto column_index = decodeDAGInt64(expr.val()); if (column_index < 0 || column_index >= static_cast(input_col.size())) diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp 
b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp index 113602a1d82..0869c2c653f 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp @@ -55,7 +55,7 @@ BlockInputStreams InterpreterDAG::executeQueryBlock(DAGQueryBlock & query_block) } DAGQueryBlockInterpreter query_block_interpreter( context, - input_streams_vec, // the input sources of the underlying DAG + input_streams_vec, query_block, max_streams); return query_block_interpreter.execute(); diff --git a/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp b/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp index 1e2b102d0c6..b031007c3c7 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp @@ -89,7 +89,7 @@ void executeExpression( { if (expr_actions && !expr_actions->getActions().empty()) { - pipeline.transform([&](auto & stream) { // the data stream changes: wrap it with ExpressionBlockInputStream + pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, expr_actions, log->identifier()); stream->setExtraInfo(extra_info); }); diff --git a/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.cpp b/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.cpp index 6c876078d10..386c8158328 100644 --- a/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.cpp +++ b/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.cpp @@ -179,7 +179,7 @@ std::tuple doGenJoinOtherConditionAction( if (join.other_conditions_size() == 0 && join.other_eq_conditions_from_in_size() == 0) return {nullptr, "", ""}; - DAGExpressionAnalyzer dag_analyzer(source_columns, context); // a new dag analyzer is created here + DAGExpressionAnalyzer dag_analyzer(source_columns, context); ExpressionActionsChain chain; String filter_column_for_other_condition; @@ -190,7 +190,7 @@ std::tuple doGenJoinOtherConditionAction( { condition_vector.push_back(&c); } - filter_column_for_other_condition = dag_analyzer.appendWhere(chain, condition_vector); // the other filter does not affect the existing schema + filter_column_for_other_condition = dag_analyzer.appendWhere(chain, condition_vector); } String filter_column_for_other_eq_condition; @@ -201,7 +201,7 @@ std::tuple doGenJoinOtherConditionAction( { condition_vector.push_back(&c); } - filter_column_for_other_eq_condition = dag_analyzer.appendWhere(chain, condition_vector); // the other-eq filter does not affect the existing schema + filter_column_for_other_eq_condition = dag_analyzer.appendWhere(chain, condition_vector); } return {chain.getLastActions(), std::move(filter_column_for_other_condition), std::move(filter_column_for_other_eq_condition)}; @@ -230,7 +230,7 @@ String TiFlashJoin::genMatchHelperName(const Block & header1, const Block & head { match_helper_name = fmt::format("{}{}", Join::match_helper_prefix, ++i); } - return match_helper_name; // a unique name + return match_helper_name; } NamesAndTypes TiFlashJoin::genColumnsForOtherJoinFilter( @@ -248,8 +248,7 @@ NamesAndTypes TiFlashJoin::genColumnsForOtherJoinFilter( } return true; }; - // assert that every column of the probe side original block can be found in the probe actions - if (unlikely(!is_prepare_actions_valid(build_side_index == 1 ? left_input_header : right_input_header, probe_prepare_join_actions))) // can a ternary expression really be used in an argument like this? + if (unlikely(!is_prepare_actions_valid(build_side_index == 1 ? left_input_header : right_input_header, probe_prepare_join_actions))) { throw TiFlashException("probe_prepare_join_actions isn't valid", Errors::Coprocessor::Internal); } @@ -296,9 +295,9 @@ NamesAndTypes TiFlashJoin::genColumnsForOtherJoinFilter( bool make_nullable = build_side_index == 1 ? 
join.join_type() == tipb::JoinType::TypeRightOuterJoin : join.join_type() == tipb::JoinType::TypeLeftOuterJoin; - append_new_columns(probe_prepare_join_actions->getSampleBlock(), make_nullable); // new columns produced by the probe side need to be appended + append_new_columns(probe_prepare_join_actions->getSampleBlock(), make_nullable); - return columns_for_other_join_filter; // nulls are filled according to the probe-side function's 1-0 result; if it is the build-side function's 1-0, the row is simply filtered out or ignored + return columns_for_other_join_filter; } NamesAndTypes TiFlashJoin::genJoinOutputColumns( @@ -335,14 +334,13 @@ std::tuple TiFlashJoin::genJoinOtherCondit const Block & right_input_header, const ExpressionActionsPtr & probe_side_prepare_join) const { - // append the original columns of both sides plus the columns generated by the probe side auto columns_for_other_join_filter = genColumnsForOtherJoinFilter( left_input_header, right_input_header, probe_side_prepare_join); - return doGenJoinOtherConditionAction(context, join, columns_for_other_join_filter); // then generate new actions (columns) from the other conditions + return doGenJoinOtherConditionAction(context, join, columns_for_other_join_filter); } std::tuple prepareJoin( @@ -361,7 +359,6 @@ std::tuple prepareJoin( ExpressionActionsChain chain; Names key_names; String filter_column_name; - // as the name says: append the join keys and one side's join filter dag_analyzer.appendJoinKeyAndJoinFilters(chain, keys, join_key_types, key_names, left, is_right_out_join, filters, filter_column_name); return {chain.getLastActions(), std::move(key_names), std::move(filter_column_name)}; } diff --git a/dbms/src/Flash/Mpp/MPPHandler.cpp b/dbms/src/Flash/Mpp/MPPHandler.cpp index 14f1d6e5a05..753653ac7b0 100644 --- a/dbms/src/Flash/Mpp/MPPHandler.cpp +++ b/dbms/src/Flash/Mpp/MPPHandler.cpp @@ -82,7 +82,6 @@ grpc::Status MPPHandler::execute(const ContextPtr & context, mpp::DispatchTaskRe { Stopwatch stopwatch; task = MPPTask::newTask(task_request.meta(), context); - task->prepare(task_request); addRetryRegion(context, response); diff --git a/dbms/src/Flash/Mpp/MPPTask.cpp b/dbms/src/Flash/Mpp/MPPTask.cpp index c2d5b4ccc94..655e6c724cb 100644 --- a/dbms/src/Flash/Mpp/MPPTask.cpp +++ b/dbms/src/Flash/Mpp/MPPTask.cpp @@ -149,7 +149,6 @@ void MPPTask::finishWrite() void MPPTask::run() { - // schedule the task on the thread pool and detach newThreadManager()->scheduleThenDetach(true, "MPPTask", [self = shared_from_this()] { self->runImpl(); }); } @@ -214,7 +213,6 @@ void MPPTask::initExchangeReceivers() if (status != RUNNING) throw Exception("exchange receiver map can not be initialized, because the task is not in running state"); - // since this is push mode, act only after data arrives receiver_set_local->addExchangeReceiver(executor_id, exchange_receiver); } return true; @@ -341,8 +339,7 @@ void MPPTask::prepare(const mpp::DispatchTaskRequest & task_request) void MPPTask::preprocess() { - auto start_time = Clock::now(); - // register some receivers + auto start_time = Clock::now(); initExchangeReceivers(); LOG_DEBUG(log, "init exchange receiver done"); query_executor_holder.set(queryExecute(*context)); @@ -391,7 +388,6 @@ void MPPTask::runImpl() schedule_entry.setNeededThreads(estimateCountOfNewThreads()); LOG_DEBUG(log, "Estimate new thread count of query: {} including tunnel_threads: {}, receiver_threads: {}", schedule_entry.getNeededThreads(), dag_context->tunnel_set->getExternalThreadCnt(), new_thread_count_of_mpp_receiver); - // similar to waiting on a channel in golang scheduleOrWait(); LOG_INFO(log, "task starts running"); diff --git a/dbms/src/Flash/Mpp/MPPTaskScheduleEntry.h b/dbms/src/Flash/Mpp/MPPTaskScheduleEntry.h index bc5522dfdfe..60ccb9297c0 100644 --- 
a/dbms/src/Flash/Mpp/MPPTaskScheduleEntry.h +++ b/dbms/src/Flash/Mpp/MPPTaskScheduleEntry.h @@ -52,7 +52,6 @@ class MPPTaskScheduleEntry int needed_threads; std::mutex schedule_mu; - // condition variable std::condition_variable schedule_cv; ScheduleState schedule_state; const LoggerPtr log; diff --git a/dbms/src/Flash/Mpp/MPPTunnelSet.cpp b/dbms/src/Flash/Mpp/MPPTunnelSet.cpp index a3ba44127e3..3712172aa7c 100644 --- a/dbms/src/Flash/Mpp/MPPTunnelSet.cpp +++ b/dbms/src/Flash/Mpp/MPPTunnelSet.cpp @@ -243,12 +243,10 @@ void MPPTunnelSetBase::fineGrainedShuffleWrite( template void MPPTunnelSetBase::registerTunnel(const MPPTaskId & receiver_task_id, const TunnelPtr & tunnel) -{ - // tunnels are registered in the map +{ if (receiver_task_id_to_index_map.find(receiver_task_id) != receiver_task_id_to_index_map.end()) throw Exception(fmt::format("the tunnel {} has been registered", tunnel->id())); - // the tunnels are just a vector receiver_task_id_to_index_map[receiver_task_id] = tunnels.size(); tunnels.push_back(tunnel); if (!tunnel->isLocal() && !tunnel->isAsync()) diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index 7b89ed431c3..ff7cec2b382 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -81,7 +81,6 @@ ExpressionAction ExpressionAction::applyFunction(const FunctionBuilderPtr & func return a; } -// suitable for the repeat source to use: adding the groupingID column ExpressionAction ExpressionAction::addColumn(const ColumnWithTypeAndName & added_column_) { ExpressionAction a; @@ -145,7 +144,7 @@ ExpressionAction ExpressionAction::expandSource(std::shared_ptr ex } -void ExpressionAction::prepare(Block & sample_block) // this is the prepare phase +void ExpressionAction::prepare(Block & sample_block) { /** Constant expressions should be evaluated, and put the result in sample_block. */ @@ -178,7 +177,6 @@ void ExpressionAction::prepare(Block & sample_block) // this is the prepare phase new_column.type = result_type; sample_block.insert(std::move(new_column)); - // both the arguments and the result of execution are columns in the block function->execute(sample_block, arguments, result_position); /// If the result is not a constant, just in case, we will consider the result as unknown. @@ -194,12 +192,11 @@ void ExpressionAction::prepare(Block & sample_block) // this is the prepare phase /// Change the size to 1. 
if (col.column->empty()) - col.column = col.column->cloneResized(1); // a constant column keeps only a single value, no problem + col.column = col.column->cloneResized(1); } } else { - // if it cannot be evaluated eagerly, insert an unknown column directly, carrying its type and name sample_block.insert({nullptr, result_type, result_name}); } @@ -233,7 +230,7 @@ void ExpressionAction::prepare(Block & sample_block) // this is the prepare phase } } - for (const auto & col : columns_added_by_join) // the sample block so far holds the left-side columns; these are the right-side ones + for (const auto & col : columns_added_by_join) sample_block.insert(ColumnWithTypeAndName(nullptr, col.type, col.name)); break; @@ -267,7 +264,7 @@ void ExpressionAction::prepare(Block & sample_block) // this is the prepare phase ColumnWithTypeAndName column = sample_block.getByName(name); if (!alias.empty()) column.name = alias; - new_block.insert(std::move(column)); // effectively a direct move (a new block is used because some of the earlier columns may be dropped) + new_block.insert(std::move(column)); } sample_block.swap(new_block); @@ -302,7 +299,7 @@ void ExpressionAction::prepare(Block & sample_block) // this is the prepare phase } -void ExpressionAction::execute(Block & block) const // the execution phase +void ExpressionAction::execute(Block & block) const { if (type == REMOVE_COLUMN || type == COPY_COLUMN) if (!block.has(source_name)) @@ -321,10 +318,10 @@ void ExpressionAction::execute(Block & block) const // the execution phase { if (!block.has(argument_names[i])) throw Exception("Not found column: '" + argument_names[i] + "'", ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); - arguments[i] = block.getPositionByName(argument_names[i]); // find the column's offset + arguments[i] = block.getPositionByName(argument_names[i]); } - size_t num_columns_without_result = block.columns(); // get the offset of the result column + size_t num_columns_without_result = block.columns(); block.insert({nullptr, result_type, result_name}); function->execute(block, arguments, num_columns_without_result); @@ -343,7 +340,7 @@ void ExpressionAction::execute(Block & block) const // the execution phase case EXPAND: { - expand->replicateAndFillNull(block); // the repeat execution phase fills the block directly + expand->replicateAndFillNull(block); break; } @@ -492,7 +489,6 @@ void ExpressionActions::addImpl(ExpressionAction action, Names & new_names) arguments[i] = sample_block.getByName(action.argument_names[i]); } - // normally default functions just use the default creator action.function = action.function_builder->build(arguments, action.collator); action.result_type = action.function->getReturnType(); } @@ -751,7 +747,7 @@ BlockInputStreamPtr ExpressionActions::createStreamWithNonJoinedDataIfFullOrRigh return {}; } -void ExpressionActionsChain::addStep() // only prepares the input columns of the new step for the successor +void ExpressionActionsChain::addStep() { if (steps.empty()) throw Exception("Cannot add action to empty ExpressionActionsChain", ErrorCodes::LOGICAL_ERROR); diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 3cc7ae92874..1a849dee0c7 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -539,7 +539,7 @@ void insertRowToList(Join::RowRefList * list, Join::RowRefList * elem, Block * s { elem->next = list->next; // NOLINT(clang-analyzer-core.NullDereference) list->next = elem; - elem->block = stored_block; // a list structure, because of map-all + elem->block = stored_block; elem->row_num = index; } @@ -579,7 +579,7 @@ struct Inserter * That is, the former second element, if it was, will be the third, and so on. 
*/ auto elem = reinterpret_cast(pool.alloc(sizeof(MappedType))); - insertRowToList(&emplace_result.getMapped(), elem, stored_block, i); // the hash table keeps references to the stored block and its row number; this list serves as the value for the hash key + insertRowToList(&emplace_result.getMapped(), elem, stored_block, i); } } }; @@ -834,7 +834,7 @@ void recordFilteredRows(const Block & block, const String & filter_column, Colum PaddedPODArray & mutable_null_map = static_cast(*mutable_null_map_holder).getData(); const auto & nested_column = column->isColumnNullable() ? static_cast(*column).getNestedColumnPtr() : column; - for (size_t i = 0, size = nested_column->size(); i < size; ++i) // if the companion column cannot yield an int, does that also count as null? + for (size_t i = 0, size = nested_column->size(); i < size; ++i) mutable_null_map[i] |= (!nested_column->getInt(i)); null_map_holder = std::move(mutable_null_map_holder); @@ -1373,9 +1373,9 @@ void Join::handleOtherConditions(Block & block, std::unique_ptr { other_condition_ptr->execute(block); - auto filter_column = ColumnUInt8::create(); // create a u8 column to hold the true/false result + auto filter_column = ColumnUInt8::create(); auto & filter = filter_column->getData(); - filter.assign(block.rows(), static_cast(1)); // just initialize everything to 1? + filter.assign(block.rows(), static_cast(1)); if (!other_filter_column.empty()) { mergeNullAndFilterResult(block, filter, other_filter_column, false); @@ -1564,7 +1564,6 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr /// Memoize key columns to work with. for (size_t i = 0; i < keys_size; ++i) { - // since ColumnPtr derives from intrusive_ptr, get() yields the raw pointer of this type (the raw column) key_columns[i] = block.getByName(key_names_left[i]).column.get(); if (ColumnPtr converted = key_columns[i]->convertToFullColumnIfConst()) @@ -1577,12 +1576,10 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr /// Keys with NULL value in any column won't join to anything. ColumnPtr null_map_holder; ConstNullMapPtr null_map{}; - // extract the null attributes on the join keys extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map); /// reuse null_map to record the filtered rows, the rows contains NULL or does not /// match the join filter won't join to anything - // effectively merges the null attribute of the left filter column into the null map as well recordFilteredRows(block, left_filter_column, null_map_holder, null_map); size_t existing_columns = block.columns(); @@ -1617,12 +1614,12 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr /// Add new columns to the block. size_t num_columns_to_add = sample_block_with_columns_to_add.columns(); MutableColumns added_columns; - added_columns.reserve(num_columns_to_add); // create the columns that need to be newly added + added_columns.reserve(num_columns_to_add); std::vector right_table_column_indexes; for (size_t i = 0; i < num_columns_to_add; ++i) { - right_table_column_indexes.push_back(i + existing_columns); // record the offsets where they are inserted + right_table_column_indexes.push_back(i + existing_columns); } std::vector right_indexes; diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index d8bfe2afa6e..63db25d0d99 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -39,7 +39,7 @@ struct ProbeProcessInfo; * JOIN-s could be of nine types: ANY/ALL × LEFT/INNER/RIGHT/FULL, and also CROSS. * * If ANY is specified - then select only one row from the "right" table, (first encountered row), even if there was more matching rows. 
- * If ALL is specified - usual JOIN, when rows are multiplied by number of matching rows from the "right" table. ALL replicates rows + * If ALL is specified - usual JOIN, when rows are multiplied by number of matching rows from the "right" table. * ANY is more efficient. * * If INNER is specified - leave only rows that have matching rows from "right" table. diff --git a/dbms/src/Interpreters/NullableUtils.cpp b/dbms/src/Interpreters/NullableUtils.cpp index 44cb13c0d92..cf8975f8b80 100644 --- a/dbms/src/Interpreters/NullableUtils.cpp +++ b/dbms/src/Interpreters/NullableUtils.cpp @@ -26,7 +26,6 @@ void extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ColumnPtr & nul return; const ColumnNullable & column_nullable = static_cast(*column); - // take the companion byte map and the underlying column from the nullable column null_map = &column_nullable.getNullMapData(); null_map_holder = column_nullable.getNullMapColumnPtr(); column = &column_nullable.getNestedColumn(); @@ -50,7 +49,6 @@ void extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ColumnPtr & nul PaddedPODArray & mutable_null_map = static_cast(*mutable_null_map_holder).getData(); const PaddedPODArray & other_null_map = column_nullable.getNullMapData(); - // for join key columns, null in any column means null, so OR the maps here for (size_t i = 0, size = mutable_null_map.size(); i < size; ++i) mutable_null_map[i] |= other_null_map[i]; diff --git a/dbms/src/Interpreters/sortBlock.cpp b/dbms/src/Interpreters/sortBlock.cpp index 438a14b42bd..9995329b833 100644 --- a/dbms/src/Interpreters/sortBlock.cpp +++ b/dbms/src/Interpreters/sortBlock.cpp @@ -410,7 +410,6 @@ void sortBlock(Block & block, const SortDescription & description, size_t limit) : block.safeGetByPosition(description[0].column_number).column.get(); IColumn::Permutation perm; - // the permutation is a reordering of the column's offsets if (NeedCollation(column, description[0])) column->getPermutation(*description[0].collator, reverse, limit, description[0].nulls_direction, perm); else @@ -418,7 +417,7 @@ void sortBlock(Block & block, const SortDescription & description, size_t limit) size_t columns = block.columns(); for (size_t i = 0; i < columns; ++i) - block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->permute(perm, limit); // reorganize the column data according to the permuted offsets + block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->permute(perm, limit); } else { diff --git a/dbms/src/TestUtils/mockExecutor.cpp b/dbms/src/TestUtils/mockExecutor.cpp index e3b02019f15..1220e873a32 100644 --- a/dbms/src/TestUtils/mockExecutor.cpp +++ b/dbms/src/TestUtils/mockExecutor.cpp @@ -96,7 +96,7 @@ void DAGRequestBuilder::initDAGRequest(tipb::DAGRequest & dag_request) else dag_request.set_encode_type(tipb::EncodeType::TypeDefault); - for (size_t i = 0; i < root->output_schema.size(); ++i) // set the mock dag request's output offsets according to the root executor's output schema + for (size_t i = 0; i < root->output_schema.size(); ++i) dag_request.add_output_offsets(i); } From edaa6a2801d066bd9e846bbfeb3ce326c2b97407 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Wed, 28 Dec 2022 22:50:10 +0800 Subject: [PATCH 10/31] make fmt Signed-off-by: AilinKid <3148019@qq.com> --- dbms/src/Core/Block.h | 2 +- dbms/src/DataStreams/SquashingTransform.cpp | 1 + .../Coprocessor/DAGExpressionAnalyzer.cpp | 2 +- .../Coprocessor/DAGQueryBlockInterpreter.cpp | 10 ++++----- dbms/src/Flash/Coprocessor/InterpreterDAG.cpp | 2 +- .../Flash/Coprocessor/InterpreterUtils.cpp | 2 +- .../Coprocessor/JoinInterpreterHelper.cpp | 2 +- dbms/src/Flash/Mpp/MPPTunnelSet.cpp | 2 +- dbms/src/Interpreters/Expand.h | 
2 +- dbms/src/Interpreters/Join.cpp | 22 +++++-------------- 10 files changed, 19 insertions(+), 28 deletions(-) diff --git a/dbms/src/Core/Block.h b/dbms/src/Core/Block.h index a1af433bbc9..0d337d6d3e2 100644 --- a/dbms/src/Core/Block.h +++ b/dbms/src/Core/Block.h @@ -38,7 +38,7 @@ class Context; class Block { -private: +private: using Container = ColumnsWithTypeAndName; using IndexByName = std::map; diff --git a/dbms/src/DataStreams/SquashingTransform.cpp b/dbms/src/DataStreams/SquashingTransform.cpp index 2425435d90e..d018deaed96 100644 --- a/dbms/src/DataStreams/SquashingTransform.cpp +++ b/dbms/src/DataStreams/SquashingTransform.cpp @@ -97,6 +97,7 @@ void SquashingTransform::append(Block && block) } } + bool SquashingTransform::isEnoughSize(size_t rows, size_t bytes) const { return (!min_block_size_rows && !min_block_size_bytes) diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index 18fd7c507d8..dfbf1a261af 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -1078,7 +1078,7 @@ bool DAGExpressionAnalyzer::appendJoinKeyAndJoinFilters( std::unordered_set needed_columns; for (const auto & c : getCurrentInputColumns()) needed_columns.insert(c.name); - for (const auto & s : key_names) + for (const auto & s : key_names) needed_columns.insert(s); if (!filter_column_name.empty()) needed_columns.insert(filter_column_name); diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 4678b854012..6fbf8b59d6c 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -505,14 +505,14 @@ void DAGQueryBlockInterpreter::handleExchangeReceiver(DAGPipeline & pipeline) /*stream_id=*/enable_fine_grained_shuffle ? i : 0); exchange_receiver_io_input_streams.push_back(stream); stream->setExtraInfo(extra_info); - pipeline.streams.push_back(stream); + pipeline.streams.push_back(stream); } NamesAndTypes source_columns; for (const auto & col : pipeline.firstStream()->getHeader()) { source_columns.emplace_back(col.name, col.type); } - analyzer = std::make_unique(std::move(source_columns), context); + analyzer = std::make_unique(std::move(source_columns), context); } // for tests, we need to mock ExchangeReceiver blockInputStream as the source stream. 
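The handleProjection hunk below relies on UniqueNameGenerator::toUniqueName to keep projection output aliases distinct. A minimal sketch of that deduplication idea, using a simplified stand-in class rather than TiFlash's actual implementation:

```cpp
#include <iostream>
#include <string>
#include <unordered_set>

// Simplified stand-in for the UniqueNameGenerator used in handleProjection:
// each projection output gets an alias that is unique within the projection,
// by appending a counter whenever the plain name is already taken.
class UniqueNameGeneratorSketch
{
public:
    std::string toUniqueName(const std::string & name)
    {
        std::string candidate = name;
        int suffix = 0;
        while (!used.insert(candidate).second) // insert fails => name already taken
            candidate = name + "_" + std::to_string(++suffix);
        return candidate;
    }

private:
    std::unordered_set<std::string> used;
};

int main()
{
    UniqueNameGeneratorSketch gen;
    std::cout << gen.toUniqueName("s1") << '\n'; // "s1"
    std::cout << gen.toUniqueName("s1") << '\n'; // "s1_1", so project_cols carries no duplicate aliases
}
```
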
@@ -538,12 +538,12 @@ void DAGQueryBlockInterpreter::handleProjection(DAGPipeline & pipeline, const ti UniqueNameGenerator unique_name_generator; for (const auto & expr : projection.exprs()) { - auto expr_name = dag_analyzer.getActions(expr, last_step.actions); - last_step.required_output.emplace_back(expr_name); + auto expr_name = dag_analyzer.getActions(expr, last_step.actions); + last_step.required_output.emplace_back(expr_name); const auto & col = last_step.actions->getSampleBlock().getByName(expr_name); String alias = unique_name_generator.toUniqueName(col.name); output_columns.emplace_back(alias, col.type); - project_cols.emplace_back(col.name, alias); + project_cols.emplace_back(col.name, alias); } executeExpression(pipeline, chain.getLastActions(), log, "before projection"); executeProject(pipeline, project_cols, "projection"); diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp index 0869c2c653f..61249f19642 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp @@ -55,7 +55,7 @@ BlockInputStreams InterpreterDAG::executeQueryBlock(DAGQueryBlock & query_block) } DAGQueryBlockInterpreter query_block_interpreter( context, - input_streams_vec, + input_streams_vec, query_block, max_streams); return query_block_interpreter.execute(); diff --git a/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp b/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp index b031007c3c7..d2e18a36e00 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp @@ -89,7 +89,7 @@ void executeExpression( { if (expr_actions && !expr_actions->getActions().empty()) { - pipeline.transform([&](auto & stream) { + pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, expr_actions, log->identifier()); stream->setExtraInfo(extra_info); }); diff --git a/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.cpp b/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.cpp index 386c8158328..275042fddb0 100644 --- a/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.cpp +++ b/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.cpp @@ -295,7 +295,7 @@ NamesAndTypes TiFlashJoin::genColumnsForOtherJoinFilter( bool make_nullable = build_side_index == 1 ? 
join.join_type() == tipb::JoinType::TypeRightOuterJoin : join.join_type() == tipb::JoinType::TypeLeftOuterJoin; - append_new_columns(probe_prepare_join_actions->getSampleBlock(), make_nullable); + append_new_columns(probe_prepare_join_actions->getSampleBlock(), make_nullable); return columns_for_other_join_filter; } diff --git a/dbms/src/Flash/Mpp/MPPTunnelSet.cpp b/dbms/src/Flash/Mpp/MPPTunnelSet.cpp index 3712172aa7c..a308a9717a3 100644 --- a/dbms/src/Flash/Mpp/MPPTunnelSet.cpp +++ b/dbms/src/Flash/Mpp/MPPTunnelSet.cpp @@ -243,7 +243,7 @@ void MPPTunnelSetBase::fineGrainedShuffleWrite( template void MPPTunnelSetBase::registerTunnel(const MPPTaskId & receiver_task_id, const TunnelPtr & tunnel) -{ +{ if (receiver_task_id_to_index_map.find(receiver_task_id) != receiver_task_id_to_index_map.end()) throw Exception(fmt::format("the tunnel {} has been registered", tunnel->id())); diff --git a/dbms/src/Interpreters/Expand.h b/dbms/src/Interpreters/Expand.h index d567e58e311..c08aa6230f6 100644 --- a/dbms/src/Interpreters/Expand.h +++ b/dbms/src/Interpreters/Expand.h @@ -138,4 +138,4 @@ class Expand private: GroupingSets group_sets_names; }; -} // namespace DB \ No newline at end of file +} // namespace DB diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 1a849dee0c7..df3da902d55 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -861,7 +861,6 @@ void Join::insertFromBlock(const Block & block, size_t stream_index) if (unlikely(!initialized)) throw Exception("Logical error: Join was not initialized", ErrorCodes::LOGICAL_ERROR); - // materialize a block Block * stored_block = nullptr; { std::lock_guard lk(blocks_lock); @@ -1577,7 +1576,6 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr ColumnPtr null_map_holder; ConstNullMapPtr null_map{}; extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map); - /// reuse null_map to record the filtered rows, the rows contains NULL or does not /// match the join filter won't join to anything recordFilteredRows(block, left_filter_column, null_map_holder, null_map); @@ -1614,7 +1612,7 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr /// Add new columns to the block. size_t num_columns_to_add = sample_block_with_columns_to_add.columns(); MutableColumns added_columns; - added_columns.reserve(num_columns_to_add); + added_columns.reserve(num_columns_to_add); std::vector right_table_column_indexes; for (size_t i = 0; i < num_columns_to_add; ++i) @@ -1642,17 +1640,17 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr if (((kind == ASTTableJoin::Kind::Inner || kind == ASTTableJoin::Kind::Right) && strictness == ASTTableJoin::Strictness::Any) || kind == ASTTableJoin::Kind::Anti) - filter = std::make_unique(rows); // used to remove elements from the right block + filter = std::make_unique(rows); /// Used with ALL ... 
JOIN IColumn::Offset current_offset = 0; std::unique_ptr offsets_to_replicate; if (strictness == ASTTableJoin::Strictness::All) - offsets_to_replicate = std::make_unique(rows); // a temporary marker during the join, used to replicate rows in the left block + offsets_to_replicate = std::make_unique(rows); switch (type) - { // after the join, the matched right-side rows have been appended to the added columns, and each row's replicate offset = the number of joined rows + { #define M(TYPE) \ case Join::Type::TYPE: \ joinBlockImplType>::Type>( \ @@ -1679,7 +1677,7 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr } FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::random_join_prob_failpoint); for (size_t i = 0; i < num_columns_to_add; ++i) - { // insert the added columns into the left-side block + { const ColumnWithTypeAndName & sample_col = sample_block_with_columns_to_add.getByPosition(i); block.insert(ColumnWithTypeAndName(std::move(added_columns[i]), sample_col.type, sample_col.name)); } @@ -1701,14 +1699,6 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr /// If ALL ... JOIN - we replicate all the columns except the new ones. if (offsets_to_replicate) { - /* - * a, b, c, d offset - * 1, y 1 x 2 at this point the right-side positions are already filled, but the left-side block's are not; the offsets are meant for the left rows, which are replicated to align with the right-side rows - * 2, z 1 x - * - * 1, y 1 x 2 - * 1, y 1 x */ for (size_t i = 0; i < existing_columns; ++i) { block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicateRange(probe_process_info.start_row, probe_process_info.end_row, *offsets_to_replicate); @@ -1730,7 +1720,7 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr if (!other_filter_column.empty() || !other_eq_filter_from_in_column.empty()) { if (!offsets_to_replicate) - throw Exception("Should not reach here, the strictness of join with other condition must be ALL"); // handle the other conditions + throw Exception("Should not reach here, the strictness of join with other condition must be ALL"); handleOtherConditions(block, filter, offsets_to_replicate, right_table_column_indexes); } } From 7adaf9b0978ba363218060860bdbddda0d1a4048 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Tue, 10 Jan 2023 14:52:40 +0800 Subject: [PATCH 11/31] rename repeat as expand Signed-off-by: AilinKid <3148019@qq.com> --- dbms/src/Debug/MockExecutor/ExpandBinder.cpp | 2 +- dbms/src/Debug/MockExecutor/ExpandBinder.h | 2 +- .../Coprocessor/DAGExpressionAnalyzer.cpp | 4 +- .../Coprocessor/DAGQueryBlockInterpreter.cpp | 10 +- .../Coprocessor/DAGQueryBlockInterpreter.h | 2 +- .../Coprocessor/collectOutputFieldTypes.cpp | 5 +- dbms/src/Flash/Mpp/MPPTask.cpp | 2 +- dbms/src/Flash/Planner/PlanType.h | 3 +- .../Flash/Planner/Plans/PhysicalExpand.cpp | 36 ++--- dbms/src/Flash/Planner/Plans/PhysicalExpand.h | 4 +- .../src/Flash/tests/gtest_filter_executor.cpp | 147 +----------------- dbms/src/Flash/tests/gtest_interpreter.cpp | 7 - .../src/Flash/tests/gtest_repeat_executor.cpp | 46 +++--- dbms/src/Interpreters/Expand.cpp | 13 +- dbms/src/Interpreters/Expand.h | 2 +- dbms/src/Interpreters/Join.cpp | 3 +- dbms/src/Interpreters/Join.h | 2 +- .../Interpreters/tests/gtest_block_repeat.cpp | 58 +++---- dbms/src/TestUtils/mockExecutor.cpp | 4 +- dbms/src/TestUtils/mockExecutor.h | 4 +- 20 files changed, 102 insertions(+), 254 deletions(-) diff --git a/dbms/src/Debug/MockExecutor/ExpandBinder.cpp b/dbms/src/Debug/MockExecutor/ExpandBinder.cpp index 0eb35b71c62..63fbfa28582 100644 --- a/dbms/src/Debug/MockExecutor/ExpandBinder.cpp +++ b/dbms/src/Debug/MockExecutor/ExpandBinder.cpp @@ -40,7 +40,7 @@ bool 
ExpandBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collat return children[0]->toTiPBExecutor(children_executor, collator_id, mpp_info, context); } -ExecutorBinderPtr compileRepeat(ExecutorBinderPtr input, size_t & executor_index, MockVVecGroupingNameVec grouping_set_columns, std::set in_set) +ExecutorBinderPtr compileExpand(ExecutorBinderPtr input, size_t & executor_index, MockVVecGroupingNameVec grouping_set_columns, std::set in_set) { DAGSchema output_schema; for (const auto & field : input->output_schema) diff --git a/dbms/src/Debug/MockExecutor/ExpandBinder.h b/dbms/src/Debug/MockExecutor/ExpandBinder.h index d1b4c7d980f..405b0b6e610 100644 --- a/dbms/src/Debug/MockExecutor/ExpandBinder.h +++ b/dbms/src/Debug/MockExecutor/ExpandBinder.h @@ -39,5 +39,5 @@ class ExpandBinder : public ExecutorBinder MockVVecGroupingNameVec grouping_sets_columns; }; -ExecutorBinderPtr compileRepeat(ExecutorBinderPtr input, size_t & executor_index, MockVVecGroupingNameVec grouping_set_columns, std::set set); +ExecutorBinderPtr compileExpand(ExecutorBinderPtr input, size_t & executor_index, MockVVecGroupingNameVec grouping_set_columns, std::set set); } // namespace DB::mock diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index dfbf1a261af..aa2c7014a5f 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -857,9 +857,9 @@ ExpressionActionsPtr DAGExpressionAnalyzer::appendExpand( chain.finalize(); chain.clear(); - auto & after_repeat_step = initAndGetLastStep(chain); + auto & after_expand_step = initAndGetLastStep(chain); for (const auto & column : getCurrentInputColumns()) - after_repeat_step.required_output.push_back(column.name); + after_expand_step.required_output.push_back(column.name); return before_expand; } diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 6fbf8b59d6c..0013b4c5af1 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -586,7 +586,7 @@ void DAGQueryBlockInterpreter::handleWindowOrder(DAGPipeline & pipeline, const t // 3. construct a final projection, even if it's not necessary. just construct it. // Talking about projection, it has the following rules. // 1. if the query block does not contain agg, then the final project is the same as the source Executor -// 2. if the query block contains agg/repeat, then the final project is the same as agg/repeat Executor +// 2. if the query block contains agg/expand, then the final project is the same as agg/expand Executor // 3. if the cop task may contains more then 1 query block, and the current query block is not the root // query block, then the project should add an alias for each column that needs to be projected, something // like final_project.emplace_back(col.name, query_block.qb_column_prefix + col.name); @@ -694,12 +694,12 @@ void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline) recordProfileStreams(pipeline, query_block.limit_or_topn_name); } - // execute the repeat source OP after all filter/limits and so on. - // since repeat source OP has some row replication work to do, place it after limit can reduce some unnecessary burden. + // execute the expand OP after all filter/limits and so on. + // since expand OP has some row replication work to do, place it after limit can reduce some unnecessary burden. 
// and put it before the final projection, because we should recognize some base col as grouping set col before change their alias. if (res.before_expand) { - executeExpandSource(pipeline, res.before_expand); + executeExpand(pipeline, res.before_expand); recordProfileStreams(pipeline, query_block.expand_name); } @@ -746,7 +746,7 @@ void DAGQueryBlockInterpreter::executeLimit(DAGPipeline & pipeline) } } -void DAGQueryBlockInterpreter::executeExpandSource(DAGPipeline & pipeline, const ExpressionActionsPtr & expr) +void DAGQueryBlockInterpreter::executeExpand(DAGPipeline & pipeline, const ExpressionActionsPtr & expr) { pipeline.transform([&](auto &stream) { stream = std::make_shared(stream, expr); diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h index eae5aa34cec..48edf039ff5 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h @@ -69,7 +69,7 @@ class DAGQueryBlockInterpreter void executeWindowOrder(DAGPipeline & pipeline, SortDescription sort_desc, bool enable_fine_grained_shuffle); void executeOrder(DAGPipeline & pipeline, const NamesAndTypes & order_columns); void executeLimit(DAGPipeline & pipeline); - void executeExpandSource(DAGPipeline & pipeline, const ExpressionActionsPtr & expr); + void executeExpand(DAGPipeline & pipeline, const ExpressionActionsPtr & expr); void executeWindow( DAGPipeline & pipeline, WindowDescription & window_description, diff --git a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp index 923afd56914..3b5c94a81d8 100644 --- a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp +++ b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp @@ -100,9 +100,8 @@ bool collectForTableScan(std::vector & output_field_types, cons return false; } -bool collectForRepeat(std::vector &out_field_types, const tipb::Executor & executor) +bool collectForExpand(std::vector &out_field_types, const tipb::Executor & executor) { - auto &out_child_fields = out_field_types; // collect output_field_types of children getChildren(executor).forEach([&out_child_fields](const tipb::Executor & child) { @@ -231,7 +230,7 @@ bool collectForExecutor(std::vector & output_field_types, const case tipb::ExecType::TypeJoin: return collectForJoin(output_field_types, executor); case tipb::ExecType::TypeExpand: - return collectForRepeat(output_field_types, executor); + return collectForExpand(output_field_types, executor); default: return true; } diff --git a/dbms/src/Flash/Mpp/MPPTask.cpp b/dbms/src/Flash/Mpp/MPPTask.cpp index 655e6c724cb..4f97a94afd7 100644 --- a/dbms/src/Flash/Mpp/MPPTask.cpp +++ b/dbms/src/Flash/Mpp/MPPTask.cpp @@ -339,7 +339,7 @@ void MPPTask::prepare(const mpp::DispatchTaskRequest & task_request) void MPPTask::preprocess() { - auto start_time = Clock::now(); + auto start_time = Clock::now(); initExchangeReceivers(); LOG_DEBUG(log, "init exchange receiver done"); query_executor_holder.set(queryExecute(*context)); diff --git a/dbms/src/Flash/Planner/PlanType.h b/dbms/src/Flash/Planner/PlanType.h index 4c4d6d283d5..cfbdff03e77 100644 --- a/dbms/src/Flash/Planner/PlanType.h +++ b/dbms/src/Flash/Planner/PlanType.h @@ -15,6 +15,7 @@ #pragma once #include +#include "Common/Exception.h" namespace DB { @@ -37,7 +38,7 @@ struct PlanType MockTableScan = 12, Join = 13, GetResult = 14, - Repeat = 15, + Expand = 15, }; PlanTypeEnum enum_value; diff --git 
a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp index 51eaaeaa4c3..9a52c21b62f 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp +++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp @@ -41,51 +41,51 @@ PhysicalPlanNodePtr PhysicalExpand::build( if (unlikely(expand.grouping_sets().empty())) { //should not reach here - throw TiFlashException("Repeat executor without grouping sets", Errors::Planner::BadRequest); + throw TiFlashException("Expand executor without grouping sets", Errors::Planner::BadRequest); } DAGExpressionAnalyzer analyzer{child->getSchema(), context}; - ExpressionActionsPtr before_repeat_actions = PhysicalPlanHelper::newActions(child->getSampleBlock(), context); + ExpressionActionsPtr before_expand_actions = PhysicalPlanHelper::newActions(child->getSampleBlock(), context); - auto shared_repeat = analyzer.buildExpandGroupingColumns(expand, before_repeat_actions); + auto shared_expand = analyzer.buildExpandGroupingColumns(expand, before_expand_actions); // construct sample block. - NamesAndTypes repeat_output_columns; + NamesAndTypes expand_output_columns; auto child_header = child->getSchema(); for (const auto & one : child_header) { - repeat_output_columns.emplace_back(one.name, shared_repeat->isInGroupSetColumn(one.name)? makeNullable(one.type): one.type); + expand_output_columns.emplace_back(one.name, shared_expand->isInGroupSetColumn(one.name)? makeNullable(one.type): one.type); } - repeat_output_columns.emplace_back(shared_repeat->grouping_identifier_column_name, shared_repeat->grouping_identifier_column_type); + expand_output_columns.emplace_back(shared_expand->grouping_identifier_column_name, shared_expand->grouping_identifier_column_type); - auto physical_repeat = std::make_shared( + auto physical_expand = std::make_shared( executor_id, - repeat_output_columns, + expand_output_columns, log->identifier(), child, - shared_repeat, - Block(repeat_output_columns)); + shared_expand, + Block(expand_output_columns)); - return physical_repeat; + return physical_expand; } -void PhysicalExpand::repeatTransform(DAGPipeline & child_pipeline, Context & context) +void PhysicalExpand::expandTransform(DAGPipeline & child_pipeline, Context & context) { - auto repeat_actions = PhysicalPlanHelper::newActions(child_pipeline.firstStream()->getHeader(), context); - repeat_actions->add(ExpressionAction::expandSource(shared_expand)); - String repeat_extra_info = fmt::format("repeat source, repeat_executor_id = {}", execId()); + auto expand_actions = PhysicalPlanHelper::newActions(child_pipeline.firstStream()->getHeader(), context); + expand_actions->add(ExpressionAction::expandSource(shared_expand)); + String expand_extra_info = fmt::format("expand, expand_executor_id = {}", execId()); child_pipeline.transform([&](auto &stream) { - stream = std::make_shared(stream, repeat_actions); - stream->setExtraInfo(repeat_extra_info); + stream = std::make_shared(stream, expand_actions); + stream->setExtraInfo(expand_extra_info); }); } void PhysicalExpand::transformImpl(DAGPipeline & pipeline, Context & context, size_t max_streams) { child->transform(pipeline, context, max_streams); - repeatTransform(pipeline, context); + expandTransform(pipeline, context); } void PhysicalExpand::finalize(const Names & parent_require) diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.h b/dbms/src/Flash/Planner/Plans/PhysicalExpand.h index a2696affb5b..6c798ad35c3 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.h +++ 
b/dbms/src/Flash/Planner/Plans/PhysicalExpand.h @@ -38,12 +38,12 @@ class PhysicalExpand : public PhysicalUnary const PhysicalPlanNodePtr & child_, const std::shared_ptr & shared_expand, const Block & sample_block_) - : PhysicalUnary(executor_id_, PlanType::Repeat, schema_, req_id, child_) + : PhysicalUnary(executor_id_, PlanType::Expand, schema_, req_id, child_) , shared_expand(shared_expand), sample_block(sample_block_){} void finalize(const Names & parent_require) override; - void repeatTransform(DAGPipeline & child_pipeline, Context & context); + void expandTransform(DAGPipeline & child_pipeline, Context & context); const Block & getSampleBlock() const override; diff --git a/dbms/src/Flash/tests/gtest_filter_executor.cpp b/dbms/src/Flash/tests/gtest_filter_executor.cpp index 68b8c39cca9..72cc171d1c7 100644 --- a/dbms/src/Flash/tests/gtest_filter_executor.cpp +++ b/dbms/src/Flash/tests/gtest_filter_executor.cpp @@ -211,152 +211,7 @@ try } CATCH -TEST_F(FilterExecutorTestRunner, RepeatLogical) -try -{ - /// following tests is ok now for non-planner enabled. - - /// case 1 - auto request = context - .scan("test_db", "test_table") - .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) - .build(context); - /// data flow: - /// - /// s1 s2 - /// "banana" "apple" - /// NULL NULL - /// "banana" "banana" - /// | - /// v - /// s1 s2 groupingID - /// "banana" NULL 1 - /// NULL "apple" 2 - /// NULL NULL 1 - /// NULL NULL 2 - /// "banana" NULL 1 - /// NULL "banana" 2 - /// - executeAndAssertColumnsEqual( - request, - {toNullableVec({"banana", {}, {}, {}, "banana", {}}), - toNullableVec({{}, "apple", {}, {}, {}, "banana"}), - toVec({1,2,1,2,1,2})}); - - /// case 2 - request = context - .scan("test_db", "test_table") - .filter(eq(col("s1"), col("s2"))) - .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) - .build(context); - /// data flow: - /// - /// s1 s2 - /// "banana" "apple" - /// NULL NULL - /// "banana" "banana" - /// | - /// v - /// s1 s2 - /// "banana" "banana" - /// | - /// v - /// s1 s2 groupingID - /// "banana" NULL 1 - /// NULL "banana" 2 - /// - executeAndAssertColumnsEqual( - request, - {toNullableVec({"banana", {}}), - toNullableVec({{}, "banana"}), - toVec({1,2})}); - - /// case 3 - request = context - .scan("test_db", "test_table") - .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) - .filter(eq(col("s1"), col("s2"))) - .build(context); - /// data flow: TiFlash isn't aware of the operation sequence, this filter here will be run before repeat does just like the second test case above. 
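The data-flow diagrams in the removed test above all follow the same per-row rule: one input row becomes one output row per grouping set, with the other grouping columns nulled out and a groupingID tagging each replica. A row-level sketch of that semantics, assuming plain std types instead of TiFlash's Block/Column machinery:

```cpp
#include <optional>
#include <string>
#include <vector>

// Toy row-level version of the Expand semantics shown in the diagrams:
// for grouping sets <{s1}> and <{s2}>, every input row yields two output
// rows, each keeping only its set's column. Illustrative layout only.
struct Row { std::optional<std::string> s1, s2; long grouping_id; };

static std::vector<Row> expandRow(const std::optional<std::string> & s1,
                                  const std::optional<std::string> & s2)
{
    return {
        {s1, std::nullopt, 1}, // grouping set <{s1}>: keep s1, null out s2
        {std::nullopt, s2, 2}, // grouping set <{s2}>: keep s2, null out s1
    };
}

int main()
{
    // ("banana", "apple") -> ("banana", NULL, 1) and (NULL, "apple", 2),
    // matching the first diagram in the removed test.
    auto rows = expandRow(std::string("banana"), std::string("apple"));
    return rows.size() == 2 ? 0 : 1;
}
```
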
- executeAndAssertColumnsEqual( - request, - {toNullableVec({"banana", {}}), - toNullableVec({{}, "banana"}), - toVec({1,2})}); - - /// case 4 - auto const_false = lit(Field(static_cast(0))); - request = context - .scan("test_db", "test_table") - .filter(const_false) // refuse all rows - .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) - .build(context); - executeAndAssertColumnsEqual( - request, - {}); - - /// case 5 (test integrated with aggregation) - request = context - .scan("test_db", "test_table") - .aggregation({Count(col("s1"))}, {col("s2")}) - .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) - .build(context); - /// data flow: - /// - /// s1 s2 - /// "banana" "apple" - /// NULL NULL - /// "banana" "banana" - /// | - /// v - /// count(s1) s2 - /// 1 "apple" - /// 0 NULL - /// 1 "banana" - /// | - /// v - /// count(s1) s2 groupingID - /// 1 NULL 1 - /// NULL "apple" 2 - /// 0 NULL 1 - /// NULL NULL 2 - /// 1 NULL 1 - /// NULL "banana" 2 - /// - executeAndAssertColumnsEqual( - request, - {toNullableVec({1, {}, 0, {}, 1,{}}), - toNullableVec({{}, "apple", {},{},{}, "banana"}), - toVec({1,2,1,2,1,2})}); - - /// case 5 (test integrated with aggregation and projection) - request = context - .scan("test_db", "test_table") - .aggregation({Count(col("s1"))}, {col("s2")}) - .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) - .project({"count(s1)"}) - .build(context); - executeAndAssertColumnsEqual( - request, - {toNullableVec({1, {}, 0, {}, 1,{}})}); - - /// case 6 (test integrated with aggregation and projection and limit) - /// note: by now, limit is executed before repeat does to reduce unnecessary row repeat work. 
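The note above is a pure cost argument: Expand multiplies rows by the number of grouping sets, so running it after the limit shrinks the replication work. A back-of-envelope sketch with hypothetical row counts:

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdio>

// Why the interpreter schedules Expand after Limit inside one query block:
// expanding first replicates every row, limiting first replicates only the
// surviving rows. The numbers below are hypothetical, not from the patch.
int main()
{
    std::size_t rows = 1'000'000, grouping_sets = 2, limit = 100;

    std::size_t expand_then_limit = rows * grouping_sets;                    // 2,000,000 replicated rows
    std::size_t limit_then_expand = std::min(rows, limit) * grouping_sets;   // 200 replicated rows

    std::printf("expand->limit replicates %zu rows, limit->expand only %zu\n",
                expand_then_limit, limit_then_expand);
}
```
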
-// request = context -// .scan("test_db", "test_table") -// .aggregation({Count(col("s1"))}, {col("s2")}) -// .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) -// .project({"count(s1)"}) -// .limit(2) -// .build(context); -// executeAndAssertColumnsEqual( -// request, -// {toNullableVec({1, {}, 0, {}})}); - -} -CATCH - -TEST_F(FilterExecutorTestRunner, convertBool) +TEST_F(FilterExecutorTestRunner, convert_bool) try { { diff --git a/dbms/src/Flash/tests/gtest_interpreter.cpp b/dbms/src/Flash/tests/gtest_interpreter.cpp index b5a2cd80b16..e129c5587a5 100644 --- a/dbms/src/Flash/tests/gtest_interpreter.cpp +++ b/dbms/src/Flash/tests/gtest_interpreter.cpp @@ -50,13 +50,6 @@ class InterpreterExecuteTest : public DB::tests::InterpreterTestUtils TEST_F(InterpreterExecuteTest, SingleQueryBlock) try { - - //auto grouping_sets = MockVecColumnNameVec{MockColumnNameVec{"s1"}, MockColumnNameVec{"s2"}}; - // auto request = context.scan("test_db", "test_table_1").repeat(grouping_sets).build(context); - // { - // ASSERT_BLOCKINPUTSTREAM_EQAUL("", request, 10); - // } - auto request = context.scan("test_db", "test_table_1") .filter(eq(col("s2"), col("s3"))) .aggregation({Max(col("s1"))}, {col("s2"), col("s3")}) diff --git a/dbms/src/Flash/tests/gtest_repeat_executor.cpp b/dbms/src/Flash/tests/gtest_repeat_executor.cpp index 020e5f19d26..1b5aaa6f04e 100644 --- a/dbms/src/Flash/tests/gtest_repeat_executor.cpp +++ b/dbms/src/Flash/tests/gtest_repeat_executor.cpp @@ -19,7 +19,7 @@ namespace DB { namespace tests { -class RepeatExecutorTestRunner : public DB::tests::ExecutorTest +class ExpandExecutorTestRunner : public DB::tests::ExecutorTest { public: void initializeContext() override @@ -36,13 +36,13 @@ class RepeatExecutorTestRunner : public DB::tests::ExecutorTest } }; -TEST_F(RepeatExecutorTestRunner, RepeatLogical) +TEST_F(ExpandExecutorTestRunner, ExpandLogical) try { - /// case 1 + /// case 1 auto request = context .scan("test_db", "test_table") - .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) .build(context); /// data flow: /// @@ -70,7 +70,7 @@ try request = context .scan("test_db", "test_table") .filter(eq(col("s1"), col("s2"))) - .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) .build(context); /// data flow: /// @@ -97,10 +97,10 @@ try /// case 3: this case is only for non-planner mode. /// request = context /// .scan("test_db", "test_table") - /// .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + /// .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) /// .filter(eq(col("s1"), col("s2"))) /// .build(context); - /// data flow: TiFlash isn't aware of the operation sequence, this filter here will be run before repeat does just like the second test case above. 
+ /// data flow: TiFlash isn't aware of the operation sequence, this filter here will be run before expand does just like the second test case above. /// since this case is only succeed under planner-disabled mode, just comment and assert the result here for a note. /// /// executeAndAssertColumnsEqual( @@ -114,7 +114,7 @@ try request = context .scan("test_db", "test_table") .filter(const_false) // refuse all rows - .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) .build(context); executeAndAssertColumnsEqual( request, @@ -133,7 +133,7 @@ try request = context .scan("test_db", "test_table") .aggregation({Count(col("s1"))}, {col("s2")}) - .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) .build(context); /// data flow: /// @@ -167,7 +167,7 @@ try request = context .scan("test_db", "test_table") .aggregation({Count(col("s1"))}, {col("s2")}) - .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) .project({"count(s1)"}) .build(context); executeAndAssertColumnsEqual( @@ -175,11 +175,11 @@ try {toNullableVec({1, {}, 0, {}, 1,{}})}); /// case 6 (test integrated with aggregation and projection and limit) 1 - /// note: by now, limit is executed before repeat does to reduce unnecessary row repeat work. + /// note: by now, limit is executed before expand does to reduce unnecessary row expand work. /// request = context /// .scan("test_db", "test_table") /// .aggregation({Count(col("s1"))}, {col("s2")}) - /// .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + /// .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) /// .limit(2) /// .project({"count(s1)"}) /// .build(context); @@ -197,12 +197,12 @@ try /// 1 "banana" /// | /// v - /// count(s1) s2 // limit precede the repeat OP since they are in the same DAG query block. + /// count(s1) s2 // limit precede the expand OP since they are in the same DAG query block. /// 1 "apple" /// 0 NULL /// | /// v - /// count(s1) s2 groupingID // repeat is always arranged executed after limit to avoid unnecessary replication in the same DAG query block. + /// count(s1) s2 groupingID // expand is always arranged executed after limit to avoid unnecessary replication in the same DAG query block. 
/// 1 NULL 1 /// NULL "apple" 2 /// 0 NULL 1 @@ -227,7 +227,7 @@ try request = context .scan("test_db", "test_table") .aggregation({Count(col("s1"))}, {col("s2")}) - .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) .project({"count(s1)"}) .topN({{"count(s1)", true}}, 2) .build(context); @@ -245,7 +245,7 @@ try /// 1 "banana" | /// | +-------------> Child DAG Query Block /// v | - /// count(s1) s2 groupingID // repeat | + /// count(s1) s2 groupingID // expand | /// 1 NULL 1 | /// NULL "apple" 2 | /// 0 NULL 1 | @@ -277,9 +277,9 @@ try /// 1 | /// ---------------+ /// - /// Note: you can see some difference from this plan and the last one above, since projection between repeat and topN is a SOURCE node, - /// it will isolate whole DAG into two independent DAG query blocks, limit and repeat OP take a place in each one of them. So we - /// couldn't guarantee that letting repeat OP run after limit does, which can't reduce unnecessary replication work. DAG query block + /// Note: you can see some difference from this plan and the last one above, since projection between expand and topN is a SOURCE node, + /// it will isolate whole DAG into two independent DAG query blocks, limit and expand OP take a place in each one of them. So we + /// couldn't guarantee that letting expand OP run after limit does, which can't reduce unnecessary replication work. DAG query block /// division should be blamed here. /// executeAndAssertColumnsEqual( @@ -300,7 +300,7 @@ try request = context .receive("exchange1") .aggregation({Count(col("s1"))}, {col("s2")}) - .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) .join(context.scan("test_db", "test_table").project({"s2"}), tipb::JoinType::TypeInnerJoin, {col("s2")}) .project({"count(s1)", "groupingID"}) .topN({{"groupingID", true}}, 2) @@ -319,7 +319,7 @@ try /// 1 "banana" | /// | +-------------> Child of Child DAG Query Block /// v | - /// count(s1) s2 groupingID // repeat | + /// count(s1) s2 groupingID // expand | /// 1 NULL 1 | /// NULL "apple" 2 | /// 0 NULL 1 | @@ -372,7 +372,7 @@ CreatingSets Expression: HashJoinProbe: Expression: - RepeatSource: : grouping set [<{count(s1)_collator_46 }><{any(s2)_collator_46 }>] + Expand: : grouping set [<{count(s1)_collator_46 }><{any(s2)_collator_46 }>] Expression: SharedQuery: ParallelAggregating, max_threads: 10, final: true @@ -384,4 +384,4 @@ CATCH /// TODO: more OP combination tests. } // namespace tests -} // namespace DB \ No newline at end of file +} // namespace DB diff --git a/dbms/src/Interpreters/Expand.cpp b/dbms/src/Interpreters/Expand.cpp index 7ddbd8c975d..09bbdd5d662 100644 --- a/dbms/src/Interpreters/Expand.cpp +++ b/dbms/src/Interpreters/Expand.cpp @@ -61,7 +61,7 @@ void Expand::getGroupingSetsDes(FmtBuffer & buffer) const /// for cases like: select count(distinct a), count(distinct b) from t; /// it will generate 2 group set with and , over which we should -/// repeat one more replica of the source rows from the input block and +/// expand one more replica of the source rows from the input block and /// identify it with the grouping id in the appended new column. 
/// /// eg: source block ==> replicated block @@ -111,7 +111,7 @@ void Expand::replicateAndFillNull(Block & block) const { // start from 1. Field grouping_id = j + 1; - added_grouping_id_column[0]->insert(grouping_id); + added_grouping_id_column[0]->insert(grouping_id); } } // todo: for some column overlapping in different grouping set, we should copy the overlapped column as a new column @@ -120,11 +120,11 @@ void Expand::replicateAndFillNull(Block & block) const // replicate the original block rows. size_t existing_columns = block.columns(); - if (offsets_to_replicate && offsets_to_replicate->size() > 0) + if (offsets_to_replicate) { for (size_t i = 0; i < existing_columns; ++i) { - // expand the origin const column, since it may be filled with null value when repeating. + // expand the origin const column, since it may be filled with null value when expanding. if (block.safeGetByPosition(i).column->isColumnConst()) block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->convertToFullColumnIfConst(); @@ -133,8 +133,9 @@ void Expand::replicateAndFillNull(Block & block) const { convertColumnToNullable(block.getByPosition(i)); } - // replicate it. - block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate); + if (!offsets_to_replicate->empty()) + // replicate it. + block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate); } } diff --git a/dbms/src/Interpreters/Expand.h b/dbms/src/Interpreters/Expand.h index c08aa6230f6..33f9f94f024 100644 --- a/dbms/src/Interpreters/Expand.h +++ b/dbms/src/Interpreters/Expand.h @@ -117,7 +117,7 @@ class Expand // replicateAndFillNull is the basic functionality that Expand Operator provided. Briefly, it replicates // origin rows with regard to local grouping sets description, and appending a new column named as groupingID // to illustrate what group this row is targeted for. - void replicateAndFillNull(Block & input) const; + void replicateAndFillNull(Block & block) const; size_t getGroupSetNum() const {return group_sets_names.size();} diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index df3da902d55..aca814f8501 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -866,7 +866,6 @@ void Join::insertFromBlock(const Block & block, size_t stream_index) std::lock_guard lk(blocks_lock); total_input_build_rows += block.rows(); blocks.push_back(block); - // block cp stored_block = &blocks.back(); original_blocks.push_back(block); } @@ -1677,7 +1676,7 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr } FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::random_join_prob_failpoint); for (size_t i = 0; i < num_columns_to_add; ++i) - { + { const ColumnWithTypeAndName & sample_col = sample_block_with_columns_to_add.getByPosition(i); block.insert(ColumnWithTypeAndName(std::move(added_columns[i]), sample_col.type, sample_col.name)); } diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index 63db25d0d99..abae6268430 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -187,7 +187,7 @@ class Join /// Reference to the row in block. 
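// Putting the replicate-and-fill-null contract above into a runnable, self-contained
// sketch: plain std:: types stand in for TiFlash's Block and IColumn, and `Cell`,
// `Row`, `expandRows` are illustrative names only, not the TiFlash API.
#include <iostream>
#include <optional>
#include <string>
#include <vector>

using Cell = std::optional<std::string>; // nullopt models SQL NULL
using Row = std::vector<Cell>;

// Replicate every input row once per grouping set; replica j keeps only the column
// owned by grouping set j, nulls out the other grouping columns, and carries a
// 1-based groupingID in an appended column.
std::vector<Row> expandRows(const std::vector<Row> & input, const std::vector<size_t> & grouping_cols)
{
    std::vector<Row> output;
    for (const auto & row : input)
    {
        for (size_t j = 0; j < grouping_cols.size(); ++j)
        {
            Row replica = row;
            for (size_t k = 0; k < grouping_cols.size(); ++k)
                if (k != j)
                    replica[grouping_cols[k]] = std::nullopt; // fill-null
            replica.push_back(Cell{std::to_string(j + 1)}); // groupingID starts from 1
            output.push_back(std::move(replica));
        }
    }
    return output;
}

int main()
{
    // two single-column grouping sets over columns 0 and 1, as in the example above
    const std::vector<Row> block = {{Cell{"1"}, Cell{"apple"}}};
    for (const auto & row : expandRows(block, {0, 1}))
    {
        for (const auto & cell : row)
            std::cout << (cell ? *cell : "NULL") << '\t';
        std::cout << '\n'; // prints "1 NULL 1" then "NULL apple 2"
    }
    return 0;
}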
struct RowRef
 {
-        const Block * block; // block + row num
+        const Block * block;
         size_t row_num;

         RowRef() = default;
diff --git a/dbms/src/Interpreters/tests/gtest_block_repeat.cpp b/dbms/src/Interpreters/tests/gtest_block_repeat.cpp
index 88c6286898e..1a34e0fde30 100644
--- a/dbms/src/Interpreters/tests/gtest_block_repeat.cpp
+++ b/dbms/src/Interpreters/tests/gtest_block_repeat.cpp
@@ -23,7 +23,7 @@ namespace DB
 namespace tests
 {
-class BlockRepeat : public ::testing::Test
+class BlockExpand : public ::testing::Test
 {
 public:
     using ColStringType = typename TypeTraits::FieldType;
@@ -38,11 +38,11 @@ class BlockRepeat : public ::testing::Test
     const std::vector col_name{"age", "gender", "country", "region", "zip"};
 };

-TEST_F(BlockRepeat, Limit)
+TEST_F(BlockExpand, ExpandLogic)
 try
 {
     {
-        // test basic block repeat operation. (two grouping set)
+        // test basic block expand operation. (two grouping sets)
         const ColumnsWithTypeAndName
             ori_col = {
@@ -55,19 +55,19 @@ try
         GroupingSet g_gender = GroupingSet{GroupingColumnNames{col_name[1]}};
         GroupingSet g_country = GroupingSet{GroupingColumnNames{col_name[2]}};
         GroupingSets group_sets = GroupingSets{g_gender, g_country};
-        Repeat repeat = Repeat(group_sets);
+        Expand expand = Expand(group_sets);
         Block block(ori_col);
         auto origin_rows = block.rows();
-        repeat.replicateAndFillNull(block);
+        expand.replicateAndFillNull(block);
         // assert the col size is added with 1.
         ASSERT_EQ(block.getColumns().size(), size_t(5));
         // assert the new col groupingID is appended.
         ASSERT_EQ(block.getColumnsWithTypeAndName()[4].name, "groupingID");
         // assert the block size is equal to origin rows * grouping set num.
-        auto repeat_rows = block.rows();
-        auto grouping_set_num = repeat.getGroupSetNum();
-        ASSERT_EQ(origin_rows * grouping_set_num, repeat_rows); // 6
+        auto expand_rows = block.rows();
+        auto grouping_set_num = expand.getGroupSetNum();
+        ASSERT_EQ(origin_rows * grouping_set_num, expand_rows); // 6
         // assert grouping set column are nullable.
ASSERT_EQ(block.getColumns()[0].get()->isColumnNullable(), false); ASSERT_EQ(block.getColumns()[1].get()->isColumnNullable(), true); @@ -88,7 +88,7 @@ try const auto res0 = ColumnWithInt64{1, 1, 0, 0, -1, -1}; const auto * col_0 = typeid_cast(block.getColumns()[0].get()); - for (int i = 0; i < int(repeat_rows); ++i) + for (int i = 0; i < int(expand_rows); ++i) { ASSERT_EQ(col_0->getElement(i), res0[i]); } @@ -96,7 +96,7 @@ try const auto res1 = ColumnWithString{"1 ", "null", "1 ", "null", "1 ", "null"}; const auto * col_1 = typeid_cast(block.getColumns()[1].get()); const auto * col_1_nest = &static_cast(col_1->getNestedColumn()); - for (int i = 0; i < int(repeat_rows); ++i) + for (int i = 0; i < int(expand_rows); ++i) { if (res1[i] == "null") { ASSERT_EQ(col_1->isNullAt(i), true); @@ -108,7 +108,7 @@ try const auto res2 = ColumnWithString{"null", "1", "null", "2", "null", "3"}; const auto * col_2 = typeid_cast(block.getColumns()[2].get()); const auto * col_2_nest = &static_cast(col_2->getNestedColumn()); - for (int i = 0; i < int(repeat_rows); ++i) + for (int i = 0; i < int(expand_rows); ++i) { if (res2[i] == "null") { ASSERT_EQ(col_2->isNullAt(i), true); @@ -119,20 +119,20 @@ try const auto res3 = ColumnWithUInt64{1, 1,1,1, 0,0}; const auto * col_3 = typeid_cast(block.getColumns()[3].get()); - for (int i = 0; i < int(repeat_rows); ++i) + for (int i = 0; i < int(expand_rows); ++i) { ASSERT_EQ(col_3->getElement(i), res3[i]); } const auto res4 = ColumnWithUInt64{1, 2, 1, 2, 1, 2}; const auto * col_4 = typeid_cast(block.getColumns()[4].get()); - for (int i = 0; i < int(repeat_rows); ++i) + for (int i = 0; i < int(expand_rows); ++i) { ASSERT_EQ(col_4->getElement(i), res4[i]); } } { - // test block repeat operation for multi grouping set (triple here) + // test block expand operation for multi grouping set (triple here) const ColumnsWithTypeAndName ori_col = { @@ -146,19 +146,19 @@ try GroupingSet g_country = GroupingSet{GroupingColumnNames{col_name[2]}}; GroupingSet g_region = GroupingSet{GroupingColumnNames{col_name[3]}}; GroupingSets group_sets = GroupingSets{g_gender, g_country, g_region}; - Repeat repeat = Repeat(group_sets); + Expand expand = Expand(group_sets); Block block(ori_col); auto origin_rows = block.rows(); - repeat.replicateAndFillNull(block); + expand.replicateAndFillNull(block); // assert the col size is added with 1. ASSERT_EQ(block.getColumns().size(), size_t(5)); // assert the new col groupingID is appended. ASSERT_EQ(block.getColumnsWithTypeAndName()[4].name, "groupingID"); // assert the block size is equal to origin rows * grouping set num. - auto repeat_rows = block.rows(); - auto grouping_set_num = repeat.getGroupSetNum(); - ASSERT_EQ(origin_rows * grouping_set_num, repeat_rows); // 9 + auto expand_rows = block.rows(); + auto grouping_set_num = expand.getGroupSetNum(); + ASSERT_EQ(origin_rows * grouping_set_num, expand_rows); // 9 // assert grouping set column are nullable. 
ASSERT_EQ(block.getColumns()[0].get()->isColumnNullable(), false); ASSERT_EQ(block.getColumns()[1].get()->isColumnNullable(), true); @@ -182,7 +182,7 @@ try const auto res0 = ColumnWithInt64{1, 1, 1, 0, 0, 0, -1, -1, -1}; const auto * col_0 = typeid_cast(block.getColumns()[0].get()); - for (int i = 0; i < int(repeat_rows); ++i) + for (int i = 0; i < int(expand_rows); ++i) { ASSERT_EQ(col_0->getElement(i), res0[i]); } @@ -190,7 +190,7 @@ try const auto res1 = ColumnWithString{"aaa", "null", "null", "bbb", "null", "null", "ccc", "null", "null"}; const auto * col_1 = typeid_cast(block.getColumns()[1].get()); const auto * col_1_nest = &static_cast(col_1->getNestedColumn()); - for (int i = 0; i < int(repeat_rows); ++i) + for (int i = 0; i < int(expand_rows); ++i) { if (res1[i] == "null") { ASSERT_EQ(col_1->isNullAt(i), true); @@ -202,7 +202,7 @@ try const auto res2 = ColumnWithString{"null", "1", "null", "null", "2", "null", "null", "3", "null"}; const auto * col_2 = typeid_cast(block.getColumns()[2].get()); const auto * col_2_nest = &static_cast(col_2->getNestedColumn()); - for (int i = 0; i < int(repeat_rows); ++i) + for (int i = 0; i < int(expand_rows); ++i) { if (res2[i] == "null") { ASSERT_EQ(col_2->isNullAt(i), true); @@ -215,7 +215,7 @@ try const auto res3 = ColumnWithUInt64{UInt64(-1), UInt64(-1), 1, UInt64(-1), UInt64(-1), 1, UInt64(-1), UInt64(-1), 0}; const auto * col_3 = typeid_cast(block.getColumns()[3].get()); const auto * col_3_nest = &typeid_cast(col_3->getNestedColumn()); - for (int i = 0; i < int(repeat_rows); ++i) + for (int i = 0; i < int(expand_rows); ++i) { if (res3[i] == UInt64(-1)) { ASSERT_EQ(col_3->isNullAt(i), true); @@ -226,7 +226,7 @@ try const auto res4 = ColumnWithUInt64{1, 2, 3, 1, 2, 3, 1, 2, 3}; const auto * col_4 = typeid_cast(block.getColumns()[4].get()); - for (int i = 0; i < int(repeat_rows); ++i) + for (int i = 0; i < int(expand_rows); ++i) { ASSERT_EQ(col_4->getElement(i), res4[i]); } @@ -246,20 +246,20 @@ try GroupingSet g_country = GroupingSet{GroupingColumnNames{col_name[2]}}; GroupingSet g_region = GroupingSet{GroupingColumnNames{col_name[3]}}; GroupingSets group_sets = GroupingSets{g_gender, g_country, g_region}; - Repeat repeat = Repeat(group_sets); + Expand expand = Expand(group_sets); Block block(ori_col); auto origin_rows = block.rows(); - repeat.replicateAndFillNull(block); + expand.replicateAndFillNull(block); // assert the col size is added with 1. ASSERT_EQ(block.getColumns().size(), size_t(5)); // assert the new col groupingID is appended. ASSERT_EQ(block.getColumnsWithTypeAndName()[4].name, "groupingID"); // assert the block size is equal to origin rows * grouping set num. - auto repeat_rows = block.rows(); - auto grouping_set_num = repeat.getGroupSetNum(); + auto expand_rows = block.rows(); + auto grouping_set_num = expand.getGroupSetNum(); ASSERT_EQ(origin_rows, 0); - ASSERT_EQ(origin_rows * grouping_set_num, repeat_rows); // 0 + ASSERT_EQ(origin_rows * grouping_set_num, expand_rows); // 0 // assert grouping set column are nullable. 
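// The zero-row case above leans on the guard added to Expand::replicateAndFillNull
// earlier in this series: with no input rows the replicate offsets are empty, so
// IColumn::replicate must be skipped entirely. A rough, self-contained model of
// that control flow (std::vector stands in for IColumn / IColumn::Offsets, and
// `replicateByOffsets` / `expandColumn` are illustrative names, not TiFlash API):
#include <cassert>
#include <cstddef>
#include <vector>

// offsets[i] is the cumulative output size after consuming input row i,
// mirroring the contract of IColumn::replicate.
std::vector<int> replicateByOffsets(const std::vector<int> & col, const std::vector<size_t> & offsets)
{
    assert(col.size() == offsets.size());
    std::vector<int> out;
    size_t prev = 0;
    for (size_t i = 0; i < col.size(); ++i)
    {
        for (size_t n = prev; n < offsets[i]; ++n)
            out.push_back(col[i]); // emit offsets[i] - prev copies of row i
        prev = offsets[i];
    }
    return out;
}

void expandColumn(std::vector<int> & col, const std::vector<size_t> & offsets)
{
    if (!offsets.empty()) // zero input rows: nothing to replicate, keep the column empty
        col = replicateByOffsets(col, offsets);
}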
} } diff --git a/dbms/src/TestUtils/mockExecutor.cpp b/dbms/src/TestUtils/mockExecutor.cpp index 1220e873a32..bcb4ec63a19 100644 --- a/dbms/src/TestUtils/mockExecutor.cpp +++ b/dbms/src/TestUtils/mockExecutor.cpp @@ -362,7 +362,7 @@ DAGRequestBuilder & DAGRequestBuilder::sort(MockOrderByItemVec order_by_vec, boo return *this; } -DAGRequestBuilder & DAGRequestBuilder::repeat(MockVVecColumnNameVec grouping_set_columns) +DAGRequestBuilder & DAGRequestBuilder::expand(MockVVecColumnNameVec grouping_set_columns) { assert(root); auto grouping_sets_ast = mock::MockVVecGroupingNameVec(); @@ -381,7 +381,7 @@ DAGRequestBuilder & DAGRequestBuilder::repeat(MockVVecColumnNameVec grouping_set } grouping_sets_ast.emplace_back(std::move(grouping_set_ast)); } - root = compileRepeat(root, getExecutorIndex(), grouping_sets_ast, grouping_col_collection); + root = compileExpand(root, getExecutorIndex(), grouping_sets_ast, grouping_col_collection); return *this; } diff --git a/dbms/src/TestUtils/mockExecutor.h b/dbms/src/TestUtils/mockExecutor.h index df9d162f691..da369c69ff7 100644 --- a/dbms/src/TestUtils/mockExecutor.h +++ b/dbms/src/TestUtils/mockExecutor.h @@ -147,8 +147,8 @@ class DAGRequestBuilder DAGRequestBuilder & sort(MockOrderByItem order_by, bool is_partial_sort, uint64_t fine_grained_shuffle_stream_count = 0); DAGRequestBuilder & sort(MockOrderByItemVec order_by_vec, bool is_partial_sort, uint64_t fine_grained_shuffle_stream_count = 0); - // repeat - DAGRequestBuilder & repeat(MockVVecColumnNameVec grouping_set_columns); + // expand + DAGRequestBuilder & expand(MockVVecColumnNameVec grouping_set_columns); void setCollation(Int32 collator_) { properties.collator = convertToTiDBCollation(collator_); } Int32 getCollation() const { return abs(properties.collator); } From 62dc142c270a8f2d646f26b63f0968f22370c5f5 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Tue, 10 Jan 2023 15:28:12 +0800 Subject: [PATCH 12/31] rename file Signed-off-by: AilinKid <3148019@qq.com> --- .../{gtest_repeat_executor.cpp => gtest_expand_executor.cpp} | 2 +- .../tests/{gtest_block_repeat.cpp => gtest_block_expand.cpp} | 0 dbms/src/TestUtils/executorSerializer.cpp | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename dbms/src/Flash/tests/{gtest_repeat_executor.cpp => gtest_expand_executor.cpp} (99%) rename dbms/src/Interpreters/tests/{gtest_block_repeat.cpp => gtest_block_expand.cpp} (100%) diff --git a/dbms/src/Flash/tests/gtest_repeat_executor.cpp b/dbms/src/Flash/tests/gtest_expand_executor.cpp similarity index 99% rename from dbms/src/Flash/tests/gtest_repeat_executor.cpp rename to dbms/src/Flash/tests/gtest_expand_executor.cpp index 1b5aaa6f04e..93b7974c852 100644 --- a/dbms/src/Flash/tests/gtest_repeat_executor.cpp +++ b/dbms/src/Flash/tests/gtest_expand_executor.cpp @@ -39,7 +39,7 @@ class ExpandExecutorTestRunner : public DB::tests::ExecutorTest TEST_F(ExpandExecutorTestRunner, ExpandLogical) try { - /// case 1block.getByName(grouping_col).column->isColumnNullable() + /// case 1 auto request = context .scan("test_db", "test_table") .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) diff --git a/dbms/src/Interpreters/tests/gtest_block_repeat.cpp b/dbms/src/Interpreters/tests/gtest_block_expand.cpp similarity index 100% rename from dbms/src/Interpreters/tests/gtest_block_repeat.cpp rename to dbms/src/Interpreters/tests/gtest_block_expand.cpp diff --git a/dbms/src/TestUtils/executorSerializer.cpp 
b/dbms/src/TestUtils/executorSerializer.cpp index 46f0d3b03a1..bffa9baf3cf 100644 --- a/dbms/src/TestUtils/executorSerializer.cpp +++ b/dbms/src/TestUtils/executorSerializer.cpp @@ -307,7 +307,7 @@ void ExecutorSerializer::serializeListStruct(const tipb::DAGRequest * dag_reques serializeLimit("Limit", executor.limit(), buf); break; case tipb::ExecType::TypeExpand: - serializeExpandSource("Repeat", executor.expand(), buf); + serializeExpandSource("Expand", executor.expand(), buf); break; default: throw TiFlashException("Should not reach here", Errors::Coprocessor::Internal); From 9d4077143febd1b4ac04b528bb380fe37df841ed Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Tue, 10 Jan 2023 17:47:51 +0800 Subject: [PATCH 13/31] fix test under new rebased code Signed-off-by: AilinKid <3148019@qq.com> --- dbms/src/Flash/tests/gtest_expand_executor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Flash/tests/gtest_expand_executor.cpp b/dbms/src/Flash/tests/gtest_expand_executor.cpp index 93b7974c852..c2d430e2416 100644 --- a/dbms/src/Flash/tests/gtest_expand_executor.cpp +++ b/dbms/src/Flash/tests/gtest_expand_executor.cpp @@ -370,9 +370,9 @@ CreatingSets PartialSorting x 10: limit = 2 Expression: Expression: - HashJoinProbe: + HashJoinProbe: Expression: - Expand: : grouping set [<{count(s1)_collator_46 }><{any(s2)_collator_46 }>] + Expand: : grouping set [<{count(exchange_receiver_0)_collator_46 }><{any(exchange_receiver_1)_collator_46 }>] Expression: SharedQuery: ParallelAggregating, max_threads: 10, final: true From a34e95272f6faf3558afc48f2bff52aa36933a31 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Tue, 10 Jan 2023 18:51:33 +0800 Subject: [PATCH 14/31] address haisheng's comment Signed-off-by: AilinKid <3148019@qq.com> --- .../DataStreams/ExpandBlockInputStream.cpp | 40 ---------------- dbms/src/DataStreams/ExpandBlockInputStream.h | 46 ------------------- .../ExpressionBlockInputStream.cpp | 7 +++ .../DataStreams/ExpressionBlockInputStream.h | 1 + .../Coprocessor/DAGQueryBlockInterpreter.cpp | 4 +- dbms/src/Flash/Planner/PhysicalPlan.cpp | 1 + .../Flash/Planner/Plans/PhysicalExpand.cpp | 4 +- .../src/Flash/tests/gtest_expand_executor.cpp | 2 +- dbms/src/Interpreters/Expand.cpp | 8 ++-- 9 files changed, 18 insertions(+), 95 deletions(-) delete mode 100644 dbms/src/DataStreams/ExpandBlockInputStream.cpp delete mode 100644 dbms/src/DataStreams/ExpandBlockInputStream.h diff --git a/dbms/src/DataStreams/ExpandBlockInputStream.cpp b/dbms/src/DataStreams/ExpandBlockInputStream.cpp deleted file mode 100644 index 2f502c3f708..00000000000 --- a/dbms/src/DataStreams/ExpandBlockInputStream.cpp +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include - -namespace DB -{ -Block ExpandBlockInputStream::readImpl() -{ - Block block = children.back()->read(); - if (!block) - return block; - expand_actions->execute(block); - return block; -} - -Block ExpandBlockInputStream::getHeader() const -{ - Block res = children.back()->getHeader(); - expand_actions->execute(res); - return res; -} - -void ExpandBlockInputStream::appendInfo(FmtBuffer & buffer) const { - buffer.fmtAppend(": grouping set "); - expand_actions.get()->getActions()[0].expand->getGroupingSetsDes(buffer); -} - -} // namespace DB diff --git a/dbms/src/DataStreams/ExpandBlockInputStream.h b/dbms/src/DataStreams/ExpandBlockInputStream.h deleted file mode 100644 index e502a8c8e1f..00000000000 --- a/dbms/src/DataStreams/ExpandBlockInputStream.h +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include -#include -#include - -namespace DB -{ -class ExpandBlockInputStream : public IProfilingBlockInputStream -{ - static constexpr auto NAME = "Expand"; - -public: - ExpandBlockInputStream( - const BlockInputStreamPtr & input, - ExpressionActionsPtr expand_actions_) - : expand_actions(expand_actions_) - { - children.push_back(input); - } - String getName() const override { return NAME; } - Block getHeader() const override; - void appendInfo(FmtBuffer & buffer) const override; - -protected: - Block readImpl() override; - -private: - ExpressionActionsPtr expand_actions; -}; - -} // namespace DB - diff --git a/dbms/src/DataStreams/ExpressionBlockInputStream.cpp b/dbms/src/DataStreams/ExpressionBlockInputStream.cpp index b288155c142..285e8ab61d7 100644 --- a/dbms/src/DataStreams/ExpressionBlockInputStream.cpp +++ b/dbms/src/DataStreams/ExpressionBlockInputStream.cpp @@ -54,4 +54,11 @@ Block ExpressionBlockInputStream::readImpl() return res; } +void ExpressionBlockInputStream::appendInfo(FmtBuffer & buffer) const { + if (!expression.get()->getActions().empty() && expression.get()->getActions()[0].expand != nullptr) { + buffer.fmtAppend(": grouping set "); + expression.get()->getActions()[0].expand->getGroupingSetsDes(buffer); + } +} + } // namespace DB diff --git a/dbms/src/DataStreams/ExpressionBlockInputStream.h b/dbms/src/DataStreams/ExpressionBlockInputStream.h index 1d1e059d51d..73141446414 100644 --- a/dbms/src/DataStreams/ExpressionBlockInputStream.h +++ b/dbms/src/DataStreams/ExpressionBlockInputStream.h @@ -41,6 +41,7 @@ class ExpressionBlockInputStream : public IProfilingBlockInputStream String getName() const override { return NAME; } Block getTotals() override; Block getHeader() const override; + void appendInfo(FmtBuffer & buffer) const override; protected: Block readImpl() override; diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 0013b4c5af1..9634dd38681 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -18,7 +18,7 @@ #include 
#include #include -#include +#include #include #include #include @@ -749,7 +749,7 @@ void DAGQueryBlockInterpreter::executeLimit(DAGPipeline & pipeline) void DAGQueryBlockInterpreter::executeExpand(DAGPipeline & pipeline, const ExpressionActionsPtr & expr) { pipeline.transform([&](auto &stream) { - stream = std::make_shared(stream, expr); + stream = std::make_shared(stream, expr, log->identifier()); }); } diff --git a/dbms/src/Flash/Planner/PhysicalPlan.cpp b/dbms/src/Flash/Planner/PhysicalPlan.cpp index 42729693433..12badaae469 100644 --- a/dbms/src/Flash/Planner/PhysicalPlan.cpp +++ b/dbms/src/Flash/Planner/PhysicalPlan.cpp @@ -199,6 +199,7 @@ void PhysicalPlan::build(const String & executor_id, const tipb::Executor * exec } case tipb::ExecType::TypeExpand: { + GET_METRIC(tiflash_coprocessor_executor_count, type_expand).Increment(); pushBack(PhysicalExpand::build(context, executor_id, log, executor->expand(), popBack())); break; } diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp index 9a52c21b62f..4d459ba08c4 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp +++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include @@ -77,7 +77,7 @@ void PhysicalExpand::expandTransform(DAGPipeline & child_pipeline, Context & con expand_actions->add(ExpressionAction::expandSource(shared_expand)); String expand_extra_info = fmt::format("expand, expand_executor_id = {}", execId()); child_pipeline.transform([&](auto &stream) { - stream = std::make_shared(stream, expand_actions); + stream = std::make_shared(stream, expand_actions, log->identifier()); stream->setExtraInfo(expand_extra_info); }); } diff --git a/dbms/src/Flash/tests/gtest_expand_executor.cpp b/dbms/src/Flash/tests/gtest_expand_executor.cpp index c2d430e2416..004c568560b 100644 --- a/dbms/src/Flash/tests/gtest_expand_executor.cpp +++ b/dbms/src/Flash/tests/gtest_expand_executor.cpp @@ -372,7 +372,7 @@ CreatingSets Expression: HashJoinProbe: Expression: - Expand: : grouping set [<{count(exchange_receiver_0)_collator_46 }><{any(exchange_receiver_1)_collator_46 }>] + Expression: : grouping set [{}{}] Expression: SharedQuery: ParallelAggregating, max_threads: 10, final: true diff --git a/dbms/src/Interpreters/Expand.cpp b/dbms/src/Interpreters/Expand.cpp index 09bbdd5d662..ee1753bf247 100644 --- a/dbms/src/Interpreters/Expand.cpp +++ b/dbms/src/Interpreters/Expand.cpp @@ -41,10 +41,10 @@ void Expand::getGroupingSetsDes(FmtBuffer & buffer) const buffer.fmtAppend("["); for (const auto & grouping_set: group_sets_names) { - buffer.fmtAppend("<"); + buffer.fmtAppend("{{"); for (const auto & grouping_exprs: grouping_set) { - buffer.fmtAppend("{{"); + buffer.fmtAppend("<"); for ( size_t i = 0; i < grouping_exprs.size(); i++) { if (i != 0) { @@ -52,9 +52,9 @@ void Expand::getGroupingSetsDes(FmtBuffer & buffer) const } buffer.fmtAppend(grouping_exprs.at(i)); } - buffer.fmtAppend("}}"); + buffer.fmtAppend(">"); } - buffer.fmtAppend(">"); + buffer.fmtAppend("}}"); } buffer.fmtAppend("]"); } From db7b1ffb87c6c986ba480afbe056c5c7bdd45880 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Tue, 10 Jan 2023 23:22:12 +0800 Subject: [PATCH 15/31] clang fmt Signed-off-by: AilinKid <3148019@qq.com> --- .../ExpressionBlockInputStream.cpp | 8 +- dbms/src/Debug/MockExecutor/ExpandBinder.cpp | 8 +- .../Coprocessor/DAGExpressionAnalyzer.cpp | 22 +++-- .../Coprocessor/DAGQueryBlockInterpreter.cpp | 9 +- 
.../Coprocessor/DAGQueryBlockInterpreter.h | 5 +- .../Coprocessor/collectOutputFieldTypes.cpp | 13 +-- .../Flash/Planner/Plans/PhysicalExpand.cpp | 9 +- dbms/src/Flash/Planner/Plans/PhysicalExpand.h | 8 +- .../src/Flash/Statistics/CommonExecutorImpl.h | 4 +- .../src/Flash/tests/gtest_expand_executor.cpp | 89 +++++++++++++++---- dbms/src/Interpreters/Expand.cpp | 66 +++++++------- dbms/src/Interpreters/Expand.h | 7 +- dbms/src/Interpreters/ExpressionActions.cpp | 5 +- .../Interpreters/tests/gtest_block_expand.cpp | 39 +++++--- dbms/src/TestUtils/ExecutorTestUtils.cpp | 2 +- dbms/src/TestUtils/executorSerializer.cpp | 5 +- dbms/src/TestUtils/mockExecutor.cpp | 12 +-- dbms/src/TestUtils/mockExecutor.h | 2 +- 18 files changed, 203 insertions(+), 110 deletions(-) diff --git a/dbms/src/DataStreams/ExpressionBlockInputStream.cpp b/dbms/src/DataStreams/ExpressionBlockInputStream.cpp index 285e8ab61d7..27daa61152a 100644 --- a/dbms/src/DataStreams/ExpressionBlockInputStream.cpp +++ b/dbms/src/DataStreams/ExpressionBlockInputStream.cpp @@ -54,10 +54,12 @@ Block ExpressionBlockInputStream::readImpl() return res; } -void ExpressionBlockInputStream::appendInfo(FmtBuffer & buffer) const { - if (!expression.get()->getActions().empty() && expression.get()->getActions()[0].expand != nullptr) { +void ExpressionBlockInputStream::appendInfo(FmtBuffer & buffer) const +{ + if (!expression.get()->getActions().empty() && expression.get()->getActions()[0].expand != nullptr) + { buffer.fmtAppend(": grouping set "); - expression.get()->getActions()[0].expand->getGroupingSetsDes(buffer); + expression.get()->getActions()[0].expand->getGroupingSetsDes(buffer); } } diff --git a/dbms/src/Debug/MockExecutor/ExpandBinder.cpp b/dbms/src/Debug/MockExecutor/ExpandBinder.cpp index 63fbfa28582..edc124104c4 100644 --- a/dbms/src/Debug/MockExecutor/ExpandBinder.cpp +++ b/dbms/src/Debug/MockExecutor/ExpandBinder.cpp @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include #include +#include namespace DB::mock { @@ -31,8 +31,8 @@ bool ExpandBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collat auto * ges = gss->add_grouping_exprs(); for (const auto & grouping_col : grouping_exprs) { - tipb::Expr* add_column = ges->add_grouping_expr(); - astToPB(children[0]->output_schema, grouping_col, add_column, collator_id, context); // ast column ref change to tipb:Expr column ref + tipb::Expr * add_column = ges->add_grouping_expr(); + astToPB(children[0]->output_schema, grouping_col, add_column, collator_id, context); // ast column ref change to tipb:Expr column ref } } } @@ -56,7 +56,7 @@ ExecutorBinderPtr compileExpand(ExecutorBinderPtr input, size_t & executor_index field_type.set_tp(TiDB::TypeLongLong); field_type.set_charset("binary"); field_type.set_collate(TiDB::ITiDBCollator::BINARY); - field_type.set_flag(TiDB::ColumnFlagUnsigned | TiDB::ColumnFlagNotNull); // should have NOT NULL FLAG + field_type.set_flag(TiDB::ColumnFlagUnsigned | TiDB::ColumnFlagNotNull); // should have NOT NULL FLAG field_type.set_flen(-1); field_type.set_decimal(-1); output_schema.push_back(std::make_pair("groupingID", TiDB::fieldTypeToColumnInfo(field_type))); diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index aa2c7014a5f..816b0e324dd 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -806,19 +806,24 @@ NamesAndTypes DAGExpressionAnalyzer::buildOrderColumns( } std::shared_ptr DAGExpressionAnalyzer::buildExpandGroupingColumns( - const tipb::Expand & expand, const ExpressionActionsPtr & actions) + const tipb::Expand & expand, + const ExpressionActionsPtr & actions) { GroupingSets group_sets_columns; std::map map_grouping_col; group_sets_columns.reserve(expand.grouping_sets().size()); - for (const auto& group_set : expand.grouping_sets()){ + for (const auto & group_set : expand.grouping_sets()) + { GroupingSet group_set_columns; group_set_columns.reserve(group_set.grouping_exprs().size()); - for (const auto &group_exprs : group_set.grouping_exprs()) { + for (const auto & group_exprs : group_set.grouping_exprs()) + { GroupingColumnNames group_exprs_columns; group_exprs_columns.reserve(group_exprs.grouping_expr().size()); - for (const auto& group_expr : group_exprs.grouping_expr()){ - if (group_expr.tp() != tipb::ColumnRef){ + for (const auto & group_expr : group_exprs.grouping_expr()) + { + if (group_expr.tp() != tipb::ColumnRef) + { throw TiFlashException("grouping sets expression should be column expr", Errors::Coprocessor::BadRequest); } String cp_name = getActions(group_expr, actions); @@ -832,7 +837,7 @@ std::shared_ptr DAGExpressionAnalyzer::buildExpandGroupingColumns( group_sets_columns.emplace_back(std::move(group_set_columns)); } // change the original source column to be nullable, and add a new column for groupingID. 
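// The rewrite described by the comment above can be pictured at the schema level:
// every column referenced by any grouping set becomes nullable (its value is
// blanked out in the replicas owned by other grouping sets), and a NOT NULL
// unsigned groupingID column is appended. A self-contained sketch under those
// assumptions — plain structs stand in for NamesAndTypes/DataTypePtr, and
// `rewriteSchemaForExpand` is an illustrative name, not part of DAGExpressionAnalyzer:
#include <set>
#include <string>
#include <vector>

struct ColumnDef
{
    std::string name;
    std::string type;
    bool nullable = false;
};

void rewriteSchemaForExpand(std::vector<ColumnDef> & schema, const std::set<std::string> & grouping_cols)
{
    for (auto & col : schema)
        if (grouping_cols.count(col.name) > 0)
            col.nullable = true; // may be filled with NULL in replicas of other grouping sets
    schema.push_back({"groupingID", "UInt64", /*nullable=*/false}); // identifies the owning grouping set, starting from 1
}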
- for (auto & mutable_one: source_columns) + for (auto & mutable_one : source_columns) { if (map_grouping_col[mutable_one.name]) mutable_one.type = makeNullable(mutable_one.type); @@ -843,10 +848,11 @@ std::shared_ptr DAGExpressionAnalyzer::buildExpandGroupingColumns( } ExpressionActionsPtr DAGExpressionAnalyzer::appendExpand( - const tipb::Expand & expand, ExpressionActionsChain & chain) + const tipb::Expand & expand, + ExpressionActionsChain & chain) { auto & last_step = initAndGetLastStep(chain); - for (const auto &origin_col : last_step.actions->getSampleBlock().getNamesAndTypesList()) + for (const auto & origin_col : last_step.actions->getSampleBlock().getNamesAndTypesList()) { last_step.required_output.push_back(origin_col.name); } diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 9634dd38681..887ca17c38c 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -140,7 +140,8 @@ AnalysisResult analyzeExpressions( chain.addStep(); } - if (query_block.expand) { + if (query_block.expand) + { res.before_expand = analyzer.appendExpand(query_block.expand->expand(), chain); } @@ -273,10 +274,10 @@ void DAGQueryBlockInterpreter::handleJoin(const tipb::Join & join, DAGPipeline & = tiflash_join.genJoinOtherConditionAction(context, left_input_header, right_input_header, probe_side_prepare_actions); const Settings & settings = context.getSettingsRef(); - size_t max_block_size_for_cross_join = settings.max_block_size; + size_t max_block_size_for_cross_join = settings.max_block_size; fiu_do_on(FailPoints::minimum_block_size_for_cross_join, { max_block_size_for_cross_join = 1; }); - JoinPtr join_ptr = std::make_shared( // make join + JoinPtr join_ptr = std::make_shared( // make join probe_key_names, build_key_names, tiflash_join.kind, @@ -748,7 +749,7 @@ void DAGQueryBlockInterpreter::executeLimit(DAGPipeline & pipeline) void DAGQueryBlockInterpreter::executeExpand(DAGPipeline & pipeline, const ExpressionActionsPtr & expr) { - pipeline.transform([&](auto &stream) { + pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, expr, log->identifier()); }); } diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h index 48edf039ff5..c3cd27beacf 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h @@ -92,7 +92,10 @@ class DAGQueryBlockInterpreter void restorePipelineConcurrency(DAGPipeline & pipeline); - DAGContext & dagContext() const { return *context.getDAGContext(); } + DAGContext & dagContext() const + { + return *context.getDAGContext(); + } Context & context; std::vector input_streams_vec; diff --git a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp index 3b5c94a81d8..94c1b6c573a 100644 --- a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp +++ b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp @@ -100,18 +100,21 @@ bool collectForTableScan(std::vector & output_field_types, cons return false; } -bool collectForExpand(std::vector &out_field_types, const tipb::Executor & executor) +bool collectForExpand(std::vector & out_field_types, const tipb::Executor & executor) { - auto &out_child_fields = out_field_types; + auto & out_child_fields = out_field_types; // collect output_field_types of children 
getChildren(executor).forEach([&out_child_fields](const tipb::Executor & child) { traverseExecutorTree(child, [&out_child_fields](const tipb::Executor & e) { return collectForExecutor(out_child_fields, e); }); }); // make the columns from grouping sets nullable. - for (const auto & grouping_set : executor.expand().grouping_sets()){ - for (const auto & grouping_exprs : grouping_set.grouping_exprs()){ - for (const auto & grouping_col : grouping_exprs.grouping_expr()){ + for (const auto & grouping_set : executor.expand().grouping_sets()) + { + for (const auto & grouping_exprs : grouping_set.grouping_exprs()) + { + for (const auto & grouping_col : grouping_exprs.grouping_expr()) + { // assert that: grouping_col must be the column ref guaranteed by tidb. auto column_index = decodeDAGInt64(grouping_col.val()); if (column_index < 0 || column_index >= static_cast(out_child_fields.size())) diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp index 4d459ba08c4..b43d676b23f 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp +++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp @@ -55,7 +55,7 @@ PhysicalPlanNodePtr PhysicalExpand::build( auto child_header = child->getSchema(); for (const auto & one : child_header) { - expand_output_columns.emplace_back(one.name, shared_expand->isInGroupSetColumn(one.name)? makeNullable(one.type): one.type); + expand_output_columns.emplace_back(one.name, shared_expand->isInGroupSetColumn(one.name) ? makeNullable(one.type) : one.type); } expand_output_columns.emplace_back(shared_expand->grouping_identifier_column_name, shared_expand->grouping_identifier_column_type); @@ -76,7 +76,7 @@ void PhysicalExpand::expandTransform(DAGPipeline & child_pipeline, Context & con auto expand_actions = PhysicalPlanHelper::newActions(child_pipeline.firstStream()->getHeader(), context); expand_actions->add(ExpressionAction::expandSource(shared_expand)); String expand_extra_info = fmt::format("expand, expand_executor_id = {}", execId()); - child_pipeline.transform([&](auto &stream) { + child_pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, expand_actions, log->identifier()); stream->setExtraInfo(expand_extra_info); }); @@ -92,7 +92,7 @@ void PhysicalExpand::finalize(const Names & parent_require) { FinalizeHelper::checkSchemaContainsParentRequire(schema, parent_require); Names required_output; - required_output.reserve( shared_expand->getGroupSetNum()); // grouping set column should be existed in the child output schema. + required_output.reserve(shared_expand->getGroupSetNum()); // grouping set column should be existed in the child output schema. auto name_set = std::set(); shared_expand->getAllGroupSetColumnNames(name_set); // append parent_require column it may expect self-filled groupingID. 
@@ -103,7 +103,8 @@ void PhysicalExpand::finalize(const Names & parent_require) name_set.insert(one); } } - for (const auto & grouping_name: name_set) { + for (const auto & grouping_name : name_set) + { required_output.emplace_back(grouping_name); } child->finalize(required_output); diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.h b/dbms/src/Flash/Planner/Plans/PhysicalExpand.h index 6c798ad35c3..14b910d8a75 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.h +++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.h @@ -39,7 +39,9 @@ class PhysicalExpand : public PhysicalUnary const std::shared_ptr & shared_expand, const Block & sample_block_) : PhysicalUnary(executor_id_, PlanType::Expand, schema_, req_id, child_) - , shared_expand(shared_expand), sample_block(sample_block_){} + , shared_expand(shared_expand) + , sample_block(sample_block_) + {} void finalize(const Names & parent_require) override; @@ -52,6 +54,4 @@ class PhysicalExpand : public PhysicalUnary std::shared_ptr shared_expand; Block sample_block; }; -} // namespace DB - - +} // namespace DB diff --git a/dbms/src/Flash/Statistics/CommonExecutorImpl.h b/dbms/src/Flash/Statistics/CommonExecutorImpl.h index 56d55ea415f..42afeab9971 100644 --- a/dbms/src/Flash/Statistics/CommonExecutorImpl.h +++ b/dbms/src/Flash/Statistics/CommonExecutorImpl.h @@ -62,9 +62,9 @@ struct ExpandImpl { static constexpr bool has_extra_info = false; - static constexpr auto type = "Expand"; + static constexpr auto type = "Expand"; - static bool isMatch(const tipb::Executor *executor) + static bool isMatch(const tipb::Executor * executor) { return executor->has_expand(); } diff --git a/dbms/src/Flash/tests/gtest_expand_executor.cpp b/dbms/src/Flash/tests/gtest_expand_executor.cpp index 004c568560b..e27a6fe0b9f 100644 --- a/dbms/src/Flash/tests/gtest_expand_executor.cpp +++ b/dbms/src/Flash/tests/gtest_expand_executor.cpp @@ -42,7 +42,14 @@ try /// case 1 auto request = context .scan("test_db", "test_table") - .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + .expand(MockVVecColumnNameVec{ + MockVecColumnNameVec{ + MockColumnNameVec{"s1"}, + }, + MockVecColumnNameVec{ + MockColumnNameVec{"s2"}, + }, + }) .build(context); /// data flow: /// @@ -64,13 +71,20 @@ try request, {toNullableVec({"banana", {}, {}, {}, "banana", {}}), toNullableVec({{}, "apple", {}, {}, {}, "banana"}), - toVec({1,2,1,2,1,2})}); + toVec({1, 2, 1, 2, 1, 2})}); /// case 2 request = context .scan("test_db", "test_table") .filter(eq(col("s1"), col("s2"))) - .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + .expand(MockVVecColumnNameVec{ + MockVecColumnNameVec{ + MockColumnNameVec{"s1"}, + }, + MockVecColumnNameVec{ + MockColumnNameVec{"s2"}, + }, + }) .build(context); /// data flow: /// @@ -92,7 +106,7 @@ try request, {toNullableVec({"banana", {}}), toNullableVec({{}, "banana"}), - toVec({1,2})}); + toVec({1, 2})}); /// case 3: this case is only for non-planner mode. 
/// request = context @@ -113,8 +127,15 @@ try auto const_false = lit(Field(static_cast(0))); request = context .scan("test_db", "test_table") - .filter(const_false) // refuse all rows - .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + .filter(const_false) // refuse all rows + .expand(MockVVecColumnNameVec{ + MockVecColumnNameVec{ + MockColumnNameVec{"s1"}, + }, + MockVecColumnNameVec{ + MockColumnNameVec{"s2"}, + }, + }) .build(context); executeAndAssertColumnsEqual( request, @@ -127,13 +148,22 @@ try .build(context); executeAndAssertColumnsEqual( request, - {toVec({1, 0, 1}), - toNullableVec({"apple", {}, "banana"}),}); + { + toVec({1, 0, 1}), + toNullableVec({"apple", {}, "banana"}), + }); request = context .scan("test_db", "test_table") .aggregation({Count(col("s1"))}, {col("s2")}) - .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + .expand(MockVVecColumnNameVec{ + MockVecColumnNameVec{ + MockColumnNameVec{"count(s1)"}, + }, + MockVecColumnNameVec{ + MockColumnNameVec{"s2"}, + }, + }) .build(context); /// data flow: /// @@ -159,20 +189,27 @@ try /// executeAndAssertColumnsEqual( request, - {toNullableVec({1, {}, 0, {}, 1,{}}), - toNullableVec({{}, "apple", {},{},{}, "banana"}), - toVec({1,2,1,2,1,2})}); + {toNullableVec({1, {}, 0, {}, 1, {}}), + toNullableVec({{}, "apple", {}, {}, {}, "banana"}), + toVec({1, 2, 1, 2, 1, 2})}); /// case 5 (test integrated with aggregation and projection) request = context .scan("test_db", "test_table") .aggregation({Count(col("s1"))}, {col("s2")}) - .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + .expand(MockVVecColumnNameVec{ + MockVecColumnNameVec{ + MockColumnNameVec{"count(s1)"}, + }, + MockVecColumnNameVec{ + MockColumnNameVec{"s2"}, + }, + }) .project({"count(s1)"}) .build(context); executeAndAssertColumnsEqual( request, - {toNullableVec({1, {}, 0, {}, 1,{}})}); + {toNullableVec({1, {}, 0, {}, 1, {}})}); /// case 6 (test integrated with aggregation and projection and limit) 1 /// note: by now, limit is executed before expand does to reduce unnecessary row expand work. 
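// The note above is worth quantifying: expand multiplies the row count by the
// number of grouping sets, so running limit first shrinks the replication work
// by the same factor. A tiny illustration, with numbers assumed purely for
// demonstration:
#include <algorithm>
#include <cstddef>
#include <iostream>

int main()
{
    const size_t input_rows = 1000, grouping_sets = 2, limit = 2;
    // expand first, then limit: every row is replicated before most are thrown away
    const size_t expand_then_limit_work = input_rows * grouping_sets; // 2000 rows materialized
    // limit first, then expand: only the surviving rows are replicated
    const size_t limit_then_expand_work = std::min(input_rows, limit) * grouping_sets; // 4 rows
    std::cout << expand_then_limit_work << " vs " << limit_then_expand_work << '\n';
    return 0;
}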
@@ -227,7 +264,14 @@ try request = context .scan("test_db", "test_table") .aggregation({Count(col("s1"))}, {col("s2")}) - .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + .expand(MockVVecColumnNameVec{ + MockVecColumnNameVec{ + MockColumnNameVec{"count(s1)"}, + }, + MockVecColumnNameVec{ + MockColumnNameVec{"s2"}, + }, + }) .project({"count(s1)"}) .topN({{"count(s1)", true}}, 2) .build(context); @@ -300,7 +344,14 @@ try request = context .receive("exchange1") .aggregation({Count(col("s1"))}, {col("s2")}) - .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + .expand(MockVVecColumnNameVec{ + MockVecColumnNameVec{ + MockColumnNameVec{"count(s1)"}, + }, + MockVecColumnNameVec{ + MockColumnNameVec{"s2"}, + }, + }) .join(context.scan("test_db", "test_table").project({"s2"}), tipb::JoinType::TypeInnerJoin, {col("s2")}) .project({"count(s1)", "groupingID"}) .topN({{"groupingID", true}}, 2) @@ -349,8 +400,10 @@ try /// executeAndAssertColumnsEqual( request, - {toNullableVec({{}, {}}), - toVec({2,2}),}); + { + toNullableVec({{}, {}}), + toVec({2, 2}), + }); /// assert the input stream plan format. (under planner-enabled mode) diff --git a/dbms/src/Interpreters/Expand.cpp b/dbms/src/Interpreters/Expand.cpp index ee1753bf247..6280a05763d 100644 --- a/dbms/src/Interpreters/Expand.cpp +++ b/dbms/src/Interpreters/Expand.cpp @@ -23,7 +23,7 @@ namespace DB { -namespace /// anonymous namespace for storing private function utils. +namespace /// anonymous namespace for storing private function utils. { void convertColumnToNullable(ColumnWithTypeAndName & column) { @@ -31,23 +31,25 @@ void convertColumnToNullable(ColumnWithTypeAndName & column) if (column.column) column.column = makeNullable(column.column); } -} +} // namespace Expand::Expand(const DB::GroupingSets & gss) - : group_sets_names(gss){} + : group_sets_names(gss) +{} void Expand::getGroupingSetsDes(FmtBuffer & buffer) const { buffer.fmtAppend("["); - for (const auto & grouping_set: group_sets_names) + for (const auto & grouping_set : group_sets_names) { buffer.fmtAppend("{{"); - for (const auto & grouping_exprs: grouping_set) + for (const auto & grouping_exprs : grouping_set) { buffer.fmtAppend("<"); - for ( size_t i = 0; i < grouping_exprs.size(); i++) + for (size_t i = 0; i < grouping_exprs.size(); i++) { - if (i != 0) { + if (i != 0) + { buffer.fmtAppend(","); } buffer.fmtAppend(grouping_exprs.at(i)); @@ -111,7 +113,7 @@ void Expand::replicateAndFillNull(Block & block) const { // start from 1. Field grouping_id = j + 1; - added_grouping_id_column[0]->insert(grouping_id); + added_grouping_id_column[0]->insert(grouping_id); } } // todo: for some column overlapping in different grouping set, we should copy the overlapped column as a new column @@ -185,16 +187,18 @@ void Expand::replicateAndFillNull(Block & block) const // eg: for case above, for grouping_offset of = 0, we only set the every offset = 0 in each // small replicate_group_x to null. // - for (UInt64 j = 0; j < replicate_times_for_one_row; j++){ - if (j == grouping_offset) { - // only keep this column value for targeted replica. - continue; - } - // set this column as null for all the other targeted replica. 
- // todo: since nullable column always be prior to computation of null value first, should we clean the old data at the same pos in nested column - auto computed_offset = i * replicate_times_for_one_row + j; - cloned_one->getNullMapData().data()[computed_offset] = 1; - } + for (UInt64 j = 0; j < replicate_times_for_one_row; j++) + { + if (j == grouping_offset) + { + // only keep this column value for targeted replica. + continue; + } + // set this column as null for all the other targeted replica. + // todo: since nullable column always be prior to computation of null value first, should we clean the old data at the same pos in nested column + auto computed_offset = i * replicate_times_for_one_row + j; + cloned_one->getNullMapData().data()[computed_offset] = 1; + } } block.getByName(grouping_col).column = std::move(cloned_one); } @@ -204,16 +208,18 @@ void Expand::replicateAndFillNull(Block & block) const // return input from block. } -bool Expand::isInGroupSetColumn(String name) const{ - for(const auto& it1 : group_sets_names) +bool Expand::isInGroupSetColumn(String name) const +{ + for (const auto & it1 : group_sets_names) { // for every grouping set. - for(const auto& it2 : it1) + for (const auto & it2 : it1) { // for every grouping exprs - for(const auto& it3 : it2) + for (const auto & it3 : it2) { - if (it3 == name){ + if (it3 == name) + { return true; } } @@ -222,21 +228,21 @@ bool Expand::isInGroupSetColumn(String name) const{ return false; } -const GroupingColumnNames& Expand::getGroupSetColumnNamesByOffset(size_t offset) const +const GroupingColumnNames & Expand::getGroupSetColumnNamesByOffset(size_t offset) const { /// currently, there only can be one groupingExprs in one groupingSet before the planner supporting the grouping set merge. return group_sets_names[offset][0]; } -void Expand::getAllGroupSetColumnNames(std::set& name_set) const +void Expand::getAllGroupSetColumnNames(std::set & name_set) const { - for(const auto& it1 : group_sets_names) + for (const auto & it1 : group_sets_names) { // for every grouping set. - for(const auto& it2 : it1) + for (const auto & it2 : it1) { // for every grouping exprs - for(const auto& it3 : it2) + for (const auto & it3 : it2) { name_set.insert(it3); } @@ -246,9 +252,9 @@ void Expand::getAllGroupSetColumnNames(std::set& name_set) const std::shared_ptr Expand::sharedExpand(const GroupingSets & groupingSets) { - return std::make_shared(groupingSets); + return std::make_shared(groupingSets); } const std::string Expand::grouping_identifier_column_name = "groupingID"; const DataTypePtr Expand::grouping_identifier_column_type = std::make_shared(); -} +} // namespace DB diff --git a/dbms/src/Interpreters/Expand.h b/dbms/src/Interpreters/Expand.h index 33f9f94f024..e2128f476fd 100644 --- a/dbms/src/Interpreters/Expand.h +++ b/dbms/src/Interpreters/Expand.h @@ -51,7 +51,6 @@ using GroupingSet = std::vector; using GroupingSets = std::vector; - /** Data structure for implementation of Expand. * * Expand is a kind of operator used for replicate low-layer datasource rows to feed different aggregate @@ -119,13 +118,13 @@ class Expand // to illustrate what group this row is targeted for. 
void replicateAndFillNull(Block & block) const; - size_t getGroupSetNum() const {return group_sets_names.size();} + size_t getGroupSetNum() const { return group_sets_names.size(); } bool isInGroupSetColumn(String name) const; - const GroupingColumnNames& getGroupSetColumnNamesByOffset(size_t offset) const; + const GroupingColumnNames & getGroupSetColumnNamesByOffset(size_t offset) const; - void getAllGroupSetColumnNames(std::set& name_set) const; + void getAllGroupSetColumnNames(std::set & name_set) const; static std::shared_ptr sharedExpand(const GroupingSets & groupingSets); diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index ff7cec2b382..f5bc85d4903 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -144,7 +144,7 @@ ExpressionAction ExpressionAction::expandSource(std::shared_ptr ex } -void ExpressionAction::prepare(Block & sample_block) +void ExpressionAction::prepare(Block & sample_block) { /** Constant expressions should be evaluated, and put the result in sample_block. */ @@ -242,7 +242,8 @@ void ExpressionAction::prepare(Block & sample_block) auto name_set = std::set(); expand->getAllGroupSetColumnNames(name_set); // make grouping set column to be nullable. - for (const auto & col_name: name_set) { + for (const auto & col_name : name_set) + { auto & column_with_name = sample_block.getByName(col_name); column_with_name.type = makeNullable(column_with_name.type); if (column_with_name.column != nullptr) diff --git a/dbms/src/Interpreters/tests/gtest_block_expand.cpp b/dbms/src/Interpreters/tests/gtest_block_expand.cpp index 1a34e0fde30..97ef466c245 100644 --- a/dbms/src/Interpreters/tests/gtest_block_expand.cpp +++ b/dbms/src/Interpreters/tests/gtest_block_expand.cpp @@ -98,9 +98,12 @@ try const auto * col_1_nest = &static_cast(col_1->getNestedColumn()); for (int i = 0; i < int(expand_rows); ++i) { - if (res1[i] == "null") { + if (res1[i] == "null") + { ASSERT_EQ(col_1->isNullAt(i), true); - } else { + } + else + { ASSERT_EQ(col_1_nest->getDataAt(i), res1[i]); } } @@ -110,14 +113,17 @@ try const auto * col_2_nest = &static_cast(col_2->getNestedColumn()); for (int i = 0; i < int(expand_rows); ++i) { - if (res2[i] == "null") { + if (res2[i] == "null") + { ASSERT_EQ(col_2->isNullAt(i), true); - } else { + } + else + { ASSERT_EQ(col_2_nest->getDataAt(i), res2[i]); } } - const auto res3 = ColumnWithUInt64{1, 1,1,1, 0,0}; + const auto res3 = ColumnWithUInt64{1, 1, 1, 1, 0, 0}; const auto * col_3 = typeid_cast(block.getColumns()[3].get()); for (int i = 0; i < int(expand_rows); ++i) { @@ -192,9 +198,12 @@ try const auto * col_1_nest = &static_cast(col_1->getNestedColumn()); for (int i = 0; i < int(expand_rows); ++i) { - if (res1[i] == "null") { + if (res1[i] == "null") + { ASSERT_EQ(col_1->isNullAt(i), true); - } else { + } + else + { ASSERT_EQ(col_1_nest->getDataAt(i), res1[i]); } } @@ -204,9 +213,12 @@ try const auto * col_2_nest = &static_cast(col_2->getNestedColumn()); for (int i = 0; i < int(expand_rows); ++i) { - if (res2[i] == "null") { + if (res2[i] == "null") + { ASSERT_EQ(col_2->isNullAt(i), true); - } else { + } + else + { ASSERT_EQ(col_2_nest->getDataAt(i), res2[i]); } } @@ -217,9 +229,12 @@ try const auto * col_3_nest = &typeid_cast(col_3->getNestedColumn()); for (int i = 0; i < int(expand_rows); ++i) { - if (res3[i] == UInt64(-1)) { + if (res3[i] == UInt64(-1)) + { ASSERT_EQ(col_3->isNullAt(i), true); - } else { + } + else + { ASSERT_EQ(col_3_nest->getElement(i), 
res3[i]); } } @@ -236,7 +251,7 @@ try const ColumnsWithTypeAndName ori_col = { - toVec(col_name[0], ColumnWithInt64{}), // without data. + toVec(col_name[0], ColumnWithInt64{}), // without data. toVec(col_name[1], ColumnWithString{}), toVec(col_name[2], ColumnWithString{}), toVec(col_name[3], ColumnWithUInt64{}), diff --git a/dbms/src/TestUtils/ExecutorTestUtils.cpp b/dbms/src/TestUtils/ExecutorTestUtils.cpp index 403651569c2..505f5b2a284 100644 --- a/dbms/src/TestUtils/ExecutorTestUtils.cpp +++ b/dbms/src/TestUtils/ExecutorTestUtils.cpp @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include #include #include @@ -21,7 +22,6 @@ #include #include #include -#include #include diff --git a/dbms/src/TestUtils/executorSerializer.cpp b/dbms/src/TestUtils/executorSerializer.cpp index bffa9baf3cf..04b1df80abb 100644 --- a/dbms/src/TestUtils/executorSerializer.cpp +++ b/dbms/src/TestUtils/executorSerializer.cpp @@ -171,10 +171,11 @@ void serializeExpandSource(const String & executor_id, const tipb::Expand & expa buf.fmtAppend("{{"); for (auto i = 0; i < grouping_exprs.grouping_expr().size(); i++) { - if (i != 0) { + if (i != 0) + { buf.fmtAppend(","); } - auto expr = grouping_exprs.grouping_expr().Get(i); + auto expr = grouping_exprs.grouping_expr().Get(i); serializeExpression(expr, buf); } buf.fmtAppend("}}"); diff --git a/dbms/src/TestUtils/mockExecutor.cpp b/dbms/src/TestUtils/mockExecutor.cpp index bcb4ec63a19..d1be7e1c17f 100644 --- a/dbms/src/TestUtils/mockExecutor.cpp +++ b/dbms/src/TestUtils/mockExecutor.cpp @@ -365,15 +365,17 @@ DAGRequestBuilder & DAGRequestBuilder::sort(MockOrderByItemVec order_by_vec, boo DAGRequestBuilder & DAGRequestBuilder::expand(MockVVecColumnNameVec grouping_set_columns) { assert(root); - auto grouping_sets_ast = mock::MockVVecGroupingNameVec(); + auto grouping_sets_ast = mock::MockVVecGroupingNameVec(); auto grouping_col_collection = std::set(); - for (const auto & grouping_set : grouping_set_columns) { + for (const auto & grouping_set : grouping_set_columns) + { auto grouping_set_ast = mock::MockVecGroupingNameVec(); - for (const auto &grouping_exprs : grouping_set) { + for (const auto & grouping_exprs : grouping_set) + { auto grouping_exprs_ast = mock::MockGroupingNameVec(); - for (const auto &grouping_col : grouping_exprs) + for (const auto & grouping_col : grouping_exprs) { - auto ast_col_ptr = buildColumn(grouping_col); // string identifier change to ast column ref + auto ast_col_ptr = buildColumn(grouping_col); // string identifier change to ast column ref grouping_exprs_ast.emplace_back(std::move(ast_col_ptr)); grouping_col_collection.insert(grouping_col); } diff --git a/dbms/src/TestUtils/mockExecutor.h b/dbms/src/TestUtils/mockExecutor.h index da369c69ff7..248d303abb2 100644 --- a/dbms/src/TestUtils/mockExecutor.h +++ b/dbms/src/TestUtils/mockExecutor.h @@ -38,7 +38,7 @@ using MockOrderByItemVec = std::vector; using MockPartitionByItem = std::pair; using MockPartitionByItemVec = std::vector; using MockColumnNameVec = std::vector; -using MockVecColumnNameVec = std::vector; // for grouping set (every groupingExpr element inside is slice of column) +using MockVecColumnNameVec = std::vector; // for grouping set (every groupingExpr element inside is slice of column) using MockVVecColumnNameVec = std::vector; // for grouping sets using MockAstVec = std::vector; using MockWindowFrame = mock::MockWindowFrame; From 445b3c4590423257872aa9959c704ba9474b6483 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Sat, 11 Feb 2023 
19:08:25 +0800 Subject: [PATCH 16/31] fix rebase error Signed-off-by: AilinKid <3148019@qq.com> --- dbms/src/Common/TiFlashMetrics.h | 2 +- dbms/src/Flash/Planner/PhysicalPlan.cpp | 3 +- .../Flash/Planner/Plans/PhysicalExpand.cpp | 12 +- dbms/src/Flash/Planner/Plans/PhysicalExpand.h | 6 +- .../Flash/Planner/Plans/PhysicalRepeat.cpp | 116 ------------------ dbms/src/Flash/Planner/Plans/PhysicalRepeat.h | 57 --------- .../src/Flash/tests/gtest_expand_executor.cpp | 1 + 7 files changed, 13 insertions(+), 184 deletions(-) delete mode 100644 dbms/src/Flash/Planner/Plans/PhysicalRepeat.cpp delete mode 100644 dbms/src/Flash/Planner/Plans/PhysicalRepeat.h diff --git a/dbms/src/Common/TiFlashMetrics.h b/dbms/src/Common/TiFlashMetrics.h index 9f3d5f064c8..4b8778213fd 100644 --- a/dbms/src/Common/TiFlashMetrics.h +++ b/dbms/src/Common/TiFlashMetrics.h @@ -62,7 +62,7 @@ namespace DB F(type_exchange_receiver, {"type", "exchange_receiver"}), F(type_projection, {"type", "projection"}), \ F(type_partition_ts, {"type", "partition_table_scan"}), \ F(type_window, {"type", "window"}), F(type_window_sort, {"type", "window_sort"}), \ - F(type_repeat_source, {"type", "repeat_source"})) \ + F(type_expand, {"type", "expand"})) \ M(tiflash_coprocessor_request_duration_seconds, "Bucketed histogram of request duration", Histogram, \ F(type_cop, {{"type", "cop"}}, ExpBuckets{0.001, 2, 20}), \ F(type_batch, {{"type", "batch"}}, ExpBuckets{0.001, 2, 20}), \ diff --git a/dbms/src/Flash/Planner/PhysicalPlan.cpp b/dbms/src/Flash/Planner/PhysicalPlan.cpp index 12badaae469..059a681b867 100644 --- a/dbms/src/Flash/Planner/PhysicalPlan.cpp +++ b/dbms/src/Flash/Planner/PhysicalPlan.cpp @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include @@ -37,6 +37,7 @@ #include #include #include +#include #include namespace DB diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp index b43d676b23f..00c9ada765a 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp +++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include @@ -45,7 +45,7 @@ PhysicalPlanNodePtr PhysicalExpand::build( } DAGExpressionAnalyzer analyzer{child->getSchema(), context}; - ExpressionActionsPtr before_expand_actions = PhysicalPlanHelper::newActions(child->getSampleBlock(), context); + ExpressionActionsPtr before_expand_actions = PhysicalPlanHelper::newActions(child->getSampleBlock()); auto shared_expand = analyzer.buildExpandGroupingColumns(expand, before_expand_actions); @@ -71,9 +71,9 @@ PhysicalPlanNodePtr PhysicalExpand::build( } -void PhysicalExpand::expandTransform(DAGPipeline & child_pipeline, Context & context) +void PhysicalExpand::expandTransform(DAGPipeline & child_pipeline) { - auto expand_actions = PhysicalPlanHelper::newActions(child_pipeline.firstStream()->getHeader(), context); + auto expand_actions = PhysicalPlanHelper::newActions(child_pipeline.firstStream()->getHeader()); expand_actions->add(ExpressionAction::expandSource(shared_expand)); String expand_extra_info = fmt::format("expand, expand_executor_id = {}", execId()); child_pipeline.transform([&](auto & stream) { @@ -84,8 +84,8 @@ void PhysicalExpand::expandTransform(DAGPipeline & child_pipeline, Context & con void PhysicalExpand::transformImpl(DAGPipeline & pipeline, Context & context, size_t max_streams) { - child->transform(pipeline, context, max_streams); - expandTransform(pipeline, context); + 
child->buildBlockInputStream(pipeline, context, max_streams); + expandTransform(pipeline); } void PhysicalExpand::finalize(const Names & parent_require) diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.h b/dbms/src/Flash/Planner/Plans/PhysicalExpand.h index 14b910d8a75..7ff84324319 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.h +++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.h @@ -14,7 +14,7 @@ #pragma once -#include +#include #include #include #include @@ -45,12 +45,12 @@ class PhysicalExpand : public PhysicalUnary void finalize(const Names & parent_require) override; - void expandTransform(DAGPipeline & child_pipeline, Context & context); + void expandTransform(DAGPipeline & child_pipeline); const Block & getSampleBlock() const override; private: - void transformImpl(DAGPipeline & pipeline, Context & context, size_t max_streams) override; + void transformImpl(DAGPipeline & pipeline, Context & context, size_t max_streams); std::shared_ptr shared_expand; Block sample_block; }; diff --git a/dbms/src/Flash/Planner/Plans/PhysicalRepeat.cpp b/dbms/src/Flash/Planner/Plans/PhysicalRepeat.cpp deleted file mode 100644 index 51eaaeaa4c3..00000000000 --- a/dbms/src/Flash/Planner/Plans/PhysicalRepeat.cpp +++ /dev/null @@ -1,116 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ -PhysicalPlanNodePtr PhysicalExpand::build( - const Context & context, - const String & executor_id, - const LoggerPtr & log, - const tipb::Expand & expand, - const PhysicalPlanNodePtr & child) -{ - assert(child); - - child->finalize(); - - if (unlikely(expand.grouping_sets().empty())) - { - //should not reach here - throw TiFlashException("Repeat executor without grouping sets", Errors::Planner::BadRequest); - } - - DAGExpressionAnalyzer analyzer{child->getSchema(), context}; - ExpressionActionsPtr before_repeat_actions = PhysicalPlanHelper::newActions(child->getSampleBlock(), context); - - - auto shared_repeat = analyzer.buildExpandGroupingColumns(expand, before_repeat_actions); - - // construct sample block. - NamesAndTypes repeat_output_columns; - auto child_header = child->getSchema(); - for (const auto & one : child_header) - { - repeat_output_columns.emplace_back(one.name, shared_repeat->isInGroupSetColumn(one.name)? 
makeNullable(one.type): one.type); - } - repeat_output_columns.emplace_back(shared_repeat->grouping_identifier_column_name, shared_repeat->grouping_identifier_column_type); - - auto physical_repeat = std::make_shared( - executor_id, - repeat_output_columns, - log->identifier(), - child, - shared_repeat, - Block(repeat_output_columns)); - - return physical_repeat; -} - - -void PhysicalExpand::repeatTransform(DAGPipeline & child_pipeline, Context & context) -{ - auto repeat_actions = PhysicalPlanHelper::newActions(child_pipeline.firstStream()->getHeader(), context); - repeat_actions->add(ExpressionAction::expandSource(shared_expand)); - String repeat_extra_info = fmt::format("repeat source, repeat_executor_id = {}", execId()); - child_pipeline.transform([&](auto &stream) { - stream = std::make_shared(stream, repeat_actions); - stream->setExtraInfo(repeat_extra_info); - }); -} - -void PhysicalExpand::transformImpl(DAGPipeline & pipeline, Context & context, size_t max_streams) -{ - child->transform(pipeline, context, max_streams); - repeatTransform(pipeline, context); -} - -void PhysicalExpand::finalize(const Names & parent_require) -{ - FinalizeHelper::checkSchemaContainsParentRequire(schema, parent_require); - Names required_output; - required_output.reserve( shared_expand->getGroupSetNum()); // grouping set column should be existed in the child output schema. - auto name_set = std::set(); - shared_expand->getAllGroupSetColumnNames(name_set); - // append parent_require column it may expect self-filled groupingID. - for (const auto & one : parent_require) - { - if (one != Expand::grouping_identifier_column_name) - { - name_set.insert(one); - } - } - for (const auto & grouping_name: name_set) { - required_output.emplace_back(grouping_name); - } - child->finalize(required_output); -} - -const Block & PhysicalExpand::getSampleBlock() const -{ - return sample_block; -} -} // namespace DB diff --git a/dbms/src/Flash/Planner/Plans/PhysicalRepeat.h b/dbms/src/Flash/Planner/Plans/PhysicalRepeat.h deleted file mode 100644 index a2696affb5b..00000000000 --- a/dbms/src/Flash/Planner/Plans/PhysicalRepeat.h +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#pragma once - -#include -#include -#include -#include - -namespace DB -{ -class PhysicalExpand : public PhysicalUnary -{ -public: - static PhysicalPlanNodePtr build( - const Context & context, - const String & executor_id, - const LoggerPtr & log, - const tipb::Expand & expand, - const PhysicalPlanNodePtr & child); - - PhysicalExpand( - const String & executor_id_, - const NamesAndTypes & schema_, - const String & req_id, - const PhysicalPlanNodePtr & child_, - const std::shared_ptr & shared_expand, - const Block & sample_block_) - : PhysicalUnary(executor_id_, PlanType::Repeat, schema_, req_id, child_) - , shared_expand(shared_expand), sample_block(sample_block_){} - - void finalize(const Names & parent_require) override; - - void repeatTransform(DAGPipeline & child_pipeline, Context & context); - - const Block & getSampleBlock() const override; - -private: - void transformImpl(DAGPipeline & pipeline, Context & context, size_t max_streams) override; - std::shared_ptr shared_expand; - Block sample_block; -}; -} // namespace DB - - diff --git a/dbms/src/Flash/tests/gtest_expand_executor.cpp b/dbms/src/Flash/tests/gtest_expand_executor.cpp index e27a6fe0b9f..3766c6066cc 100644 --- a/dbms/src/Flash/tests/gtest_expand_executor.cpp +++ b/dbms/src/Flash/tests/gtest_expand_executor.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include From 5bba07f44d6777ad7be92c24fb38ce84197e2ef2 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Mon, 13 Feb 2023 18:46:16 +0800 Subject: [PATCH 17/31] address partial of haisheng's comment Signed-off-by: AilinKid <3148019@qq.com> --- .../ExpressionBlockInputStream.cpp | 10 -------- .../DataStreams/ExpressionBlockInputStream.h | 1 - .../Flash/Planner/Plans/PhysicalExpand.cpp | 23 +++++++++++++++---- dbms/src/Flash/Planner/Plans/PhysicalExpand.h | 7 +++++- .../src/Flash/tests/gtest_expand_executor.cpp | 2 +- dbms/src/Interpreters/Expand.cpp | 16 ++++++------- dbms/src/Interpreters/ExpressionActions.cpp | 2 +- dbms/src/TestUtils/ExecutorTestUtils.h | 4 ++-- dbms/src/TestUtils/executorSerializer.cpp | 12 +++++----- .../TestUtils/tests/gtest_mock_executors.cpp | 1 + 10 files changed, 44 insertions(+), 34 deletions(-) diff --git a/dbms/src/DataStreams/ExpressionBlockInputStream.cpp b/dbms/src/DataStreams/ExpressionBlockInputStream.cpp index 27daa61152a..5bc7a4685e4 100644 --- a/dbms/src/DataStreams/ExpressionBlockInputStream.cpp +++ b/dbms/src/DataStreams/ExpressionBlockInputStream.cpp @@ -53,14 +53,4 @@ Block ExpressionBlockInputStream::readImpl() expression->execute(res); return res; } - -void ExpressionBlockInputStream::appendInfo(FmtBuffer & buffer) const -{ - if (!expression.get()->getActions().empty() && expression.get()->getActions()[0].expand != nullptr) - { - buffer.fmtAppend(": grouping set "); - expression.get()->getActions()[0].expand->getGroupingSetsDes(buffer); - } -} - } // namespace DB diff --git a/dbms/src/DataStreams/ExpressionBlockInputStream.h b/dbms/src/DataStreams/ExpressionBlockInputStream.h index 73141446414..1d1e059d51d 100644 --- a/dbms/src/DataStreams/ExpressionBlockInputStream.h +++ b/dbms/src/DataStreams/ExpressionBlockInputStream.h @@ -41,7 +41,6 @@ class ExpressionBlockInputStream : public IProfilingBlockInputStream String getName() const override { return NAME; } Block getTotals() override; Block getHeader() const override; - void appendInfo(FmtBuffer & buffer) const override; protected: Block readImpl() override; diff --git 
a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp index 00c9ada765a..8ec2083179e 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp +++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp @@ -19,10 +19,13 @@ #include #include #include +#include #include #include #include #include +#include +#include #include namespace DB @@ -46,9 +49,10 @@ PhysicalPlanNodePtr PhysicalExpand::build( DAGExpressionAnalyzer analyzer{child->getSchema(), context}; ExpressionActionsPtr before_expand_actions = PhysicalPlanHelper::newActions(child->getSampleBlock()); - + ExpressionActionsPtr expand_actions_itself = PhysicalPlanHelper::newActions(child->getSampleBlock()); auto shared_expand = analyzer.buildExpandGroupingColumns(expand, before_expand_actions); + expand_actions_itself->add(ExpressionAction::expandSource(shared_expand)); // construct sample block. NamesAndTypes expand_output_columns; @@ -65,6 +69,7 @@ PhysicalPlanNodePtr PhysicalExpand::build( log->identifier(), child, shared_expand, + expand_actions_itself, Block(expand_output_columns)); return physical_expand; @@ -73,16 +78,26 @@ PhysicalPlanNodePtr PhysicalExpand::build( void PhysicalExpand::expandTransform(DAGPipeline & child_pipeline) { - auto expand_actions = PhysicalPlanHelper::newActions(child_pipeline.firstStream()->getHeader()); - expand_actions->add(ExpressionAction::expandSource(shared_expand)); String expand_extra_info = fmt::format("expand, expand_executor_id = {}", execId()); + FmtBuffer fb; + fb.append(": grouping set "); + shared_expand->getGroupingSetsDes(fb); + expand_extra_info.append(fb.toString()); child_pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, expand_actions, log->identifier()); stream->setExtraInfo(expand_extra_info); }); } -void PhysicalExpand::transformImpl(DAGPipeline & pipeline, Context & context, size_t max_streams) +void PhysicalExpand::buildPipelineExec(PipelineExecGroupBuilder & group_builder, Context &, size_t) +{ + auto input_header = group_builder.getCurrentHeader(); + group_builder.transform([&](auto &builder) { + builder.appendTransformOp(std::make_unique(group_builder.exec_status, expand_actions, log->identifier())); + }); +} + +void PhysicalExpand::buildBlockInputStreamImpl(DAGPipeline & pipeline, Context & context, size_t max_streams) { child->buildBlockInputStream(pipeline, context, max_streams); expandTransform(pipeline); diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.h b/dbms/src/Flash/Planner/Plans/PhysicalExpand.h index 7ff84324319..77d5abdcd87 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.h +++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.h @@ -37,9 +37,11 @@ class PhysicalExpand : public PhysicalUnary const String & req_id, const PhysicalPlanNodePtr & child_, const std::shared_ptr & shared_expand, + const ExpressionActionsPtr & expand_actions, const Block & sample_block_) : PhysicalUnary(executor_id_, PlanType::Expand, schema_, req_id, child_) , shared_expand(shared_expand) + , expand_actions(expand_actions) , sample_block(sample_block_) {} @@ -49,9 +51,12 @@ class PhysicalExpand : public PhysicalUnary const Block & getSampleBlock() const override; + void buildPipelineExec(PipelineExecGroupBuilder & group_builder, Context & /*context*/, size_t /*concurrency*/) override; + private: - void transformImpl(DAGPipeline & pipeline, Context & context, size_t max_streams); + void buildBlockInputStreamImpl(DAGPipeline & pipeline, Context & context, size_t max_streams) override; std::shared_ptr 
shared_expand; + ExpressionActionsPtr expand_actions; Block sample_block; }; } // namespace DB diff --git a/dbms/src/Flash/tests/gtest_expand_executor.cpp b/dbms/src/Flash/tests/gtest_expand_executor.cpp index 3766c6066cc..bf7a1c9a8b7 100644 --- a/dbms/src/Flash/tests/gtest_expand_executor.cpp +++ b/dbms/src/Flash/tests/gtest_expand_executor.cpp @@ -426,7 +426,7 @@ CreatingSets Expression: HashJoinProbe: Expression: - Expression: : grouping set [{}{}] + Expression: }{}]> Expression: SharedQuery: ParallelAggregating, max_threads: 10, final: true diff --git a/dbms/src/Interpreters/Expand.cpp b/dbms/src/Interpreters/Expand.cpp index 6280a05763d..d7aa21e2457 100644 --- a/dbms/src/Interpreters/Expand.cpp +++ b/dbms/src/Interpreters/Expand.cpp @@ -39,26 +39,26 @@ Expand::Expand(const DB::GroupingSets & gss) void Expand::getGroupingSetsDes(FmtBuffer & buffer) const { - buffer.fmtAppend("["); + buffer.append("["); for (const auto & grouping_set : group_sets_names) { - buffer.fmtAppend("{{"); + buffer.append("{"); for (const auto & grouping_exprs : grouping_set) { - buffer.fmtAppend("<"); + buffer.append("<"); for (size_t i = 0; i < grouping_exprs.size(); i++) { if (i != 0) { - buffer.fmtAppend(","); + buffer.append(","); } - buffer.fmtAppend(grouping_exprs.at(i)); + buffer.append(grouping_exprs.at(i)); } - buffer.fmtAppend(">"); + buffer.append(">"); } - buffer.fmtAppend("}}"); + buffer.append("}"); } - buffer.fmtAppend("]"); + buffer.append("]"); } /// for cases like: select count(distinct a), count(distinct b) from t; diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index f5bc85d4903..aa5d856a966 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -258,7 +258,7 @@ void ExpressionAction::prepare(Block & sample_block) { Block new_block; - for (auto & projection : projections) // change alias + for (auto & projection : projections) { const std::string & name = projection.first; const std::string & alias = projection.second; diff --git a/dbms/src/TestUtils/ExecutorTestUtils.h b/dbms/src/TestUtils/ExecutorTestUtils.h index 13363768a3b..2bf97d81a17 100644 --- a/dbms/src/TestUtils/ExecutorTestUtils.h +++ b/dbms/src/TestUtils/ExecutorTestUtils.h @@ -32,11 +32,11 @@ ColumnsWithTypeAndName readBlock(BlockInputStreamPtr stream); ColumnsWithTypeAndName readBlocks(std::vector streams); #define WRAP_FOR_TEST_BEGIN \ - std::vector planner_bools{false, true}; \ + std::vector planner_bools{true}; \ for (auto enable_planner : planner_bools) \ { \ enablePlanner(enable_planner); \ - std::vector pipeline_bools{false}; \ + std::vector pipeline_bools{}; \ if (enable_planner) \ pipeline_bools.push_back(true); \ for (auto enable_pipeline : pipeline_bools) \ diff --git a/dbms/src/TestUtils/executorSerializer.cpp b/dbms/src/TestUtils/executorSerializer.cpp index 04b1df80abb..7c1cfa980b6 100644 --- a/dbms/src/TestUtils/executorSerializer.cpp +++ b/dbms/src/TestUtils/executorSerializer.cpp @@ -165,24 +165,24 @@ void serializeExpandSource(const String & executor_id, const tipb::Expand & expa buf.fmtAppend("{} | expanded_by: [", executor_id); for (const auto & grouping_set : expand.grouping_sets()) { - buf.fmtAppend("<"); + buf.append("<"); for (const auto & grouping_exprs : grouping_set.grouping_exprs()) { - buf.fmtAppend("{{"); + buf.append("{"); for (auto i = 0; i < grouping_exprs.grouping_expr().size(); i++) { if (i != 0) { - buf.fmtAppend(","); + buf.append(","); } auto expr = 
grouping_exprs.grouping_expr().Get(i); serializeExpression(expr, buf); } - buf.fmtAppend("}}"); + buf.append("}"); } - buf.fmtAppend(">"); + buf.append(">"); } - buf.fmtAppend("]\n"); + buf.append("]\n"); } void serializeJoin(const String & executor_id, const tipb::Join & join, FmtBuffer & buf) diff --git a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp index f1826226aeb..c94f2509c84 100644 --- a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp +++ b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp @@ -14,6 +14,7 @@ #include #include +#include namespace DB { From 74ea6521eea09eaf90289c13c93fdc1b90ca3953 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Mon, 13 Feb 2023 19:25:39 +0800 Subject: [PATCH 18/31] make fmt Signed-off-by: AilinKid <3148019@qq.com> --- dbms/src/Common/TiFlashMetrics.h | 2 +- dbms/src/Flash/Planner/PlanType.h | 1 + .../Flash/Planner/Plans/PhysicalExpand.cpp | 2 +- .../src/Flash/tests/gtest_expand_executor.cpp | 1 - .../Interpreters/tests/gtest_block_expand.cpp | 24 +++++++++---------- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/dbms/src/Common/TiFlashMetrics.h b/dbms/src/Common/TiFlashMetrics.h index 4b8778213fd..3afacefabe4 100644 --- a/dbms/src/Common/TiFlashMetrics.h +++ b/dbms/src/Common/TiFlashMetrics.h @@ -277,7 +277,7 @@ namespace DB M(tiflash_compute_request_unit, "Request Unit used by tiflash compute", Counter, \ F(type_mpp, {{"type", "mpp"}, ComputeLabelHolder::instance().getClusterIdLabel(), ComputeLabelHolder::instance().getProcessIdLabel()}), \ F(type_cop, {{"type", "cop"}, ComputeLabelHolder::instance().getClusterIdLabel(), ComputeLabelHolder::instance().getProcessIdLabel()}), \ - F(type_batch, {{"type", "batch"}, ComputeLabelHolder::instance().getClusterIdLabel(), ComputeLabelHolder::instance().getProcessIdLabel()})) \ + F(type_batch, {{"type", "batch"}, ComputeLabelHolder::instance().getClusterIdLabel(), ComputeLabelHolder::instance().getProcessIdLabel()})) // clang-format on diff --git a/dbms/src/Flash/Planner/PlanType.h b/dbms/src/Flash/Planner/PlanType.h index cfbdff03e77..6b8e540b6a9 100644 --- a/dbms/src/Flash/Planner/PlanType.h +++ b/dbms/src/Flash/Planner/PlanType.h @@ -15,6 +15,7 @@ #pragma once #include + #include "Common/Exception.h" namespace DB diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp index 8ec2083179e..0b69b747177 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp +++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp @@ -92,7 +92,7 @@ void PhysicalExpand::expandTransform(DAGPipeline & child_pipeline) void PhysicalExpand::buildPipelineExec(PipelineExecGroupBuilder & group_builder, Context &, size_t) { auto input_header = group_builder.getCurrentHeader(); - group_builder.transform([&](auto &builder) { + group_builder.transform([&](auto & builder) { builder.appendTransformOp(std::make_unique(group_builder.exec_status, expand_actions, log->identifier())); }); } diff --git a/dbms/src/Flash/tests/gtest_expand_executor.cpp b/dbms/src/Flash/tests/gtest_expand_executor.cpp index bf7a1c9a8b7..6401edbc424 100644 --- a/dbms/src/Flash/tests/gtest_expand_executor.cpp +++ b/dbms/src/Flash/tests/gtest_expand_executor.cpp @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include #include #include diff --git a/dbms/src/Interpreters/tests/gtest_block_expand.cpp b/dbms/src/Interpreters/tests/gtest_block_expand.cpp index 97ef466c245..039a1545888 100644 --- a/dbms/src/Interpreters/tests/gtest_block_expand.cpp +++ b/dbms/src/Interpreters/tests/gtest_block_expand.cpp @@ -88,7 +88,7 @@ try const auto res0 = ColumnWithInt64{1, 1, 0, 0, -1, -1}; const auto * col_0 = typeid_cast(block.getColumns()[0].get()); - for (int i = 0; i < int(expand_rows); ++i) + for (int i = 0; i < static_cast(expand_rows); ++i) { ASSERT_EQ(col_0->getElement(i), res0[i]); } @@ -96,7 +96,7 @@ try const auto res1 = ColumnWithString{"1 ", "null", "1 ", "null", "1 ", "null"}; const auto * col_1 = typeid_cast(block.getColumns()[1].get()); const auto * col_1_nest = &static_cast(col_1->getNestedColumn()); - for (int i = 0; i < int(expand_rows); ++i) + for (int i = 0; i < static_cast(expand_rows); ++i) { if (res1[i] == "null") { @@ -111,7 +111,7 @@ try const auto res2 = ColumnWithString{"null", "1", "null", "2", "null", "3"}; const auto * col_2 = typeid_cast(block.getColumns()[2].get()); const auto * col_2_nest = &static_cast(col_2->getNestedColumn()); - for (int i = 0; i < int(expand_rows); ++i) + for (int i = 0; i < static_cast(expand_rows); ++i) { if (res2[i] == "null") { @@ -125,14 +125,14 @@ try const auto res3 = ColumnWithUInt64{1, 1, 1, 1, 0, 0}; const auto * col_3 = typeid_cast(block.getColumns()[3].get()); - for (int i = 0; i < int(expand_rows); ++i) + for (int i = 0; i < static_cast(expand_rows); ++i) { ASSERT_EQ(col_3->getElement(i), res3[i]); } const auto res4 = ColumnWithUInt64{1, 2, 1, 2, 1, 2}; const auto * col_4 = typeid_cast(block.getColumns()[4].get()); - for (int i = 0; i < int(expand_rows); ++i) + for (int i = 0; i < static_cast(expand_rows); ++i) { ASSERT_EQ(col_4->getElement(i), res4[i]); } @@ -188,7 +188,7 @@ try const auto res0 = ColumnWithInt64{1, 1, 1, 0, 0, 0, -1, -1, -1}; const auto * col_0 = typeid_cast(block.getColumns()[0].get()); - for (int i = 0; i < int(expand_rows); ++i) + for (int i = 0; i < static_cast(expand_rows); ++i) { ASSERT_EQ(col_0->getElement(i), res0[i]); } @@ -196,7 +196,7 @@ try const auto res1 = ColumnWithString{"aaa", "null", "null", "bbb", "null", "null", "ccc", "null", "null"}; const auto * col_1 = typeid_cast(block.getColumns()[1].get()); const auto * col_1_nest = &static_cast(col_1->getNestedColumn()); - for (int i = 0; i < int(expand_rows); ++i) + for (int i = 0; i < static_cast(expand_rows); ++i) { if (res1[i] == "null") { @@ -211,7 +211,7 @@ try const auto res2 = ColumnWithString{"null", "1", "null", "null", "2", "null", "null", "3", "null"}; const auto * col_2 = typeid_cast(block.getColumns()[2].get()); const auto * col_2_nest = &static_cast(col_2->getNestedColumn()); - for (int i = 0; i < int(expand_rows); ++i) + for (int i = 0; i < static_cast(expand_rows); ++i) { if (res2[i] == "null") { @@ -224,12 +224,12 @@ try } // use UInt64(-1) to represent null. 
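// (illustrative aside, not part of the patch) UInt64(-1) can serve as a null sentinel here because
// unsigned conversion wraps to the all-ones bit pattern: static_cast<UInt64>(-1)
// == std::numeric_limits<UInt64>::max() == 0xFFFFFFFFFFFFFFFF, a value the expected
// test data never holds as a real element.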
- const auto res3 = ColumnWithUInt64{UInt64(-1), UInt64(-1), 1, UInt64(-1), UInt64(-1), 1, UInt64(-1), UInt64(-1), 0}; + const auto res3 = ColumnWithUInt64{static_cast(-1), static_cast(-1), 1, static_cast(-1), static_cast(-1), 1, static_cast(-1), static_cast(-1), 0}; const auto * col_3 = typeid_cast(block.getColumns()[3].get()); const auto * col_3_nest = &typeid_cast(col_3->getNestedColumn()); - for (int i = 0; i < int(expand_rows); ++i) + for (int i = 0; i < static_cast(expand_rows); ++i) { - if (res3[i] == UInt64(-1)) + if (res3[i] == static_cast(-1)) { ASSERT_EQ(col_3->isNullAt(i), true); } @@ -241,7 +241,7 @@ try const auto res4 = ColumnWithUInt64{1, 2, 3, 1, 2, 3, 1, 2, 3}; const auto * col_4 = typeid_cast(block.getColumns()[4].get()); - for (int i = 0; i < int(expand_rows); ++i) + for (int i = 0; i < static_cast(expand_rows); ++i) { ASSERT_EQ(col_4->getElement(i), res4[i]); } From 11e8a46618dcdd42395bdf019e66a800d20d267b Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Fri, 17 Feb 2023 13:40:38 +0800 Subject: [PATCH 19/31] add Signed-off-by: AilinKid <3148019@qq.com> --- .../Coprocessor/DAGExpressionAnalyzer.cpp | 9 ++++---- .../Flash/Coprocessor/DAGExpressionAnalyzer.h | 2 +- .../Coprocessor/collectOutputFieldTypes.cpp | 5 +--- dbms/src/Flash/Planner/PlanType.h | 2 -- .../Flash/Planner/Plans/PhysicalExpand.cpp | 14 +++++------ dbms/src/Flash/Planner/Plans/PhysicalExpand.h | 4 ++-- dbms/src/Interpreters/Expand.cpp | 23 ++++++++----------- dbms/src/Interpreters/Expand.h | 2 +- dbms/src/Interpreters/ExpressionActions.cpp | 8 +++---- dbms/src/Interpreters/ExpressionActions.h | 2 +- dbms/src/TestUtils/executorSerializer.cpp | 2 +- 11 files changed, 30 insertions(+), 43 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index 816b0e324dd..370505c9b8c 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -805,7 +805,7 @@ NamesAndTypes DAGExpressionAnalyzer::buildOrderColumns( return order_columns; } -std::shared_ptr DAGExpressionAnalyzer::buildExpandGroupingColumns( +GroupingSets DAGExpressionAnalyzer::buildExpandGroupingColumns( const tipb::Expand & expand, const ExpressionActionsPtr & actions) { @@ -843,8 +843,7 @@ std::shared_ptr DAGExpressionAnalyzer::buildExpandGroupingColumns( mutable_one.type = makeNullable(mutable_one.type); } source_columns.emplace_back(Expand::grouping_identifier_column_name, Expand::grouping_identifier_column_type); - auto shared_expand = Expand::sharedExpand(group_sets_columns); - return shared_expand; + return group_sets_columns; } ExpressionActionsPtr DAGExpressionAnalyzer::appendExpand( @@ -856,8 +855,8 @@ ExpressionActionsPtr DAGExpressionAnalyzer::appendExpand( { last_step.required_output.push_back(origin_col.name); } - auto shared_expand = buildExpandGroupingColumns(expand, last_step.actions); - last_step.actions->add(ExpressionAction::expandSource(shared_expand)); + auto grouping_sets = buildExpandGroupingColumns(expand, last_step.actions); + last_step.actions->add(ExpressionAction::expandSource(grouping_sets)); auto before_expand = chain.getLastActions(); chain.finalize(); diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h index 7436841034a..4cec8ec0358 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h @@ -71,7 +71,7 @@ class DAGExpressionAnalyzer : 
private boost::noncopyable ExpressionActionsChain & chain, const std::vector & conditions); - std::shared_ptr<Expand> buildExpandGroupingColumns(const tipb::Expand & expand, const ExpressionActionsPtr & actions); + GroupingSets buildExpandGroupingColumns(const tipb::Expand & expand, const ExpressionActionsPtr & actions); ExpressionActionsPtr appendExpand(const tipb::Expand & expand, ExpressionActionsChain & chain); diff --git a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp index 94c1b6c573a..8813c36f24e 100644 --- a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp +++ b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp @@ -117,10 +117,7 @@ bool collectForExpand(std::vector & out_field_types, const tipb { // assert that: grouping_col must be the column ref guaranteed by tidb. auto column_index = decodeDAGInt64(grouping_col.val()); - if (column_index < 0 || column_index >= static_cast<Int64>(out_child_fields.size())) - { - throw TiFlashException("Column index out of bound", Errors::Coprocessor::BadRequest); - } + RUNTIME_CHECK_MSG(column_index >= 0 && column_index < static_cast<Int64>(out_child_fields.size()), "Column index out of bound"); out_child_fields[column_index].set_flag(out_child_fields[column_index].flag() & (~TiDB::ColumnFlagNotNull)); } } diff --git a/dbms/src/Flash/Planner/PlanType.h b/dbms/src/Flash/Planner/PlanType.h index 6b8e540b6a9..c3c31ce3c81 100644 --- a/dbms/src/Flash/Planner/PlanType.h +++ b/dbms/src/Flash/Planner/PlanType.h @@ -16,8 +16,6 @@ #include -#include "Common/Exception.h" - namespace DB { struct PlanType diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp index 0b69b747177..5e0294a5973 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp +++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp @@ -51,24 +51,25 @@ PhysicalPlanNodePtr PhysicalExpand::build( DAGExpressionAnalyzer analyzer{child->getSchema(), context}; ExpressionActionsPtr before_expand_actions = PhysicalPlanHelper::newActions(child->getSampleBlock()); ExpressionActionsPtr expand_actions_itself = PhysicalPlanHelper::newActions(child->getSampleBlock()); - auto shared_expand = analyzer.buildExpandGroupingColumns(expand, before_expand_actions); - expand_actions_itself->add(ExpressionAction::expandSource(shared_expand)); + auto grouping_sets = analyzer.buildExpandGroupingColumns(expand, before_expand_actions); + auto expand_action = ExpressionAction::expandSource(grouping_sets); + expand_actions_itself->add(expand_action); // construct sample block. NamesAndTypes expand_output_columns; auto child_header = child->getSchema(); for (const auto & one : child_header) { - expand_output_columns.emplace_back(one.name, shared_expand->isInGroupSetColumn(one.name) ?
makeNullable(one.type) : one.type); } - expand_output_columns.emplace_back(shared_expand->grouping_identifier_column_name, shared_expand->grouping_identifier_column_type); + expand_output_columns.emplace_back(expand_action.expand->grouping_identifier_column_name, expand_action.expand->grouping_identifier_column_type); auto physical_expand = std::make_shared( executor_id, expand_output_columns, log->identifier(), child, - shared_expand, + expand_action.expand, expand_actions_itself, Block(expand_output_columns)); @@ -108,8 +109,7 @@ void PhysicalExpand::finalize(const Names & parent_require) FinalizeHelper::checkSchemaContainsParentRequire(schema, parent_require); Names required_output; required_output.reserve(shared_expand->getGroupSetNum()); // grouping set column should be existed in the child output schema. - auto name_set = std::set(); - shared_expand->getAllGroupSetColumnNames(name_set); + auto name_set = shared_expand->getAllGroupSetColumnNames(); // append parent_require column it may expect self-filled groupingID. for (const auto & one : parent_require) { diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.h b/dbms/src/Flash/Planner/Plans/PhysicalExpand.h index 77d5abdcd87..bca4f9beedd 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.h +++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.h @@ -36,7 +36,7 @@ class PhysicalExpand : public PhysicalUnary const NamesAndTypes & schema_, const String & req_id, const PhysicalPlanNodePtr & child_, - const std::shared_ptr & shared_expand, + const std::shared_ptr & shared_expand, const ExpressionActionsPtr & expand_actions, const Block & sample_block_) : PhysicalUnary(executor_id_, PlanType::Expand, schema_, req_id, child_) @@ -55,7 +55,7 @@ class PhysicalExpand : public PhysicalUnary private: void buildBlockInputStreamImpl(DAGPipeline & pipeline, Context & context, size_t max_streams) override; - std::shared_ptr shared_expand; + std::shared_ptr shared_expand; ExpressionActionsPtr expand_actions; Block sample_block; }; diff --git a/dbms/src/Interpreters/Expand.cpp b/dbms/src/Interpreters/Expand.cpp index d7aa21e2457..702e351cdf3 100644 --- a/dbms/src/Interpreters/Expand.cpp +++ b/dbms/src/Interpreters/Expand.cpp @@ -46,14 +46,7 @@ void Expand::getGroupingSetsDes(FmtBuffer & buffer) const for (const auto & grouping_exprs : grouping_set) { buffer.append("<"); - for (size_t i = 0; i < grouping_exprs.size(); i++) - { - if (i != 0) - { - buffer.append(","); - } - buffer.append(grouping_exprs.at(i)); - } + buffer.joinStr(grouping_exprs.begin(), grouping_exprs.end()); buffer.append(">"); } buffer.append("}"); @@ -103,13 +96,13 @@ void Expand::replicateAndFillNull(Block & block) const added_grouping_id_column.reserve(1); added_grouping_id_column.push_back(grouping_id_column->getPtr()); - for (size_t i = 0; i < origin_rows; i++) + for (size_t i = 0; i < origin_rows; ++i) { current_offset += replicate_times_for_one_row; (*offsets_to_replicate)[i] = current_offset; // in the same loop, to fill the grouping id. - for (UInt64 j = 0; j < replicate_times_for_one_row; j++) + for (UInt64 j = 0; j < replicate_times_for_one_row; ++j) { // start from 1. Field grouping_id = j + 1; @@ -143,7 +136,7 @@ void Expand::replicateAndFillNull(Block & block) const // after replication, it just copied the same row for N times, we still need to fill corresponding Field with null value. 
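// (illustrative sketch, not part of the patch) with grouping sets <{a}>, <{b}> and input
// rows (a=1, b=1), (a=2, b=2), the replicate-then-fill pass described above yields:
//     a     b     groupingID
//     1     null  1          <- copy of row 1 targeting {a}: b is nulled
//     null  1     2          <- copy of row 1 targeting {b}: a is nulled
//     2     null  1          <- copy of row 2 targeting {a}
//     null  2     2          <- copy of row 2 targeting {b}
// i.e. every source row appears once per grouping set, and the 1-based groupingID records
// which grouping set each copy targets.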
- for (size_t grouping_offset = 0; grouping_offset < replicate_times_for_one_row; grouping_offset++) + for (size_t grouping_offset = 0; grouping_offset < replicate_times_for_one_row; ++grouping_offset) { auto grouping_columns = getGroupSetColumnNamesByOffset(grouping_offset); // for every grouping col, get the mutated one of them. @@ -180,14 +173,14 @@ void Expand::replicateAndFillNull(Block & block) const /// 2 2 1 + replicate_group2 for b, it's 1, we should pick and set: /// 2 2 2 + replicate_group_rows[1].b = null /// -----------------+ - for (size_t i = 0; i < origin_rows; i++) + for (size_t i = 0; i < origin_rows; ++i) { // for every original one row mapped N rows, fill the corresponding group set column as null value according to the offset. // only when the offset in replicate_group equals to current group_offset, set the data to null. // eg: for case above, for grouping_offset of = 0, we only set the every offset = 0 in each // small replicate_group_x to null. // - for (UInt64 j = 0; j < replicate_times_for_one_row; j++) + for (UInt64 j = 0; j < replicate_times_for_one_row; ++j) { if (j == grouping_offset) { @@ -234,8 +227,9 @@ const GroupingColumnNames & Expand::getGroupSetColumnNamesByOffset(size_t offset return group_sets_names[offset][0]; } -void Expand::getAllGroupSetColumnNames(std::set & name_set) const +std::set Expand::getAllGroupSetColumnNames() const { + std::set name_set; for (const auto & it1 : group_sets_names) { // for every grouping set. @@ -248,6 +242,7 @@ void Expand::getAllGroupSetColumnNames(std::set & name_set) const } } } + return name_set; } std::shared_ptr Expand::sharedExpand(const GroupingSets & groupingSets) diff --git a/dbms/src/Interpreters/Expand.h b/dbms/src/Interpreters/Expand.h index e2128f476fd..86229309317 100644 --- a/dbms/src/Interpreters/Expand.h +++ b/dbms/src/Interpreters/Expand.h @@ -124,7 +124,7 @@ class Expand const GroupingColumnNames & getGroupSetColumnNamesByOffset(size_t offset) const; - void getAllGroupSetColumnNames(std::set & name_set) const; + std::set getAllGroupSetColumnNames() const; static std::shared_ptr sharedExpand(const GroupingSets & groupingSets); diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index aa5d856a966..0c49880850b 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -135,15 +135,14 @@ ExpressionAction ExpressionAction::ordinaryJoin(std::shared_ptr join return a; } -ExpressionAction ExpressionAction::expandSource(std::shared_ptr expand_) +ExpressionAction ExpressionAction::expandSource(GroupingSets grouping_sets_) { ExpressionAction a; a.type = EXPAND; - a.expand = expand_; + a.expand = std::make_shared(grouping_sets_); return a; } - void ExpressionAction::prepare(Block & sample_block) { /** Constant expressions should be evaluated, and put the result in sample_block. @@ -239,8 +238,7 @@ void ExpressionAction::prepare(Block & sample_block) case EXPAND: { // sample_block is just for schema check followed by later block, modify it if your schema has changed during this action. - auto name_set = std::set(); - expand->getAllGroupSetColumnNames(name_set); + auto name_set = expand->getAllGroupSetColumnNames(); // make grouping set column to be nullable. 
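// (illustrative aside, not part of the patch) both halves of each header entry must change in
// step here: the DataTypePtr is wrapped via makeNullable(type) and, when the sample block
// carries a materialized column (the column != nullptr branch below), that column presumably
// receives the matching nullable wrapping, so later schema checks never see a Nullable type
// paired with a non-nullable column.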
for (const auto & col_name : name_set) { diff --git a/dbms/src/Interpreters/ExpressionActions.h b/dbms/src/Interpreters/ExpressionActions.h index e9d98de2106..f93266529f5 100644 --- a/dbms/src/Interpreters/ExpressionActions.h +++ b/dbms/src/Interpreters/ExpressionActions.h @@ -112,7 +112,7 @@ struct ExpressionAction static ExpressionAction project(const NamesWithAliases & projected_columns_); static ExpressionAction project(const Names & projected_columns_); static ExpressionAction ordinaryJoin(std::shared_ptr join_, const NamesAndTypesList & columns_added_by_join_); - static ExpressionAction expandSource(std::shared_ptr expand_); + static ExpressionAction expandSource(GroupingSets grouping_sets); /// Which columns necessary to perform this action. Names getNeededColumns() const; diff --git a/dbms/src/TestUtils/executorSerializer.cpp b/dbms/src/TestUtils/executorSerializer.cpp index 7c1cfa980b6..607bd27c68f 100644 --- a/dbms/src/TestUtils/executorSerializer.cpp +++ b/dbms/src/TestUtils/executorSerializer.cpp @@ -169,7 +169,7 @@ void serializeExpandSource(const String & executor_id, const tipb::Expand & expa for (const auto & grouping_exprs : grouping_set.grouping_exprs()) { buf.append("{"); - for (auto i = 0; i < grouping_exprs.grouping_expr().size(); i++) + for (auto i = 0; i < grouping_exprs.grouping_expr().size(); ++i) { if (i != 0) { From 5609f3b0742cb41191fe8d8941c73a08913944e8 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Fri, 17 Feb 2023 18:54:37 +0800 Subject: [PATCH 20/31] resolve header file recycle Signed-off-by: AilinKid <3148019@qq.com> --- dbms/src/Interpreters/Expand.h | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/src/Interpreters/Expand.h b/dbms/src/Interpreters/Expand.h index 86229309317..09182be015d 100644 --- a/dbms/src/Interpreters/Expand.h +++ b/dbms/src/Interpreters/Expand.h @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include From 995d14efc33e35faed9ef505884d1d4087e1fc56 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Sat, 18 Feb 2023 16:15:22 +0800 Subject: [PATCH 21/31] address jiangtao's comment Signed-off-by: AilinKid <3148019@qq.com> --- dbms/src/Interpreters/Expand.cpp | 18 +++++------------- .../TestUtils/tests/gtest_mock_executors.cpp | 1 - 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/dbms/src/Interpreters/Expand.cpp b/dbms/src/Interpreters/Expand.cpp index 702e351cdf3..00a1c0af0e7 100644 --- a/dbms/src/Interpreters/Expand.cpp +++ b/dbms/src/Interpreters/Expand.cpp @@ -64,7 +64,7 @@ void Expand::getGroupingSetsDes(FmtBuffer & buffer) const /// 1 1 target a -+-----> 1 null groupingID for a =1 /// 2 2 +-----> 2 null groupingID for b =2 /// target b -+-----> null 1 groupingID for a =1 -/// +-----> null a groupingID for b =2 +/// +-----> null 2 groupingID for b =2 /// /// when target a specified group set, other group set columns should be filled /// with null value to make group by(a,b) operator to meet the equivalence effect @@ -145,15 +145,8 @@ void Expand::replicateAndFillNull(Block & block) const assert(block.getByName(grouping_col).column->isColumnNullable()); const auto * nullable_column = typeid_cast(block.getByName(grouping_col).column.get()); - auto origin_size = nullable_column->size(); - // clone the nested column. - MutableColumnPtr new_nested_col = nullable_column->getNestedColumn().cloneResized(origin_size); - // just get mutable new null map. 
- auto new_null_map = ColumnUInt8::create(); - new_null_map->getData().resize(origin_size); - memcpy(new_null_map->getData().data(), nullable_column->getNullMapData().data(), origin_size * sizeof(nullable_column->getNullMapData()[0])); - - auto cloned_one = ColumnNullable::create(std::move(new_nested_col), std::move(new_null_map)); + auto cloned = ColumnNullable::create(nullable_column->getNestedColumnPtr(), nullable_column->getNullMapColumnPtr()); + auto cloned_one = typeid_cast(cloned->assumeMutable().get()); /// travel total rows, and set null values for current grouping set column. /// basically looks like: @@ -162,7 +155,7 @@ void Expand::replicateAndFillNull(Block & block) const /// 1 1 target a -+-----> 1 null groupingID for a =1 /// 2 2 +-----> 2 null groupingID for b =2 /// target b -+-----> null 1 groupingID for a =1 - /// +-----> null a groupingID for b =2 + /// +-----> null 2 groupingID for b =2 /// /// after the replicate is now, the data form likes like below /// ==> for one : in @@ -179,7 +172,6 @@ void Expand::replicateAndFillNull(Block & block) const // only when the offset in replicate_group equals to current group_offset, set the data to null. // eg: for case above, for grouping_offset of = 0, we only set the every offset = 0 in each // small replicate_group_x to null. - // for (UInt64 j = 0; j < replicate_times_for_one_row; ++j) { if (j == grouping_offset) @@ -193,7 +185,7 @@ void Expand::replicateAndFillNull(Block & block) const cloned_one->getNullMapData().data()[computed_offset] = 1; } } - block.getByName(grouping_col).column = std::move(cloned_one); + block.getByName(grouping_col).column = std::move(cloned); } // finish of adjustment for one grouping set columns. (by now one column for one grouping set). } diff --git a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp index c94f2509c84..f1826226aeb 100644 --- a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp +++ b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp @@ -14,7 +14,6 @@ #include #include -#include namespace DB { From 199961caacfb1d8c9907cc429ae7be1676e89425 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Sat, 18 Feb 2023 16:57:27 +0800 Subject: [PATCH 22/31] fmt Signed-off-by: AilinKid <3148019@qq.com> --- dbms/src/Interpreters/Expand.cpp | 7 +------ dbms/src/Interpreters/Expand.h | 11 ----------- 2 files changed, 1 insertion(+), 17 deletions(-) diff --git a/dbms/src/Interpreters/Expand.cpp b/dbms/src/Interpreters/Expand.cpp index 00a1c0af0e7..cfe9c7eb870 100644 --- a/dbms/src/Interpreters/Expand.cpp +++ b/dbms/src/Interpreters/Expand.cpp @@ -146,7 +146,7 @@ void Expand::replicateAndFillNull(Block & block) const const auto * nullable_column = typeid_cast(block.getByName(grouping_col).column.get()); auto cloned = ColumnNullable::create(nullable_column->getNestedColumnPtr(), nullable_column->getNullMapColumnPtr()); - auto cloned_one = typeid_cast(cloned->assumeMutable().get()); + auto *cloned_one = typeid_cast(cloned->assumeMutable().get()); /// travel total rows, and set null values for current grouping set column. 
/// basically looks like: @@ -237,11 +237,6 @@ std::set Expand::getAllGroupSetColumnNames() const return name_set; } -std::shared_ptr Expand::sharedExpand(const GroupingSets & groupingSets) -{ - return std::make_shared(groupingSets); -} - const std::string Expand::grouping_identifier_column_name = "groupingID"; const DataTypePtr Expand::grouping_identifier_column_type = std::make_shared(); } // namespace DB diff --git a/dbms/src/Interpreters/Expand.h b/dbms/src/Interpreters/Expand.h index 09182be015d..d23af17cafa 100644 --- a/dbms/src/Interpreters/Expand.h +++ b/dbms/src/Interpreters/Expand.h @@ -18,16 +18,7 @@ #include #include #include -#include -#include #include -#include -#include -#include -#include -#include - -#include namespace DB { @@ -125,8 +116,6 @@ class Expand std::set getAllGroupSetColumnNames() const; - static std::shared_ptr sharedExpand(const GroupingSets & groupingSets); - void getGroupingSetsDes(FmtBuffer & buffer) const; static const String grouping_identifier_column_name; From faf8cf37e727f23a4311a3c9cb395c25557dbd76 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Sat, 18 Feb 2023 18:43:50 +0800 Subject: [PATCH 23/31] add test for overlap grouping set Signed-off-by: AilinKid <3148019@qq.com> --- dbms/src/Interpreters/Expand.cpp | 4 +- .../Interpreters/tests/gtest_block_expand.cpp | 120 ++++++++++++++++++ 2 files changed, 121 insertions(+), 3 deletions(-) diff --git a/dbms/src/Interpreters/Expand.cpp b/dbms/src/Interpreters/Expand.cpp index cfe9c7eb870..f5c7e916f9f 100644 --- a/dbms/src/Interpreters/Expand.cpp +++ b/dbms/src/Interpreters/Expand.cpp @@ -109,8 +109,6 @@ void Expand::replicateAndFillNull(Block & block) const added_grouping_id_column[0]->insert(grouping_id); } } - // todo: for some column overlapping in different grouping set, we should copy the overlapped column as a new column - // and the upper layer OP's computation should be shifted and based on the new one's id. Need a plan side control. // replicate the original block rows. size_t existing_columns = block.columns(); @@ -146,7 +144,7 @@ void Expand::replicateAndFillNull(Block & block) const const auto * nullable_column = typeid_cast(block.getByName(grouping_col).column.get()); auto cloned = ColumnNullable::create(nullable_column->getNestedColumnPtr(), nullable_column->getNullMapColumnPtr()); - auto *cloned_one = typeid_cast(cloned->assumeMutable().get()); + auto * cloned_one = typeid_cast(cloned->assumeMutable().get()); /// travel total rows, and set null values for current grouping set column. /// basically looks like: diff --git a/dbms/src/Interpreters/tests/gtest_block_expand.cpp b/dbms/src/Interpreters/tests/gtest_block_expand.cpp index 039a1545888..e85cb0d7185 100644 --- a/dbms/src/Interpreters/tests/gtest_block_expand.cpp +++ b/dbms/src/Interpreters/tests/gtest_block_expand.cpp @@ -38,6 +38,126 @@ class BlockExpand : public ::testing::Test const std::vector col_name{"age", "gender", "country", "region", "zip"}; }; +/// todo: for some column overlapping in different grouping set, we should copy the overlapped column as a new column +/// and the upper layer OP's computation should be shifted and based on the new one's id. Need a plan side control, +/// tiflash side is ready to go. 
+/// +/// just an overlapped case for grouping set 1: <{a}>, grouping set 2: <{a,b}> +/// for count(distinct a) and count(distinct a, b), the planner will clone a new column from a for either one of them: +/// count(distinct a') and count(distinct a, b) = needs one more column a' here (maybe from a lower projection or something) +/// then, according to the index offsets of a' and a,b in PB, describe the grouping set definition. +/// when targeting a''s replicate group, fill a and b with null in the group. +/// when targeting a and b's replicate group, fill a' with null in the group. +TEST_F(BlockExpand, ExpandLogic4Overlap) +try +{ + { + // test basic block expand operation. (two grouping sets: {age}, {gender, country}) + const ColumnsWithTypeAndName + ori_col + = { + toVec(col_name[0], ColumnWithInt64{1, 0, -1}), + toVec(col_name[1], ColumnWithString{"1 ", "1 ", "1 "}), + toVec(col_name[2], ColumnWithString{"1", "2", "3"}), + toVec(col_name[3], ColumnWithUInt64{1, 1, 0}), + }; + // group set {age}, group set {gender, country} + GroupingSet g_age = GroupingSet{GroupingColumnNames{col_name[0]}}; + GroupingSet g_gender_country = GroupingSet{GroupingColumnNames{col_name[1], col_name[2]}}; + GroupingSets group_sets = GroupingSets{g_age, g_gender_country}; + Expand expand = Expand(group_sets); + Block block(ori_col); + auto origin_rows = block.rows(); + + expand.replicateAndFillNull(block); + // assert the column count is increased by 1. + ASSERT_EQ(block.getColumns().size(), size_t(5)); + // assert the new col groupingID is appended. + ASSERT_EQ(block.getColumnsWithTypeAndName()[4].name, "groupingID"); + // assert the block size is equal to origin rows * grouping set num. + auto expand_rows = block.rows(); + auto grouping_set_num = expand.getGroupSetNum(); + ASSERT_EQ(origin_rows * grouping_set_num, expand_rows); // 6 + // assert grouping set columns are nullable.
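// (illustrative aside, not part of the patch) columns 0..2 (age, gender, country) each belong
// to one of the two grouping sets, so replicateAndFillNull must have turned them Nullable;
// column 3 (region) sits in no grouping set and column 4 is the self-filled groupingID, so
// both stay non-nullable: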
+ ASSERT_EQ(block.getColumns()[0].get()->isColumnNullable(), true); + ASSERT_EQ(block.getColumns()[1].get()->isColumnNullable(), true); + ASSERT_EQ(block.getColumns()[2].get()->isColumnNullable(), true); + ASSERT_EQ(block.getColumns()[3].get()->isColumnNullable(), false); + ASSERT_EQ(block.getColumns()[4].get()->isColumnNullable(), false); + + // assert the rows layout + // "age", "gender", "country", "region", "groupingID" + // ori_col 1 null null 1 1 + // rpt_col null "1 " "1" 1 2 + // + // ori_col 0 null null 1 1 + // rpt_col null "1 " "2" 1 2 + // + // ori_col -1 null null 0 1 + // rpt_col null "1 " "3" 0 2 + const auto num4_null = 100; + const auto res0 = ColumnWithInt64{1, num4_null, 0, num4_null, -1, num4_null}; + const auto * col_0 = typeid_cast(block.getColumns()[0].get()); + const auto * col_0_nest = &static_cast(col_0->getNestedColumn()); + for (int i = 0; i < static_cast(expand_rows); ++i) + { + if (res0[i] == num4_null) + { + ASSERT_EQ(col_0->isNullAt(i), true); + } + else + { + ASSERT_EQ(col_0_nest->getElement(i), res0[i]); + } + } + + const auto res1 = ColumnWithString{"null", "1 ", "null", "1 ", "null", "1 "}; + const auto * col_1 = typeid_cast(block.getColumns()[1].get()); + const auto * col_1_nest = &static_cast(col_1->getNestedColumn()); + for (int i = 0; i < static_cast(expand_rows); ++i) + { + if (res1[i] == "null") + { + ASSERT_EQ(col_1->isNullAt(i), true); + } + else + { + ASSERT_EQ(col_1_nest->getDataAt(i), res1[i]); + } + } + + const auto res2 = ColumnWithString{"null", "1", "null", "2", "null", "3"}; + const auto * col_2 = typeid_cast(block.getColumns()[2].get()); + const auto * col_2_nest = &static_cast(col_2->getNestedColumn()); + for (int i = 0; i < static_cast(expand_rows); ++i) + { + if (res2[i] == "null") + { + ASSERT_EQ(col_2->isNullAt(i), true); + } + else + { + ASSERT_EQ(col_2_nest->getDataAt(i), res2[i]); + } + } + + const auto res3 = ColumnWithUInt64{1, 1, 1, 1, 0, 0}; + const auto * col_3 = typeid_cast(block.getColumns()[3].get()); + for (int i = 0; i < static_cast(expand_rows); ++i) + { + ASSERT_EQ(col_3->getElement(i), res3[i]); + } + + const auto res4 = ColumnWithUInt64{1, 2, 1, 2, 1, 2}; + const auto * col_4 = typeid_cast(block.getColumns()[4].get()); + for (int i = 0; i < static_cast(expand_rows); ++i) + { + ASSERT_EQ(col_4->getElement(i), res4[i]); + } + } +} +CATCH + TEST_F(BlockExpand, ExpandLogic) try { From 95c3ea1d83764bcd78efcbbfbfaf504e3060660c Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Mon, 20 Feb 2023 11:32:25 +0800 Subject: [PATCH 24/31] remove debug info Signed-off-by: AilinKid <3148019@qq.com> --- dbms/src/TestUtils/ExecutorTestUtils.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/TestUtils/ExecutorTestUtils.h b/dbms/src/TestUtils/ExecutorTestUtils.h index 2bf97d81a17..13363768a3b 100644 --- a/dbms/src/TestUtils/ExecutorTestUtils.h +++ b/dbms/src/TestUtils/ExecutorTestUtils.h @@ -32,11 +32,11 @@ ColumnsWithTypeAndName readBlock(BlockInputStreamPtr stream); ColumnsWithTypeAndName readBlocks(std::vector streams); #define WRAP_FOR_TEST_BEGIN \ - std::vector planner_bools{true}; \ + std::vector planner_bools{false, true}; \ for (auto enable_planner : planner_bools) \ { \ enablePlanner(enable_planner); \ - std::vector pipeline_bools{}; \ + std::vector pipeline_bools{false}; \ if (enable_planner) \ pipeline_bools.push_back(true); \ for (auto enable_pipeline : pipeline_bools) \ From 126d2b0bda1bbd25f32f54d708eeb40bddcb171b Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Mon, 
20 Feb 2023 11:39:09 +0800 Subject: [PATCH 25/31] remove useless header file and comment Signed-off-by: AilinKid <3148019@qq.com> --- dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp | 1 - dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp | 2 +- dbms/src/TestUtils/ExecutorTestUtils.cpp | 2 -- 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp index 7d7a502beb1..b45ade0f7d2 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp @@ -150,7 +150,6 @@ String DAGExpressionAnalyzerHelper::buildInFunction( DataTypePtr type = inferDataType4Literal(child); argument_types.push_back(type); } - // find common type DataTypePtr resolved_type = getLeastSupertype(argument_types); if (!removeNullable(resolved_type)->equals(*removeNullable(argument_types[0]))) { diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 887ca17c38c..2d9f7c1da25 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -277,7 +277,7 @@ void DAGQueryBlockInterpreter::handleJoin(const tipb::Join & join, DAGPipeline & size_t max_block_size_for_cross_join = settings.max_block_size; fiu_do_on(FailPoints::minimum_block_size_for_cross_join, { max_block_size_for_cross_join = 1; }); - JoinPtr join_ptr = std::make_shared( // make join + JoinPtr join_ptr = std::make_shared( probe_key_names, build_key_names, tiflash_join.kind, diff --git a/dbms/src/TestUtils/ExecutorTestUtils.cpp b/dbms/src/TestUtils/ExecutorTestUtils.cpp index 505f5b2a284..d0b72d10948 100644 --- a/dbms/src/TestUtils/ExecutorTestUtils.cpp +++ b/dbms/src/TestUtils/ExecutorTestUtils.cpp @@ -13,11 +13,9 @@ // limitations under the License. 
#include -#include #include #include #include -#include #include #include #include From c6344fe278fe4a1526a8bf8a26d8ddbab0eb1a42 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Mon, 20 Feb 2023 16:08:28 +0800 Subject: [PATCH 26/31] enable pipeline model Signed-off-by: AilinKid <3148019@qq.com> --- dbms/src/Flash/Pipeline/Pipeline.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Flash/Pipeline/Pipeline.cpp b/dbms/src/Flash/Pipeline/Pipeline.cpp index 67ba2569ae6..0421c1cd4c0 100644 --- a/dbms/src/Flash/Pipeline/Pipeline.cpp +++ b/dbms/src/Flash/Pipeline/Pipeline.cpp @@ -144,6 +144,7 @@ bool Pipeline::isSupported(const tipb::DAGRequest & dag_request) case tipb::ExecType::TypeTableScan: case tipb::ExecType::TypeExchangeSender: case tipb::ExecType::TypeExchangeReceiver: + case tipb::ExecType::TypeExpand: return true; default: is_supported = false; From 034d06a0284e4bf251d7738b77ad59f23a0e3652 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Mon, 20 Feb 2023 23:54:47 +0800 Subject: [PATCH 27/31] fix single block test Signed-off-by: AilinKid <3148019@qq.com> --- .../Coprocessor/DAGQueryBlockInterpreter.cpp | 10 ++++++- dbms/src/Flash/tests/gtest_interpreter.out | 28 +++++++++---------- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 2d9f7c1da25..169f25c23be 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -137,16 +137,24 @@ AnalysisResult analyzeExpressions( if (query_block.limit_or_topn && query_block.limit_or_topn->tp() == tipb::ExecType::TypeTopN) { res.order_columns = analyzer.appendOrderBy(chain, query_block.limit_or_topn->topn()); - chain.addStep(); } if (query_block.expand) { + chain.addStep(); res.before_expand = analyzer.appendExpand(query_block.expand->expand(), chain); } const auto & dag_context = *context.getDAGContext(); // Append final project results if needed. + if (query_block.isRootQueryBlock()) + { + std::cout << "Wocao1" << std::endl; + } + else + { + std::cout << "Wocao2" << std::endl; + } final_project = query_block.isRootQueryBlock() ? 
        analyzer.appendFinalProjectForRootQueryBlock(
            chain,
diff --git a/dbms/src/Flash/tests/gtest_interpreter.out b/dbms/src/Flash/tests/gtest_interpreter.out
index 89e323ef5e8..25f1bf2c0eb 100644
--- a/dbms/src/Flash/tests/gtest_interpreter.out
+++ b/dbms/src/Flash/tests/gtest_interpreter.out
@@ -20,10 +20,10 @@ Union:
 ~result:
 Union:
  SharedQuery x 10:
-  Limit, limit = 10
-  Union:
-   Limit x 10, limit = 10
-    Expression:
+  Expression:
+   Limit, limit = 10
+   Union:
+    Limit x 10, limit = 10
     Expression:
      Filter:
       SharedQuery:
@@ -84,10 +84,10 @@ Union:
 ~result:
 Union:
  SharedQuery x 10:
-  Limit, limit = 10
-  Union:
-   Limit x 10, limit = 10
-    Expression:
+  Expression:
+   Limit, limit = 10
+   Union:
+    Limit x 10, limit = 10
     Expression:
      Expression:
       Expression:
@@ -423,10 +423,10 @@ CreatingSets
  Union:
   MockExchangeSender x 20
    SharedQuery:
-    Limit, limit = 10
-    Union:
-     Limit x 20, limit = 10
-      Expression:
+    Expression:
+     Limit, limit = 10
+     Union:
+      Limit x 20, limit = 10
       Expression:
        SharedQuery:
         ParallelAggregating, max_threads: 20, final: true
@@ -439,8 +439,8 @@ CreatingSets
 ~test_suite_name: ListBase
 ~result_index: 0
 ~result:
-Limit, limit = 10
- Expression:
+Expression:
+ Limit, limit = 10
  Aggregating
   Expression:
    Filter:

From 3f954d3dd2a5d54b19498bb304faa49e890ed33a Mon Sep 17 00:00:00 2001
From: AilinKid <3148019@qq.com>
Date: Tue, 21 Feb 2023 14:57:36 +0800
Subject: [PATCH 28/31] address haisheng's comments

Signed-off-by: AilinKid <3148019@qq.com>
---
 .../Coprocessor/DAGExpressionAnalyzer.cpp     |  4 ----
 .../Coprocessor/DAGQueryBlockInterpreter.cpp  | 21 ++++++++------
 .../Flash/Planner/Plans/PhysicalExpand.cpp    | 29 ++++--------------
 dbms/src/Flash/tests/gtest_interpreter.out    | 30 +++++++++----------
 dbms/src/Interpreters/Expand.cpp              |  9 +++---
 dbms/src/Interpreters/Expand.h                |  2 +-
 dbms/src/Interpreters/ExpressionActions.h     |  1 -
 7 files changed, 41 insertions(+), 55 deletions(-)

diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp
index 370505c9b8c..197d493fe4a 100644
--- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp
@@ -822,10 +822,6 @@ GroupingSets DAGExpressionAnalyzer::buildExpandGroupingColumns(
     group_exprs_columns.reserve(group_exprs.grouping_expr().size());
     for (const auto & group_expr : group_exprs.grouping_expr())
     {
-        if (group_expr.tp() != tipb::ColumnRef)
-        {
-            throw TiFlashException("grouping sets expression should be column expr", Errors::Coprocessor::BadRequest);
-        }
         String cp_name = getActions(group_expr, actions);
         // tidb expression computation is based on column index offset child's chunk schema, change to ck block column name here.
        group_exprs_columns.emplace_back(cp_name);
diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp
index 169f25c23be..61b8bfe1550 100644
--- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp
@@ -78,9 +78,11 @@ struct AnalysisResult
     ExpressionActionsPtr before_where;
     ExpressionActionsPtr before_aggregation;
     ExpressionActionsPtr before_having;
-    ExpressionActionsPtr before_order_and_select;
-    ExpressionActionsPtr final_projection;
+    // ExpressionActionsPtr before_order_and_select;
+    ExpressionActionsPtr before_order;
     ExpressionActionsPtr before_expand;
+    ExpressionActionsPtr before_select;
+    ExpressionActionsPtr final_projection;
 
     String filter_column_name;
     String having_column_name;
@@ -137,16 +139,18 @@ AnalysisResult analyzeExpressions(
     if (query_block.limit_or_topn && query_block.limit_or_topn->tp() == tipb::ExecType::TypeTopN)
     {
         res.order_columns = analyzer.appendOrderBy(chain, query_block.limit_or_topn->topn());
+        res.before_order = chain.getLastActions();
+        chain.addStep();
     }
 
     if (query_block.expand)
     {
-        chain.addStep();
         res.before_expand = analyzer.appendExpand(query_block.expand->expand(), chain);
+        chain.addStep();
     }
 
     const auto & dag_context = *context.getDAGContext();
     // Append final project results if needed.
     final_project = query_block.isRootQueryBlock() ?
         analyzer.appendFinalProjectForRootQueryBlock(
             chain,
@@ -158,7 +162,7 @@ AnalysisResult analyzeExpressions(
         chain,
         query_block.qb_column_prefix);
 
-    res.before_order_and_select = chain.getLastActions();
+    res.before_select = chain.getLastActions();
 
     chain.finalize();
     chain.clear();
@@ -676,9 +680,9 @@ void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline)
         executeWhere(pipeline, res.before_having, res.having_column_name, "execute having");
         recordProfileStreams(pipeline, query_block.having_name);
     }
-    if (res.before_order_and_select)
+    if (res.before_order)
     {
-        executeExpression(pipeline, res.before_order_and_select, log, "before order and select");
+        executeExpression(pipeline, res.before_order, log, "before order");
     }
 
     if (!res.order_columns.empty())
@@ -704,6 +708,11 @@ void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline)
         recordProfileStreams(pipeline, query_block.expand_name);
     }
 
+    if (res.before_select)
+    {
+        executeExpression(pipeline, res.before_select, log, "before select");
+    }
+
     // execute final project action
     executeProject(pipeline, final_project, "final projection");
 
diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp
index 5e0294a5973..927448b13ec 100644
--- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp
+++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp
@@ -49,11 +49,11 @@ PhysicalPlanNodePtr PhysicalExpand::build(
 
     DAGExpressionAnalyzer analyzer{child->getSchema(), context};
     ExpressionActionsPtr before_expand_actions = PhysicalPlanHelper::newActions(child->getSampleBlock());
-    ExpressionActionsPtr expand_actions_itself = PhysicalPlanHelper::newActions(child->getSampleBlock());
 
     auto grouping_sets = analyzer.buildExpandGroupingColumns(expand, before_expand_actions);
     auto expand_action = ExpressionAction::expandSource(grouping_sets);
-    expand_actions_itself->add(expand_action);
+    // include expand action itself.
+ before_expand_actions->add(expand_action); // construct sample block. NamesAndTypes expand_output_columns; @@ -70,7 +70,7 @@ PhysicalPlanNodePtr PhysicalExpand::build( log->identifier(), child, expand_action.expand, - expand_actions_itself, + before_expand_actions, Block(expand_output_columns)); return physical_expand; @@ -79,11 +79,7 @@ PhysicalPlanNodePtr PhysicalExpand::build( void PhysicalExpand::expandTransform(DAGPipeline & child_pipeline) { - String expand_extra_info = fmt::format("expand, expand_executor_id = {}", execId()); - FmtBuffer fb; - fb.append(": grouping set "); - shared_expand->getGroupingSetsDes(fb); - expand_extra_info.append(fb.toString()); + String expand_extra_info = fmt::format("expand, expand_executor_id = {}: grouping set {}", execId(), shared_expand->getGroupingSetsDes()); child_pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, expand_actions, log->identifier()); stream->setExtraInfo(expand_extra_info); @@ -107,22 +103,7 @@ void PhysicalExpand::buildBlockInputStreamImpl(DAGPipeline & pipeline, Context & void PhysicalExpand::finalize(const Names & parent_require) { FinalizeHelper::checkSchemaContainsParentRequire(schema, parent_require); - Names required_output; - required_output.reserve(shared_expand->getGroupSetNum()); // grouping set column should be existed in the child output schema. - auto name_set = shared_expand->getAllGroupSetColumnNames(); - // append parent_require column it may expect self-filled groupingID. - for (const auto & one : parent_require) - { - if (one != Expand::grouping_identifier_column_name) - { - name_set.insert(one); - } - } - for (const auto & grouping_name : name_set) - { - required_output.emplace_back(grouping_name); - } - child->finalize(required_output); + child->finalize(expand_actions->getRequiredColumns()); } const Block & PhysicalExpand::getSampleBlock() const diff --git a/dbms/src/Flash/tests/gtest_interpreter.out b/dbms/src/Flash/tests/gtest_interpreter.out index 25f1bf2c0eb..65a46dc9842 100644 --- a/dbms/src/Flash/tests/gtest_interpreter.out +++ b/dbms/src/Flash/tests/gtest_interpreter.out @@ -7,7 +7,7 @@ Union: MergeSorting, limit = 10 Union: PartialSorting x 10: limit = 10 - Expression: + Expression: Filter: SharedQuery: ParallelAggregating, max_threads: 10, final: true @@ -21,10 +21,10 @@ Union: Union: SharedQuery x 10: Expression: - Limit, limit = 10 - Union: - Limit x 10, limit = 10 - Expression: + Expression: + Limit, limit = 10 + Union: + Limit x 10, limit = 10 Filter: SharedQuery: ParallelAggregating, max_threads: 10, final: true @@ -90,7 +90,7 @@ Union: Limit x 10, limit = 10 Expression: Expression: - Expression: + Expression: Filter: Expression: Expression: @@ -138,7 +138,7 @@ Union: ~result: Union: Expression x 10: - Expression: + Expression: SharedQuery: Window, function: {row_number}, frame: {type: Rows, boundary_begin: Current, boundary_end: Current} Expression: @@ -153,7 +153,7 @@ Union: ~result: Union: Expression x 10: - Expression: + Expression: Expression: Expression: SharedQuery: @@ -170,7 +170,7 @@ Union: ~result: Union: Expression x 10: - Expression: + Expression: Expression: Expression: SharedQuery: @@ -191,7 +191,7 @@ Union: ~result: Union: Expression x 8: - Expression: + Expression: Window: , function: {row_number}, frame: {type: Rows, boundary_begin: Current, boundary_end: Current} Expression: MergeSorting: , limit = 0 @@ -215,7 +215,7 @@ Union: ~result: Union: Expression x 10: - Expression: + Expression: SharedQuery: Window, function: {row_number}, frame: {type: Rows, 
boundary_begin: Current, boundary_end: Current} Expression: @@ -424,10 +424,10 @@ CreatingSets MockExchangeSender x 20 SharedQuery: Expression: - Limit, limit = 10 - Union: - Limit x 20, limit = 10 - Expression: + Expression: + Limit, limit = 10 + Union: + Limit x 20, limit = 10 SharedQuery: ParallelAggregating, max_threads: 20, final: true Expression x 20: diff --git a/dbms/src/Interpreters/Expand.cpp b/dbms/src/Interpreters/Expand.cpp index f5c7e916f9f..0fc84578fcd 100644 --- a/dbms/src/Interpreters/Expand.cpp +++ b/dbms/src/Interpreters/Expand.cpp @@ -14,16 +14,15 @@ #include #include +#include #include #include #include -#include "DataTypes/DataTypesNumber.h" - namespace DB { -namespace /// anonymous namespace for storing private function utils. +namespace { void convertColumnToNullable(ColumnWithTypeAndName & column) { @@ -37,8 +36,9 @@ Expand::Expand(const DB::GroupingSets & gss) : group_sets_names(gss) {} -void Expand::getGroupingSetsDes(FmtBuffer & buffer) const +String Expand::getGroupingSetsDes() const { + FmtBuffer buffer; buffer.append("["); for (const auto & grouping_set : group_sets_names) { @@ -52,6 +52,7 @@ void Expand::getGroupingSetsDes(FmtBuffer & buffer) const buffer.append("}"); } buffer.append("]"); + return buffer.toString(); } /// for cases like: select count(distinct a), count(distinct b) from t; diff --git a/dbms/src/Interpreters/Expand.h b/dbms/src/Interpreters/Expand.h index d23af17cafa..e3e52a4d983 100644 --- a/dbms/src/Interpreters/Expand.h +++ b/dbms/src/Interpreters/Expand.h @@ -116,7 +116,7 @@ class Expand std::set getAllGroupSetColumnNames() const; - void getGroupingSetsDes(FmtBuffer & buffer) const; + String getGroupingSetsDes() const; static const String grouping_identifier_column_name; diff --git a/dbms/src/Interpreters/ExpressionActions.h b/dbms/src/Interpreters/ExpressionActions.h index f93266529f5..c25e8e7193a 100644 --- a/dbms/src/Interpreters/ExpressionActions.h +++ b/dbms/src/Interpreters/ExpressionActions.h @@ -18,7 +18,6 @@ #include #include #include -#include #include #include From 0eb302935ba056e93bb76bda331b4f42509f0e90 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Tue, 21 Feb 2023 17:30:48 +0800 Subject: [PATCH 29/31] address haisheng's comment 2 Signed-off-by: AilinKid <3148019@qq.com> --- .../Flash/Planner/Plans/PhysicalExpand.cpp | 14 ++++-- dbms/src/Flash/Planner/Plans/PhysicalExpand.h | 5 +-- dbms/src/Interpreters/Expand.cpp | 44 +++++++------------ dbms/src/Interpreters/Expand.h | 5 ++- dbms/src/Interpreters/ExpressionActions.cpp | 6 +++ .../TestUtils/tests/gtest_mock_executors.cpp | 32 ++++++++++++++ 6 files changed, 70 insertions(+), 36 deletions(-) diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp index 927448b13ec..ddf7a959d61 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp +++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp @@ -62,7 +62,7 @@ PhysicalPlanNodePtr PhysicalExpand::build( { expand_output_columns.emplace_back(one.name, expand_action.expand->isInGroupSetColumn(one.name) ? 
makeNullable(one.type) : one.type); } - expand_output_columns.emplace_back(expand_action.expand->grouping_identifier_column_name, expand_action.expand->grouping_identifier_column_type); + expand_output_columns.emplace_back(Expand::grouping_identifier_column_name, Expand::grouping_identifier_column_type); auto physical_expand = std::make_shared( executor_id, @@ -70,8 +70,7 @@ PhysicalPlanNodePtr PhysicalExpand::build( log->identifier(), child, expand_action.expand, - before_expand_actions, - Block(expand_output_columns)); + before_expand_actions); return physical_expand; } @@ -103,11 +102,18 @@ void PhysicalExpand::buildBlockInputStreamImpl(DAGPipeline & pipeline, Context & void PhysicalExpand::finalize(const Names & parent_require) { FinalizeHelper::checkSchemaContainsParentRequire(schema, parent_require); + Names required_output = parent_require; + required_output.emplace_back(Expand::grouping_identifier_column_name); + expand_actions->finalize(required_output); + + // do the child finalize before require column changed after expand_action finalization. child->finalize(expand_actions->getRequiredColumns()); + FinalizeHelper::prependProjectInputIfNeed(expand_actions, child->getSampleBlock().columns()); + FinalizeHelper::checkSampleBlockContainsParentRequire(getSampleBlock(), parent_require); } const Block & PhysicalExpand::getSampleBlock() const { - return sample_block; + return expand_actions->getSampleBlock(); } } // namespace DB diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.h b/dbms/src/Flash/Planner/Plans/PhysicalExpand.h index bca4f9beedd..fa668b64114 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.h +++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.h @@ -37,12 +37,10 @@ class PhysicalExpand : public PhysicalUnary const String & req_id, const PhysicalPlanNodePtr & child_, const std::shared_ptr & shared_expand, - const ExpressionActionsPtr & expand_actions, - const Block & sample_block_) + const ExpressionActionsPtr & expand_actions) : PhysicalUnary(executor_id_, PlanType::Expand, schema_, req_id, child_) , shared_expand(shared_expand) , expand_actions(expand_actions) - , sample_block(sample_block_) {} void finalize(const Names & parent_require) override; @@ -57,6 +55,5 @@ class PhysicalExpand : public PhysicalUnary void buildBlockInputStreamImpl(DAGPipeline & pipeline, Context & context, size_t max_streams) override; std::shared_ptr shared_expand; ExpressionActionsPtr expand_actions; - Block sample_block; }; } // namespace DB diff --git a/dbms/src/Interpreters/Expand.cpp b/dbms/src/Interpreters/Expand.cpp index 0fc84578fcd..45b86951041 100644 --- a/dbms/src/Interpreters/Expand.cpp +++ b/dbms/src/Interpreters/Expand.cpp @@ -34,7 +34,9 @@ void convertColumnToNullable(ColumnWithTypeAndName & column) Expand::Expand(const DB::GroupingSets & gss) : group_sets_names(gss) -{} +{ + collectNameSet(); +} String Expand::getGroupingSetsDes() const { @@ -92,11 +94,6 @@ void Expand::replicateAndFillNull(Block & block) const // reserve N times of current block rows size. grouping_id_column_data.reserve(block.rows() * replicate_times_for_one_row); - // prepare added mutable grouping id column. - MutableColumns added_grouping_id_column; - added_grouping_id_column.reserve(1); - added_grouping_id_column.push_back(grouping_id_column->getPtr()); - for (size_t i = 0; i < origin_rows; ++i) { current_offset += replicate_times_for_one_row; @@ -107,7 +104,7 @@ void Expand::replicateAndFillNull(Block & block) const { // start from 1. 
Field grouping_id = j + 1; - added_grouping_id_column[0]->insert(grouping_id); + grouping_id_column->insert(grouping_id); } } @@ -123,10 +120,10 @@ void Expand::replicateAndFillNull(Block & block) const block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->convertToFullColumnIfConst(); // for every existing column, if the column is a grouping set column, make it nullable. - if (isInGroupSetColumn(block.safeGetByPosition(i).name) && !block.safeGetByPosition(i).column->isColumnNullable()) - { + auto & column = block.safeGetByPosition(i); + if (isInGroupSetColumn(column.name) && !column.column->isColumnNullable()) convertColumnToNullable(block.getByPosition(i)); - } + if (!offsets_to_replicate->empty()) // replicate it. block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate); @@ -188,26 +185,16 @@ void Expand::replicateAndFillNull(Block & block) const } // finish of adjustment for one grouping set columns. (by now one column for one grouping set). } - block.insert(ColumnWithTypeAndName(std::move(added_grouping_id_column[0]), std::make_shared(), std::move("groupingID"))); + block.insert(ColumnWithTypeAndName(std::move(grouping_id_column), grouping_identifier_column_type, grouping_identifier_column_name)); // return input from block. } bool Expand::isInGroupSetColumn(String name) const { - for (const auto & it1 : group_sets_names) + for (const auto & it1 : name_set) { - // for every grouping set. - for (const auto & it2 : it1) - { - // for every grouping exprs - for (const auto & it3 : it2) - { - if (it3 == name) - { - return true; - } - } - } + if (it1 == name) + return true; } return false; } @@ -218,9 +205,13 @@ const GroupingColumnNames & Expand::getGroupSetColumnNamesByOffset(size_t offset return group_sets_names[offset][0]; } -std::set Expand::getAllGroupSetColumnNames() const +const std::set & Expand::getAllGroupSetColumnNames() const +{ + return name_set; +} + +void Expand::collectNameSet() { - std::set name_set; for (const auto & it1 : group_sets_names) { // for every grouping set. 
@@ -233,7 +224,6 @@ std::set Expand::getAllGroupSetColumnNames() const } } } - return name_set; } const std::string Expand::grouping_identifier_column_name = "groupingID"; diff --git a/dbms/src/Interpreters/Expand.h b/dbms/src/Interpreters/Expand.h index e3e52a4d983..51c3ae0422b 100644 --- a/dbms/src/Interpreters/Expand.h +++ b/dbms/src/Interpreters/Expand.h @@ -114,15 +114,18 @@ class Expand const GroupingColumnNames & getGroupSetColumnNamesByOffset(size_t offset) const; - std::set getAllGroupSetColumnNames() const; + const std::set & getAllGroupSetColumnNames() const; String getGroupingSetsDes() const; + void collectNameSet(); + static const String grouping_identifier_column_name; static const DataTypePtr grouping_identifier_column_type; private: GroupingSets group_sets_names; + std::set name_set; }; } // namespace DB diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index 0c49880850b..826f86d1059 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -48,6 +48,12 @@ Names ExpressionAction::getNeededColumns() const for (const auto & column : projections) res.push_back(column.first); + if (expand) + { + for (const auto & column : expand->getAllGroupSetColumnNames()) + res.push_back(column); + } + if (!source_name.empty()) res.push_back(source_name); diff --git a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp index f1826226aeb..768f514a66f 100644 --- a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp +++ b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp @@ -319,6 +319,38 @@ try } CATCH +TEST_F(MockDAGRequestTest, Expand) +try +{ + auto request = context.scan("test_db", "test_table").expand(MockVVecColumnNameVec{ + MockVecColumnNameVec{ + MockColumnNameVec{"s1"}, + }, + MockVecColumnNameVec{ + MockColumnNameVec{"s2"}, + }, + }) + .build(context); + { + String expected = "expand_1 | expanded_by: [<{<0, String>}><{<1, String>}>]\n table_scan_0 | {<0, String>, <1, String>}"; + ASSERT_DAGREQUEST_EQAUL(expected, request); + } + request = context.receive("sender_1").expand(MockVVecColumnNameVec{ + MockVecColumnNameVec{ + MockColumnNameVec{"s1"}, + }, + MockVecColumnNameVec{ + MockColumnNameVec{"s2"}, + }, + }) + .build(context); + { + String expected = "expand_1 | expanded_by: [<{<0, String>}><{<1, String>}>]\n exchange_receiver_0 | type:PassThrough, {<0, String>, <1, String>, <2, String>}"; + ASSERT_DAGREQUEST_EQAUL(expected, request); + } +} +CATCH + TEST_F(MockDAGRequestTest, MockWindow) try { From b3518e3a3f313415883dcd28fe6629ac6e395b1f Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Wed, 22 Feb 2023 13:38:36 +0800 Subject: [PATCH 30/31] address comment: move plan test and refactor some code Signed-off-by: AilinKid <3148019@qq.com> --- .../Coprocessor/DAGQueryBlockInterpreter.cpp | 5 ++-- .../Flash/Planner/Plans/PhysicalExpand.cpp | 5 ---- .../src/Flash/tests/gtest_expand_executor.cpp | 27 ------------------- dbms/src/Flash/tests/gtest_interpreter.cpp | 25 +++++++++++++++++ dbms/src/Flash/tests/gtest_interpreter.out | 27 +++++++++++++++++++ .../Flash/tests/gtest_planner_interpreter.cpp | 25 +++++++++++++++++ .../Flash/tests/gtest_planner_interpreter.out | 27 +++++++++++++++++++ dbms/src/Interpreters/Expand.cpp | 19 ++++++------- dbms/src/Interpreters/Expand.h | 12 ++++----- dbms/src/Interpreters/ExpressionActions.h | 1 - .../Interpreters/tests/gtest_block_expand.cpp | 8 +++--- 
.../TestUtils/tests/gtest_mock_executors.cpp | 6 +++-- 12 files changed, 128 insertions(+), 59 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 61b8bfe1550..3db44c4a279 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -30,8 +30,6 @@ #include #include #include -#include -#include #include #include #include @@ -47,7 +45,6 @@ #include #include #include -#include #include #include #include @@ -758,8 +755,10 @@ void DAGQueryBlockInterpreter::executeLimit(DAGPipeline & pipeline) void DAGQueryBlockInterpreter::executeExpand(DAGPipeline & pipeline, const ExpressionActionsPtr & expr) { + String expand_extra_info = fmt::format("expand: grouping set {}", expr->getActions().back().expand->getGroupingSetsDes()); pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, expr, log->identifier()); + stream->setExtraInfo(expand_extra_info); }); } diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp index ddf7a959d61..a710ac0fc40 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp +++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp @@ -39,8 +39,6 @@ PhysicalPlanNodePtr PhysicalExpand::build( { assert(child); - child->finalize(); - if (unlikely(expand.grouping_sets().empty())) { //should not reach here @@ -55,7 +53,6 @@ PhysicalPlanNodePtr PhysicalExpand::build( // include expand action itself. before_expand_actions->add(expand_action); - // construct sample block. NamesAndTypes expand_output_columns; auto child_header = child->getSchema(); for (const auto & one : child_header) @@ -87,7 +84,6 @@ void PhysicalExpand::expandTransform(DAGPipeline & child_pipeline) void PhysicalExpand::buildPipelineExec(PipelineExecGroupBuilder & group_builder, Context &, size_t) { - auto input_header = group_builder.getCurrentHeader(); group_builder.transform([&](auto & builder) { builder.appendTransformOp(std::make_unique(group_builder.exec_status, expand_actions, log->identifier())); }); @@ -106,7 +102,6 @@ void PhysicalExpand::finalize(const Names & parent_require) required_output.emplace_back(Expand::grouping_identifier_column_name); expand_actions->finalize(required_output); - // do the child finalize before require column changed after expand_action finalization. child->finalize(expand_actions->getRequiredColumns()); FinalizeHelper::prependProjectInputIfNeed(expand_actions, child->getSampleBlock().columns()); FinalizeHelper::checkSampleBlockContainsParentRequire(getSampleBlock(), parent_require); diff --git a/dbms/src/Flash/tests/gtest_expand_executor.cpp b/dbms/src/Flash/tests/gtest_expand_executor.cpp index 6401edbc424..451a9430e98 100644 --- a/dbms/src/Flash/tests/gtest_expand_executor.cpp +++ b/dbms/src/Flash/tests/gtest_expand_executor.cpp @@ -404,33 +404,6 @@ try toNullableVec({{}, {}}), toVec({2, 2}), }); - - - /// assert the input stream plan format. 
(under planner-enabled mode) - String expected = R"( -CreatingSets - Union: - HashJoinBuild x 10: , join_kind = Inner - Expression: - Expression: - Expression: - MockTableScan - Union: - Expression x 10: - SharedQuery: - MergeSorting, limit = 2 - Union: - PartialSorting x 10: limit = 2 - Expression: - Expression: - HashJoinProbe: - Expression: - Expression: }{}]> - Expression: - SharedQuery: - ParallelAggregating, max_threads: 10, final: true - MockExchangeReceiver x 10)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); } CATCH diff --git a/dbms/src/Flash/tests/gtest_interpreter.cpp b/dbms/src/Flash/tests/gtest_interpreter.cpp index e129c5587a5..634d175a9b0 100644 --- a/dbms/src/Flash/tests/gtest_interpreter.cpp +++ b/dbms/src/Flash/tests/gtest_interpreter.cpp @@ -394,5 +394,30 @@ try } } CATCH + +TEST_F(InterpreterExecuteTest, ExpandPlan) +try +{ + { + auto request = context + .receive("sender_1") + .aggregation({Count(col("s1"))}, {col("s2")}) + .expand(MockVVecColumnNameVec{ + MockVecColumnNameVec{ + MockColumnNameVec{"count(s1)"}, + }, + MockVecColumnNameVec{ + MockColumnNameVec{"s2"}, + }, + }) + .join(context.scan("test_db", "test_table").project({"s2"}), tipb::JoinType::TypeInnerJoin, {col("s2")}) + .project({"count(s1)", "groupingID"}) + .topN({{"groupingID", true}}, 2) + .build(context); + runAndAssert(request, 10); + } +} +CATCH + } // namespace tests } // namespace DB diff --git a/dbms/src/Flash/tests/gtest_interpreter.out b/dbms/src/Flash/tests/gtest_interpreter.out index 65a46dc9842..58e48ec0db4 100644 --- a/dbms/src/Flash/tests/gtest_interpreter.out +++ b/dbms/src/Flash/tests/gtest_interpreter.out @@ -461,3 +461,30 @@ Union: Filter: MockTableScan @ +~test_suite_name: ExpandPlan +~result_index: 0 +~result: +CreatingSets + Union: + HashJoinBuild x 10: , join_kind = Inner + Expression: + Expression: + Expression: + Expression: + MockTableScan + Union: + SharedQuery x 10: + Expression: + MergeSorting, limit = 2 + Union: + PartialSorting x 10: limit = 2 + Expression: + Expression: + Expression: + HashJoinProbe: + Expression: + Expression: }{}]> + SharedQuery: + ParallelAggregating, max_threads: 10, final: true + MockExchangeReceiver x 10 +@ diff --git a/dbms/src/Flash/tests/gtest_planner_interpreter.cpp b/dbms/src/Flash/tests/gtest_planner_interpreter.cpp index f4fd9be7613..5b9bea7b28f 100644 --- a/dbms/src/Flash/tests/gtest_planner_interpreter.cpp +++ b/dbms/src/Flash/tests/gtest_planner_interpreter.cpp @@ -499,5 +499,30 @@ try } CATCH + +TEST_F(PlannerInterpreterExecuteTest, ExpandPlan) +try +{ + { + auto request = context + .receive("sender_1") + .aggregation({Count(col("s1"))}, {col("s2")}) + .expand(MockVVecColumnNameVec{ + MockVecColumnNameVec{ + MockColumnNameVec{"count(s1)"}, + }, + MockVecColumnNameVec{ + MockColumnNameVec{"s2"}, + }, + }) + .join(context.scan("test_db", "test_table").project({"s2"}), tipb::JoinType::TypeInnerJoin, {col("s2")}) + .project({"count(s1)", "groupingID"}) + .topN({{"groupingID", true}}, 2) + .build(context); + runAndAssert(request, 10); + } +} +CATCH + } // namespace tests } // namespace DB diff --git a/dbms/src/Flash/tests/gtest_planner_interpreter.out b/dbms/src/Flash/tests/gtest_planner_interpreter.out index 8c46e98de28..549cdc35503 100644 --- a/dbms/src/Flash/tests/gtest_planner_interpreter.out +++ b/dbms/src/Flash/tests/gtest_planner_interpreter.out @@ -709,3 +709,30 @@ Union: Filter MockTableScan @ +~test_suite_name: ExpandPlan +~result_index: 0 +~result: +CreatingSets + Union: + HashJoinBuild x 10: , join_kind = Inner + 
Expression: + Expression: + Expression: + MockTableScan + Union: + Expression x 10: + SharedQuery: + MergeSorting, limit = 2 + Union: + PartialSorting x 10: limit = 2 + Expression: + Expression: + HashJoinProbe: + Expression: + Expression: }{}]> + Expression: + SharedQuery: + ParallelAggregating, max_threads: 10, final: true + Expression x 10: + MockExchangeReceiver +@ diff --git a/dbms/src/Interpreters/Expand.cpp b/dbms/src/Interpreters/Expand.cpp index 45b86951041..fa36b41e893 100644 --- a/dbms/src/Interpreters/Expand.cpp +++ b/dbms/src/Interpreters/Expand.cpp @@ -92,8 +92,10 @@ void Expand::replicateAndFillNull(Block & block) const auto grouping_id_column = ColumnUInt64::create(); auto & grouping_id_column_data = grouping_id_column->getData(); // reserve N times of current block rows size. - grouping_id_column_data.reserve(block.rows() * replicate_times_for_one_row); + grouping_id_column_data.resize(origin_rows * replicate_times_for_one_row); + // manipulate the data directly to avoid the virtual function overheads. + size_t grouping_id_column_index = 0; for (size_t i = 0; i < origin_rows; ++i) { current_offset += replicate_times_for_one_row; @@ -103,8 +105,9 @@ void Expand::replicateAndFillNull(Block & block) const for (UInt64 j = 0; j < replicate_times_for_one_row; ++j) { // start from 1. - Field grouping_id = j + 1; - grouping_id_column->insert(grouping_id); + auto grouping_id = j + 1; + grouping_id_column_data[grouping_id_column_index] = grouping_id; + ++grouping_id_column_index; } } @@ -125,8 +128,7 @@ void Expand::replicateAndFillNull(Block & block) const convertColumnToNullable(block.getByPosition(i)); if (!offsets_to_replicate->empty()) - // replicate it. - block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate); + column.column = column.column->replicate(*offsets_to_replicate); } } @@ -191,12 +193,7 @@ void Expand::replicateAndFillNull(Block & block) const bool Expand::isInGroupSetColumn(String name) const { - for (const auto & it1 : name_set) - { - if (it1 == name) - return true; - } - return false; + return name_set.find(name) != name_set.end(); } const GroupingColumnNames & Expand::getGroupSetColumnNamesByOffset(size_t offset) const diff --git a/dbms/src/Interpreters/Expand.h b/dbms/src/Interpreters/Expand.h index 51c3ae0422b..076abe7b83b 100644 --- a/dbms/src/Interpreters/Expand.h +++ b/dbms/src/Interpreters/Expand.h @@ -108,23 +108,23 @@ class Expand // to illustrate what group this row is targeted for. void replicateAndFillNull(Block & block) const; - size_t getGroupSetNum() const { return group_sets_names.size(); } - bool isInGroupSetColumn(String name) const; - const GroupingColumnNames & getGroupSetColumnNamesByOffset(size_t offset) const; - const std::set & getAllGroupSetColumnNames() const; String getGroupingSetsDes() const; - void collectNameSet(); - static const String grouping_identifier_column_name; static const DataTypePtr grouping_identifier_column_type; private: + void collectNameSet(); + + size_t getGroupSetNum() const { return group_sets_names.size(); } + + const GroupingColumnNames & getGroupSetColumnNamesByOffset(size_t offset) const; + GroupingSets group_sets_names; std::set name_set; }; diff --git a/dbms/src/Interpreters/ExpressionActions.h b/dbms/src/Interpreters/ExpressionActions.h index c25e8e7193a..83090f675ed 100644 --- a/dbms/src/Interpreters/ExpressionActions.h +++ b/dbms/src/Interpreters/ExpressionActions.h @@ -96,7 +96,6 @@ struct ExpressionAction /// For EXPAND. 
std::shared_ptr expand; - NamesAndTypesList columns_added_by_expand; /// If result_name_ == "", as name "function_name(arguments separated by commas) is used". static ExpressionAction applyFunction( diff --git a/dbms/src/Interpreters/tests/gtest_block_expand.cpp b/dbms/src/Interpreters/tests/gtest_block_expand.cpp index e85cb0d7185..c5756d3e94a 100644 --- a/dbms/src/Interpreters/tests/gtest_block_expand.cpp +++ b/dbms/src/Interpreters/tests/gtest_block_expand.cpp @@ -76,7 +76,7 @@ try ASSERT_EQ(block.getColumnsWithTypeAndName()[4].name, "groupingID"); // assert the block size is equal to origin rows * grouping set num. auto expand_rows = block.rows(); - auto grouping_set_num = expand.getGroupSetNum(); + auto grouping_set_num = 2; ASSERT_EQ(origin_rows * grouping_set_num, expand_rows); // 6 // assert grouping set column are nullable. ASSERT_EQ(block.getColumns()[0].get()->isColumnNullable(), true); @@ -186,7 +186,7 @@ try ASSERT_EQ(block.getColumnsWithTypeAndName()[4].name, "groupingID"); // assert the block size is equal to origin rows * grouping set num. auto expand_rows = block.rows(); - auto grouping_set_num = expand.getGroupSetNum(); + auto grouping_set_num = 2; ASSERT_EQ(origin_rows * grouping_set_num, expand_rows); // 6 // assert grouping set column are nullable. ASSERT_EQ(block.getColumns()[0].get()->isColumnNullable(), false); @@ -283,7 +283,7 @@ try ASSERT_EQ(block.getColumnsWithTypeAndName()[4].name, "groupingID"); // assert the block size is equal to origin rows * grouping set num. auto expand_rows = block.rows(); - auto grouping_set_num = expand.getGroupSetNum(); + auto grouping_set_num = 3; ASSERT_EQ(origin_rows * grouping_set_num, expand_rows); // 9 // assert grouping set column are nullable. ASSERT_EQ(block.getColumns()[0].get()->isColumnNullable(), false); @@ -392,7 +392,7 @@ try ASSERT_EQ(block.getColumnsWithTypeAndName()[4].name, "groupingID"); // assert the block size is equal to origin rows * grouping set num. auto expand_rows = block.rows(); - auto grouping_set_num = expand.getGroupSetNum(); + auto grouping_set_num = 3; ASSERT_EQ(origin_rows, 0); ASSERT_EQ(origin_rows * grouping_set_num, expand_rows); // 0 // assert grouping set column are nullable. 
diff --git a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp index 768f514a66f..a000bf3f87b 100644 --- a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp +++ b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp @@ -332,7 +332,8 @@ try }) .build(context); { - String expected = "expand_1 | expanded_by: [<{<0, String>}><{<1, String>}>]\n table_scan_0 | {<0, String>, <1, String>}"; + String expected = "expand_1 | expanded_by: [<{<0, String>}><{<1, String>}>]\n" + " table_scan_0 | {<0, String>, <1, String>}"; ASSERT_DAGREQUEST_EQAUL(expected, request); } request = context.receive("sender_1").expand(MockVVecColumnNameVec{ @@ -345,7 +346,8 @@ try }) .build(context); { - String expected = "expand_1 | expanded_by: [<{<0, String>}><{<1, String>}>]\n exchange_receiver_0 | type:PassThrough, {<0, String>, <1, String>, <2, String>}"; + String expected = "expand_1 | expanded_by: [<{<0, String>}><{<1, String>}>]\n" + " exchange_receiver_0 | type:PassThrough, {<0, String>, <1, String>, <2, String>}"; ASSERT_DAGREQUEST_EQAUL(expected, request); } } From 6c12fd9c3b74927eddb8f2ae8db5c20d843f5539 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Wed, 22 Feb 2023 15:03:50 +0800 Subject: [PATCH 31/31] add test for gtest_pipeline_interpreter Signed-off-by: AilinKid <3148019@qq.com> --- .../Coprocessor/DAGQueryBlockInterpreter.cpp | 1 - .../tests/gtest_pipeline_interpreter.cpp | 24 ++++++++++ .../tests/gtest_pipeline_interpreter.out | 27 +++++++++++ dbms/src/Interpreters/Expand.cpp | 5 +-- dbms/src/Storages/DeltaMerge/DeltaTree.h | 45 +++++++++++++++---- 5 files changed, 88 insertions(+), 14 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 3db44c4a279..f6e7e69ce44 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -75,7 +75,6 @@ struct AnalysisResult ExpressionActionsPtr before_where; ExpressionActionsPtr before_aggregation; ExpressionActionsPtr before_having; - // ExpressionActionsPtr before_order_and_select; ExpressionActionsPtr before_order; ExpressionActionsPtr before_expand; ExpressionActionsPtr before_select; diff --git a/dbms/src/Flash/tests/gtest_pipeline_interpreter.cpp b/dbms/src/Flash/tests/gtest_pipeline_interpreter.cpp index 1a5478cbdf1..828e14e36ff 100644 --- a/dbms/src/Flash/tests/gtest_pipeline_interpreter.cpp +++ b/dbms/src/Flash/tests/gtest_pipeline_interpreter.cpp @@ -521,5 +521,29 @@ try } CATCH +TEST_F(PipelineInterpreterExecuteTest, ExpandPlan) +try +{ + { + auto request = context + .receive("sender_1") + .aggregation({Count(col("s1"))}, {col("s2")}) + .expand(MockVVecColumnNameVec{ + MockVecColumnNameVec{ + MockColumnNameVec{"count(s1)"}, + }, + MockVecColumnNameVec{ + MockColumnNameVec{"s2"}, + }, + }) + .join(context.scan("test_db", "test_table").project({"s2"}), tipb::JoinType::TypeInnerJoin, {col("s2")}) + .project({"count(s1)", "groupingID"}) + .topN({{"groupingID", true}}, 2) + .build(context); + runAndAssert(request, 10); + } +} +CATCH + } // namespace tests } // namespace DB diff --git a/dbms/src/Flash/tests/gtest_pipeline_interpreter.out b/dbms/src/Flash/tests/gtest_pipeline_interpreter.out index 11a0cddabda..a59672f7809 100644 --- a/dbms/src/Flash/tests/gtest_pipeline_interpreter.out +++ b/dbms/src/Flash/tests/gtest_pipeline_interpreter.out @@ -604,3 +604,30 @@ Union: Filter MockTableScan @ +~test_suite_name: ExpandPlan +~result_index: 0 
+~result: +CreatingSets + Union: + HashJoinBuild x 10: , join_kind = Inner + Expression: + Expression: + Expression: + MockTableScan + Union: + Expression x 10: + SharedQuery: + MergeSorting, limit = 2 + Union: + PartialSorting x 10: limit = 2 + Expression: + Expression: + HashJoinProbe: + Expression: + Expression: }{}]> + Expression: + SharedQuery: + ParallelAggregating, max_threads: 10, final: true + Expression x 10: + MockExchangeReceiver +@ diff --git a/dbms/src/Interpreters/Expand.cpp b/dbms/src/Interpreters/Expand.cpp index fa36b41e893..3910efec76a 100644 --- a/dbms/src/Interpreters/Expand.cpp +++ b/dbms/src/Interpreters/Expand.cpp @@ -94,7 +94,6 @@ void Expand::replicateAndFillNull(Block & block) const // reserve N times of current block rows size. grouping_id_column_data.resize(origin_rows * replicate_times_for_one_row); - // manipulate the data directly to avoid the virtual function overheads. size_t grouping_id_column_index = 0; for (size_t i = 0; i < origin_rows; ++i) { @@ -105,9 +104,7 @@ void Expand::replicateAndFillNull(Block & block) const for (UInt64 j = 0; j < replicate_times_for_one_row; ++j) { // start from 1. - auto grouping_id = j + 1; - grouping_id_column_data[grouping_id_column_index] = grouping_id; - ++grouping_id_column_index; + grouping_id_column_data[grouping_id_column_index++] = j + 1; } } diff --git a/dbms/src/Storages/DeltaMerge/DeltaTree.h b/dbms/src/Storages/DeltaMerge/DeltaTree.h index ad3fd32b3b1..b51575ba732 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaTree.h +++ b/dbms/src/Storages/DeltaMerge/DeltaTree.h @@ -892,8 +892,14 @@ class DeltaTree } public: - DeltaTree() { init(std::make_shared()); } - explicit DeltaTree(const ValueSpacePtr & insert_value_space_) { init(insert_value_space_); } + DeltaTree() + { + init(std::make_shared()); + } + explicit DeltaTree(const ValueSpacePtr & insert_value_space_) + { + init(insert_value_space_); + } DeltaTree(const Self & o); DeltaTree & operator=(const Self & o) @@ -954,10 +960,19 @@ class DeltaTree check(root, true); } - size_t getBytes() { return bytes; } + size_t getBytes() + { + return bytes; + } - size_t getHeight() const { return height; } - EntryIterator begin() const { return EntryIterator(left_leaf, 0, 0); } + size_t getHeight() const + { + return height; + } + EntryIterator begin() const + { + return EntryIterator(left_leaf, 0, 0); + } EntryIterator end() const { Int64 delta = isLeaf(root) ? as(Leaf, root)->getDelta() : as(Intern, root)->getDelta(); @@ -971,11 +986,23 @@ class DeltaTree return std::make_shared>(left_leaf, num_entries, delta); } - CompactedEntriesPtr getCompactedEntries() { return std::make_shared(begin(), end(), num_entries); } + CompactedEntriesPtr getCompactedEntries() + { + return std::make_shared(begin(), end(), num_entries); + } - size_t numEntries() const { return num_entries; } - size_t numInserts() const { return num_inserts; } - size_t numDeletes() const { return num_deletes; } + size_t numEntries() const + { + return num_entries; + } + size_t numInserts() const + { + return num_inserts; + } + size_t numDeletes() const + { + return num_deletes; + } void addDelete(UInt64 rid); void addInsert(UInt64 rid, UInt64 tuple_id);
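
Note on the core transform this series implements: Expand::replicateAndFillNull (dbms/src/Interpreters/Expand.cpp above) replicates every input row once per grouping set, fills NULL into the grouping-set columns that do not belong to the replica's own set, and appends a 1-based groupingID column. The standalone sketch below illustrates that row-level semantic with plain std containers; Value, Row, and GroupingSets here are illustrative stand-ins rather than the TiFlash types, and the real operator works column-wise on IColumn via replicate offsets and convertColumnToNullable, as the diffs show.

#include <iostream>
#include <optional>
#include <set>
#include <string>
#include <utility>
#include <vector>

// Illustrative stand-ins, not TiFlash types: a row is a list of
// (column name, optional value) pairs, and std::nullopt models NULL.
using Value = std::optional<std::string>;
using Row = std::vector<std::pair<std::string, Value>>;
// One grouping set per replica, e.g. {{"a"}, {"b"}} for
// "group by grouping sets ((a), (b))".
using GroupingSets = std::vector<std::set<std::string>>;

// Replicate each row once per grouping set; in replica j keep only the
// grouping columns owned by set j, NULL out the grouping columns of the
// other sets, and append a 1-based groupingID (j + 1, as in Expand.cpp).
std::vector<Row> replicateAndFillNull(const std::vector<Row> & input, const GroupingSets & sets)
{
    // Union of all grouping-set columns: only these may be nulled out;
    // ordinary payload columns pass through unchanged.
    std::set<std::string> grouping_cols;
    for (const auto & s : sets)
        grouping_cols.insert(s.begin(), s.end());

    std::vector<Row> output;
    output.reserve(input.size() * sets.size());
    for (const auto & row : input)
    {
        for (size_t j = 0; j < sets.size(); ++j)
        {
            Row replica = row;
            for (auto & [name, value] : replica)
            {
                if (grouping_cols.count(name) > 0 && sets[j].count(name) == 0)
                    value = std::nullopt; // grouping column of another set
            }
            replica.emplace_back("groupingID", std::to_string(j + 1));
            output.push_back(std::move(replica));
        }
    }
    return output;
}

int main()
{
    // Two grouping sets, as in "select count(distinct a), count(distinct b) from t".
    const GroupingSets sets = {{"a"}, {"b"}};
    const std::vector<Row> input = {{{"a", "a1"}, {"b", "b1"}, {"c", "c1"}}};
    for (const auto & row : replicateAndFillNull(input, sets))
    {
        for (const auto & [name, value] : row)
            std::cout << name << "=" << value.value_or("NULL") << " ";
        std::cout << "\n";
    }
    // Prints:
    //   a=a1 b=NULL c=c1 groupingID=1
    //   a=NULL b=b1 c=c1 groupingID=2
    return 0;
}

Compiled with any C++17 compiler, the sketch prints one replica per grouping set, which is the same shape the gtest_block_expand.cpp assertions above check: row count multiplied by the grouping-set count, grouping columns turned nullable, plus a trailing groupingID column.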