From b70415dbd65d8886396c11087d97dca20615c82d Mon Sep 17 00:00:00 2001
From: AilinKid <314806019@qq.com>
Date: Tue, 6 Dec 2022 14:30:34 +0800
Subject: [PATCH 01/31] repeat initial draft

Signed-off-by: AilinKid <314806019@qq.com>
---
 a.out                                         | Bin 0 -> 85490 bytes
 dbms/src/Columns/ColumnArray.cpp              |   1 +
 dbms/src/Columns/ColumnNullable.h             |   2 +-
 dbms/src/Columns/ColumnsCommon.cpp            |   2 +-
 dbms/src/Common/COWPtr.h                      |   1 +
 dbms/src/Common/HashTable/HashTable.h         |   4 +-
 dbms/src/Common/TiFlashMetrics.h              |   3 +-
 dbms/src/Core/Block.h                         |   3 +-
 dbms/src/Core/ColumnWithTypeAndName.h         |   2 +
 dbms/src/Core/ColumnsWithTypeAndName.h        |   1 +
 .../RepeatSourceBlockInputStream.cpp          |  34 +++
 .../RepeatSourceBlockInputStream.h            |  45 ++++
 dbms/src/DataStreams/SquashingTransform.cpp   |   9 +-
 .../Debug/MockExecutor/RepeatSourceBinder.cpp |  67 +++++
 .../Debug/MockExecutor/RepeatSourceBinder.h   |  42 ++++
 .../Coprocessor/DAGExpressionAnalyzer.cpp     |  69 ++++-
 .../Flash/Coprocessor/DAGExpressionAnalyzer.h |   4 +
 .../DAGExpressionAnalyzerHelper.cpp           |   7 +-
 dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp  |   8 +
 dbms/src/Flash/Coprocessor/DAGQueryBlock.h    |   4 +-
 .../Coprocessor/DAGQueryBlockInterpreter.cpp  |  59 +++--
 .../Coprocessor/DAGQueryBlockInterpreter.h    |   1 +
 dbms/src/Flash/Coprocessor/DAGUtils.cpp       |   2 +-
 dbms/src/Flash/Coprocessor/InterpreterDAG.cpp |   2 +-
 .../Flash/Coprocessor/InterpreterUtils.cpp    |   2 +-
 .../Coprocessor/JoinInterpreterHelper.cpp     |  19 +-
 .../Coprocessor/collectOutputFieldTypes.cpp   |  31 +++
 dbms/src/Flash/Mpp/MPPHandler.cpp             |   1 +
 dbms/src/Flash/Mpp/MPPTask.cpp                |   4 +
 dbms/src/Flash/Mpp/MPPTaskScheduleEntry.h     |   1 +
 dbms/src/Flash/Mpp/MPPTunnelSet.cpp           |   2 +
 .../Flash/Statistics/traverseExecutors.cpp    |   2 +
 .../src/Flash/tests/gtest_filter_executor.cpp |   9 +
 dbms/src/Flash/tests/gtest_interpreter.cpp    |   7 +
 dbms/src/Interpreters/ExpressionActions.cpp   |  53 +++-
 dbms/src/Interpreters/ExpressionActions.h     |   8 +
 dbms/src/Interpreters/Join.cpp                |  38 ++-
 dbms/src/Interpreters/Join.h                  |   4 +-
 dbms/src/Interpreters/NullableUtils.cpp       |   2 +
 dbms/src/Interpreters/Repeat.cpp              | 231 +++++++++++++++++
 dbms/src/Interpreters/Repeat.h                | 139 ++++++++++
 dbms/src/Interpreters/sortBlock.cpp           |   3 +-
 .../Interpreters/tests/gtest_block_repeat.cpp | 238 ++++++++++++++++++
 dbms/src/TestUtils/FunctionTestUtils.cpp      |   4 +
 dbms/src/TestUtils/executorSerializer.cpp     |  30 +++
 dbms/src/TestUtils/mockExecutor.cpp           |  24 ++
 dbms/src/TestUtils/mockExecutor.h             |   5 +
 tai.cpp                                       |  64 +++++
 tai.h                                         |  14 ++
 49 files changed, 1240 insertions(+), 67 deletions(-)
 create mode 100755 a.out
 create mode 100644 dbms/src/DataStreams/RepeatSourceBlockInputStream.cpp
 create mode 100644 dbms/src/DataStreams/RepeatSourceBlockInputStream.h
 create mode 100644 dbms/src/Debug/MockExecutor/RepeatSourceBinder.cpp
 create mode 100644 dbms/src/Debug/MockExecutor/RepeatSourceBinder.h
 create mode 100644 dbms/src/Interpreters/Repeat.cpp
 create mode 100644 dbms/src/Interpreters/Repeat.h
 create mode 100644 dbms/src/Interpreters/tests/gtest_block_repeat.cpp
 create mode 100644 tai.cpp
 create mode 100644 tai.h

diff --git a/a.out b/a.out
new file mode 100755
index 0000000000000000000000000000000000000000..8aed9644943b125062ea04d62dd9d638ddaf2013
GIT binary patch
literal 85490
(base85-encoded binary payload omitted: a.out is a stray build artifact committed with this draft)
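For orientation — an illustrative example, assuming grouping sets ((s1), (s2)) and 1-based grouping IDs as the
draft's comments describe: the repeat source emits each input row once per grouping set; in the copy for set k,
the grouping-set columns not in set k are overwritten with NULL and the appended groupingID column is set to k:

    input row:            (s1 = "a",  s2 = "b")
    output, set 1 (s1):   (s1 = "a",  s2 = NULL, groupingID = 1)
    output, set 2 (s2):   (s1 = NULL, s2 = "b",  groupingID = 2)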
zbN}PigNsJa{qWI0+_3H5#ZSGk$M@Lmwf}4De=hN?8dUzKr|KEcPro%Gv;T-IHokuQ z)mK0JjR%UR9D8QR=3o5y@+G^gw@qIif5VS<{i)?E&Zkoc-|_U`-~2lH%;Cus-q^8d z+>U=$WDR+v?+;)9-cRnhqcZcwC(k51j{fDl53GLd%xjm$jENm}@a@-64Lde?WBvzU z+x@HUcih-_X7ZsGU;oz(*data); /// Make temporary arrays for each components of Tuple. In the same way as for Nullable. diff --git a/dbms/src/Columns/ColumnNullable.h b/dbms/src/Columns/ColumnNullable.h index 2069f80b42e..d993d918509 100644 --- a/dbms/src/Columns/ColumnNullable.h +++ b/dbms/src/Columns/ColumnNullable.h @@ -31,7 +31,7 @@ using ConstNullMapPtr = const NullMap *; /// over a bitmap because columns are usually stored on disk as compressed /// files. In this regard, using a bitmap instead of a byte map would /// greatly complicate the implementation with little to no benefits. -class ColumnNullable final : public COWPtrHelper +class ColumnNullable final : public COWPtrHelper // nullable 列是怎么形成的,一般是一个普通列,一个伴随 bitmap,这里使用的 byte map 来存的 null mapping 而不是 bits { private: friend class COWPtrHelper; diff --git a/dbms/src/Columns/ColumnsCommon.cpp b/dbms/src/Columns/ColumnsCommon.cpp index e969dc99842..9307587ce6c 100644 --- a/dbms/src/Columns/ColumnsCommon.cpp +++ b/dbms/src/Columns/ColumnsCommon.cpp @@ -291,7 +291,7 @@ void filterArraysImplGeneric( while (filt_pos < filt_end) { - if (*filt_pos) + if (*filt_pos) // 如果是 0 的话,说名该列该行被 filter 了 copy_array(offsets_pos); ++filt_pos; diff --git a/dbms/src/Common/COWPtr.h b/dbms/src/Common/COWPtr.h index 1f6bb8dacbb..b4d39620287 100644 --- a/dbms/src/Common/COWPtr.h +++ b/dbms/src/Common/COWPtr.h @@ -105,6 +105,7 @@ class COWPtr : public boost::intrusive_ref_counter T && operator*() const && { return const_cast::type &&>(*boost::intrusive_ptr::get()); } }; + // 这个地方,COWPtr 继承 counter 之后就自带了 ref count 和 add, release 函数。所以私有类实力化到 T 之后 = IntrusivePtr,里面调用的 add, release 函数就有了,其都是操作 T 继承的 ref count 来操作的 protected: template class mutable_ptr : public IntrusivePtr // NOLINT(readability-identifier-naming) diff --git a/dbms/src/Common/HashTable/HashTable.h b/dbms/src/Common/HashTable/HashTable.h index 2c857b9bc1b..3bc3ab5e56c 100644 --- a/dbms/src/Common/HashTable/HashTable.h +++ b/dbms/src/Common/HashTable/HashTable.h @@ -449,7 +449,7 @@ class HashTable : private boost::noncopyable { while (!buf[place_value].isZero(*this) && !buf[place_value].keyEquals(x, hash_value, *this)) { - place_value = grower.next(place_value); + place_value = grower.next(place_value); // closed hash,线性开放地址寻址法 #ifdef DBMS_HASH_MAP_COUNT_COLLISIONS ++collisions; #endif @@ -694,7 +694,7 @@ class HashTable : private boost::noncopyable * HashMap completely, change all its users to the existing internal * iteration interface, and redefine end() to return LookupResult for * compatibility with std find(). Unfortunately, now is not the time to - * do this. + * do this. 
+     * do this.
+     // implicit type conversion operator
      */
     operator Cell *() const { return nullptr; } // NOLINT(google-explicit-constructor)
 };

diff --git a/dbms/src/Common/TiFlashMetrics.h b/dbms/src/Common/TiFlashMetrics.h
index caf2f1cf300..bf0ec4a9f65 100644
--- a/dbms/src/Common/TiFlashMetrics.h
+++ b/dbms/src/Common/TiFlashMetrics.h
@@ -61,7 +61,8 @@ namespace DB
       F(type_limit, {"type", "limit"}), F(type_join, {"type", "join"}), F(type_exchange_sender, {"type", "exchange_sender"}), \
       F(type_exchange_receiver, {"type", "exchange_receiver"}), F(type_projection, {"type", "projection"}), \
       F(type_partition_ts, {"type", "partition_table_scan"}), \
-      F(type_window, {"type", "window"}), F(type_window_sort, {"type", "window_sort"})) \
+      F(type_window, {"type", "window"}), F(type_window_sort, {"type", "window_sort"}), \
+      F(type_repeat_source, {"type", "repeat_source"})) \
   M(tiflash_coprocessor_request_duration_seconds, "Bucketed histogram of request duration", Histogram, \
       F(type_cop, {{"type", "cop"}}, ExpBuckets{0.001, 2, 20}), \
       F(type_batch, {{"type", "batch"}}, ExpBuckets{0.001, 2, 20}), \

diff --git a/dbms/src/Core/Block.h b/dbms/src/Core/Block.h
index 0d337d6d3e2..3463c47c1bc 100644
--- a/dbms/src/Core/Block.h
+++ b/dbms/src/Core/Block.h
@@ -27,7 +27,7 @@
 
 namespace DB
 {
-/** Container for set of columns for bunch of rows in memory.
+/** Container for set of columns for bunch of rows in memory. // how is the number of rows in here determined?
   * This is unit of data processing.
   * Also contains metadata - data types of columns and their names
   * (either original names from a table, or generated names during temporary calculations).
@@ -39,6 +39,7 @@ class Context;
 class Block
 {
 private:
+    // one piece of data spanning multiple columns
     using Container = ColumnsWithTypeAndName;
     using IndexByName = std::map<String, size_t>;

diff --git a/dbms/src/Core/ColumnWithTypeAndName.h b/dbms/src/Core/ColumnWithTypeAndName.h
index 42a98f795fd..30c4fe8c546 100644
--- a/dbms/src/Core/ColumnWithTypeAndName.h
+++ b/dbms/src/Core/ColumnWithTypeAndName.h
@@ -32,6 +32,8 @@ class WriteBuffer;
 
 struct ColumnWithTypeAndName
 {
+    // IColumn inherits from the intrusive counter to implement shared-ptr semantics; via the two variants of the inheriting helper class, a mutable ptr and an immutable ptr can be converted into each other
+    // ColumnPtr is a base-class pointer
     ColumnPtr column;
     DataTypePtr type;
     String name;

diff --git a/dbms/src/Core/ColumnsWithTypeAndName.h b/dbms/src/Core/ColumnsWithTypeAndName.h
index 61c77cf161e..e7741bbb71e 100644
--- a/dbms/src/Core/ColumnsWithTypeAndName.h
+++ b/dbms/src/Core/ColumnsWithTypeAndName.h
@@ -21,6 +21,7 @@
 
 namespace DB
 {
+// a piece of data composed of multiple columns
 using ColumnsWithTypeAndName = std::vector<ColumnWithTypeAndName>;
 
 }

diff --git a/dbms/src/DataStreams/RepeatSourceBlockInputStream.cpp b/dbms/src/DataStreams/RepeatSourceBlockInputStream.cpp
new file mode 100644
index 00000000000..f5075f9c87d
--- /dev/null
+++ b/dbms/src/DataStreams/RepeatSourceBlockInputStream.cpp
@@ -0,0 +1,34 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
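+
+// This stream wraps its child and runs the repeat-source expression actions over every block it reads (and over
+// the header), which is where the row replication for grouping sets takes place.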
+
+#include <DataStreams/RepeatSourceBlockInputStream.h>
+
+namespace DB
+{
+Block RepeatSourceBlockInputStream::readImpl()
+{
+    Block block = children.back()->read();
+    if (!block)
+        return block;
+    repeat_source_actions->execute(block);
+    return block;
+}
+
+Block RepeatSourceBlockInputStream::getHeader() const
+{
+    Block res = children.back()->getHeader();
+    repeat_source_actions->execute(res);
+    return res;
+}
+
+} // namespace DB

diff --git a/dbms/src/DataStreams/RepeatSourceBlockInputStream.h b/dbms/src/DataStreams/RepeatSourceBlockInputStream.h
new file mode 100644
index 00000000000..eaa223ef824
--- /dev/null
+++ b/dbms/src/DataStreams/RepeatSourceBlockInputStream.h
@@ -0,0 +1,45 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <Core/Block.h>
+#include <DataStreams/IProfilingBlockInputStream.h>
+#include <Interpreters/ExpressionActions.h>
+
+namespace DB
+{
+class RepeatSourceBlockInputStream : public IProfilingBlockInputStream
+{
+    static constexpr auto NAME = "RepeatSource";
+
+public:
+    RepeatSourceBlockInputStream(
+        const BlockInputStreamPtr & input,
+        ExpressionActionsPtr repeat_source_actions_)
+        : repeat_source_actions(repeat_source_actions_)
+    {
+        children.push_back(input);
+    }
+
+    String getName() const override { return NAME; }
+    Block getHeader() const override;
+
+protected:
+    Block readImpl() override;
+
+private:
+    ExpressionActionsPtr repeat_source_actions;
+};
+
+} // namespace DB

diff --git a/dbms/src/DataStreams/SquashingTransform.cpp b/dbms/src/DataStreams/SquashingTransform.cpp
index d018deaed96..1488b688d27 100644
--- a/dbms/src/DataStreams/SquashingTransform.cpp
+++ b/dbms/src/DataStreams/SquashingTransform.cpp
@@ -60,7 +60,7 @@ SquashingTransform::Result SquashingTransform::add(Block && block)
         return Result(std::move(block));
     }
 
-    append(std::move(block));
+    append(std::move(block)); // accumulate blocks into a batch
 
     accumulated_block_rows = accumulated_block.rows();
     accumulated_block_bytes = accumulated_block.bytes();
@@ -93,10 +93,15 @@ void SquashingTransform::append(Block && block)
     {
         MutableColumnPtr mutable_column = (*std::move(accumulated_block.getByPosition(i).column)).mutate();
         mutable_column->insertRangeFrom(*block.getByPosition(i).column, 0, rows);
-        accumulated_block.getByPosition(i).column = std::move(mutable_column);
+        accumulated_block.getByPosition(i).column = std::move(mutable_column); // the append-values operation on a column
     }
 }
 
+// We may need an efficient row-replication operation here. The repeatSource operator first appends the additional
+// column, then replicates the original block's rows n times; on the n-th replica it overwrites the grouping-set
+// columns other than the targeted one with null values, and sets the groupingID column to the constant n.
+//
+// sample_block
 
 bool SquashingTransform::isEnoughSize(size_t rows, size_t bytes) const
 {

diff --git a/dbms/src/Debug/MockExecutor/RepeatSourceBinder.cpp b/dbms/src/Debug/MockExecutor/RepeatSourceBinder.cpp
new file mode 100644
index 00000000000..7633c347282
--- /dev/null
+++ b/dbms/src/Debug/MockExecutor/RepeatSourceBinder.cpp
@@ -0,0 +1,67 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <Debug/MockExecutor/RepeatSourceBinder.h>
+
+namespace DB::mock
+{
+
+bool RepeatSourceBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context)
+{
+    tipb_executor->set_tp(tipb::ExecType::TypeRepeatSource);
+    tipb_executor->set_executor_id(name);
+    tipb::RepeatSource * repeat_source = tipb_executor->mutable_repeat_source();
+    for (const auto & grouping_set : grouping_sets_columns)
+    {
+        auto * gss = repeat_source->add_grouping_sets();
+        for (const auto & grouping_exprs : grouping_set)
+        {
+            auto * ges = gss->add_grouping_exprs();
+            for (const auto & grouping_col : grouping_exprs)
+            {
+                tipb::Expr * add_column = ges->add_grouping_expr();
+                astToPB(children[0]->output_schema, grouping_col, add_column, collator_id, context); // convert the AST column ref into a tipb::Expr column ref
+            }
+        }
+    }
+    auto * children_executor = repeat_source->mutable_child();
+    return children[0]->toTiPBExecutor(children_executor, collator_id, mpp_info, context);
+}
+
+ExecutorBinderPtr compileRepeat(ExecutorBinderPtr input, size_t & executor_index, MockVVecGroupingNameVec grouping_set_columns, std::set<String> in_set)
+{
+    DAGSchema output_schema;
+    for (const auto & field : input->output_schema)
+    {
+        // if the column is in the grouping sets, make it nullable.
+        if (in_set.find(field.first) != in_set.end() && field.second.hasNotNullFlag())
+            output_schema.push_back(toNullableDAGColumnInfo(field));
+        else
+            output_schema.push_back(field);
+    }
+    {
+        tipb::FieldType field_type{};
+        field_type.set_tp(TiDB::TypeLongLong);
+        field_type.set_charset("binary");
+        field_type.set_collate(TiDB::ITiDBCollator::BINARY);
+        field_type.set_flag(0);
+        field_type.set_flen(-1);
+        field_type.set_decimal(-1);
+        output_schema.push_back(std::make_pair("groupingID", TiDB::fieldTypeToColumnInfo(field_type)));
+    }
+    ExecutorBinderPtr repeat_source = std::make_shared<RepeatSourceBinder>(executor_index, output_schema, std::move(grouping_set_columns));
+    repeat_source->children.push_back(input);
+    return repeat_source;
+}
+} // namespace DB::mock
\ No newline at end of file

diff --git a/dbms/src/Debug/MockExecutor/RepeatSourceBinder.h b/dbms/src/Debug/MockExecutor/RepeatSourceBinder.h
new file mode 100644
index 00000000000..473393221cf
--- /dev/null
+++ b/dbms/src/Debug/MockExecutor/RepeatSourceBinder.h
@@ -0,0 +1,42 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
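+
+// This binder mocks the repeat_source executor for tests: it serializes the grouping sets into the tipb executor
+// tree and derives the output schema — grouping-set columns become nullable and a trailing groupingID column is
+// appended (see compileRepeat, declared below).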
+
+#pragma once
+
+#include <Debug/MockExecutor/ExecutorBinder.h>
+
+namespace DB::mock
+{
+using MockGroupingNameVec = std::vector<String>;
+using MockVecGroupingNameVec = std::vector<MockGroupingNameVec>;
+using MockVVecGroupingNameVec = std::vector<MockVecGroupingNameVec>;
+
+class RepeatSourceBinder : public ExecutorBinder
+{
+public:
+    RepeatSourceBinder(size_t & index_, const DAGSchema & output_schema_, MockVVecGroupingNameVec gss)
+        : ExecutorBinder(index_, "repeat_source" + std::to_string(index_), output_schema_)
+        , grouping_sets_columns(gss)
+    {}
+
+    bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) override;
+
+    void columnPrune(std::unordered_set<String> &) override { throw Exception("Should not reach here"); }
+
+private:
+    // for now, every grouping set is a flat list of base columns; make the structure one level more nested once grouping-set merging is enabled.
+    MockVVecGroupingNameVec grouping_sets_columns;
+};
+
+ExecutorBinderPtr compileRepeat(ExecutorBinderPtr input, size_t & executor_index, MockVVecGroupingNameVec grouping_set_columns, std::set<String> set);
+} // namespace DB::mock

diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp
index ec0728bbf58..01315929ff3 100644
--- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp
@@ -35,6 +35,7 @@
 #include
 #include
 #include
+#include <Interpreters/Repeat.h>
 #include
 #include
 #include
@@ -668,6 +669,7 @@ String DAGExpressionAnalyzer::applyFunction(
     const TiDB::TiDBCollatorPtr & collator)
 {
     String result_name = genFuncString(func_name, arg_names, {collator});
+    // nice: this avoids recomputing an identical expression
     if (actions->getSampleBlock().has(result_name))
         return result_name;
     const FunctionBuilderPtr & function_builder = FunctionFactory::instance().get(func_name, context);
@@ -804,6 +806,55 @@ NamesAndTypes DAGExpressionAnalyzer::buildOrderColumns(
     return order_columns;
 }
 
+std::shared_ptr<Repeat> DAGExpressionAnalyzer::buildRepeatGroupingColumns(
+    const tipb::RepeatSource & repeatSource, const ExpressionActionsPtr & actions)
+{
+    GroupingSets group_sets_columns;
+    group_sets_columns.reserve(repeatSource.grouping_sets().size());
+    for (const auto & group_set : repeatSource.grouping_sets())
+    {
+        GroupingSet group_set_columns;
+        group_set_columns.reserve(group_set.grouping_exprs().size());
+        for (const auto & group_exprs : group_set.grouping_exprs())
+        {
+            GroupingColumnNames group_exprs_columns;
+            group_exprs_columns.reserve(group_exprs.grouping_expr().size());
+            for (const auto & group_expr : group_exprs.grouping_expr())
+            {
+                if (group_expr.tp() != tipb::ColumnRef)
+                {
+                    throw TiFlashException("grouping sets expression should be column expr", Errors::Coprocessor::BadRequest);
+                }
+                String cp_name = getActions(group_expr, actions);
+                // TiDB expression computation addresses columns by index offset into the child's chunk schema; convert that to a ClickHouse block column name here.
+                group_exprs_columns.emplace_back(cp_name);
+            }
+            // move here, because the strings were already copied from the input columns.
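+            // (illustration, not code from Repeat.h: for GROUPING SETS ((a), (b)) this builds {{{"a"}}, {{"b"}}} —
+            // one GroupingSet per set, each currently holding a single GroupingColumnNames list)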
+            group_set_columns.emplace_back(std::move(group_exprs_columns));
+        }
+        group_sets_columns.emplace_back(std::move(group_set_columns));
+    }
+    return Repeat::sharedRepeat(group_sets_columns);
+}
+
+ExpressionActionsPtr DAGExpressionAnalyzer::appendRepeatSource(
+    const tipb::RepeatSource & repeatSource, ExpressionActionsChain & chain)
+{
+    auto & last_step = initAndGetLastStep(chain);
+    auto shared_repeat = buildRepeatGroupingColumns(repeatSource, last_step.actions);
+    last_step.actions->add(ExpressionAction::repeatSource(shared_repeat));
+    for (const auto & origin_col : last_step.actions->getSampleBlock().getNamesAndTypesList())
+    {
+        last_step.required_output.push_back(origin_col.name);
+    }
+    // an added column from the REPEAT action.
+    source_columns.emplace_back(Repeat::grouping_identifier_column_name, Repeat::grouping_identifier_column_type);
+    auto before_repeat_source = chain.getLastActions();
+    chain.finalize();
+    chain.clear();
+
+    auto & after_repeat_step = initAndGetLastStep(chain);
+    for (const auto & column : getCurrentInputColumns())
+        after_repeat_step.required_output.push_back(column.name);
+    return before_repeat_source;
+}
+
 std::vector<NameAndTypePair> DAGExpressionAnalyzer::appendOrderBy(
     ExpressionActionsChain & chain,
     const tipb::TopN & topN)
@@ -924,7 +975,7 @@ std::pair<bool, Names> DAGExpressionAnalyzer::buildJoinKey(
     for (int i = 0; i < keys.size(); ++i)
     {
         const auto & key = keys.at(i);
-        bool has_actions = key.tp() != tipb::ExprType::ColumnRef;
+        bool has_actions = key.tp() != tipb::ExprType::ColumnRef; // if the join key is not a column ref, a preceding action has materialized the expression into a column for us
 
         String key_name = getActions(key, actions);
         DataTypePtr current_type = actions->getSampleBlock().getByName(key_name).type;
@@ -989,6 +1040,7 @@ bool DAGExpressionAnalyzer::appendJoinKeyAndJoinFilters(
     ExpressionActionsPtr actions = chain.getLastActions();
 
     bool ret = false;
+    // build the join keys; ClickHouse only outputs one key, so a copy is needed, and if the key is an expression a scalar action must be appended as well
     std::tie(ret, key_names) = buildJoinKey(actions, keys, join_key_types, left, is_right_out_join);
 
     if (!filters.empty())
@@ -997,7 +1049,7 @@ bool DAGExpressionAnalyzer::appendJoinKeyAndJoinFilters(
         std::vector<const tipb::Expr *> filter_vector;
         for (const auto & c : filters)
             filter_vector.push_back(&c);
-        filter_column_name = appendWhere(chain, filter_vector);
+        filter_column_name = appendWhere(chain, filter_vector); // builds the column output by the filter
     }
     /// remove useless columns to avoid duplicate columns
     /// as when compiling the key/filter expression, the origin
@@ -1017,18 +1069,18 @@ bool DAGExpressionAnalyzer::appendJoinKeyAndJoinFilters(
     if (ret)
     {
         std::unordered_set<String> needed_columns;
-        for (const auto & c : getCurrentInputColumns())
+        for (const auto & c : getCurrentInputColumns()) // every incoming column is needed
             needed_columns.insert(c.name);
-        for (const auto & s : key_names)
+        for (const auto & s : key_names) // the newly added key columns are needed too
             needed_columns.insert(s);
-        if (!filter_column_name.empty())
+        if (!filter_column_name.empty()) // as is the per-side filter column added here
             needed_columns.insert(filter_column_name);
 
         const auto & names = actions->getSampleBlock().getNames();
         for (const auto & name : names)
         {
             if (needed_columns.find(name) == needed_columns.end())
-                actions->add(ExpressionAction::removeColumn(name));
+                actions->add(ExpressionAction::removeColumn(name)); // append a follow-up action that prunes the unneeded columns (the final results of these intermediate columns have already been recorded)
         }
     }
     return ret;
@@ -1392,9 +1444,12 @@ String DAGExpressionAnalyzer::getActions(const tipb::Expr & expr, const Expressi
     if (isLiteralExpr(expr))
     {
         Field value = decodeLiteral(expr);
+        // mainly matters for decimals
         DataTypePtr flash_type =
            applyVisitor(FieldToDataType(), value);
         DataTypePtr target_type = inferDataType4Literal(expr);
+        // the unique name of the expression
         ret = exprToString(expr, getCurrentInputColumns()) + "_" + target_type->getName();
+        // if the sample block already has this name, the column already exists
         if (!actions->getSampleBlock().has(ret))
         {
             ColumnWithTypeAndName column;
@@ -1415,10 +1470,12 @@ String DAGExpressionAnalyzer::getActions(const tipb::Expr & expr, const Expressi
     }
     else if (isColumnExpr(expr))
     {
+        // for a column ref, take the name directly from the stream's input columns
         ret = getColumnNameForColumnExpr(expr, getCurrentInputColumns());
     }
     else if (isScalarFunctionExpr(expr))
     {
+        // build a function from the expr and add it into the actions
         ret = DAGExpressionAnalyzerHelper::buildFunction(this, expr, actions);
     }
     else

diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h
index 79b9880ae1a..f1012df4646 100644
--- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h
+++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h
@@ -71,6 +71,10 @@ class DAGExpressionAnalyzer : private boost::noncopyable
         ExpressionActionsChain & chain,
         const std::vector<const tipb::Expr *> & conditions);
 
+    std::shared_ptr<Repeat> buildRepeatGroupingColumns(const tipb::RepeatSource & repeatSource, const ExpressionActionsPtr & actions);
+
+    ExpressionActionsPtr appendRepeatSource(const tipb::RepeatSource & repeatSource, ExpressionActionsChain & chain);
+
     NamesAndTypes buildWindowOrderColumns(const tipb::Sort & window_sort) const;
 
     std::vector<NameAndTypePair> appendOrderBy(

diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp
index b45ade0f7d2..bc805d615c0 100644
--- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp
@@ -150,11 +150,12 @@ String DAGExpressionAnalyzerHelper::buildInFunction(
         DataTypePtr type = inferDataType4Literal(child);
         argument_types.push_back(type);
     }
+    // find the common type
     DataTypePtr resolved_type = getLeastSupertype(argument_types);
     if (!removeNullable(resolved_type)->equals(*removeNullable(argument_types[0])))
     {
         // Need cast left argument
-        key_name = analyzer->appendCast(resolved_type, actions, key_name);
+        key_name = analyzer->appendCast(resolved_type, actions, key_name); // the child's output needs the cast
     }
     analyzer->makeExplicitSet(expr, sample_block, false, key_name);
     argument_names.push_back(key_name);
@@ -401,6 +402,7 @@ String DAGExpressionAnalyzerHelper::buildRegexpFunction(
     return analyzer->applyFunction(func_name, argument_names, actions, collator);
 }
 
+// CASE WHEN functions should go through here
 String DAGExpressionAnalyzerHelper::buildDefaultFunction(
     DAGExpressionAnalyzer * analyzer,
     const tipb::Expr & expr,
@@ -410,8 +412,9 @@ String DAGExpressionAnalyzerHelper::buildDefaultFunction(
     Names argument_names;
     for (const auto & child : expr.children())
     {
+        // if an argument is itself a function, this recursively generates multiple actions (depth-first)
         String name = analyzer->getActions(child, actions);
-        argument_names.push_back(name);
+        argument_names.push_back(name); // once the child function's output is obtained, use it as an argument
     }
     return analyzer->applyFunction(func_name, argument_names, actions, getCollatorFromExpr(expr));
 }

diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp
index 206b59f38e1..9a4a353eeb5 100644
--- a/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp
@@ -46,6 +46,7 @@ bool isSourceNode(const tipb::Executor * root)
 const static String SOURCE_NAME("source");
 const static String SEL_NAME("selection");
 const static String AGG_NAME("aggregation");
+const static String REPEAT_NAME("repeat_source");
 const static String WINDOW_NAME("window");
 const static String WINDOW_SORT_NAME("window_sort");
 const static String HAVING_NAME("having");
@@ -96,6 +97,12 @@ DAGQueryBlock::DAGQueryBlock(const tipb::Executor & root_, QueryBlockIDGenerator
             }
             current = &current->selection().child();
             break;
+        case tipb::ExecType::TypeRepeatSource:
+            GET_METRIC(tiflash_coprocessor_executor_count, type_repeat_source).Increment();
+            assignOrThrowException(&repeat_source, current, REPEAT_NAME);
+            repeat_source_name = current->executor_id();
+            current = &current->repeat_source().child(); // not a leaf node, keep recursing into the child
+            break;
         case tipb::ExecType::TypeStreamAgg:
             RUNTIME_CHECK_MSG(current->aggregation().group_by_size() == 0, STREAM_AGG_ERROR);
         case tipb::ExecType::TypeAggregation:
@@ -134,6 +141,7 @@ DAGQueryBlock::DAGQueryBlock(const tipb::Executor & root_, QueryBlockIDGenerator
 
     assignOrThrowException(&source, current, SOURCE_NAME);
     source_name = current->executor_id();
+    // the source node
     if (current->tp() == tipb::ExecType::TypeJoin)
     {
         if (source->join().children_size() != 2)

diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlock.h b/dbms/src/Flash/Coprocessor/DAGQueryBlock.h
index 297a679d4e9..d18ac84fd90 100644
--- a/dbms/src/Flash/Coprocessor/DAGQueryBlock.h
+++ b/dbms/src/Flash/Coprocessor/DAGQueryBlock.h
@@ -60,12 +60,14 @@ class DAGQueryBlock
     String having_name;
     const tipb::Executor * limit_or_topn = nullptr;
     String limit_or_topn_name;
+    const tipb::Executor * repeat_source = nullptr; // the repeat-source node can only sit directly below the sender
+    String repeat_source_name;
     const tipb::Executor * exchange_sender = nullptr;
     String exchange_sender_name;
     UInt32 id;
     const tipb::Executor * root;
     String qb_column_prefix;
-    std::vector<std::shared_ptr<DAGQueryBlock>> children;
+    std::vector<std::shared_ptr<DAGQueryBlock>> children; // are these children passed in after each DAG operator has been built?
     bool can_restore_pipeline_concurrency = true;
 
     bool isRootQueryBlock() const { return id == 1; };

diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp
index e16b711c8f1..3e4cb641f97 100644
--- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp
@@ -44,6 +44,7 @@
 #include
 #include
 #include
+#include <DataStreams/RepeatSourceBlockInputStream.h>
 #include
 #include
 
@@ -75,6 +76,7 @@ struct AnalysisResult
     ExpressionActionsPtr before_having;
     ExpressionActionsPtr before_order_and_select;
     ExpressionActionsPtr final_projection;
+    ExpressionActionsPtr before_repeat_source;
 
     String filter_column_name;
     String having_column_name;
@@ -131,6 +133,11 @@ AnalysisResult analyzeExpressions(
     if (query_block.limit_or_topn && query_block.limit_or_topn->tp() == tipb::ExecType::TypeTopN)
     {
         res.order_columns = analyzer.appendOrderBy(chain, query_block.limit_or_topn->topn());
+        chain.addStep();
+    }
+
+    if (query_block.repeat_source)
+    {
+        res.before_repeat_source = analyzer.appendRepeatSource(query_block.repeat_source->repeat_source(), chain);
     }
 
     const auto & dag_context = *context.getDAGContext();
@@ -237,6 +244,7 @@ void DAGQueryBlockInterpreter::handleJoin(const tipb::Join & join, DAGPipeline &
     bool is_tiflash_right_join = tiflash_join.isTiFlashRightJoin();
 
     // prepare probe side
+    // prepare the probe side of the join: mainly appending the actions for the join keys and the filter expressions
     auto [probe_side_prepare_actions, probe_key_names, probe_filter_column_name] = JoinInterpreterHelper::prepareJoin(
         context,
         probe_pipeline.firstStream()->getHeader(),
        true,
        is_tiflash_right_join,
         tiflash_join.getProbeConditions());
-    RUNTIME_ASSERT(probe_side_prepare_actions, log, "probe_side_prepare_actions cannot be nullptr");
+    RUNTIME_ASSERT(probe_side_prepare_actions, log, "probe_side_prepare_actions cannot be nullptr"); // after init there is always at least one action
 
     // prepare build side
    // this call goes through the same entry point as above, so it does exactly the same work for the build side
     auto [build_side_prepare_actions, build_key_names, build_filter_column_name] = JoinInterpreterHelper::prepareJoin(
         context,
         build_pipeline.firstStream()->getHeader(),
         is_tiflash_right_join,
         tiflash_join.getBuildConditions());
     RUNTIME_ASSERT(build_side_prepare_actions, log, "build_side_prepare_actions cannot be nullptr");
 
+    // appends the where-columns for the other conditions and the other-eq-from-in conditions
     auto [other_condition_expr, other_filter_column_name, other_eq_filter_from_in_column_name]
         = tiflash_join.genJoinOtherConditionAction(context, left_input_header, right_input_header, probe_side_prepare_actions);
 
     const Settings & settings = context.getSettingsRef();
-    size_t max_block_size_for_cross_join = settings.max_block_size;
+    size_t max_block_size_for_cross_join = settings.max_block_size; // what should happen if the number of repeated result rows exceeds this max?
     fiu_do_on(FailPoints::minimum_block_size_for_cross_join, { max_block_size_for_cross_join = 1; });
 
-    JoinPtr join_ptr = std::make_shared<Join>(
+    JoinPtr join_ptr = std::make_shared<Join>( // make the join
         probe_key_names,
         build_key_names,
         tiflash_join.kind,
@@ -471,7 +481,7 @@ void DAGQueryBlockInterpreter::recordProfileStreams(DAGPipeline & pipeline, cons
 
 void DAGQueryBlockInterpreter::handleExchangeReceiver(DAGPipeline & pipeline)
 {
-    auto exchange_receiver = dagContext().getMPPExchangeReceiver(query_block.source_name);
+    auto exchange_receiver = dagContext().getMPPExchangeReceiver(query_block.source_name); // fetch the source from the registered exchange receivers
     if (unlikely(exchange_receiver == nullptr))
         throw Exception("Can not find exchange receiver for " + query_block.source_name, ErrorCodes::LOGICAL_ERROR);
     // todo choose a more reasonable stream number
@@ -494,14 +504,14 @@ void DAGQueryBlockInterpreter::handleExchangeReceiver(DAGPipeline & pipeline)
             /*stream_id=*/enable_fine_grained_shuffle ? i : 0);
         exchange_receiver_io_input_streams.push_back(stream);
         stream->setExtraInfo(extra_info);
-        pipeline.streams.push_back(stream);
+        pipeline.streams.push_back(stream); // the underlying input stream of each pipeline
     }
     NamesAndTypes source_columns;
     for (const auto & col : pipeline.firstStream()->getHeader())
     {
         source_columns.emplace_back(col.name, col.type);
     }
-    analyzer = std::make_unique<DAGExpressionAnalyzer>(std::move(source_columns), context);
+    analyzer = std::make_unique<DAGExpressionAnalyzer>(std::move(source_columns), context); // the analyzer is initialized here
 }
 
 // for tests, we need to mock ExchangeReceiver blockInputStream as the source stream.
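
(Repeat.cpp itself — 231 lines per the diffstat — is not included in this excerpt. What follows is a minimal,
row-oriented sketch of the replication its comments describe; the names repeatRows, Row, and Cell are hypothetical,
and the real operator works column-wise on Blocks through an ExpressionActions step.)

    #include <algorithm>
    #include <optional>
    #include <string>
    #include <vector>

    using Cell = std::optional<std::string>; // a nullable cell
    using Row = std::vector<Cell>;
    using Rows = std::vector<Row>;

    // sets[k] holds the positions of the columns belonging to grouping set k.
    Rows repeatRows(const Rows & input, const std::vector<std::vector<size_t>> & sets)
    {
        // gather every grouping-set column position, across all sets
        std::vector<size_t> all_grouping_columns;
        for (const auto & s : sets)
            all_grouping_columns.insert(all_grouping_columns.end(), s.begin(), s.end());

        Rows output;
        for (const auto & row : input)
        {
            for (size_t k = 0; k < sets.size(); ++k)
            {
                Row copy = row;
                for (size_t pos : all_grouping_columns)
                {
                    // null-out the grouping-set columns that do not belong to set k
                    bool in_current_set = std::find(sets[k].begin(), sets[k].end(), pos) != sets[k].end();
                    if (!in_current_set)
                        copy[pos] = std::nullopt;
                }
                // the appended groupingID column holds the constant k + 1 for this replica
                copy.emplace_back(std::to_string(k + 1));
                output.push_back(std::move(copy));
            }
        }
        return output;
    }

(Working column-wise instead lets the non-grouping columns be replicated wholesale rather than cell by cell.)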
@@ -517,7 +527,7 @@ void DAGQueryBlockInterpreter::handleProjection(DAGPipeline & pipeline, const ti
 {
     NamesAndTypes input_columns;
     pipeline.streams = input_streams_vec[0];
-    for (auto const & p : pipeline.firstStream()->getHeader().getNamesAndTypesList())
+    for (auto const & p : pipeline.firstStream()->getHeader().getNamesAndTypesList()) // the initial block column names
         input_columns.emplace_back(p.name, p.type);
     DAGExpressionAnalyzer dag_analyzer(std::move(input_columns), context);
     ExpressionActionsChain chain;
@@ -527,12 +537,12 @@ void DAGQueryBlockInterpreter::handleProjection(DAGPipeline & pipeline, const ti
     UniqueNameGenerator unique_name_generator;
     for (const auto & expr : projection.exprs())
     {
-        auto expr_name = dag_analyzer.getActions(expr, last_step.actions);
-        last_step.required_output.emplace_back(expr_name);
+        auto expr_name = dag_analyzer.getActions(expr, last_step.actions); // add the extra column produced by the expr
+        last_step.required_output.emplace_back(expr_name); // and add it to this step's final output columns
         const auto & col = last_step.actions->getSampleBlock().getByName(expr_name);
         String alias = unique_name_generator.toUniqueName(col.name);
         output_columns.emplace_back(alias, col.type);
-        project_cols.emplace_back(col.name, alias);
+        project_cols.emplace_back(col.name, alias); // it suffices to guarantee that the projection output contains no duplicate column names
     }
     executeExpression(pipeline, chain.getLastActions(), log, "before projection");
     executeProject(pipeline, project_cols, "projection");
@@ -570,18 +580,18 @@ void DAGQueryBlockInterpreter::handleWindowOrder(DAGPipeline & pipeline, const t
 }
 
 // To execute a query block, you have to:
-// 1. generate the date stream and push it to pipeline.
+// 1. generate the data stream and push it to pipeline.
 // 2. assign the analyzer
 // 3. construct a final projection, even if it's not necessary. just construct it.
 // Talking about projection, it has the following rules.
 // 1. if the query block does not contain agg, then the final project is the same as the source Executor
-// 2. if the query block contains agg, then the final project is the same as agg Executor
+// 2. if the query block contains agg/repeat, then the final project is the same as agg/repeat Executor
 // 3.
//    if the cop task may contain more than 1 query block, and the current query block is not the root
// query block, then the project should add an alias for each column that needs to be projected, something
// like final_project.emplace_back(col.name, query_block.qb_column_prefix + col.name);
 void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline)
 {
-    if (query_block.source->tp() == tipb::ExecType::TypeJoin)
+    if (query_block.source->tp() == tipb::ExecType::TypeJoin) // check what the underlying source operator is
     {
         SubqueryForSet right_query;
         handleJoin(query_block.source->join(), pipeline, right_query, query_block.source->fine_grained_shuffle_stream_count());
@@ -632,6 +642,7 @@ void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline)
             Errors::Coprocessor::BadRequest);
     }
 
+    // the analyzer is used here: it first takes the most basic source columns as its base, then analyzes each upper (non-leaf) operator of the query block
     auto res = analyzeExpressions(
         context,
         *analyzer,
@@ -676,14 +687,25 @@ void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline)
         recordProfileStreams(pipeline, query_block.limit_or_topn_name);
     }
 
-    // execute final project action
-    executeProject(pipeline, final_project, "final projection");
     // execute limit
     if (query_block.limit_or_topn && query_block.limit_or_topn->tp() == tipb::TypeLimit)
     {
         executeLimit(pipeline);
         recordProfileStreams(pipeline, query_block.limit_or_topn_name);
     }
+
+    // execute the repeat source OP after all filters/limits and so on.
+    // since the repeat source OP has row-replication work to do, placing it after the limit avoids some unnecessary work.
+    // it also goes before the final projection, because the base columns must still be recognizable as grouping-set columns before their aliases are changed.
+    if (res.before_repeat_source)
+    {
+        executeRepeatSource(pipeline, res.before_repeat_source);
+        recordProfileStreams(pipeline, query_block.repeat_source_name);
+    }
+
+    // execute final project action
+    executeProject(pipeline, final_project, "final projection");
+
     restorePipelineConcurrency(pipeline);
 
     // execute exchange_sender
@@ -724,6 +746,13 @@ void DAGQueryBlockInterpreter::executeLimit(DAGPipeline & pipeline)
     }
 }
 
+void DAGQueryBlockInterpreter::executeRepeatSource(DAGPipeline & pipeline, const ExpressionActionsPtr & expr)
+{
+    pipeline.transform([&](auto & stream) {
+        stream = std::make_shared<RepeatSourceBlockInputStream>(stream, expr);
+    });
+}
+
 void DAGQueryBlockInterpreter::handleExchangeSender(DAGPipeline & pipeline)
 {
     RUNTIME_ASSERT(dagContext().isMPPTask() && dagContext().tunnel_set != nullptr, log, "exchange_sender only run in MPP");

diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h
index d2657b5c67a..8b4746bb6a2 100644
--- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h
+++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h
@@ -69,6 +69,7 @@ class DAGQueryBlockInterpreter
     void executeWindowOrder(DAGPipeline & pipeline, SortDescription sort_desc, bool enable_fine_grained_shuffle);
     void executeOrder(DAGPipeline & pipeline, const NamesAndTypes & order_columns);
     void executeLimit(DAGPipeline & pipeline);
+    void executeRepeatSource(DAGPipeline & pipeline, const ExpressionActionsPtr & expr);
     void executeWindow(
         DAGPipeline & pipeline,
         WindowDescription & window_description,

diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp
index 83563c47338..c74b52ed77d 100755
--- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp
@@ -1131,7 +1131,7 @@ Field decodeLiteral(const tipb::Expr & expr)
     }
 }
-String getColumnNameForColumnExpr(const tipb::Expr & expr, const std::vector<NameAndTypePair> & input_col)
+String getColumnNameForColumnExpr(const tipb::Expr & expr, const std::vector<NameAndTypePair> & input_col) // converts the column index carried in the expr into the column name at that position of the input vector
 {
     auto column_index = decodeDAGInt64(expr.val());
     if (column_index < 0 || column_index >= static_cast<Int64>(input_col.size()))

diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp
index 61249f19642..113602a1d82 100644
--- a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp
+++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp
@@ -55,7 +55,7 @@ BlockInputStreams InterpreterDAG::executeQueryBlock(DAGQueryBlock & query_block)
     }
     DAGQueryBlockInterpreter query_block_interpreter(
         context,
-        input_streams_vec,
+        input_streams_vec, // the input sources of the underlying DAG
         query_block,
         max_streams);
     return query_block_interpreter.execute();

diff --git a/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp b/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp
index d2e18a36e00..1e2b102d0c6 100644
--- a/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp
+++ b/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp
@@ -89,7 +89,7 @@ void executeExpression(
 {
     if (expr_actions && !expr_actions->getActions().empty())
     {
-        pipeline.transform([&](auto & stream) {
+        pipeline.transform([&](auto & stream) { // the data stream changes here: it is wrapped with an ExpressionBlockInputStream
             stream = std::make_shared<ExpressionBlockInputStream>(stream, expr_actions, log->identifier());
             stream->setExtraInfo(extra_info);
         });

diff --git a/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.cpp b/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.cpp
index 275042fddb0..6c876078d10 100644
--- a/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.cpp
+++ b/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.cpp
@@ -179,7 +179,7 @@ std::tuple<ExpressionActionsPtr, String, String> doGenJoinOtherConditionAction(
     if (join.other_conditions_size() == 0 && join.other_eq_conditions_from_in_size() == 0)
         return {nullptr, "", ""};
 
-    DAGExpressionAnalyzer dag_analyzer(source_columns, context);
+    DAGExpressionAnalyzer dag_analyzer(source_columns, context); // a fresh DAG analyzer is opened here
     ExpressionActionsChain chain;
 
     String filter_column_for_other_condition;
@@ -190,7 +190,7 @@ std::tuple<ExpressionActionsPtr, String, String> doGenJoinOtherConditionAction(
         {
             condition_vector.push_back(&c);
         }
-        filter_column_for_other_condition = dag_analyzer.appendWhere(chain, condition_vector);
+        filter_column_for_other_condition = dag_analyzer.appendWhere(chain, condition_vector); // the other-condition filter does not affect the existing schema
     }
 
     String filter_column_for_other_eq_condition;
@@ -201,7 +201,7 @@ std::tuple<ExpressionActionsPtr, String, String> doGenJoinOtherConditionAction(
         {
             condition_vector.push_back(&c);
         }
-        filter_column_for_other_eq_condition = dag_analyzer.appendWhere(chain, condition_vector);
+        filter_column_for_other_eq_condition = dag_analyzer.appendWhere(chain, condition_vector); // the other-eq filter does not affect the existing schema either
     }
 
     return {chain.getLastActions(), std::move(filter_column_for_other_condition), std::move(filter_column_for_other_eq_condition)};
@@ -230,7 +230,7 @@ String TiFlashJoin::genMatchHelperName(const Block & header1, const Block & head
     {
         match_helper_name = fmt::format("{}{}", Join::match_helper_prefix, ++i);
     }
-    return match_helper_name;
+    return match_helper_name; // a unique name
 }
 
 NamesAndTypes TiFlashJoin::genColumnsForOtherJoinFilter(
@@ -248,7 +248,8 @@ NamesAndTypes TiFlashJoin::genColumnsForOtherJoinFilter(
         }
         return true;
     };
-    if (unlikely(!is_prepare_actions_valid(build_side_index == 1 ?
 left_input_header : right_input_header, probe_prepare_join_actions)))
+    // assert that every column of the probe side's original block can be found in the probe actions
+    if (unlikely(!is_prepare_actions_valid(build_side_index == 1 ? left_input_header : right_input_header, probe_prepare_join_actions))) // so a ternary can be used right in an argument position
     {
         throw TiFlashException("probe_prepare_join_actions isn't valid", Errors::Coprocessor::Internal);
     }
@@ -295,9 +296,9 @@ NamesAndTypes TiFlashJoin::genColumnsForOtherJoinFilter(
     bool make_nullable = build_side_index == 1
         ? join.join_type() == tipb::JoinType::TypeRightOuterJoin
         : join.join_type() == tipb::JoinType::TypeLeftOuterJoin;
-    append_new_columns(probe_prepare_join_actions->getSampleBlock(), make_nullable);
+    append_new_columns(probe_prepare_join_actions->getSampleBlock(), make_nullable); // the new columns produced by the probe side need to be appended
 
-    return columns_for_other_join_filter;
+    return columns_for_other_join_filter; // nulls are filled following the probe side's 1/0 match result; if the 1/0 comes from the build side, the row is simply filtered out or ignored
 }
 
 NamesAndTypes TiFlashJoin::genJoinOutputColumns(
@@ -334,13 +335,14 @@ std::tuple<ExpressionActionsPtr, String, String> TiFlashJoin::genJoinOtherCondit
     const Block & right_input_header,
     const ExpressionActionsPtr & probe_side_prepare_join) const
 {
+    // append the original columns of both sides plus the columns generated by the probe side
     auto columns_for_other_join_filter
         = genColumnsForOtherJoinFilter(
             left_input_header,
             right_input_header,
             probe_side_prepare_join);
 
-    return doGenJoinOtherConditionAction(context, join, columns_for_other_join_filter);
+    return doGenJoinOtherConditionAction(context, join, columns_for_other_join_filter); // then generate the new actions (columns) from the other conditions
 }
 
 std::tuple<ExpressionActionsPtr, Names, String> prepareJoin(
@@ -359,6 +361,7 @@ std::tuple<ExpressionActionsPtr, Names, String> prepareJoin(
     ExpressionActionsChain chain;
     Names key_names;
     String filter_column_name;
+    // true to its name: append the join keys and one side's join filters
     dag_analyzer.appendJoinKeyAndJoinFilters(chain, keys, join_key_types, key_names, left, is_right_out_join, filters, filter_column_name);
     return {chain.getLastActions(), std::move(key_names), std::move(filter_column_name)};
 }

diff --git a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp
index 86a5edc7406..d2536255a76 100644
--- a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp
+++ b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp
@@ -99,6 +99,35 @@ bool collectForTableScan(std::vector<tipb::FieldType> & output_field_types, cons
     return false;
 }
 
+bool collectForRepeat(std::vector<tipb::FieldType> & out_field_types, const tipb::Executor & executor)
+{
+    auto & out_child_fields = out_field_types;
+    // collect output_field_types of children
+    getChildren(executor).forEach([&out_child_fields](const tipb::Executor & child) {
+        traverseExecutorTree(child, [&out_child_fields](const tipb::Executor & e) { return collectForExecutor(out_child_fields, e); });
+    });
+
+    // executor.repeat_source().grouping_sets().Get(1).grouping_exprs().Get(1).grouping_expr().Get(1).
+    // /// the type of grouping set column is always nullable
+    // auto updated_field_type = field_type;
+    // updated_field_type.set_flag(updated_field_type.flag() & (~static_cast<UInt32>(TiDB::ColumnFlagNotNull)));
+    // output_field_types.push_back(updated_field_type);
+
+    {
+        // for additional groupingID column.
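+        // (groupingID is encoded as a TiDB signed 64-bit integer with binary charset/collation; flen/decimal of -1
+        // mean "unspecified" — mirroring the schema built in compileRepeat in RepeatSourceBinder.cpp)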
diff --git a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp index 86a5edc7406..d2536255a76 100644 --- a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp +++ b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp @@ -99,6 +99,35 @@ bool collectForTableScan(std::vector & output_field_types, cons return false; } +bool collectForRepeat(std::vector & out_field_types, const tipb::Executor & executor) +{ + auto & out_child_fields = out_field_types; + // collect output_field_types of children + getChildren(executor).forEach([&out_child_fields](const tipb::Executor & child) { + traverseExecutorTree(child, [&out_child_fields](const tipb::Executor & e) { return collectForExecutor(out_child_fields, e); }); + }); + +// executor.repeat_source().grouping_sets().Get(1).grouping_exprs().Get(1).grouping_expr().Get(1). +// /// the type of grouping set column is always nullable +// auto updated_field_type = field_type; +// updated_field_type.set_flag(updated_field_type.flag() & (~static_cast(TiDB::ColumnFlagNotNull))); +// output_field_types.push_back(updated_field_type); + + { + // for the additional groupingID column. + tipb::FieldType field_type{}; + field_type.set_tp(TiDB::TypeLongLong); + field_type.set_charset("binary"); + field_type.set_collate(TiDB::ITiDBCollator::BINARY); + field_type.set_flag(0); + field_type.set_flen(-1); + field_type.set_decimal(-1); + out_field_types.push_back(field_type); + } + return false; +} + bool collectForJoin(std::vector & output_field_types, const tipb::Executor & executor) { // collect output_field_types of children @@ -190,6 +219,8 @@ bool collectForExecutor(std::vector & output_field_types, const return collectForTableScan(output_field_types, executor.partition_table_scan()); case tipb::ExecType::TypeJoin: return collectForJoin(output_field_types, executor); + case tipb::ExecType::TypeRepeatSource: + return collectForRepeat(output_field_types, executor); default: return true; } diff --git a/dbms/src/Flash/Mpp/MPPHandler.cpp b/dbms/src/Flash/Mpp/MPPHandler.cpp index 753653ac7b0..14f1d6e5a05 100644 --- a/dbms/src/Flash/Mpp/MPPHandler.cpp +++ b/dbms/src/Flash/Mpp/MPPHandler.cpp @@ -82,6 +82,7 @@ grpc::Status MPPHandler::execute(const ContextPtr & context, mpp::DispatchTaskRe { Stopwatch stopwatch; task = MPPTask::newTask(task_request.meta(), context); + task->prepare(task_request); addRetryRegion(context, response); diff --git a/dbms/src/Flash/Mpp/MPPTask.cpp b/dbms/src/Flash/Mpp/MPPTask.cpp index 4f97a94afd7..c2d5b4ccc94 100644 --- a/dbms/src/Flash/Mpp/MPPTask.cpp +++ b/dbms/src/Flash/Mpp/MPPTask.cpp @@ -149,6 +149,7 @@ void MPPTask::finishWrite() void MPPTask::run() { + // schedule the task on the thread pool and detach it newThreadManager()->scheduleThenDetach(true, "MPPTask", [self = shared_from_this()] { self->runImpl(); }); } @@ -213,6 +214,7 @@ void MPPTask::initExchangeReceivers() if (status != RUNNING) throw Exception("exchange receiver map can not be initialized, because the task is not in running state"); + // receivers work in push mode: this task only reacts after data has been pushed in receiver_set_local->addExchangeReceiver(executor_id, exchange_receiver); } return true; @@ -340,6 +342,7 @@ void MPPTask::prepare(const mpp::DispatchTaskRequest & task_request) void MPPTask::preprocess() { auto start_time = Clock::now(); + // register the exchange receivers initExchangeReceivers(); LOG_DEBUG(log, "init exchange receiver done"); query_executor_holder.set(queryExecute(*context)); @@ -388,6 +391,7 @@ void MPPTask::runImpl() schedule_entry.setNeededThreads(estimateCountOfNewThreads()); LOG_DEBUG(log, "Estimate new thread count of query: {} including tunnel_threads: {}, receiver_threads: {}", schedule_entry.getNeededThreads(), dag_context->tunnel_set->getExternalThreadCnt(), new_thread_count_of_mpp_receiver); + // much like waiting on a channel in golang scheduleOrWait(); LOG_INFO(log, "task starts running"); diff --git a/dbms/src/Flash/Mpp/MPPTaskScheduleEntry.h b/dbms/src/Flash/Mpp/MPPTaskScheduleEntry.h index 60ccb9297c0..bc5522dfdfe 100644 --- a/dbms/src/Flash/Mpp/MPPTaskScheduleEntry.h +++ b/dbms/src/Flash/Mpp/MPPTaskScheduleEntry.h @@ -52,6 +52,7 @@ class MPPTaskScheduleEntry int needed_threads; std::mutex schedule_mu; + // the condition variable used for the schedule wait std::condition_variable schedule_cv; ScheduleState schedule_state; const LoggerPtr log;
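The "waiting on a channel" note above is, concretely, a plain condition-variable wait on the members shown in this header. A minimal sketch of what scheduleOrWait presumably boils down to (the WAITING state name is an assumption, not taken from this patch):

std::unique_lock<std::mutex> lock(schedule_mu);
// block the MPP task until the scheduler flips schedule_state and notifies schedule_cv
schedule_cv.wait(lock, [&] { return schedule_state != ScheduleState::WAITING; });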
diff --git a/dbms/src/Flash/Mpp/MPPTunnelSet.cpp b/dbms/src/Flash/Mpp/MPPTunnelSet.cpp index a308a9717a3..a3ba44127e3 100644 --- a/dbms/src/Flash/Mpp/MPPTunnelSet.cpp +++ b/dbms/src/Flash/Mpp/MPPTunnelSet.cpp @@ -244,9 +244,11 @@ void MPPTunnelSetBase::fineGrainedShuffleWrite( template void MPPTunnelSetBase::registerTunnel(const MPPTaskId & receiver_task_id, const TunnelPtr & tunnel) { + // the tunnel is registered in the map if (receiver_task_id_to_index_map.find(receiver_task_id) != receiver_task_id_to_index_map.end()) throw Exception(fmt::format("the tunnel {} has been registered", tunnel->id())); + // the tunnels themselves are simply kept in a vector; the map stores each tunnel's index receiver_task_id_to_index_map[receiver_task_id] = tunnels.size(); tunnels.push_back(tunnel); if (!tunnel->isLocal() && !tunnel->isAsync()) diff --git a/dbms/src/Flash/Statistics/traverseExecutors.cpp b/dbms/src/Flash/Statistics/traverseExecutors.cpp index dd720920dcd..801002a10a8 100644 --- a/dbms/src/Flash/Statistics/traverseExecutors.cpp +++ b/dbms/src/Flash/Statistics/traverseExecutors.cpp @@ -41,6 +41,8 @@ Children getChildren(const tipb::Executor & executor) return Children{&executor.topn().child()}; case tipb::ExecType::TypeLimit: return Children{&executor.limit().child()}; + case tipb::ExecType::TypeRepeatSource: + return Children{&executor.repeat_source().child()}; case tipb::ExecType::TypeProjection: return Children{&executor.projection().child()}; case tipb::ExecType::TypeExchangeSender: diff --git a/dbms/src/Flash/tests/gtest_filter_executor.cpp b/dbms/src/Flash/tests/gtest_filter_executor.cpp index 3a3b5b16b2c..c577772e46c 100644 --- a/dbms/src/Flash/tests/gtest_filter_executor.cpp +++ b/dbms/src/Flash/tests/gtest_filter_executor.cpp @@ -208,6 +208,15 @@ try request, {toNullableVec({"banana"}), toNullableVec({"banana"})}); + + request = context + .scan("test_db", "test_table") + .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + .build(context); + executeAndAssertColumnsEqual( + request, + {toNullableVec({"banana"}), + toNullableVec({"banana"})}); } CATCH diff --git a/dbms/src/Flash/tests/gtest_interpreter.cpp b/dbms/src/Flash/tests/gtest_interpreter.cpp index e129c5587a5..b5a2cd80b16 100644 --- a/dbms/src/Flash/tests/gtest_interpreter.cpp +++ b/dbms/src/Flash/tests/gtest_interpreter.cpp @@ -50,6 +50,13 @@ class InterpreterExecuteTest : public DB::tests::InterpreterTestUtils TEST_F(InterpreterExecuteTest, SingleQueryBlock) try { + + //auto grouping_sets = MockVecColumnNameVec{MockColumnNameVec{"s1"}, MockColumnNameVec{"s2"}}; + // auto request = context.scan("test_db", "test_table_1").repeat(grouping_sets).build(context); + // { + // ASSERT_BLOCKINPUTSTREAM_EQAUL("", request, 10); + // } + auto request = context.scan("test_db", "test_table_1") .filter(eq(col("s2"), col("s3"))) .aggregation({Max(col("s1"))}, {col("s2"), col("s3")})
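The .repeat(...) argument used in the test above nests three levels deep: grouping sets -> grouping exprs -> column names (see the MockColumnNameVec family of aliases added to mockExecutor.h later in this patch). Spelled out step by step, the same call reads:

// the grouping sets {<s1>, <s2>} from the test above, built level by level
MockColumnNameVec exprs_s1{"s1"};                      // one list of column names
MockVecColumnNameVec set_s1{exprs_s1};                 // one grouping set
MockVecColumnNameVec set_s2{MockColumnNameVec{"s2"}};
MockVVecColumnNameVec grouping_sets{set_s1, set_s2};   // all grouping sets
auto req = context.scan("test_db", "test_table").repeat(grouping_sets).build(context);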
diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index 46547a4f686..e25ae02bf88 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -81,6 +81,7 @@ ExpressionAction ExpressionAction::applyFunction(const FunctionBuilderPtr & func return a; } +// this suits the repeat source well for adding the groupingID column ExpressionAction ExpressionAction::addColumn(const ColumnWithTypeAndName & added_column_) { ExpressionAction a; @@ -135,8 +136,16 @@ ExpressionAction ExpressionAction::ordinaryJoin(std::shared_ptr join return a; } +ExpressionAction ExpressionAction::repeatSource(std::shared_ptr repeat_source_) +{ + ExpressionAction a; + a.type = REPEAT; + a.repeat = repeat_source_; + return a; +} -void ExpressionAction::prepare(Block & sample_block) +void ExpressionAction::prepare(Block & sample_block) // the prepare phase { /** Constant expressions should be evaluated, and put the result in sample_block. */ @@ -169,6 +178,7 @@ void ExpressionAction::prepare(Block & sample_block) new_column.type = result_type; sample_block.insert(std::move(new_column)); + // both the arguments and the result of the execution are columns in the block function->execute(sample_block, arguments, result_position); /// If the result is not a constant, just in case, we will consider the result as unknown. @@ -184,11 +194,12 @@ void ExpressionAction::prepare(Block & sample_block) /// Change the size to 1. if (col.column->empty()) - col.column = col.column->cloneResized(1); + col.column = col.column->cloneResized(1); // a constant column only needs to keep a single value } } else { + // if it cannot be evaluated eagerly, insert an unknown column carrying just the type and name sample_block.insert({nullptr, result_type, result_name}); } @@ -222,24 +233,41 @@ void ExpressionAction::prepare(Block & sample_block) } } - for (const auto & col : columns_added_by_join) + for (const auto & col : columns_added_by_join) // so far the sample block held only the left side's columns; the right side's are added here sample_block.insert(ColumnWithTypeAndName(nullptr, col.type, col.name)); break; } + case REPEAT: + { + // sample_block is only used for schema checks by the following steps; modify it whenever the schema changes during this action. + auto name_set = std::set(); + repeat->getAllGroupSetColumnNames(name_set); + // make the grouping set columns nullable. + for (const auto & col_name : name_set) + { + auto & column_with_name = sample_block.getByName(col_name); + column_with_name.type = makeNullable(column_with_name.type); + if (column_with_name.column != nullptr) + column_with_name.column = makeNullable(column_with_name.column); + } + // fill one more column: groupingID. + sample_block.insert({nullptr, repeat->grouping_identifier_column_type, repeat->grouping_identifier_column_name}); + break; + } + case PROJECT: { Block new_block; - for (auto & projection : projections) + for (auto & projection : projections) // change alias { const std::string & name = projection.first; const std::string & alias = projection.second; ColumnWithTypeAndName column = sample_block.getByName(name); if (!alias.empty()) column.name = alias; - new_block.insert(std::move(column)); + new_block.insert(std::move(column)); // effectively a direct move (a new block is used because some of the old columns may be dropped) } sample_block.swap(new_block); @@ -274,7 +302,7 @@ } -void ExpressionAction::execute(Block & block) const +void ExpressionAction::execute(Block & block) const // the execute phase { if (type == REMOVE_COLUMN || type == COPY_COLUMN) if (!block.has(source_name)) @@ -293,10 +321,10 @@ void ExpressionAction::execute(Block & block) const { if (!block.has(argument_names[i])) throw Exception("Not found column: '" + argument_names[i] + "'", ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); - arguments[i] = block.getPositionByName(argument_names[i]); + arguments[i] = block.getPositionByName(argument_names[i]); // find the column's offset } - size_t num_columns_without_result = block.columns(); + size_t num_columns_without_result = block.columns(); // take the current column count as the result column's offset block.insert({nullptr, result_type, result_name}); function->execute(block, arguments, num_columns_without_result); @@ -313,6 +341,12 @@ void ExpressionAction::execute(Block & block) const break; } + case REPEAT: + { + repeat->replicateAndFillNull(block); // in the execute phase, repeat fills the block in place directly + break; + } + case PROJECT: { Block new_block;
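The prepare/execute annotations above are worth a compact restatement: prepare() runs once against a data-free sample block so that every later step can check schemas, while execute() runs per data block. A hedged sketch of the contract:

// two-phase contract (sketch): schema first, data later
const Block & sample = actions->getSampleBlock(); // shaped by prepare(); columns may be nullptr placeholders
actions->execute(block);                          // the per-block data transformation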
@@ -458,6 +492,7 @@ void ExpressionActions::addImpl(ExpressionAction action, Names & new_names) arguments[i] = sample_block.getByName(action.argument_names[i]); } + // ordinary functions can simply be built with the default creator action.function = action.function_builder->build(arguments, action.collator); action.result_type = action.function->getReturnType(); } @@ -716,7 +751,7 @@ BlockInputStreamPtr ExpressionActions::createStreamWithNonJoinedDataIfFullOrRigh return {}; } -void ExpressionActionsChain::addStep() +void ExpressionActionsChain::addStep() // only prepares the input columns of the newly appended step for the successor { if (steps.empty()) throw Exception("Cannot add action to empty ExpressionActionsChain", ErrorCodes::LOGICAL_ERROR); diff --git a/dbms/src/Interpreters/ExpressionActions.h b/dbms/src/Interpreters/ExpressionActions.h index 68fb35f8048..0a9b9bd99fa 100644 --- a/dbms/src/Interpreters/ExpressionActions.h +++ b/dbms/src/Interpreters/ExpressionActions.h @@ -34,6 +34,7 @@ using NameWithAlias = std::pair; using NamesWithAliases = std::vector; class Join; +class Repeat; class IFunctionBase; using FunctionBasePtr = std::shared_ptr; @@ -65,6 +66,8 @@ struct ExpressionAction /// Reorder and rename the columns, delete the extra ones. The same column names are allowed in the result. PROJECT, + + REPEAT, }; Type type; @@ -90,6 +93,10 @@ struct ExpressionAction /// For PROJECT. NamesWithAliases projections; + /// For REPEAT. + std::shared_ptr repeat; + NamesAndTypesList columns_added_by_repeat; + /// If result_name_ == "", as name "function_name(arguments separated by commas) is used". static ExpressionAction applyFunction( const FunctionBuilderPtr & function_, @@ -103,6 +110,7 @@ struct ExpressionAction static ExpressionAction project(const NamesWithAliases & projected_columns_); static ExpressionAction project(const Names & projected_columns_); static ExpressionAction ordinaryJoin(std::shared_ptr join_, const NamesAndTypesList & columns_added_by_join_); + static ExpressionAction repeatSource(std::shared_ptr repeat_source_); /// Which columns necessary to perform this action. Names getNeededColumns() const;
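Putting the new REPEAT pieces together, the wiring mirrors what DAGExpressionAnalyzer::appendRepeatSource does later in this patch:

// wiring sketch: hand a shared Repeat to the actions chain
auto shared_repeat = Repeat::sharedRepeat(grouping_sets);
actions->add(ExpressionAction::repeatSource(shared_repeat));
// prepare(): grouping-set columns turn nullable and groupingID is appended to the sample block
// execute(): replicateAndFillNull() multiplies the rows and fills in the nulls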
diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index aca814f8501..3cc7ae92874 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -539,7 +539,7 @@ void insertRowToList(Join::RowRefList * list, Join::RowRefList * elem, Block * s { elem->next = list->next; // NOLINT(clang-analyzer-core.NullDereference) list->next = elem; - elem->block = stored_block; + elem->block = stored_block; // a list structure, because "map all" strictness keeps every matching row elem->row_num = index; } @@ -579,7 +579,7 @@ struct Inserter * That is, the former second element, if it was, will be the third, and so on. */ auto elem = reinterpret_cast(pool.alloc(sizeof(MappedType))); - insertRowToList(&emplace_result.getMapped(), elem, stored_block, i); + insertRowToList(&emplace_result.getMapped(), elem, stored_block, i); // the hash table maintains references into the stored block plus row numbers; this list acts as the value for the hash key } } }; @@ -834,7 +834,7 @@ void recordFilteredRows(const Block & block, const String & filter_column, Colum PaddedPODArray & mutable_null_map = static_cast(*mutable_null_map_holder).getData(); const auto & nested_column = column->isColumnNullable() ? static_cast(*column).getNestedColumnPtr() : column; - for (size_t i = 0, size = nested_column->size(); i < size; ++i) + for (size_t i = 0, size = nested_column->size(); i < size; ++i) // if the companion filter column reads as 0 for a row, that row is recorded as null too mutable_null_map[i] |= (!nested_column->getInt(i)); null_map_holder = std::move(mutable_null_map_holder); @@ -861,11 +861,13 @@ void Join::insertFromBlock(const Block & block, size_t stream_index) if (unlikely(!initialized)) throw Exception("Logical error: Join was not initialized", ErrorCodes::LOGICAL_ERROR); + // materialize a block here Block * stored_block = nullptr; { std::lock_guard lk(blocks_lock); total_input_build_rows += block.rows(); blocks.push_back(block); + // a copy of the block is kept stored_block = &blocks.back(); original_blocks.push_back(block); } @@ -1371,9 +1373,9 @@ void Join::handleOtherConditions(Block & block, std::unique_ptr { other_condition_ptr->execute(block); - auto filter_column = ColumnUInt8::create(); + auto filter_column = ColumnUInt8::create(); // create a UInt8 column holding the true/false result auto & filter = filter_column->getData(); - filter.assign(block.rows(), static_cast(1)); + filter.assign(block.rows(), static_cast(1)); // initialize everything to 1, so the merges below act as pure ANDs if (!other_filter_column.empty()) { mergeNullAndFilterResult(block, filter, other_filter_column, false); }
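A minimal model of the row-ref chaining annotated above (shapes modeled on the RowRef/RowRefList definitions in Join.h below; an illustration, not the exact declarations):

struct RowRef { const Block * block; size_t row_num; };
struct RowRefList : RowRef { RowRefList * next = nullptr; };
// insertRowToList() splices the new element in right behind the head, so the head
// object living inside the hash map's mapped value never has to move:
void insertAfterHead(RowRefList * head, RowRefList * elem) { elem->next = head->next; head->next = elem; }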
@@ -1562,6 +1564,7 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr /// Memoize key columns to work with. for (size_t i = 0; i < keys_size; ++i) { + // ColumnPtr derives from intrusive_ptr, so get() yields the raw pointer to the column key_columns[i] = block.getByName(key_names_left[i]).column.get(); if (ColumnPtr converted = key_columns[i]->convertToFullColumnIfConst()) @@ -1574,9 +1577,12 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr /// Keys with NULL value in any column won't join to anything. ColumnPtr null_map_holder; ConstNullMapPtr null_map{}; + // extract the null map attributes of the join key columns extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map); + /// reuse null_map to record the filtered rows, the rows contains NULL or does not /// match the join filter won't join to anything + // this effectively overlays the left filter column's null/false output onto the null map as well recordFilteredRows(block, left_filter_column, null_map_holder, null_map); size_t existing_columns = block.columns(); @@ -1611,12 +1617,12 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr /// Add new columns to the block. size_t num_columns_to_add = sample_block_with_columns_to_add.columns(); MutableColumns added_columns; - added_columns.reserve(num_columns_to_add); + added_columns.reserve(num_columns_to_add); // create the columns that need to be appended std::vector right_table_column_indexes; for (size_t i = 0; i < num_columns_to_add; ++i) { - right_table_column_indexes.push_back(i + existing_columns); + right_table_column_indexes.push_back(i + existing_columns); // record the offsets at which they will be inserted } std::vector right_indexes; @@ -1639,17 +1645,17 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr if (((kind == ASTTableJoin::Kind::Inner || kind == ASTTableJoin::Kind::Right) && strictness == ASTTableJoin::Strictness::Any) || kind == ASTTableJoin::Kind::Anti) - filter = std::make_unique(rows); + filter = std::make_unique(rows); // used to remove elements coming from the right block /// Used with ALL ... JOIN IColumn::Offset current_offset = 0; std::unique_ptr offsets_to_replicate; if (strictness == ASTTableJoin::Strictness::All) - offsets_to_replicate = std::make_unique(rows); + offsets_to_replicate = std::make_unique(rows); // recorded while joining and later used to replicate rows of the left block switch (type) - { + { // after the join, every matched right-side row has been appended to added_columns, and each probe row's replicate offset has been set to the number of joined rows #define M(TYPE) \ case Join::Type::TYPE: \ joinBlockImplType>::Type>( \ @@ -1676,7 +1682,7 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr } FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::random_join_prob_failpoint); for (size_t i = 0; i < num_columns_to_add; ++i) - { + { // insert the added columns into the left-side block const ColumnWithTypeAndName & sample_col = sample_block_with_columns_to_add.getByPosition(i); block.insert(ColumnWithTypeAndName(std::move(added_columns[i]), sample_col.type, sample_col.name)); } @@ -1698,6 +1704,14 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr /// If ALL ... JOIN - we replicate all the columns except the new ones. if (offsets_to_replicate) { + /* + * a, b c, d offset + * 1, y 1, x 2 at this point the right-side columns are already filled, but the left block's rows are not; + * 2, z 1, x the offsets are for the left-side rows, telling each one how many copies to make so it lines up with the right-side rows + * + * 1, y 1, x 2 + * 1, y 1, x + */ for (size_t i = 0; i < existing_columns; ++i) { block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicateRange(probe_process_info.start_row, probe_process_info.end_row, *offsets_to_replicate); } @@ -1719,7 +1733,7 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr if (!other_filter_column.empty() || !other_eq_filter_from_in_column.empty()) { if (!offsets_to_replicate) - throw Exception("Should not reach here, the strictness of join with other condition must be ALL"); + throw Exception("Should not reach here, the strictness of join with other condition must be ALL"); // handle the other conditions handleOtherConditions(block, filter, offsets_to_replicate, right_table_column_indexes); } }
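The offsets diagram above maps directly onto IColumn::replicate, where offsets are cumulative end positions with one entry per source row. A small hedged example:

// row 0 -> 2 copies, row 1 -> 3 copies (cumulative offsets: 2, then 2 + 3 = 5)
auto col = ColumnUInt64::create();
col->insert(Field(static_cast<UInt64>(10)));
col->insert(Field(static_cast<UInt64>(20)));
IColumn::Offsets offsets;
offsets.push_back(2);
offsets.push_back(5);
ColumnPtr replicated = col->replicate(offsets); // yields 10, 10, 20, 20, 20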
diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index abae6268430..d8bfe2afa6e 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -39,7 +39,7 @@ struct ProbeProcessInfo; * JOIN-s could be of nine types: ANY/ALL × LEFT/INNER/RIGHT/FULL, and also CROSS. * * If ANY is specified - then select only one row from the "right" table, (first encountered row), even if there was more matching rows. - * If ALL is specified - usual JOIN, when rows are multiplied by number of matching rows from the "right" table. + * If ALL is specified - usual JOIN, when rows are multiplied by number of matching rows from the "right" table. ALL replicates rows. * ANY is more efficient. * * If INNER is specified - leave only rows that have matching rows from "right" table. @@ -187,7 +187,7 @@ class Join /// Reference to the row in block. struct RowRef { - const Block * block; + const Block * block; // a block pointer plus a row number size_t row_num; RowRef() = default; diff --git a/dbms/src/Interpreters/NullableUtils.cpp b/dbms/src/Interpreters/NullableUtils.cpp index cf8975f8b80..44cb13c0d92 100644 --- a/dbms/src/Interpreters/NullableUtils.cpp +++ b/dbms/src/Interpreters/NullableUtils.cpp @@ -26,6 +26,7 @@ void extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ColumnPtr & nul return; const ColumnNullable & column_nullable = static_cast(*column); + // take the companion byte map and the nested base column out of the nullable column null_map = &column_nullable.getNullMapData(); null_map_holder = column_nullable.getNullMapColumnPtr(); column = &column_nullable.getNestedColumn(); @@ -49,6 +50,7 @@ void extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ColumnPtr & nul PaddedPODArray & mutable_null_map = static_cast(*mutable_null_map_holder).getData(); const PaddedPODArray & other_null_map = column_nullable.getNullMapData(); + // for join key columns, null in any key means null overall, hence the |= here for (size_t i = 0, size = mutable_null_map.size(); i < size; ++i) mutable_null_map[i] |= other_null_map[i]; diff --git a/dbms/src/Interpreters/Repeat.cpp b/dbms/src/Interpreters/Repeat.cpp new file mode 100644 index 00000000000..ff626f36042 --- /dev/null +++ b/dbms/src/Interpreters/Repeat.cpp @@ -0,0 +1,231 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include "DataTypes/DataTypesNumber.h" +#include + +namespace DB +{ + +namespace /// anonymous namespace for storing private function utils. +{ +void convertColumnToNullable(ColumnWithTypeAndName & column) +{ + column.type = makeNullable(column.type); + if (column.column) + column.column = makeNullable(column.column); +} +} + +Repeat::Repeat(const DB::GroupingSets & gss) + : group_sets_names(gss) +{} + +/// for cases like: select count(distinct a), count(distinct b) from t; +/// it will generate 2 grouping sets with <a> and <b>, over which we should +/// repeat one more replica of the source rows from the input block and +/// identify each replica with the grouping id in the appended new column. +/// +/// eg: source block          ==> replicated block (a new groupingID column is appended) +///     <a>   <b>                  <a>    <b>    <groupingID> +///      1     1    target a ---->  1     null        1 +///      2     2    target b ----> null    1          2 +///                 target a ---->  2     null        1 +///                 target b ----> null    2          2 +/// +/// when targeting one specific grouping set, the other grouping set columns should be +/// filled with null values, so that the group by(a,b) operator achieves the equivalent +/// effect of group by(a) and group by(b), since the other grouping set columns have been +/// filled with null values. +/// +/// \param block the source block, replicated and null-filled in place +void Repeat::replicateAndFillNull(Block & block) const +{ + size_t origin_rows = block.rows(); + // make a replicate slice, using it to replicate origin rows.
+ std::unique_ptr offsets_to_replicate; + offsets_to_replicate = std::make_unique(origin_rows); + + // get the replicate offset fixed as group set num. + IColumn::Offset current_offset = 0; + const IColumn::Offset replicate_times_for_one_row = getGroupSetNum(); + + // create a column for grouping id. + auto grouping_id_column = ColumnUInt64::create(); + auto & grouping_id_column_data = grouping_id_column->getData(); + // reserve N times of current block rows size. + grouping_id_column_data.reserve(block.rows() * replicate_times_for_one_row); + + // prepare added mutable grouping id column. + MutableColumns added_grouping_id_column; + added_grouping_id_column.reserve(1); + added_grouping_id_column.push_back(grouping_id_column->getPtr()); + + for (size_t i = 0; i < origin_rows; i++) + { + current_offset += replicate_times_for_one_row; + (*offsets_to_replicate)[i] = current_offset; + + // in the same loop, to fill the grouping id. + for (UInt64 j = 0; j < replicate_times_for_one_row; j++) + { + // start from 1. + Field grouping_id = j + 1; + added_grouping_id_column[0]->insert(grouping_id); + } + } + // todo: for some column overlapping in different grouping set, we should copy the overlapped column as a new column + // and the upper layer OP's computation should be shifted and based on the new one's id. Need a plan side control. + + // replicate the original block rows. + size_t existing_columns = block.columns(); + + if (offsets_to_replicate) + { + for (size_t i = 0; i < existing_columns; ++i) + { + // expand the origin const column, since it may be filled with null value when repeating. + if (block.safeGetByPosition(i).column->isColumnConst()) + block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->convertToFullColumnIfConst(); + + // for every existing column, if the column is a grouping set column, make it nullable. + if (isInGroupSetColumn(block.safeGetByPosition(i).name) && !block.safeGetByPosition(i).column->isColumnNullable()) + { + convertColumnToNullable(block.getByPosition(i)); + } + // replicate it. + block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate); + } + } + + + // after replication, it just copied the same row for N times, we still need to fill corresponding Field with null value. + for (size_t grouping_offset = 0; grouping_offset < replicate_times_for_one_row; grouping_offset++) + { + auto grouping_columns = getGroupSetColumnNamesByOffset(grouping_offset); + // for every grouping col, get the mutated one of them. + for (const auto & grouping_col : grouping_columns) + { + assert(block.getByName(grouping_col).column->isColumnNullable()); + + const auto * nullable_column = typeid_cast(block.getByName(grouping_col).column.get()); + auto origin_size = nullable_column->size(); + // clone the nested column. + MutableColumnPtr new_nested_col = nullable_column->getNestedColumn().cloneResized(origin_size); + // just get mutable new null map. + auto new_null_map = ColumnUInt8::create(); + new_null_map->getData().resize(origin_size); + memcpy(new_null_map->getData().data(), nullable_column->getNullMapData().data(), origin_size * sizeof(nullable_column->getNullMapData()[0])); + + auto cloned_one = ColumnNullable::create(std::move(new_nested_col), std::move(new_null_map)); + + /// travel total rows, and set null values for current grouping set column. 
+ /// basically it looks like: + /// eg: source block          ==> replicated block (a new groupingID column is appended) + ///     <a>   <b>                  <a>    <b>    <groupingID> + ///      1     1    target a ---->  1     null        1 + ///      2     2    target b ----> null    1          2 + ///                 target a ---->  2     null        1 + ///                 target b ----> null    2          2 + /// + /// after the replication is done, the data layout looks like below + /// ==> for one grouping column in one grouping set: + /// -----------------+ locate the target row in every single small group with the same "offset_of_grouping_col" in the set + /// 1 1 1 + replicate_group1: for a the offset is 0, so we should pick and set: + /// 1 1 2 + replicate_group_rows[0].a = null + /// -----------------+ + /// 2 2 1 + replicate_group2: for b the offset is 1, so we should pick and set: + /// 2 2 2 + replicate_group_rows[1].b = null + /// -----------------+ for (size_t i = 0; i < origin_rows; i++) { // for every group of N rows mapped from one original row, fill the corresponding grouping set column with null values according to the offset. // the value is kept only when the offset inside the replicate_group equals the current grouping_offset. // eg: for the case above, with grouping_offset = 0 we only keep offset 0 in each small replicate_group_x and null out the rest. for (UInt64 j = 0; j < replicate_times_for_one_row; j++) { if (j == grouping_offset) { // only keep this column's value for the targeted replica. continue; } // set this column to null for all the other replicas. // todo: since nullable columns always consult the null map first, should we also clean the old data at the same position in the nested column? auto computed_offset = i * replicate_times_for_one_row + j; cloned_one->getNullMapData().data()[computed_offset] = 1; } } block.getByName(grouping_col).column = std::move(cloned_one); } // end of the adjustment for one grouping set's columns (by now, one column per grouping set). } block.insert(ColumnWithTypeAndName(std::move(added_grouping_id_column[0]), grouping_identifier_column_type, grouping_identifier_column_name)); + // the block has been modified in place. +} + +bool Repeat::isInGroupSetColumn(const String & name) const +{ + for (const auto & grouping_set : group_sets_names) + { + // for every grouping set. + for (const auto & grouping_exprs : grouping_set) + { + // for every grouping expr. + for (const auto & grouping_name : grouping_exprs) + { + if (grouping_name == name) + return true; + } + } + } + return false; +} + +const GroupingColumnNames & Repeat::getGroupSetColumnNamesByOffset(size_t offset) const +{ + /// currently there can only be one groupingExprs inside one groupingSet, until the planner supports grouping set merging. + return group_sets_names[offset][0]; +} + +void Repeat::getAllGroupSetColumnNames(std::set & name_set) const +{ + for (const auto & grouping_set : group_sets_names) + { + // for every grouping set. + for (const auto & grouping_exprs : grouping_set) + { + // for every grouping expr. + for (const auto & grouping_name : grouping_exprs) + name_set.insert(grouping_name); + } + } +} + +std::shared_ptr Repeat::sharedRepeat(const GroupingSets & groupingSets) +{ + return std::make_shared(groupingSets); +} + +const std::string Repeat::grouping_identifier_column_name = "groupingID"; +const DataTypePtr Repeat::grouping_identifier_column_type = std::make_shared(); +}
diff --git a/dbms/src/Interpreters/Repeat.h b/dbms/src/Interpreters/Repeat.h new file mode 100644 index 00000000000..7c2c05ab85d --- /dev/null +++ b/dbms/src/Interpreters/Repeat.h @@ -0,0 +1,139 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ +/// groupingSets are formed as { groupingSet, groupingSet... } +/// a groupingSet is formed as { groupingExprs, groupingExprs... } +/// groupingExprs are a slice of expression/column names +/// for a simple case like: select count(distinct a), count(distinct b) from t; +/// we get 2 grouping sets: {[<a>], [<b>]} +/// +/// soon we can support grouping set merging, which could handle a case +/// like: select count(distinct a,b), count(distinct a), count(distinct c) from t, where +/// we still get 2 grouping sets: {[<a,b>, <a>], [<c>]} +/// +/// in the second case, group layouts sharing a common prefix have been merged +/// into one unified set to reduce the underlying data replication/repeat cost. +/// +using GroupingColumnName = ::String; +using GroupingColumnNames = std::vector; +using GroupingSet = std::vector; +using GroupingSets = std::vector;
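To make the nesting concrete, here is the two-set example from the comment above spelled out with these aliases (mirroring how the unit tests later in this patch build them):

// select count(distinct a), count(distinct b) from t  =>  {[<a>], [<b>]}
GroupingSets sets{
    GroupingSet{GroupingColumnNames{"a"}}, // one grouping set holding one expr list
    GroupingSet{GroupingColumnNames{"b"}},
};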
+/** Data structure for the implementation of Repeat. + * + * Repeat is an operator used to replicate lower-layer data source rows to feed different aggregate + * grouping-layout requirements (basically known as grouping sets). + * + * In the current scenario, it is applied to accelerate the computation of multiple distinct aggregates by utilizing + * the computing resources of multiple nodes, via a 3-phase aggregation scheme under MPP mode. + * + * The GroupingSets description is all the Repeat operator itself needs; the length of GroupingSets is the needed + * repeat count (in other words, one grouping set requires one replica of the source rows). Since one grouping + * set column shouldn't let its targeted rows be affected by the other grouping set columns (which will also appear in + * the group by items) when doing the grouping work, we isolate the different grouping set columns by filling them with + * null values while repeating rows. + * + * Here is an example: + * Say we got a query like this: select count(distinct a), count(distinct b) from t. + * + * The downward requirements formed by this query consist of two different grouping sets <a> and <b>, and both of these + * two columns will be in the group by items. Record that here as --- GROUP BY(a,b). + * + * Different group layouts cannot be fed with the same replica of data in shuffling mode, except by gathering + * them all onto a single node, and the latter usually comes with a single-point bottleneck. + * + * That's why the data repeat happens here. Say we got two tuples as below: + * + * <a> <b>           ==> after repeat we got + *  1   1                origin row   1   1 + *  1   2                repeat row   1   1 + *                       origin row   1   2 + *                       repeat row   1   2 + * + * Although we have now repeated/doubled the origin rows, when grouping them together + * with the GROUP BY(a,b) clause (resulting in 2 groups here: (1,1) and (1,2)), we find that we still cannot get the + * right answer for the count-distinct agg on a. + * + * In theory, every origin/repeated row should be targeted at one grouping requirement, which means rows <1> and <3> + * above should be used to feed count(distinct a); but since the value of b in row <3> differs from that in row <1>, + * they end up being divided into different groups. + * + * Coming back to the original goal of feeding count(distinct a): we don't even care what column b held in rows <1> + * and <3>, because the current agg's argument is aimed at column a. Therefore, we fill every non-targeted grouping set + * column in the repeated rows with null values. After that we get the rows below: + * + * <a> <b>           ==> after repeat we got + *  1   1                origin row   1    null  ---> target for grouping set a + *  1   2                repeat row  null   1    ---> target for grouping set b + *                       origin row   1    null  ---> target for grouping set a + *                       repeat row  null   2    ---> target for grouping set b + * + * Then, when grouping them together with the GROUP BY(a,b) clause, rows <1> and <3> land in one group, while rows <2> + * and <4> each form a group of their own. Among them, every distinct agg has its targeted data grouped correctly. The + * GROUP BY(a,b) clause effectively acts as the equivalent of GROUP BY(a, null) for a-targeted rows and GROUP BY(null, b) + * for b-targeted rows. + * + * Over correctly grouped data, the result computation for the distinct aggs is straightforward. By the way, if an origin + * row has some column that doesn't belong to any grouping set, it is simply copied as-is into the repeated rows. + * + */
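A usage sketch distilled from gtest_block_repeat.cpp further down in this patch:

// replicate a block in place for the grouping sets {<a>, <b>}
GroupingSets sets{GroupingSet{GroupingColumnNames{"a"}}, GroupingSet{GroupingColumnNames{"b"}}};
auto repeat = Repeat::sharedRepeat(sets);
repeat->replicateAndFillNull(block);
// block now holds twice the rows, columns a and b are nullable,
// and a non-nullable UInt64 "groupingID" column (values starting at 1) has been appended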
+class Repeat +{ +public: + explicit Repeat(const GroupingSets & gss); + + // replicateAndFillNull is the core functionality the Repeat operator provides. Briefly, it replicates + // the origin rows according to the local grouping sets description, and appends a new column named groupingID + // to indicate which grouping set each row targets. + void replicateAndFillNull(Block & block) const; + + size_t getGroupSetNum() const { return group_sets_names.size(); } + + bool isInGroupSetColumn(const String & name) const; + + const GroupingColumnNames & getGroupSetColumnNamesByOffset(size_t offset) const; + + void getAllGroupSetColumnNames(std::set & name_set) const; + + static std::shared_ptr sharedRepeat(const GroupingSets & groupingSets); + + static const String grouping_identifier_column_name; + + static const DataTypePtr grouping_identifier_column_type; + +private: + GroupingSets group_sets_names; +}; +} // namespace DB \ No newline at end of file diff --git a/dbms/src/Interpreters/sortBlock.cpp b/dbms/src/Interpreters/sortBlock.cpp index 9995329b833..438a14b42bd 100644 --- a/dbms/src/Interpreters/sortBlock.cpp +++ b/dbms/src/Interpreters/sortBlock.cpp @@ -410,6 +410,7 @@ void sortBlock(Block & block, const SortDescription & description, size_t limit) : block.safeGetByPosition(description[0].column_number).column.get(); IColumn::Permutation perm; + // the permutation is a reordering of the row offsets if (NeedCollation(column, description[0])) column->getPermutation(*description[0].collator, reverse, limit, description[0].nulls_direction, perm); else @@ -417,7 +418,7 @@ void sortBlock(Block & block, const SortDescription & description, size_t limit) size_t columns = block.columns(); for (size_t i = 0; i < columns; ++i) - block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->permute(perm, limit); + block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->permute(perm, limit); // reorganize each column's data according to the permuted offsets } else { diff --git a/dbms/src/Interpreters/tests/gtest_block_repeat.cpp b/dbms/src/Interpreters/tests/gtest_block_repeat.cpp new file mode 100644 index 00000000000..b3c1cce713d --- /dev/null +++ b/dbms/src/Interpreters/tests/gtest_block_repeat.cpp @@ -0,0 +1,238 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include + +namespace DB +{ +namespace tests +{ + +class BlockRepeat : public ::testing::Test +{ +public: + using ColStringType = typename TypeTraits::FieldType; + using ColInt64Type = typename TypeTraits::FieldType; + using ColUInt64Type = typename TypeTraits::FieldType; + using ColumnWithString = std::vector; + using ColumnWithInt64 = std::vector; + using ColumnWithUInt64 = std::vector; + + const String single_col_name{"single_col"}; + const ColumnWithString col0_ori{"col0-1 ", "col0-7", "col0-0 ", "col0-3", "col0-4", "col0-6", "col0-2 ", "col0-5"}; + const std::vector col_name{"age", "gender", "country", "region", "zip"}; +}; + +TEST_F(BlockRepeat, Repeat) +try +{ + { + // test the basic block repeat operation.
(two grouping set) + const ColumnsWithTypeAndName + ori_col + = { + toVec(col_name[0], ColumnWithInt64{1, 0, -1}), + toVec(col_name[1], ColumnWithString{"1 ", "1 ", "1 "}), + toVec(col_name[2], ColumnWithString{"1", "2", "3"}), + toVec(col_name[3], ColumnWithUInt64{1, 1, 0}), + }; + // group set, group set + GroupingSet g_gender = GroupingSet{GroupingColumnNames{col_name[1]}}; + GroupingSet g_country = GroupingSet{GroupingColumnNames{col_name[2]}}; + GroupingSets group_sets = GroupingSets{g_gender, g_country}; + Repeat repeat = Repeat(group_sets); + Block block(ori_col); + auto origin_rows = block.rows(); + + repeat.replicateAndFillNull(block); + // assert the col size is added with 1. + ASSERT_EQ(block.getColumns().size(), size_t(5)); + // assert the new col groupingID is appended. + ASSERT_EQ(block.getColumnsWithTypeAndName()[4].name, "groupingID"); + // assert the block size is equal to origin rows * grouping set num. + auto repeat_rows = block.rows(); + auto grouping_set_num = repeat.getGroupSetNum(); + ASSERT_EQ(origin_rows * grouping_set_num, repeat_rows); // 6 + // assert grouping set column are nullable. + ASSERT_EQ(block.getColumns()[0].get()->isColumnNullable(), false); + ASSERT_EQ(block.getColumns()[1].get()->isColumnNullable(), true); + ASSERT_EQ(block.getColumns()[2].get()->isColumnNullable(), true); + ASSERT_EQ(block.getColumns()[3].get()->isColumnNullable(), false); + ASSERT_EQ(block.getColumns()[4].get()->isColumnNullable(), false); + + // assert the rows layout + // "age", "gender", "country", "region", "groupingID" + // ori_col 1 "1 " null 1 1 + // rpt_col 1 null "1" 1 2 + // + // ori_col 0 "1 " null 1 1 + // rpt_col 0 null "2" 1 2 + // + // ori_col -1 "1 " null 0 1 + // rpt_col -1 null "3" 0 2 + + const auto res0 = ColumnWithInt64{1, 1, 0, 0, -1, -1}; + const auto * col_0 = typeid_cast(block.getColumns()[0].get()); + for (int i = 0; i < int(repeat_rows); ++i) + { + ASSERT_EQ(col_0->getElement(i), res0[i]); + } + + const auto res1 = ColumnWithString{"1 ", "null", "1 ", "null", "1 ", "null"}; + const auto * col_1 = typeid_cast(block.getColumns()[1].get()); + const auto * col_1_nest = &static_cast(col_1->getNestedColumn()); + for (int i = 0; i < int(repeat_rows); ++i) + { + if (res1[i] == "null") { + ASSERT_EQ(col_1->isNullAt(i), true); + } else { + ASSERT_EQ(col_1_nest->getDataAt(i), res1[i]); + } + } + + const auto res2 = ColumnWithString{"null", "1", "null", "2", "null", "3"}; + const auto * col_2 = typeid_cast(block.getColumns()[2].get()); + const auto * col_2_nest = &static_cast(col_2->getNestedColumn()); + for (int i = 0; i < int(repeat_rows); ++i) + { + if (res2[i] == "null") { + ASSERT_EQ(col_2->isNullAt(i), true); + } else { + ASSERT_EQ(col_2_nest->getDataAt(i), res2[i]); + } + } + + const auto res3 = ColumnWithUInt64{1, 1,1,1, 0,0}; + const auto * col_3 = typeid_cast(block.getColumns()[3].get()); + for (int i = 0; i < int(repeat_rows); ++i) + { + ASSERT_EQ(col_3->getElement(i), res3[i]); + } + + const auto res4 = ColumnWithUInt64{1, 2, 1, 2, 1, 2}; + const auto * col_4 = typeid_cast(block.getColumns()[4].get()); + for (int i = 0; i < int(repeat_rows); ++i) + { + ASSERT_EQ(col_4->getElement(i), res4[i]); + } + } + { + // test block repeat operation for multi grouping set (triple here) + const ColumnsWithTypeAndName + ori_col + = { + toVec(col_name[0], ColumnWithInt64{1, 0, -1}), + toVec(col_name[1], ColumnWithString{"aaa", "bbb", "ccc"}), + toVec(col_name[2], ColumnWithString{"1", "2", "3"}), + toVec(col_name[3], ColumnWithUInt64{1, 1, 0}), + }; + // group set, 
group set + GroupingSet g_gender = GroupingSet{GroupingColumnNames{col_name[1]}}; + GroupingSet g_country = GroupingSet{GroupingColumnNames{col_name[2]}}; + GroupingSet g_region = GroupingSet{GroupingColumnNames{col_name[3]}}; + GroupingSets group_sets = GroupingSets{g_gender, g_country, g_region}; + Repeat repeat = Repeat(group_sets); + Block block(ori_col); + auto origin_rows = block.rows(); + + repeat.replicateAndFillNull(block); + // assert the col size is added with 1. + ASSERT_EQ(block.getColumns().size(), size_t(5)); + // assert the new col groupingID is appended. + ASSERT_EQ(block.getColumnsWithTypeAndName()[4].name, "groupingID"); + // assert the block size is equal to origin rows * grouping set num. + auto repeat_rows = block.rows(); + auto grouping_set_num = repeat.getGroupSetNum(); + ASSERT_EQ(origin_rows * grouping_set_num, repeat_rows); // 9 + // assert grouping set column are nullable. + ASSERT_EQ(block.getColumns()[0].get()->isColumnNullable(), false); + ASSERT_EQ(block.getColumns()[1].get()->isColumnNullable(), true); + ASSERT_EQ(block.getColumns()[2].get()->isColumnNullable(), true); + ASSERT_EQ(block.getColumns()[3].get()->isColumnNullable(), true); + ASSERT_EQ(block.getColumns()[4].get()->isColumnNullable(), false); + + // assert the rows layout + // "age", "gender", "country", "region", "groupingID" + // ori_col 1 "aaa" null null 1 + // rpt_col 1 null "1" null 2 + // rpt_col 1 null null 1 3 + // + // ori_col 0 "bbb" null null 1 + // rpt_col 0 null "2" null 2 + // rpt_col 0 null null 1 3 + + // ori_col -1 "ccc" null null 1 + // rpt_col -1 null "3" null 2 + // rpt_col -1 null null 0 3 + + const auto res0 = ColumnWithInt64{1, 1, 1, 0, 0, 0, -1, -1, -1}; + const auto * col_0 = typeid_cast(block.getColumns()[0].get()); + for (int i = 0; i < int(repeat_rows); ++i) + { + ASSERT_EQ(col_0->getElement(i), res0[i]); + } + + const auto res1 = ColumnWithString{"aaa", "null", "null", "bbb", "null", "null", "ccc", "null", "null"}; + const auto * col_1 = typeid_cast(block.getColumns()[1].get()); + const auto * col_1_nest = &static_cast(col_1->getNestedColumn()); + for (int i = 0; i < int(repeat_rows); ++i) + { + if (res1[i] == "null") { + ASSERT_EQ(col_1->isNullAt(i), true); + } else { + ASSERT_EQ(col_1_nest->getDataAt(i), res1[i]); + } + } + + const auto res2 = ColumnWithString{"null", "1", "null", "null", "2", "null", "null", "3", "null"}; + const auto * col_2 = typeid_cast(block.getColumns()[2].get()); + const auto * col_2_nest = &static_cast(col_2->getNestedColumn()); + for (int i = 0; i < int(repeat_rows); ++i) + { + if (res2[i] == "null") { + ASSERT_EQ(col_2->isNullAt(i), true); + } else { + ASSERT_EQ(col_2_nest->getDataAt(i), res2[i]); + } + } + + // use UInt64(-1) to represent null. 
+ const auto res3 = ColumnWithUInt64{UInt64(-1), UInt64(-1), 1, UInt64(-1), UInt64(-1), 1, UInt64(-1), UInt64(-1), 0}; + const auto * col_3 = typeid_cast(block.getColumns()[3].get()); + const auto * col_3_nest = &typeid_cast(col_3->getNestedColumn()); + for (int i = 0; i < int(repeat_rows); ++i) + { + if (res3[i] == UInt64(-1)) { + ASSERT_EQ(col_3->isNullAt(i), true); + } else { + ASSERT_EQ(col_3_nest->getElement(i), res3[i]); + } + } + + const auto res4 = ColumnWithUInt64{1, 2, 3, 1, 2, 3, 1, 2, 3}; + const auto * col_4 = typeid_cast(block.getColumns()[4].get()); + for (int i = 0; i < int(repeat_rows); ++i) + { + ASSERT_EQ(col_4->getElement(i), res4[i]); + } + } +} +CATCH + +} // namespace tests +} // namespace DB diff --git a/dbms/src/TestUtils/FunctionTestUtils.cpp b/dbms/src/TestUtils/FunctionTestUtils.cpp index a679bbd9885..f7ab140b2fc 100644 --- a/dbms/src/TestUtils/FunctionTestUtils.cpp +++ b/dbms/src/TestUtils/FunctionTestUtils.cpp @@ -220,7 +220,10 @@ ::testing::AssertionResult columnsEqual( ASSERT_EQUAL(expect_col.column->size(), actual_col.column->size(), fmt::format("Column {} size mismatch", i)); auto type_eq = dataTypeEqual(expected[i].type, actual[i].type); if (!type_eq) + { + std::cout << "type equal false" << std::endl; return type_eq; + } } auto const expected_row_set = columnsToRowSet(expected); @@ -259,6 +262,7 @@ ::testing::AssertionResult columnsEqual( .append("\n"); } buf.append("...\n"); + std::cout<"); + } + buf.fmtAppend("]\n"); +} + void serializeJoin(const String & executor_id, const tipb::Join & join, FmtBuffer & buf) { buf.fmtAppend("{} | {}, {}. left_join_keys: {{", executor_id, getJoinTypeName(join.join_type()), getJoinExecTypeName(join.join_exec_type())); @@ -282,6 +306,9 @@ void ExecutorSerializer::serializeListStruct(const tipb::DAGRequest * dag_reques case tipb::ExecType::TypeLimit: serializeLimit("Limit", executor.limit(), buf); break; + case tipb::ExecType::TypeRepeatSource: + serializeRepeatSource("Repeat", executor.repeat_source(), buf); + break; default: throw TiFlashException("Should not reach here", Errors::Coprocessor::Internal); } @@ -339,6 +366,9 @@ void ExecutorSerializer::serializeTreeStruct(const tipb::Executor & root_executo case tipb::ExecType::TypeWindow: serializeWindow(executor.executor_id(), executor.window(), buf); break; + case tipb::ExecType::TypeRepeatSource: + serializeRepeatSource(executor.executor_id(), executor.repeat_source(), buf); + break; default: throw TiFlashException("Should not reach here", Errors::Coprocessor::Internal); } diff --git a/dbms/src/TestUtils/mockExecutor.cpp b/dbms/src/TestUtils/mockExecutor.cpp index 2a7c820ce55..1220e873a32 100644 --- a/dbms/src/TestUtils/mockExecutor.cpp +++ b/dbms/src/TestUtils/mockExecutor.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -361,6 +362,29 @@ DAGRequestBuilder & DAGRequestBuilder::sort(MockOrderByItemVec order_by_vec, boo return *this; } +DAGRequestBuilder & DAGRequestBuilder::repeat(MockVVecColumnNameVec grouping_set_columns) +{ + assert(root); + auto grouping_sets_ast = mock::MockVVecGroupingNameVec(); + auto grouping_col_collection = std::set(); + for (const auto & grouping_set : grouping_set_columns) { + auto grouping_set_ast = mock::MockVecGroupingNameVec(); + for (const auto &grouping_exprs : grouping_set) { + auto grouping_exprs_ast = mock::MockGroupingNameVec(); + for (const auto &grouping_col : grouping_exprs) + { + auto ast_col_ptr = buildColumn(grouping_col); // string identifier change to ast column ref + 
grouping_exprs_ast.emplace_back(std::move(ast_col_ptr)); + grouping_col_collection.insert(grouping_col); + } + grouping_set_ast.emplace_back(std::move(grouping_exprs_ast)); + } + grouping_sets_ast.emplace_back(std::move(grouping_set_ast)); + } + root = compileRepeat(root, getExecutorIndex(), grouping_sets_ast, grouping_col_collection); + return *this; +} + void MockDAGRequestContext::addMockTable(const String & db, const String & table, const MockColumnInfoVec & mock_column_infos, size_t concurrency_hint) { auto columns = getColumnWithTypeAndName(genNamesAndTypes(mockColumnInfosToTiDBColumnInfos(mock_column_infos), "mock_table_scan")); diff --git a/dbms/src/TestUtils/mockExecutor.h b/dbms/src/TestUtils/mockExecutor.h index 307a034a9ac..df9d162f691 100644 --- a/dbms/src/TestUtils/mockExecutor.h +++ b/dbms/src/TestUtils/mockExecutor.h @@ -38,6 +38,8 @@ using MockOrderByItemVec = std::vector; using MockPartitionByItem = std::pair; using MockPartitionByItemVec = std::vector; using MockColumnNameVec = std::vector; +using MockVecColumnNameVec = std::vector; // for grouping set (every groupingExpr element inside is slice of column) +using MockVVecColumnNameVec = std::vector; // for grouping sets using MockAstVec = std::vector; using MockWindowFrame = mock::MockWindowFrame; @@ -145,6 +147,9 @@ class DAGRequestBuilder DAGRequestBuilder & sort(MockOrderByItem order_by, bool is_partial_sort, uint64_t fine_grained_shuffle_stream_count = 0); DAGRequestBuilder & sort(MockOrderByItemVec order_by_vec, bool is_partial_sort, uint64_t fine_grained_shuffle_stream_count = 0); + // repeat + DAGRequestBuilder & repeat(MockVVecColumnNameVec grouping_set_columns); + void setCollation(Int32 collator_) { properties.collator = convertToTiDBCollation(collator_); } Int32 getCollation() const { return abs(properties.collator); } diff --git a/tai.cpp b/tai.cpp new file mode 100644 index 00000000000..59b2935c6a4 --- /dev/null +++ b/tai.cpp @@ -0,0 +1,64 @@ +// +// Created by arenatlx on 2022/10/27. 
+// + +#include +#include +#include "tai.h" + + + +class HAHA { +public: + template + HAHA & operator=(std::vector && rhs){ + return rhs[0]; + } +private: + int a; +}; + + +class MY { + using Arra1y = std::vector; +public: + HAHA operator[](size_t n) const; +}; + +HAHA MY::operator[](size_t n) const{ + Arra1y a(n); + return a[0]; +} + +struct Test{ + ~Test(){ + std::cout<<"kill test"<(); + auto tmp = new(Test); // 这个地方直接 Test() 还是会析构一次,有点奇怪,只有 new 才行。 + vec.push_back(std::move(*tmp)); + std::cout< v; + v.push_back("aaaa"); + v.push_back("bbbb"); + v[0][0]='1'; + v.push_back(std::move(v[0])); + std::cout< Date: Tue, 6 Dec 2022 20:46:11 +0800 Subject: [PATCH 02/31] fix the test Exeception because of fmtlib can take '{' as escape symbol Signed-off-by: AilinKid <3148019@qq.com> --- .../Coprocessor/DAGExpressionAnalyzer.cpp | 4 +-- .../Coprocessor/collectOutputFieldTypes.cpp | 3 ++- .../src/Flash/tests/gtest_filter_executor.cpp | 6 +++-- dbms/src/TestUtils/ExecutorTestUtils.cpp | 1 + dbms/src/TestUtils/FunctionTestUtils.cpp | 25 +++++++++++++++++++ dbms/src/TestUtils/executorSerializer.cpp | 4 +-- 6 files changed, 36 insertions(+), 7 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index 01315929ff3..6d0162ec09c 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -837,12 +837,12 @@ ExpressionActionsPtr DAGExpressionAnalyzer::appendRepeatSource( const tipb::RepeatSource & repeatSource, ExpressionActionsChain & chain) { auto & last_step = initAndGetLastStep(chain); - auto shared_repeat = buildRepeatGroupingColumns(repeatSource, last_step.actions); - last_step.actions->add(ExpressionAction::repeatSource(shared_repeat)); for (const auto &origin_col : last_step.actions->getSampleBlock().getNamesAndTypesList()) { last_step.required_output.push_back(origin_col.name); } + auto shared_repeat = buildRepeatGroupingColumns(repeatSource, last_step.actions); + last_step.actions->add(ExpressionAction::repeatSource(shared_repeat)); // an added column from REPEAT action. source_columns.emplace_back(Repeat::grouping_identifier_column_name, Repeat::grouping_identifier_column_type); auto before_repeat_source = chain.getLastActions(); diff --git a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp index d2536255a76..b4d3079a9e2 100644 --- a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp +++ b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp @@ -120,7 +120,8 @@ bool collectForRepeat(std::vector &out_field_types, const tipb: field_type.set_tp(TiDB::TypeLongLong); field_type.set_charset("binary"); field_type.set_collate(TiDB::ITiDBCollator::BINARY); - field_type.set_flag(0); + // groupingID column should be Uint64 and NOT NULL. + field_type.set_flag(TiDB::ColumnFlagUnsigned | TiDB::ColumnFlagNotNull); field_type.set_flen(-1); field_type.set_decimal(-1); out_field_types.push_back(field_type); diff --git a/dbms/src/Flash/tests/gtest_filter_executor.cpp b/dbms/src/Flash/tests/gtest_filter_executor.cpp index c577772e46c..4605396b4be 100644 --- a/dbms/src/Flash/tests/gtest_filter_executor.cpp +++ b/dbms/src/Flash/tests/gtest_filter_executor.cpp @@ -213,10 +213,12 @@ try .scan("test_db", "test_table") .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) .build(context); + // test is ok now for non-planner enabled. 
executeAndAssertColumnsEqual( request, - {toNullableVec({"banana"}), - toNullableVec({"banana"})}); + {toNullableVec({"banana", {}, {}, {}, "banana", {}}), + toNullableVec({{}, "apple", {}, {}, {}, "banana"}), + toVec({1,2,1,2,1,2})}); } CATCH diff --git a/dbms/src/TestUtils/ExecutorTestUtils.cpp b/dbms/src/TestUtils/ExecutorTestUtils.cpp index 2810bb033dc..403651569c2 100644 --- a/dbms/src/TestUtils/ExecutorTestUtils.cpp +++ b/dbms/src/TestUtils/ExecutorTestUtils.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include diff --git a/dbms/src/TestUtils/FunctionTestUtils.cpp b/dbms/src/TestUtils/FunctionTestUtils.cpp index f7ab140b2fc..b616c441bf4 100644 --- a/dbms/src/TestUtils/FunctionTestUtils.cpp +++ b/dbms/src/TestUtils/FunctionTestUtils.cpp @@ -193,6 +193,7 @@ std::multiset columnsToRowSet(const ColumnsWithTypeAndName & cols) { for (size_t i = 0, size = col.column->size(); i < size; ++i) { + new (rows[i].place(col_id)) Field((*col.column)[i]); } } @@ -229,6 +230,30 @@ ::testing::AssertionResult columnsEqual( auto const expected_row_set = columnsToRowSet(expected); auto const actual_row_set = columnsToRowSet(actual); + { + auto expect_it = expected_row_set.begin(); + auto actual_it = actual_row_set.begin(); + FmtBuffer buf1; + FmtBuffer buf2; + for (; expect_it != expected_row_set.end(); ++expect_it, ++actual_it) + { + buf1.joinStr( + expect_it->begin(), + expect_it->end(), + [](const auto & v, FmtBuffer & fb) { fb.append(v.toString()); }, + " ") + .append("\n"); + buf2.joinStr( + actual_it->begin(), + actual_it->end(), + [](const auto & v, FmtBuffer & fb) { fb.append(v.toString()); }, + " ") + .append("\n"); + } + auto res1 = buf1.toString(); + auto res2 = buf2.toString(); + } + if (expected_row_set != actual_row_set) { FmtBuffer buf; diff --git a/dbms/src/TestUtils/executorSerializer.cpp b/dbms/src/TestUtils/executorSerializer.cpp index 5d889369f57..33304e298c0 100644 --- a/dbms/src/TestUtils/executorSerializer.cpp +++ b/dbms/src/TestUtils/executorSerializer.cpp @@ -168,7 +168,7 @@ void serializeRepeatSource(const String & executor_id, const tipb::RepeatSource buf.fmtAppend("<"); for (const auto & grouping_exprs : grouping_set.grouping_exprs()) { - buf.fmtAppend("{"); + buf.fmtAppend("{{"); for (auto i = 0; i < grouping_exprs.grouping_expr().size(); i++) { if (i != 0) { @@ -177,7 +177,7 @@ void serializeRepeatSource(const String & executor_id, const tipb::RepeatSource auto expr = grouping_exprs.grouping_expr().Get(i); serializeExpression(expr, buf); } - buf.fmtAppend("}"); + buf.fmtAppend("}}"); } buf.fmtAppend(">"); } From 7f9b656bfdd41a1e2769b1b7d04da6dae44f18b2 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Wed, 7 Dec 2022 18:32:33 +0800 Subject: [PATCH 03/31] add test for tiflash repeat logic Signed-off-by: AilinKid <3148019@qq.com> --- .../Debug/MockExecutor/RepeatSourceBinder.h | 2 +- .../src/Flash/tests/gtest_filter_executor.cpp | 138 +++++++++++++++++- .../Interpreters/tests/gtest_block_repeat.cpp | 31 ++++ 3 files changed, 168 insertions(+), 3 deletions(-) diff --git a/dbms/src/Debug/MockExecutor/RepeatSourceBinder.h b/dbms/src/Debug/MockExecutor/RepeatSourceBinder.h index 473393221cf..ace010048b2 100644 --- a/dbms/src/Debug/MockExecutor/RepeatSourceBinder.h +++ b/dbms/src/Debug/MockExecutor/RepeatSourceBinder.h @@ -25,7 +25,7 @@ class RepeatSourceBinder : public ExecutorBinder { public: RepeatSourceBinder(size_t & index_, const DAGSchema & output_schema_, MockVVecGroupingNameVec gss) - : ExecutorBinder(index_, "repeat_source" + 
+        : ExecutorBinder(index_, "repeat_source_" + std::to_string(index_), output_schema_)
         , grouping_sets_columns(gss)
     {}
 
diff --git a/dbms/src/Flash/tests/gtest_filter_executor.cpp b/dbms/src/Flash/tests/gtest_filter_executor.cpp
index 4605396b4be..68b8c39cca9 100644
--- a/dbms/src/Flash/tests/gtest_filter_executor.cpp
+++ b/dbms/src/Flash/tests/gtest_filter_executor.cpp
@@ -208,17 +208,151 @@ try
         request,
         {toNullableVec<String>({"banana"}),
          toNullableVec<String>({"banana"})});
+}
+CATCH
 
-    request = context
+TEST_F(FilterExecutorTestRunner, RepeatLogical)
+try
+{
+    /// the following tests currently pass with the planner disabled.
+
+    /// case 1
+    auto request = context
                        .scan("test_db", "test_table")
                        .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
                        .build(context);
-    // this test passes now with the planner disabled.
+    /// data flow:
+    ///
+    ///  s1          s2
+    ///  "banana"    "apple"
+    ///  NULL        NULL
+    ///  "banana"    "banana"
+    ///      |
+    ///      v
+    ///  s1          s2          groupingID
+    ///  "banana"    NULL        1
+    ///  NULL        "apple"     2
+    ///  NULL        NULL        1
+    ///  NULL        NULL        2
+    ///  "banana"    NULL        1
+    ///  NULL        "banana"    2
+    ///
     executeAndAssertColumnsEqual(
         request,
         {toNullableVec<String>({"banana", {}, {}, {}, "banana", {}}),
          toNullableVec<String>({{}, "apple", {}, {}, {}, "banana"}),
         toVec<UInt64>({1,2,1,2,1,2})});
+
+    /// case 2
+    request = context
+                  .scan("test_db", "test_table")
+                  .filter(eq(col("s1"), col("s2")))
+                  .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
+                  .build(context);
+    /// data flow:
+    ///
+    ///  s1          s2
+    ///  "banana"    "apple"
+    ///  NULL        NULL
+    ///  "banana"    "banana"
+    ///      |
+    ///      v
+    ///  s1          s2
+    ///  "banana"    "banana"
+    ///      |
+    ///      v
+    ///  s1          s2          groupingID
+    ///  "banana"    NULL        1
+    ///  NULL        "banana"    2
+    ///
+    executeAndAssertColumnsEqual(
+        request,
+        {toNullableVec<String>({"banana", {}}),
+         toNullableVec<String>({{}, "banana"}),
+         toVec<UInt64>({1,2})});
+
+    /// case 3
+    request = context
+                  .scan("test_db", "test_table")
+                  .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
+                  .filter(eq(col("s1"), col("s2")))
+                  .build(context);
+    /// data flow: TiFlash isn't aware of the operator order here, so this filter is still run before repeat, just like the second test case above.
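+    /// (editor's sketch, not part of the original patch) the replication rule all of these
+    /// cases assert, stated once: for every input row and every grouping set i (1-based),
+    /// repeat emits one copy of the row in which every grouping-set column outside set i is
+    /// overwritten with NULL, and appends groupingID = i; roughly:
+    ///
+    ///     for (size_t row = 0; row < input_rows; ++row)
+    ///         for (UInt64 id = 1; id <= group_sets.size(); ++id)
+    ///             emit(row with grouping columns not in group_sets[id - 1] nulled, groupingID = id);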
+    executeAndAssertColumnsEqual(
+        request,
+        {toNullableVec<String>({"banana", {}}),
+         toNullableVec<String>({{}, "banana"}),
+         toVec<UInt64>({1,2})});
+
+    /// case 4
+    auto const_false = lit(Field(static_cast<UInt64>(0)));
+    request = context
+                  .scan("test_db", "test_table")
+                  .filter(const_false) // refuse all rows
+                  .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
+                  .build(context);
+    executeAndAssertColumnsEqual(
+        request,
+        {});
+
+    /// case 5 (test integrated with aggregation)
+    request = context
+                  .scan("test_db", "test_table")
+                  .aggregation({Count(col("s1"))}, {col("s2")})
+                  .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
+                  .build(context);
+    /// data flow:
+    ///
+    ///  s1          s2
+    ///  "banana"    "apple"
+    ///  NULL        NULL
+    ///  "banana"    "banana"
+    ///      |
+    ///      v
+    ///  count(s1)   s2
+    ///  1           "apple"
+    ///  0           NULL
+    ///  1           "banana"
+    ///      |
+    ///      v
+    ///  count(s1)   s2          groupingID
+    ///  1           NULL        1
+    ///  NULL        "apple"     2
+    ///  0           NULL        1
+    ///  NULL        NULL        2
+    ///  1           NULL        1
+    ///  NULL        "banana"    2
+    ///
+    executeAndAssertColumnsEqual(
+        request,
+        {toNullableVec<UInt64>({1, {}, 0, {}, 1,{}}),
+         toNullableVec<String>({{}, "apple", {},{},{}, "banana"}),
+         toVec<UInt64>({1,2,1,2,1,2})});
+
+    /// case 6 (test integrated with aggregation and projection)
+    request = context
+                  .scan("test_db", "test_table")
+                  .aggregation({Count(col("s1"))}, {col("s2")})
+                  .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
+                  .project({"count(s1)"})
+                  .build(context);
+    executeAndAssertColumnsEqual(
+        request,
+        {toNullableVec<UInt64>({1, {}, 0, {}, 1,{}})});
+
+    /// case 7 (test integrated with aggregation, projection and limit)
+    /// note: for now, limit is executed before repeat to reduce unnecessary row replication work.
+//    request = context
+//                  .scan("test_db", "test_table")
+//                  .aggregation({Count(col("s1"))}, {col("s2")})
+//                  .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
+//                  .project({"count(s1)"})
+//                  .limit(2)
+//                  .build(context);
+//    executeAndAssertColumnsEqual(
+//        request,
+//        {toNullableVec<UInt64>({1, {}, 0, {}})});
+
 }
 CATCH
 
diff --git a/dbms/src/Interpreters/tests/gtest_block_repeat.cpp b/dbms/src/Interpreters/tests/gtest_block_repeat.cpp
index b3c1cce713d..21074ff7a95 100644
--- a/dbms/src/Interpreters/tests/gtest_block_repeat.cpp
+++ b/dbms/src/Interpreters/tests/gtest_block_repeat.cpp
@@ -231,6 +231,37 @@ try
             ASSERT_EQ(col_4->getElement(i), res4[i]);
         }
     }
+    {
+        /// test an empty block
+        const ColumnsWithTypeAndName
+            ori_col
+            = {
+                toVec<Int64>(col_name[0], ColumnWithInt64{}), // without data.
+                toVec<String>(col_name[1], ColumnWithString{}),
+                toVec<String>(col_name[2], ColumnWithString{}),
+                toVec<UInt64>(col_name[3], ColumnWithUInt64{}),
+            };
+        // group set <gender>, group set <country>, group set <region>
+        GroupingSet g_gender = GroupingSet{GroupingColumnNames{col_name[1]}};
+        GroupingSet g_country = GroupingSet{GroupingColumnNames{col_name[2]}};
+        GroupingSet g_region = GroupingSet{GroupingColumnNames{col_name[3]}};
+        GroupingSets group_sets = GroupingSets{g_gender, g_country, g_region};
+        Repeat repeat = Repeat(group_sets);
+        Block block(ori_col);
+        auto origin_rows = block.rows();
+
+        repeat.replicateAndFillNull(block);
+        // assert that exactly one column has been added.
+        ASSERT_EQ(block.getColumns().size(), size_t(5));
+        // assert the new col groupingID is appended.
+        ASSERT_EQ(block.getColumnsWithTypeAndName()[4].name, "groupingID");
+        // assert the block size is equal to origin rows * grouping set num.
+        auto repeat_rows = block.rows();
+        auto grouping_set_num = repeat.getGroupSetNum();
+        ASSERT_EQ(origin_rows, 0);
+        ASSERT_EQ(origin_rows * grouping_set_num, repeat_rows); // 0
+        // assert that the grouping set columns are nullable.
+    }
 }
 CATCH
 
From f66f010a0cf8a5b3ad69670a78215da4ad11de5 Mon Sep 17 00:00:00 2001
From: AilinKid <3148019@qq.com>
Date: Tue, 13 Dec 2022 11:47:19 +0800
Subject: [PATCH 04/31] fix the repeat source logical test and add the repeat
 physical planner support

Signed-off-by: AilinKid <3148019@qq.com>
---
 .../RepeatSourceBlockInputStream.cpp          |   5 +
 .../RepeatSourceBlockInputStream.h            |   1 +
 dbms/src/Debug/MockExecutor/AstToPB.cpp       |   2 +-
 .../Debug/MockExecutor/RepeatSourceBinder.cpp |   2 +-
 dbms/src/Flash/Coprocessor/DAGContext.cpp     |   4 +-
 .../Coprocessor/DAGExpressionAnalyzer.cpp     |  15 +-
 dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp  |   2 +-
 .../Coprocessor/collectOutputFieldTypes.cpp   |  20 +-
 dbms/src/Flash/Planner/PhysicalPlan.cpp       |   7 +-
 dbms/src/Flash/Planner/PlanType.h             |   1 +
 .../Flash/Planner/Plans/PhysicalRepeat.cpp    | 116 ++++++
 dbms/src/Flash/Planner/Plans/PhysicalRepeat.h |  57 +++
 .../src/Flash/tests/gtest_repeat_executor.cpp | 387 ++++++++++++++++++
 dbms/src/Interpreters/Repeat.cpp              |  23 +-
 dbms/src/Interpreters/Repeat.h                |   2 +
 dbms/src/TestUtils/mockExecutor.cpp           |   2 +-
 16 files changed, 630 insertions(+), 16 deletions(-)
 create mode 100644 dbms/src/Flash/Planner/Plans/PhysicalRepeat.cpp
 create mode 100644 dbms/src/Flash/Planner/Plans/PhysicalRepeat.h
 create mode 100644 dbms/src/Flash/tests/gtest_repeat_executor.cpp

diff --git a/dbms/src/DataStreams/RepeatSourceBlockInputStream.cpp b/dbms/src/DataStreams/RepeatSourceBlockInputStream.cpp
index f5075f9c87d..af727442b56 100644
--- a/dbms/src/DataStreams/RepeatSourceBlockInputStream.cpp
+++ b/dbms/src/DataStreams/RepeatSourceBlockInputStream.cpp
@@ -31,4 +31,9 @@ Block RepeatSourceBlockInputStream::getHeader() const
     return res;
 }
 
+void RepeatSourceBlockInputStream::appendInfo(FmtBuffer & buffer) const {
+    buffer.fmtAppend(": grouping set ");
+    repeat_source_actions.get()->getActions()[0].repeat->getGroupingSetsDes(buffer);
+}
+
 } // namespace DB
diff --git a/dbms/src/DataStreams/RepeatSourceBlockInputStream.h b/dbms/src/DataStreams/RepeatSourceBlockInputStream.h
index eaa223ef824..d7f9f6db5cc 100644
--- a/dbms/src/DataStreams/RepeatSourceBlockInputStream.h
+++ b/dbms/src/DataStreams/RepeatSourceBlockInputStream.h
@@ -33,6 +33,7 @@ class RepeatSourceBlockInputStream : public IProfilingBlockInputStream
     }
     String getName() const override { return NAME; }
     Block getHeader() const override;
+    void appendInfo(FmtBuffer & buffer) const override;
 
 protected:
     Block readImpl() override;
diff --git a/dbms/src/Debug/MockExecutor/AstToPB.cpp b/dbms/src/Debug/MockExecutor/AstToPB.cpp
index fa58e2e3fc8..8977d8dc279 100644
--- a/dbms/src/Debug/MockExecutor/AstToPB.cpp
+++ b/dbms/src/Debug/MockExecutor/AstToPB.cpp
@@ -447,7 +447,7 @@ void identifierToPB(const DAGSchema & input, ASTIdentifier * id, tipb::Expr * ex
     *(expr->mutable_field_type()) = columnInfoToFieldType((*ft).second);
     expr->mutable_field_type()->set_collate(collator_id);
     WriteBufferFromOwnString ss;
-    encodeDAGInt64(ft - input.begin(), ss);
+    encodeDAGInt64(ft - input.begin(), ss); // use the offset within the child's input schema here, replacing the column ref used by the current executor
     expr->set_val(ss.releaseStr());
 }
 
diff --git a/dbms/src/Debug/MockExecutor/RepeatSourceBinder.cpp b/dbms/src/Debug/MockExecutor/RepeatSourceBinder.cpp
index 7633c347282..928160c8996 100644
--- a/dbms/src/Debug/MockExecutor/RepeatSourceBinder.cpp
+++ b/dbms/src/Debug/MockExecutor/RepeatSourceBinder.cpp
@@ -55,7 +55,7 @@ ExecutorBinderPtr compileRepeat(ExecutorBinderPtr input, size_t & executor_index
         field_type.set_tp(TiDB::TypeLongLong);
         field_type.set_charset("binary");
         field_type.set_collate(TiDB::ITiDBCollator::BINARY);
-        field_type.set_flag(0);
+        field_type.set_flag(TiDB::ColumnFlagUnsigned | TiDB::ColumnFlagNotNull); // should have the NOT NULL flag
         field_type.set_flen(-1);
         field_type.set_decimal(-1);
         output_schema.push_back(std::make_pair("groupingID", TiDB::fieldTypeToColumnInfo(field_type)));
diff --git a/dbms/src/Flash/Coprocessor/DAGContext.cpp b/dbms/src/Flash/Coprocessor/DAGContext.cpp
index b4a9f9ad515..66e64c11b64 100644
--- a/dbms/src/Flash/Coprocessor/DAGContext.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGContext.cpp
@@ -129,10 +129,10 @@ DAGContext::DAGContext(const tipb::DAGRequest & dag_request_, String log_identif
 
 void DAGContext::initOutputInfo()
 {
-    output_field_types = collectOutputFieldTypes(*dag_request);
+    output_field_types = collectOutputFieldTypes(*dag_request); // so these field types correspond to the output schema's field types of one fragment DAG.
     output_offsets.clear();
     result_field_types.clear();
-    for (UInt32 i : dag_request->output_offsets())
+    for (UInt32 i : dag_request->output_offsets()) // these should be the output offsets carried by the fragment's own DAG request
     {
         output_offsets.push_back(i);
         if (unlikely(i >= output_field_types.size()))
diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp
index 6d0162ec09c..70e92acec51 100644
--- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp
@@ -810,6 +810,7 @@ std::shared_ptr<Repeat> DAGExpressionAnalyzer::buildRepeatGroupingColumns(
     const tipb::RepeatSource & repeatSource, const ExpressionActionsPtr & actions)
 {
     GroupingSets group_sets_columns;
+    std::map<String, bool> map_grouping_col;
     group_sets_columns.reserve(repeatSource.grouping_sets().size());
     for (const auto& group_set : repeatSource.grouping_sets()){
         GroupingSet group_set_columns;
@@ -824,13 +825,22 @@ std::shared_ptr<Repeat> DAGExpressionAnalyzer::buildRepeatGroupingColumns(
             String cp_name = getActions(group_expr, actions);
             // tidb expression computation is based on column index offsets into the child's chunk schema; change them to ck block column names here.
             group_exprs_columns.emplace_back(cp_name);
+            map_grouping_col.insert(std::pair(cp_name, true));
         }
         // move here, since the basic strings are copied from the input cols.
         group_set_columns.emplace_back(std::move(group_exprs_columns));
         }
         group_sets_columns.emplace_back(std::move(group_set_columns));
     }
+    // change the original source columns to be nullable, and add a new column for groupingID.
+    for (auto & mutable_one: source_columns)
+    {
+        if (map_grouping_col[mutable_one.name])
+            mutable_one.type = makeNullable(mutable_one.type);
+    }
+    source_columns.emplace_back(Repeat::grouping_identifier_column_name, Repeat::grouping_identifier_column_type);
+    auto shared_repeat = Repeat::sharedRepeat(group_sets_columns);
+    return shared_repeat;
 }
 
 ExpressionActionsPtr DAGExpressionAnalyzer::appendRepeatSource(
@@ -843,8 +853,7 @@ ExpressionActionsPtr DAGExpressionAnalyzer::appendRepeatSource(
     }
     auto shared_repeat = buildRepeatGroupingColumns(repeatSource, last_step.actions);
     last_step.actions->add(ExpressionAction::repeatSource(shared_repeat));
-    // an added column from REPEAT action.
-    source_columns.emplace_back(Repeat::grouping_identifier_column_name, Repeat::grouping_identifier_column_type);
+
     auto before_repeat_source = chain.getLastActions();
     chain.finalize();
     chain.clear();
diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp
index 9a4a353eeb5..3fdd7cec1f1 100644
--- a/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp
@@ -157,7 +157,7 @@ DAGQueryBlock::DAGQueryBlock(const tipb::Executor & root_, QueryBlockIDGenerator
     else if (current->tp() == tipb::ExecType::TypeProjection)
     {
         GET_METRIC(tiflash_coprocessor_executor_count, type_projection).Increment();
-        children.push_back(std::make_shared<DAGQueryBlock>(source->projection().child(), id_generator));
+        children.push_back(std::make_shared<DAGQueryBlock>(source->projection().child(), id_generator)); // treat the executors below this one as a child query block
     }
     else if (current->tp() == tipb::ExecType::TypeTableScan)
    {
diff --git a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp
index b4d3079a9e2..5d15a4d3bc0 100644
--- a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp
+++ b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include <Flash/Coprocessor/DAGCodec.h>
 #include 
 #include 
 #include 
@@ -108,11 +109,20 @@ bool collectForRepeat(std::vector<tipb::FieldType> & out_field_types, const tipb:
         traverseExecutorTree(child, [&out_child_fields](const tipb::Executor & e) { return collectForExecutor(out_child_fields, e); });
     });
 
-// executor.repeat_source().grouping_sets().Get(1).grouping_exprs().Get(1).grouping_expr().Get(1).
-//    /// the type of grouping set column is always nullable
-//    auto updated_field_type = field_type;
-//    updated_field_type.set_flag(updated_field_type.flag() & (~static_cast<UInt32>(TiDB::ColumnFlagNotNull)));
-//    output_field_types.push_back(updated_field_type);
+    // make the child's output columns nullable according to the grouping-set mapping
+    for (const auto & grouping_set : executor.repeat_source().grouping_sets()){
+        for (const auto & grouping_exprs : grouping_set.grouping_exprs()){
+            for (const auto & grouping_col : grouping_exprs.grouping_expr()){
+                // assert that: grouping_col must be the column ref guaranteed by tidb.
+                auto column_index = decodeDAGInt64(grouping_col.val());
+                if (column_index < 0 || column_index >= static_cast<Int64>(out_child_fields.size()))
+                {
+                    throw TiFlashException("Column index out of bound", Errors::Coprocessor::BadRequest);
+                }
+                out_child_fields[column_index].set_flag(out_child_fields[column_index].flag() & (~TiDB::ColumnFlagNotNull));
+            }
+        }
+    }
 
     {
         // for additional groupingID column.
diff --git a/dbms/src/Flash/Planner/PhysicalPlan.cpp b/dbms/src/Flash/Planner/PhysicalPlan.cpp index 8aabeef3885..aafd805481a 100644 --- a/dbms/src/Flash/Planner/PhysicalPlan.cpp +++ b/dbms/src/Flash/Planner/PhysicalPlan.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -36,7 +37,6 @@ #include #include #include -#include #include namespace DB @@ -197,6 +197,11 @@ void PhysicalPlan::build(const String & executor_id, const tipb::Executor * exec pushBack(PhysicalJoin::build(context, executor_id, log, executor->join(), FineGrainedShuffle(executor), left, right)); break; } + case tipb::ExecType::TypeRepeatSource: + { + pushBack(PhysicalRepeat::build(context, executor_id, log, executor->repeat_source(), popBack())); + break; + } default: throw TiFlashException(fmt::format("{} executor is not supported", executor->tp()), Errors::Planner::Unimplemented); } diff --git a/dbms/src/Flash/Planner/PlanType.h b/dbms/src/Flash/Planner/PlanType.h index 8f347716b2d..4c4d6d283d5 100644 --- a/dbms/src/Flash/Planner/PlanType.h +++ b/dbms/src/Flash/Planner/PlanType.h @@ -37,6 +37,7 @@ struct PlanType MockTableScan = 12, Join = 13, GetResult = 14, + Repeat = 15, }; PlanTypeEnum enum_value; diff --git a/dbms/src/Flash/Planner/Plans/PhysicalRepeat.cpp b/dbms/src/Flash/Planner/Plans/PhysicalRepeat.cpp new file mode 100644 index 00000000000..adb2a774354 --- /dev/null +++ b/dbms/src/Flash/Planner/Plans/PhysicalRepeat.cpp @@ -0,0 +1,116 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +PhysicalPlanNodePtr PhysicalRepeat::build( + const Context & context, + const String & executor_id, + const LoggerPtr & log, + const tipb::RepeatSource & repeat_source, + const PhysicalPlanNodePtr & child) +{ + assert(child); + + child->finalize(); + + if (unlikely(repeat_source.grouping_sets().empty())) + { + //should not reach here + throw TiFlashException("Repeat executor without grouping sets", Errors::Planner::BadRequest); + } + + DAGExpressionAnalyzer analyzer{child->getSchema(), context}; + ExpressionActionsPtr before_repeat_actions = PhysicalPlanHelper::newActions(child->getSampleBlock(), context); + + + auto shared_repeat = analyzer.buildRepeatGroupingColumns(repeat_source, before_repeat_actions); + + // construct sample block. + NamesAndTypes repeat_output_columns; + auto child_header = child->getSchema(); + for (const auto & one : child_header) + { + repeat_output_columns.emplace_back(one.name, shared_repeat->isInGroupSetColumn(one.name)? 
makeNullable(one.type): one.type);
+    }
+    repeat_output_columns.emplace_back(shared_repeat->grouping_identifier_column_name, shared_repeat->grouping_identifier_column_type);
+
+    auto physical_repeat = std::make_shared<PhysicalRepeat>(
+        executor_id,
+        repeat_output_columns,
+        log->identifier(),
+        child,
+        shared_repeat,
+        Block(repeat_output_columns));
+
+    return physical_repeat;
+}
+
+
+void PhysicalRepeat::repeatTransform(DAGPipeline & child_pipeline, Context & context)
+{
+    auto repeat_actions = PhysicalPlanHelper::newActions(child_pipeline.firstStream()->getHeader(), context);
+    repeat_actions->add(ExpressionAction::repeatSource(shared_repeat));
+    String repeat_extra_info = fmt::format("repeat source, repeat_executor_id = {}", execId());
+    child_pipeline.transform([&](auto &stream) {
+        stream = std::make_shared<RepeatSourceBlockInputStream>(stream, repeat_actions);
+        stream->setExtraInfo(repeat_extra_info);
+    });
+}
+
+void PhysicalRepeat::transformImpl(DAGPipeline & pipeline, Context & context, size_t max_streams)
+{
+    child->transform(pipeline, context, max_streams);
+    repeatTransform(pipeline, context);
+}
+
+void PhysicalRepeat::finalize(const Names & parent_require)
+{
+    FinalizeHelper::checkSchemaContainsParentRequire(schema, parent_require);
+    Names required_output;
+    required_output.reserve(shared_repeat->getGroupSetNum()); // the grouping set columns should already exist in the child's output schema.
+    auto name_set = std::set<String>();
+    shared_repeat->getAllGroupSetColumnNames(name_set);
+    // append the parent_require columns, except the self-generated groupingID column.
+    for (const auto & one : parent_require)
+    {
+        if (one != Repeat::grouping_identifier_column_name)
+        {
+            name_set.insert(one);
+        }
+    }
+    for (const auto & grouping_name: name_set) {
+        required_output.emplace_back(grouping_name);
+    }
+    child->finalize(required_output);
+}
+
+const Block & PhysicalRepeat::getSampleBlock() const
+{
+    return sample_block;
+}
+} // namespace DB
diff --git a/dbms/src/Flash/Planner/Plans/PhysicalRepeat.h b/dbms/src/Flash/Planner/Plans/PhysicalRepeat.h
new file mode 100644
index 00000000000..5907c7c047e
--- /dev/null
+++ b/dbms/src/Flash/Planner/Plans/PhysicalRepeat.h
@@ -0,0 +1,57 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ +class PhysicalRepeat : public PhysicalUnary +{ +public: + static PhysicalPlanNodePtr build( + const Context & context, + const String & executor_id, + const LoggerPtr & log, + const tipb::RepeatSource & repeat, + const PhysicalPlanNodePtr & child); + + PhysicalRepeat( + const String & executor_id_, + const NamesAndTypes & schema_, + const String & req_id, + const PhysicalPlanNodePtr & child_, + const std::shared_ptr & shared_repeat, + const Block & sample_block_) + : PhysicalUnary(executor_id_, PlanType::Repeat, schema_, req_id, child_) + , shared_repeat(shared_repeat), sample_block(sample_block_){} + + void finalize(const Names & parent_require) override; + + void repeatTransform(DAGPipeline & child_pipeline, Context & context); + + const Block & getSampleBlock() const override; + +private: + void transformImpl(DAGPipeline & pipeline, Context & context, size_t max_streams) override; + std::shared_ptr shared_repeat; + Block sample_block; +}; +} // namespace DB + + diff --git a/dbms/src/Flash/tests/gtest_repeat_executor.cpp b/dbms/src/Flash/tests/gtest_repeat_executor.cpp new file mode 100644 index 00000000000..020e5f19d26 --- /dev/null +++ b/dbms/src/Flash/tests/gtest_repeat_executor.cpp @@ -0,0 +1,387 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include <TestUtils/ExecutorTestUtils.h>
+#include <TestUtils/mockExecutor.h>
+
+namespace DB
+{
+namespace tests
+{
+class RepeatExecutorTestRunner : public DB::tests::ExecutorTest
+{
+public:
+    void initializeContext() override
+    {
+        ExecutorTest::initializeContext();
+        context.addMockTable({"test_db", "test_table"},
+                             {{"s1", TiDB::TP::TypeString}, {"s2", TiDB::TP::TypeString}},
+                             {toNullableVec<String>("s1", {"banana", {}, "banana"}),
+                              toNullableVec<String>("s2", {"apple", {}, "banana"})});
+        context.addExchangeReceiver("exchange1",
+                                    {{"s1", TiDB::TP::TypeString}, {"s2", TiDB::TP::TypeString}},
+                                    {toNullableVec<String>("s1", {"banana", {}, "banana"}),
+                                     toNullableVec<String>("s2", {"apple", {}, "banana"})});
+    }
+};
+
+TEST_F(RepeatExecutorTestRunner, RepeatLogical)
+try
+{
+    /// case 1
+    auto request = context
+                       .scan("test_db", "test_table")
+                       .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
+                       .build(context);
+    /// data flow:
+    ///
+    ///  s1          s2
+    ///  "banana"    "apple"
+    ///  NULL        NULL
+    ///  "banana"    "banana"
+    ///      |
+    ///      v
+    ///  s1          s2          groupingID
+    ///  "banana"    NULL        1
+    ///  NULL        "apple"     2
+    ///  NULL        NULL        1
+    ///  NULL        NULL        2
+    ///  "banana"    NULL        1
+    ///  NULL        "banana"    2
+    ///
+    executeAndAssertColumnsEqual(
+        request,
+        {toNullableVec<String>({"banana", {}, {}, {}, "banana", {}}),
+         toNullableVec<String>({{}, "apple", {}, {}, {}, "banana"}),
+         toVec<UInt64>({1,2,1,2,1,2})});
+
+    /// case 2
+    request = context
+                  .scan("test_db", "test_table")
+                  .filter(eq(col("s1"), col("s2")))
+                  .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
+                  .build(context);
+    /// data flow:
+    ///
+    ///  s1          s2
+    ///  "banana"    "apple"
+    ///  NULL        NULL
+    ///  "banana"    "banana"
+    ///      |
+    ///      v
+    ///  s1          s2
+    ///  "banana"    "banana"
+    ///      |
+    ///      v
+    ///  s1          s2          groupingID
+    ///  "banana"    NULL        1
+    ///  NULL        "banana"    2
+    ///
+    executeAndAssertColumnsEqual(
+        request,
+        {toNullableVec<String>({"banana", {}}),
+         toNullableVec<String>({{}, "banana"}),
+         toVec<UInt64>({1,2})});
+
+    /// case 3: this case is only for non-planner mode.
+    /// request = context
+    ///               .scan("test_db", "test_table")
+    ///               .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
+    ///               .filter(eq(col("s1"), col("s2")))
+    ///               .build(context);
+    /// data flow: TiFlash isn't aware of the operator order here, so this filter is still run before repeat, just like the second test case above.
+    /// since this case only succeeds with the planner disabled, it is commented out, with the expected result kept here as a note.
+    ///
+    /// executeAndAssertColumnsEqual(
+    ///     request,
+    ///     {toNullableVec<String>({"banana", {}}),
+    ///      toNullableVec<String>({{}, "banana"}),
+    ///      toVec<UInt64>({1,2})});
+
+    /// case 4
+    auto const_false = lit(Field(static_cast<UInt64>(0)));
+    request = context
+                  .scan("test_db", "test_table")
+                  .filter(const_false) // refuse all rows
+                  .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
+                  .build(context);
+    executeAndAssertColumnsEqual(
+        request,
+        {});
+
+    /// case 5 (test integrated with aggregation)
+    request = context
+                  .scan("test_db", "test_table")
+                  .aggregation({Count(col("s1"))}, {col("s2")})
+                  .build(context);
+    executeAndAssertColumnsEqual(
+        request,
+        {toVec<UInt64>({1, 0, 1}),
+         toNullableVec<String>({"apple", {}, "banana"}),});
+
+    request = context
+                  .scan("test_db", "test_table")
+                  .aggregation({Count(col("s1"))}, {col("s2")})
+                  .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
+                  .build(context);
+    /// data flow:
+    ///
+    ///  s1          s2
+    ///  "banana"    "apple"
+    ///  NULL        NULL
+    ///  "banana"    "banana"
+    ///      |
+    ///      v
+    ///  count(s1)   s2
+    ///  1           "apple"
+    ///  0           NULL
+    ///  1           "banana"
+    ///      |
+    ///      v
+    ///  count(s1)   s2          groupingID
+    ///  1           NULL        1
+    ///  NULL        "apple"     2
+    ///  0           NULL        1
+    ///  NULL        NULL        2
+    ///  1           NULL        1
+    ///  NULL        "banana"    2
+    ///
+    executeAndAssertColumnsEqual(
+        request,
+        {toNullableVec<UInt64>({1, {}, 0, {}, 1,{}}),
+         toNullableVec<String>({{}, "apple", {},{},{}, "banana"}),
+         toVec<UInt64>({1,2,1,2,1,2})});
+
+    /// case 6 (test integrated with aggregation and projection)
+    request = context
+                  .scan("test_db", "test_table")
+                  .aggregation({Count(col("s1"))}, {col("s2")})
+                  .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
+                  .project({"count(s1)"})
+                  .build(context);
+    executeAndAssertColumnsEqual(
+        request,
+        {toNullableVec<UInt64>({1, {}, 0, {}, 1,{}})});
+
+    /// case 7 (test integrated with aggregation, projection and limit) 1
+    /// note: for now, limit is executed before repeat to reduce unnecessary row replication work.
+    /// request = context
+    ///               .scan("test_db", "test_table")
+    ///               .aggregation({Count(col("s1"))}, {col("s2")})
+    ///               .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
+    ///               .limit(2)
+    ///               .project({"count(s1)"})
+    ///               .build(context);
+    /// data flow:
+    ///
+    ///  s1          s2
+    ///  "banana"    "apple"
+    ///  NULL        NULL        // table scan
+    ///  "banana"    "banana"
+    ///      |
+    ///      v
+    ///  count(s1)   s2
+    ///  1           "apple"     // aggregate
+    ///  0           NULL
+    ///  1           "banana"
+    ///      |
+    ///      v
+    ///  count(s1)   s2          // limit precedes the repeat OP since they are in the same DAG query block.
+    ///  1           "apple"
+    ///  0           NULL
+    ///      |
+    ///      v
+    ///  count(s1)   s2          groupingID  // repeat is always arranged to run after limit, to avoid unnecessary replication in the same DAG query block.
+    ///  1           NULL        1
+    ///  NULL        "apple"     2
+    ///  0           NULL        1
+    ///  NULL        NULL        2
+    ///      |
+    ///      v
+    ///  count(s1)
+    ///  1
+    ///  NULL
+    ///  0
+    ///  NULL
+    ///
+    /// since this case only succeeds with the planner disabled, it is commented out, with the expected result kept here as a note.
+    ///
+    /// executeAndAssertColumnsEqual(
+    ///     request,
+    ///     {toNullableVec<UInt64>({1, {}, 0, {}})});
+
+    /// case 8 (test integrated with aggregation, projection and limit) 2
+    request = context
+                  .scan("test_db", "test_table")
+                  .aggregation({Count(col("s1"))}, {col("s2")})
+                  .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
+                  .project({"count(s1)"})
+                  .topN({{"count(s1)", true}}, 2)
+                  .build(context);
+    /// data flow:
+    ///
+    ///  s1          s2         ---------------+
+    ///  "banana"    "apple"                   |
+    ///  NULL        NULL       // table scan  |
+    ///  "banana"    "banana"                  |
+    ///      |                                 |
+    ///      v                                 |
+    ///  count(s1)   s2                        |
+    ///  1           "apple"    // aggregate   |
+    ///  0           NULL                      |
+    ///  1           "banana"                  |
+    ///      |                  +-------------> Child DAG Query Block
+    ///      v                                 |
+    ///  count(s1)   s2          groupingID    |
+    ///  1           NULL        1  // repeat  |
+    ///  NULL        "apple"     2             |
+    ///  0           NULL        1             |
+    ///  NULL        NULL        2             |
+    ///  1           NULL        1             |
+    ///  NULL        "banana"    2             |
+    ///      |                  ---------------+
+    ///      v                  ---------------+
+    ///  count(s1)                             |
+    ///  1                                     |
+    ///  NULL        // projection             |
+    ///  0                                     |
+    ///  NULL                                  |
+    ///  1                      +-------------> parent DAG Query Block
+    ///  NULL                                  |
+    ///      |                                 |
+    ///      v                                 |
+    ///  count(s1)   // sort (desc)            |
+    ///  1                                     |
+    ///  1                                     |
+    ///  0                                     |
+    ///  NULL                                  |
+    ///  NULL                                  |
+    ///  NULL                                  |
+    ///      |                                 |
+    ///      v                                 |
+    ///  count(s1)   // limit 2                |
+    ///  1                                     |
+    ///  1                                     |
+    ///                         ---------------+
+    ///
+    /// Note: this plan differs from the previous one. Since the projection between repeat and topN is a SOURCE node,
+    /// it isolates the whole DAG into two independent DAG query blocks, and limit and repeat each take a place in one
+    /// of them. So we can't guarantee that the repeat OP runs after limit does, and the unnecessary replication work
+    /// can't be reduced. The DAG query block division is to blame here.
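+    /// (editor's note) concretely: because that projection acts as a SOURCE node, the DAG splits
+    /// into [scan -> agg -> repeat] and [projection -> topN] query blocks, and a limit/topN can
+    /// only be reordered relative to repeat when both sit inside a single query block.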
+    ///
+    executeAndAssertColumnsEqual(
+        request,
+        {toNullableVec<UInt64>({1, 1})});
+
+    /// case 9 (test integrated with receiver and join)
+    request = context
+                  .receive("exchange1")
+                  .join(context.scan("test_db", "test_table").project({"s2"}), tipb::JoinType::TypeInnerJoin, {col("s2")})
+                  .build(context);
+    executeAndAssertColumnsEqual(
+        request,
+        {toNullableVec<String>({"banana", "banana"}),
+         toNullableVec<String>({"apple", "banana"}),
+         toNullableVec<String>({"apple", "banana"})});
+
+    request = context
+                  .receive("exchange1")
+                  .aggregation({Count(col("s1"))}, {col("s2")})
+                  .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},})
+                  .join(context.scan("test_db", "test_table").project({"s2"}), tipb::JoinType::TypeInnerJoin, {col("s2")})
+                  .project({"count(s1)", "groupingID"})
+                  .topN({{"groupingID", true}}, 2)
+                  .build(context);
+    /// data flow:
+    ///
+    ///  s1          s2         ---------------+
+    ///  "banana"    "apple"                   |
+    ///  NULL        NULL       // table scan  |
+    ///  "banana"    "banana"                  |
+    ///      |                                 |
+    ///      v                                 |
+    ///  count(s1)   s2                        |
+    ///  1           "apple"    // aggregate   |
+    ///  0           NULL                      |
+    ///  1           "banana"                  |
+    ///      |                  +-------------> Child of Child DAG Query Block
+    ///      v                                 |
+    ///  count(s1)   s2          groupingID    |
+    ///  1           NULL        1  // repeat  |
+    ///  NULL        "apple"     2             |
+    ///  0           NULL        1             |
+    ///  NULL        NULL        2             |
+    ///  1           NULL        1             |
+    ///  NULL        "banana"    2             |
+    ///      |                  ---------------+
+    ///      v                  ---------------+
+    ///  count(s1)   s2          groupingID  *  s2          |
+    ///  NULL        "apple"     2              "apple"     // join
+    ///  NULL        "banana"    2              NULL        |
+    ///  "banana"               +-------------> Child DAG Query Block
+    ///      |                                  |
+    ///  NULL        "apple"     2              "apple"     |
+    ///  NULL        "banana"    2              "banana"    |
+    ///      |                  ----------------+
+    ///      v                  ----------------+
+    ///  count(s1)   groupingID  // projection  |
+    ///  NULL        2                          |
+    ///  NULL        2                          |
+    ///      |                  +--------------> Parent DAG Query Block
+    ///      v                                  |
+    ///  count(s1)   groupingID  // topN        |
+    ///  NULL        2                          |
+    ///  NULL        2                          |
+    ///                         ----------------+
+    ///
+    executeAndAssertColumnsEqual(
+        request,
+        {toNullableVec<UInt64>({{}, {}}),
+         toVec<UInt64>({2,2}),});
+
+
+    /// assert the input stream plan format. (under planner-enabled mode)
+    String expected = R"(
+CreatingSets
+ Union: 
+  HashJoinBuild x 10: , join_kind = Inner
+   Expression: 
+    Expression: 
+     Expression: 
+      MockTableScan
+ Union: 
+  Expression x 10: 
+   SharedQuery: 
+    MergeSorting, limit = 2
+     Union: 
+      PartialSorting x 10: limit = 2
+       Expression: 
+        Expression: 
+         HashJoinProbe: 
+          Expression: 
+           RepeatSource: : grouping set [<{count(s1)_collator_46 }><{any(s2)_collator_46 }>]
+            Expression: 
+             SharedQuery: 
+              ParallelAggregating, max_threads: 10, final: true
+               MockExchangeReceiver x 10)";
+    ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10);
+}
+CATCH
+
+/// TODO: more OP combination tests.
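+
+/// (editor's sketch, not part of the original patch) one cheap combination to add next,
+/// reusing the helpers above: project only the self-generated groupingID after repeat, which
+/// keeps the expected rows order-insensitive:
+///
+///     auto request = context
+///                        .scan("test_db", "test_table")
+///                        .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"}}, MockVecColumnNameVec{MockColumnNameVec{"s2"}}})
+///                        .project({"groupingID"})
+///                        .build(context);
+///     executeAndAssertColumnsEqual(request, {toVec<UInt64>({1, 2, 1, 2, 1, 2})});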
+
+} // namespace tests
+} // namespace DB
\ No newline at end of file
diff --git a/dbms/src/Interpreters/Repeat.cpp b/dbms/src/Interpreters/Repeat.cpp
index ff626f36042..16be88e81c0 100644
--- a/dbms/src/Interpreters/Repeat.cpp
+++ b/dbms/src/Interpreters/Repeat.cpp
@@ -35,7 +35,28 @@ void convertColumnToNullable(ColumnWithTypeAndName & column)
 
 Repeat::Repeat(const DB::GroupingSets & gss)
     : group_sets_names(gss){}
 
-
+void Repeat::getGroupingSetsDes(FmtBuffer & buffer) const
+{
+    buffer.fmtAppend("[");
+    for (const auto & grouping_set: group_sets_names)
+    {
+        buffer.fmtAppend("<");
+        for (const auto & grouping_exprs: grouping_set)
+        {
+            buffer.fmtAppend("{{");
+            for (size_t i = 0; i < grouping_exprs.size(); i++)
+            {
+                if (i != 0) {
+                    buffer.fmtAppend(",");
+                }
+                buffer.fmtAppend(grouping_exprs.at(i));
+            }
+            buffer.fmtAppend("}}");
+        }
+        buffer.fmtAppend(">");
+    }
+    buffer.fmtAppend("]");
+}
 
 /// for cases like: select count(distinct a), count(distinct b) from t;
 /// it will generate 2 group sets with <a> and <b>, over which we should
diff --git a/dbms/src/Interpreters/Repeat.h b/dbms/src/Interpreters/Repeat.h
index 7c2c05ab85d..442050f55e7 100644
--- a/dbms/src/Interpreters/Repeat.h
+++ b/dbms/src/Interpreters/Repeat.h
@@ -129,6 +129,8 @@ class Repeat
 
     static std::shared_ptr<Repeat> sharedRepeat(const GroupingSets & groupingSets);
 
+    void getGroupingSetsDes(FmtBuffer & buffer) const;
+
     static const String grouping_identifier_column_name;
     static const DataTypePtr grouping_identifier_column_type;
 
diff --git a/dbms/src/TestUtils/mockExecutor.cpp b/dbms/src/TestUtils/mockExecutor.cpp
index 1220e873a32..e3b02019f15 100644
--- a/dbms/src/TestUtils/mockExecutor.cpp
+++ b/dbms/src/TestUtils/mockExecutor.cpp
@@ -96,7 +96,7 @@ void DAGRequestBuilder::initDAGRequest(tipb::DAGRequest & dag_request)
     else
         dag_request.set_encode_type(tipb::EncodeType::TypeDefault);
 
-    for (size_t i = 0; i < root->output_schema.size(); ++i)
+    for (size_t i = 0; i < root->output_schema.size(); ++i) // set the mock DAG request's output offsets according to the root executor's output schema
         dag_request.add_output_offsets(i);
 }
 
From 4bb8fb92914a983dab2cb49c251332cf0a094365 Mon Sep 17 00:00:00 2001
From: AilinKid <3148019@qq.com>
Date: Mon, 26 Dec 2022 11:44:35 +0800
Subject: [PATCH 05/31] change the name from repeat source to expand and
 rebase master

Signed-off-by: AilinKid <3148019@qq.com>
---
 dbms/src/Common/TiFlashMetrics.h              |   1 +
 ...tStream.cpp => ExpandBlockInputStream.cpp} |  15 +--
 ...InputStream.h => ExpandBlockInputStream.h} |  12 +-
 dbms/src/DataStreams/SquashingTransform.cpp   |   8 +-
 ...epeatSourceBinder.cpp => ExpandBinder.cpp} |  19 +--
 .../{RepeatSourceBinder.h => ExpandBinder.h}  |   6 +-
 dbms/src/Debug/dbgFuncCoprocessor.h           |   1 +
 .../Coprocessor/DAGExpressionAnalyzer.cpp     |  28 ++---
 .../Flash/Coprocessor/DAGExpressionAnalyzer.h |   4 +-
 dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp  |  12 +-
 dbms/src/Flash/Coprocessor/DAGQueryBlock.h    |   4 +-
 .../Coprocessor/DAGQueryBlockInterpreter.cpp  |  22 ++--
 .../Coprocessor/DAGQueryBlockInterpreter.h    |   2 +-
 .../Coprocessor/collectOutputFieldTypes.cpp   |   6 +-
 dbms/src/Flash/Planner/PhysicalPlan.cpp       |   4 +-
 .../Flash/Planner/Plans/PhysicalExpand.cpp    | 116 ++++++++++++++++++
 dbms/src/Flash/Planner/Plans/PhysicalExpand.h |  57 +++++++++
 .../Flash/Planner/Plans/PhysicalRepeat.cpp    |  34 ++---
 dbms/src/Flash/Planner/Plans/PhysicalRepeat.h |  12 +-
 .../src/Flash/Statistics/CommonExecutorImpl.h |  13 ++
 .../ExecutorStatisticsCollector.cpp           |   3 +-
 .../Flash/Statistics/traverseExecutors.cpp    |   4 +-
 .../Interpreters/{Repeat.cpp => Expand.cpp}   |  29 
++--- dbms/src/Interpreters/{Repeat.h => Expand.h} | 42 +++---- dbms/src/Interpreters/ExpressionActions.cpp | 16 +-- dbms/src/Interpreters/ExpressionActions.h | 14 ++- .../Interpreters/tests/gtest_block_repeat.cpp | 2 +- dbms/src/TestUtils/executorSerializer.cpp | 14 +-- 28 files changed, 346 insertions(+), 154 deletions(-) rename dbms/src/DataStreams/{RepeatSourceBlockInputStream.cpp => ExpandBlockInputStream.cpp} (67%) rename dbms/src/DataStreams/{RepeatSourceBlockInputStream.h => ExpandBlockInputStream.h} (77%) rename dbms/src/Debug/MockExecutor/{RepeatSourceBinder.cpp => ExpandBinder.cpp} (77%) rename dbms/src/Debug/MockExecutor/{RepeatSourceBinder.h => ExpandBinder.h} (86%) create mode 100644 dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp create mode 100644 dbms/src/Flash/Planner/Plans/PhysicalExpand.h rename dbms/src/Interpreters/{Repeat.cpp => Expand.cpp} (93%) rename dbms/src/Interpreters/{Repeat.h => Expand.h} (82%) diff --git a/dbms/src/Common/TiFlashMetrics.h b/dbms/src/Common/TiFlashMetrics.h index bf0ec4a9f65..9f3d5f064c8 100644 --- a/dbms/src/Common/TiFlashMetrics.h +++ b/dbms/src/Common/TiFlashMetrics.h @@ -278,6 +278,7 @@ namespace DB F(type_mpp, {{"type", "mpp"}, ComputeLabelHolder::instance().getClusterIdLabel(), ComputeLabelHolder::instance().getProcessIdLabel()}), \ F(type_cop, {{"type", "cop"}, ComputeLabelHolder::instance().getClusterIdLabel(), ComputeLabelHolder::instance().getProcessIdLabel()}), \ F(type_batch, {{"type", "batch"}, ComputeLabelHolder::instance().getClusterIdLabel(), ComputeLabelHolder::instance().getProcessIdLabel()})) \ + // clang-format on /// Buckets with boundaries [start * base^0, start * base^1, ..., start * base^(size-1)] diff --git a/dbms/src/DataStreams/RepeatSourceBlockInputStream.cpp b/dbms/src/DataStreams/ExpandBlockInputStream.cpp similarity index 67% rename from dbms/src/DataStreams/RepeatSourceBlockInputStream.cpp rename to dbms/src/DataStreams/ExpandBlockInputStream.cpp index af727442b56..2f502c3f708 100644 --- a/dbms/src/DataStreams/RepeatSourceBlockInputStream.cpp +++ b/dbms/src/DataStreams/ExpandBlockInputStream.cpp @@ -12,28 +12,29 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include <DataStreams/RepeatSourceBlockInputStream.h>
+#include <DataStreams/ExpandBlockInputStream.h>
+
 
 namespace DB
 {
-Block RepeatSourceBlockInputStream::readImpl()
+Block ExpandBlockInputStream::readImpl()
 {
     Block block = children.back()->read();
     if (!block)
         return block;
-    repeat_source_actions->execute(block);
+    expand_actions->execute(block);
     return block;
 }
 
-Block RepeatSourceBlockInputStream::getHeader() const
+Block ExpandBlockInputStream::getHeader() const
 {
     Block res = children.back()->getHeader();
-    repeat_source_actions->execute(res);
+    expand_actions->execute(res);
     return res;
 }
 
-void RepeatSourceBlockInputStream::appendInfo(FmtBuffer & buffer) const {
+void ExpandBlockInputStream::appendInfo(FmtBuffer & buffer) const {
     buffer.fmtAppend(": grouping set ");
-    repeat_source_actions.get()->getActions()[0].repeat->getGroupingSetsDes(buffer);
+    expand_actions.get()->getActions()[0].expand->getGroupingSetsDes(buffer);
 }
 
 } // namespace DB
diff --git a/dbms/src/DataStreams/RepeatSourceBlockInputStream.h b/dbms/src/DataStreams/ExpandBlockInputStream.h
similarity index 77%
rename from dbms/src/DataStreams/RepeatSourceBlockInputStream.h
rename to dbms/src/DataStreams/ExpandBlockInputStream.h
index d7f9f6db5cc..e502a8c8e1f 100644
--- a/dbms/src/DataStreams/RepeatSourceBlockInputStream.h
+++ b/dbms/src/DataStreams/ExpandBlockInputStream.h
@@ -19,15 +19,15 @@
 
 namespace DB
 {
-class RepeatSourceBlockInputStream : public IProfilingBlockInputStream
+class ExpandBlockInputStream : public IProfilingBlockInputStream
 {
-    static constexpr auto NAME = "RepeatSource";
+    static constexpr auto NAME = "Expand";
 
 public:
-    RepeatSourceBlockInputStream(
+    ExpandBlockInputStream(
         const BlockInputStreamPtr & input,
-        ExpressionActionsPtr repeat_source_actions_)
-        : repeat_source_actions(repeat_source_actions_)
+        ExpressionActionsPtr expand_actions_)
+        : expand_actions(expand_actions_)
     {
         children.push_back(input);
     }
@@ -39,7 +39,7 @@ class RepeatSourceBlockInputStream : public IProfilingBlockInputStream
     Block readImpl() override;
 
 private:
-    ExpressionActionsPtr repeat_source_actions;
+    ExpressionActionsPtr expand_actions;
 };
 
 } // namespace DB
diff --git a/dbms/src/DataStreams/SquashingTransform.cpp b/dbms/src/DataStreams/SquashingTransform.cpp
index 1488b688d27..391cd710c8d 100644
--- a/dbms/src/DataStreams/SquashingTransform.cpp
+++ b/dbms/src/DataStreams/SquashingTransform.cpp
@@ -93,16 +93,10 @@ void SquashingTransform::append(Block && block)
     {
         MutableColumnPtr mutable_column = (*std::move(accumulated_block.getByPosition(i).column)).mutate();
         mutable_column->insertRangeFrom(*block.getByPosition(i).column, 0, rows);
-        accumulated_block.getByPosition(i).column = std::move(mutable_column); // append the values into the column
+        accumulated_block.getByPosition(i).column = std::move(mutable_column);
     }
 }
 
-// we may need an efficient row-replication operation here: the repeatSource operator first appends the additional column, then replicates the
-// original block's data n times; on each replica it sets the grouping-set columns outside the target set to null and fills the grouping ID
-// column with the constant n.
-// -// sample_block - bool SquashingTransform::isEnoughSize(size_t rows, size_t bytes) const { return (!min_block_size_rows && !min_block_size_bytes) diff --git a/dbms/src/Debug/MockExecutor/RepeatSourceBinder.cpp b/dbms/src/Debug/MockExecutor/ExpandBinder.cpp similarity index 77% rename from dbms/src/Debug/MockExecutor/RepeatSourceBinder.cpp rename to dbms/src/Debug/MockExecutor/ExpandBinder.cpp index 928160c8996..9d07a0c58f4 100644 --- a/dbms/src/Debug/MockExecutor/RepeatSourceBinder.cpp +++ b/dbms/src/Debug/MockExecutor/ExpandBinder.cpp @@ -12,19 +12,20 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include +#include namespace DB::mock { -bool RepeatSourceBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) +bool ExpandBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) { - tipb_executor->set_tp(tipb::ExecType::TypeRepeatSource); + tipb_executor->set_tp(tipb::ExecType::TypeExpand); tipb_executor->set_executor_id(name); - tipb::RepeatSource * repeat_source = tipb_executor->mutable_repeat_source(); + tipb::Expand * expand = tipb_executor->mutable_expand(); for (const auto & grouping_set : grouping_sets_columns) { - auto * gss = repeat_source->add_grouping_sets(); + auto * gss = expand->add_grouping_sets(); for (const auto & grouping_exprs : grouping_set) { auto * ges = gss->add_grouping_exprs(); @@ -35,7 +36,7 @@ bool RepeatSourceBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t } } } - auto * children_executor = repeat_source->mutable_child(); + auto * children_executor = expand->mutable_child(); return children[0]->toTiPBExecutor(children_executor, collator_id, mpp_info, context); } @@ -60,8 +61,8 @@ ExecutorBinderPtr compileRepeat(ExecutorBinderPtr input, size_t & executor_index field_type.set_decimal(-1); output_schema.push_back(std::make_pair("groupingID", TiDB::fieldTypeToColumnInfo(field_type))); } - ExecutorBinderPtr repeat_source = std::make_shared(executor_index, output_schema, std::move(grouping_set_columns)); - repeat_source->children.push_back(input); - return repeat_source; + ExecutorBinderPtr expand = std::make_shared(executor_index, output_schema, std::move(grouping_set_columns)); + expand->children.push_back(input); + return expand; } } // namespace DB::mock \ No newline at end of file diff --git a/dbms/src/Debug/MockExecutor/RepeatSourceBinder.h b/dbms/src/Debug/MockExecutor/ExpandBinder.h similarity index 86% rename from dbms/src/Debug/MockExecutor/RepeatSourceBinder.h rename to dbms/src/Debug/MockExecutor/ExpandBinder.h index ace010048b2..752046a4d80 100644 --- a/dbms/src/Debug/MockExecutor/RepeatSourceBinder.h +++ b/dbms/src/Debug/MockExecutor/ExpandBinder.h @@ -21,11 +21,11 @@ using MockGroupingNameVec = std::vector; using MockVecGroupingNameVec = std::vector; using MockVVecGroupingNameVec = std::vector; -class RepeatSourceBinder : public ExecutorBinder +class ExpandBinder : public ExecutorBinder { public: - RepeatSourceBinder(size_t & index_, const DAGSchema & output_schema_, MockVVecGroupingNameVec gss) - : ExecutorBinder(index_, "repeat_source_" + std::to_string(index_), output_schema_) + ExpandBinder(size_t & index_, const DAGSchema & output_schema_, MockVVecGroupingNameVec gss) + : ExecutorBinder(index_, "expand_" + std::to_string(index_), output_schema_) , grouping_sets_columns(gss) {} diff --git 
a/dbms/src/Debug/dbgFuncCoprocessor.h b/dbms/src/Debug/dbgFuncCoprocessor.h index 9a21842fa50..f1b95139f62 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.h +++ b/dbms/src/Debug/dbgFuncCoprocessor.h @@ -15,6 +15,7 @@ #pragma once #include + namespace DB { class Context; diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index 70e92acec51..fe74369bf7a 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -32,10 +32,10 @@ #include #include #include +#include #include #include #include -#include #include #include #include @@ -806,13 +806,13 @@ NamesAndTypes DAGExpressionAnalyzer::buildOrderColumns( return order_columns; } -std::shared_ptr DAGExpressionAnalyzer::buildRepeatGroupingColumns( - const tipb::RepeatSource & repeatSource, const ExpressionActionsPtr & actions) +std::shared_ptr DAGExpressionAnalyzer::buildExpandGroupingColumns( + const tipb::Expand & expand, const ExpressionActionsPtr & actions) { GroupingSets group_sets_columns; std::map map_grouping_col; - group_sets_columns.reserve(repeatSource.grouping_sets().size()); - for (const auto& group_set : repeatSource.grouping_sets()){ + group_sets_columns.reserve(expand.grouping_sets().size()); + for (const auto& group_set : expand.grouping_sets()){ GroupingSet group_set_columns; group_set_columns.reserve(group_set.grouping_exprs().size()); for (const auto &group_exprs : group_set.grouping_exprs()) { @@ -838,30 +838,30 @@ std::shared_ptr DAGExpressionAnalyzer::buildRepeatGroupingColumns( if (map_grouping_col[mutable_one.name]) mutable_one.type = makeNullable(mutable_one.type); } - source_columns.emplace_back(Repeat::grouping_identifier_column_name, Repeat::grouping_identifier_column_type); - auto shared_repeat = Repeat::sharedRepeat(group_sets_columns); - return shared_repeat; + source_columns.emplace_back(Expand::grouping_identifier_column_name, Expand::grouping_identifier_column_type); + auto shared_expand = Expand::sharedExpand(group_sets_columns); + return shared_expand; } -ExpressionActionsPtr DAGExpressionAnalyzer::appendRepeatSource( - const tipb::RepeatSource & repeatSource, ExpressionActionsChain & chain) +ExpressionActionsPtr DAGExpressionAnalyzer::appendExpand( + const tipb::Expand & expand, ExpressionActionsChain & chain) { auto & last_step = initAndGetLastStep(chain); for (const auto &origin_col : last_step.actions->getSampleBlock().getNamesAndTypesList()) { last_step.required_output.push_back(origin_col.name); } - auto shared_repeat = buildRepeatGroupingColumns(repeatSource, last_step.actions); - last_step.actions->add(ExpressionAction::repeatSource(shared_repeat)); + auto shared_expand = buildExpandGroupingColumns(expand, last_step.actions); + last_step.actions->add(ExpressionAction::expandSource(shared_expand)); - auto before_repeat_source = chain.getLastActions(); + auto before_expand = chain.getLastActions(); chain.finalize(); chain.clear(); auto & after_repeat_step = initAndGetLastStep(chain); for (const auto & column : getCurrentInputColumns()) after_repeat_step.required_output.push_back(column.name); - return before_repeat_source; + return before_expand; } std::vector DAGExpressionAnalyzer::appendOrderBy( diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h index f1012df4646..7436841034a 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h @@ 
-71,9 +71,9 @@ class DAGExpressionAnalyzer : private boost::noncopyable
         ExpressionActionsChain & chain,
         const std::vector<const tipb::Expr *> & conditions);
 
-    std::shared_ptr<Repeat> buildRepeatGroupingColumns(const tipb::RepeatSource & repeatSource, const ExpressionActionsPtr & actions);
+    std::shared_ptr<Expand> buildExpandGroupingColumns(const tipb::Expand & expand, const ExpressionActionsPtr & actions);
 
-    ExpressionActionsPtr appendRepeatSource(const tipb::RepeatSource & repeatSource, ExpressionActionsChain & chain);
+    ExpressionActionsPtr appendExpand(const tipb::Expand & expand, ExpressionActionsChain & chain);
 
     NamesAndTypes buildWindowOrderColumns(const tipb::Sort & window_sort) const;
 
diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp
index 3fdd7cec1f1..2f5a28347cd 100644
--- a/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp
@@ -46,7 +46,7 @@ bool isSourceNode(const tipb::Executor * root)
 const static String SOURCE_NAME("source");
 const static String SEL_NAME("selection");
 const static String AGG_NAME("aggregation");
-const static String REPEAT_NAME("repeat_source");
+const static String EXPAND_NAME("expand");
 const static String WINDOW_NAME("window");
 const static String WINDOW_SORT_NAME("window_sort");
 const static String HAVING_NAME("having");
@@ -97,11 +97,11 @@ DAGQueryBlock::DAGQueryBlock(const tipb::Executor & root_, QueryBlockIDGenerator
         }
         current = &current->selection().child();
         break;
-    case tipb::ExecType::TypeRepeatSource:
-        GET_METRIC(tiflash_coprocessor_executor_count, type_repeat_source).Increment();
-        assignOrThrowException(&repeat_source, current, REPEAT_NAME);
-        repeat_source_name = current->executor_id();
-        current = &current->repeat_source().child(); // non-leaf node, keep recursing into the child
+    case tipb::ExecType::TypeExpand:
+        GET_METRIC(tiflash_coprocessor_executor_count, type_expand).Increment();
+        assignOrThrowException(&expand, current, EXPAND_NAME);
+        expand_name = current->executor_id();
+        current = &current->expand().child(); // non-leaf node, keep recursing into the child
         break;
     case tipb::ExecType::TypeStreamAgg:
         RUNTIME_CHECK_MSG(current->aggregation().group_by_size() == 0, STREAM_AGG_ERROR);
diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlock.h b/dbms/src/Flash/Coprocessor/DAGQueryBlock.h
index d18ac84fd90..86cd14c09df 100644
--- a/dbms/src/Flash/Coprocessor/DAGQueryBlock.h
+++ b/dbms/src/Flash/Coprocessor/DAGQueryBlock.h
@@ -60,8 +60,8 @@ class DAGQueryBlock
     String having_name;
     const tipb::Executor * limit_or_topn = nullptr;
     String limit_or_topn_name;
-    const tipb::Executor * repeat_source = nullptr; // repeat source node can only be before sender
-    String repeat_source_name;
+    const tipb::Executor * expand = nullptr; // expand node can only be before sender
+    String expand_name;
     const tipb::Executor * exchange_sender = nullptr;
     String exchange_sender_name;
     UInt32 id;
diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp
index 3e4cb641f97..9889536c48c 100644
--- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -29,6 +30,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -43,8 +46,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
-#include 
 #include 
 #include 
 
@@ -76,7 +80,7 @@ struct AnalysisResult
     ExpressionActionsPtr before_having;
     ExpressionActionsPtr before_order_and_select;
     ExpressionActionsPtr final_projection;
-    ExpressionActionsPtr before_repeat_source;
+    ExpressionActionsPtr before_expand;
 
     String filter_column_name;
     String having_column_name;
@@ -136,8 +140,8 @@ AnalysisResult analyzeExpressions(
         chain.addStep();
     }
 
-    if (query_block.repeat_source) {
-        res.before_repeat_source = analyzer.appendRepeatSource(query_block.repeat_source->repeat_source(), chain);
+    if (query_block.expand) {
+        res.before_expand = analyzer.appendExpand(query_block.expand->expand(), chain);
     }
 
     const auto & dag_context = *context.getDAGContext();
@@ -697,10 +701,10 @@ void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline)
     // execute the repeat source OP after all filters/limits and so on.
     // since the repeat source OP has row replication work to do, placing it after limit reduces some unnecessary burden.
     // and put it before the final projection, because we should recognize some base cols as grouping set cols before their aliases change.
-    if (res.before_repeat_source)
+    if (res.before_expand)
     {
-        executeRepeatSource(pipeline, res.before_repeat_source);
-        recordProfileStreams(pipeline, query_block.repeat_source_name);
+        executeExpandSource(pipeline, res.before_expand);
+        recordProfileStreams(pipeline, query_block.expand_name);
     }
 
     // execute final project action
@@ -746,10 +750,10 @@ void DAGQueryBlockInterpreter::executeLimit(DAGPipeline & pipeline)
     }
 }
 
-void DAGQueryBlockInterpreter::executeRepeatSource(DAGPipeline & pipeline, const ExpressionActionsPtr & expr)
+void DAGQueryBlockInterpreter::executeExpandSource(DAGPipeline & pipeline, const ExpressionActionsPtr & expr)
 {
     pipeline.transform([&](auto &stream) {
-        stream = std::make_shared<RepeatSourceBlockInputStream>(stream, expr);
+        stream = std::make_shared<ExpandBlockInputStream>(stream, expr);
     });
 }
 
diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h
index 8b4746bb6a2..eae5aa34cec 100644
--- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h
+++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h
@@ -69,7 +69,7 @@ class DAGQueryBlockInterpreter
     void executeWindowOrder(DAGPipeline & pipeline, SortDescription sort_desc, bool enable_fine_grained_shuffle);
     void executeOrder(DAGPipeline & pipeline, const NamesAndTypes & order_columns);
     void executeLimit(DAGPipeline & pipeline);
-    void executeRepeatSource(DAGPipeline & pipeline, const ExpressionActionsPtr & expr);
+    void executeExpandSource(DAGPipeline & pipeline, const ExpressionActionsPtr & expr);
     void executeWindow(
         DAGPipeline & pipeline,
         WindowDescription & window_description,
diff --git a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp
index 5d15a4d3bc0..923afd56914 100644
--- a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp
+++ b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp
@@ -109,8 +109,8 @@ bool collectForRepeat(std::vector<tipb::FieldType> & out_field_types, const tipb:
         traverseExecutorTree(child, [&out_child_fields](const tipb::Executor & e) { return collectForExecutor(out_child_fields, e); });
     });
 
-    // make the child's output columns nullable according to the grouping-set mapping
-    for (const auto & grouping_set : executor.repeat_source().grouping_sets()){
+    // make the columns from grouping sets nullable.
+    for (const auto & grouping_set : executor.expand().grouping_sets()){
         for (const auto & grouping_exprs : grouping_set.grouping_exprs()){
             for (const auto & grouping_col : grouping_exprs.grouping_expr()){
                 // assert that: grouping_col must be the column ref guaranteed by tidb.
@@ -230,7 +230,7 @@ bool collectForExecutor(std::vector & output_field_types, const return collectForTableScan(output_field_types, executor.partition_table_scan()); case tipb::ExecType::TypeJoin: return collectForJoin(output_field_types, executor); - case tipb::ExecType::TypeRepeatSource: + case tipb::ExecType::TypeExpand: return collectForRepeat(output_field_types, executor); default: return true; diff --git a/dbms/src/Flash/Planner/PhysicalPlan.cpp b/dbms/src/Flash/Planner/PhysicalPlan.cpp index aafd805481a..42729693433 100644 --- a/dbms/src/Flash/Planner/PhysicalPlan.cpp +++ b/dbms/src/Flash/Planner/PhysicalPlan.cpp @@ -197,9 +197,9 @@ void PhysicalPlan::build(const String & executor_id, const tipb::Executor * exec pushBack(PhysicalJoin::build(context, executor_id, log, executor->join(), FineGrainedShuffle(executor), left, right)); break; } - case tipb::ExecType::TypeRepeatSource: + case tipb::ExecType::TypeExpand: { - pushBack(PhysicalRepeat::build(context, executor_id, log, executor->repeat_source(), popBack())); + pushBack(PhysicalExpand::build(context, executor_id, log, executor->expand(), popBack())); break; } default: diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp new file mode 100644 index 00000000000..51eaaeaa4c3 --- /dev/null +++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp @@ -0,0 +1,116 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +PhysicalPlanNodePtr PhysicalExpand::build( + const Context & context, + const String & executor_id, + const LoggerPtr & log, + const tipb::Expand & expand, + const PhysicalPlanNodePtr & child) +{ + assert(child); + + child->finalize(); + + if (unlikely(expand.grouping_sets().empty())) + { + //should not reach here + throw TiFlashException("Repeat executor without grouping sets", Errors::Planner::BadRequest); + } + + DAGExpressionAnalyzer analyzer{child->getSchema(), context}; + ExpressionActionsPtr before_repeat_actions = PhysicalPlanHelper::newActions(child->getSampleBlock(), context); + + + auto shared_repeat = analyzer.buildExpandGroupingColumns(expand, before_repeat_actions); + + // construct sample block. + NamesAndTypes repeat_output_columns; + auto child_header = child->getSchema(); + for (const auto & one : child_header) + { + repeat_output_columns.emplace_back(one.name, shared_repeat->isInGroupSetColumn(one.name)? 
makeNullable(one.type): one.type);
+    }
+    repeat_output_columns.emplace_back(shared_repeat->grouping_identifier_column_name, shared_repeat->grouping_identifier_column_type);
+
+    auto physical_repeat = std::make_shared<PhysicalExpand>(
+        executor_id,
+        repeat_output_columns,
+        log->identifier(),
+        child,
+        shared_repeat,
+        Block(repeat_output_columns));
+
+    return physical_repeat;
+}
+
+
+void PhysicalExpand::repeatTransform(DAGPipeline & child_pipeline, Context & context)
+{
+    auto repeat_actions = PhysicalPlanHelper::newActions(child_pipeline.firstStream()->getHeader(), context);
+    repeat_actions->add(ExpressionAction::expandSource(shared_expand));
+    String repeat_extra_info = fmt::format("repeat source, repeat_executor_id = {}", execId());
+    child_pipeline.transform([&](auto & stream) {
+        stream = std::make_shared(stream, repeat_actions);
+        stream->setExtraInfo(repeat_extra_info);
+    });
+}
+
+void PhysicalExpand::transformImpl(DAGPipeline & pipeline, Context & context, size_t max_streams)
+{
+    child->transform(pipeline, context, max_streams);
+    repeatTransform(pipeline, context);
+}
+
+void PhysicalExpand::finalize(const Names & parent_require)
+{
+    FinalizeHelper::checkSchemaContainsParentRequire(schema, parent_require);
+    Names required_output;
+    required_output.reserve(shared_expand->getGroupSetNum()); // the grouping set columns must already exist in the child's output schema.
+    auto name_set = std::set<String>();
+    shared_expand->getAllGroupSetColumnNames(name_set);
+    // append the parent_require columns; the parent may also expect the self-generated groupingID.
+    for (const auto & one : parent_require)
+    {
+        if (one != Expand::grouping_identifier_column_name)
+        {
+            name_set.insert(one);
+        }
+    }
+    for (const auto & grouping_name : name_set)
+    {
+        required_output.emplace_back(grouping_name);
+    }
+    child->finalize(required_output);
+}
+
+const Block & PhysicalExpand::getSampleBlock() const
+{
+    return sample_block;
+}
+} // namespace DB
diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.h b/dbms/src/Flash/Planner/Plans/PhysicalExpand.h
new file mode 100644
index 00000000000..a2696affb5b
--- /dev/null
+++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.h
@@ -0,0 +1,57 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ +class PhysicalExpand : public PhysicalUnary +{ +public: + static PhysicalPlanNodePtr build( + const Context & context, + const String & executor_id, + const LoggerPtr & log, + const tipb::Expand & expand, + const PhysicalPlanNodePtr & child); + + PhysicalExpand( + const String & executor_id_, + const NamesAndTypes & schema_, + const String & req_id, + const PhysicalPlanNodePtr & child_, + const std::shared_ptr & shared_expand, + const Block & sample_block_) + : PhysicalUnary(executor_id_, PlanType::Repeat, schema_, req_id, child_) + , shared_expand(shared_expand), sample_block(sample_block_){} + + void finalize(const Names & parent_require) override; + + void repeatTransform(DAGPipeline & child_pipeline, Context & context); + + const Block & getSampleBlock() const override; + +private: + void transformImpl(DAGPipeline & pipeline, Context & context, size_t max_streams) override; + std::shared_ptr shared_expand; + Block sample_block; +}; +} // namespace DB + + diff --git a/dbms/src/Flash/Planner/Plans/PhysicalRepeat.cpp b/dbms/src/Flash/Planner/Plans/PhysicalRepeat.cpp index adb2a774354..51eaaeaa4c3 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalRepeat.cpp +++ b/dbms/src/Flash/Planner/Plans/PhysicalRepeat.cpp @@ -15,30 +15,30 @@ #include #include #include +#include +#include #include #include #include #include -#include +#include #include #include -#include -#include namespace DB { -PhysicalPlanNodePtr PhysicalRepeat::build( +PhysicalPlanNodePtr PhysicalExpand::build( const Context & context, const String & executor_id, const LoggerPtr & log, - const tipb::RepeatSource & repeat_source, + const tipb::Expand & expand, const PhysicalPlanNodePtr & child) { assert(child); child->finalize(); - if (unlikely(repeat_source.grouping_sets().empty())) + if (unlikely(expand.grouping_sets().empty())) { //should not reach here throw TiFlashException("Repeat executor without grouping sets", Errors::Planner::BadRequest); @@ -48,7 +48,7 @@ PhysicalPlanNodePtr PhysicalRepeat::build( ExpressionActionsPtr before_repeat_actions = PhysicalPlanHelper::newActions(child->getSampleBlock(), context); - auto shared_repeat = analyzer.buildRepeatGroupingColumns(repeat_source, before_repeat_actions); + auto shared_repeat = analyzer.buildExpandGroupingColumns(expand, before_repeat_actions); // construct sample block. 
NamesAndTypes repeat_output_columns; @@ -59,7 +59,7 @@ PhysicalPlanNodePtr PhysicalRepeat::build( } repeat_output_columns.emplace_back(shared_repeat->grouping_identifier_column_name, shared_repeat->grouping_identifier_column_type); - auto physical_repeat = std::make_shared( + auto physical_repeat = std::make_shared( executor_id, repeat_output_columns, log->identifier(), @@ -71,34 +71,34 @@ PhysicalPlanNodePtr PhysicalRepeat::build( } -void PhysicalRepeat::repeatTransform(DAGPipeline & child_pipeline, Context & context) +void PhysicalExpand::repeatTransform(DAGPipeline & child_pipeline, Context & context) { auto repeat_actions = PhysicalPlanHelper::newActions(child_pipeline.firstStream()->getHeader(), context); - repeat_actions->add(ExpressionAction::repeatSource(shared_repeat)); + repeat_actions->add(ExpressionAction::expandSource(shared_expand)); String repeat_extra_info = fmt::format("repeat source, repeat_executor_id = {}", execId()); child_pipeline.transform([&](auto &stream) { - stream = std::make_shared(stream, repeat_actions); + stream = std::make_shared(stream, repeat_actions); stream->setExtraInfo(repeat_extra_info); }); } -void PhysicalRepeat::transformImpl(DAGPipeline & pipeline, Context & context, size_t max_streams) +void PhysicalExpand::transformImpl(DAGPipeline & pipeline, Context & context, size_t max_streams) { child->transform(pipeline, context, max_streams); repeatTransform(pipeline, context); } -void PhysicalRepeat::finalize(const Names & parent_require) +void PhysicalExpand::finalize(const Names & parent_require) { FinalizeHelper::checkSchemaContainsParentRequire(schema, parent_require); Names required_output; - required_output.reserve( shared_repeat->getGroupSetNum()); // grouping set column should be existed in the child output schema. + required_output.reserve( shared_expand->getGroupSetNum()); // grouping set column should be existed in the child output schema. auto name_set = std::set(); - shared_repeat->getAllGroupSetColumnNames(name_set); + shared_expand->getAllGroupSetColumnNames(name_set); // append parent_require column it may expect self-filled groupingID. 
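 // Worked illustration (commentary added for clarity, not part of this patch):
 // with grouping sets {a} and {b} and parent_require = {b, groupingID},
 // name_set becomes {a, b}; groupingID is skipped below because Expand
 // generates that column itself, so the child ends up finalized with
 // required_output = {a, b}.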
for (const auto & one : parent_require) { - if (one != Repeat::grouping_identifier_column_name) + if (one != Expand::grouping_identifier_column_name) { name_set.insert(one); } @@ -109,7 +109,7 @@ void PhysicalRepeat::finalize(const Names & parent_require) child->finalize(required_output); } -const Block & PhysicalRepeat::getSampleBlock() const +const Block & PhysicalExpand::getSampleBlock() const { return sample_block; } diff --git a/dbms/src/Flash/Planner/Plans/PhysicalRepeat.h b/dbms/src/Flash/Planner/Plans/PhysicalRepeat.h index 5907c7c047e..a2696affb5b 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalRepeat.h +++ b/dbms/src/Flash/Planner/Plans/PhysicalRepeat.h @@ -21,25 +21,25 @@ namespace DB { -class PhysicalRepeat : public PhysicalUnary +class PhysicalExpand : public PhysicalUnary { public: static PhysicalPlanNodePtr build( const Context & context, const String & executor_id, const LoggerPtr & log, - const tipb::RepeatSource & repeat, + const tipb::Expand & expand, const PhysicalPlanNodePtr & child); - PhysicalRepeat( + PhysicalExpand( const String & executor_id_, const NamesAndTypes & schema_, const String & req_id, const PhysicalPlanNodePtr & child_, - const std::shared_ptr & shared_repeat, + const std::shared_ptr & shared_expand, const Block & sample_block_) : PhysicalUnary(executor_id_, PlanType::Repeat, schema_, req_id, child_) - , shared_repeat(shared_repeat), sample_block(sample_block_){} + , shared_expand(shared_expand), sample_block(sample_block_){} void finalize(const Names & parent_require) override; @@ -49,7 +49,7 @@ class PhysicalRepeat : public PhysicalUnary private: void transformImpl(DAGPipeline & pipeline, Context & context, size_t max_streams) override; - std::shared_ptr shared_repeat; + std::shared_ptr shared_expand; Block sample_block; }; } // namespace DB diff --git a/dbms/src/Flash/Statistics/CommonExecutorImpl.h b/dbms/src/Flash/Statistics/CommonExecutorImpl.h index 404fd1acbd6..56d55ea415f 100644 --- a/dbms/src/Flash/Statistics/CommonExecutorImpl.h +++ b/dbms/src/Flash/Statistics/CommonExecutorImpl.h @@ -58,6 +58,19 @@ struct SortImpl }; using SortStatistics = ExecutorStatistics; +struct ExpandImpl +{ + static constexpr bool has_extra_info = false; + + static constexpr auto type = "Expand"; + + static bool isMatch(const tipb::Executor *executor) + { + return executor->has_expand(); + } +}; +using ExpandStatistics = ExecutorStatistics; + struct FilterImpl { static constexpr bool has_extra_info = false; diff --git a/dbms/src/Flash/Statistics/ExecutorStatisticsCollector.cpp b/dbms/src/Flash/Statistics/ExecutorStatisticsCollector.cpp index 44a72e11381..321599d9050 100644 --- a/dbms/src/Flash/Statistics/ExecutorStatisticsCollector.cpp +++ b/dbms/src/Flash/Statistics/ExecutorStatisticsCollector.cpp @@ -64,7 +64,8 @@ void ExecutorStatisticsCollector::initialize(DAGContext * dag_context_) SortStatistics, TableScanStatistics, TopNStatistics, - WindowStatistics>(executor_id, &executor)) + WindowStatistics, + ExpandStatistics>(executor_id, &executor)) { throw TiFlashException( fmt::format("Unknown executor type, executor_id: {}", executor_id), diff --git a/dbms/src/Flash/Statistics/traverseExecutors.cpp b/dbms/src/Flash/Statistics/traverseExecutors.cpp index 801002a10a8..94abeef3b01 100644 --- a/dbms/src/Flash/Statistics/traverseExecutors.cpp +++ b/dbms/src/Flash/Statistics/traverseExecutors.cpp @@ -41,8 +41,8 @@ Children getChildren(const tipb::Executor & executor) return Children{&executor.topn().child()}; case tipb::ExecType::TypeLimit: return 
Children{&executor.limit().child()}; - case tipb::ExecType::TypeRepeatSource: - return Children{&executor.repeat_source().child()}; + case tipb::ExecType::TypeExpand: + return Children{&executor.expand().child()}; case tipb::ExecType::TypeProjection: return Children{&executor.projection().child()}; case tipb::ExecType::TypeExchangeSender: diff --git a/dbms/src/Interpreters/Repeat.cpp b/dbms/src/Interpreters/Expand.cpp similarity index 93% rename from dbms/src/Interpreters/Repeat.cpp rename to dbms/src/Interpreters/Expand.cpp index 16be88e81c0..7ddbd8c975d 100644 --- a/dbms/src/Interpreters/Repeat.cpp +++ b/dbms/src/Interpreters/Expand.cpp @@ -12,13 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include #include -#include #include -#include "DataTypes/DataTypesNumber.h" +#include +#include #include +#include "DataTypes/DataTypesNumber.h" + namespace DB { @@ -32,10 +33,10 @@ void convertColumnToNullable(ColumnWithTypeAndName & column) } } -Repeat::Repeat(const DB::GroupingSets & gss) +Expand::Expand(const DB::GroupingSets & gss) : group_sets_names(gss){} -void Repeat::getGroupingSetsDes(FmtBuffer & buffer) const +void Expand::getGroupingSetsDes(FmtBuffer & buffer) const { buffer.fmtAppend("["); for (const auto & grouping_set: group_sets_names) @@ -78,7 +79,7 @@ void Repeat::getGroupingSetsDes(FmtBuffer & buffer) const /// \param input the source block /// \return -void Repeat::replicateAndFillNull(Block & block) const +void Expand::replicateAndFillNull(Block & block) const { size_t origin_rows = block.rows(); // make a replicate slice, using it to replicate origin rows. @@ -119,7 +120,7 @@ void Repeat::replicateAndFillNull(Block & block) const // replicate the original block rows. size_t existing_columns = block.columns(); - if (offsets_to_replicate) + if (offsets_to_replicate && offsets_to_replicate->size() > 0) { for (size_t i = 0; i < existing_columns; ++i) { @@ -202,7 +203,7 @@ void Repeat::replicateAndFillNull(Block & block) const // return input from block. } -bool Repeat::isInGroupSetColumn(String name) const{ +bool Expand::isInGroupSetColumn(String name) const{ for(const auto& it1 : group_sets_names) { // for every grouping set. @@ -220,13 +221,13 @@ bool Repeat::isInGroupSetColumn(String name) const{ return false; } -const GroupingColumnNames& Repeat::getGroupSetColumnNamesByOffset(size_t offset) const +const GroupingColumnNames& Expand::getGroupSetColumnNamesByOffset(size_t offset) const { /// currently, there only can be one groupingExprs in one groupingSet before the planner supporting the grouping set merge. 
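 /// Illustration (commentary added for clarity, not part of this patch): for
 /// SELECT count(distinct a), count(distinct b) FROM t the sets are
 /// {{{"a"}}, {{"b"}}}; today each grouping set holds exactly one
 /// GroupingColumnNames entry, so [offset][0] selects that single name list.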
return group_sets_names[offset][0];
 }
 
-void Repeat::getAllGroupSetColumnNames(std::set<String> & name_set) const
+void Expand::getAllGroupSetColumnNames(std::set<String> & name_set) const
 {
     for (const auto & it1 : group_sets_names)
     {
@@ -242,11 +243,11 @@ void Repeat::getAllGroupSetColumnNames(std::set<String> & name_set) const
     }
 }
 
-std::shared_ptr<Repeat> Repeat::sharedRepeat(const GroupingSets & groupingSets)
+std::shared_ptr<Expand> Expand::sharedExpand(const GroupingSets & groupingSets)
 {
-    return std::make_shared<Repeat>(groupingSets);
+    return std::make_shared<Expand>(groupingSets);
 }
 
-const std::string Repeat::grouping_identifier_column_name = "groupingID";
-const DataTypePtr Repeat::grouping_identifier_column_type = std::make_shared<DataTypeUInt64>();
+const std::string Expand::grouping_identifier_column_name = "groupingID";
+const DataTypePtr Expand::grouping_identifier_column_type = std::make_shared<DataTypeUInt64>();
 }
diff --git a/dbms/src/Interpreters/Repeat.h b/dbms/src/Interpreters/Expand.h
similarity index 82%
rename from dbms/src/Interpreters/Repeat.h
rename to dbms/src/Interpreters/Expand.h
index 442050f55e7..d567e58e311 100644
--- a/dbms/src/Interpreters/Repeat.h
+++ b/dbms/src/Interpreters/Expand.h
@@ -43,7 +43,7 @@ namespace DB
 /// we still got 2 grouping sets like: {[, ], []}
 ///
 /// the second case in which the group layout has been merged with the prefix
-/// common group layout into unified one set to reduce the underlying data replication/repeat cost.
+/// common group layout into one unified set to reduce the underlying data replication/expand cost.
 ///
 using GroupingColumnName = ::String;
 using GroupingColumnNames = std::vector<GroupingColumnName>;
@@ -52,19 +52,19 @@ using GroupingSets = std::vector<GroupingSet>;
 
-/** Data structure for implementation of Repeat.
+/** Data structure for implementation of Expand.
  *
- * Repeat is a kind of operator used for replicate low-layer datasource rows to feed different aggregate
+ * Expand is a kind of operator used to replicate low-layer datasource rows to feed different aggregate
  * grouping-layout requirement. (Basically known as grouping sets)
  *
  * For current scenario, it is applied to accelerate the computation of multi distinct aggregates by utilizing
  * multi nodes computing resource in a way of scheming 3-phase aggregation under mpp mode.
  *
- * GroupingSets descriptions are all needed by Repeat operator itself, the length of GroupingSets are the needed
- * repeat number (in other words, one grouping set require one replica of source rows). Since different grouping
+ * GroupingSets descriptions are all needed by the Expand operator itself; the length of GroupingSets is the needed
+ * expand count (in other words, one grouping set requires one replica of the source rows). Since different grouping
 * set column shouldn't let its targeted rows affected by other grouping set columns (which will also be appear in
 * the group by items) when do grouping work, we should isolate different grouping set columns by filling them with
- * null values when repeating rows.
+ * null values when expanding rows.
 *
 * Here is an example:
 * Say we got a query like this: select count(distinct a), count(distinct b) from t.
 *
@@ -75,46 +75,46 @@ using GroupingSets = std::vector<GroupingSet>;
 * Different group layouts are doomed to be unable to be feed with same replica of data in shuffling mode Except
 * gathering them all to the single node. While the latter one is usually accompanied by a single point of bottleneck.
 *
- * That's why data repeat happens here.
+ * That's why data expand happens here.
Say we got two tuple as below: * - * ==> after repeat we got + * ==> after expand we got * 1 1 origin row 1 1 - * 1 2 repeat row 1 1 + * 1 2 expand row 1 1 * origin row 1 2 - * repeat row 1 2 + * expand row 1 2 * - * See what we got now above, although we have already repeated/doubled the origin rows, while when grouping them together + * See what we got now above, although we have already expanded/doubled the origin rows, while when grouping them together * with GROUP BY(a,b) clause (resulting 2 group (1,1),(1,2) here), we found that we still can not get the right answer for * count distinct agg for a. * - * From the theory, every origin/repeated row should be targeted for one group out requirement, which means row<1> and row<3> + * From the theory, every origin/expanded row should be targeted for one group out requirement, which means row<1> and row<3> * about should be used to feed count(distinct a), while since the value of b in row<3> is different from that from row<1>, * that leads them being divided into different group. * * Come back to the origin goal to feed count(distinct a), in which we don't even care about what is was in column b from row<1> * and row<3>, because current agg args is aimed at column a. Therefore, we filled every non-targeted grouping set column in - * repeated row as null value. After that we got as below: + * expanded row as null value. After that we got as below: * - * ==> after repeat we got + * ==> after expand we got * 1 1 origin row 1 null ---> target for grouping set a - * 1 2 repeat row null 1 ---> target for grouping set b + * 1 2 expand row null 1 ---> target for grouping set b * origin row 1 null ---> target for grouping set a - * repeat row null 2 ---> target for grouping set b + * expand row null 2 ---> target for grouping set b * * Then, when grouping them together with GROUP BY(a,b) clause, we got row<1> and row<3> together, and row<2>, row<4> as a * self-group individually. Among them, every distinct agg has their self-targeted data grouped correctly. GROUP BY(a,b) clause * is finally seen/taken as a equivalent group to GROUP BY(a, null) for a-targeted rows, GROUP BY(null, b) for b-targeted rows. * * Over the correct grouped data, the result computation for distinct agg is quite reasonable. By the way, if origin row has some - * column that isn't belong to any grouping set, just let it be copied as it was in repeated row. + * column that isn't belong to any grouping set, just let it be copied as it was in expanded row. * */ -class Repeat +class Expand { public: - explicit Repeat(const GroupingSets & gss); + explicit Expand(const GroupingSets & gss); - // replicateAndFillNull is the basic functionality that Repeat Operator provided. Briefly, it replicates + // replicateAndFillNull is the basic functionality that Expand Operator provided. Briefly, it replicates // origin rows with regard to local grouping sets description, and appending a new column named as groupingID // to illustrate what group this row is targeted for. 
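 // A minimal usage sketch (commentary added for clarity, not part of this
 // patch; assumes a Block `block` with columns a and b built elsewhere):
 //
 //     GroupingSets grouping_sets{{{"a"}}, {{"b"}}}; // two sets: {a} and {b}
 //     auto expand = Expand::sharedExpand(grouping_sets);
 //     expand->replicateAndFillNull(block);
 //
 // Afterwards `block` holds one replica of every origin row per grouping set,
 // with the non-targeted grouping column nulled out and a trailing groupingID
 // column recording which grouping set each row targets.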
void replicateAndFillNull(Block & input) const;
@@ -127,7 +127,7 @@ class Repeat
 
     void getAllGroupSetColumnNames(std::set<String> & name_set) const;
 
-    static std::shared_ptr<Repeat> sharedRepeat(const GroupingSets & groupingSets);
+    static std::shared_ptr<Expand> sharedExpand(const GroupingSets & groupingSets);
 
     void getGroupingSetsDes(FmtBuffer & buffer) const;
 
diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp
index e25ae02bf88..7b89ed431c3 100644
--- a/dbms/src/Interpreters/ExpressionActions.cpp
+++ b/dbms/src/Interpreters/ExpressionActions.cpp
@@ -136,11 +136,11 @@ ExpressionAction ExpressionAction::ordinaryJoin(std::shared_ptr<Join> join
     return a;
 }
 
-ExpressionAction ExpressionAction::repeatSource(std::shared_ptr<Repeat> repeat_source_)
+ExpressionAction ExpressionAction::expandSource(std::shared_ptr<Expand> expand_)
 {
     ExpressionAction a;
-    a.type = REPEAT;
-    a.repeat = repeat_source_;
+    a.type = EXPAND;
+    a.expand = expand_;
     return a;
 }
 
@@ -239,11 +239,11 @@ void ExpressionAction::prepare(Block & sample_block) // the prepare phase
     break;
 }
 
-    case REPEAT:
+    case EXPAND:
     {
         // sample_block is just for schema check followed by later block, modify it if your schema has changed during this action.
         auto name_set = std::set<String>();
-        repeat->getAllGroupSetColumnNames(name_set);
+        expand->getAllGroupSetColumnNames(name_set);
         // make grouping set column to be nullable.
         for (const auto & col_name: name_set) {
             auto & column_with_name = sample_block.getByName(col_name);
@@ -252,7 +252,7 @@ void ExpressionAction::prepare(Block & sample_block) // the prepare phase
             column_with_name.column = makeNullable(column_with_name.column);
         }
         // fill one more column: groupingID.
-        sample_block.insert({nullptr, repeat->grouping_identifier_column_type, repeat->grouping_identifier_column_name});
+        sample_block.insert({nullptr, expand->grouping_identifier_column_type, expand->grouping_identifier_column_name});
         break;
     }
 
@@ -341,9 +341,9 @@ void ExpressionAction::execute(Block & block) const // the execute phase
     break;
 }
 
-    case REPEAT:
+    case EXPAND:
     {
-        repeat->replicateAndFillNull(block); // repeat 的执行阶段直接 fill block 了
+        expand->replicateAndFillNull(block); // expand's execute phase fills the block in place
         break;
     }
 
diff --git a/dbms/src/Interpreters/ExpressionActions.h b/dbms/src/Interpreters/ExpressionActions.h
index 0a9b9bd99fa..e9d98de2106 100644
--- a/dbms/src/Interpreters/ExpressionActions.h
+++ b/dbms/src/Interpreters/ExpressionActions.h
@@ -17,6 +17,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 
@@ -34,7 +36,7 @@ using NameWithAlias = std::pair<String, String>;
 using NamesWithAliases = std::vector<NameWithAlias>;
 
 class Join;
-class Repeat;
+class Expand;
 
 class IFunctionBase;
 using FunctionBasePtr = std::shared_ptr<IFunctionBase>;
 
@@ -67,7 +69,7 @@ struct ExpressionAction
     /// Reorder and rename the columns, delete the extra ones. The same column names are allowed in the result.
     PROJECT,
 
-    REPEAT,
+    EXPAND,
 };
 
 Type type;
 
@@ -93,9 +95,9 @@ struct ExpressionAction
     /// For PROJECT.
     NamesWithAliases projections;
 
-    /// For REPEAT_SOURCE.
-    std::shared_ptr<Repeat> repeat;
-    NamesAndTypesList columns_added_by_repeat;
+    /// For EXPAND.
+    std::shared_ptr<Expand> expand;
+    NamesAndTypesList columns_added_by_expand;
 
     /// If result_name_ == "", as name "function_name(arguments separated by commas) is used".
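 /// Sketch of how an EXPAND action is driven end to end (mirrors
 /// PhysicalExpand::repeatTransform earlier in this patch; illustrative only):
 ///
 ///     auto actions = PhysicalPlanHelper::newActions(header, context);
 ///     actions->add(ExpressionAction::expandSource(shared_expand));
 ///     actions->execute(block); // prepare() made the grouping set columns
 ///                              // nullable and registered groupingID in the
 ///                              // sample block; execute() replicates the rows.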
static ExpressionAction applyFunction( @@ -110,7 +112,7 @@ struct ExpressionAction static ExpressionAction project(const NamesWithAliases & projected_columns_); static ExpressionAction project(const Names & projected_columns_); static ExpressionAction ordinaryJoin(std::shared_ptr join_, const NamesAndTypesList & columns_added_by_join_); - static ExpressionAction repeatSource(std::shared_ptr repeat_source_); + static ExpressionAction expandSource(std::shared_ptr expand_); /// Which columns necessary to perform this action. Names getNeededColumns() const; diff --git a/dbms/src/Interpreters/tests/gtest_block_repeat.cpp b/dbms/src/Interpreters/tests/gtest_block_repeat.cpp index 21074ff7a95..88c6286898e 100644 --- a/dbms/src/Interpreters/tests/gtest_block_repeat.cpp +++ b/dbms/src/Interpreters/tests/gtest_block_repeat.cpp @@ -14,9 +14,9 @@ #include #include +#include #include #include -#include namespace DB { diff --git a/dbms/src/TestUtils/executorSerializer.cpp b/dbms/src/TestUtils/executorSerializer.cpp index 33304e298c0..46f0d3b03a1 100644 --- a/dbms/src/TestUtils/executorSerializer.cpp +++ b/dbms/src/TestUtils/executorSerializer.cpp @@ -160,10 +160,10 @@ void serializeTopN(const String & executor_id, const tipb::TopN & top_n, FmtBuff buf.fmtAppend("}}, limit: {}\n", top_n.limit()); } -void serializeRepeatSource(const String & executor_id, const tipb::RepeatSource & repeat, FmtBuffer & buf) +void serializeExpandSource(const String & executor_id, const tipb::Expand & expand, FmtBuffer & buf) { - buf.fmtAppend("{} | repeat_source_by: [", executor_id); - for (const auto & grouping_set : repeat.grouping_sets()) + buf.fmtAppend("{} | expanded_by: [", executor_id); + for (const auto & grouping_set : expand.grouping_sets()) { buf.fmtAppend("<"); for (const auto & grouping_exprs : grouping_set.grouping_exprs()) @@ -306,8 +306,8 @@ void ExecutorSerializer::serializeListStruct(const tipb::DAGRequest * dag_reques case tipb::ExecType::TypeLimit: serializeLimit("Limit", executor.limit(), buf); break; - case tipb::ExecType::TypeRepeatSource: - serializeRepeatSource("Repeat", executor.repeat_source(), buf); + case tipb::ExecType::TypeExpand: + serializeExpandSource("Repeat", executor.expand(), buf); break; default: throw TiFlashException("Should not reach here", Errors::Coprocessor::Internal); @@ -366,8 +366,8 @@ void ExecutorSerializer::serializeTreeStruct(const tipb::Executor & root_executo case tipb::ExecType::TypeWindow: serializeWindow(executor.executor_id(), executor.window(), buf); break; - case tipb::ExecType::TypeRepeatSource: - serializeRepeatSource(executor.executor_id(), executor.repeat_source(), buf); + case tipb::ExecType::TypeExpand: + serializeExpandSource(executor.executor_id(), executor.expand(), buf); break; default: throw TiFlashException("Should not reach here", Errors::Coprocessor::Internal); From 697e8650b5542b66df13d2c8102fd9f4802f6281 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Mon, 26 Dec 2022 12:05:24 +0800 Subject: [PATCH 06/31] remove useless file Signed-off-by: AilinKid <3148019@qq.com> --- tai.cpp | 64 --------------------------------------------------------- tai.h | 14 ------------- 2 files changed, 78 deletions(-) delete mode 100644 tai.cpp delete mode 100644 tai.h diff --git a/tai.cpp b/tai.cpp deleted file mode 100644 index 59b2935c6a4..00000000000 --- a/tai.cpp +++ /dev/null @@ -1,64 +0,0 @@ -// -// Created by arenatlx on 2022/10/27. 
-// - -#include -#include -#include "tai.h" - - - -class HAHA { -public: - template - HAHA & operator=(std::vector && rhs){ - return rhs[0]; - } -private: - int a; -}; - - -class MY { - using Arra1y = std::vector; -public: - HAHA operator[](size_t n) const; -}; - -HAHA MY::operator[](size_t n) const{ - Arra1y a(n); - return a[0]; -} - -struct Test{ - ~Test(){ - std::cout<<"kill test"<(); - auto tmp = new(Test); // 这个地方直接 Test() 还是会析构一次,有点奇怪,只有 new 才行。 - vec.push_back(std::move(*tmp)); - std::cout< v; - v.push_back("aaaa"); - v.push_back("bbbb"); - v[0][0]='1'; - v.push_back(std::move(v[0])); - std::cout< Date: Mon, 26 Dec 2022 12:15:21 +0800 Subject: [PATCH 07/31] remove debug log Signed-off-by: AilinKid <3148019@qq.com> --- dbms/src/Debug/dbgFuncCoprocessor.h | 1 - .../Coprocessor/DAGExpressionAnalyzer.cpp | 1 - dbms/src/TestUtils/FunctionTestUtils.cpp | 29 ------------------- 3 files changed, 31 deletions(-) diff --git a/dbms/src/Debug/dbgFuncCoprocessor.h b/dbms/src/Debug/dbgFuncCoprocessor.h index f1b95139f62..9a21842fa50 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.h +++ b/dbms/src/Debug/dbgFuncCoprocessor.h @@ -15,7 +15,6 @@ #pragma once #include - namespace DB { class Context; diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index fe74369bf7a..e9a10539378 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -669,7 +669,6 @@ String DAGExpressionAnalyzer::applyFunction( const TiDB::TiDBCollatorPtr & collator) { String result_name = genFuncString(func_name, arg_names, {collator}); - // 啊这个好!可以避免相同表达式的重复计算 if (actions->getSampleBlock().has(result_name)) return result_name; const FunctionBuilderPtr & function_builder = FunctionFactory::instance().get(func_name, context); diff --git a/dbms/src/TestUtils/FunctionTestUtils.cpp b/dbms/src/TestUtils/FunctionTestUtils.cpp index b616c441bf4..a679bbd9885 100644 --- a/dbms/src/TestUtils/FunctionTestUtils.cpp +++ b/dbms/src/TestUtils/FunctionTestUtils.cpp @@ -193,7 +193,6 @@ std::multiset columnsToRowSet(const ColumnsWithTypeAndName & cols) { for (size_t i = 0, size = col.column->size(); i < size; ++i) { - new (rows[i].place(col_id)) Field((*col.column)[i]); } } @@ -221,39 +220,12 @@ ::testing::AssertionResult columnsEqual( ASSERT_EQUAL(expect_col.column->size(), actual_col.column->size(), fmt::format("Column {} size mismatch", i)); auto type_eq = dataTypeEqual(expected[i].type, actual[i].type); if (!type_eq) - { - std::cout << "type equal false" << std::endl; return type_eq; - } } auto const expected_row_set = columnsToRowSet(expected); auto const actual_row_set = columnsToRowSet(actual); - { - auto expect_it = expected_row_set.begin(); - auto actual_it = actual_row_set.begin(); - FmtBuffer buf1; - FmtBuffer buf2; - for (; expect_it != expected_row_set.end(); ++expect_it, ++actual_it) - { - buf1.joinStr( - expect_it->begin(), - expect_it->end(), - [](const auto & v, FmtBuffer & fb) { fb.append(v.toString()); }, - " ") - .append("\n"); - buf2.joinStr( - actual_it->begin(), - actual_it->end(), - [](const auto & v, FmtBuffer & fb) { fb.append(v.toString()); }, - " ") - .append("\n"); - } - auto res1 = buf1.toString(); - auto res2 = buf2.toString(); - } - if (expected_row_set != actual_row_set) { FmtBuffer buf; @@ -287,7 +259,6 @@ ::testing::AssertionResult columnsEqual( .append("\n"); } buf.append("...\n"); - std::cout< Date: Mon, 26 Dec 2022 12:16:23 +0800 Subject: [PATCH 08/31] . 
Signed-off-by: AilinKid <3148019@qq.com>
---
 a.out | Bin 85490 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100755 a.out

diff --git a/a.out b/a.out
deleted file mode 100755
index 8aed9644943b125062ea04d62dd9d638ddaf2013..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 85490
[85490-byte binary blob elided]

From <elided> Mon Sep 17 00:00:00 2001
From: AilinKid <3148019@qq.com>
Date: Wed, 28 Dec 2022 22:35:13 +0800
Subject: [PATCH 09/31] remove chinese comment

Signed-off-by: AilinKid <3148019@qq.com>
---
 dbms/src/Columns/ColumnArray.cpp              |  1 -
 dbms/src/Columns/ColumnNullable.h             |  2 +-
 dbms/src/Columns/ColumnsCommon.cpp            |  2 +-
 dbms/src/Common/COWPtr.h                      |  1 -
 dbms/src/Common/HashTable/HashTable.h         |  4 ++--
 dbms/src/Core/Block.h                         |  5 ++--
 dbms/src/Core/ColumnWithTypeAndName.h         |  2 --
 dbms/src/Core/ColumnsWithTypeAndName.h        |  1 -
 dbms/src/DataStreams/SquashingTransform.cpp   |  2 +-
 dbms/src/Debug/MockExecutor/AstToPB.cpp       |  2 +-
 dbms/src/Debug/MockExecutor/ExpandBinder.cpp  |  2 +-
 dbms/src/Debug/MockExecutor/ExpandBinder.h    |  3 ++-
 dbms/src/Flash/Coprocessor/DAGContext.cpp     |  4 ++--
 .../Coprocessor/DAGExpressionAnalyzer.cpp     | 18 +++++---------
 .../DAGExpressionAnalyzerHelper.cpp           |  8 +++----
 dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp  |  5 ++--
 dbms/src/Flash/Coprocessor/DAGQueryBlock.h    |  4 ++--
 .../Coprocessor/DAGQueryBlockInterpreter.cpp  | 24 ++++++++-----------
 dbms/src/Flash/Coprocessor/DAGUtils.cpp       |  2 +-
 dbms/src/Flash/Coprocessor/InterpreterDAG.cpp |  2 +-
 .../Flash/Coprocessor/InterpreterUtils.cpp    |  2 +-
 .../Coprocessor/JoinInterpreterHelper.cpp     | 19 +++++++--------
 dbms/src/Flash/Mpp/MPPHandler.cpp             |  1 -
 dbms/src/Flash/Mpp/MPPTask.cpp                |  6 +----
 dbms/src/Flash/Mpp/MPPTaskScheduleEntry.h     |  1 -
 dbms/src/Flash/Mpp/MPPTunnelSet.cpp           |  4 +---
 dbms/src/Interpreters/ExpressionActions.cpp   | 22 +++++++----------
 dbms/src/Interpreters/Join.cpp                | 17 ++++++-------
 dbms/src/Interpreters/Join.h                  |  2 +-
 dbms/src/Interpreters/NullableUtils.cpp       |  2 --
 dbms/src/Interpreters/sortBlock.cpp           |  3 +--
 dbms/src/TestUtils/mockExecutor.cpp           |  2 +-
 32 files changed, 68 insertions(+), 107 deletions(-)

diff --git a/dbms/src/Columns/ColumnArray.cpp b/dbms/src/Columns/ColumnArray.cpp
index 00a406402b9..8a5ced0b084 100644
--- a/dbms/src/Columns/ColumnArray.cpp
+++ b/dbms/src/Columns/ColumnArray.cpp
@@ -952,7 +952,6 @@ ColumnPtr ColumnArray::replicateNullable(const Offsets & replicate_offsets) cons
 
 ColumnPtr ColumnArray::replicateTuple(const Offsets & replicate_offsets) const
 {
-    // data 是一个父累指针
     const auto & tuple = static_cast<const ColumnTuple &>(*data);
 
     /// Make temporary arrays for each components of Tuple. In the same way as for Nullable.
diff --git a/dbms/src/Columns/ColumnNullable.h b/dbms/src/Columns/ColumnNullable.h
index d993d918509..2069f80b42e 100644
--- a/dbms/src/Columns/ColumnNullable.h
+++ b/dbms/src/Columns/ColumnNullable.h
@@ -31,7 +31,7 @@ using ConstNullMapPtr = const NullMap *;
 /// over a bitmap because columns are usually stored on disk as compressed
 /// files. In this regard, using a bitmap instead of a byte map would
 /// greatly complicate the implementation with little to no benefits.
-class ColumnNullable final : public COWPtrHelper // nullable 列是怎么形成的,一般是一个普通列,一个伴随 bitmap,这里使用的 byte map 来存的 null mapping 而不是 bits +class ColumnNullable final : public COWPtrHelper { private: friend class COWPtrHelper; diff --git a/dbms/src/Columns/ColumnsCommon.cpp b/dbms/src/Columns/ColumnsCommon.cpp index 9307587ce6c..e969dc99842 100644 --- a/dbms/src/Columns/ColumnsCommon.cpp +++ b/dbms/src/Columns/ColumnsCommon.cpp @@ -291,7 +291,7 @@ void filterArraysImplGeneric( while (filt_pos < filt_end) { - if (*filt_pos) // 如果是 0 的话,说名该列该行被 filter 了 + if (*filt_pos) copy_array(offsets_pos); ++filt_pos; diff --git a/dbms/src/Common/COWPtr.h b/dbms/src/Common/COWPtr.h index b4d39620287..1f6bb8dacbb 100644 --- a/dbms/src/Common/COWPtr.h +++ b/dbms/src/Common/COWPtr.h @@ -105,7 +105,6 @@ class COWPtr : public boost::intrusive_ref_counter T && operator*() const && { return const_cast::type &&>(*boost::intrusive_ptr::get()); } }; - // 这个地方,COWPtr 继承 counter 之后就自带了 ref count 和 add, release 函数。所以私有类实力化到 T 之后 = IntrusivePtr,里面调用的 add, release 函数就有了,其都是操作 T 继承的 ref count 来操作的 protected: template class mutable_ptr : public IntrusivePtr // NOLINT(readability-identifier-naming) diff --git a/dbms/src/Common/HashTable/HashTable.h b/dbms/src/Common/HashTable/HashTable.h index 3bc3ab5e56c..2c857b9bc1b 100644 --- a/dbms/src/Common/HashTable/HashTable.h +++ b/dbms/src/Common/HashTable/HashTable.h @@ -449,7 +449,7 @@ class HashTable : private boost::noncopyable { while (!buf[place_value].isZero(*this) && !buf[place_value].keyEquals(x, hash_value, *this)) { - place_value = grower.next(place_value); // closed hash,线性开放地址寻址法 + place_value = grower.next(place_value); #ifdef DBMS_HASH_MAP_COUNT_COLLISIONS ++collisions; #endif @@ -694,7 +694,7 @@ class HashTable : private boost::noncopyable * HashMap completely, change all its users to the existing internal * iteration interface, and redefine end() to return LookupResult for * compatibility with std find(). Unfortunately, now is not the time to - * do this. // 隐式类型转换操作符 + * do this. */ operator Cell *() const { return nullptr; } // NOLINT(google-explicit-constructor) }; diff --git a/dbms/src/Core/Block.h b/dbms/src/Core/Block.h index 3463c47c1bc..a1af433bbc9 100644 --- a/dbms/src/Core/Block.h +++ b/dbms/src/Core/Block.h @@ -27,7 +27,7 @@ namespace DB { -/** Container for set of columns for bunch of rows in memory. // 怎么区分这里 rows 的大小呢? +/** Container for set of columns for bunch of rows in memory. * This is unit of data processing. * Also contains metadata - data types of columns and their names * (either original names from a table, or generated names during temporary calculations). 
@@ -38,8 +38,7 @@ class Context; class Block { -private: - // 多列的一个数据 +private: using Container = ColumnsWithTypeAndName; using IndexByName = std::map; diff --git a/dbms/src/Core/ColumnWithTypeAndName.h b/dbms/src/Core/ColumnWithTypeAndName.h index 30c4fe8c546..42a98f795fd 100644 --- a/dbms/src/Core/ColumnWithTypeAndName.h +++ b/dbms/src/Core/ColumnWithTypeAndName.h @@ -32,8 +32,6 @@ class WriteBuffer; struct ColumnWithTypeAndName { - // column 继承子 intrusive 实现 share ptr 功能,同归继承类的两个实现,mutable ptr 和 immutable ptr 可以相互转化 - // columnPtr 是一个基类指针 ColumnPtr column; DataTypePtr type; String name; diff --git a/dbms/src/Core/ColumnsWithTypeAndName.h b/dbms/src/Core/ColumnsWithTypeAndName.h index e7741bbb71e..61c77cf161e 100644 --- a/dbms/src/Core/ColumnsWithTypeAndName.h +++ b/dbms/src/Core/ColumnsWithTypeAndName.h @@ -21,7 +21,6 @@ namespace DB { -// 这里是一个多列组合的数据 using ColumnsWithTypeAndName = std::vector; } diff --git a/dbms/src/DataStreams/SquashingTransform.cpp b/dbms/src/DataStreams/SquashingTransform.cpp index 391cd710c8d..2425435d90e 100644 --- a/dbms/src/DataStreams/SquashingTransform.cpp +++ b/dbms/src/DataStreams/SquashingTransform.cpp @@ -60,7 +60,7 @@ SquashingTransform::Result SquashingTransform::add(Block && block) return Result(std::move(block)); } - append(std::move(block)); // 攒批 + append(std::move(block)); accumulated_block_rows = accumulated_block.rows(); accumulated_block_bytes = accumulated_block.bytes(); diff --git a/dbms/src/Debug/MockExecutor/AstToPB.cpp b/dbms/src/Debug/MockExecutor/AstToPB.cpp index 8977d8dc279..fa58e2e3fc8 100644 --- a/dbms/src/Debug/MockExecutor/AstToPB.cpp +++ b/dbms/src/Debug/MockExecutor/AstToPB.cpp @@ -447,7 +447,7 @@ void identifierToPB(const DAGSchema & input, ASTIdentifier * id, tipb::Expr * ex *(expr->mutable_field_type()) = columnInfoToFieldType((*ft).second); expr->mutable_field_type()->set_collate(collator_id); WriteBufferFromOwnString ss; - encodeDAGInt64(ft - input.begin(), ss); // 这个地方使用下面的 child input schema 的 offset,替换当前算子使用的 column ref + encodeDAGInt64(ft - input.begin(), ss); expr->set_val(ss.releaseStr()); } diff --git a/dbms/src/Debug/MockExecutor/ExpandBinder.cpp b/dbms/src/Debug/MockExecutor/ExpandBinder.cpp index 9d07a0c58f4..0eb35b71c62 100644 --- a/dbms/src/Debug/MockExecutor/ExpandBinder.cpp +++ b/dbms/src/Debug/MockExecutor/ExpandBinder.cpp @@ -65,4 +65,4 @@ ExecutorBinderPtr compileRepeat(ExecutorBinderPtr input, size_t & executor_index expand->children.push_back(input); return expand; } -} // namespace DB::mock \ No newline at end of file +} // namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/ExpandBinder.h b/dbms/src/Debug/MockExecutor/ExpandBinder.h index 752046a4d80..d1b4c7d980f 100644 --- a/dbms/src/Debug/MockExecutor/ExpandBinder.h +++ b/dbms/src/Debug/MockExecutor/ExpandBinder.h @@ -12,8 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#pragma once +#include namespace DB::mock { diff --git a/dbms/src/Flash/Coprocessor/DAGContext.cpp b/dbms/src/Flash/Coprocessor/DAGContext.cpp index 66e64c11b64..b4a9f9ad515 100644 --- a/dbms/src/Flash/Coprocessor/DAGContext.cpp +++ b/dbms/src/Flash/Coprocessor/DAGContext.cpp @@ -129,10 +129,10 @@ DAGContext::DAGContext(const tipb::DAGRequest & dag_request_, String log_identif void DAGContext::initOutputInfo() { - output_field_types = collectOutputFieldTypes(*dag_request); //那么 field types 对应的就是一个 fragment DAG 的 output schema's field types. 
+ output_field_types = collectOutputFieldTypes(*dag_request); output_offsets.clear(); result_field_types.clear(); - for (UInt32 i : dag_request->output_offsets()) // 这个地方应该是 fragment dag request 自带的 output offsets + for (UInt32 i : dag_request->output_offsets()) { output_offsets.push_back(i); if (unlikely(i >= output_field_types.size())) diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index e9a10539378..18fd7c507d8 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -983,7 +983,7 @@ std::pair DAGExpressionAnalyzer::buildJoinKey( for (int i = 0; i < keys.size(); ++i) { const auto & key = keys.at(i); - bool has_actions = key.tp() != tipb::ExprType::ColumnRef; // join key 如果不是 column ref 说明是有前序动作帮我把表达式给准备成列 + bool has_actions = key.tp() != tipb::ExprType::ColumnRef; String key_name = getActions(key, actions); DataTypePtr current_type = actions->getSampleBlock().getByName(key_name).type; @@ -1048,7 +1048,6 @@ bool DAGExpressionAnalyzer::appendJoinKeyAndJoinFilters( ExpressionActionsPtr actions = chain.getLastActions(); bool ret = false; - // build join keys,ck 只输出一个 key,需要 copy 一份,如果是表达式,还需要 append scalar 的 action std::tie(ret, key_names) = buildJoinKey(actions, keys, join_key_types, left, is_right_out_join); if (!filters.empty()) @@ -1057,7 +1056,7 @@ bool DAGExpressionAnalyzer::appendJoinKeyAndJoinFilters( std::vector filter_vector; for (const auto & c : filters) filter_vector.push_back(&c); - filter_column_name = appendWhere(chain, filter_vector); // 构建了 filter 输出的列 + filter_column_name = appendWhere(chain, filter_vector); } /// remove useless columns to avoid duplicate columns /// as when compiling the key/filter expression, the origin @@ -1077,18 +1076,18 @@ bool DAGExpressionAnalyzer::appendJoinKeyAndJoinFilters( if (ret) { std::unordered_set needed_columns; - for (const auto & c : getCurrentInputColumns()) // 当前进来的列都要 + for (const auto & c : getCurrentInputColumns()) needed_columns.insert(c.name); - for (const auto & s : key_names) // 当前怎加的 key col 也要 + for (const auto & s : key_names) needed_columns.insert(s); - if (!filter_column_name.empty()) // 当前添加的一侧 filter 的 col 也要 + if (!filter_column_name.empty()) needed_columns.insert(filter_column_name); const auto & names = actions->getSampleBlock().getNames(); for (const auto & name : names) { if (needed_columns.find(name) == needed_columns.end()) - actions->add(ExpressionAction::removeColumn(name)); // 增加后续的 action,裁剪掉不要一些 column 列 (这些 immediate 列的最后的结果已经被我 record 了) + actions->add(ExpressionAction::removeColumn(name)); } } return ret; @@ -1452,12 +1451,9 @@ String DAGExpressionAnalyzer::getActions(const tipb::Expr & expr, const Expressi if (isLiteralExpr(expr)) { Field value = decodeLiteral(expr); - // 主要对 decimal DataTypePtr flash_type = applyVisitor(FieldToDataType(), value); DataTypePtr target_type = inferDataType4Literal(expr); - // 表达式的 uniuqe name ret = exprToString(expr, getCurrentInputColumns()) + "_" + target_type->getName(); - // 表达式如果有这个名字,说明有这列 if (!actions->getSampleBlock().has(ret)) { ColumnWithTypeAndName column; @@ -1478,12 +1474,10 @@ String DAGExpressionAnalyzer::getActions(const tipb::Expr & expr, const Expressi } else if (isColumnExpr(expr)) { - // 如果是 column ref,直接从 stream input column 里面拿到 name ret = getColumnNameForColumnExpr(expr, getCurrentInputColumns()); } else if (isScalarFunctionExpr(expr)) { - // 根据 expr 构造 function 加入到 actions 里面 ret = 
DAGExpressionAnalyzerHelper::buildFunction(this, expr, actions); } else diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp index bc805d615c0..7d7a502beb1 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp @@ -150,12 +150,12 @@ String DAGExpressionAnalyzerHelper::buildInFunction( DataTypePtr type = inferDataType4Literal(child); argument_types.push_back(type); } - // find common type + // find common type DataTypePtr resolved_type = getLeastSupertype(argument_types); if (!removeNullable(resolved_type)->equals(*removeNullable(argument_types[0]))) { // Need cast left argument - key_name = analyzer->appendCast(resolved_type, actions, key_name); // 对于孩子的输出来说,需要 cast + key_name = analyzer->appendCast(resolved_type, actions, key_name); } analyzer->makeExplicitSet(expr, sample_block, false, key_name); argument_names.push_back(key_name); @@ -402,7 +402,6 @@ String DAGExpressionAnalyzerHelper::buildRegexpFunction( return analyzer->applyFunction(func_name, argument_names, actions, collator); } -// case when 函数应该走这里 String DAGExpressionAnalyzerHelper::buildDefaultFunction( DAGExpressionAnalyzer * analyzer, const tipb::Expr & expr, @@ -412,9 +411,8 @@ String DAGExpressionAnalyzerHelper::buildDefaultFunction( Names argument_names; for (const auto & child : expr.children()) { - // 函数参数如果还是函数的,这里需要递归生成多个 actions(深度优先) String name = analyzer->getActions(child, actions); - argument_names.push_back(name); // 拿到孩子的函数输出之后,再将其作为参数 + argument_names.push_back(name); } return analyzer->applyFunction(func_name, argument_names, actions, getCollatorFromExpr(expr)); } diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp index 2f5a28347cd..a2a8f6b90f4 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlock.cpp @@ -101,7 +101,7 @@ DAGQueryBlock::DAGQueryBlock(const tipb::Executor & root_, QueryBlockIDGenerator GET_METRIC(tiflash_coprocessor_executor_count, type_expand).Increment(); assignOrThrowException(&expand, current, EXPAND_NAME); expand_name = current->executor_id(); - current = ¤t->expand().child(); // 非叶节点,继续孩子递归下去 + current = ¤t->expand().child(); break; case tipb::ExecType::TypeStreamAgg: RUNTIME_CHECK_MSG(current->aggregation().group_by_size() == 0, STREAM_AGG_ERROR); @@ -141,7 +141,6 @@ DAGQueryBlock::DAGQueryBlock(const tipb::Executor & root_, QueryBlockIDGenerator assignOrThrowException(&source, current, SOURCE_NAME); source_name = current->executor_id(); - // source 节点, if (current->tp() == tipb::ExecType::TypeJoin) { if (source->join().children_size() != 2) @@ -157,7 +156,7 @@ DAGQueryBlock::DAGQueryBlock(const tipb::Executor & root_, QueryBlockIDGenerator else if (current->tp() == tipb::ExecType::TypeProjection) { GET_METRIC(tiflash_coprocessor_executor_count, type_projection).Increment(); - children.push_back(std::make_shared(source->projection().child(), id_generator)); // 将之后的算子重新算作 children + children.push_back(std::make_shared(source->projection().child(), id_generator)); } else if (current->tp() == tipb::ExecType::TypeTableScan) { diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlock.h b/dbms/src/Flash/Coprocessor/DAGQueryBlock.h index 86cd14c09df..91dc6c2f439 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlock.h +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlock.h @@ -60,14 +60,14 @@ class DAGQueryBlock String having_name; const 
tipb::Executor * limit_or_topn = nullptr; String limit_or_topn_name; - const tipb::Executor * expand = nullptr; // expand node can only be before sender + const tipb::Executor * expand = nullptr; String expand_name; const tipb::Executor * exchange_sender = nullptr; String exchange_sender_name; UInt32 id; const tipb::Executor * root; String qb_column_prefix; - std::vector> children; // are these children passed in after each dag executor has been built? + std::vector> children; bool can_restore_pipeline_concurrency = true; bool isRootQueryBlock() const { return id == 1; }; diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 9889536c48c..4678b854012 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -248,7 +248,6 @@ void DAGQueryBlockInterpreter::handleJoin(const tipb::Join & join, DAGPipeline & bool is_tiflash_right_join = tiflash_join.isTiFlashRightJoin(); // prepare probe side - // prepare the probe side of the join, mainly appending the actions for the join keys and the filter expressions auto [probe_side_prepare_actions, probe_key_names, probe_filter_column_name] = JoinInterpreterHelper::prepareJoin( context, probe_pipeline.firstStream()->getHeader(), @@ -257,10 +256,9 @@ void DAGQueryBlockInterpreter::handleJoin(const tipb::Join & join, DAGPipeline & true, is_tiflash_right_join, tiflash_join.getProbeConditions()); - RUNTIME_ASSERT(probe_side_prepare_actions, log, "probe_side_prepare_actions cannot be nullptr"); // after init there is always at least one + RUNTIME_ASSERT(probe_side_prepare_actions, log, "probe_side_prepare_actions cannot be nullptr"); // prepare build side - // this call goes through the same entry as above, so it does exactly the same thing auto [build_side_prepare_actions, build_key_names, build_filter_column_name] = JoinInterpreterHelper::prepareJoin( context, build_pipeline.firstStream()->getHeader(), @@ -271,12 +269,11 @@ void DAGQueryBlockInterpreter::handleJoin(const tipb::Join & join, DAGPipeline & tiflash_join.getBuildConditions()); RUNTIME_ASSERT(build_side_prepare_actions, log, "build_side_prepare_actions cannot be nullptr"); - // appends the where columns for the other conditions and the other-eq conditions auto [other_condition_expr, other_filter_column_name, other_eq_filter_from_in_column_name] = tiflash_join.genJoinOtherConditionAction(context, left_input_header, right_input_header, probe_side_prepare_actions); const Settings & settings = context.getSettingsRef(); - size_t max_block_size_for_cross_join = settings.max_block_size; // what if the number of repeated result rows exceeds this max? + size_t max_block_size_for_cross_join = settings.max_block_size; fiu_do_on(FailPoints::minimum_block_size_for_cross_join, { max_block_size_for_cross_join = 1; }); JoinPtr join_ptr = std::make_shared( // make join @@ -485,7 +482,7 @@ void DAGQueryBlockInterpreter::recordProfileStreams(DAGPipeline & pipeline, cons void DAGQueryBlockInterpreter::handleExchangeReceiver(DAGPipeline & pipeline) { - auto exchange_receiver = dagContext().getMPPExchangeReceiver(query_block.source_name); // fetch the source from the registered exchangers + auto exchange_receiver = dagContext().getMPPExchangeReceiver(query_block.source_name); if (unlikely(exchange_receiver == nullptr)) throw Exception("Can not find exchange receiver for " + query_block.source_name, ErrorCodes::LOGICAL_ERROR); // todo choose a more reasonable stream number @@ -508,14 +505,14 @@ void DAGQueryBlockInterpreter::handleExchangeReceiver(DAGPipeline & pipeline) /*stream_id=*/enable_fine_grained_shuffle ? 
i : 0); exchange_receiver_io_input_streams.push_back(stream); stream->setExtraInfo(extra_info); - pipeline.streams.push_back(stream); // the underlying input streams of each pipeline + pipeline.streams.push_back(stream); } NamesAndTypes source_columns; for (const auto & col : pipeline.firstStream()->getHeader()) { source_columns.emplace_back(col.name, col.type); } - analyzer = std::make_unique(std::move(source_columns), context); // the analyzer is initialized here + analyzer = std::make_unique(std::move(source_columns), context); } // for tests, we need to mock ExchangeReceiver blockInputStream as the source stream. @@ -531,7 +528,7 @@ void DAGQueryBlockInterpreter::handleProjection(DAGPipeline & pipeline, const ti { NamesAndTypes input_columns; pipeline.streams = input_streams_vec[0]; - for (auto const & p : pipeline.firstStream()->getHeader().getNamesAndTypesList()) // the initial block column names + for (auto const & p : pipeline.firstStream()->getHeader().getNamesAndTypesList()) input_columns.emplace_back(p.name, p.type); DAGExpressionAnalyzer dag_analyzer(std::move(input_columns), context); ExpressionActionsChain chain; @@ -541,12 +538,12 @@ void DAGQueryBlockInterpreter::handleProjection(DAGPipeline & pipeline, const ti UniqueNameGenerator unique_name_generator; for (const auto & expr : projection.exprs()) { - auto expr_name = dag_analyzer.getActions(expr, last_step.actions); // add the extra column produced by the expr - last_step.required_output.emplace_back(expr_name); // add it to this step's final output columns + auto expr_name = dag_analyzer.getActions(expr, last_step.actions); + last_step.required_output.emplace_back(expr_name); const auto & col = last_step.actions->getSampleBlock().getByName(expr_name); String alias = unique_name_generator.toUniqueName(col.name); output_columns.emplace_back(alias, col.type); - project_cols.emplace_back(col.name, alias); // just make sure the current projection's output columns contain no duplicate names + project_cols.emplace_back(col.name, alias); } executeExpression(pipeline, chain.getLastActions(), log, "before projection"); executeProject(pipeline, project_cols, "projection"); @@ -595,7 +592,7 @@ void DAGQueryBlockInterpreter::handleWindowOrder(DAGPipeline & pipeline, const t // like final_project.emplace_back(col.name, query_block.qb_column_prefix + col.name); void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline) { - if (query_block.source->tp() == tipb::ExecType::TypeJoin) // check what the underlying source executor is + if (query_block.source->tp() == tipb::ExecType::TypeJoin) { SubqueryForSet right_query; handleJoin(query_block.source->join(), pipeline, right_query, query_block.source->fine_grained_shuffle_stream_count()); @@ -646,7 +643,6 @@ void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline) Errors::Coprocessor::BadRequest); } - // the analyzer is used here: it first takes the most basic source column base, then analyzes the upper non-leaf executors of the query block auto res = analyzeExpressions( context, *analyzer, diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index c74b52ed77d..83563c47338 100755 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -1131,7 +1131,7 @@ Field decodeLiteral(const tipb::Expr & expr) } } -String getColumnNameForColumnExpr(const tipb::Expr & expr, const std::vector & input_col) // this maps the index in the expr to the column name in the vector +String getColumnNameForColumnExpr(const tipb::Expr & expr, const std::vector & input_col) { auto column_index = decodeDAGInt64(expr.val()); if (column_index < 0 || column_index >= static_cast(input_col.size())) diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp 
b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp index 113602a1d82..0869c2c653f 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp @@ -55,7 +55,7 @@ BlockInputStreams InterpreterDAG::executeQueryBlock(DAGQueryBlock & query_block) } DAGQueryBlockInterpreter query_block_interpreter( context, - input_streams_vec, // the input sources of the underlying DAG + input_streams_vec, query_block, max_streams); return query_block_interpreter.execute(); diff --git a/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp b/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp index 1e2b102d0c6..b031007c3c7 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp @@ -89,7 +89,7 @@ void executeExpression( { if (expr_actions && !expr_actions->getActions().empty()) { - pipeline.transform([&](auto & stream) { // the data stream changes: wrap it with ExpressionBlockInputStream + pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, expr_actions, log->identifier()); stream->setExtraInfo(extra_info); }); diff --git a/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.cpp b/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.cpp index 6c876078d10..386c8158328 100644 --- a/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.cpp +++ b/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.cpp @@ -179,7 +179,7 @@ std::tuple doGenJoinOtherConditionAction( if (join.other_conditions_size() == 0 && join.other_eq_conditions_from_in_size() == 0) return {nullptr, "", ""}; - DAGExpressionAnalyzer dag_analyzer(source_columns, context); // a new dag analyzer is created here + DAGExpressionAnalyzer dag_analyzer(source_columns, context); ExpressionActionsChain chain; String filter_column_for_other_condition; @@ -190,7 +190,7 @@ std::tuple doGenJoinOtherConditionAction( { condition_vector.push_back(&c); } - filter_column_for_other_condition = dag_analyzer.appendWhere(chain, condition_vector); // the other filter does not affect the existing schema + filter_column_for_other_condition = dag_analyzer.appendWhere(chain, condition_vector); } String filter_column_for_other_eq_condition; @@ -201,7 +201,7 @@ std::tuple doGenJoinOtherConditionAction( { condition_vector.push_back(&c); } - filter_column_for_other_eq_condition = dag_analyzer.appendWhere(chain, condition_vector); // the other-eq filter does not affect the existing schema + filter_column_for_other_eq_condition = dag_analyzer.appendWhere(chain, condition_vector); } return {chain.getLastActions(), std::move(filter_column_for_other_condition), std::move(filter_column_for_other_eq_condition)}; @@ -230,7 +230,7 @@ String TiFlashJoin::genMatchHelperName(const Block & header1, const Block & head { match_helper_name = fmt::format("{}{}", Join::match_helper_prefix, ++i); } - return match_helper_name; // a unique name + return match_helper_name; } NamesAndTypes TiFlashJoin::genColumnsForOtherJoinFilter( @@ -248,8 +248,7 @@ NamesAndTypes TiFlashJoin::genColumnsForOtherJoinFilter( } return true; }; - // assert that every column of the probe side original block can be found in the probe actions - if (unlikely(!is_prepare_actions_valid(build_side_index == 1 ? left_input_header : right_input_header, probe_prepare_join_actions))) // can a ternary expression really be used in an argument like this? + if (unlikely(!is_prepare_actions_valid(build_side_index == 1 ? left_input_header : right_input_header, probe_prepare_join_actions))) { throw TiFlashException("probe_prepare_join_actions isn't valid", Errors::Coprocessor::Internal); } @@ -296,9 +295,9 @@ NamesAndTypes TiFlashJoin::genColumnsForOtherJoinFilter( bool make_nullable = build_side_index == 1 ? 
join.join_type() == tipb::JoinType::TypeRightOuterJoin : join.join_type() == tipb::JoinType::TypeLeftOuterJoin; - append_new_columns(probe_prepare_join_actions->getSampleBlock(), make_nullable); // new columns produced by the probe side need to be appended + append_new_columns(probe_prepare_join_actions->getSampleBlock(), make_nullable); - return columns_for_other_join_filter; // nulls are filled according to the probe-side function's 1-0 result; if it is the build-side function's 1-0, the row is simply filtered out or ignored + return columns_for_other_join_filter; } NamesAndTypes TiFlashJoin::genJoinOutputColumns( @@ -335,14 +334,13 @@ std::tuple TiFlashJoin::genJoinOtherCondit const Block & right_input_header, const ExpressionActionsPtr & probe_side_prepare_join) const { - // append the original columns of both sides plus the columns generated by the probe side auto columns_for_other_join_filter = genColumnsForOtherJoinFilter( left_input_header, right_input_header, probe_side_prepare_join); - return doGenJoinOtherConditionAction(context, join, columns_for_other_join_filter); // then generate new actions (columns) from the other conditions + return doGenJoinOtherConditionAction(context, join, columns_for_other_join_filter); } std::tuple prepareJoin( @@ -361,7 +359,6 @@ std::tuple prepareJoin( ExpressionActionsChain chain; Names key_names; String filter_column_name; - // as the name says: append the join keys and one side's join filter dag_analyzer.appendJoinKeyAndJoinFilters(chain, keys, join_key_types, key_names, left, is_right_out_join, filters, filter_column_name); return {chain.getLastActions(), std::move(key_names), std::move(filter_column_name)}; } diff --git a/dbms/src/Flash/Mpp/MPPHandler.cpp b/dbms/src/Flash/Mpp/MPPHandler.cpp index 14f1d6e5a05..753653ac7b0 100644 --- a/dbms/src/Flash/Mpp/MPPHandler.cpp +++ b/dbms/src/Flash/Mpp/MPPHandler.cpp @@ -82,7 +82,6 @@ grpc::Status MPPHandler::execute(const ContextPtr & context, mpp::DispatchTaskRe { Stopwatch stopwatch; task = MPPTask::newTask(task_request.meta(), context); - task->prepare(task_request); addRetryRegion(context, response); diff --git a/dbms/src/Flash/Mpp/MPPTask.cpp b/dbms/src/Flash/Mpp/MPPTask.cpp index c2d5b4ccc94..655e6c724cb 100644 --- a/dbms/src/Flash/Mpp/MPPTask.cpp +++ b/dbms/src/Flash/Mpp/MPPTask.cpp @@ -149,7 +149,6 @@ void MPPTask::finishWrite() void MPPTask::run() { - // schedule the task on the thread pool and detach newThreadManager()->scheduleThenDetach(true, "MPPTask", [self = shared_from_this()] { self->runImpl(); }); } @@ -214,7 +213,6 @@ void MPPTask::initExchangeReceivers() if (status != RUNNING) throw Exception("exchange receiver map can not be initialized, because the task is not in running state"); - // since this is push mode, act only after data arrives receiver_set_local->addExchangeReceiver(executor_id, exchange_receiver); } return true; @@ -341,8 +339,7 @@ void MPPTask::prepare(const mpp::DispatchTaskRequest & task_request) void MPPTask::preprocess() { - auto start_time = Clock::now(); - // register some receivers + auto start_time = Clock::now(); initExchangeReceivers(); LOG_DEBUG(log, "init exchange receiver done"); query_executor_holder.set(queryExecute(*context)); @@ -391,7 +388,6 @@ void MPPTask::runImpl() schedule_entry.setNeededThreads(estimateCountOfNewThreads()); LOG_DEBUG(log, "Estimate new thread count of query: {} including tunnel_threads: {}, receiver_threads: {}", schedule_entry.getNeededThreads(), dag_context->tunnel_set->getExternalThreadCnt(), new_thread_count_of_mpp_receiver); - // similar to waiting on a channel in golang scheduleOrWait(); LOG_INFO(log, "task starts running"); diff --git a/dbms/src/Flash/Mpp/MPPTaskScheduleEntry.h b/dbms/src/Flash/Mpp/MPPTaskScheduleEntry.h index bc5522dfdfe..60ccb9297c0 100644 --- 
a/dbms/src/Flash/Mpp/MPPTaskScheduleEntry.h +++ b/dbms/src/Flash/Mpp/MPPTaskScheduleEntry.h @@ -52,7 +52,6 @@ class MPPTaskScheduleEntry int needed_threads; std::mutex schedule_mu; - // condition variable std::condition_variable schedule_cv; ScheduleState schedule_state; const LoggerPtr log; diff --git a/dbms/src/Flash/Mpp/MPPTunnelSet.cpp b/dbms/src/Flash/Mpp/MPPTunnelSet.cpp index a3ba44127e3..3712172aa7c 100644 --- a/dbms/src/Flash/Mpp/MPPTunnelSet.cpp +++ b/dbms/src/Flash/Mpp/MPPTunnelSet.cpp @@ -243,12 +243,10 @@ void MPPTunnelSetBase::fineGrainedShuffleWrite( template void MPPTunnelSetBase::registerTunnel(const MPPTaskId & receiver_task_id, const TunnelPtr & tunnel) -{ - // tunnels are registered in the map +{ if (receiver_task_id_to_index_map.find(receiver_task_id) != receiver_task_id_to_index_map.end()) throw Exception(fmt::format("the tunnel {} has been registered", tunnel->id())); - // the tunnels are just a vector receiver_task_id_to_index_map[receiver_task_id] = tunnels.size(); tunnels.push_back(tunnel); if (!tunnel->isLocal() && !tunnel->isAsync()) diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index 7b89ed431c3..ff7cec2b382 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -81,7 +81,6 @@ ExpressionAction ExpressionAction::applyFunction(const FunctionBuilderPtr & func return a; } -// suitable for the repeat source to use: adding the groupingID column ExpressionAction ExpressionAction::addColumn(const ColumnWithTypeAndName & added_column_) { ExpressionAction a; @@ -145,7 +144,7 @@ ExpressionAction ExpressionAction::expandSource(std::shared_ptr ex } -void ExpressionAction::prepare(Block & sample_block) // this is the prepare phase +void ExpressionAction::prepare(Block & sample_block) { /** Constant expressions should be evaluated, and put the result in sample_block. */ @@ -178,7 +177,6 @@ void ExpressionAction::prepare(Block & sample_block) // this is the prepare phase new_column.type = result_type; sample_block.insert(std::move(new_column)); - // both the arguments and the result of execution are columns in the block function->execute(sample_block, arguments, result_position); /// If the result is not a constant, just in case, we will consider the result as unknown. @@ -194,12 +192,11 @@ void ExpressionAction::prepare(Block & sample_block) // this is the prepare phase /// Change the size to 1. 
if (col.column->empty()) - col.column = col.column->cloneResized(1); // a constant column keeps only a single value, no problem + col.column = col.column->cloneResized(1); } } else { - // if it cannot be evaluated eagerly, insert an unknown column directly, carrying its type and name sample_block.insert({nullptr, result_type, result_name}); } @@ -233,7 +230,7 @@ void ExpressionAction::prepare(Block & sample_block) // this is the prepare phase } } - for (const auto & col : columns_added_by_join) // the sample block so far holds the left-side columns; these are the right-side ones + for (const auto & col : columns_added_by_join) sample_block.insert(ColumnWithTypeAndName(nullptr, col.type, col.name)); break; @@ -267,7 +264,7 @@ void ExpressionAction::prepare(Block & sample_block) // this is the prepare phase ColumnWithTypeAndName column = sample_block.getByName(name); if (!alias.empty()) column.name = alias; - new_block.insert(std::move(column)); // effectively a direct move (a new block is used because some of the earlier columns may be dropped) + new_block.insert(std::move(column)); } sample_block.swap(new_block); @@ -302,7 +299,7 @@ void ExpressionAction::prepare(Block & sample_block) // this is the prepare phase } -void ExpressionAction::execute(Block & block) const // the execution phase +void ExpressionAction::execute(Block & block) const { if (type == REMOVE_COLUMN || type == COPY_COLUMN) if (!block.has(source_name)) @@ -321,10 +318,10 @@ void ExpressionAction::execute(Block & block) const // the execution phase { if (!block.has(argument_names[i])) throw Exception("Not found column: '" + argument_names[i] + "'", ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); - arguments[i] = block.getPositionByName(argument_names[i]); // find the column's offset + arguments[i] = block.getPositionByName(argument_names[i]); } - size_t num_columns_without_result = block.columns(); // get the offset of the result column + size_t num_columns_without_result = block.columns(); block.insert({nullptr, result_type, result_name}); function->execute(block, arguments, num_columns_without_result); @@ -343,7 +340,7 @@ void ExpressionAction::execute(Block & block) const // the execution phase case EXPAND: { - expand->replicateAndFillNull(block); // the repeat execution phase fills the block directly + expand->replicateAndFillNull(block); break; } @@ -492,7 +489,6 @@ void ExpressionActions::addImpl(ExpressionAction action, Names & new_names) arguments[i] = sample_block.getByName(action.argument_names[i]); } - // normally default functions just use the default creator action.function = action.function_builder->build(arguments, action.collator); action.result_type = action.function->getReturnType(); } @@ -751,7 +747,7 @@ BlockInputStreamPtr ExpressionActions::createStreamWithNonJoinedDataIfFullOrRigh return {}; } -void ExpressionActionsChain::addStep() // only prepares the input columns of the new step for the successor +void ExpressionActionsChain::addStep() { if (steps.empty()) throw Exception("Cannot add action to empty ExpressionActionsChain", ErrorCodes::LOGICAL_ERROR); diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 3cc7ae92874..1a849dee0c7 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -539,7 +539,7 @@ void insertRowToList(Join::RowRefList * list, Join::RowRefList * elem, Block * s { elem->next = list->next; // NOLINT(clang-analyzer-core.NullDereference) list->next = elem; - elem->block = stored_block; // a list structure, because of map-all + elem->block = stored_block; elem->row_num = index; } @@ -579,7 +579,7 @@ struct Inserter * That is, the former second element, if it was, will be the third, and so on. 
*/ auto elem = reinterpret_cast(pool.alloc(sizeof(MappedType))); - insertRowToList(&emplace_result.getMapped(), elem, stored_block, i); // the hash table keeps references to the stored block and its row number; this list serves as the value for the hash key + insertRowToList(&emplace_result.getMapped(), elem, stored_block, i); } } }; @@ -834,7 +834,7 @@ void recordFilteredRows(const Block & block, const String & filter_column, Colum PaddedPODArray & mutable_null_map = static_cast(*mutable_null_map_holder).getData(); const auto & nested_column = column->isColumnNullable() ? static_cast(*column).getNestedColumnPtr() : column; - for (size_t i = 0, size = nested_column->size(); i < size; ++i) // if the companion column cannot yield an int, does that also count as null? + for (size_t i = 0, size = nested_column->size(); i < size; ++i) mutable_null_map[i] |= (!nested_column->getInt(i)); null_map_holder = std::move(mutable_null_map_holder); @@ -1373,9 +1373,9 @@ void Join::handleOtherConditions(Block & block, std::unique_ptr { other_condition_ptr->execute(block); - auto filter_column = ColumnUInt8::create(); // create a u8 column to hold the true/false result + auto filter_column = ColumnUInt8::create(); auto & filter = filter_column->getData(); - filter.assign(block.rows(), static_cast(1)); // just initialize everything to 1? + filter.assign(block.rows(), static_cast(1)); if (!other_filter_column.empty()) { mergeNullAndFilterResult(block, filter, other_filter_column, false); @@ -1564,7 +1564,6 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr /// Memoize key columns to work with. for (size_t i = 0; i < keys_size; ++i) { - // since ColumnPtr derives from intrusive_ptr, get() yields the raw pointer of this type (the raw column) key_columns[i] = block.getByName(key_names_left[i]).column.get(); if (ColumnPtr converted = key_columns[i]->convertToFullColumnIfConst()) @@ -1577,12 +1576,10 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr /// Keys with NULL value in any column won't join to anything. ColumnPtr null_map_holder; ConstNullMapPtr null_map{}; - // extract the null attributes on the join keys extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map); /// reuse null_map to record the filtered rows, the rows contains NULL or does not /// match the join filter won't join to anything - // effectively merges the null attribute of the left filter column into the null map as well recordFilteredRows(block, left_filter_column, null_map_holder, null_map); size_t existing_columns = block.columns(); @@ -1617,12 +1614,12 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr /// Add new columns to the block. size_t num_columns_to_add = sample_block_with_columns_to_add.columns(); MutableColumns added_columns; - added_columns.reserve(num_columns_to_add); // create the columns that need to be newly added + added_columns.reserve(num_columns_to_add); std::vector right_table_column_indexes; for (size_t i = 0; i < num_columns_to_add; ++i) { - right_table_column_indexes.push_back(i + existing_columns); // record the offsets where they are inserted + right_table_column_indexes.push_back(i + existing_columns); } std::vector right_indexes; diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index d8bfe2afa6e..63db25d0d99 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -39,7 +39,7 @@ struct ProbeProcessInfo; * JOIN-s could be of nine types: ANY/ALL × LEFT/INNER/RIGHT/FULL, and also CROSS. * * If ANY is specified - then select only one row from the "right" table, (first encountered row), even if there was more matching rows. 
- * If ALL is specified - usual JOIN, when rows are multiplied by number of matching rows from the "right" table. ALL replicates rows + * If ALL is specified - usual JOIN, when rows are multiplied by number of matching rows from the "right" table. * ANY is more efficient. * * If INNER is specified - leave only rows that have matching rows from "right" table. diff --git a/dbms/src/Interpreters/NullableUtils.cpp b/dbms/src/Interpreters/NullableUtils.cpp index 44cb13c0d92..cf8975f8b80 100644 --- a/dbms/src/Interpreters/NullableUtils.cpp +++ b/dbms/src/Interpreters/NullableUtils.cpp @@ -26,7 +26,6 @@ void extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ColumnPtr & nul return; const ColumnNullable & column_nullable = static_cast(*column); - // take the companion byte map and the underlying column from the nullable column null_map = &column_nullable.getNullMapData(); null_map_holder = column_nullable.getNullMapColumnPtr(); column = &column_nullable.getNestedColumn(); @@ -50,7 +49,6 @@ void extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ColumnPtr & nul PaddedPODArray & mutable_null_map = static_cast(*mutable_null_map_holder).getData(); const PaddedPODArray & other_null_map = column_nullable.getNullMapData(); - // for join key columns, null in any column means null, so OR the maps here for (size_t i = 0, size = mutable_null_map.size(); i < size; ++i) mutable_null_map[i] |= other_null_map[i]; diff --git a/dbms/src/Interpreters/sortBlock.cpp b/dbms/src/Interpreters/sortBlock.cpp index 438a14b42bd..9995329b833 100644 --- a/dbms/src/Interpreters/sortBlock.cpp +++ b/dbms/src/Interpreters/sortBlock.cpp @@ -410,7 +410,6 @@ void sortBlock(Block & block, const SortDescription & description, size_t limit) : block.safeGetByPosition(description[0].column_number).column.get(); IColumn::Permutation perm; - // the permutation is a reordering of the column's offsets if (NeedCollation(column, description[0])) column->getPermutation(*description[0].collator, reverse, limit, description[0].nulls_direction, perm); else @@ -418,7 +417,7 @@ void sortBlock(Block & block, const SortDescription & description, size_t limit) size_t columns = block.columns(); for (size_t i = 0; i < columns; ++i) - block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->permute(perm, limit); // reorganize the column data according to the permuted offsets + block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->permute(perm, limit); } else { diff --git a/dbms/src/TestUtils/mockExecutor.cpp b/dbms/src/TestUtils/mockExecutor.cpp index e3b02019f15..1220e873a32 100644 --- a/dbms/src/TestUtils/mockExecutor.cpp +++ b/dbms/src/TestUtils/mockExecutor.cpp @@ -96,7 +96,7 @@ void DAGRequestBuilder::initDAGRequest(tipb::DAGRequest & dag_request) else dag_request.set_encode_type(tipb::EncodeType::TypeDefault); - for (size_t i = 0; i < root->output_schema.size(); ++i) // set the mock dag request's output offsets according to the root executor's output schema + for (size_t i = 0; i < root->output_schema.size(); ++i) dag_request.add_output_offsets(i); } From edaa6a2801d066bd9e846bbfeb3ce326c2b97407 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Wed, 28 Dec 2022 22:50:10 +0800 Subject: [PATCH 10/31] make fmt Signed-off-by: AilinKid <3148019@qq.com> --- dbms/src/Core/Block.h | 2 +- dbms/src/DataStreams/SquashingTransform.cpp | 1 + .../Coprocessor/DAGExpressionAnalyzer.cpp | 2 +- .../Coprocessor/DAGQueryBlockInterpreter.cpp | 10 ++++----- dbms/src/Flash/Coprocessor/InterpreterDAG.cpp | 2 +- .../Flash/Coprocessor/InterpreterUtils.cpp | 2 +- .../Coprocessor/JoinInterpreterHelper.cpp | 2 +- dbms/src/Flash/Mpp/MPPTunnelSet.cpp | 2 +- dbms/src/Interpreters/Expand.h | 
2 +- dbms/src/Interpreters/Join.cpp | 22 +++++-------------- 10 files changed, 19 insertions(+), 28 deletions(-) diff --git a/dbms/src/Core/Block.h b/dbms/src/Core/Block.h index a1af433bbc9..0d337d6d3e2 100644 --- a/dbms/src/Core/Block.h +++ b/dbms/src/Core/Block.h @@ -38,7 +38,7 @@ class Context; class Block { -private: +private: using Container = ColumnsWithTypeAndName; using IndexByName = std::map; diff --git a/dbms/src/DataStreams/SquashingTransform.cpp b/dbms/src/DataStreams/SquashingTransform.cpp index 2425435d90e..d018deaed96 100644 --- a/dbms/src/DataStreams/SquashingTransform.cpp +++ b/dbms/src/DataStreams/SquashingTransform.cpp @@ -97,6 +97,7 @@ void SquashingTransform::append(Block && block) } } + bool SquashingTransform::isEnoughSize(size_t rows, size_t bytes) const { return (!min_block_size_rows && !min_block_size_bytes) diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index 18fd7c507d8..dfbf1a261af 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -1078,7 +1078,7 @@ bool DAGExpressionAnalyzer::appendJoinKeyAndJoinFilters( std::unordered_set needed_columns; for (const auto & c : getCurrentInputColumns()) needed_columns.insert(c.name); - for (const auto & s : key_names) + for (const auto & s : key_names) needed_columns.insert(s); if (!filter_column_name.empty()) needed_columns.insert(filter_column_name); diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 4678b854012..6fbf8b59d6c 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -505,14 +505,14 @@ void DAGQueryBlockInterpreter::handleExchangeReceiver(DAGPipeline & pipeline) /*stream_id=*/enable_fine_grained_shuffle ? i : 0); exchange_receiver_io_input_streams.push_back(stream); stream->setExtraInfo(extra_info); - pipeline.streams.push_back(stream); + pipeline.streams.push_back(stream); } NamesAndTypes source_columns; for (const auto & col : pipeline.firstStream()->getHeader()) { source_columns.emplace_back(col.name, col.type); } - analyzer = std::make_unique(std::move(source_columns), context); + analyzer = std::make_unique(std::move(source_columns), context); } // for tests, we need to mock ExchangeReceiver blockInputStream as the source stream. 
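The handleProjection hunk below relies on UniqueNameGenerator::toUniqueName to keep projection output aliases distinct. A minimal sketch of that deduplication idea, using a simplified stand-in class rather than TiFlash's actual implementation:

```cpp
#include <iostream>
#include <string>
#include <unordered_set>

// Simplified stand-in for the UniqueNameGenerator used in handleProjection:
// each projection output gets an alias that is unique within the projection,
// by appending a counter whenever the plain name is already taken.
class UniqueNameGeneratorSketch
{
public:
    std::string toUniqueName(const std::string & name)
    {
        std::string candidate = name;
        int suffix = 0;
        while (!used.insert(candidate).second) // insert fails => name already taken
            candidate = name + "_" + std::to_string(++suffix);
        return candidate;
    }

private:
    std::unordered_set<std::string> used;
};

int main()
{
    UniqueNameGeneratorSketch gen;
    std::cout << gen.toUniqueName("s1") << '\n'; // "s1"
    std::cout << gen.toUniqueName("s1") << '\n'; // "s1_1", so project_cols carries no duplicate aliases
}
```
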
@@ -538,12 +538,12 @@ void DAGQueryBlockInterpreter::handleProjection(DAGPipeline & pipeline, const ti UniqueNameGenerator unique_name_generator; for (const auto & expr : projection.exprs()) { - auto expr_name = dag_analyzer.getActions(expr, last_step.actions); - last_step.required_output.emplace_back(expr_name); + auto expr_name = dag_analyzer.getActions(expr, last_step.actions); + last_step.required_output.emplace_back(expr_name); const auto & col = last_step.actions->getSampleBlock().getByName(expr_name); String alias = unique_name_generator.toUniqueName(col.name); output_columns.emplace_back(alias, col.type); - project_cols.emplace_back(col.name, alias); + project_cols.emplace_back(col.name, alias); } executeExpression(pipeline, chain.getLastActions(), log, "before projection"); executeProject(pipeline, project_cols, "projection"); diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp index 0869c2c653f..61249f19642 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp @@ -55,7 +55,7 @@ BlockInputStreams InterpreterDAG::executeQueryBlock(DAGQueryBlock & query_block) } DAGQueryBlockInterpreter query_block_interpreter( context, - input_streams_vec, + input_streams_vec, query_block, max_streams); return query_block_interpreter.execute(); diff --git a/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp b/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp index b031007c3c7..d2e18a36e00 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp @@ -89,7 +89,7 @@ void executeExpression( { if (expr_actions && !expr_actions->getActions().empty()) { - pipeline.transform([&](auto & stream) { + pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, expr_actions, log->identifier()); stream->setExtraInfo(extra_info); }); diff --git a/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.cpp b/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.cpp index 386c8158328..275042fddb0 100644 --- a/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.cpp +++ b/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.cpp @@ -295,7 +295,7 @@ NamesAndTypes TiFlashJoin::genColumnsForOtherJoinFilter( bool make_nullable = build_side_index == 1 ? 
join.join_type() == tipb::JoinType::TypeRightOuterJoin : join.join_type() == tipb::JoinType::TypeLeftOuterJoin; - append_new_columns(probe_prepare_join_actions->getSampleBlock(), make_nullable); + append_new_columns(probe_prepare_join_actions->getSampleBlock(), make_nullable); return columns_for_other_join_filter; } diff --git a/dbms/src/Flash/Mpp/MPPTunnelSet.cpp b/dbms/src/Flash/Mpp/MPPTunnelSet.cpp index 3712172aa7c..a308a9717a3 100644 --- a/dbms/src/Flash/Mpp/MPPTunnelSet.cpp +++ b/dbms/src/Flash/Mpp/MPPTunnelSet.cpp @@ -243,7 +243,7 @@ void MPPTunnelSetBase::fineGrainedShuffleWrite( template void MPPTunnelSetBase::registerTunnel(const MPPTaskId & receiver_task_id, const TunnelPtr & tunnel) -{ +{ if (receiver_task_id_to_index_map.find(receiver_task_id) != receiver_task_id_to_index_map.end()) throw Exception(fmt::format("the tunnel {} has been registered", tunnel->id())); diff --git a/dbms/src/Interpreters/Expand.h b/dbms/src/Interpreters/Expand.h index d567e58e311..c08aa6230f6 100644 --- a/dbms/src/Interpreters/Expand.h +++ b/dbms/src/Interpreters/Expand.h @@ -138,4 +138,4 @@ class Expand private: GroupingSets group_sets_names; }; -} // namespace DB \ No newline at end of file +} // namespace DB diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 1a849dee0c7..df3da902d55 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -861,7 +861,6 @@ void Join::insertFromBlock(const Block & block, size_t stream_index) if (unlikely(!initialized)) throw Exception("Logical error: Join was not initialized", ErrorCodes::LOGICAL_ERROR); - // materialize a block Block * stored_block = nullptr; { std::lock_guard lk(blocks_lock); @@ -1577,7 +1576,6 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr ColumnPtr null_map_holder; ConstNullMapPtr null_map{}; extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map); - /// reuse null_map to record the filtered rows, the rows contains NULL or does not /// match the join filter won't join to anything recordFilteredRows(block, left_filter_column, null_map_holder, null_map); @@ -1614,7 +1612,7 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr /// Add new columns to the block. size_t num_columns_to_add = sample_block_with_columns_to_add.columns(); MutableColumns added_columns; - added_columns.reserve(num_columns_to_add); + added_columns.reserve(num_columns_to_add); std::vector right_table_column_indexes; for (size_t i = 0; i < num_columns_to_add; ++i) @@ -1642,17 +1640,17 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr if (((kind == ASTTableJoin::Kind::Inner || kind == ASTTableJoin::Kind::Right) && strictness == ASTTableJoin::Strictness::Any) || kind == ASTTableJoin::Kind::Anti) - filter = std::make_unique(rows); // used to remove elements from the right block + filter = std::make_unique(rows); /// Used with ALL ... 
JOIN IColumn::Offset current_offset = 0; std::unique_ptr offsets_to_replicate; if (strictness == ASTTableJoin::Strictness::All) - offsets_to_replicate = std::make_unique(rows); // a temporary marker during the join, used to replicate rows in the left block + offsets_to_replicate = std::make_unique(rows); switch (type) - { // after the join, the matched right-side rows have been appended to the added columns, and each row's replicate offset = the number of joined rows + { #define M(TYPE) \ case Join::Type::TYPE: \ joinBlockImplType>::Type>( \ @@ -1679,7 +1677,7 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr } FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::random_join_prob_failpoint); for (size_t i = 0; i < num_columns_to_add; ++i) - { // insert the added columns into the left-side block + { const ColumnWithTypeAndName & sample_col = sample_block_with_columns_to_add.getByPosition(i); block.insert(ColumnWithTypeAndName(std::move(added_columns[i]), sample_col.type, sample_col.name)); } @@ -1701,14 +1699,6 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr /// If ALL ... JOIN - we replicate all the columns except the new ones. if (offsets_to_replicate) { - /* - * a, b, c, d offset - * 1, y 1 x 2 at this point the right-side positions are already filled, but the left-side block's are not; the offsets are meant for the left rows, which are replicated to align with the right-side rows - * 2, z 1 x - * - * 1, y 1 x 2 - * 1, y 1 x */ for (size_t i = 0; i < existing_columns; ++i) { block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicateRange(probe_process_info.start_row, probe_process_info.end_row, *offsets_to_replicate); @@ -1730,7 +1720,7 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr if (!other_filter_column.empty() || !other_eq_filter_from_in_column.empty()) { if (!offsets_to_replicate) - throw Exception("Should not reach here, the strictness of join with other condition must be ALL"); // handle the other conditions + throw Exception("Should not reach here, the strictness of join with other condition must be ALL"); handleOtherConditions(block, filter, offsets_to_replicate, right_table_column_indexes); } } From 7adaf9b0978ba363218060860bdbddda0d1a4048 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Tue, 10 Jan 2023 14:52:40 +0800 Subject: [PATCH 11/31] rename repeat as expand Signed-off-by: AilinKid <3148019@qq.com> --- dbms/src/Debug/MockExecutor/ExpandBinder.cpp | 2 +- dbms/src/Debug/MockExecutor/ExpandBinder.h | 2 +- .../Coprocessor/DAGExpressionAnalyzer.cpp | 4 +- .../Coprocessor/DAGQueryBlockInterpreter.cpp | 10 +- .../Coprocessor/DAGQueryBlockInterpreter.h | 2 +- .../Coprocessor/collectOutputFieldTypes.cpp | 5 +- dbms/src/Flash/Mpp/MPPTask.cpp | 2 +- dbms/src/Flash/Planner/PlanType.h | 3 +- .../Flash/Planner/Plans/PhysicalExpand.cpp | 36 ++--- dbms/src/Flash/Planner/Plans/PhysicalExpand.h | 4 +- .../src/Flash/tests/gtest_filter_executor.cpp | 147 +----------------- dbms/src/Flash/tests/gtest_interpreter.cpp | 7 - .../src/Flash/tests/gtest_repeat_executor.cpp | 46 +++--- dbms/src/Interpreters/Expand.cpp | 13 +- dbms/src/Interpreters/Expand.h | 2 +- dbms/src/Interpreters/Join.cpp | 3 +- dbms/src/Interpreters/Join.h | 2 +- .../Interpreters/tests/gtest_block_repeat.cpp | 58 +++---- dbms/src/TestUtils/mockExecutor.cpp | 4 +- dbms/src/TestUtils/mockExecutor.h | 4 +- 20 files changed, 102 insertions(+), 254 deletions(-) diff --git a/dbms/src/Debug/MockExecutor/ExpandBinder.cpp b/dbms/src/Debug/MockExecutor/ExpandBinder.cpp index 0eb35b71c62..63fbfa28582 100644 --- a/dbms/src/Debug/MockExecutor/ExpandBinder.cpp +++ b/dbms/src/Debug/MockExecutor/ExpandBinder.cpp @@ -40,7 +40,7 @@ bool 
ExpandBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collat return children[0]->toTiPBExecutor(children_executor, collator_id, mpp_info, context); } -ExecutorBinderPtr compileRepeat(ExecutorBinderPtr input, size_t & executor_index, MockVVecGroupingNameVec grouping_set_columns, std::set in_set) +ExecutorBinderPtr compileExpand(ExecutorBinderPtr input, size_t & executor_index, MockVVecGroupingNameVec grouping_set_columns, std::set in_set) { DAGSchema output_schema; for (const auto & field : input->output_schema) diff --git a/dbms/src/Debug/MockExecutor/ExpandBinder.h b/dbms/src/Debug/MockExecutor/ExpandBinder.h index d1b4c7d980f..405b0b6e610 100644 --- a/dbms/src/Debug/MockExecutor/ExpandBinder.h +++ b/dbms/src/Debug/MockExecutor/ExpandBinder.h @@ -39,5 +39,5 @@ class ExpandBinder : public ExecutorBinder MockVVecGroupingNameVec grouping_sets_columns; }; -ExecutorBinderPtr compileRepeat(ExecutorBinderPtr input, size_t & executor_index, MockVVecGroupingNameVec grouping_set_columns, std::set set); +ExecutorBinderPtr compileExpand(ExecutorBinderPtr input, size_t & executor_index, MockVVecGroupingNameVec grouping_set_columns, std::set set); } // namespace DB::mock diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index dfbf1a261af..aa2c7014a5f 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -857,9 +857,9 @@ ExpressionActionsPtr DAGExpressionAnalyzer::appendExpand( chain.finalize(); chain.clear(); - auto & after_repeat_step = initAndGetLastStep(chain); + auto & after_expand_step = initAndGetLastStep(chain); for (const auto & column : getCurrentInputColumns()) - after_repeat_step.required_output.push_back(column.name); + after_expand_step.required_output.push_back(column.name); return before_expand; } diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 6fbf8b59d6c..0013b4c5af1 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -586,7 +586,7 @@ void DAGQueryBlockInterpreter::handleWindowOrder(DAGPipeline & pipeline, const t // 3. construct a final projection, even if it's not necessary. just construct it. // Talking about projection, it has the following rules. // 1. if the query block does not contain agg, then the final project is the same as the source Executor -// 2. if the query block contains agg/repeat, then the final project is the same as agg/repeat Executor +// 2. if the query block contains agg/expand, then the final project is the same as agg/expand Executor // 3. if the cop task may contains more then 1 query block, and the current query block is not the root // query block, then the project should add an alias for each column that needs to be projected, something // like final_project.emplace_back(col.name, query_block.qb_column_prefix + col.name); @@ -694,12 +694,12 @@ void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline) recordProfileStreams(pipeline, query_block.limit_or_topn_name); } - // execute the repeat source OP after all filter/limits and so on. - // since repeat source OP has some row replication work to do, place it after limit can reduce some unnecessary burden. + // execute the expand OP after all filter/limits and so on. + // since expand OP has some row replication work to do, place it after limit can reduce some unnecessary burden. 
// and put it before the final projection, because we should recognize some base col as grouping set col before change their alias. if (res.before_expand) { - executeExpandSource(pipeline, res.before_expand); + executeExpand(pipeline, res.before_expand); recordProfileStreams(pipeline, query_block.expand_name); } @@ -746,7 +746,7 @@ void DAGQueryBlockInterpreter::executeLimit(DAGPipeline & pipeline) } } -void DAGQueryBlockInterpreter::executeExpandSource(DAGPipeline & pipeline, const ExpressionActionsPtr & expr) +void DAGQueryBlockInterpreter::executeExpand(DAGPipeline & pipeline, const ExpressionActionsPtr & expr) { pipeline.transform([&](auto &stream) { stream = std::make_shared(stream, expr); diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h index eae5aa34cec..48edf039ff5 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h @@ -69,7 +69,7 @@ class DAGQueryBlockInterpreter void executeWindowOrder(DAGPipeline & pipeline, SortDescription sort_desc, bool enable_fine_grained_shuffle); void executeOrder(DAGPipeline & pipeline, const NamesAndTypes & order_columns); void executeLimit(DAGPipeline & pipeline); - void executeExpandSource(DAGPipeline & pipeline, const ExpressionActionsPtr & expr); + void executeExpand(DAGPipeline & pipeline, const ExpressionActionsPtr & expr); void executeWindow( DAGPipeline & pipeline, WindowDescription & window_description, diff --git a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp index 923afd56914..3b5c94a81d8 100644 --- a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp +++ b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp @@ -100,9 +100,8 @@ bool collectForTableScan(std::vector & output_field_types, cons return false; } -bool collectForRepeat(std::vector &out_field_types, const tipb::Executor & executor) +bool collectForExpand(std::vector &out_field_types, const tipb::Executor & executor) { - auto &out_child_fields = out_field_types; // collect output_field_types of children getChildren(executor).forEach([&out_child_fields](const tipb::Executor & child) { @@ -231,7 +230,7 @@ bool collectForExecutor(std::vector & output_field_types, const case tipb::ExecType::TypeJoin: return collectForJoin(output_field_types, executor); case tipb::ExecType::TypeExpand: - return collectForRepeat(output_field_types, executor); + return collectForExpand(output_field_types, executor); default: return true; } diff --git a/dbms/src/Flash/Mpp/MPPTask.cpp b/dbms/src/Flash/Mpp/MPPTask.cpp index 655e6c724cb..4f97a94afd7 100644 --- a/dbms/src/Flash/Mpp/MPPTask.cpp +++ b/dbms/src/Flash/Mpp/MPPTask.cpp @@ -339,7 +339,7 @@ void MPPTask::prepare(const mpp::DispatchTaskRequest & task_request) void MPPTask::preprocess() { - auto start_time = Clock::now(); + auto start_time = Clock::now(); initExchangeReceivers(); LOG_DEBUG(log, "init exchange receiver done"); query_executor_holder.set(queryExecute(*context)); diff --git a/dbms/src/Flash/Planner/PlanType.h b/dbms/src/Flash/Planner/PlanType.h index 4c4d6d283d5..cfbdff03e77 100644 --- a/dbms/src/Flash/Planner/PlanType.h +++ b/dbms/src/Flash/Planner/PlanType.h @@ -15,6 +15,7 @@ #pragma once #include +#include "Common/Exception.h" namespace DB { @@ -37,7 +38,7 @@ struct PlanType MockTableScan = 12, Join = 13, GetResult = 14, - Repeat = 15, + Expand = 15, }; PlanTypeEnum enum_value; diff --git 
a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp index 51eaaeaa4c3..9a52c21b62f 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp +++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp @@ -41,51 +41,51 @@ PhysicalPlanNodePtr PhysicalExpand::build( if (unlikely(expand.grouping_sets().empty())) { //should not reach here - throw TiFlashException("Repeat executor without grouping sets", Errors::Planner::BadRequest); + throw TiFlashException("Expand executor without grouping sets", Errors::Planner::BadRequest); } DAGExpressionAnalyzer analyzer{child->getSchema(), context}; - ExpressionActionsPtr before_repeat_actions = PhysicalPlanHelper::newActions(child->getSampleBlock(), context); + ExpressionActionsPtr before_expand_actions = PhysicalPlanHelper::newActions(child->getSampleBlock(), context); - auto shared_repeat = analyzer.buildExpandGroupingColumns(expand, before_repeat_actions); + auto shared_expand = analyzer.buildExpandGroupingColumns(expand, before_expand_actions); // construct sample block. - NamesAndTypes repeat_output_columns; + NamesAndTypes expand_output_columns; auto child_header = child->getSchema(); for (const auto & one : child_header) { - repeat_output_columns.emplace_back(one.name, shared_repeat->isInGroupSetColumn(one.name)? makeNullable(one.type): one.type); + expand_output_columns.emplace_back(one.name, shared_expand->isInGroupSetColumn(one.name)? makeNullable(one.type): one.type); } - repeat_output_columns.emplace_back(shared_repeat->grouping_identifier_column_name, shared_repeat->grouping_identifier_column_type); + expand_output_columns.emplace_back(shared_expand->grouping_identifier_column_name, shared_expand->grouping_identifier_column_type); - auto physical_repeat = std::make_shared( + auto physical_expand = std::make_shared( executor_id, - repeat_output_columns, + expand_output_columns, log->identifier(), child, - shared_repeat, - Block(repeat_output_columns)); + shared_expand, + Block(expand_output_columns)); - return physical_repeat; + return physical_expand; } -void PhysicalExpand::repeatTransform(DAGPipeline & child_pipeline, Context & context) +void PhysicalExpand::expandTransform(DAGPipeline & child_pipeline, Context & context) { - auto repeat_actions = PhysicalPlanHelper::newActions(child_pipeline.firstStream()->getHeader(), context); - repeat_actions->add(ExpressionAction::expandSource(shared_expand)); - String repeat_extra_info = fmt::format("repeat source, repeat_executor_id = {}", execId()); + auto expand_actions = PhysicalPlanHelper::newActions(child_pipeline.firstStream()->getHeader(), context); + expand_actions->add(ExpressionAction::expandSource(shared_expand)); + String expand_extra_info = fmt::format("expand, expand_executor_id = {}", execId()); child_pipeline.transform([&](auto &stream) { - stream = std::make_shared(stream, repeat_actions); - stream->setExtraInfo(repeat_extra_info); + stream = std::make_shared(stream, expand_actions); + stream->setExtraInfo(expand_extra_info); }); } void PhysicalExpand::transformImpl(DAGPipeline & pipeline, Context & context, size_t max_streams) { child->transform(pipeline, context, max_streams); - repeatTransform(pipeline, context); + expandTransform(pipeline, context); } void PhysicalExpand::finalize(const Names & parent_require) diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.h b/dbms/src/Flash/Planner/Plans/PhysicalExpand.h index a2696affb5b..6c798ad35c3 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.h +++ 
b/dbms/src/Flash/Planner/Plans/PhysicalExpand.h @@ -38,12 +38,12 @@ class PhysicalExpand : public PhysicalUnary const PhysicalPlanNodePtr & child_, const std::shared_ptr & shared_expand, const Block & sample_block_) - : PhysicalUnary(executor_id_, PlanType::Repeat, schema_, req_id, child_) + : PhysicalUnary(executor_id_, PlanType::Expand, schema_, req_id, child_) , shared_expand(shared_expand), sample_block(sample_block_){} void finalize(const Names & parent_require) override; - void repeatTransform(DAGPipeline & child_pipeline, Context & context); + void expandTransform(DAGPipeline & child_pipeline, Context & context); const Block & getSampleBlock() const override; diff --git a/dbms/src/Flash/tests/gtest_filter_executor.cpp b/dbms/src/Flash/tests/gtest_filter_executor.cpp index 68b8c39cca9..72cc171d1c7 100644 --- a/dbms/src/Flash/tests/gtest_filter_executor.cpp +++ b/dbms/src/Flash/tests/gtest_filter_executor.cpp @@ -211,152 +211,7 @@ try } CATCH -TEST_F(FilterExecutorTestRunner, RepeatLogical) -try -{ - /// following tests is ok now for non-planner enabled. - - /// case 1 - auto request = context - .scan("test_db", "test_table") - .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) - .build(context); - /// data flow: - /// - /// s1 s2 - /// "banana" "apple" - /// NULL NULL - /// "banana" "banana" - /// | - /// v - /// s1 s2 groupingID - /// "banana" NULL 1 - /// NULL "apple" 2 - /// NULL NULL 1 - /// NULL NULL 2 - /// "banana" NULL 1 - /// NULL "banana" 2 - /// - executeAndAssertColumnsEqual( - request, - {toNullableVec({"banana", {}, {}, {}, "banana", {}}), - toNullableVec({{}, "apple", {}, {}, {}, "banana"}), - toVec({1,2,1,2,1,2})}); - - /// case 2 - request = context - .scan("test_db", "test_table") - .filter(eq(col("s1"), col("s2"))) - .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) - .build(context); - /// data flow: - /// - /// s1 s2 - /// "banana" "apple" - /// NULL NULL - /// "banana" "banana" - /// | - /// v - /// s1 s2 - /// "banana" "banana" - /// | - /// v - /// s1 s2 groupingID - /// "banana" NULL 1 - /// NULL "banana" 2 - /// - executeAndAssertColumnsEqual( - request, - {toNullableVec({"banana", {}}), - toNullableVec({{}, "banana"}), - toVec({1,2})}); - - /// case 3 - request = context - .scan("test_db", "test_table") - .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) - .filter(eq(col("s1"), col("s2"))) - .build(context); - /// data flow: TiFlash isn't aware of the operation sequence, this filter here will be run before repeat does just like the second test case above. 
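The data-flow diagrams in the removed test above all follow the same per-row rule: one input row becomes one output row per grouping set, with the other grouping columns nulled out and a groupingID tagging each replica. A row-level sketch of that semantics, assuming plain std types instead of TiFlash's Block/Column machinery:

```cpp
#include <optional>
#include <string>
#include <vector>

// Toy row-level version of the Expand semantics shown in the diagrams:
// for grouping sets <{s1}> and <{s2}>, every input row yields two output
// rows, each keeping only its set's column. Illustrative layout only.
struct Row { std::optional<std::string> s1, s2; long grouping_id; };

static std::vector<Row> expandRow(const std::optional<std::string> & s1,
                                  const std::optional<std::string> & s2)
{
    return {
        {s1, std::nullopt, 1}, // grouping set <{s1}>: keep s1, null out s2
        {std::nullopt, s2, 2}, // grouping set <{s2}>: keep s2, null out s1
    };
}

int main()
{
    // ("banana", "apple") -> ("banana", NULL, 1) and (NULL, "apple", 2),
    // matching the first diagram in the removed test.
    auto rows = expandRow(std::string("banana"), std::string("apple"));
    return rows.size() == 2 ? 0 : 1;
}
```
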
- executeAndAssertColumnsEqual( - request, - {toNullableVec({"banana", {}}), - toNullableVec({{}, "banana"}), - toVec({1,2})}); - - /// case 4 - auto const_false = lit(Field(static_cast(0))); - request = context - .scan("test_db", "test_table") - .filter(const_false) // refuse all rows - .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) - .build(context); - executeAndAssertColumnsEqual( - request, - {}); - - /// case 5 (test integrated with aggregation) - request = context - .scan("test_db", "test_table") - .aggregation({Count(col("s1"))}, {col("s2")}) - .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) - .build(context); - /// data flow: - /// - /// s1 s2 - /// "banana" "apple" - /// NULL NULL - /// "banana" "banana" - /// | - /// v - /// count(s1) s2 - /// 1 "apple" - /// 0 NULL - /// 1 "banana" - /// | - /// v - /// count(s1) s2 groupingID - /// 1 NULL 1 - /// NULL "apple" 2 - /// 0 NULL 1 - /// NULL NULL 2 - /// 1 NULL 1 - /// NULL "banana" 2 - /// - executeAndAssertColumnsEqual( - request, - {toNullableVec({1, {}, 0, {}, 1,{}}), - toNullableVec({{}, "apple", {},{},{}, "banana"}), - toVec({1,2,1,2,1,2})}); - - /// case 5 (test integrated with aggregation and projection) - request = context - .scan("test_db", "test_table") - .aggregation({Count(col("s1"))}, {col("s2")}) - .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) - .project({"count(s1)"}) - .build(context); - executeAndAssertColumnsEqual( - request, - {toNullableVec({1, {}, 0, {}, 1,{}})}); - - /// case 6 (test integrated with aggregation and projection and limit) - /// note: by now, limit is executed before repeat does to reduce unnecessary row repeat work. 
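The note above is a pure cost argument: Expand multiplies rows by the number of grouping sets, so running it after the limit shrinks the replication work. A back-of-envelope sketch with hypothetical row counts:

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdio>

// Why the interpreter schedules Expand after Limit inside one query block:
// expanding first replicates every row, limiting first replicates only the
// surviving rows. The numbers below are hypothetical, not from the patch.
int main()
{
    std::size_t rows = 1'000'000, grouping_sets = 2, limit = 100;

    std::size_t expand_then_limit = rows * grouping_sets;                    // 2,000,000 replicated rows
    std::size_t limit_then_expand = std::min(rows, limit) * grouping_sets;   // 200 replicated rows

    std::printf("expand->limit replicates %zu rows, limit->expand only %zu\n",
                expand_then_limit, limit_then_expand);
}
```
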
-// request = context -// .scan("test_db", "test_table") -// .aggregation({Count(col("s1"))}, {col("s2")}) -// .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) -// .project({"count(s1)"}) -// .limit(2) -// .build(context); -// executeAndAssertColumnsEqual( -// request, -// {toNullableVec({1, {}, 0, {}})}); - -} -CATCH - -TEST_F(FilterExecutorTestRunner, convertBool) +TEST_F(FilterExecutorTestRunner, convert_bool) try { { diff --git a/dbms/src/Flash/tests/gtest_interpreter.cpp b/dbms/src/Flash/tests/gtest_interpreter.cpp index b5a2cd80b16..e129c5587a5 100644 --- a/dbms/src/Flash/tests/gtest_interpreter.cpp +++ b/dbms/src/Flash/tests/gtest_interpreter.cpp @@ -50,13 +50,6 @@ class InterpreterExecuteTest : public DB::tests::InterpreterTestUtils TEST_F(InterpreterExecuteTest, SingleQueryBlock) try { - - //auto grouping_sets = MockVecColumnNameVec{MockColumnNameVec{"s1"}, MockColumnNameVec{"s2"}}; - // auto request = context.scan("test_db", "test_table_1").repeat(grouping_sets).build(context); - // { - // ASSERT_BLOCKINPUTSTREAM_EQAUL("", request, 10); - // } - auto request = context.scan("test_db", "test_table_1") .filter(eq(col("s2"), col("s3"))) .aggregation({Max(col("s1"))}, {col("s2"), col("s3")}) diff --git a/dbms/src/Flash/tests/gtest_repeat_executor.cpp b/dbms/src/Flash/tests/gtest_repeat_executor.cpp index 020e5f19d26..1b5aaa6f04e 100644 --- a/dbms/src/Flash/tests/gtest_repeat_executor.cpp +++ b/dbms/src/Flash/tests/gtest_repeat_executor.cpp @@ -19,7 +19,7 @@ namespace DB { namespace tests { -class RepeatExecutorTestRunner : public DB::tests::ExecutorTest +class ExpandExecutorTestRunner : public DB::tests::ExecutorTest { public: void initializeContext() override @@ -36,13 +36,13 @@ class RepeatExecutorTestRunner : public DB::tests::ExecutorTest } }; -TEST_F(RepeatExecutorTestRunner, RepeatLogical) +TEST_F(ExpandExecutorTestRunner, ExpandLogical) try { - /// case 1 + /// case 1 auto request = context .scan("test_db", "test_table") - .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) .build(context); /// data flow: /// @@ -70,7 +70,7 @@ try request = context .scan("test_db", "test_table") .filter(eq(col("s1"), col("s2"))) - .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) .build(context); /// data flow: /// @@ -97,10 +97,10 @@ try /// case 3: this case is only for non-planner mode. /// request = context /// .scan("test_db", "test_table") - /// .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + /// .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) /// .filter(eq(col("s1"), col("s2"))) /// .build(context); - /// data flow: TiFlash isn't aware of the operation sequence, this filter here will be run before repeat does just like the second test case above. 
+ /// data flow: TiFlash isn't aware of the operation sequence, this filter here will be run before expand does just like the second test case above. /// since this case is only succeed under planner-disabled mode, just comment and assert the result here for a note. /// /// executeAndAssertColumnsEqual( @@ -114,7 +114,7 @@ try request = context .scan("test_db", "test_table") .filter(const_false) // refuse all rows - .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) .build(context); executeAndAssertColumnsEqual( request, @@ -133,7 +133,7 @@ try request = context .scan("test_db", "test_table") .aggregation({Count(col("s1"))}, {col("s2")}) - .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) .build(context); /// data flow: /// @@ -167,7 +167,7 @@ try request = context .scan("test_db", "test_table") .aggregation({Count(col("s1"))}, {col("s2")}) - .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) .project({"count(s1)"}) .build(context); executeAndAssertColumnsEqual( @@ -175,11 +175,11 @@ try {toNullableVec({1, {}, 0, {}, 1,{}})}); /// case 6 (test integrated with aggregation and projection and limit) 1 - /// note: by now, limit is executed before repeat does to reduce unnecessary row repeat work. + /// note: by now, limit is executed before expand does to reduce unnecessary row expand work. /// request = context /// .scan("test_db", "test_table") /// .aggregation({Count(col("s1"))}, {col("s2")}) - /// .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + /// .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) /// .limit(2) /// .project({"count(s1)"}) /// .build(context); @@ -197,12 +197,12 @@ try /// 1 "banana" /// | /// v - /// count(s1) s2 // limit precede the repeat OP since they are in the same DAG query block. + /// count(s1) s2 // limit precede the expand OP since they are in the same DAG query block. /// 1 "apple" /// 0 NULL /// | /// v - /// count(s1) s2 groupingID // repeat is always arranged executed after limit to avoid unnecessary replication in the same DAG query block. + /// count(s1) s2 groupingID // expand is always arranged executed after limit to avoid unnecessary replication in the same DAG query block. 
/// 1 NULL 1 /// NULL "apple" 2 /// 0 NULL 1 @@ -227,7 +227,7 @@ try request = context .scan("test_db", "test_table") .aggregation({Count(col("s1"))}, {col("s2")}) - .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) .project({"count(s1)"}) .topN({{"count(s1)", true}}, 2) .build(context); @@ -245,7 +245,7 @@ try /// 1 "banana" | /// | +-------------> Child DAG Query Block /// v | - /// count(s1) s2 groupingID // repeat | + /// count(s1) s2 groupingID // expand | /// 1 NULL 1 | /// NULL "apple" 2 | /// 0 NULL 1 | @@ -277,9 +277,9 @@ try /// 1 | /// ---------------+ /// - /// Note: you can see some difference from this plan and the last one above, since projection between repeat and topN is a SOURCE node, - /// it will isolate whole DAG into two independent DAG query blocks, limit and repeat OP take a place in each one of them. So we - /// couldn't guarantee that letting repeat OP run after limit does, which can't reduce unnecessary replication work. DAG query block + /// Note: you can see some difference from this plan and the last one above, since projection between expand and topN is a SOURCE node, + /// it will isolate whole DAG into two independent DAG query blocks, limit and expand OP take a place in each one of them. So we + /// couldn't guarantee that letting expand OP run after limit does, which can't reduce unnecessary replication work. DAG query block /// division should be blamed here. /// executeAndAssertColumnsEqual( @@ -300,7 +300,7 @@ try request = context .receive("exchange1") .aggregation({Count(col("s1"))}, {col("s2")}) - .repeat(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) .join(context.scan("test_db", "test_table").project({"s2"}), tipb::JoinType::TypeInnerJoin, {col("s2")}) .project({"count(s1)", "groupingID"}) .topN({{"groupingID", true}}, 2) @@ -319,7 +319,7 @@ try /// 1 "banana" | /// | +-------------> Child of Child DAG Query Block /// v | - /// count(s1) s2 groupingID // repeat | + /// count(s1) s2 groupingID // expand | /// 1 NULL 1 | /// NULL "apple" 2 | /// 0 NULL 1 | @@ -372,7 +372,7 @@ CreatingSets Expression: HashJoinProbe: Expression: - RepeatSource: : grouping set [<{count(s1)_collator_46 }><{any(s2)_collator_46 }>] + Expand: : grouping set [<{count(s1)_collator_46 }><{any(s2)_collator_46 }>] Expression: SharedQuery: ParallelAggregating, max_threads: 10, final: true @@ -384,4 +384,4 @@ CATCH /// TODO: more OP combination tests. } // namespace tests -} // namespace DB \ No newline at end of file +} // namespace DB diff --git a/dbms/src/Interpreters/Expand.cpp b/dbms/src/Interpreters/Expand.cpp index 7ddbd8c975d..09bbdd5d662 100644 --- a/dbms/src/Interpreters/Expand.cpp +++ b/dbms/src/Interpreters/Expand.cpp @@ -61,7 +61,7 @@ void Expand::getGroupingSetsDes(FmtBuffer & buffer) const /// for cases like: select count(distinct a), count(distinct b) from t; /// it will generate 2 group set with and , over which we should -/// repeat one more replica of the source rows from the input block and +/// expand one more replica of the source rows from the input block and /// identify it with the grouping id in the appended new column. 
/// /// eg: source block ==> replicated block @@ -111,7 +111,7 @@ void Expand::replicateAndFillNull(Block & block) const { // start from 1. Field grouping_id = j + 1; - added_grouping_id_column[0]->insert(grouping_id); + added_grouping_id_column[0]->insert(grouping_id); } } // todo: for some column overlapping in different grouping set, we should copy the overlapped column as a new column @@ -120,11 +120,11 @@ void Expand::replicateAndFillNull(Block & block) const // replicate the original block rows. size_t existing_columns = block.columns(); - if (offsets_to_replicate && offsets_to_replicate->size() > 0) + if (offsets_to_replicate) { for (size_t i = 0; i < existing_columns; ++i) { - // expand the origin const column, since it may be filled with null value when repeating. + // expand the origin const column, since it may be filled with null value when expanding. if (block.safeGetByPosition(i).column->isColumnConst()) block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->convertToFullColumnIfConst(); @@ -133,8 +133,9 @@ void Expand::replicateAndFillNull(Block & block) const { convertColumnToNullable(block.getByPosition(i)); } - // replicate it. - block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate); + if (!offsets_to_replicate->empty()) + // replicate it. + block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate); } } diff --git a/dbms/src/Interpreters/Expand.h b/dbms/src/Interpreters/Expand.h index c08aa6230f6..33f9f94f024 100644 --- a/dbms/src/Interpreters/Expand.h +++ b/dbms/src/Interpreters/Expand.h @@ -117,7 +117,7 @@ class Expand // replicateAndFillNull is the basic functionality that Expand Operator provided. Briefly, it replicates // origin rows with regard to local grouping sets description, and appending a new column named as groupingID // to illustrate what group this row is targeted for. - void replicateAndFillNull(Block & input) const; + void replicateAndFillNull(Block & block) const; size_t getGroupSetNum() const {return group_sets_names.size();} diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index df3da902d55..aca814f8501 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -866,7 +866,6 @@ void Join::insertFromBlock(const Block & block, size_t stream_index) std::lock_guard lk(blocks_lock); total_input_build_rows += block.rows(); blocks.push_back(block); - // block cp stored_block = &blocks.back(); original_blocks.push_back(block); } @@ -1677,7 +1676,7 @@ void Join::joinBlockImpl(Block & block, const Maps & maps, ProbeProcessInfo & pr } FAIL_POINT_TRIGGER_EXCEPTION(FailPoints::random_join_prob_failpoint); for (size_t i = 0; i < num_columns_to_add; ++i) - { + { const ColumnWithTypeAndName & sample_col = sample_block_with_columns_to_add.getByPosition(i); block.insert(ColumnWithTypeAndName(std::move(added_columns[i]), sample_col.type, sample_col.name)); } diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index 63db25d0d99..abae6268430 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -187,7 +187,7 @@ class Join /// Reference to the row in block. 
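// Putting the replicate-and-fill-null contract above into a runnable, self-contained
// sketch: plain std:: types stand in for TiFlash's Block and IColumn, and `Cell`,
// `Row`, `expandRows` are illustrative names only, not the TiFlash API.
#include <iostream>
#include <optional>
#include <string>
#include <vector>

using Cell = std::optional<std::string>; // nullopt models SQL NULL
using Row = std::vector<Cell>;

// Replicate every input row once per grouping set; replica j keeps only the column
// owned by grouping set j, nulls out the other grouping columns, and carries a
// 1-based groupingID in an appended column.
std::vector<Row> expandRows(const std::vector<Row> & input, const std::vector<size_t> & grouping_cols)
{
    std::vector<Row> output;
    for (const auto & row : input)
    {
        for (size_t j = 0; j < grouping_cols.size(); ++j)
        {
            Row replica = row;
            for (size_t k = 0; k < grouping_cols.size(); ++k)
                if (k != j)
                    replica[grouping_cols[k]] = std::nullopt; // fill-null
            replica.push_back(Cell{std::to_string(j + 1)}); // groupingID starts from 1
            output.push_back(std::move(replica));
        }
    }
    return output;
}

int main()
{
    // two single-column grouping sets over columns 0 and 1, as in the example above
    const std::vector<Row> block = {{Cell{"1"}, Cell{"apple"}}};
    for (const auto & row : expandRows(block, {0, 1}))
    {
        for (const auto & cell : row)
            std::cout << (cell ? *cell : "NULL") << '\t';
        std::cout << '\n'; // prints "1 NULL 1" then "NULL apple 2"
    }
    return 0;
}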
struct RowRef
 {
-        const Block * block; // block + row num
+        const Block * block;
         size_t row_num;

         RowRef() = default;
diff --git a/dbms/src/Interpreters/tests/gtest_block_repeat.cpp b/dbms/src/Interpreters/tests/gtest_block_repeat.cpp
index 88c6286898e..1a34e0fde30 100644
--- a/dbms/src/Interpreters/tests/gtest_block_repeat.cpp
+++ b/dbms/src/Interpreters/tests/gtest_block_repeat.cpp
@@ -23,7 +23,7 @@ namespace DB
 namespace tests
 {
-class BlockRepeat : public ::testing::Test
+class BlockExpand : public ::testing::Test
 {
 public:
     using ColStringType = typename TypeTraits::FieldType;
@@ -38,11 +38,11 @@ class BlockRepeat : public ::testing::Test
     const std::vector col_name{"age", "gender", "country", "region", "zip"};
 };

-TEST_F(BlockRepeat, Limit)
+TEST_F(BlockExpand, ExpandLogic)
 try
 {
     {
-        // test basic block repeat operation. (two grouping set)
+        // test basic block expand operation. (two grouping sets)
         const ColumnsWithTypeAndName
             ori_col = {
@@ -55,19 +55,19 @@ try
         GroupingSet g_gender = GroupingSet{GroupingColumnNames{col_name[1]}};
         GroupingSet g_country = GroupingSet{GroupingColumnNames{col_name[2]}};
         GroupingSets group_sets = GroupingSets{g_gender, g_country};
-        Repeat repeat = Repeat(group_sets);
+        Expand expand = Expand(group_sets);
         Block block(ori_col);
         auto origin_rows = block.rows();
-        repeat.replicateAndFillNull(block);
+        expand.replicateAndFillNull(block);
         // assert the col size is added with 1.
         ASSERT_EQ(block.getColumns().size(), size_t(5));
         // assert the new col groupingID is appended.
         ASSERT_EQ(block.getColumnsWithTypeAndName()[4].name, "groupingID");
         // assert the block size is equal to origin rows * grouping set num.
-        auto repeat_rows = block.rows();
-        auto grouping_set_num = repeat.getGroupSetNum();
-        ASSERT_EQ(origin_rows * grouping_set_num, repeat_rows); // 6
+        auto expand_rows = block.rows();
+        auto grouping_set_num = expand.getGroupSetNum();
+        ASSERT_EQ(origin_rows * grouping_set_num, expand_rows); // 6
         // assert grouping set column are nullable.
ASSERT_EQ(block.getColumns()[0].get()->isColumnNullable(), false); ASSERT_EQ(block.getColumns()[1].get()->isColumnNullable(), true); @@ -88,7 +88,7 @@ try const auto res0 = ColumnWithInt64{1, 1, 0, 0, -1, -1}; const auto * col_0 = typeid_cast(block.getColumns()[0].get()); - for (int i = 0; i < int(repeat_rows); ++i) + for (int i = 0; i < int(expand_rows); ++i) { ASSERT_EQ(col_0->getElement(i), res0[i]); } @@ -96,7 +96,7 @@ try const auto res1 = ColumnWithString{"1 ", "null", "1 ", "null", "1 ", "null"}; const auto * col_1 = typeid_cast(block.getColumns()[1].get()); const auto * col_1_nest = &static_cast(col_1->getNestedColumn()); - for (int i = 0; i < int(repeat_rows); ++i) + for (int i = 0; i < int(expand_rows); ++i) { if (res1[i] == "null") { ASSERT_EQ(col_1->isNullAt(i), true); @@ -108,7 +108,7 @@ try const auto res2 = ColumnWithString{"null", "1", "null", "2", "null", "3"}; const auto * col_2 = typeid_cast(block.getColumns()[2].get()); const auto * col_2_nest = &static_cast(col_2->getNestedColumn()); - for (int i = 0; i < int(repeat_rows); ++i) + for (int i = 0; i < int(expand_rows); ++i) { if (res2[i] == "null") { ASSERT_EQ(col_2->isNullAt(i), true); @@ -119,20 +119,20 @@ try const auto res3 = ColumnWithUInt64{1, 1,1,1, 0,0}; const auto * col_3 = typeid_cast(block.getColumns()[3].get()); - for (int i = 0; i < int(repeat_rows); ++i) + for (int i = 0; i < int(expand_rows); ++i) { ASSERT_EQ(col_3->getElement(i), res3[i]); } const auto res4 = ColumnWithUInt64{1, 2, 1, 2, 1, 2}; const auto * col_4 = typeid_cast(block.getColumns()[4].get()); - for (int i = 0; i < int(repeat_rows); ++i) + for (int i = 0; i < int(expand_rows); ++i) { ASSERT_EQ(col_4->getElement(i), res4[i]); } } { - // test block repeat operation for multi grouping set (triple here) + // test block expand operation for multi grouping set (triple here) const ColumnsWithTypeAndName ori_col = { @@ -146,19 +146,19 @@ try GroupingSet g_country = GroupingSet{GroupingColumnNames{col_name[2]}}; GroupingSet g_region = GroupingSet{GroupingColumnNames{col_name[3]}}; GroupingSets group_sets = GroupingSets{g_gender, g_country, g_region}; - Repeat repeat = Repeat(group_sets); + Expand expand = Expand(group_sets); Block block(ori_col); auto origin_rows = block.rows(); - repeat.replicateAndFillNull(block); + expand.replicateAndFillNull(block); // assert the col size is added with 1. ASSERT_EQ(block.getColumns().size(), size_t(5)); // assert the new col groupingID is appended. ASSERT_EQ(block.getColumnsWithTypeAndName()[4].name, "groupingID"); // assert the block size is equal to origin rows * grouping set num. - auto repeat_rows = block.rows(); - auto grouping_set_num = repeat.getGroupSetNum(); - ASSERT_EQ(origin_rows * grouping_set_num, repeat_rows); // 9 + auto expand_rows = block.rows(); + auto grouping_set_num = expand.getGroupSetNum(); + ASSERT_EQ(origin_rows * grouping_set_num, expand_rows); // 9 // assert grouping set column are nullable. 
ASSERT_EQ(block.getColumns()[0].get()->isColumnNullable(), false); ASSERT_EQ(block.getColumns()[1].get()->isColumnNullable(), true); @@ -182,7 +182,7 @@ try const auto res0 = ColumnWithInt64{1, 1, 1, 0, 0, 0, -1, -1, -1}; const auto * col_0 = typeid_cast(block.getColumns()[0].get()); - for (int i = 0; i < int(repeat_rows); ++i) + for (int i = 0; i < int(expand_rows); ++i) { ASSERT_EQ(col_0->getElement(i), res0[i]); } @@ -190,7 +190,7 @@ try const auto res1 = ColumnWithString{"aaa", "null", "null", "bbb", "null", "null", "ccc", "null", "null"}; const auto * col_1 = typeid_cast(block.getColumns()[1].get()); const auto * col_1_nest = &static_cast(col_1->getNestedColumn()); - for (int i = 0; i < int(repeat_rows); ++i) + for (int i = 0; i < int(expand_rows); ++i) { if (res1[i] == "null") { ASSERT_EQ(col_1->isNullAt(i), true); @@ -202,7 +202,7 @@ try const auto res2 = ColumnWithString{"null", "1", "null", "null", "2", "null", "null", "3", "null"}; const auto * col_2 = typeid_cast(block.getColumns()[2].get()); const auto * col_2_nest = &static_cast(col_2->getNestedColumn()); - for (int i = 0; i < int(repeat_rows); ++i) + for (int i = 0; i < int(expand_rows); ++i) { if (res2[i] == "null") { ASSERT_EQ(col_2->isNullAt(i), true); @@ -215,7 +215,7 @@ try const auto res3 = ColumnWithUInt64{UInt64(-1), UInt64(-1), 1, UInt64(-1), UInt64(-1), 1, UInt64(-1), UInt64(-1), 0}; const auto * col_3 = typeid_cast(block.getColumns()[3].get()); const auto * col_3_nest = &typeid_cast(col_3->getNestedColumn()); - for (int i = 0; i < int(repeat_rows); ++i) + for (int i = 0; i < int(expand_rows); ++i) { if (res3[i] == UInt64(-1)) { ASSERT_EQ(col_3->isNullAt(i), true); @@ -226,7 +226,7 @@ try const auto res4 = ColumnWithUInt64{1, 2, 3, 1, 2, 3, 1, 2, 3}; const auto * col_4 = typeid_cast(block.getColumns()[4].get()); - for (int i = 0; i < int(repeat_rows); ++i) + for (int i = 0; i < int(expand_rows); ++i) { ASSERT_EQ(col_4->getElement(i), res4[i]); } @@ -246,20 +246,20 @@ try GroupingSet g_country = GroupingSet{GroupingColumnNames{col_name[2]}}; GroupingSet g_region = GroupingSet{GroupingColumnNames{col_name[3]}}; GroupingSets group_sets = GroupingSets{g_gender, g_country, g_region}; - Repeat repeat = Repeat(group_sets); + Expand expand = Expand(group_sets); Block block(ori_col); auto origin_rows = block.rows(); - repeat.replicateAndFillNull(block); + expand.replicateAndFillNull(block); // assert the col size is added with 1. ASSERT_EQ(block.getColumns().size(), size_t(5)); // assert the new col groupingID is appended. ASSERT_EQ(block.getColumnsWithTypeAndName()[4].name, "groupingID"); // assert the block size is equal to origin rows * grouping set num. - auto repeat_rows = block.rows(); - auto grouping_set_num = repeat.getGroupSetNum(); + auto expand_rows = block.rows(); + auto grouping_set_num = expand.getGroupSetNum(); ASSERT_EQ(origin_rows, 0); - ASSERT_EQ(origin_rows * grouping_set_num, repeat_rows); // 0 + ASSERT_EQ(origin_rows * grouping_set_num, expand_rows); // 0 // assert grouping set column are nullable. 
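// The zero-row case above leans on the guard added to Expand::replicateAndFillNull
// earlier in this series: with no input rows the replicate offsets are empty, so
// IColumn::replicate must be skipped entirely. A rough, self-contained model of
// that control flow (std::vector stands in for IColumn / IColumn::Offsets, and
// `replicateByOffsets` / `expandColumn` are illustrative names, not TiFlash API):
#include <cassert>
#include <cstddef>
#include <vector>

// offsets[i] is the cumulative output size after consuming input row i,
// mirroring the contract of IColumn::replicate.
std::vector<int> replicateByOffsets(const std::vector<int> & col, const std::vector<size_t> & offsets)
{
    assert(col.size() == offsets.size());
    std::vector<int> out;
    size_t prev = 0;
    for (size_t i = 0; i < col.size(); ++i)
    {
        for (size_t n = prev; n < offsets[i]; ++n)
            out.push_back(col[i]); // emit offsets[i] - prev copies of row i
        prev = offsets[i];
    }
    return out;
}

void expandColumn(std::vector<int> & col, const std::vector<size_t> & offsets)
{
    if (!offsets.empty()) // zero input rows: nothing to replicate, keep the column empty
        col = replicateByOffsets(col, offsets);
}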
} } diff --git a/dbms/src/TestUtils/mockExecutor.cpp b/dbms/src/TestUtils/mockExecutor.cpp index 1220e873a32..bcb4ec63a19 100644 --- a/dbms/src/TestUtils/mockExecutor.cpp +++ b/dbms/src/TestUtils/mockExecutor.cpp @@ -362,7 +362,7 @@ DAGRequestBuilder & DAGRequestBuilder::sort(MockOrderByItemVec order_by_vec, boo return *this; } -DAGRequestBuilder & DAGRequestBuilder::repeat(MockVVecColumnNameVec grouping_set_columns) +DAGRequestBuilder & DAGRequestBuilder::expand(MockVVecColumnNameVec grouping_set_columns) { assert(root); auto grouping_sets_ast = mock::MockVVecGroupingNameVec(); @@ -381,7 +381,7 @@ DAGRequestBuilder & DAGRequestBuilder::repeat(MockVVecColumnNameVec grouping_set } grouping_sets_ast.emplace_back(std::move(grouping_set_ast)); } - root = compileRepeat(root, getExecutorIndex(), grouping_sets_ast, grouping_col_collection); + root = compileExpand(root, getExecutorIndex(), grouping_sets_ast, grouping_col_collection); return *this; } diff --git a/dbms/src/TestUtils/mockExecutor.h b/dbms/src/TestUtils/mockExecutor.h index df9d162f691..da369c69ff7 100644 --- a/dbms/src/TestUtils/mockExecutor.h +++ b/dbms/src/TestUtils/mockExecutor.h @@ -147,8 +147,8 @@ class DAGRequestBuilder DAGRequestBuilder & sort(MockOrderByItem order_by, bool is_partial_sort, uint64_t fine_grained_shuffle_stream_count = 0); DAGRequestBuilder & sort(MockOrderByItemVec order_by_vec, bool is_partial_sort, uint64_t fine_grained_shuffle_stream_count = 0); - // repeat - DAGRequestBuilder & repeat(MockVVecColumnNameVec grouping_set_columns); + // expand + DAGRequestBuilder & expand(MockVVecColumnNameVec grouping_set_columns); void setCollation(Int32 collator_) { properties.collator = convertToTiDBCollation(collator_); } Int32 getCollation() const { return abs(properties.collator); } From 62dc142c270a8f2d646f26b63f0968f22370c5f5 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Tue, 10 Jan 2023 15:28:12 +0800 Subject: [PATCH 12/31] rename file Signed-off-by: AilinKid <3148019@qq.com> --- .../{gtest_repeat_executor.cpp => gtest_expand_executor.cpp} | 2 +- .../tests/{gtest_block_repeat.cpp => gtest_block_expand.cpp} | 0 dbms/src/TestUtils/executorSerializer.cpp | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename dbms/src/Flash/tests/{gtest_repeat_executor.cpp => gtest_expand_executor.cpp} (99%) rename dbms/src/Interpreters/tests/{gtest_block_repeat.cpp => gtest_block_expand.cpp} (100%) diff --git a/dbms/src/Flash/tests/gtest_repeat_executor.cpp b/dbms/src/Flash/tests/gtest_expand_executor.cpp similarity index 99% rename from dbms/src/Flash/tests/gtest_repeat_executor.cpp rename to dbms/src/Flash/tests/gtest_expand_executor.cpp index 1b5aaa6f04e..93b7974c852 100644 --- a/dbms/src/Flash/tests/gtest_repeat_executor.cpp +++ b/dbms/src/Flash/tests/gtest_expand_executor.cpp @@ -39,7 +39,7 @@ class ExpandExecutorTestRunner : public DB::tests::ExecutorTest TEST_F(ExpandExecutorTestRunner, ExpandLogical) try { - /// case 1block.getByName(grouping_col).column->isColumnNullable() + /// case 1 auto request = context .scan("test_db", "test_table") .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) diff --git a/dbms/src/Interpreters/tests/gtest_block_repeat.cpp b/dbms/src/Interpreters/tests/gtest_block_expand.cpp similarity index 100% rename from dbms/src/Interpreters/tests/gtest_block_repeat.cpp rename to dbms/src/Interpreters/tests/gtest_block_expand.cpp diff --git a/dbms/src/TestUtils/executorSerializer.cpp 
b/dbms/src/TestUtils/executorSerializer.cpp index 46f0d3b03a1..bffa9baf3cf 100644 --- a/dbms/src/TestUtils/executorSerializer.cpp +++ b/dbms/src/TestUtils/executorSerializer.cpp @@ -307,7 +307,7 @@ void ExecutorSerializer::serializeListStruct(const tipb::DAGRequest * dag_reques serializeLimit("Limit", executor.limit(), buf); break; case tipb::ExecType::TypeExpand: - serializeExpandSource("Repeat", executor.expand(), buf); + serializeExpandSource("Expand", executor.expand(), buf); break; default: throw TiFlashException("Should not reach here", Errors::Coprocessor::Internal); From 9d4077143febd1b4ac04b528bb380fe37df841ed Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Tue, 10 Jan 2023 17:47:51 +0800 Subject: [PATCH 13/31] fix test under new rebased code Signed-off-by: AilinKid <3148019@qq.com> --- dbms/src/Flash/tests/gtest_expand_executor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Flash/tests/gtest_expand_executor.cpp b/dbms/src/Flash/tests/gtest_expand_executor.cpp index 93b7974c852..c2d430e2416 100644 --- a/dbms/src/Flash/tests/gtest_expand_executor.cpp +++ b/dbms/src/Flash/tests/gtest_expand_executor.cpp @@ -370,9 +370,9 @@ CreatingSets PartialSorting x 10: limit = 2 Expression: Expression: - HashJoinProbe: + HashJoinProbe: Expression: - Expand: : grouping set [<{count(s1)_collator_46 }><{any(s2)_collator_46 }>] + Expand: : grouping set [<{count(exchange_receiver_0)_collator_46 }><{any(exchange_receiver_1)_collator_46 }>] Expression: SharedQuery: ParallelAggregating, max_threads: 10, final: true From a34e95272f6faf3558afc48f2bff52aa36933a31 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Tue, 10 Jan 2023 18:51:33 +0800 Subject: [PATCH 14/31] address haisheng's comment Signed-off-by: AilinKid <3148019@qq.com> --- .../DataStreams/ExpandBlockInputStream.cpp | 40 ---------------- dbms/src/DataStreams/ExpandBlockInputStream.h | 46 ------------------- .../ExpressionBlockInputStream.cpp | 7 +++ .../DataStreams/ExpressionBlockInputStream.h | 1 + .../Coprocessor/DAGQueryBlockInterpreter.cpp | 4 +- dbms/src/Flash/Planner/PhysicalPlan.cpp | 1 + .../Flash/Planner/Plans/PhysicalExpand.cpp | 4 +- .../src/Flash/tests/gtest_expand_executor.cpp | 2 +- dbms/src/Interpreters/Expand.cpp | 8 ++-- 9 files changed, 18 insertions(+), 95 deletions(-) delete mode 100644 dbms/src/DataStreams/ExpandBlockInputStream.cpp delete mode 100644 dbms/src/DataStreams/ExpandBlockInputStream.h diff --git a/dbms/src/DataStreams/ExpandBlockInputStream.cpp b/dbms/src/DataStreams/ExpandBlockInputStream.cpp deleted file mode 100644 index 2f502c3f708..00000000000 --- a/dbms/src/DataStreams/ExpandBlockInputStream.cpp +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include - -namespace DB -{ -Block ExpandBlockInputStream::readImpl() -{ - Block block = children.back()->read(); - if (!block) - return block; - expand_actions->execute(block); - return block; -} - -Block ExpandBlockInputStream::getHeader() const -{ - Block res = children.back()->getHeader(); - expand_actions->execute(res); - return res; -} - -void ExpandBlockInputStream::appendInfo(FmtBuffer & buffer) const { - buffer.fmtAppend(": grouping set "); - expand_actions.get()->getActions()[0].expand->getGroupingSetsDes(buffer); -} - -} // namespace DB diff --git a/dbms/src/DataStreams/ExpandBlockInputStream.h b/dbms/src/DataStreams/ExpandBlockInputStream.h deleted file mode 100644 index e502a8c8e1f..00000000000 --- a/dbms/src/DataStreams/ExpandBlockInputStream.h +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include -#include -#include - -namespace DB -{ -class ExpandBlockInputStream : public IProfilingBlockInputStream -{ - static constexpr auto NAME = "Expand"; - -public: - ExpandBlockInputStream( - const BlockInputStreamPtr & input, - ExpressionActionsPtr expand_actions_) - : expand_actions(expand_actions_) - { - children.push_back(input); - } - String getName() const override { return NAME; } - Block getHeader() const override; - void appendInfo(FmtBuffer & buffer) const override; - -protected: - Block readImpl() override; - -private: - ExpressionActionsPtr expand_actions; -}; - -} // namespace DB - diff --git a/dbms/src/DataStreams/ExpressionBlockInputStream.cpp b/dbms/src/DataStreams/ExpressionBlockInputStream.cpp index b288155c142..285e8ab61d7 100644 --- a/dbms/src/DataStreams/ExpressionBlockInputStream.cpp +++ b/dbms/src/DataStreams/ExpressionBlockInputStream.cpp @@ -54,4 +54,11 @@ Block ExpressionBlockInputStream::readImpl() return res; } +void ExpressionBlockInputStream::appendInfo(FmtBuffer & buffer) const { + if (!expression.get()->getActions().empty() && expression.get()->getActions()[0].expand != nullptr) { + buffer.fmtAppend(": grouping set "); + expression.get()->getActions()[0].expand->getGroupingSetsDes(buffer); + } +} + } // namespace DB diff --git a/dbms/src/DataStreams/ExpressionBlockInputStream.h b/dbms/src/DataStreams/ExpressionBlockInputStream.h index 1d1e059d51d..73141446414 100644 --- a/dbms/src/DataStreams/ExpressionBlockInputStream.h +++ b/dbms/src/DataStreams/ExpressionBlockInputStream.h @@ -41,6 +41,7 @@ class ExpressionBlockInputStream : public IProfilingBlockInputStream String getName() const override { return NAME; } Block getTotals() override; Block getHeader() const override; + void appendInfo(FmtBuffer & buffer) const override; protected: Block readImpl() override; diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 0013b4c5af1..9634dd38681 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -18,7 +18,7 @@ #include 
#include #include -#include +#include #include #include #include @@ -749,7 +749,7 @@ void DAGQueryBlockInterpreter::executeLimit(DAGPipeline & pipeline) void DAGQueryBlockInterpreter::executeExpand(DAGPipeline & pipeline, const ExpressionActionsPtr & expr) { pipeline.transform([&](auto &stream) { - stream = std::make_shared(stream, expr); + stream = std::make_shared(stream, expr, log->identifier()); }); } diff --git a/dbms/src/Flash/Planner/PhysicalPlan.cpp b/dbms/src/Flash/Planner/PhysicalPlan.cpp index 42729693433..12badaae469 100644 --- a/dbms/src/Flash/Planner/PhysicalPlan.cpp +++ b/dbms/src/Flash/Planner/PhysicalPlan.cpp @@ -199,6 +199,7 @@ void PhysicalPlan::build(const String & executor_id, const tipb::Executor * exec } case tipb::ExecType::TypeExpand: { + GET_METRIC(tiflash_coprocessor_executor_count, type_expand).Increment(); pushBack(PhysicalExpand::build(context, executor_id, log, executor->expand(), popBack())); break; } diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp index 9a52c21b62f..4d459ba08c4 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp +++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include @@ -77,7 +77,7 @@ void PhysicalExpand::expandTransform(DAGPipeline & child_pipeline, Context & con expand_actions->add(ExpressionAction::expandSource(shared_expand)); String expand_extra_info = fmt::format("expand, expand_executor_id = {}", execId()); child_pipeline.transform([&](auto &stream) { - stream = std::make_shared(stream, expand_actions); + stream = std::make_shared(stream, expand_actions, log->identifier()); stream->setExtraInfo(expand_extra_info); }); } diff --git a/dbms/src/Flash/tests/gtest_expand_executor.cpp b/dbms/src/Flash/tests/gtest_expand_executor.cpp index c2d430e2416..004c568560b 100644 --- a/dbms/src/Flash/tests/gtest_expand_executor.cpp +++ b/dbms/src/Flash/tests/gtest_expand_executor.cpp @@ -372,7 +372,7 @@ CreatingSets Expression: HashJoinProbe: Expression: - Expand: : grouping set [<{count(exchange_receiver_0)_collator_46 }><{any(exchange_receiver_1)_collator_46 }>] + Expression: : grouping set [{}{}] Expression: SharedQuery: ParallelAggregating, max_threads: 10, final: true diff --git a/dbms/src/Interpreters/Expand.cpp b/dbms/src/Interpreters/Expand.cpp index 09bbdd5d662..ee1753bf247 100644 --- a/dbms/src/Interpreters/Expand.cpp +++ b/dbms/src/Interpreters/Expand.cpp @@ -41,10 +41,10 @@ void Expand::getGroupingSetsDes(FmtBuffer & buffer) const buffer.fmtAppend("["); for (const auto & grouping_set: group_sets_names) { - buffer.fmtAppend("<"); + buffer.fmtAppend("{{"); for (const auto & grouping_exprs: grouping_set) { - buffer.fmtAppend("{{"); + buffer.fmtAppend("<"); for ( size_t i = 0; i < grouping_exprs.size(); i++) { if (i != 0) { @@ -52,9 +52,9 @@ void Expand::getGroupingSetsDes(FmtBuffer & buffer) const } buffer.fmtAppend(grouping_exprs.at(i)); } - buffer.fmtAppend("}}"); + buffer.fmtAppend(">"); } - buffer.fmtAppend(">"); + buffer.fmtAppend("}}"); } buffer.fmtAppend("]"); } From db7b1ffb87c6c986ba480afbe056c5c7bdd45880 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Tue, 10 Jan 2023 23:22:12 +0800 Subject: [PATCH 15/31] clang fmt Signed-off-by: AilinKid <3148019@qq.com> --- .../ExpressionBlockInputStream.cpp | 8 +- dbms/src/Debug/MockExecutor/ExpandBinder.cpp | 8 +- .../Coprocessor/DAGExpressionAnalyzer.cpp | 22 +++-- .../Coprocessor/DAGQueryBlockInterpreter.cpp | 9 +- 
.../Coprocessor/DAGQueryBlockInterpreter.h | 5 +- .../Coprocessor/collectOutputFieldTypes.cpp | 13 +-- .../Flash/Planner/Plans/PhysicalExpand.cpp | 9 +- dbms/src/Flash/Planner/Plans/PhysicalExpand.h | 8 +- .../src/Flash/Statistics/CommonExecutorImpl.h | 4 +- .../src/Flash/tests/gtest_expand_executor.cpp | 89 +++++++++++++++---- dbms/src/Interpreters/Expand.cpp | 66 +++++++------- dbms/src/Interpreters/Expand.h | 7 +- dbms/src/Interpreters/ExpressionActions.cpp | 5 +- .../Interpreters/tests/gtest_block_expand.cpp | 39 +++++--- dbms/src/TestUtils/ExecutorTestUtils.cpp | 2 +- dbms/src/TestUtils/executorSerializer.cpp | 5 +- dbms/src/TestUtils/mockExecutor.cpp | 12 +-- dbms/src/TestUtils/mockExecutor.h | 2 +- 18 files changed, 203 insertions(+), 110 deletions(-) diff --git a/dbms/src/DataStreams/ExpressionBlockInputStream.cpp b/dbms/src/DataStreams/ExpressionBlockInputStream.cpp index 285e8ab61d7..27daa61152a 100644 --- a/dbms/src/DataStreams/ExpressionBlockInputStream.cpp +++ b/dbms/src/DataStreams/ExpressionBlockInputStream.cpp @@ -54,10 +54,12 @@ Block ExpressionBlockInputStream::readImpl() return res; } -void ExpressionBlockInputStream::appendInfo(FmtBuffer & buffer) const { - if (!expression.get()->getActions().empty() && expression.get()->getActions()[0].expand != nullptr) { +void ExpressionBlockInputStream::appendInfo(FmtBuffer & buffer) const +{ + if (!expression.get()->getActions().empty() && expression.get()->getActions()[0].expand != nullptr) + { buffer.fmtAppend(": grouping set "); - expression.get()->getActions()[0].expand->getGroupingSetsDes(buffer); + expression.get()->getActions()[0].expand->getGroupingSetsDes(buffer); } } diff --git a/dbms/src/Debug/MockExecutor/ExpandBinder.cpp b/dbms/src/Debug/MockExecutor/ExpandBinder.cpp index 63fbfa28582..edc124104c4 100644 --- a/dbms/src/Debug/MockExecutor/ExpandBinder.cpp +++ b/dbms/src/Debug/MockExecutor/ExpandBinder.cpp @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include #include +#include namespace DB::mock { @@ -31,8 +31,8 @@ bool ExpandBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collat auto * ges = gss->add_grouping_exprs(); for (const auto & grouping_col : grouping_exprs) { - tipb::Expr* add_column = ges->add_grouping_expr(); - astToPB(children[0]->output_schema, grouping_col, add_column, collator_id, context); // ast column ref change to tipb:Expr column ref + tipb::Expr * add_column = ges->add_grouping_expr(); + astToPB(children[0]->output_schema, grouping_col, add_column, collator_id, context); // ast column ref change to tipb:Expr column ref } } } @@ -56,7 +56,7 @@ ExecutorBinderPtr compileExpand(ExecutorBinderPtr input, size_t & executor_index field_type.set_tp(TiDB::TypeLongLong); field_type.set_charset("binary"); field_type.set_collate(TiDB::ITiDBCollator::BINARY); - field_type.set_flag(TiDB::ColumnFlagUnsigned | TiDB::ColumnFlagNotNull); // should have NOT NULL FLAG + field_type.set_flag(TiDB::ColumnFlagUnsigned | TiDB::ColumnFlagNotNull); // should have NOT NULL FLAG field_type.set_flen(-1); field_type.set_decimal(-1); output_schema.push_back(std::make_pair("groupingID", TiDB::fieldTypeToColumnInfo(field_type))); diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index aa2c7014a5f..816b0e324dd 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -806,19 +806,24 @@ NamesAndTypes DAGExpressionAnalyzer::buildOrderColumns( } std::shared_ptr DAGExpressionAnalyzer::buildExpandGroupingColumns( - const tipb::Expand & expand, const ExpressionActionsPtr & actions) + const tipb::Expand & expand, + const ExpressionActionsPtr & actions) { GroupingSets group_sets_columns; std::map map_grouping_col; group_sets_columns.reserve(expand.grouping_sets().size()); - for (const auto& group_set : expand.grouping_sets()){ + for (const auto & group_set : expand.grouping_sets()) + { GroupingSet group_set_columns; group_set_columns.reserve(group_set.grouping_exprs().size()); - for (const auto &group_exprs : group_set.grouping_exprs()) { + for (const auto & group_exprs : group_set.grouping_exprs()) + { GroupingColumnNames group_exprs_columns; group_exprs_columns.reserve(group_exprs.grouping_expr().size()); - for (const auto& group_expr : group_exprs.grouping_expr()){ - if (group_expr.tp() != tipb::ColumnRef){ + for (const auto & group_expr : group_exprs.grouping_expr()) + { + if (group_expr.tp() != tipb::ColumnRef) + { throw TiFlashException("grouping sets expression should be column expr", Errors::Coprocessor::BadRequest); } String cp_name = getActions(group_expr, actions); @@ -832,7 +837,7 @@ std::shared_ptr DAGExpressionAnalyzer::buildExpandGroupingColumns( group_sets_columns.emplace_back(std::move(group_set_columns)); } // change the original source column to be nullable, and add a new column for groupingID. 
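// The rewrite described by the comment above can be pictured at the schema level:
// every column referenced by any grouping set becomes nullable (its value is
// blanked out in the replicas owned by other grouping sets), and a NOT NULL
// unsigned groupingID column is appended. A self-contained sketch under those
// assumptions — plain structs stand in for NamesAndTypes/DataTypePtr, and
// `rewriteSchemaForExpand` is an illustrative name, not part of DAGExpressionAnalyzer:
#include <set>
#include <string>
#include <vector>

struct ColumnDef
{
    std::string name;
    std::string type;
    bool nullable = false;
};

void rewriteSchemaForExpand(std::vector<ColumnDef> & schema, const std::set<std::string> & grouping_cols)
{
    for (auto & col : schema)
        if (grouping_cols.count(col.name) > 0)
            col.nullable = true; // may be filled with NULL in replicas of other grouping sets
    schema.push_back({"groupingID", "UInt64", /*nullable=*/false}); // identifies the owning grouping set, starting from 1
}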
- for (auto & mutable_one: source_columns) + for (auto & mutable_one : source_columns) { if (map_grouping_col[mutable_one.name]) mutable_one.type = makeNullable(mutable_one.type); @@ -843,10 +848,11 @@ std::shared_ptr DAGExpressionAnalyzer::buildExpandGroupingColumns( } ExpressionActionsPtr DAGExpressionAnalyzer::appendExpand( - const tipb::Expand & expand, ExpressionActionsChain & chain) + const tipb::Expand & expand, + ExpressionActionsChain & chain) { auto & last_step = initAndGetLastStep(chain); - for (const auto &origin_col : last_step.actions->getSampleBlock().getNamesAndTypesList()) + for (const auto & origin_col : last_step.actions->getSampleBlock().getNamesAndTypesList()) { last_step.required_output.push_back(origin_col.name); } diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 9634dd38681..887ca17c38c 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -140,7 +140,8 @@ AnalysisResult analyzeExpressions( chain.addStep(); } - if (query_block.expand) { + if (query_block.expand) + { res.before_expand = analyzer.appendExpand(query_block.expand->expand(), chain); } @@ -273,10 +274,10 @@ void DAGQueryBlockInterpreter::handleJoin(const tipb::Join & join, DAGPipeline & = tiflash_join.genJoinOtherConditionAction(context, left_input_header, right_input_header, probe_side_prepare_actions); const Settings & settings = context.getSettingsRef(); - size_t max_block_size_for_cross_join = settings.max_block_size; + size_t max_block_size_for_cross_join = settings.max_block_size; fiu_do_on(FailPoints::minimum_block_size_for_cross_join, { max_block_size_for_cross_join = 1; }); - JoinPtr join_ptr = std::make_shared( // make join + JoinPtr join_ptr = std::make_shared( // make join probe_key_names, build_key_names, tiflash_join.kind, @@ -748,7 +749,7 @@ void DAGQueryBlockInterpreter::executeLimit(DAGPipeline & pipeline) void DAGQueryBlockInterpreter::executeExpand(DAGPipeline & pipeline, const ExpressionActionsPtr & expr) { - pipeline.transform([&](auto &stream) { + pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, expr, log->identifier()); }); } diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h index 48edf039ff5..c3cd27beacf 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h @@ -92,7 +92,10 @@ class DAGQueryBlockInterpreter void restorePipelineConcurrency(DAGPipeline & pipeline); - DAGContext & dagContext() const { return *context.getDAGContext(); } + DAGContext & dagContext() const + { + return *context.getDAGContext(); + } Context & context; std::vector input_streams_vec; diff --git a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp index 3b5c94a81d8..94c1b6c573a 100644 --- a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp +++ b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp @@ -100,18 +100,21 @@ bool collectForTableScan(std::vector & output_field_types, cons return false; } -bool collectForExpand(std::vector &out_field_types, const tipb::Executor & executor) +bool collectForExpand(std::vector & out_field_types, const tipb::Executor & executor) { - auto &out_child_fields = out_field_types; + auto & out_child_fields = out_field_types; // collect output_field_types of children 
getChildren(executor).forEach([&out_child_fields](const tipb::Executor & child) { traverseExecutorTree(child, [&out_child_fields](const tipb::Executor & e) { return collectForExecutor(out_child_fields, e); }); }); // make the columns from grouping sets nullable. - for (const auto & grouping_set : executor.expand().grouping_sets()){ - for (const auto & grouping_exprs : grouping_set.grouping_exprs()){ - for (const auto & grouping_col : grouping_exprs.grouping_expr()){ + for (const auto & grouping_set : executor.expand().grouping_sets()) + { + for (const auto & grouping_exprs : grouping_set.grouping_exprs()) + { + for (const auto & grouping_col : grouping_exprs.grouping_expr()) + { // assert that: grouping_col must be the column ref guaranteed by tidb. auto column_index = decodeDAGInt64(grouping_col.val()); if (column_index < 0 || column_index >= static_cast(out_child_fields.size())) diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp index 4d459ba08c4..b43d676b23f 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp +++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp @@ -55,7 +55,7 @@ PhysicalPlanNodePtr PhysicalExpand::build( auto child_header = child->getSchema(); for (const auto & one : child_header) { - expand_output_columns.emplace_back(one.name, shared_expand->isInGroupSetColumn(one.name)? makeNullable(one.type): one.type); + expand_output_columns.emplace_back(one.name, shared_expand->isInGroupSetColumn(one.name) ? makeNullable(one.type) : one.type); } expand_output_columns.emplace_back(shared_expand->grouping_identifier_column_name, shared_expand->grouping_identifier_column_type); @@ -76,7 +76,7 @@ void PhysicalExpand::expandTransform(DAGPipeline & child_pipeline, Context & con auto expand_actions = PhysicalPlanHelper::newActions(child_pipeline.firstStream()->getHeader(), context); expand_actions->add(ExpressionAction::expandSource(shared_expand)); String expand_extra_info = fmt::format("expand, expand_executor_id = {}", execId()); - child_pipeline.transform([&](auto &stream) { + child_pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, expand_actions, log->identifier()); stream->setExtraInfo(expand_extra_info); }); @@ -92,7 +92,7 @@ void PhysicalExpand::finalize(const Names & parent_require) { FinalizeHelper::checkSchemaContainsParentRequire(schema, parent_require); Names required_output; - required_output.reserve( shared_expand->getGroupSetNum()); // grouping set column should be existed in the child output schema. + required_output.reserve(shared_expand->getGroupSetNum()); // grouping set column should be existed in the child output schema. auto name_set = std::set(); shared_expand->getAllGroupSetColumnNames(name_set); // append parent_require column it may expect self-filled groupingID. 
@@ -103,7 +103,8 @@ void PhysicalExpand::finalize(const Names & parent_require) name_set.insert(one); } } - for (const auto & grouping_name: name_set) { + for (const auto & grouping_name : name_set) + { required_output.emplace_back(grouping_name); } child->finalize(required_output); diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.h b/dbms/src/Flash/Planner/Plans/PhysicalExpand.h index 6c798ad35c3..14b910d8a75 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.h +++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.h @@ -39,7 +39,9 @@ class PhysicalExpand : public PhysicalUnary const std::shared_ptr & shared_expand, const Block & sample_block_) : PhysicalUnary(executor_id_, PlanType::Expand, schema_, req_id, child_) - , shared_expand(shared_expand), sample_block(sample_block_){} + , shared_expand(shared_expand) + , sample_block(sample_block_) + {} void finalize(const Names & parent_require) override; @@ -52,6 +54,4 @@ class PhysicalExpand : public PhysicalUnary std::shared_ptr shared_expand; Block sample_block; }; -} // namespace DB - - +} // namespace DB diff --git a/dbms/src/Flash/Statistics/CommonExecutorImpl.h b/dbms/src/Flash/Statistics/CommonExecutorImpl.h index 56d55ea415f..42afeab9971 100644 --- a/dbms/src/Flash/Statistics/CommonExecutorImpl.h +++ b/dbms/src/Flash/Statistics/CommonExecutorImpl.h @@ -62,9 +62,9 @@ struct ExpandImpl { static constexpr bool has_extra_info = false; - static constexpr auto type = "Expand"; + static constexpr auto type = "Expand"; - static bool isMatch(const tipb::Executor *executor) + static bool isMatch(const tipb::Executor * executor) { return executor->has_expand(); } diff --git a/dbms/src/Flash/tests/gtest_expand_executor.cpp b/dbms/src/Flash/tests/gtest_expand_executor.cpp index 004c568560b..e27a6fe0b9f 100644 --- a/dbms/src/Flash/tests/gtest_expand_executor.cpp +++ b/dbms/src/Flash/tests/gtest_expand_executor.cpp @@ -42,7 +42,14 @@ try /// case 1 auto request = context .scan("test_db", "test_table") - .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + .expand(MockVVecColumnNameVec{ + MockVecColumnNameVec{ + MockColumnNameVec{"s1"}, + }, + MockVecColumnNameVec{ + MockColumnNameVec{"s2"}, + }, + }) .build(context); /// data flow: /// @@ -64,13 +71,20 @@ try request, {toNullableVec({"banana", {}, {}, {}, "banana", {}}), toNullableVec({{}, "apple", {}, {}, {}, "banana"}), - toVec({1,2,1,2,1,2})}); + toVec({1, 2, 1, 2, 1, 2})}); /// case 2 request = context .scan("test_db", "test_table") .filter(eq(col("s1"), col("s2"))) - .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + .expand(MockVVecColumnNameVec{ + MockVecColumnNameVec{ + MockColumnNameVec{"s1"}, + }, + MockVecColumnNameVec{ + MockColumnNameVec{"s2"}, + }, + }) .build(context); /// data flow: /// @@ -92,7 +106,7 @@ try request, {toNullableVec({"banana", {}}), toNullableVec({{}, "banana"}), - toVec({1,2})}); + toVec({1, 2})}); /// case 3: this case is only for non-planner mode. 
/// request = context @@ -113,8 +127,15 @@ try auto const_false = lit(Field(static_cast(0))); request = context .scan("test_db", "test_table") - .filter(const_false) // refuse all rows - .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"s1"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + .filter(const_false) // refuse all rows + .expand(MockVVecColumnNameVec{ + MockVecColumnNameVec{ + MockColumnNameVec{"s1"}, + }, + MockVecColumnNameVec{ + MockColumnNameVec{"s2"}, + }, + }) .build(context); executeAndAssertColumnsEqual( request, @@ -127,13 +148,22 @@ try .build(context); executeAndAssertColumnsEqual( request, - {toVec({1, 0, 1}), - toNullableVec({"apple", {}, "banana"}),}); + { + toVec({1, 0, 1}), + toNullableVec({"apple", {}, "banana"}), + }); request = context .scan("test_db", "test_table") .aggregation({Count(col("s1"))}, {col("s2")}) - .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + .expand(MockVVecColumnNameVec{ + MockVecColumnNameVec{ + MockColumnNameVec{"count(s1)"}, + }, + MockVecColumnNameVec{ + MockColumnNameVec{"s2"}, + }, + }) .build(context); /// data flow: /// @@ -159,20 +189,27 @@ try /// executeAndAssertColumnsEqual( request, - {toNullableVec({1, {}, 0, {}, 1,{}}), - toNullableVec({{}, "apple", {},{},{}, "banana"}), - toVec({1,2,1,2,1,2})}); + {toNullableVec({1, {}, 0, {}, 1, {}}), + toNullableVec({{}, "apple", {}, {}, {}, "banana"}), + toVec({1, 2, 1, 2, 1, 2})}); /// case 5 (test integrated with aggregation and projection) request = context .scan("test_db", "test_table") .aggregation({Count(col("s1"))}, {col("s2")}) - .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + .expand(MockVVecColumnNameVec{ + MockVecColumnNameVec{ + MockColumnNameVec{"count(s1)"}, + }, + MockVecColumnNameVec{ + MockColumnNameVec{"s2"}, + }, + }) .project({"count(s1)"}) .build(context); executeAndAssertColumnsEqual( request, - {toNullableVec({1, {}, 0, {}, 1,{}})}); + {toNullableVec({1, {}, 0, {}, 1, {}})}); /// case 6 (test integrated with aggregation and projection and limit) 1 /// note: by now, limit is executed before expand does to reduce unnecessary row expand work. 
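// The note above is worth quantifying: expand multiplies the row count by the
// number of grouping sets, so running limit first shrinks the replication work
// by the same factor. A tiny illustration, with numbers assumed purely for
// demonstration:
#include <algorithm>
#include <cstddef>
#include <iostream>

int main()
{
    const size_t input_rows = 1000, grouping_sets = 2, limit = 2;
    // expand first, then limit: every row is replicated before most are thrown away
    const size_t expand_then_limit_work = input_rows * grouping_sets; // 2000 rows materialized
    // limit first, then expand: only the surviving rows are replicated
    const size_t limit_then_expand_work = std::min(input_rows, limit) * grouping_sets; // 4 rows
    std::cout << expand_then_limit_work << " vs " << limit_then_expand_work << '\n';
    return 0;
}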
@@ -227,7 +264,14 @@ try request = context .scan("test_db", "test_table") .aggregation({Count(col("s1"))}, {col("s2")}) - .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + .expand(MockVVecColumnNameVec{ + MockVecColumnNameVec{ + MockColumnNameVec{"count(s1)"}, + }, + MockVecColumnNameVec{ + MockColumnNameVec{"s2"}, + }, + }) .project({"count(s1)"}) .topN({{"count(s1)", true}}, 2) .build(context); @@ -300,7 +344,14 @@ try request = context .receive("exchange1") .aggregation({Count(col("s1"))}, {col("s2")}) - .expand(MockVVecColumnNameVec{MockVecColumnNameVec{MockColumnNameVec{"count(s1)"},}, MockVecColumnNameVec{MockColumnNameVec{"s2"},},}) + .expand(MockVVecColumnNameVec{ + MockVecColumnNameVec{ + MockColumnNameVec{"count(s1)"}, + }, + MockVecColumnNameVec{ + MockColumnNameVec{"s2"}, + }, + }) .join(context.scan("test_db", "test_table").project({"s2"}), tipb::JoinType::TypeInnerJoin, {col("s2")}) .project({"count(s1)", "groupingID"}) .topN({{"groupingID", true}}, 2) @@ -349,8 +400,10 @@ try /// executeAndAssertColumnsEqual( request, - {toNullableVec({{}, {}}), - toVec({2,2}),}); + { + toNullableVec({{}, {}}), + toVec({2, 2}), + }); /// assert the input stream plan format. (under planner-enabled mode) diff --git a/dbms/src/Interpreters/Expand.cpp b/dbms/src/Interpreters/Expand.cpp index ee1753bf247..6280a05763d 100644 --- a/dbms/src/Interpreters/Expand.cpp +++ b/dbms/src/Interpreters/Expand.cpp @@ -23,7 +23,7 @@ namespace DB { -namespace /// anonymous namespace for storing private function utils. +namespace /// anonymous namespace for storing private function utils. { void convertColumnToNullable(ColumnWithTypeAndName & column) { @@ -31,23 +31,25 @@ void convertColumnToNullable(ColumnWithTypeAndName & column) if (column.column) column.column = makeNullable(column.column); } -} +} // namespace Expand::Expand(const DB::GroupingSets & gss) - : group_sets_names(gss){} + : group_sets_names(gss) +{} void Expand::getGroupingSetsDes(FmtBuffer & buffer) const { buffer.fmtAppend("["); - for (const auto & grouping_set: group_sets_names) + for (const auto & grouping_set : group_sets_names) { buffer.fmtAppend("{{"); - for (const auto & grouping_exprs: grouping_set) + for (const auto & grouping_exprs : grouping_set) { buffer.fmtAppend("<"); - for ( size_t i = 0; i < grouping_exprs.size(); i++) + for (size_t i = 0; i < grouping_exprs.size(); i++) { - if (i != 0) { + if (i != 0) + { buffer.fmtAppend(","); } buffer.fmtAppend(grouping_exprs.at(i)); @@ -111,7 +113,7 @@ void Expand::replicateAndFillNull(Block & block) const { // start from 1. Field grouping_id = j + 1; - added_grouping_id_column[0]->insert(grouping_id); + added_grouping_id_column[0]->insert(grouping_id); } } // todo: for some column overlapping in different grouping set, we should copy the overlapped column as a new column @@ -185,16 +187,18 @@ void Expand::replicateAndFillNull(Block & block) const // eg: for case above, for grouping_offset of = 0, we only set the every offset = 0 in each // small replicate_group_x to null. // - for (UInt64 j = 0; j < replicate_times_for_one_row; j++){ - if (j == grouping_offset) { - // only keep this column value for targeted replica. - continue; - } - // set this column as null for all the other targeted replica. 
- // todo: since nullable column always be prior to computation of null value first, should we clean the old data at the same pos in nested column - auto computed_offset = i * replicate_times_for_one_row + j; - cloned_one->getNullMapData().data()[computed_offset] = 1; - } + for (UInt64 j = 0; j < replicate_times_for_one_row; j++) + { + if (j == grouping_offset) + { + // only keep this column value for targeted replica. + continue; + } + // set this column as null for all the other targeted replica. + // todo: since nullable column always be prior to computation of null value first, should we clean the old data at the same pos in nested column + auto computed_offset = i * replicate_times_for_one_row + j; + cloned_one->getNullMapData().data()[computed_offset] = 1; + } } block.getByName(grouping_col).column = std::move(cloned_one); } @@ -204,16 +208,18 @@ void Expand::replicateAndFillNull(Block & block) const // return input from block. } -bool Expand::isInGroupSetColumn(String name) const{ - for(const auto& it1 : group_sets_names) +bool Expand::isInGroupSetColumn(String name) const +{ + for (const auto & it1 : group_sets_names) { // for every grouping set. - for(const auto& it2 : it1) + for (const auto & it2 : it1) { // for every grouping exprs - for(const auto& it3 : it2) + for (const auto & it3 : it2) { - if (it3 == name){ + if (it3 == name) + { return true; } } @@ -222,21 +228,21 @@ bool Expand::isInGroupSetColumn(String name) const{ return false; } -const GroupingColumnNames& Expand::getGroupSetColumnNamesByOffset(size_t offset) const +const GroupingColumnNames & Expand::getGroupSetColumnNamesByOffset(size_t offset) const { /// currently, there only can be one groupingExprs in one groupingSet before the planner supporting the grouping set merge. return group_sets_names[offset][0]; } -void Expand::getAllGroupSetColumnNames(std::set& name_set) const +void Expand::getAllGroupSetColumnNames(std::set & name_set) const { - for(const auto& it1 : group_sets_names) + for (const auto & it1 : group_sets_names) { // for every grouping set. - for(const auto& it2 : it1) + for (const auto & it2 : it1) { // for every grouping exprs - for(const auto& it3 : it2) + for (const auto & it3 : it2) { name_set.insert(it3); } @@ -246,9 +252,9 @@ void Expand::getAllGroupSetColumnNames(std::set& name_set) const std::shared_ptr Expand::sharedExpand(const GroupingSets & groupingSets) { - return std::make_shared(groupingSets); + return std::make_shared(groupingSets); } const std::string Expand::grouping_identifier_column_name = "groupingID"; const DataTypePtr Expand::grouping_identifier_column_type = std::make_shared(); -} +} // namespace DB diff --git a/dbms/src/Interpreters/Expand.h b/dbms/src/Interpreters/Expand.h index 33f9f94f024..e2128f476fd 100644 --- a/dbms/src/Interpreters/Expand.h +++ b/dbms/src/Interpreters/Expand.h @@ -51,7 +51,6 @@ using GroupingSet = std::vector; using GroupingSets = std::vector; - /** Data structure for implementation of Expand. * * Expand is a kind of operator used for replicate low-layer datasource rows to feed different aggregate @@ -119,13 +118,13 @@ class Expand // to illustrate what group this row is targeted for. 
void replicateAndFillNull(Block & block) const; - size_t getGroupSetNum() const {return group_sets_names.size();} + size_t getGroupSetNum() const { return group_sets_names.size(); } bool isInGroupSetColumn(String name) const; - const GroupingColumnNames& getGroupSetColumnNamesByOffset(size_t offset) const; + const GroupingColumnNames & getGroupSetColumnNamesByOffset(size_t offset) const; - void getAllGroupSetColumnNames(std::set& name_set) const; + void getAllGroupSetColumnNames(std::set & name_set) const; static std::shared_ptr sharedExpand(const GroupingSets & groupingSets); diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index ff7cec2b382..f5bc85d4903 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -144,7 +144,7 @@ ExpressionAction ExpressionAction::expandSource(std::shared_ptr ex } -void ExpressionAction::prepare(Block & sample_block) +void ExpressionAction::prepare(Block & sample_block) { /** Constant expressions should be evaluated, and put the result in sample_block. */ @@ -242,7 +242,8 @@ void ExpressionAction::prepare(Block & sample_block) auto name_set = std::set(); expand->getAllGroupSetColumnNames(name_set); // make grouping set column to be nullable. - for (const auto & col_name: name_set) { + for (const auto & col_name : name_set) + { auto & column_with_name = sample_block.getByName(col_name); column_with_name.type = makeNullable(column_with_name.type); if (column_with_name.column != nullptr) diff --git a/dbms/src/Interpreters/tests/gtest_block_expand.cpp b/dbms/src/Interpreters/tests/gtest_block_expand.cpp index 1a34e0fde30..97ef466c245 100644 --- a/dbms/src/Interpreters/tests/gtest_block_expand.cpp +++ b/dbms/src/Interpreters/tests/gtest_block_expand.cpp @@ -98,9 +98,12 @@ try const auto * col_1_nest = &static_cast(col_1->getNestedColumn()); for (int i = 0; i < int(expand_rows); ++i) { - if (res1[i] == "null") { + if (res1[i] == "null") + { ASSERT_EQ(col_1->isNullAt(i), true); - } else { + } + else + { ASSERT_EQ(col_1_nest->getDataAt(i), res1[i]); } } @@ -110,14 +113,17 @@ try const auto * col_2_nest = &static_cast(col_2->getNestedColumn()); for (int i = 0; i < int(expand_rows); ++i) { - if (res2[i] == "null") { + if (res2[i] == "null") + { ASSERT_EQ(col_2->isNullAt(i), true); - } else { + } + else + { ASSERT_EQ(col_2_nest->getDataAt(i), res2[i]); } } - const auto res3 = ColumnWithUInt64{1, 1,1,1, 0,0}; + const auto res3 = ColumnWithUInt64{1, 1, 1, 1, 0, 0}; const auto * col_3 = typeid_cast(block.getColumns()[3].get()); for (int i = 0; i < int(expand_rows); ++i) { @@ -192,9 +198,12 @@ try const auto * col_1_nest = &static_cast(col_1->getNestedColumn()); for (int i = 0; i < int(expand_rows); ++i) { - if (res1[i] == "null") { + if (res1[i] == "null") + { ASSERT_EQ(col_1->isNullAt(i), true); - } else { + } + else + { ASSERT_EQ(col_1_nest->getDataAt(i), res1[i]); } } @@ -204,9 +213,12 @@ try const auto * col_2_nest = &static_cast(col_2->getNestedColumn()); for (int i = 0; i < int(expand_rows); ++i) { - if (res2[i] == "null") { + if (res2[i] == "null") + { ASSERT_EQ(col_2->isNullAt(i), true); - } else { + } + else + { ASSERT_EQ(col_2_nest->getDataAt(i), res2[i]); } } @@ -217,9 +229,12 @@ try const auto * col_3_nest = &typeid_cast(col_3->getNestedColumn()); for (int i = 0; i < int(expand_rows); ++i) { - if (res3[i] == UInt64(-1)) { + if (res3[i] == UInt64(-1)) + { ASSERT_EQ(col_3->isNullAt(i), true); - } else { + } + else + { ASSERT_EQ(col_3_nest->getElement(i), 
res3[i]); } } @@ -236,7 +251,7 @@ try const ColumnsWithTypeAndName ori_col = { - toVec(col_name[0], ColumnWithInt64{}), // without data. + toVec(col_name[0], ColumnWithInt64{}), // without data. toVec(col_name[1], ColumnWithString{}), toVec(col_name[2], ColumnWithString{}), toVec(col_name[3], ColumnWithUInt64{}), diff --git a/dbms/src/TestUtils/ExecutorTestUtils.cpp b/dbms/src/TestUtils/ExecutorTestUtils.cpp index 403651569c2..505f5b2a284 100644 --- a/dbms/src/TestUtils/ExecutorTestUtils.cpp +++ b/dbms/src/TestUtils/ExecutorTestUtils.cpp @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include #include #include @@ -21,7 +22,6 @@ #include #include #include -#include #include diff --git a/dbms/src/TestUtils/executorSerializer.cpp b/dbms/src/TestUtils/executorSerializer.cpp index bffa9baf3cf..04b1df80abb 100644 --- a/dbms/src/TestUtils/executorSerializer.cpp +++ b/dbms/src/TestUtils/executorSerializer.cpp @@ -171,10 +171,11 @@ void serializeExpandSource(const String & executor_id, const tipb::Expand & expa buf.fmtAppend("{{"); for (auto i = 0; i < grouping_exprs.grouping_expr().size(); i++) { - if (i != 0) { + if (i != 0) + { buf.fmtAppend(","); } - auto expr = grouping_exprs.grouping_expr().Get(i); + auto expr = grouping_exprs.grouping_expr().Get(i); serializeExpression(expr, buf); } buf.fmtAppend("}}"); diff --git a/dbms/src/TestUtils/mockExecutor.cpp b/dbms/src/TestUtils/mockExecutor.cpp index bcb4ec63a19..d1be7e1c17f 100644 --- a/dbms/src/TestUtils/mockExecutor.cpp +++ b/dbms/src/TestUtils/mockExecutor.cpp @@ -365,15 +365,17 @@ DAGRequestBuilder & DAGRequestBuilder::sort(MockOrderByItemVec order_by_vec, boo DAGRequestBuilder & DAGRequestBuilder::expand(MockVVecColumnNameVec grouping_set_columns) { assert(root); - auto grouping_sets_ast = mock::MockVVecGroupingNameVec(); + auto grouping_sets_ast = mock::MockVVecGroupingNameVec(); auto grouping_col_collection = std::set(); - for (const auto & grouping_set : grouping_set_columns) { + for (const auto & grouping_set : grouping_set_columns) + { auto grouping_set_ast = mock::MockVecGroupingNameVec(); - for (const auto &grouping_exprs : grouping_set) { + for (const auto & grouping_exprs : grouping_set) + { auto grouping_exprs_ast = mock::MockGroupingNameVec(); - for (const auto &grouping_col : grouping_exprs) + for (const auto & grouping_col : grouping_exprs) { - auto ast_col_ptr = buildColumn(grouping_col); // string identifier change to ast column ref + auto ast_col_ptr = buildColumn(grouping_col); // string identifier change to ast column ref grouping_exprs_ast.emplace_back(std::move(ast_col_ptr)); grouping_col_collection.insert(grouping_col); } diff --git a/dbms/src/TestUtils/mockExecutor.h b/dbms/src/TestUtils/mockExecutor.h index da369c69ff7..248d303abb2 100644 --- a/dbms/src/TestUtils/mockExecutor.h +++ b/dbms/src/TestUtils/mockExecutor.h @@ -38,7 +38,7 @@ using MockOrderByItemVec = std::vector; using MockPartitionByItem = std::pair; using MockPartitionByItemVec = std::vector; using MockColumnNameVec = std::vector; -using MockVecColumnNameVec = std::vector; // for grouping set (every groupingExpr element inside is slice of column) +using MockVecColumnNameVec = std::vector; // for grouping set (every groupingExpr element inside is slice of column) using MockVVecColumnNameVec = std::vector; // for grouping sets using MockAstVec = std::vector; using MockWindowFrame = mock::MockWindowFrame; From 445b3c4590423257872aa9959c704ba9474b6483 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Sat, 11 Feb 2023 
19:08:25 +0800 Subject: [PATCH 16/31] fix rebase error Signed-off-by: AilinKid <3148019@qq.com> --- dbms/src/Common/TiFlashMetrics.h | 2 +- dbms/src/Flash/Planner/PhysicalPlan.cpp | 3 +- .../Flash/Planner/Plans/PhysicalExpand.cpp | 12 +- dbms/src/Flash/Planner/Plans/PhysicalExpand.h | 6 +- .../Flash/Planner/Plans/PhysicalRepeat.cpp | 116 ------------------ dbms/src/Flash/Planner/Plans/PhysicalRepeat.h | 57 --------- .../src/Flash/tests/gtest_expand_executor.cpp | 1 + 7 files changed, 13 insertions(+), 184 deletions(-) delete mode 100644 dbms/src/Flash/Planner/Plans/PhysicalRepeat.cpp delete mode 100644 dbms/src/Flash/Planner/Plans/PhysicalRepeat.h diff --git a/dbms/src/Common/TiFlashMetrics.h b/dbms/src/Common/TiFlashMetrics.h index 9f3d5f064c8..4b8778213fd 100644 --- a/dbms/src/Common/TiFlashMetrics.h +++ b/dbms/src/Common/TiFlashMetrics.h @@ -62,7 +62,7 @@ namespace DB F(type_exchange_receiver, {"type", "exchange_receiver"}), F(type_projection, {"type", "projection"}), \ F(type_partition_ts, {"type", "partition_table_scan"}), \ F(type_window, {"type", "window"}), F(type_window_sort, {"type", "window_sort"}), \ - F(type_repeat_source, {"type", "repeat_source"})) \ + F(type_expand, {"type", "expand"})) \ M(tiflash_coprocessor_request_duration_seconds, "Bucketed histogram of request duration", Histogram, \ F(type_cop, {{"type", "cop"}}, ExpBuckets{0.001, 2, 20}), \ F(type_batch, {{"type", "batch"}}, ExpBuckets{0.001, 2, 20}), \ diff --git a/dbms/src/Flash/Planner/PhysicalPlan.cpp b/dbms/src/Flash/Planner/PhysicalPlan.cpp index 12badaae469..059a681b867 100644 --- a/dbms/src/Flash/Planner/PhysicalPlan.cpp +++ b/dbms/src/Flash/Planner/PhysicalPlan.cpp @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include @@ -37,6 +37,7 @@ #include #include #include +#include #include namespace DB diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp index b43d676b23f..00c9ada765a 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp +++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include @@ -45,7 +45,7 @@ PhysicalPlanNodePtr PhysicalExpand::build( } DAGExpressionAnalyzer analyzer{child->getSchema(), context}; - ExpressionActionsPtr before_expand_actions = PhysicalPlanHelper::newActions(child->getSampleBlock(), context); + ExpressionActionsPtr before_expand_actions = PhysicalPlanHelper::newActions(child->getSampleBlock()); auto shared_expand = analyzer.buildExpandGroupingColumns(expand, before_expand_actions); @@ -71,9 +71,9 @@ PhysicalPlanNodePtr PhysicalExpand::build( } -void PhysicalExpand::expandTransform(DAGPipeline & child_pipeline, Context & context) +void PhysicalExpand::expandTransform(DAGPipeline & child_pipeline) { - auto expand_actions = PhysicalPlanHelper::newActions(child_pipeline.firstStream()->getHeader(), context); + auto expand_actions = PhysicalPlanHelper::newActions(child_pipeline.firstStream()->getHeader()); expand_actions->add(ExpressionAction::expandSource(shared_expand)); String expand_extra_info = fmt::format("expand, expand_executor_id = {}", execId()); child_pipeline.transform([&](auto & stream) { @@ -84,8 +84,8 @@ void PhysicalExpand::expandTransform(DAGPipeline & child_pipeline, Context & con void PhysicalExpand::transformImpl(DAGPipeline & pipeline, Context & context, size_t max_streams) { - child->transform(pipeline, context, max_streams); - expandTransform(pipeline, context); + 
child->buildBlockInputStream(pipeline, context, max_streams); + expandTransform(pipeline); } void PhysicalExpand::finalize(const Names & parent_require) diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.h b/dbms/src/Flash/Planner/Plans/PhysicalExpand.h index 14b910d8a75..7ff84324319 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.h +++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.h @@ -14,7 +14,7 @@ #pragma once -#include +#include #include #include #include @@ -45,12 +45,12 @@ class PhysicalExpand : public PhysicalUnary void finalize(const Names & parent_require) override; - void expandTransform(DAGPipeline & child_pipeline, Context & context); + void expandTransform(DAGPipeline & child_pipeline); const Block & getSampleBlock() const override; private: - void transformImpl(DAGPipeline & pipeline, Context & context, size_t max_streams) override; + void transformImpl(DAGPipeline & pipeline, Context & context, size_t max_streams); std::shared_ptr shared_expand; Block sample_block; }; diff --git a/dbms/src/Flash/Planner/Plans/PhysicalRepeat.cpp b/dbms/src/Flash/Planner/Plans/PhysicalRepeat.cpp deleted file mode 100644 index 51eaaeaa4c3..00000000000 --- a/dbms/src/Flash/Planner/Plans/PhysicalRepeat.cpp +++ /dev/null @@ -1,116 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ -PhysicalPlanNodePtr PhysicalExpand::build( - const Context & context, - const String & executor_id, - const LoggerPtr & log, - const tipb::Expand & expand, - const PhysicalPlanNodePtr & child) -{ - assert(child); - - child->finalize(); - - if (unlikely(expand.grouping_sets().empty())) - { - //should not reach here - throw TiFlashException("Repeat executor without grouping sets", Errors::Planner::BadRequest); - } - - DAGExpressionAnalyzer analyzer{child->getSchema(), context}; - ExpressionActionsPtr before_repeat_actions = PhysicalPlanHelper::newActions(child->getSampleBlock(), context); - - - auto shared_repeat = analyzer.buildExpandGroupingColumns(expand, before_repeat_actions); - - // construct sample block. - NamesAndTypes repeat_output_columns; - auto child_header = child->getSchema(); - for (const auto & one : child_header) - { - repeat_output_columns.emplace_back(one.name, shared_repeat->isInGroupSetColumn(one.name)? 
makeNullable(one.type): one.type); - } - repeat_output_columns.emplace_back(shared_repeat->grouping_identifier_column_name, shared_repeat->grouping_identifier_column_type); - - auto physical_repeat = std::make_shared( - executor_id, - repeat_output_columns, - log->identifier(), - child, - shared_repeat, - Block(repeat_output_columns)); - - return physical_repeat; -} - - -void PhysicalExpand::repeatTransform(DAGPipeline & child_pipeline, Context & context) -{ - auto repeat_actions = PhysicalPlanHelper::newActions(child_pipeline.firstStream()->getHeader(), context); - repeat_actions->add(ExpressionAction::expandSource(shared_expand)); - String repeat_extra_info = fmt::format("repeat source, repeat_executor_id = {}", execId()); - child_pipeline.transform([&](auto &stream) { - stream = std::make_shared(stream, repeat_actions); - stream->setExtraInfo(repeat_extra_info); - }); -} - -void PhysicalExpand::transformImpl(DAGPipeline & pipeline, Context & context, size_t max_streams) -{ - child->transform(pipeline, context, max_streams); - repeatTransform(pipeline, context); -} - -void PhysicalExpand::finalize(const Names & parent_require) -{ - FinalizeHelper::checkSchemaContainsParentRequire(schema, parent_require); - Names required_output; - required_output.reserve( shared_expand->getGroupSetNum()); // grouping set column should be existed in the child output schema. - auto name_set = std::set(); - shared_expand->getAllGroupSetColumnNames(name_set); - // append parent_require column it may expect self-filled groupingID. - for (const auto & one : parent_require) - { - if (one != Expand::grouping_identifier_column_name) - { - name_set.insert(one); - } - } - for (const auto & grouping_name: name_set) { - required_output.emplace_back(grouping_name); - } - child->finalize(required_output); -} - -const Block & PhysicalExpand::getSampleBlock() const -{ - return sample_block; -} -} // namespace DB diff --git a/dbms/src/Flash/Planner/Plans/PhysicalRepeat.h b/dbms/src/Flash/Planner/Plans/PhysicalRepeat.h deleted file mode 100644 index a2696affb5b..00000000000 --- a/dbms/src/Flash/Planner/Plans/PhysicalRepeat.h +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#pragma once - -#include -#include -#include -#include - -namespace DB -{ -class PhysicalExpand : public PhysicalUnary -{ -public: - static PhysicalPlanNodePtr build( - const Context & context, - const String & executor_id, - const LoggerPtr & log, - const tipb::Expand & expand, - const PhysicalPlanNodePtr & child); - - PhysicalExpand( - const String & executor_id_, - const NamesAndTypes & schema_, - const String & req_id, - const PhysicalPlanNodePtr & child_, - const std::shared_ptr & shared_expand, - const Block & sample_block_) - : PhysicalUnary(executor_id_, PlanType::Repeat, schema_, req_id, child_) - , shared_expand(shared_expand), sample_block(sample_block_){} - - void finalize(const Names & parent_require) override; - - void repeatTransform(DAGPipeline & child_pipeline, Context & context); - - const Block & getSampleBlock() const override; - -private: - void transformImpl(DAGPipeline & pipeline, Context & context, size_t max_streams) override; - std::shared_ptr shared_expand; - Block sample_block; -}; -} // namespace DB - - diff --git a/dbms/src/Flash/tests/gtest_expand_executor.cpp b/dbms/src/Flash/tests/gtest_expand_executor.cpp index e27a6fe0b9f..3766c6066cc 100644 --- a/dbms/src/Flash/tests/gtest_expand_executor.cpp +++ b/dbms/src/Flash/tests/gtest_expand_executor.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include From 5bba07f44d6777ad7be92c24fb38ce84197e2ef2 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Mon, 13 Feb 2023 18:46:16 +0800 Subject: [PATCH 17/31] address partial of haisheng's comment Signed-off-by: AilinKid <3148019@qq.com> --- .../ExpressionBlockInputStream.cpp | 10 -------- .../DataStreams/ExpressionBlockInputStream.h | 1 - .../Flash/Planner/Plans/PhysicalExpand.cpp | 23 +++++++++++++++---- dbms/src/Flash/Planner/Plans/PhysicalExpand.h | 7 +++++- .../src/Flash/tests/gtest_expand_executor.cpp | 2 +- dbms/src/Interpreters/Expand.cpp | 16 ++++++------- dbms/src/Interpreters/ExpressionActions.cpp | 2 +- dbms/src/TestUtils/ExecutorTestUtils.h | 4 ++-- dbms/src/TestUtils/executorSerializer.cpp | 12 +++++----- .../TestUtils/tests/gtest_mock_executors.cpp | 1 + 10 files changed, 44 insertions(+), 34 deletions(-) diff --git a/dbms/src/DataStreams/ExpressionBlockInputStream.cpp b/dbms/src/DataStreams/ExpressionBlockInputStream.cpp index 27daa61152a..5bc7a4685e4 100644 --- a/dbms/src/DataStreams/ExpressionBlockInputStream.cpp +++ b/dbms/src/DataStreams/ExpressionBlockInputStream.cpp @@ -53,14 +53,4 @@ Block ExpressionBlockInputStream::readImpl() expression->execute(res); return res; } - -void ExpressionBlockInputStream::appendInfo(FmtBuffer & buffer) const -{ - if (!expression.get()->getActions().empty() && expression.get()->getActions()[0].expand != nullptr) - { - buffer.fmtAppend(": grouping set "); - expression.get()->getActions()[0].expand->getGroupingSetsDes(buffer); - } -} - } // namespace DB diff --git a/dbms/src/DataStreams/ExpressionBlockInputStream.h b/dbms/src/DataStreams/ExpressionBlockInputStream.h index 73141446414..1d1e059d51d 100644 --- a/dbms/src/DataStreams/ExpressionBlockInputStream.h +++ b/dbms/src/DataStreams/ExpressionBlockInputStream.h @@ -41,7 +41,6 @@ class ExpressionBlockInputStream : public IProfilingBlockInputStream String getName() const override { return NAME; } Block getTotals() override; Block getHeader() const override; - void appendInfo(FmtBuffer & buffer) const override; protected: Block readImpl() override; diff --git 
a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp index 00c9ada765a..8ec2083179e 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp +++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp @@ -19,10 +19,13 @@ #include #include #include +#include #include #include #include #include +#include +#include #include namespace DB @@ -46,9 +49,10 @@ PhysicalPlanNodePtr PhysicalExpand::build( DAGExpressionAnalyzer analyzer{child->getSchema(), context}; ExpressionActionsPtr before_expand_actions = PhysicalPlanHelper::newActions(child->getSampleBlock()); - + ExpressionActionsPtr expand_actions_itself = PhysicalPlanHelper::newActions(child->getSampleBlock()); auto shared_expand = analyzer.buildExpandGroupingColumns(expand, before_expand_actions); + expand_actions_itself->add(ExpressionAction::expandSource(shared_expand)); // construct sample block. NamesAndTypes expand_output_columns; @@ -65,6 +69,7 @@ PhysicalPlanNodePtr PhysicalExpand::build( log->identifier(), child, shared_expand, + expand_actions_itself, Block(expand_output_columns)); return physical_expand; @@ -73,16 +78,26 @@ PhysicalPlanNodePtr PhysicalExpand::build( void PhysicalExpand::expandTransform(DAGPipeline & child_pipeline) { - auto expand_actions = PhysicalPlanHelper::newActions(child_pipeline.firstStream()->getHeader()); - expand_actions->add(ExpressionAction::expandSource(shared_expand)); String expand_extra_info = fmt::format("expand, expand_executor_id = {}", execId()); + FmtBuffer fb; + fb.append(": grouping set "); + shared_expand->getGroupingSetsDes(fb); + expand_extra_info.append(fb.toString()); child_pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, expand_actions, log->identifier()); stream->setExtraInfo(expand_extra_info); }); } -void PhysicalExpand::transformImpl(DAGPipeline & pipeline, Context & context, size_t max_streams) +void PhysicalExpand::buildPipelineExec(PipelineExecGroupBuilder & group_builder, Context &, size_t) +{ + auto input_header = group_builder.getCurrentHeader(); + group_builder.transform([&](auto &builder) { + builder.appendTransformOp(std::make_unique(group_builder.exec_status, expand_actions, log->identifier())); + }); +} + +void PhysicalExpand::buildBlockInputStreamImpl(DAGPipeline & pipeline, Context & context, size_t max_streams) { child->buildBlockInputStream(pipeline, context, max_streams); expandTransform(pipeline); diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.h b/dbms/src/Flash/Planner/Plans/PhysicalExpand.h index 7ff84324319..77d5abdcd87 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.h +++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.h @@ -37,9 +37,11 @@ class PhysicalExpand : public PhysicalUnary const String & req_id, const PhysicalPlanNodePtr & child_, const std::shared_ptr & shared_expand, + const ExpressionActionsPtr & expand_actions, const Block & sample_block_) : PhysicalUnary(executor_id_, PlanType::Expand, schema_, req_id, child_) , shared_expand(shared_expand) + , expand_actions(expand_actions) , sample_block(sample_block_) {} @@ -49,9 +51,12 @@ class PhysicalExpand : public PhysicalUnary const Block & getSampleBlock() const override; + void buildPipelineExec(PipelineExecGroupBuilder & group_builder, Context & /*context*/, size_t /*concurrency*/) override; + private: - void transformImpl(DAGPipeline & pipeline, Context & context, size_t max_streams); + void buildBlockInputStreamImpl(DAGPipeline & pipeline, Context & context, size_t max_streams) override; std::shared_ptr 
shared_expand; + ExpressionActionsPtr expand_actions; Block sample_block; }; } // namespace DB diff --git a/dbms/src/Flash/tests/gtest_expand_executor.cpp b/dbms/src/Flash/tests/gtest_expand_executor.cpp index 3766c6066cc..bf7a1c9a8b7 100644 --- a/dbms/src/Flash/tests/gtest_expand_executor.cpp +++ b/dbms/src/Flash/tests/gtest_expand_executor.cpp @@ -426,7 +426,7 @@ CreatingSets Expression: HashJoinProbe: Expression: - Expression: : grouping set [{}{}] + Expression: }{}]> Expression: SharedQuery: ParallelAggregating, max_threads: 10, final: true diff --git a/dbms/src/Interpreters/Expand.cpp b/dbms/src/Interpreters/Expand.cpp index 6280a05763d..d7aa21e2457 100644 --- a/dbms/src/Interpreters/Expand.cpp +++ b/dbms/src/Interpreters/Expand.cpp @@ -39,26 +39,26 @@ Expand::Expand(const DB::GroupingSets & gss) void Expand::getGroupingSetsDes(FmtBuffer & buffer) const { - buffer.fmtAppend("["); + buffer.append("["); for (const auto & grouping_set : group_sets_names) { - buffer.fmtAppend("{{"); + buffer.append("{"); for (const auto & grouping_exprs : grouping_set) { - buffer.fmtAppend("<"); + buffer.append("<"); for (size_t i = 0; i < grouping_exprs.size(); i++) { if (i != 0) { - buffer.fmtAppend(","); + buffer.append(","); } - buffer.fmtAppend(grouping_exprs.at(i)); + buffer.append(grouping_exprs.at(i)); } - buffer.fmtAppend(">"); + buffer.append(">"); } - buffer.fmtAppend("}}"); + buffer.append("}"); } - buffer.fmtAppend("]"); + buffer.append("]"); } /// for cases like: select count(distinct a), count(distinct b) from t; diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index f5bc85d4903..aa5d856a966 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -258,7 +258,7 @@ void ExpressionAction::prepare(Block & sample_block) { Block new_block; - for (auto & projection : projections) // change alias + for (auto & projection : projections) { const std::string & name = projection.first; const std::string & alias = projection.second; diff --git a/dbms/src/TestUtils/ExecutorTestUtils.h b/dbms/src/TestUtils/ExecutorTestUtils.h index 13363768a3b..2bf97d81a17 100644 --- a/dbms/src/TestUtils/ExecutorTestUtils.h +++ b/dbms/src/TestUtils/ExecutorTestUtils.h @@ -32,11 +32,11 @@ ColumnsWithTypeAndName readBlock(BlockInputStreamPtr stream); ColumnsWithTypeAndName readBlocks(std::vector streams); #define WRAP_FOR_TEST_BEGIN \ - std::vector planner_bools{false, true}; \ + std::vector planner_bools{true}; \ for (auto enable_planner : planner_bools) \ { \ enablePlanner(enable_planner); \ - std::vector pipeline_bools{false}; \ + std::vector pipeline_bools{}; \ if (enable_planner) \ pipeline_bools.push_back(true); \ for (auto enable_pipeline : pipeline_bools) \ diff --git a/dbms/src/TestUtils/executorSerializer.cpp b/dbms/src/TestUtils/executorSerializer.cpp index 04b1df80abb..7c1cfa980b6 100644 --- a/dbms/src/TestUtils/executorSerializer.cpp +++ b/dbms/src/TestUtils/executorSerializer.cpp @@ -165,24 +165,24 @@ void serializeExpandSource(const String & executor_id, const tipb::Expand & expa buf.fmtAppend("{} | expanded_by: [", executor_id); for (const auto & grouping_set : expand.grouping_sets()) { - buf.fmtAppend("<"); + buf.append("<"); for (const auto & grouping_exprs : grouping_set.grouping_exprs()) { - buf.fmtAppend("{{"); + buf.append("{"); for (auto i = 0; i < grouping_exprs.grouping_expr().size(); i++) { if (i != 0) { - buf.fmtAppend(","); + buf.append(","); } auto expr = 
grouping_exprs.grouping_expr().Get(i); serializeExpression(expr, buf); } - buf.fmtAppend("}}"); + buf.append("}"); } - buf.fmtAppend(">"); + buf.append(">"); } - buf.fmtAppend("]\n"); + buf.append("]\n"); } void serializeJoin(const String & executor_id, const tipb::Join & join, FmtBuffer & buf) diff --git a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp index f1826226aeb..c94f2509c84 100644 --- a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp +++ b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp @@ -14,6 +14,7 @@ #include #include +#include namespace DB { From 74ea6521eea09eaf90289c13c93fdc1b90ca3953 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Mon, 13 Feb 2023 19:25:39 +0800 Subject: [PATCH 18/31] make fmt Signed-off-by: AilinKid <3148019@qq.com> --- dbms/src/Common/TiFlashMetrics.h | 2 +- dbms/src/Flash/Planner/PlanType.h | 1 + .../Flash/Planner/Plans/PhysicalExpand.cpp | 2 +- .../src/Flash/tests/gtest_expand_executor.cpp | 1 - .../Interpreters/tests/gtest_block_expand.cpp | 24 +++++++++---------- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/dbms/src/Common/TiFlashMetrics.h b/dbms/src/Common/TiFlashMetrics.h index 4b8778213fd..3afacefabe4 100644 --- a/dbms/src/Common/TiFlashMetrics.h +++ b/dbms/src/Common/TiFlashMetrics.h @@ -277,7 +277,7 @@ namespace DB M(tiflash_compute_request_unit, "Request Unit used by tiflash compute", Counter, \ F(type_mpp, {{"type", "mpp"}, ComputeLabelHolder::instance().getClusterIdLabel(), ComputeLabelHolder::instance().getProcessIdLabel()}), \ F(type_cop, {{"type", "cop"}, ComputeLabelHolder::instance().getClusterIdLabel(), ComputeLabelHolder::instance().getProcessIdLabel()}), \ - F(type_batch, {{"type", "batch"}, ComputeLabelHolder::instance().getClusterIdLabel(), ComputeLabelHolder::instance().getProcessIdLabel()})) \ + F(type_batch, {{"type", "batch"}, ComputeLabelHolder::instance().getClusterIdLabel(), ComputeLabelHolder::instance().getProcessIdLabel()})) // clang-format on diff --git a/dbms/src/Flash/Planner/PlanType.h b/dbms/src/Flash/Planner/PlanType.h index cfbdff03e77..6b8e540b6a9 100644 --- a/dbms/src/Flash/Planner/PlanType.h +++ b/dbms/src/Flash/Planner/PlanType.h @@ -15,6 +15,7 @@ #pragma once #include + #include "Common/Exception.h" namespace DB diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp index 8ec2083179e..0b69b747177 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp +++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp @@ -92,7 +92,7 @@ void PhysicalExpand::expandTransform(DAGPipeline & child_pipeline) void PhysicalExpand::buildPipelineExec(PipelineExecGroupBuilder & group_builder, Context &, size_t) { auto input_header = group_builder.getCurrentHeader(); - group_builder.transform([&](auto &builder) { + group_builder.transform([&](auto & builder) { builder.appendTransformOp(std::make_unique(group_builder.exec_status, expand_actions, log->identifier())); }); } diff --git a/dbms/src/Flash/tests/gtest_expand_executor.cpp b/dbms/src/Flash/tests/gtest_expand_executor.cpp index bf7a1c9a8b7..6401edbc424 100644 --- a/dbms/src/Flash/tests/gtest_expand_executor.cpp +++ b/dbms/src/Flash/tests/gtest_expand_executor.cpp @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include #include #include diff --git a/dbms/src/Interpreters/tests/gtest_block_expand.cpp b/dbms/src/Interpreters/tests/gtest_block_expand.cpp index 97ef466c245..039a1545888 100644 --- a/dbms/src/Interpreters/tests/gtest_block_expand.cpp +++ b/dbms/src/Interpreters/tests/gtest_block_expand.cpp @@ -88,7 +88,7 @@ try const auto res0 = ColumnWithInt64{1, 1, 0, 0, -1, -1}; const auto * col_0 = typeid_cast(block.getColumns()[0].get()); - for (int i = 0; i < int(expand_rows); ++i) + for (int i = 0; i < static_cast(expand_rows); ++i) { ASSERT_EQ(col_0->getElement(i), res0[i]); } @@ -96,7 +96,7 @@ try const auto res1 = ColumnWithString{"1 ", "null", "1 ", "null", "1 ", "null"}; const auto * col_1 = typeid_cast(block.getColumns()[1].get()); const auto * col_1_nest = &static_cast(col_1->getNestedColumn()); - for (int i = 0; i < int(expand_rows); ++i) + for (int i = 0; i < static_cast(expand_rows); ++i) { if (res1[i] == "null") { @@ -111,7 +111,7 @@ try const auto res2 = ColumnWithString{"null", "1", "null", "2", "null", "3"}; const auto * col_2 = typeid_cast(block.getColumns()[2].get()); const auto * col_2_nest = &static_cast(col_2->getNestedColumn()); - for (int i = 0; i < int(expand_rows); ++i) + for (int i = 0; i < static_cast(expand_rows); ++i) { if (res2[i] == "null") { @@ -125,14 +125,14 @@ try const auto res3 = ColumnWithUInt64{1, 1, 1, 1, 0, 0}; const auto * col_3 = typeid_cast(block.getColumns()[3].get()); - for (int i = 0; i < int(expand_rows); ++i) + for (int i = 0; i < static_cast(expand_rows); ++i) { ASSERT_EQ(col_3->getElement(i), res3[i]); } const auto res4 = ColumnWithUInt64{1, 2, 1, 2, 1, 2}; const auto * col_4 = typeid_cast(block.getColumns()[4].get()); - for (int i = 0; i < int(expand_rows); ++i) + for (int i = 0; i < static_cast(expand_rows); ++i) { ASSERT_EQ(col_4->getElement(i), res4[i]); } @@ -188,7 +188,7 @@ try const auto res0 = ColumnWithInt64{1, 1, 1, 0, 0, 0, -1, -1, -1}; const auto * col_0 = typeid_cast(block.getColumns()[0].get()); - for (int i = 0; i < int(expand_rows); ++i) + for (int i = 0; i < static_cast(expand_rows); ++i) { ASSERT_EQ(col_0->getElement(i), res0[i]); } @@ -196,7 +196,7 @@ try const auto res1 = ColumnWithString{"aaa", "null", "null", "bbb", "null", "null", "ccc", "null", "null"}; const auto * col_1 = typeid_cast(block.getColumns()[1].get()); const auto * col_1_nest = &static_cast(col_1->getNestedColumn()); - for (int i = 0; i < int(expand_rows); ++i) + for (int i = 0; i < static_cast(expand_rows); ++i) { if (res1[i] == "null") { @@ -211,7 +211,7 @@ try const auto res2 = ColumnWithString{"null", "1", "null", "null", "2", "null", "null", "3", "null"}; const auto * col_2 = typeid_cast(block.getColumns()[2].get()); const auto * col_2_nest = &static_cast(col_2->getNestedColumn()); - for (int i = 0; i < int(expand_rows); ++i) + for (int i = 0; i < static_cast(expand_rows); ++i) { if (res2[i] == "null") { @@ -224,12 +224,12 @@ try } // use UInt64(-1) to represent null. 
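// (illustrative aside, not part of the patch) UInt64(-1) can serve as a null sentinel here because
// unsigned conversion wraps to the all-ones bit pattern: static_cast<UInt64>(-1)
// == std::numeric_limits<UInt64>::max() == 0xFFFFFFFFFFFFFFFF, a value the expected
// test data never holds as a real element.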
- const auto res3 = ColumnWithUInt64{UInt64(-1), UInt64(-1), 1, UInt64(-1), UInt64(-1), 1, UInt64(-1), UInt64(-1), 0}; + const auto res3 = ColumnWithUInt64{static_cast(-1), static_cast(-1), 1, static_cast(-1), static_cast(-1), 1, static_cast(-1), static_cast(-1), 0}; const auto * col_3 = typeid_cast(block.getColumns()[3].get()); const auto * col_3_nest = &typeid_cast(col_3->getNestedColumn()); - for (int i = 0; i < int(expand_rows); ++i) + for (int i = 0; i < static_cast(expand_rows); ++i) { - if (res3[i] == UInt64(-1)) + if (res3[i] == static_cast(-1)) { ASSERT_EQ(col_3->isNullAt(i), true); } @@ -241,7 +241,7 @@ try const auto res4 = ColumnWithUInt64{1, 2, 3, 1, 2, 3, 1, 2, 3}; const auto * col_4 = typeid_cast(block.getColumns()[4].get()); - for (int i = 0; i < int(expand_rows); ++i) + for (int i = 0; i < static_cast(expand_rows); ++i) { ASSERT_EQ(col_4->getElement(i), res4[i]); } From 11e8a46618dcdd42395bdf019e66a800d20d267b Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Fri, 17 Feb 2023 13:40:38 +0800 Subject: [PATCH 19/31] add Signed-off-by: AilinKid <3148019@qq.com> --- .../Coprocessor/DAGExpressionAnalyzer.cpp | 9 ++++---- .../Flash/Coprocessor/DAGExpressionAnalyzer.h | 2 +- .../Coprocessor/collectOutputFieldTypes.cpp | 5 +--- dbms/src/Flash/Planner/PlanType.h | 2 -- .../Flash/Planner/Plans/PhysicalExpand.cpp | 14 +++++------ dbms/src/Flash/Planner/Plans/PhysicalExpand.h | 4 ++-- dbms/src/Interpreters/Expand.cpp | 23 ++++++++----------- dbms/src/Interpreters/Expand.h | 2 +- dbms/src/Interpreters/ExpressionActions.cpp | 8 +++---- dbms/src/Interpreters/ExpressionActions.h | 2 +- dbms/src/TestUtils/executorSerializer.cpp | 2 +- 11 files changed, 30 insertions(+), 43 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index 816b0e324dd..370505c9b8c 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -805,7 +805,7 @@ NamesAndTypes DAGExpressionAnalyzer::buildOrderColumns( return order_columns; } -std::shared_ptr DAGExpressionAnalyzer::buildExpandGroupingColumns( +GroupingSets DAGExpressionAnalyzer::buildExpandGroupingColumns( const tipb::Expand & expand, const ExpressionActionsPtr & actions) { @@ -843,8 +843,7 @@ std::shared_ptr DAGExpressionAnalyzer::buildExpandGroupingColumns( mutable_one.type = makeNullable(mutable_one.type); } source_columns.emplace_back(Expand::grouping_identifier_column_name, Expand::grouping_identifier_column_type); - auto shared_expand = Expand::sharedExpand(group_sets_columns); - return shared_expand; + return group_sets_columns; } ExpressionActionsPtr DAGExpressionAnalyzer::appendExpand( @@ -856,8 +855,8 @@ ExpressionActionsPtr DAGExpressionAnalyzer::appendExpand( { last_step.required_output.push_back(origin_col.name); } - auto shared_expand = buildExpandGroupingColumns(expand, last_step.actions); - last_step.actions->add(ExpressionAction::expandSource(shared_expand)); + auto grouping_sets = buildExpandGroupingColumns(expand, last_step.actions); + last_step.actions->add(ExpressionAction::expandSource(grouping_sets)); auto before_expand = chain.getLastActions(); chain.finalize(); diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h index 7436841034a..4cec8ec0358 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h @@ -71,7 +71,7 @@ class DAGExpressionAnalyzer : 
private boost::noncopyable ExpressionActionsChain & chain, const std::vector & conditions); - std::shared_ptr<Expand> buildExpandGroupingColumns(const tipb::Expand & expand, const ExpressionActionsPtr & actions); + GroupingSets buildExpandGroupingColumns(const tipb::Expand & expand, const ExpressionActionsPtr & actions); ExpressionActionsPtr appendExpand(const tipb::Expand & expand, ExpressionActionsChain & chain); diff --git a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp index 94c1b6c573a..8813c36f24e 100644 --- a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp +++ b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp @@ -117,10 +117,7 @@ bool collectForExpand(std::vector & out_field_types, const tipb { // assert that: grouping_col must be the column ref guaranteed by tidb. auto column_index = decodeDAGInt64(grouping_col.val()); - if (column_index < 0 || column_index >= static_cast<Int64>(out_child_fields.size())) - { - throw TiFlashException("Column index out of bound", Errors::Coprocessor::BadRequest); - } + RUNTIME_CHECK_MSG(column_index >= 0 && column_index < static_cast<Int64>(out_child_fields.size()), "Column index out of bound"); out_child_fields[column_index].set_flag(out_child_fields[column_index].flag() & (~TiDB::ColumnFlagNotNull)); } } diff --git a/dbms/src/Flash/Planner/PlanType.h b/dbms/src/Flash/Planner/PlanType.h index 6b8e540b6a9..c3c31ce3c81 100644 --- a/dbms/src/Flash/Planner/PlanType.h +++ b/dbms/src/Flash/Planner/PlanType.h @@ -16,8 +16,6 @@ #include -#include "Common/Exception.h" - namespace DB { struct PlanType diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp index 0b69b747177..5e0294a5973 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp +++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp @@ -51,24 +51,25 @@ PhysicalPlanNodePtr PhysicalExpand::build( DAGExpressionAnalyzer analyzer{child->getSchema(), context}; ExpressionActionsPtr before_expand_actions = PhysicalPlanHelper::newActions(child->getSampleBlock()); ExpressionActionsPtr expand_actions_itself = PhysicalPlanHelper::newActions(child->getSampleBlock()); - auto shared_expand = analyzer.buildExpandGroupingColumns(expand, before_expand_actions); - expand_actions_itself->add(ExpressionAction::expandSource(shared_expand)); + auto grouping_sets = analyzer.buildExpandGroupingColumns(expand, before_expand_actions); + auto expand_action = ExpressionAction::expandSource(grouping_sets); + expand_actions_itself->add(expand_action); // construct sample block. NamesAndTypes expand_output_columns; auto child_header = child->getSchema(); for (const auto & one : child_header) { - expand_output_columns.emplace_back(one.name, shared_expand->isInGroupSetColumn(one.name) ?
makeNullable(one.type) : one.type); } - expand_output_columns.emplace_back(shared_expand->grouping_identifier_column_name, shared_expand->grouping_identifier_column_type); + expand_output_columns.emplace_back(expand_action.expand->grouping_identifier_column_name, expand_action.expand->grouping_identifier_column_type); auto physical_expand = std::make_shared( executor_id, expand_output_columns, log->identifier(), child, - shared_expand, + expand_action.expand, expand_actions_itself, Block(expand_output_columns)); @@ -108,8 +109,7 @@ void PhysicalExpand::finalize(const Names & parent_require) FinalizeHelper::checkSchemaContainsParentRequire(schema, parent_require); Names required_output; required_output.reserve(shared_expand->getGroupSetNum()); // grouping set column should be existed in the child output schema. - auto name_set = std::set(); - shared_expand->getAllGroupSetColumnNames(name_set); + auto name_set = shared_expand->getAllGroupSetColumnNames(); // append parent_require column it may expect self-filled groupingID. for (const auto & one : parent_require) { diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.h b/dbms/src/Flash/Planner/Plans/PhysicalExpand.h index 77d5abdcd87..bca4f9beedd 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.h +++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.h @@ -36,7 +36,7 @@ class PhysicalExpand : public PhysicalUnary const NamesAndTypes & schema_, const String & req_id, const PhysicalPlanNodePtr & child_, - const std::shared_ptr & shared_expand, + const std::shared_ptr & shared_expand, const ExpressionActionsPtr & expand_actions, const Block & sample_block_) : PhysicalUnary(executor_id_, PlanType::Expand, schema_, req_id, child_) @@ -55,7 +55,7 @@ class PhysicalExpand : public PhysicalUnary private: void buildBlockInputStreamImpl(DAGPipeline & pipeline, Context & context, size_t max_streams) override; - std::shared_ptr shared_expand; + std::shared_ptr shared_expand; ExpressionActionsPtr expand_actions; Block sample_block; }; diff --git a/dbms/src/Interpreters/Expand.cpp b/dbms/src/Interpreters/Expand.cpp index d7aa21e2457..702e351cdf3 100644 --- a/dbms/src/Interpreters/Expand.cpp +++ b/dbms/src/Interpreters/Expand.cpp @@ -46,14 +46,7 @@ void Expand::getGroupingSetsDes(FmtBuffer & buffer) const for (const auto & grouping_exprs : grouping_set) { buffer.append("<"); - for (size_t i = 0; i < grouping_exprs.size(); i++) - { - if (i != 0) - { - buffer.append(","); - } - buffer.append(grouping_exprs.at(i)); - } + buffer.joinStr(grouping_exprs.begin(), grouping_exprs.end()); buffer.append(">"); } buffer.append("}"); @@ -103,13 +96,13 @@ void Expand::replicateAndFillNull(Block & block) const added_grouping_id_column.reserve(1); added_grouping_id_column.push_back(grouping_id_column->getPtr()); - for (size_t i = 0; i < origin_rows; i++) + for (size_t i = 0; i < origin_rows; ++i) { current_offset += replicate_times_for_one_row; (*offsets_to_replicate)[i] = current_offset; // in the same loop, to fill the grouping id. - for (UInt64 j = 0; j < replicate_times_for_one_row; j++) + for (UInt64 j = 0; j < replicate_times_for_one_row; ++j) { // start from 1. Field grouping_id = j + 1; @@ -143,7 +136,7 @@ void Expand::replicateAndFillNull(Block & block) const // after replication, it just copied the same row for N times, we still need to fill corresponding Field with null value. 
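// (illustrative sketch, not part of the patch) with grouping sets <{a}>, <{b}> and input
// rows (a=1, b=1), (a=2, b=2), the replicate-then-fill pass described above yields:
//     a     b     groupingID
//     1     null  1          <- copy of row 1 targeting {a}: b is nulled
//     null  1     2          <- copy of row 1 targeting {b}: a is nulled
//     2     null  1          <- copy of row 2 targeting {a}
//     null  2     2          <- copy of row 2 targeting {b}
// i.e. every source row appears once per grouping set, and the 1-based groupingID records
// which grouping set each copy targets.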
- for (size_t grouping_offset = 0; grouping_offset < replicate_times_for_one_row; grouping_offset++) + for (size_t grouping_offset = 0; grouping_offset < replicate_times_for_one_row; ++grouping_offset) { auto grouping_columns = getGroupSetColumnNamesByOffset(grouping_offset); // for every grouping col, get the mutated one of them. @@ -180,14 +173,14 @@ void Expand::replicateAndFillNull(Block & block) const /// 2 2 1 + replicate_group2 for b, it's 1, we should pick and set: /// 2 2 2 + replicate_group_rows[1].b = null /// -----------------+ - for (size_t i = 0; i < origin_rows; i++) + for (size_t i = 0; i < origin_rows; ++i) { // for every original one row mapped N rows, fill the corresponding group set column as null value according to the offset. // only when the offset in replicate_group equals to current group_offset, set the data to null. // eg: for case above, for grouping_offset of = 0, we only set the every offset = 0 in each // small replicate_group_x to null. // - for (UInt64 j = 0; j < replicate_times_for_one_row; j++) + for (UInt64 j = 0; j < replicate_times_for_one_row; ++j) { if (j == grouping_offset) { @@ -234,8 +227,9 @@ const GroupingColumnNames & Expand::getGroupSetColumnNamesByOffset(size_t offset return group_sets_names[offset][0]; } -void Expand::getAllGroupSetColumnNames(std::set & name_set) const +std::set Expand::getAllGroupSetColumnNames() const { + std::set name_set; for (const auto & it1 : group_sets_names) { // for every grouping set. @@ -248,6 +242,7 @@ void Expand::getAllGroupSetColumnNames(std::set & name_set) const } } } + return name_set; } std::shared_ptr Expand::sharedExpand(const GroupingSets & groupingSets) diff --git a/dbms/src/Interpreters/Expand.h b/dbms/src/Interpreters/Expand.h index e2128f476fd..86229309317 100644 --- a/dbms/src/Interpreters/Expand.h +++ b/dbms/src/Interpreters/Expand.h @@ -124,7 +124,7 @@ class Expand const GroupingColumnNames & getGroupSetColumnNamesByOffset(size_t offset) const; - void getAllGroupSetColumnNames(std::set & name_set) const; + std::set getAllGroupSetColumnNames() const; static std::shared_ptr sharedExpand(const GroupingSets & groupingSets); diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index aa5d856a966..0c49880850b 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -135,15 +135,14 @@ ExpressionAction ExpressionAction::ordinaryJoin(std::shared_ptr join return a; } -ExpressionAction ExpressionAction::expandSource(std::shared_ptr expand_) +ExpressionAction ExpressionAction::expandSource(GroupingSets grouping_sets_) { ExpressionAction a; a.type = EXPAND; - a.expand = expand_; + a.expand = std::make_shared(grouping_sets_); return a; } - void ExpressionAction::prepare(Block & sample_block) { /** Constant expressions should be evaluated, and put the result in sample_block. @@ -239,8 +238,7 @@ void ExpressionAction::prepare(Block & sample_block) case EXPAND: { // sample_block is just for schema check followed by later block, modify it if your schema has changed during this action. - auto name_set = std::set(); - expand->getAllGroupSetColumnNames(name_set); + auto name_set = expand->getAllGroupSetColumnNames(); // make grouping set column to be nullable. 
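// (illustrative aside, not part of the patch) both halves of each header entry must change in
// step here: the DataTypePtr is wrapped via makeNullable(type) and, when the sample block
// carries a materialized column (the column != nullptr branch below), that column presumably
// receives the matching nullable wrapping, so later schema checks never see a Nullable type
// paired with a non-nullable column.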
for (const auto & col_name : name_set) { diff --git a/dbms/src/Interpreters/ExpressionActions.h b/dbms/src/Interpreters/ExpressionActions.h index e9d98de2106..f93266529f5 100644 --- a/dbms/src/Interpreters/ExpressionActions.h +++ b/dbms/src/Interpreters/ExpressionActions.h @@ -112,7 +112,7 @@ struct ExpressionAction static ExpressionAction project(const NamesWithAliases & projected_columns_); static ExpressionAction project(const Names & projected_columns_); static ExpressionAction ordinaryJoin(std::shared_ptr join_, const NamesAndTypesList & columns_added_by_join_); - static ExpressionAction expandSource(std::shared_ptr expand_); + static ExpressionAction expandSource(GroupingSets grouping_sets); /// Which columns necessary to perform this action. Names getNeededColumns() const; diff --git a/dbms/src/TestUtils/executorSerializer.cpp b/dbms/src/TestUtils/executorSerializer.cpp index 7c1cfa980b6..607bd27c68f 100644 --- a/dbms/src/TestUtils/executorSerializer.cpp +++ b/dbms/src/TestUtils/executorSerializer.cpp @@ -169,7 +169,7 @@ void serializeExpandSource(const String & executor_id, const tipb::Expand & expa for (const auto & grouping_exprs : grouping_set.grouping_exprs()) { buf.append("{"); - for (auto i = 0; i < grouping_exprs.grouping_expr().size(); i++) + for (auto i = 0; i < grouping_exprs.grouping_expr().size(); ++i) { if (i != 0) { From 5609f3b0742cb41191fe8d8941c73a08913944e8 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Fri, 17 Feb 2023 18:54:37 +0800 Subject: [PATCH 20/31] resolve header file recycle Signed-off-by: AilinKid <3148019@qq.com> --- dbms/src/Interpreters/Expand.h | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/src/Interpreters/Expand.h b/dbms/src/Interpreters/Expand.h index 86229309317..09182be015d 100644 --- a/dbms/src/Interpreters/Expand.h +++ b/dbms/src/Interpreters/Expand.h @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include From 995d14efc33e35faed9ef505884d1d4087e1fc56 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Sat, 18 Feb 2023 16:15:22 +0800 Subject: [PATCH 21/31] address jiangtao's comment Signed-off-by: AilinKid <3148019@qq.com> --- dbms/src/Interpreters/Expand.cpp | 18 +++++------------- .../TestUtils/tests/gtest_mock_executors.cpp | 1 - 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/dbms/src/Interpreters/Expand.cpp b/dbms/src/Interpreters/Expand.cpp index 702e351cdf3..00a1c0af0e7 100644 --- a/dbms/src/Interpreters/Expand.cpp +++ b/dbms/src/Interpreters/Expand.cpp @@ -64,7 +64,7 @@ void Expand::getGroupingSetsDes(FmtBuffer & buffer) const /// 1 1 target a -+-----> 1 null groupingID for a =1 /// 2 2 +-----> 2 null groupingID for b =2 /// target b -+-----> null 1 groupingID for a =1 -/// +-----> null a groupingID for b =2 +/// +-----> null 2 groupingID for b =2 /// /// when target a specified group set, other group set columns should be filled /// with null value to make group by(a,b) operator to meet the equivalence effect @@ -145,15 +145,8 @@ void Expand::replicateAndFillNull(Block & block) const assert(block.getByName(grouping_col).column->isColumnNullable()); const auto * nullable_column = typeid_cast(block.getByName(grouping_col).column.get()); - auto origin_size = nullable_column->size(); - // clone the nested column. - MutableColumnPtr new_nested_col = nullable_column->getNestedColumn().cloneResized(origin_size); - // just get mutable new null map. 
- auto new_null_map = ColumnUInt8::create(); - new_null_map->getData().resize(origin_size); - memcpy(new_null_map->getData().data(), nullable_column->getNullMapData().data(), origin_size * sizeof(nullable_column->getNullMapData()[0])); - - auto cloned_one = ColumnNullable::create(std::move(new_nested_col), std::move(new_null_map)); + auto cloned = ColumnNullable::create(nullable_column->getNestedColumnPtr(), nullable_column->getNullMapColumnPtr()); + auto cloned_one = typeid_cast(cloned->assumeMutable().get()); /// travel total rows, and set null values for current grouping set column. /// basically looks like: @@ -162,7 +155,7 @@ void Expand::replicateAndFillNull(Block & block) const /// 1 1 target a -+-----> 1 null groupingID for a =1 /// 2 2 +-----> 2 null groupingID for b =2 /// target b -+-----> null 1 groupingID for a =1 - /// +-----> null a groupingID for b =2 + /// +-----> null 2 groupingID for b =2 /// /// after the replicate is now, the data form likes like below /// ==> for one : in @@ -179,7 +172,6 @@ void Expand::replicateAndFillNull(Block & block) const // only when the offset in replicate_group equals to current group_offset, set the data to null. // eg: for case above, for grouping_offset of = 0, we only set the every offset = 0 in each // small replicate_group_x to null. - // for (UInt64 j = 0; j < replicate_times_for_one_row; ++j) { if (j == grouping_offset) @@ -193,7 +185,7 @@ void Expand::replicateAndFillNull(Block & block) const cloned_one->getNullMapData().data()[computed_offset] = 1; } } - block.getByName(grouping_col).column = std::move(cloned_one); + block.getByName(grouping_col).column = std::move(cloned); } // finish of adjustment for one grouping set columns. (by now one column for one grouping set). } diff --git a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp index c94f2509c84..f1826226aeb 100644 --- a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp +++ b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp @@ -14,7 +14,6 @@ #include #include -#include namespace DB { From 199961caacfb1d8c9907cc429ae7be1676e89425 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Sat, 18 Feb 2023 16:57:27 +0800 Subject: [PATCH 22/31] fmt Signed-off-by: AilinKid <3148019@qq.com> --- dbms/src/Interpreters/Expand.cpp | 7 +------ dbms/src/Interpreters/Expand.h | 11 ----------- 2 files changed, 1 insertion(+), 17 deletions(-) diff --git a/dbms/src/Interpreters/Expand.cpp b/dbms/src/Interpreters/Expand.cpp index 00a1c0af0e7..cfe9c7eb870 100644 --- a/dbms/src/Interpreters/Expand.cpp +++ b/dbms/src/Interpreters/Expand.cpp @@ -146,7 +146,7 @@ void Expand::replicateAndFillNull(Block & block) const const auto * nullable_column = typeid_cast(block.getByName(grouping_col).column.get()); auto cloned = ColumnNullable::create(nullable_column->getNestedColumnPtr(), nullable_column->getNullMapColumnPtr()); - auto cloned_one = typeid_cast(cloned->assumeMutable().get()); + auto *cloned_one = typeid_cast(cloned->assumeMutable().get()); /// travel total rows, and set null values for current grouping set column. 
/// basically looks like: @@ -237,11 +237,6 @@ std::set Expand::getAllGroupSetColumnNames() const return name_set; } -std::shared_ptr Expand::sharedExpand(const GroupingSets & groupingSets) -{ - return std::make_shared(groupingSets); -} - const std::string Expand::grouping_identifier_column_name = "groupingID"; const DataTypePtr Expand::grouping_identifier_column_type = std::make_shared(); } // namespace DB diff --git a/dbms/src/Interpreters/Expand.h b/dbms/src/Interpreters/Expand.h index 09182be015d..d23af17cafa 100644 --- a/dbms/src/Interpreters/Expand.h +++ b/dbms/src/Interpreters/Expand.h @@ -18,16 +18,7 @@ #include #include #include -#include -#include #include -#include -#include -#include -#include -#include - -#include namespace DB { @@ -125,8 +116,6 @@ class Expand std::set getAllGroupSetColumnNames() const; - static std::shared_ptr sharedExpand(const GroupingSets & groupingSets); - void getGroupingSetsDes(FmtBuffer & buffer) const; static const String grouping_identifier_column_name; From faf8cf37e727f23a4311a3c9cb395c25557dbd76 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Sat, 18 Feb 2023 18:43:50 +0800 Subject: [PATCH 23/31] add test for overlap grouping set Signed-off-by: AilinKid <3148019@qq.com> --- dbms/src/Interpreters/Expand.cpp | 4 +- .../Interpreters/tests/gtest_block_expand.cpp | 120 ++++++++++++++++++ 2 files changed, 121 insertions(+), 3 deletions(-) diff --git a/dbms/src/Interpreters/Expand.cpp b/dbms/src/Interpreters/Expand.cpp index cfe9c7eb870..f5c7e916f9f 100644 --- a/dbms/src/Interpreters/Expand.cpp +++ b/dbms/src/Interpreters/Expand.cpp @@ -109,8 +109,6 @@ void Expand::replicateAndFillNull(Block & block) const added_grouping_id_column[0]->insert(grouping_id); } } - // todo: for some column overlapping in different grouping set, we should copy the overlapped column as a new column - // and the upper layer OP's computation should be shifted and based on the new one's id. Need a plan side control. // replicate the original block rows. size_t existing_columns = block.columns(); @@ -146,7 +144,7 @@ void Expand::replicateAndFillNull(Block & block) const const auto * nullable_column = typeid_cast(block.getByName(grouping_col).column.get()); auto cloned = ColumnNullable::create(nullable_column->getNestedColumnPtr(), nullable_column->getNullMapColumnPtr()); - auto *cloned_one = typeid_cast(cloned->assumeMutable().get()); + auto * cloned_one = typeid_cast(cloned->assumeMutable().get()); /// travel total rows, and set null values for current grouping set column. /// basically looks like: diff --git a/dbms/src/Interpreters/tests/gtest_block_expand.cpp b/dbms/src/Interpreters/tests/gtest_block_expand.cpp index 039a1545888..e85cb0d7185 100644 --- a/dbms/src/Interpreters/tests/gtest_block_expand.cpp +++ b/dbms/src/Interpreters/tests/gtest_block_expand.cpp @@ -38,6 +38,126 @@ class BlockExpand : public ::testing::Test const std::vector col_name{"age", "gender", "country", "region", "zip"}; }; +/// todo: for some column overlapping in different grouping set, we should copy the overlapped column as a new column +/// and the upper layer OP's computation should be shifted and based on the new one's id. Need a plan side control, +/// tiflash side is ready to go. 
+/// +/// just an overlapped case for grouping set 1: <{a}>, grouping set 2: <{a,b}> +/// for count(distinct a) and count(distinct a, b), the planner will clone a new column from a for either one of them: +/// count(distinct a') and count(distinct a, b) = needs one more column a' here (maybe from a lower projection or something) +/// then, according to the index offsets of a' and a,b in PB, describe the grouping set definition. +/// when targeting a''s replicate group, fill a and b with null in the group. +/// when targeting a and b's replicate group, fill a' with null in the group. +TEST_F(BlockExpand, ExpandLogic4Overlap) +try +{ + { + // test basic block expand operation. (two grouping sets: {age}, {gender, country}) + const ColumnsWithTypeAndName + ori_col + = { + toVec(col_name[0], ColumnWithInt64{1, 0, -1}), + toVec(col_name[1], ColumnWithString{"1 ", "1 ", "1 "}), + toVec(col_name[2], ColumnWithString{"1", "2", "3"}), + toVec(col_name[3], ColumnWithUInt64{1, 1, 0}), + }; + // group set {age}, group set {gender, country} + GroupingSet g_age = GroupingSet{GroupingColumnNames{col_name[0]}}; + GroupingSet g_gender_country = GroupingSet{GroupingColumnNames{col_name[1], col_name[2]}}; + GroupingSets group_sets = GroupingSets{g_age, g_gender_country}; + Expand expand = Expand(group_sets); + Block block(ori_col); + auto origin_rows = block.rows(); + + expand.replicateAndFillNull(block); + // assert the column count is increased by 1. + ASSERT_EQ(block.getColumns().size(), size_t(5)); + // assert the new col groupingID is appended. + ASSERT_EQ(block.getColumnsWithTypeAndName()[4].name, "groupingID"); + // assert the block size is equal to origin rows * grouping set num. + auto expand_rows = block.rows(); + auto grouping_set_num = expand.getGroupSetNum(); + ASSERT_EQ(origin_rows * grouping_set_num, expand_rows); // 6 + // assert grouping set columns are nullable.
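// (illustrative aside, not part of the patch) columns 0..2 (age, gender, country) each belong
// to one of the two grouping sets, so replicateAndFillNull must have turned them Nullable;
// column 3 (region) sits in no grouping set and column 4 is the self-filled groupingID, so
// both stay non-nullable: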
+ ASSERT_EQ(block.getColumns()[0].get()->isColumnNullable(), true); + ASSERT_EQ(block.getColumns()[1].get()->isColumnNullable(), true); + ASSERT_EQ(block.getColumns()[2].get()->isColumnNullable(), true); + ASSERT_EQ(block.getColumns()[3].get()->isColumnNullable(), false); + ASSERT_EQ(block.getColumns()[4].get()->isColumnNullable(), false); + + // assert the rows layout + // "age", "gender", "country", "region", "groupingID" + // ori_col 1 null null 1 1 + // rpt_col null "1 " "1" 1 2 + // + // ori_col 0 null null 1 1 + // rpt_col null "1 " "2" 1 2 + // + // ori_col -1 null null 0 1 + // rpt_col null "1 " "3" 0 2 + const auto num4_null = 100; + const auto res0 = ColumnWithInt64{1, num4_null, 0, num4_null, -1, num4_null}; + const auto * col_0 = typeid_cast(block.getColumns()[0].get()); + const auto * col_0_nest = &static_cast(col_0->getNestedColumn()); + for (int i = 0; i < static_cast(expand_rows); ++i) + { + if (res0[i] == num4_null) + { + ASSERT_EQ(col_0->isNullAt(i), true); + } + else + { + ASSERT_EQ(col_0_nest->getElement(i), res0[i]); + } + } + + const auto res1 = ColumnWithString{"null", "1 ", "null", "1 ", "null", "1 "}; + const auto * col_1 = typeid_cast(block.getColumns()[1].get()); + const auto * col_1_nest = &static_cast(col_1->getNestedColumn()); + for (int i = 0; i < static_cast(expand_rows); ++i) + { + if (res1[i] == "null") + { + ASSERT_EQ(col_1->isNullAt(i), true); + } + else + { + ASSERT_EQ(col_1_nest->getDataAt(i), res1[i]); + } + } + + const auto res2 = ColumnWithString{"null", "1", "null", "2", "null", "3"}; + const auto * col_2 = typeid_cast(block.getColumns()[2].get()); + const auto * col_2_nest = &static_cast(col_2->getNestedColumn()); + for (int i = 0; i < static_cast(expand_rows); ++i) + { + if (res2[i] == "null") + { + ASSERT_EQ(col_2->isNullAt(i), true); + } + else + { + ASSERT_EQ(col_2_nest->getDataAt(i), res2[i]); + } + } + + const auto res3 = ColumnWithUInt64{1, 1, 1, 1, 0, 0}; + const auto * col_3 = typeid_cast(block.getColumns()[3].get()); + for (int i = 0; i < static_cast(expand_rows); ++i) + { + ASSERT_EQ(col_3->getElement(i), res3[i]); + } + + const auto res4 = ColumnWithUInt64{1, 2, 1, 2, 1, 2}; + const auto * col_4 = typeid_cast(block.getColumns()[4].get()); + for (int i = 0; i < static_cast(expand_rows); ++i) + { + ASSERT_EQ(col_4->getElement(i), res4[i]); + } + } +} +CATCH + TEST_F(BlockExpand, ExpandLogic) try { From 95c3ea1d83764bcd78efcbbfbfaf504e3060660c Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Mon, 20 Feb 2023 11:32:25 +0800 Subject: [PATCH 24/31] remove debug info Signed-off-by: AilinKid <3148019@qq.com> --- dbms/src/TestUtils/ExecutorTestUtils.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/TestUtils/ExecutorTestUtils.h b/dbms/src/TestUtils/ExecutorTestUtils.h index 2bf97d81a17..13363768a3b 100644 --- a/dbms/src/TestUtils/ExecutorTestUtils.h +++ b/dbms/src/TestUtils/ExecutorTestUtils.h @@ -32,11 +32,11 @@ ColumnsWithTypeAndName readBlock(BlockInputStreamPtr stream); ColumnsWithTypeAndName readBlocks(std::vector streams); #define WRAP_FOR_TEST_BEGIN \ - std::vector planner_bools{true}; \ + std::vector planner_bools{false, true}; \ for (auto enable_planner : planner_bools) \ { \ enablePlanner(enable_planner); \ - std::vector pipeline_bools{}; \ + std::vector pipeline_bools{false}; \ if (enable_planner) \ pipeline_bools.push_back(true); \ for (auto enable_pipeline : pipeline_bools) \ From 126d2b0bda1bbd25f32f54d708eeb40bddcb171b Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Mon, 
20 Feb 2023 11:39:09 +0800 Subject: [PATCH 25/31] remove useless header file and comment Signed-off-by: AilinKid <3148019@qq.com> --- dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp | 1 - dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp | 2 +- dbms/src/TestUtils/ExecutorTestUtils.cpp | 2 -- 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp index 7d7a502beb1..b45ade0f7d2 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp @@ -150,7 +150,6 @@ String DAGExpressionAnalyzerHelper::buildInFunction( DataTypePtr type = inferDataType4Literal(child); argument_types.push_back(type); } - // find common type DataTypePtr resolved_type = getLeastSupertype(argument_types); if (!removeNullable(resolved_type)->equals(*removeNullable(argument_types[0]))) { diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 887ca17c38c..2d9f7c1da25 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -277,7 +277,7 @@ void DAGQueryBlockInterpreter::handleJoin(const tipb::Join & join, DAGPipeline & size_t max_block_size_for_cross_join = settings.max_block_size; fiu_do_on(FailPoints::minimum_block_size_for_cross_join, { max_block_size_for_cross_join = 1; }); - JoinPtr join_ptr = std::make_shared( // make join + JoinPtr join_ptr = std::make_shared( probe_key_names, build_key_names, tiflash_join.kind, diff --git a/dbms/src/TestUtils/ExecutorTestUtils.cpp b/dbms/src/TestUtils/ExecutorTestUtils.cpp index 505f5b2a284..d0b72d10948 100644 --- a/dbms/src/TestUtils/ExecutorTestUtils.cpp +++ b/dbms/src/TestUtils/ExecutorTestUtils.cpp @@ -13,11 +13,9 @@ // limitations under the License. 
#include -#include #include #include #include -#include #include #include #include From c6344fe278fe4a1526a8bf8a26d8ddbab0eb1a42 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Mon, 20 Feb 2023 16:08:28 +0800 Subject: [PATCH 26/31] enable pipeline model Signed-off-by: AilinKid <3148019@qq.com> --- dbms/src/Flash/Pipeline/Pipeline.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Flash/Pipeline/Pipeline.cpp b/dbms/src/Flash/Pipeline/Pipeline.cpp index 67ba2569ae6..0421c1cd4c0 100644 --- a/dbms/src/Flash/Pipeline/Pipeline.cpp +++ b/dbms/src/Flash/Pipeline/Pipeline.cpp @@ -144,6 +144,7 @@ bool Pipeline::isSupported(const tipb::DAGRequest & dag_request) case tipb::ExecType::TypeTableScan: case tipb::ExecType::TypeExchangeSender: case tipb::ExecType::TypeExchangeReceiver: + case tipb::ExecType::TypeExpand: return true; default: is_supported = false; From 034d06a0284e4bf251d7738b77ad59f23a0e3652 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Mon, 20 Feb 2023 23:54:47 +0800 Subject: [PATCH 27/31] fix single block test Signed-off-by: AilinKid <3148019@qq.com> --- .../Coprocessor/DAGQueryBlockInterpreter.cpp | 10 ++++++- dbms/src/Flash/tests/gtest_interpreter.out | 28 +++++++++---------- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 2d9f7c1da25..169f25c23be 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -137,16 +137,24 @@ AnalysisResult analyzeExpressions( if (query_block.limit_or_topn && query_block.limit_or_topn->tp() == tipb::ExecType::TypeTopN) { res.order_columns = analyzer.appendOrderBy(chain, query_block.limit_or_topn->topn()); - chain.addStep(); } if (query_block.expand) { + chain.addStep(); res.before_expand = analyzer.appendExpand(query_block.expand->expand(), chain); } const auto & dag_context = *context.getDAGContext(); // Append final project results if needed. + if (query_block.isRootQueryBlock()) + { + std::cout << "Wocao1" << std::endl; + } + else + { + std::cout << "Wocao2" << std::endl; + } final_project = query_block.isRootQueryBlock() ? 
        analyzer.appendFinalProjectForRootQueryBlock(
            chain,
diff --git a/dbms/src/Flash/tests/gtest_interpreter.out b/dbms/src/Flash/tests/gtest_interpreter.out
index 89e323ef5e8..25f1bf2c0eb 100644
--- a/dbms/src/Flash/tests/gtest_interpreter.out
+++ b/dbms/src/Flash/tests/gtest_interpreter.out
@@ -20,10 +20,10 @@ Union:
 ~result:
 Union:
  SharedQuery x 10:
-  Limit, limit = 10
-  Union:
-   Limit x 10, limit = 10
-    Expression:
+  Expression:
+   Limit, limit = 10
+   Union:
+    Limit x 10, limit = 10
     Expression:
      Filter:
       SharedQuery:
@@ -84,10 +84,10 @@ Union:
 ~result:
 Union:
  SharedQuery x 10:
-  Limit, limit = 10
-  Union:
-   Limit x 10, limit = 10
-    Expression:
+  Expression:
+   Limit, limit = 10
+   Union:
+    Limit x 10, limit = 10
     Expression:
      Expression:
       Expression:
@@ -423,10 +423,10 @@ CreatingSets
  Union:
   MockExchangeSender x 20
    SharedQuery:
-    Limit, limit = 10
-    Union:
-     Limit x 20, limit = 10
-      Expression:
+    Expression:
+     Limit, limit = 10
+     Union:
+      Limit x 20, limit = 10
       Expression:
        SharedQuery:
         ParallelAggregating, max_threads: 20, final: true
@@ -439,8 +439,8 @@ CreatingSets
 ~test_suite_name: ListBase
 ~result_index: 0
 ~result:
-Limit, limit = 10
- Expression:
+Expression:
+ Limit, limit = 10
  Aggregating
   Expression:
    Filter:

From 3f954d3dd2a5d54b19498bb304faa49e890ed33a Mon Sep 17 00:00:00 2001
From: AilinKid <3148019@qq.com>
Date: Tue, 21 Feb 2023 14:57:36 +0800
Subject: [PATCH 28/31] address haisheng's comments

Signed-off-by: AilinKid <3148019@qq.com>
---
 .../Coprocessor/DAGExpressionAnalyzer.cpp     |  4 ----
 .../Coprocessor/DAGQueryBlockInterpreter.cpp  | 21 ++++++++------
 .../Flash/Planner/Plans/PhysicalExpand.cpp    | 29 ++++--------------
 dbms/src/Flash/tests/gtest_interpreter.out    | 30 +++++++++----------
 dbms/src/Interpreters/Expand.cpp              |  9 +++---
 dbms/src/Interpreters/Expand.h                |  2 +-
 dbms/src/Interpreters/ExpressionActions.h     |  1 -
 7 files changed, 41 insertions(+), 55 deletions(-)

diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp
index 370505c9b8c..197d493fe4a 100644
--- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp
@@ -822,10 +822,6 @@ GroupingSets DAGExpressionAnalyzer::buildExpandGroupingColumns(
     group_exprs_columns.reserve(group_exprs.grouping_expr().size());
     for (const auto & group_expr : group_exprs.grouping_expr())
     {
-        if (group_expr.tp() != tipb::ColumnRef)
-        {
-            throw TiFlashException("grouping sets expression should be column expr", Errors::Coprocessor::BadRequest);
-        }
         String cp_name = getActions(group_expr, actions);
         // tidb expression computation is based on column index offset child's chunk schema, change to ck block column name here.
        group_exprs_columns.emplace_back(cp_name);
diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp
index 169f25c23be..61b8bfe1550 100644
--- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp
@@ -78,9 +78,11 @@ struct AnalysisResult
     ExpressionActionsPtr before_where;
     ExpressionActionsPtr before_aggregation;
     ExpressionActionsPtr before_having;
-    ExpressionActionsPtr before_order_and_select;
-    ExpressionActionsPtr final_projection;
+    // ExpressionActionsPtr before_order_and_select;
+    ExpressionActionsPtr before_order;
     ExpressionActionsPtr before_expand;
+    ExpressionActionsPtr before_select;
+    ExpressionActionsPtr final_projection;
 
     String filter_column_name;
     String having_column_name;
@@ -137,16 +139,18 @@ AnalysisResult analyzeExpressions(
     if (query_block.limit_or_topn && query_block.limit_or_topn->tp() == tipb::ExecType::TypeTopN)
     {
         res.order_columns = analyzer.appendOrderBy(chain, query_block.limit_or_topn->topn());
+        res.before_order = chain.getLastActions();
+        chain.addStep();
     }
 
     if (query_block.expand)
     {
-        chain.addStep();
         res.before_expand = analyzer.appendExpand(query_block.expand->expand(), chain);
+        chain.addStep();
     }
 
     const auto & dag_context = *context.getDAGContext();
     // Append final project results if needed.
     final_project = query_block.isRootQueryBlock() ?
         analyzer.appendFinalProjectForRootQueryBlock(
             chain,
@@ -158,7 +162,7 @@ AnalysisResult analyzeExpressions(
         chain,
         query_block.qb_column_prefix);
 
-    res.before_order_and_select = chain.getLastActions();
+    res.before_select = chain.getLastActions();
 
     chain.finalize();
     chain.clear();
@@ -676,9 +680,9 @@ void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline)
         executeWhere(pipeline, res.before_having, res.having_column_name, "execute having");
         recordProfileStreams(pipeline, query_block.having_name);
     }
-    if (res.before_order_and_select)
+    if (res.before_order)
     {
-        executeExpression(pipeline, res.before_order_and_select, log, "before order and select");
+        executeExpression(pipeline, res.before_order, log, "before order");
     }
 
     if (!res.order_columns.empty())
@@ -704,6 +708,11 @@ void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline)
         recordProfileStreams(pipeline, query_block.expand_name);
     }
 
+    if (res.before_select)
+    {
+        executeExpression(pipeline, res.before_select, log, "before select");
+    }
+
     // execute final project action
     executeProject(pipeline, final_project, "final projection");
 
diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp
index 5e0294a5973..927448b13ec 100644
--- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp
+++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp
@@ -49,11 +49,11 @@ PhysicalPlanNodePtr PhysicalExpand::build(
 
     DAGExpressionAnalyzer analyzer{child->getSchema(), context};
     ExpressionActionsPtr before_expand_actions = PhysicalPlanHelper::newActions(child->getSampleBlock());
-    ExpressionActionsPtr expand_actions_itself = PhysicalPlanHelper::newActions(child->getSampleBlock());
 
     auto grouping_sets = analyzer.buildExpandGroupingColumns(expand, before_expand_actions);
     auto expand_action = ExpressionAction::expandSource(grouping_sets);
-    expand_actions_itself->add(expand_action);
+    // include expand action itself.
+ before_expand_actions->add(expand_action); // construct sample block. NamesAndTypes expand_output_columns; @@ -70,7 +70,7 @@ PhysicalPlanNodePtr PhysicalExpand::build( log->identifier(), child, expand_action.expand, - expand_actions_itself, + before_expand_actions, Block(expand_output_columns)); return physical_expand; @@ -79,11 +79,7 @@ PhysicalPlanNodePtr PhysicalExpand::build( void PhysicalExpand::expandTransform(DAGPipeline & child_pipeline) { - String expand_extra_info = fmt::format("expand, expand_executor_id = {}", execId()); - FmtBuffer fb; - fb.append(": grouping set "); - shared_expand->getGroupingSetsDes(fb); - expand_extra_info.append(fb.toString()); + String expand_extra_info = fmt::format("expand, expand_executor_id = {}: grouping set {}", execId(), shared_expand->getGroupingSetsDes()); child_pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, expand_actions, log->identifier()); stream->setExtraInfo(expand_extra_info); @@ -107,22 +103,7 @@ void PhysicalExpand::buildBlockInputStreamImpl(DAGPipeline & pipeline, Context & void PhysicalExpand::finalize(const Names & parent_require) { FinalizeHelper::checkSchemaContainsParentRequire(schema, parent_require); - Names required_output; - required_output.reserve(shared_expand->getGroupSetNum()); // grouping set column should be existed in the child output schema. - auto name_set = shared_expand->getAllGroupSetColumnNames(); - // append parent_require column it may expect self-filled groupingID. - for (const auto & one : parent_require) - { - if (one != Expand::grouping_identifier_column_name) - { - name_set.insert(one); - } - } - for (const auto & grouping_name : name_set) - { - required_output.emplace_back(grouping_name); - } - child->finalize(required_output); + child->finalize(expand_actions->getRequiredColumns()); } const Block & PhysicalExpand::getSampleBlock() const diff --git a/dbms/src/Flash/tests/gtest_interpreter.out b/dbms/src/Flash/tests/gtest_interpreter.out index 25f1bf2c0eb..65a46dc9842 100644 --- a/dbms/src/Flash/tests/gtest_interpreter.out +++ b/dbms/src/Flash/tests/gtest_interpreter.out @@ -7,7 +7,7 @@ Union: MergeSorting, limit = 10 Union: PartialSorting x 10: limit = 10 - Expression: + Expression: Filter: SharedQuery: ParallelAggregating, max_threads: 10, final: true @@ -21,10 +21,10 @@ Union: Union: SharedQuery x 10: Expression: - Limit, limit = 10 - Union: - Limit x 10, limit = 10 - Expression: + Expression: + Limit, limit = 10 + Union: + Limit x 10, limit = 10 Filter: SharedQuery: ParallelAggregating, max_threads: 10, final: true @@ -90,7 +90,7 @@ Union: Limit x 10, limit = 10 Expression: Expression: - Expression: + Expression: Filter: Expression: Expression: @@ -138,7 +138,7 @@ Union: ~result: Union: Expression x 10: - Expression: + Expression: SharedQuery: Window, function: {row_number}, frame: {type: Rows, boundary_begin: Current, boundary_end: Current} Expression: @@ -153,7 +153,7 @@ Union: ~result: Union: Expression x 10: - Expression: + Expression: Expression: Expression: SharedQuery: @@ -170,7 +170,7 @@ Union: ~result: Union: Expression x 10: - Expression: + Expression: Expression: Expression: SharedQuery: @@ -191,7 +191,7 @@ Union: ~result: Union: Expression x 8: - Expression: + Expression: Window: , function: {row_number}, frame: {type: Rows, boundary_begin: Current, boundary_end: Current} Expression: MergeSorting: , limit = 0 @@ -215,7 +215,7 @@ Union: ~result: Union: Expression x 10: - Expression: + Expression: SharedQuery: Window, function: {row_number}, frame: {type: Rows, 
boundary_begin: Current, boundary_end: Current} Expression: @@ -424,10 +424,10 @@ CreatingSets MockExchangeSender x 20 SharedQuery: Expression: - Limit, limit = 10 - Union: - Limit x 20, limit = 10 - Expression: + Expression: + Limit, limit = 10 + Union: + Limit x 20, limit = 10 SharedQuery: ParallelAggregating, max_threads: 20, final: true Expression x 20: diff --git a/dbms/src/Interpreters/Expand.cpp b/dbms/src/Interpreters/Expand.cpp index f5c7e916f9f..0fc84578fcd 100644 --- a/dbms/src/Interpreters/Expand.cpp +++ b/dbms/src/Interpreters/Expand.cpp @@ -14,16 +14,15 @@ #include #include +#include #include #include #include -#include "DataTypes/DataTypesNumber.h" - namespace DB { -namespace /// anonymous namespace for storing private function utils. +namespace { void convertColumnToNullable(ColumnWithTypeAndName & column) { @@ -37,8 +36,9 @@ Expand::Expand(const DB::GroupingSets & gss) : group_sets_names(gss) {} -void Expand::getGroupingSetsDes(FmtBuffer & buffer) const +String Expand::getGroupingSetsDes() const { + FmtBuffer buffer; buffer.append("["); for (const auto & grouping_set : group_sets_names) { @@ -52,6 +52,7 @@ void Expand::getGroupingSetsDes(FmtBuffer & buffer) const buffer.append("}"); } buffer.append("]"); + return buffer.toString(); } /// for cases like: select count(distinct a), count(distinct b) from t; diff --git a/dbms/src/Interpreters/Expand.h b/dbms/src/Interpreters/Expand.h index d23af17cafa..e3e52a4d983 100644 --- a/dbms/src/Interpreters/Expand.h +++ b/dbms/src/Interpreters/Expand.h @@ -116,7 +116,7 @@ class Expand std::set getAllGroupSetColumnNames() const; - void getGroupingSetsDes(FmtBuffer & buffer) const; + String getGroupingSetsDes() const; static const String grouping_identifier_column_name; diff --git a/dbms/src/Interpreters/ExpressionActions.h b/dbms/src/Interpreters/ExpressionActions.h index f93266529f5..c25e8e7193a 100644 --- a/dbms/src/Interpreters/ExpressionActions.h +++ b/dbms/src/Interpreters/ExpressionActions.h @@ -18,7 +18,6 @@ #include #include #include -#include #include #include From 0eb302935ba056e93bb76bda331b4f42509f0e90 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Tue, 21 Feb 2023 17:30:48 +0800 Subject: [PATCH 29/31] address haisheng's comment 2 Signed-off-by: AilinKid <3148019@qq.com> --- .../Flash/Planner/Plans/PhysicalExpand.cpp | 14 ++++-- dbms/src/Flash/Planner/Plans/PhysicalExpand.h | 5 +-- dbms/src/Interpreters/Expand.cpp | 44 +++++++------------ dbms/src/Interpreters/Expand.h | 5 ++- dbms/src/Interpreters/ExpressionActions.cpp | 6 +++ .../TestUtils/tests/gtest_mock_executors.cpp | 32 ++++++++++++++ 6 files changed, 70 insertions(+), 36 deletions(-) diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp index 927448b13ec..ddf7a959d61 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp +++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp @@ -62,7 +62,7 @@ PhysicalPlanNodePtr PhysicalExpand::build( { expand_output_columns.emplace_back(one.name, expand_action.expand->isInGroupSetColumn(one.name) ? 
makeNullable(one.type) : one.type); } - expand_output_columns.emplace_back(expand_action.expand->grouping_identifier_column_name, expand_action.expand->grouping_identifier_column_type); + expand_output_columns.emplace_back(Expand::grouping_identifier_column_name, Expand::grouping_identifier_column_type); auto physical_expand = std::make_shared( executor_id, @@ -70,8 +70,7 @@ PhysicalPlanNodePtr PhysicalExpand::build( log->identifier(), child, expand_action.expand, - before_expand_actions, - Block(expand_output_columns)); + before_expand_actions); return physical_expand; } @@ -103,11 +102,18 @@ void PhysicalExpand::buildBlockInputStreamImpl(DAGPipeline & pipeline, Context & void PhysicalExpand::finalize(const Names & parent_require) { FinalizeHelper::checkSchemaContainsParentRequire(schema, parent_require); + Names required_output = parent_require; + required_output.emplace_back(Expand::grouping_identifier_column_name); + expand_actions->finalize(required_output); + + // do the child finalize before require column changed after expand_action finalization. child->finalize(expand_actions->getRequiredColumns()); + FinalizeHelper::prependProjectInputIfNeed(expand_actions, child->getSampleBlock().columns()); + FinalizeHelper::checkSampleBlockContainsParentRequire(getSampleBlock(), parent_require); } const Block & PhysicalExpand::getSampleBlock() const { - return sample_block; + return expand_actions->getSampleBlock(); } } // namespace DB diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.h b/dbms/src/Flash/Planner/Plans/PhysicalExpand.h index bca4f9beedd..fa668b64114 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.h +++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.h @@ -37,12 +37,10 @@ class PhysicalExpand : public PhysicalUnary const String & req_id, const PhysicalPlanNodePtr & child_, const std::shared_ptr & shared_expand, - const ExpressionActionsPtr & expand_actions, - const Block & sample_block_) + const ExpressionActionsPtr & expand_actions) : PhysicalUnary(executor_id_, PlanType::Expand, schema_, req_id, child_) , shared_expand(shared_expand) , expand_actions(expand_actions) - , sample_block(sample_block_) {} void finalize(const Names & parent_require) override; @@ -57,6 +55,5 @@ class PhysicalExpand : public PhysicalUnary void buildBlockInputStreamImpl(DAGPipeline & pipeline, Context & context, size_t max_streams) override; std::shared_ptr shared_expand; ExpressionActionsPtr expand_actions; - Block sample_block; }; } // namespace DB diff --git a/dbms/src/Interpreters/Expand.cpp b/dbms/src/Interpreters/Expand.cpp index 0fc84578fcd..45b86951041 100644 --- a/dbms/src/Interpreters/Expand.cpp +++ b/dbms/src/Interpreters/Expand.cpp @@ -34,7 +34,9 @@ void convertColumnToNullable(ColumnWithTypeAndName & column) Expand::Expand(const DB::GroupingSets & gss) : group_sets_names(gss) -{} +{ + collectNameSet(); +} String Expand::getGroupingSetsDes() const { @@ -92,11 +94,6 @@ void Expand::replicateAndFillNull(Block & block) const // reserve N times of current block rows size. grouping_id_column_data.reserve(block.rows() * replicate_times_for_one_row); - // prepare added mutable grouping id column. - MutableColumns added_grouping_id_column; - added_grouping_id_column.reserve(1); - added_grouping_id_column.push_back(grouping_id_column->getPtr()); - for (size_t i = 0; i < origin_rows; ++i) { current_offset += replicate_times_for_one_row; @@ -107,7 +104,7 @@ void Expand::replicateAndFillNull(Block & block) const { // start from 1. 
Field grouping_id = j + 1; - added_grouping_id_column[0]->insert(grouping_id); + grouping_id_column->insert(grouping_id); } } @@ -123,10 +120,10 @@ void Expand::replicateAndFillNull(Block & block) const block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->convertToFullColumnIfConst(); // for every existing column, if the column is a grouping set column, make it nullable. - if (isInGroupSetColumn(block.safeGetByPosition(i).name) && !block.safeGetByPosition(i).column->isColumnNullable()) - { + auto & column = block.safeGetByPosition(i); + if (isInGroupSetColumn(column.name) && !column.column->isColumnNullable()) convertColumnToNullable(block.getByPosition(i)); - } + if (!offsets_to_replicate->empty()) // replicate it. block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate); @@ -188,26 +185,16 @@ void Expand::replicateAndFillNull(Block & block) const } // finish of adjustment for one grouping set columns. (by now one column for one grouping set). } - block.insert(ColumnWithTypeAndName(std::move(added_grouping_id_column[0]), std::make_shared(), std::move("groupingID"))); + block.insert(ColumnWithTypeAndName(std::move(grouping_id_column), grouping_identifier_column_type, grouping_identifier_column_name)); // return input from block. } bool Expand::isInGroupSetColumn(String name) const { - for (const auto & it1 : group_sets_names) + for (const auto & it1 : name_set) { - // for every grouping set. - for (const auto & it2 : it1) - { - // for every grouping exprs - for (const auto & it3 : it2) - { - if (it3 == name) - { - return true; - } - } - } + if (it1 == name) + return true; } return false; } @@ -218,9 +205,13 @@ const GroupingColumnNames & Expand::getGroupSetColumnNamesByOffset(size_t offset return group_sets_names[offset][0]; } -std::set Expand::getAllGroupSetColumnNames() const +const std::set & Expand::getAllGroupSetColumnNames() const +{ + return name_set; +} + +void Expand::collectNameSet() { - std::set name_set; for (const auto & it1 : group_sets_names) { // for every grouping set. 
@@ -233,7 +224,6 @@ std::set Expand::getAllGroupSetColumnNames() const } } } - return name_set; } const std::string Expand::grouping_identifier_column_name = "groupingID"; diff --git a/dbms/src/Interpreters/Expand.h b/dbms/src/Interpreters/Expand.h index e3e52a4d983..51c3ae0422b 100644 --- a/dbms/src/Interpreters/Expand.h +++ b/dbms/src/Interpreters/Expand.h @@ -114,15 +114,18 @@ class Expand const GroupingColumnNames & getGroupSetColumnNamesByOffset(size_t offset) const; - std::set getAllGroupSetColumnNames() const; + const std::set & getAllGroupSetColumnNames() const; String getGroupingSetsDes() const; + void collectNameSet(); + static const String grouping_identifier_column_name; static const DataTypePtr grouping_identifier_column_type; private: GroupingSets group_sets_names; + std::set name_set; }; } // namespace DB diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index 0c49880850b..826f86d1059 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -48,6 +48,12 @@ Names ExpressionAction::getNeededColumns() const for (const auto & column : projections) res.push_back(column.first); + if (expand) + { + for (const auto & column : expand->getAllGroupSetColumnNames()) + res.push_back(column); + } + if (!source_name.empty()) res.push_back(source_name); diff --git a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp index f1826226aeb..768f514a66f 100644 --- a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp +++ b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp @@ -319,6 +319,38 @@ try } CATCH +TEST_F(MockDAGRequestTest, Expand) +try +{ + auto request = context.scan("test_db", "test_table").expand(MockVVecColumnNameVec{ + MockVecColumnNameVec{ + MockColumnNameVec{"s1"}, + }, + MockVecColumnNameVec{ + MockColumnNameVec{"s2"}, + }, + }) + .build(context); + { + String expected = "expand_1 | expanded_by: [<{<0, String>}><{<1, String>}>]\n table_scan_0 | {<0, String>, <1, String>}"; + ASSERT_DAGREQUEST_EQAUL(expected, request); + } + request = context.receive("sender_1").expand(MockVVecColumnNameVec{ + MockVecColumnNameVec{ + MockColumnNameVec{"s1"}, + }, + MockVecColumnNameVec{ + MockColumnNameVec{"s2"}, + }, + }) + .build(context); + { + String expected = "expand_1 | expanded_by: [<{<0, String>}><{<1, String>}>]\n exchange_receiver_0 | type:PassThrough, {<0, String>, <1, String>, <2, String>}"; + ASSERT_DAGREQUEST_EQAUL(expected, request); + } +} +CATCH + TEST_F(MockDAGRequestTest, MockWindow) try { From b3518e3a3f313415883dcd28fe6629ac6e395b1f Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Wed, 22 Feb 2023 13:38:36 +0800 Subject: [PATCH 30/31] address comment: move plan test and refactor some code Signed-off-by: AilinKid <3148019@qq.com> --- .../Coprocessor/DAGQueryBlockInterpreter.cpp | 5 ++-- .../Flash/Planner/Plans/PhysicalExpand.cpp | 5 ---- .../src/Flash/tests/gtest_expand_executor.cpp | 27 ------------------- dbms/src/Flash/tests/gtest_interpreter.cpp | 25 +++++++++++++++++ dbms/src/Flash/tests/gtest_interpreter.out | 27 +++++++++++++++++++ .../Flash/tests/gtest_planner_interpreter.cpp | 25 +++++++++++++++++ .../Flash/tests/gtest_planner_interpreter.out | 27 +++++++++++++++++++ dbms/src/Interpreters/Expand.cpp | 19 ++++++------- dbms/src/Interpreters/Expand.h | 12 ++++----- dbms/src/Interpreters/ExpressionActions.h | 1 - .../Interpreters/tests/gtest_block_expand.cpp | 8 +++--- 
.../TestUtils/tests/gtest_mock_executors.cpp | 6 +++-- 12 files changed, 128 insertions(+), 59 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 61b8bfe1550..3db44c4a279 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -30,8 +30,6 @@ #include #include #include -#include -#include #include #include #include @@ -47,7 +45,6 @@ #include #include #include -#include #include #include #include @@ -758,8 +755,10 @@ void DAGQueryBlockInterpreter::executeLimit(DAGPipeline & pipeline) void DAGQueryBlockInterpreter::executeExpand(DAGPipeline & pipeline, const ExpressionActionsPtr & expr) { + String expand_extra_info = fmt::format("expand: grouping set {}", expr->getActions().back().expand->getGroupingSetsDes()); pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, expr, log->identifier()); + stream->setExtraInfo(expand_extra_info); }); } diff --git a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp index ddf7a959d61..a710ac0fc40 100644 --- a/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp +++ b/dbms/src/Flash/Planner/Plans/PhysicalExpand.cpp @@ -39,8 +39,6 @@ PhysicalPlanNodePtr PhysicalExpand::build( { assert(child); - child->finalize(); - if (unlikely(expand.grouping_sets().empty())) { //should not reach here @@ -55,7 +53,6 @@ PhysicalPlanNodePtr PhysicalExpand::build( // include expand action itself. before_expand_actions->add(expand_action); - // construct sample block. NamesAndTypes expand_output_columns; auto child_header = child->getSchema(); for (const auto & one : child_header) @@ -87,7 +84,6 @@ void PhysicalExpand::expandTransform(DAGPipeline & child_pipeline) void PhysicalExpand::buildPipelineExec(PipelineExecGroupBuilder & group_builder, Context &, size_t) { - auto input_header = group_builder.getCurrentHeader(); group_builder.transform([&](auto & builder) { builder.appendTransformOp(std::make_unique(group_builder.exec_status, expand_actions, log->identifier())); }); @@ -106,7 +102,6 @@ void PhysicalExpand::finalize(const Names & parent_require) required_output.emplace_back(Expand::grouping_identifier_column_name); expand_actions->finalize(required_output); - // do the child finalize before require column changed after expand_action finalization. child->finalize(expand_actions->getRequiredColumns()); FinalizeHelper::prependProjectInputIfNeed(expand_actions, child->getSampleBlock().columns()); FinalizeHelper::checkSampleBlockContainsParentRequire(getSampleBlock(), parent_require); diff --git a/dbms/src/Flash/tests/gtest_expand_executor.cpp b/dbms/src/Flash/tests/gtest_expand_executor.cpp index 6401edbc424..451a9430e98 100644 --- a/dbms/src/Flash/tests/gtest_expand_executor.cpp +++ b/dbms/src/Flash/tests/gtest_expand_executor.cpp @@ -404,33 +404,6 @@ try toNullableVec({{}, {}}), toVec({2, 2}), }); - - - /// assert the input stream plan format. 
(under planner-enabled mode) - String expected = R"( -CreatingSets - Union: - HashJoinBuild x 10: , join_kind = Inner - Expression: - Expression: - Expression: - MockTableScan - Union: - Expression x 10: - SharedQuery: - MergeSorting, limit = 2 - Union: - PartialSorting x 10: limit = 2 - Expression: - Expression: - HashJoinProbe: - Expression: - Expression: }{}]> - Expression: - SharedQuery: - ParallelAggregating, max_threads: 10, final: true - MockExchangeReceiver x 10)"; - ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); } CATCH diff --git a/dbms/src/Flash/tests/gtest_interpreter.cpp b/dbms/src/Flash/tests/gtest_interpreter.cpp index e129c5587a5..634d175a9b0 100644 --- a/dbms/src/Flash/tests/gtest_interpreter.cpp +++ b/dbms/src/Flash/tests/gtest_interpreter.cpp @@ -394,5 +394,30 @@ try } } CATCH + +TEST_F(InterpreterExecuteTest, ExpandPlan) +try +{ + { + auto request = context + .receive("sender_1") + .aggregation({Count(col("s1"))}, {col("s2")}) + .expand(MockVVecColumnNameVec{ + MockVecColumnNameVec{ + MockColumnNameVec{"count(s1)"}, + }, + MockVecColumnNameVec{ + MockColumnNameVec{"s2"}, + }, + }) + .join(context.scan("test_db", "test_table").project({"s2"}), tipb::JoinType::TypeInnerJoin, {col("s2")}) + .project({"count(s1)", "groupingID"}) + .topN({{"groupingID", true}}, 2) + .build(context); + runAndAssert(request, 10); + } +} +CATCH + } // namespace tests } // namespace DB diff --git a/dbms/src/Flash/tests/gtest_interpreter.out b/dbms/src/Flash/tests/gtest_interpreter.out index 65a46dc9842..58e48ec0db4 100644 --- a/dbms/src/Flash/tests/gtest_interpreter.out +++ b/dbms/src/Flash/tests/gtest_interpreter.out @@ -461,3 +461,30 @@ Union: Filter: MockTableScan @ +~test_suite_name: ExpandPlan +~result_index: 0 +~result: +CreatingSets + Union: + HashJoinBuild x 10: , join_kind = Inner + Expression: + Expression: + Expression: + Expression: + MockTableScan + Union: + SharedQuery x 10: + Expression: + MergeSorting, limit = 2 + Union: + PartialSorting x 10: limit = 2 + Expression: + Expression: + Expression: + HashJoinProbe: + Expression: + Expression: }{}]> + SharedQuery: + ParallelAggregating, max_threads: 10, final: true + MockExchangeReceiver x 10 +@ diff --git a/dbms/src/Flash/tests/gtest_planner_interpreter.cpp b/dbms/src/Flash/tests/gtest_planner_interpreter.cpp index f4fd9be7613..5b9bea7b28f 100644 --- a/dbms/src/Flash/tests/gtest_planner_interpreter.cpp +++ b/dbms/src/Flash/tests/gtest_planner_interpreter.cpp @@ -499,5 +499,30 @@ try } CATCH + +TEST_F(PlannerInterpreterExecuteTest, ExpandPlan) +try +{ + { + auto request = context + .receive("sender_1") + .aggregation({Count(col("s1"))}, {col("s2")}) + .expand(MockVVecColumnNameVec{ + MockVecColumnNameVec{ + MockColumnNameVec{"count(s1)"}, + }, + MockVecColumnNameVec{ + MockColumnNameVec{"s2"}, + }, + }) + .join(context.scan("test_db", "test_table").project({"s2"}), tipb::JoinType::TypeInnerJoin, {col("s2")}) + .project({"count(s1)", "groupingID"}) + .topN({{"groupingID", true}}, 2) + .build(context); + runAndAssert(request, 10); + } +} +CATCH + } // namespace tests } // namespace DB diff --git a/dbms/src/Flash/tests/gtest_planner_interpreter.out b/dbms/src/Flash/tests/gtest_planner_interpreter.out index 8c46e98de28..549cdc35503 100644 --- a/dbms/src/Flash/tests/gtest_planner_interpreter.out +++ b/dbms/src/Flash/tests/gtest_planner_interpreter.out @@ -709,3 +709,30 @@ Union: Filter MockTableScan @ +~test_suite_name: ExpandPlan +~result_index: 0 +~result: +CreatingSets + Union: + HashJoinBuild x 10: , join_kind = Inner + 
Expression: + Expression: + Expression: + MockTableScan + Union: + Expression x 10: + SharedQuery: + MergeSorting, limit = 2 + Union: + PartialSorting x 10: limit = 2 + Expression: + Expression: + HashJoinProbe: + Expression: + Expression: }{}]> + Expression: + SharedQuery: + ParallelAggregating, max_threads: 10, final: true + Expression x 10: + MockExchangeReceiver +@ diff --git a/dbms/src/Interpreters/Expand.cpp b/dbms/src/Interpreters/Expand.cpp index 45b86951041..fa36b41e893 100644 --- a/dbms/src/Interpreters/Expand.cpp +++ b/dbms/src/Interpreters/Expand.cpp @@ -92,8 +92,10 @@ void Expand::replicateAndFillNull(Block & block) const auto grouping_id_column = ColumnUInt64::create(); auto & grouping_id_column_data = grouping_id_column->getData(); // reserve N times of current block rows size. - grouping_id_column_data.reserve(block.rows() * replicate_times_for_one_row); + grouping_id_column_data.resize(origin_rows * replicate_times_for_one_row); + // manipulate the data directly to avoid the virtual function overheads. + size_t grouping_id_column_index = 0; for (size_t i = 0; i < origin_rows; ++i) { current_offset += replicate_times_for_one_row; @@ -103,8 +105,9 @@ void Expand::replicateAndFillNull(Block & block) const for (UInt64 j = 0; j < replicate_times_for_one_row; ++j) { // start from 1. - Field grouping_id = j + 1; - grouping_id_column->insert(grouping_id); + auto grouping_id = j + 1; + grouping_id_column_data[grouping_id_column_index] = grouping_id; + ++grouping_id_column_index; } } @@ -125,8 +128,7 @@ void Expand::replicateAndFillNull(Block & block) const convertColumnToNullable(block.getByPosition(i)); if (!offsets_to_replicate->empty()) - // replicate it. - block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate); + column.column = column.column->replicate(*offsets_to_replicate); } } @@ -191,12 +193,7 @@ void Expand::replicateAndFillNull(Block & block) const bool Expand::isInGroupSetColumn(String name) const { - for (const auto & it1 : name_set) - { - if (it1 == name) - return true; - } - return false; + return name_set.find(name) != name_set.end(); } const GroupingColumnNames & Expand::getGroupSetColumnNamesByOffset(size_t offset) const diff --git a/dbms/src/Interpreters/Expand.h b/dbms/src/Interpreters/Expand.h index 51c3ae0422b..076abe7b83b 100644 --- a/dbms/src/Interpreters/Expand.h +++ b/dbms/src/Interpreters/Expand.h @@ -108,23 +108,23 @@ class Expand // to illustrate what group this row is targeted for. void replicateAndFillNull(Block & block) const; - size_t getGroupSetNum() const { return group_sets_names.size(); } - bool isInGroupSetColumn(String name) const; - const GroupingColumnNames & getGroupSetColumnNamesByOffset(size_t offset) const; - const std::set & getAllGroupSetColumnNames() const; String getGroupingSetsDes() const; - void collectNameSet(); - static const String grouping_identifier_column_name; static const DataTypePtr grouping_identifier_column_type; private: + void collectNameSet(); + + size_t getGroupSetNum() const { return group_sets_names.size(); } + + const GroupingColumnNames & getGroupSetColumnNamesByOffset(size_t offset) const; + GroupingSets group_sets_names; std::set name_set; }; diff --git a/dbms/src/Interpreters/ExpressionActions.h b/dbms/src/Interpreters/ExpressionActions.h index c25e8e7193a..83090f675ed 100644 --- a/dbms/src/Interpreters/ExpressionActions.h +++ b/dbms/src/Interpreters/ExpressionActions.h @@ -96,7 +96,6 @@ struct ExpressionAction /// For EXPAND. 
std::shared_ptr expand; - NamesAndTypesList columns_added_by_expand; /// If result_name_ == "", as name "function_name(arguments separated by commas) is used". static ExpressionAction applyFunction( diff --git a/dbms/src/Interpreters/tests/gtest_block_expand.cpp b/dbms/src/Interpreters/tests/gtest_block_expand.cpp index e85cb0d7185..c5756d3e94a 100644 --- a/dbms/src/Interpreters/tests/gtest_block_expand.cpp +++ b/dbms/src/Interpreters/tests/gtest_block_expand.cpp @@ -76,7 +76,7 @@ try ASSERT_EQ(block.getColumnsWithTypeAndName()[4].name, "groupingID"); // assert the block size is equal to origin rows * grouping set num. auto expand_rows = block.rows(); - auto grouping_set_num = expand.getGroupSetNum(); + auto grouping_set_num = 2; ASSERT_EQ(origin_rows * grouping_set_num, expand_rows); // 6 // assert grouping set column are nullable. ASSERT_EQ(block.getColumns()[0].get()->isColumnNullable(), true); @@ -186,7 +186,7 @@ try ASSERT_EQ(block.getColumnsWithTypeAndName()[4].name, "groupingID"); // assert the block size is equal to origin rows * grouping set num. auto expand_rows = block.rows(); - auto grouping_set_num = expand.getGroupSetNum(); + auto grouping_set_num = 2; ASSERT_EQ(origin_rows * grouping_set_num, expand_rows); // 6 // assert grouping set column are nullable. ASSERT_EQ(block.getColumns()[0].get()->isColumnNullable(), false); @@ -283,7 +283,7 @@ try ASSERT_EQ(block.getColumnsWithTypeAndName()[4].name, "groupingID"); // assert the block size is equal to origin rows * grouping set num. auto expand_rows = block.rows(); - auto grouping_set_num = expand.getGroupSetNum(); + auto grouping_set_num = 3; ASSERT_EQ(origin_rows * grouping_set_num, expand_rows); // 9 // assert grouping set column are nullable. ASSERT_EQ(block.getColumns()[0].get()->isColumnNullable(), false); @@ -392,7 +392,7 @@ try ASSERT_EQ(block.getColumnsWithTypeAndName()[4].name, "groupingID"); // assert the block size is equal to origin rows * grouping set num. auto expand_rows = block.rows(); - auto grouping_set_num = expand.getGroupSetNum(); + auto grouping_set_num = 3; ASSERT_EQ(origin_rows, 0); ASSERT_EQ(origin_rows * grouping_set_num, expand_rows); // 0 // assert grouping set column are nullable. 
diff --git a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp index 768f514a66f..a000bf3f87b 100644 --- a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp +++ b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp @@ -332,7 +332,8 @@ try }) .build(context); { - String expected = "expand_1 | expanded_by: [<{<0, String>}><{<1, String>}>]\n table_scan_0 | {<0, String>, <1, String>}"; + String expected = "expand_1 | expanded_by: [<{<0, String>}><{<1, String>}>]\n" + " table_scan_0 | {<0, String>, <1, String>}"; ASSERT_DAGREQUEST_EQAUL(expected, request); } request = context.receive("sender_1").expand(MockVVecColumnNameVec{ @@ -345,7 +346,8 @@ try }) .build(context); { - String expected = "expand_1 | expanded_by: [<{<0, String>}><{<1, String>}>]\n exchange_receiver_0 | type:PassThrough, {<0, String>, <1, String>, <2, String>}"; + String expected = "expand_1 | expanded_by: [<{<0, String>}><{<1, String>}>]\n" + " exchange_receiver_0 | type:PassThrough, {<0, String>, <1, String>, <2, String>}"; ASSERT_DAGREQUEST_EQAUL(expected, request); } } From 6c12fd9c3b74927eddb8f2ae8db5c20d843f5539 Mon Sep 17 00:00:00 2001 From: AilinKid <3148019@qq.com> Date: Wed, 22 Feb 2023 15:03:50 +0800 Subject: [PATCH 31/31] add test for gtest_pipeline_interpreter Signed-off-by: AilinKid <3148019@qq.com> --- .../Coprocessor/DAGQueryBlockInterpreter.cpp | 1 - .../tests/gtest_pipeline_interpreter.cpp | 24 ++++++++++ .../tests/gtest_pipeline_interpreter.out | 27 +++++++++++ dbms/src/Interpreters/Expand.cpp | 5 +-- dbms/src/Storages/DeltaMerge/DeltaTree.h | 45 +++++++++++++++---- 5 files changed, 88 insertions(+), 14 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 3db44c4a279..f6e7e69ce44 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -75,7 +75,6 @@ struct AnalysisResult ExpressionActionsPtr before_where; ExpressionActionsPtr before_aggregation; ExpressionActionsPtr before_having; - // ExpressionActionsPtr before_order_and_select; ExpressionActionsPtr before_order; ExpressionActionsPtr before_expand; ExpressionActionsPtr before_select; diff --git a/dbms/src/Flash/tests/gtest_pipeline_interpreter.cpp b/dbms/src/Flash/tests/gtest_pipeline_interpreter.cpp index 1a5478cbdf1..828e14e36ff 100644 --- a/dbms/src/Flash/tests/gtest_pipeline_interpreter.cpp +++ b/dbms/src/Flash/tests/gtest_pipeline_interpreter.cpp @@ -521,5 +521,29 @@ try } CATCH +TEST_F(PipelineInterpreterExecuteTest, ExpandPlan) +try +{ + { + auto request = context + .receive("sender_1") + .aggregation({Count(col("s1"))}, {col("s2")}) + .expand(MockVVecColumnNameVec{ + MockVecColumnNameVec{ + MockColumnNameVec{"count(s1)"}, + }, + MockVecColumnNameVec{ + MockColumnNameVec{"s2"}, + }, + }) + .join(context.scan("test_db", "test_table").project({"s2"}), tipb::JoinType::TypeInnerJoin, {col("s2")}) + .project({"count(s1)", "groupingID"}) + .topN({{"groupingID", true}}, 2) + .build(context); + runAndAssert(request, 10); + } +} +CATCH + } // namespace tests } // namespace DB diff --git a/dbms/src/Flash/tests/gtest_pipeline_interpreter.out b/dbms/src/Flash/tests/gtest_pipeline_interpreter.out index 11a0cddabda..a59672f7809 100644 --- a/dbms/src/Flash/tests/gtest_pipeline_interpreter.out +++ b/dbms/src/Flash/tests/gtest_pipeline_interpreter.out @@ -604,3 +604,30 @@ Union: Filter MockTableScan @ +~test_suite_name: ExpandPlan +~result_index: 0 
+~result: +CreatingSets + Union: + HashJoinBuild x 10: , join_kind = Inner + Expression: + Expression: + Expression: + MockTableScan + Union: + Expression x 10: + SharedQuery: + MergeSorting, limit = 2 + Union: + PartialSorting x 10: limit = 2 + Expression: + Expression: + HashJoinProbe: + Expression: + Expression: }{}]> + Expression: + SharedQuery: + ParallelAggregating, max_threads: 10, final: true + Expression x 10: + MockExchangeReceiver +@ diff --git a/dbms/src/Interpreters/Expand.cpp b/dbms/src/Interpreters/Expand.cpp index fa36b41e893..3910efec76a 100644 --- a/dbms/src/Interpreters/Expand.cpp +++ b/dbms/src/Interpreters/Expand.cpp @@ -94,7 +94,6 @@ void Expand::replicateAndFillNull(Block & block) const // reserve N times of current block rows size. grouping_id_column_data.resize(origin_rows * replicate_times_for_one_row); - // manipulate the data directly to avoid the virtual function overheads. size_t grouping_id_column_index = 0; for (size_t i = 0; i < origin_rows; ++i) { @@ -105,9 +104,7 @@ void Expand::replicateAndFillNull(Block & block) const for (UInt64 j = 0; j < replicate_times_for_one_row; ++j) { // start from 1. - auto grouping_id = j + 1; - grouping_id_column_data[grouping_id_column_index] = grouping_id; - ++grouping_id_column_index; + grouping_id_column_data[grouping_id_column_index++] = j + 1; } } diff --git a/dbms/src/Storages/DeltaMerge/DeltaTree.h b/dbms/src/Storages/DeltaMerge/DeltaTree.h index ad3fd32b3b1..b51575ba732 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaTree.h +++ b/dbms/src/Storages/DeltaMerge/DeltaTree.h @@ -892,8 +892,14 @@ class DeltaTree } public: - DeltaTree() { init(std::make_shared()); } - explicit DeltaTree(const ValueSpacePtr & insert_value_space_) { init(insert_value_space_); } + DeltaTree() + { + init(std::make_shared()); + } + explicit DeltaTree(const ValueSpacePtr & insert_value_space_) + { + init(insert_value_space_); + } DeltaTree(const Self & o); DeltaTree & operator=(const Self & o) @@ -954,10 +960,19 @@ class DeltaTree check(root, true); } - size_t getBytes() { return bytes; } + size_t getBytes() + { + return bytes; + } - size_t getHeight() const { return height; } - EntryIterator begin() const { return EntryIterator(left_leaf, 0, 0); } + size_t getHeight() const + { + return height; + } + EntryIterator begin() const + { + return EntryIterator(left_leaf, 0, 0); + } EntryIterator end() const { Int64 delta = isLeaf(root) ? as(Leaf, root)->getDelta() : as(Intern, root)->getDelta(); @@ -971,11 +986,23 @@ class DeltaTree return std::make_shared>(left_leaf, num_entries, delta); } - CompactedEntriesPtr getCompactedEntries() { return std::make_shared(begin(), end(), num_entries); } + CompactedEntriesPtr getCompactedEntries() + { + return std::make_shared(begin(), end(), num_entries); + } - size_t numEntries() const { return num_entries; } - size_t numInserts() const { return num_inserts; } - size_t numDeletes() const { return num_deletes; } + size_t numEntries() const + { + return num_entries; + } + size_t numInserts() const + { + return num_inserts; + } + size_t numDeletes() const + { + return num_deletes; + } void addDelete(UInt64 rid); void addInsert(UInt64 rid, UInt64 tuple_id);
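
Note on the core transform this series implements: Expand::replicateAndFillNull (dbms/src/Interpreters/Expand.cpp above) replicates every input row once per grouping set, fills NULL into the grouping-set columns that do not belong to the replica's own set, and appends a 1-based groupingID column. The standalone sketch below illustrates that row-level semantic with plain std containers; Value, Row, and GroupingSets here are illustrative stand-ins rather than the TiFlash types, and the real operator works column-wise on IColumn via replicate offsets and convertColumnToNullable, as the diffs show.

#include <iostream>
#include <optional>
#include <set>
#include <string>
#include <utility>
#include <vector>

// Illustrative stand-ins, not TiFlash types: a row is a list of
// (column name, optional value) pairs, and std::nullopt models NULL.
using Value = std::optional<std::string>;
using Row = std::vector<std::pair<std::string, Value>>;
// One grouping set per replica, e.g. {{"a"}, {"b"}} for
// "group by grouping sets ((a), (b))".
using GroupingSets = std::vector<std::set<std::string>>;

// Replicate each row once per grouping set; in replica j keep only the
// grouping columns owned by set j, NULL out the grouping columns of the
// other sets, and append a 1-based groupingID (j + 1, as in Expand.cpp).
std::vector<Row> replicateAndFillNull(const std::vector<Row> & input, const GroupingSets & sets)
{
    // Union of all grouping-set columns: only these may be nulled out;
    // ordinary payload columns pass through unchanged.
    std::set<std::string> grouping_cols;
    for (const auto & s : sets)
        grouping_cols.insert(s.begin(), s.end());

    std::vector<Row> output;
    output.reserve(input.size() * sets.size());
    for (const auto & row : input)
    {
        for (size_t j = 0; j < sets.size(); ++j)
        {
            Row replica = row;
            for (auto & [name, value] : replica)
            {
                if (grouping_cols.count(name) > 0 && sets[j].count(name) == 0)
                    value = std::nullopt; // grouping column of another set
            }
            replica.emplace_back("groupingID", std::to_string(j + 1));
            output.push_back(std::move(replica));
        }
    }
    return output;
}

int main()
{
    // Two grouping sets, as in "select count(distinct a), count(distinct b) from t".
    const GroupingSets sets = {{"a"}, {"b"}};
    const std::vector<Row> input = {{{"a", "a1"}, {"b", "b1"}, {"c", "c1"}}};
    for (const auto & row : replicateAndFillNull(input, sets))
    {
        for (const auto & [name, value] : row)
            std::cout << name << "=" << value.value_or("NULL") << " ";
        std::cout << "\n";
    }
    // Prints:
    //   a=a1 b=NULL c=c1 groupingID=1
    //   a=NULL b=b1 c=c1 groupingID=2
    return 0;
}

Compiled with any C++17 compiler, the sketch prints one replica per grouping set, which is the same shape the gtest_block_expand.cpp assertions above check: row count multiplied by the grouping-set count, grouping columns turned nullable, plus a trailing groupingID column.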