From 1d948c0288656c2d718f9de651fd49af499ee31f Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 9 Sep 2021 17:11:26 +0800 Subject: [PATCH] tcmur_device: skip reporting the event if device in recovery If the device is in recovery, we can defer reporting the event in the recovery when reopening the device. And if the device is stopped or stopping we can just skip it. Just wait for the report event to finish when recoverying the device, because the recovery will close and then open the device during which the private data maybe released. And it may cause use-after-free crash in report event routine. Signed-off-by: Xiubo Li --- kmod-devel-25-16.el8.x86_64.rpm | Bin 0 -> 19140 bytes main.c | 14 +++++++++++++- target.c | 6 ++++++ tcmur_device.c | 24 +++++++++++++++++++++++- tcmur_device.h | 3 +++ 5 files changed, 45 insertions(+), 2 deletions(-) create mode 100644 kmod-devel-25-16.el8.x86_64.rpm diff --git a/kmod-devel-25-16.el8.x86_64.rpm b/kmod-devel-25-16.el8.x86_64.rpm new file mode 100644 index 0000000000000000000000000000000000000000..96734fb24db80fa5bec84df484df676e03ebad41 GIT binary patch literal 19140 zcmeGjcU%)mHxxy&fxWxJsh}jJ7X^(XQbei+1jJ3Ufk2wsgkl%r&fY~-JPRn6J4Nhb ziDECycE_Uujf;n z2{&_MF7CE~Bd5m~?#*quVRfNQSikkgi1d*M!?y-mZ5|P)EpiawDZIZg>g-{= zCR3I^Y5nNpN4=HrgmyQB43E}**wElt-xiL?D_Og?+_AGguu5_}Wh7^M@HIQl@#~A` zoqyTna-|DwyZE)V36gQulGao@sQ7yRhn z@Y%1HblT)@y~3f(x_aDIS&_D}2BYl5>Ly<9p=0;C)xdHVt~@D_6GJa&zv3oJDqXLJSj6)}Q)k!$B5B z-qLaL>zfKDs%EA%x*u|R^S*rVyS7J;g!bL(oB6Qwh}0(wuX5VYN!mTo{$A99qxI@# zejsM%FYD87?v;GT%t0qlg*02C=Q15H#CG%vKir3BY-=;${!{6bzw+h;CwTDo=Q-|> z@JS}0EyJaJmYl_uN>~!MP$HE`q#UN4kgy1rT*ToDBtoHtNy9~KKF${rT$V^k2nAdj zA!Lf=d|brkaHRr{jK>uaTp5=um9cp&IZp=vaGcNLa#%tslOvUZFw$t;;@afUuGEf} z{Oq{>c?bIRquY}#d#|xcZ!@Q-mG8*kbw_>+yS#cwOb^}0%_}DyK7XQK&UNhEoNKf2 zVcT*~t;<^3rl<4lS&zeSkg*N58&_U+u!*}gF1W|Tl?}V}OZ%r~^4Xo+d;~F1KG>8g zf&wyyZl2@()m}+no!Xx0xg_G~+;#D+#wCO1Et}B8Ds_n4nFlROJd3-Z6`bZ@9J1t$ z)mg9HcRgJ74b)q*L(l%!$IWi>$?YodiRxn`vNmll?Kmbyx8P_-r^XIhk-CTP@#K?eOu#Ua`C>Ig6gpixwmHqa{~*&a z${%mA}`t4Yvv5$hq}#FUg{uCabt$qIlQtwR%CVMUH`4_ zefJ;@js}wLblR%G-tcOV8)Ub{UeJz{s2a*Tt9PFZF3RhkH&~lK=Er>f5&;D#IH3 zw_3D$e1&Dv5$>Kg-O8q(_$^hyekcgNHYBEJBU@JbYR|$&VQ-s+*;;15$n*R=HNbv@ zI!7}8LTR4ywcIcHb>+T*sKw(QukPhFT7(^rTzPQ61+j=pD=!Ox&X^|vVaIR!tU;8zrk`bFdaL%}a8_%)P3k3|(d z@6GiVbgKRX1;w78$)(_CRK1vj5nmC000pBp0~#zxK?(D9w;8sF zzD@OBPy)WA7)r!1ZwmIKU>_)!F14y9Gjt=3}7XDthAqhVi>q z(VIZkSFL{{1!q8s+9y%)dNbT&Aq8)T62@WO-gMlPurf#bBfNA8mxK^uFNO8SFqb4zZ46er#6l4sh*I-HwF2i74gWB}w zr%)Rbu{c7fCX^VEVgR}rXab{VR{AFj`E(|Wp^|748B(B4LozhFXjI#6uLr;ZAvCJ+ z3iOL-_o5(dfI*>@kx3-xCNV^l;xA46(gvdQEAvcU{S*=%u2T>sEk>``_I7oRR_GaK zQqVZA{j}}|y+TR4n!mXsR+;`V0I?75P8w7yT$e;sqB+oj*eE^{m0xbaAEcW%r(~`E zG$jk(;0N>Zt9tkx4oAq9N~A&=M7$|M9#Ig`VNV+A2w z!e{f?0=5Jf3gkQqI-TG`4@!ju&4)p17;FZI!7)?$<*!bo(bN9(k4d&Zp8_&Sos^-| zs!(h2DW88PER$qZ6GkVrs(h|9O&b?2)u`o)XeWvmXY=S{RDfKIk zJB!O;!z3ds1!B{4@4Yc%N4_y4)87~oHr5!?Y@IQDTck034rvTe6^X-@F2?XtAB^GQ z4aMQ1a`DKq55*&gZZ?KZwG)RWriepxFmdRLP2v#?Y>mSU%8kRfxr>L-2@wzXixCf- zf{TYuJRlwxe_1^AMWAtLDJdQrGv7Eg@QN|y$x?C1AxCk@h8S_kqCjzoCtn1pn^;H4z?d+k8dYD6j+yvPs|z--PT1T!VRPz)&8`zR zt4`R=x?qdyf-I~HvVi7kh$hqo7LugXs1&$ft%%daV{QbQr2Bd$`E}{KPU~2wW#rHX z5i*Sq$ASqZfs+L0sg72t2|}k(M`La}_^6H+%S|ZY5@QLe9{CDN8rd{o4Y;`hnk4YC zZm|gx^zxtBtREkWOaDLU6z)*pN9(Wh50W(i()R>!AqNCnx z<0G3*=l(c#>^f1$t`pmkq5lTk=tz{53UD`!+69UQ^bJ=im7qa!NqT*L7UmX@Kn96G zT&##wFyso8?6K-Z91Dp1{$*h@ShS%qE;a;d~qAn-fRM3tI{6d9+Sr&?4IcJwa?JBfuBR2(=WF2$1%`h(x^(cdg2m=xi~F zB1w%ho*;W;x){k=N0vY!6beOOk)<;!1%XLp2x%NLT^gM!O{39axJ*VjCm?F;j>lt( z_|P3p2*Obb(HoO#FttXH$#I1e16E2P7ow*t)K$rq+B)G2cr4CWG{pc9AoNhc1kJP5 zSM>qkplbrt8Pv6O#~1Mg{I9!1{4zuj>Hsl zNNEu|m<~K@jd|6px~q*0u87S-q8Ln|)sR6hOdmr@B4p2$cr;ln^&BQo!2Ozf$UgwH z6nX^?lh=bZV^SSqO0}6Yak?ZrT9}#>F^LuiJ7+E6N)$?kJ_+$0$Ml2>7zl|hOs3Gm zenz89s%;W%wn)VMni{mM3Np$pp-~XD>mj;=q+o&$a)1!Y!w~||xg zszA1jM9JI)NUva0Tx~KGpx~GcIBuw2hnOOcfQ?v#`ZD90X5)f=PRE(Q0!n|-POuih zB8JRW20I(b)TrfXt%jNG?!SyN$_58}d-`G2B+(dV4FT%~h7ToWNyy56wKsB2fQ^r< zIkt&n3!}i{kx`UxK_Apm(0i3>Ve>F2WPULgkHhAP1kOJ;k|1gTWO`9XxaN>QK(?S} zAGMIptsbw%S*0tQy2y5Fwi16#yna)gKvwQvi$haXr7PEB$G62NQX5CkAw zsU(m+L6(s;xm8Vq8jPDH2{bWDAA@uXNClAx&y6e8H3>sa*~s$OLN2ogxm2vq=3pM6 z{L#pQd_x1wc%&SJW8sO5w%jpLSe}OIPO92 z^xqNyIedi_`8pX`GC~Iy1-WdpYFSuZ0h?=DmLK3hD~ipoxdce+;QR>k38X$F3Q=v>pfBgseU~)*!VqeAF)%I@CgUK>fSp-ugq*H;7hci3~ zQo-Sa1F0EiQlcSZvsuC)w6`qLckl)&eJ!g|yUwwY&V7xIuOy)yPtrjkbAb0HY|>Ql&wL>pbW+aI|;;5Bgv2yng}wd)}b>!M4dni#NX&p4wLAljsgLT zEkxRcjDP~p2O-oYfIyAbJbzLn*TdwIz(X_O8ib1kuKKsZa;RDZV3#6V4RIydKvQU1 zgMSV%eN#XM=hLvCGlfAYVj^Kfhr_6P#6;)}ol7HLE5I$HDd9>ORF-67oQW-n76&B> zs186G#g3}bTA>6+$YkiO9QF}(kPg#XI${77d{Y?zu1Bnqpokb22ED?OEQ*b_Kr>>a znn=K40plsMw4mqcu${m(U>r;p3OF-2eKGAjOsBcEDFDy3fLbLEHXgM%&lfnR(9`uA zh$}x^9;E6N^`^Bp)dD#X`~G`p0Rbr_BI9Z|OI#R_B=oQpNTTV8E;y8h^>&8}LT#Gw zSTJg)a5r&m)fNxf^+kD=u<;@FUzXnq_zNx?SOUTU6M!W!O$hB6Fch@Ge#aTQ9|V!5 zd5);Y#k7q_umd`*cBsNc>^Aj}s^a02fW`ou34>Z{y3z235c4_smuUde5GqizXi`@l zwdcbPdX33+qU``;ix0JhGj9ozJ*FZ6xkjnfBp`*YviGp{0hbJIhMII;fkF`sovi5; zQcQ(Qb>{7fTBD|`3`#Ut3Fd${D-JH;K6NtW^1+|N4%Q^<)fVOR2x_TDhjxK_G%lzfxL`UI!?{5IL2CHCO>==^(IpEM zFM*R#sL`uZL){Gd!pAB)5E~$BF;M{>C%}ypST3~CxIw0XL<@mA)gXy9NK^{Fo{%BS zTE$d~mDRibTBxf_h!a{#=PGK^4FuDj8*~NX^J59`a8JkdgZl~Sj>6;yU(WoizfoA` zwwAyH)IB4HR`o_|KEGp~wGH7Fo>sL!CVsE~kR!5*IO(A@mp%g<+kM`&nN z|Bw(*zzFaU!RT@#QA%jdS9j=gFq+ahWU=(7)Bqg}kOoAA9u27iNK(##ZsU7+s||s0sHf;it=S zGD$6s0h&NV=>|3Ejf}2G76hoFh=(&0GNx(@C?u!LrSfVpO{!IBXo+KKZ0ZsZ-ApsY zRdBI~9<4?fyK4C%*|7A$%x7UPSpN_YA0H6bM7S6Bd9gs3l;~br^&e`7ex~-p&<9)~ zvxE};8W{9)NMMWPIG-=);xd68c7Ae}lnwu7SSXaiRUfHb!eX=8GNwQ-60!t1!Q{)O zLJnKbWAUW|Lc|mDMCj(4Nj}YYI6r-QF9z7|gJ5jvNVrpT`(5MJmb9)G&Ecct?%!N) z^>|>rDw5W=Nr|olCY(c-#7M{e8aCkOwYd?YcX!Z|0WgVLl=4`xl>W;*e5yq4?qrhXgBT%dO?NlMXLj62I}f z?BK;#E%tToGVN|XVRVOOe|y`fD4G=g_UWG_@8740lw}L%#KvsT-TNs2PCJ*W{d`=f zoNP8ZxjdCDnBlvbF-^0xpZ%nxw_Y6#UGdMysA0v<1Ck=nCMB$0YdPhUUAWa6`(smM z&u2}_$;|aFSzUVR$ilJ~39n~c_>ZrYMCVLfH)(&;Hs`K$0_$hYcItj$__K*)=8s5d zxvS;Te7$?025Y=sof11AtsltGY>`*l=t5MdissGSb}qO{a0@&8oclQ5ukn$_SBioe z19-=-y)5*YwXJ0K#qgsuF2CWXO&uR-;d^u5D8JXgIVgD1C(>P;&2nmg`*=>b`K7VD zq@#n&+N^ATaDQQ^JE9}&BhnUbUzF65oVxs4$>hF6W_m2Sd6PVRrooyK1evLxet5rO zLD8?jtnq5UE7$!O)`Qf?l`WB<7kt%duWKFo_})chhH~LDdIe{BsZ*SGT{E6 zQIXG9E$V|${IqGFLt>z;UDpfll6DF`ObCa{WUQc}E*1}~a$EMBRS3$w9eYP;9 z?Az*vtaQA|oDNOf^nOtumD_#$_#Ros4im7JsW;vgXIDU4*Sk-K-WN?LxvB-O-_u%#UK}%w< zTFYlHm+FV1(3$}c3^{d(xwCDEyRZsD&E{gT1QoG&|!4sO<` zYyYCry34Q9gFfx<(kt)fwCl7t{o7sWbYP46xglEB<->w9{F)(uVV^*K>20rQGRSI2 zL1@zMUZ>>|yVCg=*46JgdROL-9<86f@@;=K-QU;d`0+3;H@G-oHUp#m^XOgjrsVh$ zuFLHXPuAI8T+fK!{~`ZQ!KBhopA;`6Xc=j$-ihamFLy{kmfe2LZBdsd)@gXs>ekkd zhm-7%Pj4Eii%YaG)AsZ_e!2nq;(j?;es<18%R?Pk81Ifb^lRHUymR z@Y=Q=x(^vWaqn1<((>RNLAafLpU&q8Y-8p0-57bL!6WB)d#rbQ^qAYc=aYU{6S{eD z>|K{Tm-LXP*>lPRoXKXot1Z0eugl>!JJF!L?MiFkeK!s}OA>EbohW;eHoAU~@WZ{f zRBU48U9=uN{lvCuKC>>)ePBV1aLC_&J>~LZPkds=R57=LZuPR6@3hsu7d%bw;?_}c z{FiiEO#dV2{Z6lSk6vEhLM_rBDtaH>Ub^5?Yh2u*54PZ;J zdY&t9`4Mls#HnxEp93a$$eX>tPvrnf=!2(g9Kuov_VJUsCzdI-0cX;u44EB}>gLMi z$|o=1>pft$l~mgpXN*j^o;Nj=b*XV)*wX@<=1FMuV7r3zj~m7`ZFu>{8lRR8Dm}_4 z9N+$OS%<$ia~1XxsI{HPU`vRbCOaW=EcSAFoq9$r(TjPeiAwE*O{C> zhkML?FubU2`3z@@6_K^c!u+vE3_EqFI5khOWz__4-2Mb=kWjpaJphL5({@rTQU zJ+1sprq5Urd+=tm_}Rw4Q#2NTE@`oQQL9UB^ru}PC2lRqzPjVNji4`&8^NlV)+#4+ zOz)Y?n$@dVl{+id(qG(vRsrK-|4wU#r~a|gQtG8={LSb=hLiPf~S}E?<;Td`{~7# zrZio7c^*b5uxz0vaZ^o5B>g!^7(yiYeKE#o%hDj?ZUn{9bf&>Et}@UiO_45A{#HU2pDP_}p`+OKV}bB?Wt*cIqG*Qcpks zQm2~@`d?PqoL@orS~u~7+%D#2QPW4OV?Rc>=fp?bzdiChD?8|kqqlWI`Gm@L?(MGi zV3#cp&Hp&s>DrPm;o%W+Qb}xUwfFG9fU}pDb$J4uyNj;bS z$LZ!PFEyI{Au}Vn{;5}jnG@DoGVk@+JT+`YX6oB6Z9{?+M$P&=A~nD-`)FBl%ueqQ zu8Vwyo1@Q7b6|EVnb2@U$+#0(yY^h3b+fJ6oR;ww zF=0>N9^CLNvuMePrsF?d4DcRXJ~1;=U`QM8^=4y0$7=F6693f^?tVI8=B7As=$p4Z-<`P{)DESGJ6pxM}NQ@=#(*`mN_%`b6ID&+rrkEI84= z;rcdlLz1WMOul`o95?#XdIs}$sR}=_tYa literal 0 HcmV?d00001 diff --git a/main.c b/main.c index 70b42335..1ad82d06 100644 --- a/main.c +++ b/main.c @@ -1044,9 +1044,15 @@ static int dev_added(struct tcmu_device *dev) goto cleanup_format_lock; } + ret = pthread_cond_init(&rdev->report_event_cond, NULL); + if (ret) { + ret = -ret; + goto cleanup_rdev_lock; + } + ret = setup_io_work_queue(dev); if (ret < 0) - goto cleanup_rdev_lock; + goto cleanup_event_cond; ret = setup_aio_tracking(rdev); if (ret < 0) @@ -1088,6 +1094,8 @@ static int dev_added(struct tcmu_device *dev) cleanup_aio_tracking(rdev); cleanup_io_work_queue: cleanup_io_work_queue(dev, true); +cleanup_event_cond: + pthread_cond_destroy(&rdev->report_event_cond); cleanup_rdev_lock: pthread_mutex_destroy(&rdev->rdev_lock); cleanup_format_lock: @@ -1130,6 +1138,10 @@ static void dev_removed(struct tcmu_device *dev) tcmur_destroy_work(rdev->event_work); + ret = pthread_cond_destroy(&rdev->report_event_cond); + if (ret != 0) + tcmu_err("could not cleanup report event cond %d\n", ret); + ret = pthread_mutex_destroy(&rdev->rdev_lock); if (ret != 0) tcmu_err("could not cleanup state lock %d\n", ret); diff --git a/target.c b/target.c index 51a0d97c..91cc2d8e 100644 --- a/target.c +++ b/target.c @@ -252,6 +252,12 @@ static void tgt_port_grp_recovery_work_fn(void *arg) */ list_for_each_safe(&tpg->devs, rdev, tmp_rdev, recovery_entry) { list_del(&rdev->recovery_entry); + + pthread_mutex_lock(&rdev->rdev_lock); + if (rdev->flags & TCMUR_DEV_FLAG_REPORTING_EVENT) + pthread_cond_wait(&rdev->report_event_cond, &rdev->rdev_lock); + pthread_mutex_unlock(&rdev->rdev_lock); + ret = __tcmu_reopen_dev(rdev->dev, -1); if (ret) { tcmu_dev_err(rdev->dev, "Could not reinitialize device. (err %d).\n", diff --git a/tcmur_device.c b/tcmur_device.c index 09b29ba9..107ad425 100644 --- a/tcmur_device.c +++ b/tcmur_device.c @@ -86,8 +86,8 @@ int __tcmu_reopen_dev(struct tcmu_device *dev, int retries) rhandler->close(dev); } - pthread_mutex_lock(&rdev->rdev_lock); ret = -EIO; + pthread_mutex_lock(&rdev->rdev_lock); while (ret != 0 && !(rdev->flags & TCMUR_DEV_FLAG_STOPPING) && (retries < 0 || attempt <= retries)) { pthread_mutex_unlock(&rdev->rdev_lock); @@ -128,7 +128,10 @@ int tcmu_reopen_dev(struct tcmu_device *dev, int retries) pthread_mutex_unlock(&rdev->rdev_lock); return -EBUSY; } + rdev->flags |= TCMUR_DEV_FLAG_IN_RECOVERY; + if (rdev->flags & TCMUR_DEV_FLAG_REPORTING_EVENT) + pthread_cond_wait(&rdev->report_event_cond, &rdev->rdev_lock); pthread_mutex_unlock(&rdev->rdev_lock); return __tcmu_reopen_dev(dev, retries); @@ -168,9 +171,28 @@ static void __tcmu_report_event(void *data) sleep(1); pthread_mutex_lock(&rdev->rdev_lock); + /* + * If the device is in recovery, will skip reporting the event + * this time because the event should be report when the device + * is reopening. + */ + if (rdev->flags & (TCMUR_DEV_FLAG_IN_RECOVERY | + TCMUR_DEV_FLAG_STOPPING | + TCMUR_DEV_FLAG_STOPPED)) { + pthread_mutex_unlock(&rdev->rdev_lock); + return; + } + + rdev->flags |= TCMUR_DEV_FLAG_REPORTING_EVENT; + pthread_mutex_unlock(&rdev->rdev_lock); + ret = rhandler->report_event(dev); if (ret) tcmu_dev_err(dev, "Could not report events. Error %d.\n", ret); + + pthread_mutex_unlock(&rdev->rdev_lock); + rdev->flags &= ~TCMUR_DEV_FLAG_REPORTING_EVENT; + pthread_cond_signal(&rdev->report_event_cond); pthread_mutex_unlock(&rdev->rdev_lock); } diff --git a/tcmur_device.h b/tcmur_device.h index cd37aabe..3c7a449e 100644 --- a/tcmur_device.h +++ b/tcmur_device.h @@ -22,6 +22,7 @@ #define TCMUR_DEV_FLAG_IS_OPEN (1 << 2) #define TCMUR_DEV_FLAG_STOPPING (1 << 3) #define TCMUR_DEV_FLAG_STOPPED (1 << 4) +#define TCMUR_DEV_FLAG_REPORTING_EVENT (1 << 5) #define TCMUR_UA_DEV_SIZE_CHANGED 0 @@ -83,6 +84,8 @@ struct tcmur_device { int cmd_time_out; + pthread_cond_t report_event_cond; + pthread_spinlock_t cmds_list_lock; /* protects cmds_list */ struct list_head cmds_list; };