Skip to content

Commit 3ae09d7

Browse files
gerd-rausch authored and oraclelinuxkernel committed
net/rds: Don't block workqueues "cma_wq" and "cm.wq"
The rdma_cm event handler is called through either one of these queues. Unfortunately RDS is quite slow in processing some of these events. "RDMA_CM_EVENT_ROUTE_RESOLVED" which triggers a "cm_initiate_connect" was seen to accumulate delays up to 15 seconds before the same event could be delivered for subsequent "rdma_resolve_route" calls, manifesting itself in propagation delays from "cma_query_handler" to "cma_work_handler". There were even cases where a single call to "rds_rdma_cm_event_handler_cmn" took on the order of 4 seconds. One of the main offenders is "RDMA_CM_EVENT_ESTABLISHED" which triggers a "cm_connect_complete", which is a very expensive operation. Simply send the processing of events to the background, in order for them to no longer block the "cma_wq" or "cm.wq" workqueues, that are shared across all connections and allow for only a single active worker at a time. Since the "rds_rdma_cm_event_handler_cmn" callback no longer benefits from the protection of "cm_id" to not disappear in the midst of the callback, we obtain the value of "cm_id->context" from within "rds_spawn_rdma_cm_event_handler" and pass it to "rds_rdma_cm_event_handler_cmn" instead. Also drop "WQ_MEM_RECLAIM" from "rds_aux_wq", since workqueues marked for RECLAIM are not allowed to depend (flush_work) on non-RECLAIM workqueues. 
Orabug: 32373816 Signed-off-by: Gerd Rausch <gerd.rausch@oracle.com> Reviewed-by: Ka-Cheong Poon <ka-cheong.poon@oracle.com> (cherry picked from commit 04d2f24) Port to U3 Signed-off-by: Jack Vogel <jack.vogel@oracle.com> Orabug: 33590097 UEK6 => UEK7 (cherry picked from commit 41cf364) cherry-pick-repo=UEK/production/linux-uek.git Signed-off-by: Gerd Rausch <gerd.rausch@oracle.com> Reviewed-by: William Kucharski <william.kucharski@oracle.com> Orabug: 37347036 UEK7 => LUCI => UEK8 (cherry picked from commit 093ae1d) cherry-pick-repo=UEK/production/linux-uek.git Signed-off-by: Gerd Rausch <gerd.rausch@oracle.com> Reviewed-by: William Kucharski <william.kucharski@oracle.com>
1 parent 10d03da commit 3ae09d7

File tree

2 files changed

+92
-37
lines changed

2 files changed

+92
-37
lines changed

net/rds/ib.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1226,8 +1226,7 @@ int rds_ib_init(void)
12261226
if (ret)
12271227
goto out_sysctl;
12281228

1229-
rds_aux_wq = alloc_workqueue("%s", WQ_UNBOUND | WQ_MEM_RECLAIM, 0,
1230-
"krdsd_aux");
1229+
rds_aux_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "krdsd_aux");
12311230
if (!rds_aux_wq) {
12321231
pr_err("%s: failed to create aux workqueue\n", __func__);
12331232
goto out_recv;

net/rds/rdma_transport.c

Lines changed: 91 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,15 @@
4646

4747
#define RDS_REJ_CONSUMER_DEFINED 28
4848

49+
struct rds_rdma_cm_event_handler_info {
50+
struct work_struct work;
51+
struct rdma_cm_id *cm_id;
52+
struct rdma_cm_event event;
53+
struct rds_connection *conn;
54+
bool isv6;
55+
char private_data[];
56+
};
57+
4958
struct mutex cm_id_map_lock;
5059
DEFINE_IDR(cm_id_map);
5160
/* Global IPv4 and IPv6 RDS RDMA listener cm_id */
@@ -100,13 +109,13 @@ static inline bool __rds_rdma_chk_dev(struct rds_connection *conn)
100109
return true;
101110
}
102111

103-
static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
104-
struct rdma_cm_event *event,
105-
bool isv6)
112+
static void rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
113+
struct rdma_cm_event *event,
114+
struct rds_connection *conn,
115+
bool isv6)
106116
{
107-
/* this can be null in the listening path */
108-
struct rds_connection *conn = cm_id->context;
109117
struct rds_transport *trans = &rds_ib_transport;
118+
struct rds_connection *new_conn;
110119
struct rds_ib_connection *ic;
111120
int ret = 0;
112121
char *reason = NULL;
@@ -128,20 +137,22 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
128137
/* make sure that ic and cm_id are disassocated
129138
* before cm_id gets destroyed
130139
*/
131-
conn = cm_id->context;
132-
if (conn) {
133-
mutex_lock(&conn->c_cm_lock);
134-
ic = conn->c_transport_data;
140+
new_conn = cm_id->context;
141+
if (new_conn) {
142+
mutex_lock(&new_conn->c_cm_lock);
143+
ic = new_conn->c_transport_data;
135144
BUG_ON(!ic);
136145
down_write(&ic->i_cm_id_free_lock);
137146
ic->i_cm_id = NULL;
138147
cm_id->context = NULL;
139148
up_write(&ic->i_cm_id_free_lock);
140-
mutex_unlock(&conn->c_cm_lock);
149+
mutex_unlock(&new_conn->c_cm_lock);
141150
}
151+
152+
rdma_destroy_id(cm_id);
142153
}
143154

144-
return ret;
155+
return;
145156
}
146157

147158
/* Prevent shutdown from tearing down the connection
@@ -169,39 +180,33 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
169180
ic = conn->c_transport_data;
170181
BUG_ON(!ic);
171182

172-
/* If we can't acquire a read-lock on "i_cm_id_free_lock", "ic->i_cm_id" is being destroyed.
183+
/* If we can't acquire a read-lock on "i_cm_id_free_lock",
184+
* "ic->i_cm_id" is in the process of being disassociated.
173185
* Just ignore the event in that case.
174186
*/
175187
if (!down_read_trylock(&ic->i_cm_id_free_lock)) {
176188
mutex_unlock(&conn->c_cm_lock);
177-
return 0;
189+
return;
178190
}
179191

180-
if (!cm_id->context) {
181-
/* Connection is being destroyed.
182-
* This must have happened just before we acquired "i_cm_id_free_lock".
183-
* Just ignore the event in this case as well.
184-
*/
192+
/* If the connection no longer points to this cm_id,
193+
* it already has been disassociated and possibly destroyed.
194+
* Just ignore the event in this case as well.
195+
*/
196+
if (ic->i_cm_id != cm_id) {
185197
up_read(&ic->i_cm_id_free_lock);
186198
mutex_unlock(&conn->c_cm_lock);
187-
return 0;
199+
return;
188200
}
189201

190-
/* If the bottom-up pointer "cm_id->context" is inconsistent with
191-
* the top-down pointer "ic->i_cm_id", it must be a bug.
192-
*/
193-
BUG_ON(ic->i_cm_id != cm_id);
194-
195202
/* Even though this function no longer accesses "ic->i_cm_id" past this point
196203
* and "cma.c" always blocks "rdma_destroy_id" until "event_callback" is done,
197204
* we still need to hang on to the "i_cm_id_free_lock" until return,
198205
* since some functions called (e.g. conn->c_transport_data) just access
199206
* "ic->i_cm_id" without any checks.
200207
*/
201208

202-
/* If the connection is being shut down, bail out
203-
* right away. We return 0 so cm_id doesn't get
204-
* destroyed prematurely.
209+
/* If the connection is being shut down, bail out right away.
205210
*
206211
* Events RDMA_CM_EVENT_ADDR_CHANGE and RDMA_CM_EVENT_DISCONNECTED
207212
* need to be processed regardless of the connection state
@@ -343,30 +348,81 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
343348
*/
344349

345350
rds_conn_drop(conn, DR_IB_SHUTDOWN_NEEDED, ret);
346-
347-
/* prevent double rdma_destroy_id,
348-
* as the shutdown-path will destroy this cm_id
349-
*/
350-
ret = 0;
351351
}
352352

353353
up_read(&ic->i_cm_id_free_lock);
354354
mutex_unlock(&conn->c_cm_lock);
355+
}
355356

356-
return ret;
357+
static void rds_rdma_cm_event_handler_worker(struct work_struct *work)
358+
{
359+
struct rds_rdma_cm_event_handler_info *info = container_of(work,
360+
struct rds_rdma_cm_event_handler_info,
361+
work);
362+
363+
rds_rdma_cm_event_handler_cmn(info->cm_id, &info->event, info->conn, info->isv6);
364+
365+
kfree(info);
366+
}
367+
368+
static void rds_spawn_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
369+
struct rdma_cm_event *event,
370+
bool isv6)
371+
{
372+
struct rds_connection *conn = cm_id->context;
373+
struct workqueue_struct *wq;
374+
struct rds_rdma_cm_event_handler_info *info;
375+
376+
if (event->event != RDMA_CM_EVENT_CONNECT_REQUEST)
377+
wq = conn ? conn->c_path->cp_wq : NULL;
378+
else
379+
wq = rds_aux_wq;
380+
381+
if (!wq) {
382+
rds_rdma_cm_event_handler_cmn(cm_id, event, conn, isv6);
383+
return;
384+
}
385+
386+
info = kmalloc(sizeof(*info) + event->param.conn.private_data_len, GFP_KERNEL);
387+
if (!info) {
388+
rds_rdma_cm_event_handler_cmn(cm_id, event, conn, isv6);
389+
return;
390+
}
391+
392+
INIT_WORK(&info->work, rds_rdma_cm_event_handler_worker);
393+
394+
info->cm_id = cm_id;
395+
memcpy(&info->event, event, sizeof(*event));
396+
info->conn = conn;
397+
info->isv6 = isv6;
398+
399+
if (event->param.conn.private_data &&
400+
event->param.conn.private_data_len) {
401+
memcpy(info->private_data,
402+
event->param.conn.private_data,
403+
event->param.conn.private_data_len);
404+
info->event.param.conn.private_data = info->private_data;
405+
} else {
406+
info->event.param.conn.private_data = NULL;
407+
info->event.param.conn.private_data_len = 0;
408+
}
409+
410+
queue_work(wq, &info->work);
357411
}
358412

359413
int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
360414
struct rdma_cm_event *event)
361415
{
362-
return rds_rdma_cm_event_handler_cmn(cm_id, event, false);
416+
rds_spawn_rdma_cm_event_handler(cm_id, event, false);
417+
return 0;
363418
}
364419

365420
#if IS_ENABLED(CONFIG_IPV6)
366421
int rds6_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
367422
struct rdma_cm_event *event)
368423
{
369-
return rds_rdma_cm_event_handler_cmn(cm_id, event, true);
424+
rds_spawn_rdma_cm_event_handler(cm_id, event, true);
425+
return 0;
370426
}
371427
#endif
372428

0 commit comments

Comments (0)