Skip to content

Commit d22f498

Browse files
cpaasch authored and davem330 committed
mptcp: process MP_CAPABLE data option
This patch implements the handling of the MP_CAPABLE + data option, as per RFC 6824 bis / RFC 8684: MPTCP v1. On the server side we can receive the remote key after the connection is established. We need to explicitly track the 'missing remote key' status and avoid emitting an MPTCP ack until we get such info. When a late/retransmitted/out-of-order (OoO) packet carrying the MP_CAPABLE[+data] option is received, we have to propagate the MPTCP sequence number info to the msk socket. To avoid an ABBA locking issue, explicitly check for that in recvmsg(), where we own both the msk and subflow sock locks. The above also means that an established mp_capable subflow - still waiting for the remote key - can be 'downgraded' to plain TCP. Such a change could potentially block a reader waiting for new data forever - as readers hook to the msk, while later wake-ups after the downgrade will be on the subflow only. The above issue is not handled here; we likely have to get rid of msk->fallback to handle that cleanly. Signed-off-by: Christoph Paasch <cpaasch@apple.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent cc7972e commit d22f498

File tree

4 files changed

+95
-27
lines changed

4 files changed

+95
-27
lines changed

net/mptcp/options.c

+44-12
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ void mptcp_rcv_synsent(struct sock *sk)
243243
pr_debug("subflow=%p", subflow);
244244
if (subflow->request_mptcp && tp->rx_opt.mptcp.mp_capable) {
245245
subflow->mp_capable = 1;
246+
subflow->can_ack = 1;
246247
subflow->remote_key = tp->rx_opt.mptcp.sndr_key;
247248
} else {
248249
tcp_sk(sk)->is_mptcp = 0;
@@ -332,6 +333,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
332333
struct mptcp_ext *mpext;
333334
struct mptcp_sock *msk;
334335
unsigned int ack_size;
336+
bool ret = false;
335337
u8 tcp_fin;
336338

337339
if (skb) {
@@ -355,6 +357,14 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
355357
if (skb && tcp_fin &&
356358
subflow->conn->sk_state != TCP_ESTABLISHED)
357359
mptcp_write_data_fin(subflow, &opts->ext_copy);
360+
ret = true;
361+
}
362+
363+
opts->ext_copy.use_ack = 0;
364+
msk = mptcp_sk(subflow->conn);
365+
if (!msk || !READ_ONCE(msk->can_ack)) {
366+
*size = ALIGN(dss_size, 4);
367+
return ret;
358368
}
359369

360370
ack_size = TCPOLEN_MPTCP_DSS_ACK64;
@@ -365,15 +375,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
365375

366376
dss_size += ack_size;
367377

368-
msk = mptcp_sk(mptcp_subflow_ctx(sk)->conn);
369-
if (msk) {
370-
opts->ext_copy.data_ack = msk->ack_seq;
371-
} else {
372-
mptcp_crypto_key_sha(mptcp_subflow_ctx(sk)->remote_key,
373-
NULL, &opts->ext_copy.data_ack);
374-
opts->ext_copy.data_ack++;
375-
}
376-
378+
opts->ext_copy.data_ack = msk->ack_seq;
377379
opts->ext_copy.ack64 = 1;
378380
opts->ext_copy.use_ack = 1;
379381

@@ -422,13 +424,46 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
422424
return false;
423425
}
424426

427+
static bool check_fourth_ack(struct mptcp_subflow_context *subflow,
428+
struct sk_buff *skb,
429+
struct mptcp_options_received *mp_opt)
430+
{
431+
/* here we can process OoO, in-window pkts, only in-sequence 4th ack
432+
* are relevant
433+
*/
434+
if (likely(subflow->fourth_ack ||
435+
TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1))
436+
return true;
437+
438+
if (mp_opt->use_ack)
439+
subflow->fourth_ack = 1;
440+
441+
if (subflow->can_ack)
442+
return true;
443+
444+
/* If the first established packet does not contain MP_CAPABLE + data
445+
* then fallback to TCP
446+
*/
447+
if (!mp_opt->mp_capable) {
448+
subflow->mp_capable = 0;
449+
tcp_sk(mptcp_subflow_tcp_sock(subflow))->is_mptcp = 0;
450+
return false;
451+
}
452+
subflow->remote_key = mp_opt->sndr_key;
453+
subflow->can_ack = 1;
454+
return true;
455+
}
456+
425457
void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
426458
struct tcp_options_received *opt_rx)
427459
{
460+
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
428461
struct mptcp_options_received *mp_opt;
429462
struct mptcp_ext *mpext;
430463

431464
mp_opt = &opt_rx->mptcp;
465+
if (!check_fourth_ack(subflow, skb, mp_opt))
466+
return;
432467

433468
if (!mp_opt->dss)
434469
return;
@@ -441,9 +476,6 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
441476

442477
if (mp_opt->use_map) {
443478
if (mp_opt->mpc_map) {
444-
struct mptcp_subflow_context *subflow =
445-
mptcp_subflow_ctx(sk);
446-
447479
/* this is an MP_CAPABLE carrying MPTCP data
448480
* we know this map the first chunk of data
449481
*/

net/mptcp/protocol.c

+9-7
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
*/
3131
static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk)
3232
{
33-
if (!msk->subflow || mptcp_subflow_ctx(msk->subflow->sk)->fourth_ack)
33+
if (!msk->subflow || READ_ONCE(msk->can_ack))
3434
return NULL;
3535

3636
return msk->subflow;
@@ -651,17 +651,20 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
651651
__mptcp_init_sock(new_mptcp_sock);
652652

653653
msk = mptcp_sk(new_mptcp_sock);
654-
msk->remote_key = subflow->remote_key;
655654
msk->local_key = subflow->local_key;
656655
msk->token = subflow->token;
657656
msk->subflow = NULL;
658657

659658
mptcp_token_update_accept(newsk, new_mptcp_sock);
660659

661-
mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
662660
msk->write_seq = subflow->idsn + 1;
663-
ack_seq++;
664-
msk->ack_seq = ack_seq;
661+
if (subflow->can_ack) {
662+
msk->can_ack = true;
663+
msk->remote_key = subflow->remote_key;
664+
mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
665+
ack_seq++;
666+
msk->ack_seq = ack_seq;
667+
}
665668
newsk = new_mptcp_sock;
666669
mptcp_copy_inaddrs(newsk, ssk);
667670
list_add(&subflow->node, &msk->conn_list);
@@ -678,8 +681,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
678681
* the receive path and process the pending ones
679682
*/
680683
lock_sock(ssk);
681-
subflow->map_seq = ack_seq;
682-
subflow->map_subflow_seq = 1;
683684
subflow->rel_write_seq = 1;
684685
subflow->tcp_sock = ssk;
685686
subflow->conn = new_mptcp_sock;
@@ -795,6 +796,7 @@ void mptcp_finish_connect(struct sock *ssk)
795796
WRITE_ONCE(msk->token, subflow->token);
796797
WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
797798
WRITE_ONCE(msk->ack_seq, ack_seq);
799+
WRITE_ONCE(msk->can_ack, 1);
798800
}
799801

800802
static void mptcp_sock_graft(struct sock *sk, struct socket *parent)

net/mptcp/protocol.h

+7-3
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ struct mptcp_sock {
6969
u64 ack_seq;
7070
u32 token;
7171
unsigned long flags;
72+
bool can_ack;
7273
struct list_head conn_list;
7374
struct skb_ext *cached_ext; /* for the next sendmsg */
7475
struct socket *subflow; /* outgoing connect/listener/!mp_capable */
@@ -84,9 +85,10 @@ static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
8485

8586
struct mptcp_subflow_request_sock {
8687
struct tcp_request_sock sk;
87-
u8 mp_capable : 1,
88+
u16 mp_capable : 1,
8889
mp_join : 1,
89-
backup : 1;
90+
backup : 1,
91+
remote_key_valid : 1;
9092
u64 local_key;
9193
u64 remote_key;
9294
u64 idsn;
@@ -118,8 +120,10 @@ struct mptcp_subflow_context {
118120
fourth_ack : 1, /* send initial DSS */
119121
conn_finished : 1,
120122
map_valid : 1,
123+
mpc_map : 1,
121124
data_avail : 1,
122-
rx_eof : 1;
125+
rx_eof : 1,
126+
can_ack : 1; /* only after processing the remote key */
123127

124128
struct sock *tcp_sock; /* tcp sk backpointer */
125129
struct sock *conn; /* parent mptcp_sock */

net/mptcp/subflow.c

+35-5
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ static void subflow_init_req(struct request_sock *req,
6161
mptcp_get_options(skb, &rx_opt);
6262

6363
subflow_req->mp_capable = 0;
64+
subflow_req->remote_key_valid = 0;
6465

6566
#ifdef CONFIG_TCP_MD5SIG
6667
/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
@@ -185,17 +186,28 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
185186

186187
pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
187188

188-
/* if the sk is MP_CAPABLE, we need to fetch the client key */
189+
/* if the sk is MP_CAPABLE, we try to fetch the client key */
189190
subflow_req = mptcp_subflow_rsk(req);
190191
if (subflow_req->mp_capable) {
192+
if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) {
193+
/* here we can receive and accept an in-window,
194+
* out-of-order pkt, which will not carry the MP_CAPABLE
195+
* opt even on mptcp enabled paths
196+
*/
197+
goto create_child;
198+
}
199+
191200
opt_rx.mptcp.mp_capable = 0;
192201
mptcp_get_options(skb, &opt_rx);
193-
if (!opt_rx.mptcp.mp_capable)
194-
subflow_req->mp_capable = 0;
195-
else
202+
if (opt_rx.mptcp.mp_capable) {
196203
subflow_req->remote_key = opt_rx.mptcp.sndr_key;
204+
subflow_req->remote_key_valid = 1;
205+
} else {
206+
subflow_req->mp_capable = 0;
207+
}
197208
}
198209

210+
create_child:
199211
child = listener->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
200212
req_unhash, own_req);
201213

@@ -377,6 +389,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk)
377389
subflow->map_subflow_seq = mpext->subflow_seq;
378390
subflow->map_data_len = data_len;
379391
subflow->map_valid = 1;
392+
subflow->mpc_map = mpext->mpc_map;
380393
pr_debug("new map seq=%llu subflow_seq=%u data_len=%u",
381394
subflow->map_seq, subflow->map_subflow_seq,
382395
subflow->map_data_len);
@@ -428,6 +441,19 @@ static bool subflow_check_data_avail(struct sock *ssk)
428441
if (WARN_ON_ONCE(!skb))
429442
return false;
430443

444+
/* if msk lacks the remote key, this subflow must provide an
445+
* MP_CAPABLE-based mapping
446+
*/
447+
if (unlikely(!READ_ONCE(msk->can_ack))) {
448+
if (!subflow->mpc_map) {
449+
ssk->sk_err = EBADMSG;
450+
goto fatal;
451+
}
452+
WRITE_ONCE(msk->remote_key, subflow->remote_key);
453+
WRITE_ONCE(msk->ack_seq, subflow->map_seq);
454+
WRITE_ONCE(msk->can_ack, true);
455+
}
456+
431457
old_ack = READ_ONCE(msk->ack_seq);
432458
ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
433459
pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
@@ -752,13 +778,17 @@ static void subflow_ulp_clone(const struct request_sock *req,
752778
return;
753779
}
754780

781+
/* see comments in subflow_syn_recv_sock(), MPTCP connection is fully
782+
* established only after we receive the remote key
783+
*/
755784
new_ctx->conn_finished = 1;
756785
new_ctx->icsk_af_ops = old_ctx->icsk_af_ops;
757786
new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
758787
new_ctx->tcp_state_change = old_ctx->tcp_state_change;
759788
new_ctx->tcp_write_space = old_ctx->tcp_write_space;
760789
new_ctx->mp_capable = 1;
761-
new_ctx->fourth_ack = 1;
790+
new_ctx->fourth_ack = subflow_req->remote_key_valid;
791+
new_ctx->can_ack = subflow_req->remote_key_valid;
762792
new_ctx->remote_key = subflow_req->remote_key;
763793
new_ctx->local_key = subflow_req->local_key;
764794
new_ctx->token = subflow_req->token;

0 commit comments

Comments
 (0)