Skip to content

Commit

Permalink
net: mptcp: improve fallback to TCP
Browse files Browse the repository at this point in the history
keep using MPTCP sockets and a "dummy mapping" in case of fallback to
regular TCP. Skip adding DSS option on send, if TCP fallback has been
done earlier.

Notes: I'm unsure on what to do in mptcp_clean_una() to do a one-time
flush of the retransmit queue, as per Mat's suggestion. Any advice?

Changes since v1
 - rebase on top of Paolo's fix for NULL dereference in mptcp_recvmsg()

Changes since RFC v2:
 - use a bit in msk->flags, rather than a dedicated boolean in struct
   msk. This bit is going to be used in combination with another one,
   TCP_FALLBACK_ALLOWED, that is 1 at the first subflow creation
   and gets cleared once TCP fallback is no more allowed.
 - separate code that adds support for "infinite mapping", and use
   the term "dummy" instead of "infinite". Suggested by Mat
 - remove inappropriate call to __mptcp_do_fallback() in
   mptcp_accept() (Paolo)

Changes since RFC v1:
 - use a dedicated member of struct msk to indicate that a fallback
   ha happened, use it in case of infinite mapping
 - don't delete skb_ext in case of infinite mapping (Mat)
 - test the value of pm.subflows on reception of an infinite map to
   ensure that no other subflow is currently opened (Mat)
 - in mptcp_established_options(), avoid adding TCP options in case
   of fallback indication; simplify sendmsg()/recvmsg()/poll() to
   keep using the MPTCP socket in case of TCP fallback. Set the
   fallback indication in case subflow is not mp_capable after
   successful 3-way handshake, instead of flipping 'is_mptcp'
   (Paolo/Mat)
 - remove deadcode in mptcp_finish_connect, and increment
   MPTCP_MIB_MPCAPABLEACTIVEFALLBACK in subflow_finish_connect
   (Paolo)

BugLink: #11
BugLink: #22
Co-developed-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
  • Loading branch information
dcaratti authored and jenkins-tessares committed Jun 16, 2020
1 parent 2579482 commit 147eccc
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 80 deletions.
9 changes: 8 additions & 1 deletion net/mptcp/options.c
Original file line number Diff line number Diff line change
Expand Up @@ -626,6 +626,9 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,

opts->suboptions = 0;

if (unlikely(mptcp_check_fallback(sk)))
return false;

if (mptcp_established_options_mp(sk, skb, &opt_size, remaining, opts))
ret = true;
else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining,
Expand Down Expand Up @@ -716,7 +719,8 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
*/
if (!mp_opt->mp_capable) {
subflow->mp_capable = 0;
tcp_sk(sk)->is_mptcp = 0;
pr_fallback(msk);
__mptcp_do_fallback(msk);
return false;
}

Expand Down Expand Up @@ -816,6 +820,9 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
struct mptcp_options_received mp_opt;
struct mptcp_ext *mpext;

if (__mptcp_check_fallback(msk))
return;

mptcp_get_options(skb, &mp_opt);
if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
return;
Expand Down
81 changes: 18 additions & 63 deletions net/mptcp/protocol.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,6 @@ static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk)
return msk->subflow;
}

static bool __mptcp_needs_tcp_fallback(const struct mptcp_sock *msk)
{
return msk->first && !sk_is_mptcp(msk->first);
}

static struct socket *mptcp_is_tcpsk(struct sock *sk)
{
struct socket *sock = sk->sk_socket;
Expand Down Expand Up @@ -94,7 +89,7 @@ static struct socket *__mptcp_tcp_fallback(struct mptcp_sock *msk)
if (unlikely(sock))
return sock;

if (likely(!__mptcp_needs_tcp_fallback(msk)))
if (likely(!__mptcp_check_fallback(msk)))
return NULL;

return msk->subflow;
Expand Down Expand Up @@ -229,6 +224,15 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
if (!skb)
break;

if (__mptcp_check_fallback(msk)) {
/* if we are running under the workqueue, TCP could have
* collapsed skbs between dummy map creation and now
* be sure to adjust the size
*/
map_remaining = skb->len;
subflow->map_data_len = skb->len;
}

offset = seq - TCP_SKB_CB(skb)->seq;
fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
if (fin) {
Expand Down Expand Up @@ -466,8 +470,15 @@ static void mptcp_clean_una(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_data_frag *dtmp, *dfrag;
u64 snd_una = atomic64_read(&msk->snd_una);
bool cleaned = false;
u64 snd_una;

/* on fallback we just need to ignore snd_una, as this is really
* plain TCP
*/
if (__mptcp_check_fallback(msk))
atomic64_set(&msk->snd_una, msk->write_seq);
snd_una = atomic64_read(&msk->snd_una);

list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) {
if (after64(dfrag->data_seq + dfrag->data_len, snd_una))
Expand Down Expand Up @@ -740,7 +751,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int mss_now = 0, size_goal = 0, ret = 0;
struct mptcp_sock *msk = mptcp_sk(sk);
struct page_frag *pfrag;
struct socket *ssock;
size_t copied = 0;
struct sock *ssk;
bool tx_ok;
Expand All @@ -759,15 +769,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
goto out;
}

fallback:
ssock = __mptcp_tcp_fallback(msk);
if (unlikely(ssock)) {
release_sock(sk);
pr_debug("fallback passthrough");
ret = sock_sendmsg(ssock, msg);
return ret >= 0 ? ret + copied : (copied ? copied : ret);
}

pfrag = sk_page_frag(sk);
restart:
mptcp_clean_una(sk);
Expand Down Expand Up @@ -819,17 +820,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
break;
}
if (ret == 0 && unlikely(__mptcp_needs_tcp_fallback(msk))) {
/* Can happen for passive sockets:
* 3WHS negotiated MPTCP, but first packet after is
* plain TCP (e.g. due to middlebox filtering unknown
* options).
*
* Fall back to TCP.
*/
release_sock(ssk);
goto fallback;
}

copied += ret;

Expand Down Expand Up @@ -972,7 +962,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct socket *ssock;
int copied = 0;
int target;
long timeo;
Expand All @@ -981,16 +970,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
return -EOPNOTSUPP;

lock_sock(sk);
ssock = __mptcp_tcp_fallback(msk);
if (unlikely(ssock)) {
fallback:
release_sock(sk);
pr_debug("fallback-read subflow=%p",
mptcp_subflow_ctx(ssock->sk));
copied = sock_recvmsg(ssock, msg, flags);
return copied;
}

timeo = sock_rcvtimeo(sk, nonblock);

len = min_t(size_t, len, INT_MAX);
Expand Down Expand Up @@ -1056,9 +1035,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,

pr_debug("block timeout %ld", timeo);
mptcp_wait_data(sk, &timeo);
ssock = __mptcp_tcp_fallback(msk);
if (unlikely(ssock))
goto fallback;
}

if (skb_queue_empty(&sk->sk_receive_queue)) {
Expand Down Expand Up @@ -1660,12 +1636,6 @@ void mptcp_finish_connect(struct sock *ssk)
sk = subflow->conn;
msk = mptcp_sk(sk);

if (!subflow->mp_capable) {
MPTCP_INC_STATS(sock_net(sk),
MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
return;
}

pr_debug("msk=%p, token=%u", sk, subflow->token);

mptcp_crypto_key_sha(subflow->remote_key, NULL, &ack_seq);
Expand Down Expand Up @@ -1971,23 +1941,10 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
{
struct sock *sk = sock->sk;
struct mptcp_sock *msk;
struct socket *ssock;
__poll_t mask = 0;

msk = mptcp_sk(sk);
lock_sock(sk);
ssock = __mptcp_tcp_fallback(msk);
if (!ssock)
ssock = __mptcp_nmpc_socket(msk);
if (ssock) {
mask = ssock->ops->poll(file, ssock, wait);
release_sock(sk);
return mask;
}

release_sock(sk);
sock_poll_wait(file, sock, wait);
lock_sock(sk);

if (test_bit(MPTCP_DATA_READY, &msk->flags))
mask = EPOLLIN | EPOLLRDNORM;
Expand All @@ -1997,8 +1954,6 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;

release_sock(sk);

return mask;
}

Expand Down
34 changes: 34 additions & 0 deletions net/mptcp/protocol.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@
#define MPTCP_SEND_SPACE 1
#define MPTCP_WORK_RTX 2
#define MPTCP_WORK_EOF 3
#define MPTCP_FALLBACK_DONE 4

struct mptcp_options_received {
u64 sndr_key;
Expand Down Expand Up @@ -467,4 +468,37 @@ static inline bool before64(__u64 seq1, __u64 seq2)

void mptcp_diag_subflow_init(struct tcp_ulp_ops *ops);

static inline bool __mptcp_check_fallback(struct mptcp_sock *msk)
{
return test_bit(MPTCP_FALLBACK_DONE, &msk->flags);
}

static inline bool mptcp_check_fallback(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);

return __mptcp_check_fallback(msk);
}

static inline void __mptcp_do_fallback(struct mptcp_sock *msk)
{
if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) {
pr_debug("TCP fallback already done (msk=%p)", msk);
return;
}
set_bit(MPTCP_FALLBACK_DONE, &msk->flags);
}

static inline void mptcp_do_fallback(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);

__mptcp_do_fallback(msk);
}

#define pr_fallback(a) do { pr_debug("%s:fallback to TCP (msk=%p)",\
__FUNCTION__, a); } while (0)

#endif /* __MPTCP_PROTOCOL_H */
47 changes: 31 additions & 16 deletions net/mptcp/subflow.c
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_options_received mp_opt;
struct sock *parent = subflow->conn;
struct tcp_sock *tp = tcp_sk(sk);

subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);

Expand All @@ -202,6 +201,8 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
return;

subflow->conn_finished = 1;
subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);

mptcp_get_options(skb, &mp_opt);
if (subflow->request_mptcp && mp_opt.mp_capable) {
Expand All @@ -217,21 +218,20 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
subflow->thmac, subflow->remote_nonce);
} else {
tp->is_mptcp = 0;
if (subflow->request_mptcp)
MPTCP_INC_STATS(sock_net(sk),
MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
mptcp_do_fallback(sk);
pr_fallback(mptcp_sk(subflow->conn));
}

if (!tp->is_mptcp)
if (mptcp_check_fallback(sk))
return;

if (subflow->mp_capable) {
pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk),
subflow->remote_key);
mptcp_finish_connect(sk);

if (skb) {
pr_debug("synack seq=%u", TCP_SKB_CB(skb)->seq);
subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
}
} else if (subflow->mp_join) {
u8 hmac[SHA256_DIGEST_SIZE];

Expand All @@ -251,9 +251,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)

memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);

if (skb)
subflow->ssn_offset = TCP_SKB_CB(skb)->seq;

if (!mptcp_finish_join(sk))
goto do_reset;

Expand Down Expand Up @@ -529,7 +526,8 @@ enum mapping_status {
MAPPING_OK,
MAPPING_INVALID,
MAPPING_EMPTY,
MAPPING_DATA_FIN
MAPPING_DATA_FIN,
MAPPING_DUMMY
};

static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
Expand Down Expand Up @@ -593,6 +591,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk)
if (!skb)
return MAPPING_EMPTY;

if (mptcp_check_fallback(ssk))
return MAPPING_DUMMY;

mpext = mptcp_get_ext(skb);
if (!mpext || !mpext->use_map) {
if (!subflow->map_valid && !skb->len) {
Expand Down Expand Up @@ -734,6 +735,16 @@ static bool subflow_check_data_avail(struct sock *ssk)
ssk->sk_err = EBADMSG;
goto fatal;
}
if (status == MAPPING_DUMMY) {
__mptcp_do_fallback(msk);
skb = skb_peek(&ssk->sk_receive_queue);
subflow->map_valid = 1;
subflow->map_seq = READ_ONCE(msk->ack_seq);
subflow->map_data_len = skb->len;
subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq -
subflow->ssn_offset;
return true;
}

if (status != MAPPING_OK)
return false;
Expand Down Expand Up @@ -857,14 +868,18 @@ static void subflow_data_ready(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct sock *parent = subflow->conn;
struct mptcp_sock *msk;

if (!subflow->mp_capable && !subflow->mp_join) {
subflow->tcp_data_ready(sk);

msk = mptcp_sk(parent);
if (inet_sk_state_load(sk) == TCP_LISTEN) {
set_bit(MPTCP_DATA_READY, &msk->flags);
parent->sk_data_ready(parent);
return;
}

WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
!subflow->mp_join);

if (mptcp_subflow_data_available(sk))
mptcp_data_ready(parent, sk);
}
Expand Down Expand Up @@ -1103,7 +1118,7 @@ static void subflow_state_change(struct sock *sk)
* a fin packet carrying a DSS can be unnoticed if we don't trigger
* the data available machinery here.
*/
if (subflow->mp_capable && mptcp_subflow_data_available(sk))
if (mptcp_subflow_data_available(sk))
mptcp_data_ready(parent, sk);

if (!(parent->sk_shutdown & RCV_SHUTDOWN) &&
Expand Down

0 comments on commit 147eccc

Please sign in to comment.