Skip to content

Commit

Permalink
mptcp: cope with later TCP fallback
Browse files Browse the repository at this point in the history
With V1, passive connections can fall back to TCP after the
subflow has become established:

syn+ MP_CAPABLE ->
               <- syn, ack + MP_CAPABLE

ack, seq = 3    ->
        // OoO packet is accepted because in-sequence
        // passive socket is created, is in ESTABLISHED
	// status and tentatively as MP_CAPABLE

ack, seq = 2     ->
        // no MP_CAPABLE opt, subflow should fallback to TCP

We can't use the 'subflow' socket fallback, as we don't have
it available for passive connections.

Instead, when the fallback is detected, replace the mptcp
socket with the underlying TCP subflow. Beyond covering
the above scenario, it makes TCP fallback sockets as efficient
as plain TCP ones.

Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
  • Loading branch information
Paolo Abeni authored and jenkins-tessares committed Dec 18, 2019
1 parent fb4e106 commit 1e2466f
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 17 deletions.
111 changes: 94 additions & 17 deletions net/mptcp/protocol.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,50 @@

#define MPTCP_SAME_STATE TCP_MAX_STATES

static void __mptcp_close(struct sock *sk, long timeout);

/* MP_CAPABLE handshake failed: convert msk to plain TCP.
*
* The struct socket currently attached to msk is detached from it and
* grafted onto the TCP subflow @ssk, its stream ops are replaced with the
* plain inet/inet6 ones and the left-over msk is closed.
*
* @msk: the MPTCP socket being replaced
* @ssk: the TCP subflow socket taking over the struct socket
*
* Returns the struct socket that used to belong to msk (now attached to
* @ssk), as the caller can't access msk anymore after this function
* completes.
*
* Called with msk lock held, releases such lock before returning
*/
static struct socket *__mptcp_fallback_to_tcp(struct mptcp_sock *msk,
struct sock *ssk)
{
struct mptcp_subflow_context *subflow;
struct socket *sock;
struct sock *sk;

/* sk is the msk viewed as a plain sock; sock is its owning struct socket */
sk = (struct sock *)msk;
sock = sk->sk_socket;
subflow = mptcp_subflow_ctx(ssk);

/* detach the msk socket */
/* unlink the subflow first, so that the msk close below no longer
* finds it on the list it was linked to
*/
list_del_init(&subflow->node);
sock_orphan(sk);
sock->sk = NULL;

/* socket is now TCP */
lock_sock(ssk);
sock_graft(ssk, sock);
if (subflow->conn) {
/* Clearing the 'conn' field will make the ULP-overridden
* ops behaving like plain TCP ones.
* Note: we can't release the ULP data on a live socket.
*/
/* drop the reference held through subflow->conn before clearing it */
sock_put(subflow->conn);
subflow->conn = NULL;
}
release_sock(ssk);
/* pick the plain stream ops matching the msk address family */
sock->ops = sk->sk_family == AF_INET6 ? &inet6_stream_ops :
&inet_stream_ops;

/* destroy the left-over msk sock */
/* __mptcp_close() is documented as releasing the msk lock held on entry */
__mptcp_close(sk, 0);
return sock;
}

/* if msk has a single subflow socket, and the mp_capable handshake is not
* completed yet or has failed - that is, the socket is Not MP Capable,
* returns it.
Expand All @@ -37,25 +81,37 @@ static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk)
return msk->subflow;
}

/* if msk has a single subflow, and the mp_capable handshake is failed,
* return it.
static bool __mptcp_needs_tcp_fallback(const struct mptcp_sock *msk)
{
return msk->first && !tcp_sk(msk->first)->is_mptcp;
}

/* if the mp_capable handshake has failed, falls msk back to plain TCP,
* releases the socket lock and returns a reference to the now-TCP socket.
* Otherwise returns NULL
*/
static struct socket *__mptcp_tcp_fallback(const struct mptcp_sock *msk)
static struct socket *__mptcp_tcp_fallback(struct mptcp_sock *msk)
{
struct socket *ssock = __mptcp_nmpc_socket(msk);

sock_owned_by_me((const struct sock *)msk);

if (!ssock || tcp_sk(ssock->sk)->is_mptcp)
if (likely(!__mptcp_needs_tcp_fallback(msk)))
return NULL;

return ssock;
if (msk->subflow) {
/* the first subflow is an active connection, discard the
* paired socket
*/
msk->subflow->sk = NULL;
sock_release(msk->subflow);
msk->subflow = NULL;
}

return __mptcp_fallback_to_tcp(msk, msk->first);
}

static bool __mptcp_can_create_subflow(const struct mptcp_sock *msk)
{
return ((struct sock *)msk)->sk_state == TCP_CLOSE;
return !msk->first;
}

static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state)
Expand All @@ -76,6 +132,7 @@ static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state)
if (err)
return ERR_PTR(err);

msk->first = ssock->sk;
msk->subflow = ssock;
subflow = mptcp_subflow_ctx(ssock->sk);
list_add(&subflow->node, &msk->conn_list);
Expand Down Expand Up @@ -155,6 +212,8 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
ret = sk_stream_wait_memory(ssk, timeo);
if (ret)
return ret;
if (unlikely(__mptcp_needs_tcp_fallback(msk)))
return 0;
}

/* compute copy limit */
Expand Down Expand Up @@ -265,11 +324,11 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)

lock_sock(sk);
ssock = __mptcp_tcp_fallback(msk);
if (ssock) {
if (unlikely(ssock)) {
fallback:
pr_debug("fallback passthrough");
ret = sock_sendmsg(ssock, msg);
release_sock(sk);
return ret;
return ret >= 0 ? ret + copied : (copied ? copied : ret);
}

timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
Expand All @@ -288,6 +347,11 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
&size_goal);
if (ret < 0)
break;
if (ret == 0 && unlikely(__mptcp_needs_tcp_fallback(msk))) {
release_sock(ssk);
ssock = __mptcp_tcp_fallback(msk);
goto fallback;
}

copied += ret;
}
Expand Down Expand Up @@ -368,11 +432,11 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,

lock_sock(sk);
ssock = __mptcp_tcp_fallback(msk);
if (ssock) {
if (unlikely(ssock)) {
fallback:
pr_debug("fallback-read subflow=%p",
mptcp_subflow_ctx(ssock->sk));
copied = sock_recvmsg(ssock, msg, flags);
release_sock(sk);
return copied;
}

Expand Down Expand Up @@ -477,6 +541,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
pr_debug("block timeout %ld", timeo);
wait_data = true;
mptcp_wait_data(sk, &timeo);
if (unlikely(__mptcp_tcp_fallback(msk)))
goto fallback;
}

if (more_data_avail) {
Expand Down Expand Up @@ -529,6 +595,8 @@ static int __mptcp_init_sock(struct sock *sk)
INIT_LIST_HEAD(&msk->conn_list);
__set_bit(MPTCP_SEND_SPACE, &msk->flags);

msk->first = NULL;

return 0;
}

Expand Down Expand Up @@ -563,16 +631,15 @@ static void mptcp_subflow_shutdown(struct sock *ssk, int how)
release_sock(ssk);
}

static void mptcp_close(struct sock *sk, long timeout)
/* Called with msk lock held, releases such lock before returning */
static void __mptcp_close(struct sock *sk, long timeout)
{
struct mptcp_subflow_context *subflow, *tmp;
struct mptcp_sock *msk = mptcp_sk(sk);

mptcp_token_destroy(msk->token);
inet_sk_state_store(sk, TCP_CLOSE);

lock_sock(sk);

list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

Expand All @@ -585,6 +652,12 @@ static void mptcp_close(struct sock *sk, long timeout)
sk_common_release(sk);
}

/* Close entry point for the msk: takes the msk socket lock and hands over
* to __mptcp_close(), which is called with that lock held and releases it
* before returning - hence no release_sock() here.
*/
static void mptcp_close(struct sock *sk, long timeout)
{
lock_sock(sk);
__mptcp_close(sk, timeout);
}

static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk)
{
const struct ipv6_pinfo *ssk6 = inet6_sk(ssk);
Expand Down Expand Up @@ -654,6 +727,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
msk->local_key = subflow->local_key;
msk->token = subflow->token;
msk->subflow = NULL;
msk->first = newsk;

mptcp_token_update_accept(newsk, new_mptcp_sock);

Expand Down Expand Up @@ -1009,8 +1083,8 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
static __poll_t mptcp_poll(struct file *file, struct socket *sock,
struct poll_table_struct *wait)
{
const struct mptcp_sock *msk;
struct sock *sk = sock->sk;
struct mptcp_sock *msk;
struct socket *ssock;
__poll_t mask = 0;

Expand All @@ -1026,6 +1100,9 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
release_sock(sk);
sock_poll_wait(file, sock, wait);
lock_sock(sk);
ssock = __mptcp_tcp_fallback(msk);
if (unlikely(ssock))
return ssock->ops->poll(file, ssock, NULL);

if (test_bit(MPTCP_DATA_READY, &msk->flags))
mask = EPOLLIN | EPOLLRDNORM;
Expand Down
1 change: 1 addition & 0 deletions net/mptcp/protocol.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ struct mptcp_sock {
struct list_head conn_list;
struct skb_ext *cached_ext; /* for the next sendmsg */
struct socket *subflow; /* outgoing connect/listener/!mp_capable */
struct sock *first;
};

#define mptcp_for_each_subflow(__msk, __subflow) \
Expand Down

0 comments on commit 1e2466f

Please sign in to comment.