Skip to content

Commit

Permalink
mptcp: Handle MP_CAPABLE options for outgoing connections
Browse files Browse the repository at this point in the history
Add hooks to tcp_output.c to add MP_CAPABLE to an outgoing SYN request,
to capture the MP_CAPABLE in the received SYN-ACK, and to add MP_CAPABLE to
the final ACK of the three-way handshake.

Use the .sk_rx_dst_set() handler in the subflow proto to capture when the
responding SYN-ACK is received and notify the MPTCP connection layer.

Co-developed-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Peter Krystad <peter.krystad@linux.intel.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Peter Krystad authored and davem330 committed Jan 24, 2020
1 parent 2303f99 commit cec37a6
Show file tree
Hide file tree
Showing 9 changed files with 663 additions and 24 deletions.
3 changes: 3 additions & 0 deletions include/linux/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,9 @@ struct tcp_request_sock {
const struct tcp_request_sock_ops *af_specific;
u64 snt_synack; /* first SYNACK sent time */
bool tfo_listener;
#if IS_ENABLED(CONFIG_MPTCP)
bool is_mptcp;
#endif
u32 txhash;
u32 rcv_isn;
u32 snt_isn;
Expand Down
57 changes: 57 additions & 0 deletions include/net/mptcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,27 @@ struct mptcp_out_options {

void mptcp_init(void);

/* Return the is_mptcp flag stored in the TCP socket backing @sk. */
static inline bool sk_is_mptcp(const struct sock *sk)
{
	return tcp_sk(sk)->is_mptcp;
}

/* Return the is_mptcp flag stored in the TCP request socket backing @req. */
static inline bool rsk_is_mptcp(const struct request_sock *req)
{
	return tcp_rsk(req)->is_mptcp;
}

/* Handle one MPTCP TCP option; mptcp_get_options() calls this for every
 * option whose kind is TCPOPT_MPTCP, passing the option length in @opsize.
 */
void mptcp_parse_option(const unsigned char *ptr, int opsize,
struct tcp_options_received *opt_rx);
/* Fill @opts with the MP_CAPABLE SYN suboption and store its length in
 * @size when the subflow requested MPTCP. Returns true if the option
 * should be added, false otherwise (@size is then left untouched).
 */
bool mptcp_syn_options(struct sock *sk, unsigned int *size,
struct mptcp_out_options *opts);
/* Called from the SYN-SENT receive path for sockets flagged is_mptcp:
 * records the peer key when MP_CAPABLE was both requested and received,
 * otherwise clears the socket's is_mptcp flag.
 */
void mptcp_rcv_synsent(struct sock *sk);
/* Fill @opts with the MP_CAPABLE SYN-ACK suboption and store its length
 * in @size when the request socket is MP_CAPABLE. Returns true on success.
 */
bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
struct mptcp_out_options *opts);
/* Fill @opts with the MP_CAPABLE ACK suboption (both keys) for an
 * established subflow and store its length in @size. @remaining is the
 * TCP option space still available as computed by the caller.
 */
bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
unsigned int *size, unsigned int remaining,
struct mptcp_out_options *opts);

/* NOTE(review): only the start of the definition is visible here; it
 * selects an option length from @opts->suboptions before writing at @ptr.
 */
void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts);

/* move the skb extension ownership, with the assumption that 'to' is
Expand Down Expand Up @@ -89,11 +108,47 @@ static inline void mptcp_init(void)
{
}

/* Stub variant: unconditionally reports that @sk is not an MPTCP socket.
 * NOTE(review): presumably the build without CONFIG_MPTCP; the #else
 * itself is outside the visible hunk.
 */
static inline bool sk_is_mptcp(const struct sock *sk)
{
	return false;
}

/* Stub variant: unconditionally reports that @req is not an MPTCP
 * request socket.
 */
static inline bool rsk_is_mptcp(const struct request_sock *req)
{
	return false;
}

/* Stub variant: ignores the option bytes and leaves @opt_rx untouched. */
static inline void mptcp_parse_option(const unsigned char *ptr, int opsize,
				      struct tcp_options_received *opt_rx)
{
}

/* Stub variant: never adds an MPTCP option to a SYN.
 * Returns false and does not write to @size or @opts.
 */
static inline bool mptcp_syn_options(struct sock *sk, unsigned int *size,
				     struct mptcp_out_options *opts)
{
	return false;
}

/* Stub variant: nothing to record when a SYN-ACK is received. */
static inline void mptcp_rcv_synsent(struct sock *sk)
{
}

/* Stub variant: never adds an MPTCP option to a SYN-ACK.
 * Returns false and does not write to @size or @opts.
 */
static inline bool mptcp_synack_options(const struct request_sock *req,
					unsigned int *size,
					struct mptcp_out_options *opts)
{
	return false;
}

/* Stub variant: never adds an MPTCP option to an established-state packet.
 * Returns false and does not write to @size or @opts.
 */
static inline bool mptcp_established_options(struct sock *sk,
					     struct sk_buff *skb,
					     unsigned int *size,
					     unsigned int remaining,
					     struct mptcp_out_options *opts)
{
	return false;
}

static inline void mptcp_skb_ext_move(struct sk_buff *to,
const struct sk_buff *from)
{
Expand All @@ -107,6 +162,8 @@ static inline bool mptcp_skb_can_collapse(const struct sk_buff *to,

#endif /* CONFIG_MPTCP */

/* The TCP IPv6 code calls this, for sockets where sk_is_mptcp() is true,
 * with @mapped == true right after switching the socket to the ipv6_mapped
 * af_ops and with @mapped == false when restoring ipv6_specific.
 * NOTE(review): the definition is not visible here, and no stub is
 * declared for builds without MPTCP - confirm the calls are always
 * compiled out there.
 */
void mptcp_handle_ipv6_mapped(struct sock *sk, bool mapped);

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
int mptcpv6_init(void);
#elif IS_ENABLED(CONFIG_IPV6)
Expand Down
6 changes: 6 additions & 0 deletions net/ipv4/tcp_input.c
Original file line number Diff line number Diff line change
Expand Up @@ -5978,6 +5978,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
tcp_initialize_rcv_mss(sk);

if (sk_is_mptcp(sk))
mptcp_rcv_synsent(sk);

/* Remember, tcp_poll() does not lock socket!
* Change state from SYN-SENT only after copied_seq
* is initialized. */
Expand Down Expand Up @@ -6600,6 +6603,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,

tcp_rsk(req)->af_specific = af_ops;
tcp_rsk(req)->ts_off = 0;
#if IS_ENABLED(CONFIG_MPTCP)
tcp_rsk(req)->is_mptcp = 0;
#endif

tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = af_ops->mss_clamp;
Expand Down
44 changes: 44 additions & 0 deletions net/ipv4/tcp_output.c
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,22 @@ static void smc_set_option_cond(const struct tcp_sock *tp,
#endif
}

/* Add the MPTCP option to a SYN-ACK under construction, if applicable.
 *
 * Nothing happens unless @req is an MPTCP request socket, the MPTCP layer
 * produces a SYN-ACK option for it, and that option fits in *@remaining.
 * On success OPTION_MPTCP is set in @opts->options and *@remaining is
 * reduced by the option length.
 */
static void mptcp_set_option_cond(const struct request_sock *req,
				  struct tcp_out_options *opts,
				  unsigned int *remaining)
{
	unsigned int optlen;

	if (!rsk_is_mptcp(req))
		return;

	/* optlen is only written when the call below returns true. */
	if (!mptcp_synack_options(req, &optlen, &opts->mptcp))
		return;

	/* Silently drop the option when the space left is too small. */
	if (optlen > *remaining)
		return;

	opts->options |= OPTION_MPTCP;
	*remaining -= optlen;
}

/* Compute TCP options for SYN packets. This is not the final
* network wire format yet.
*/
Expand Down Expand Up @@ -666,6 +682,15 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,

smc_set_option(tp, opts, &remaining);

if (sk_is_mptcp(sk)) {
unsigned int size;

if (mptcp_syn_options(sk, &size, &opts->mptcp)) {
opts->options |= OPTION_MPTCP;
remaining -= size;
}
}

return MAX_TCP_OPTION_SPACE - remaining;
}

Expand Down Expand Up @@ -727,6 +752,8 @@ static unsigned int tcp_synack_options(const struct sock *sk,
}
}

mptcp_set_option_cond(req, opts, &remaining);

smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);

return MAX_TCP_OPTION_SPACE - remaining;
Expand Down Expand Up @@ -764,6 +791,23 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
size += TCPOLEN_TSTAMP_ALIGNED;
}

/* MPTCP options have precedence over SACK for the limited TCP
* option space because a MPTCP connection would be forced to
* fall back to regular TCP if a required multipath option is
* missing. SACK still gets a chance to use whatever space is
* left.
*/
if (sk_is_mptcp(sk)) {
unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
unsigned int opt_size = 0;

if (mptcp_established_options(sk, skb, &opt_size, remaining,
&opts->mptcp)) {
opts->options |= OPTION_MPTCP;
size += opt_size;
}
}

eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
if (unlikely(eff_sacks)) {
const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
Expand Down
6 changes: 6 additions & 0 deletions net/ipv6/tcp_ipv6.c
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

icsk->icsk_af_ops = &ipv6_mapped;
if (sk_is_mptcp(sk))
mptcp_handle_ipv6_mapped(sk, true);
sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
tp->af_specific = &tcp_sock_ipv6_mapped_specific;
Expand All @@ -248,6 +250,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (err) {
icsk->icsk_ext_hdr_len = exthdrlen;
icsk->icsk_af_ops = &ipv6_specific;
if (sk_is_mptcp(sk))
mptcp_handle_ipv6_mapped(sk, false);
sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
tp->af_specific = &tcp_sock_ipv6_specific;
Expand Down Expand Up @@ -1203,6 +1207,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
newnp->saddr = newsk->sk_v6_rcv_saddr;

inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
if (sk_is_mptcp(newsk))
mptcp_handle_ipv6_mapped(newsk, true);
newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
Expand Down
100 changes: 100 additions & 0 deletions net/mptcp/options.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,14 +72,114 @@ void mptcp_parse_option(const unsigned char *ptr, int opsize,
}
}

/* Walk the TCP options of @skb and hand every MPTCP option to
 * mptcp_parse_option(), which stores its findings in @opt_rx.
 *
 * Parsing stops at the end-of-list option, at the end of the option area,
 * or at the first malformed option (length below 2, or larger than the
 * bytes left). Options of any other kind are skipped.
 */
void mptcp_get_options(const struct sk_buff *skb,
		       struct tcp_options_received *opt_rx)
{
	const unsigned char *ptr;
	const struct tcphdr *th = tcp_hdr(skb);
	int length = (th->doff * 4) - sizeof(struct tcphdr);

	/* Options start right after the fixed TCP header. */
	ptr = (const unsigned char *)(th + 1);

	while (length > 0) {
		int opcode = *ptr++;
		int opsize;

		switch (opcode) {
		case TCPOPT_EOL:
			return;
		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
			length--;
			continue;
		default:
			/* A kind byte with no room left for its length byte:
			 * reading opsize would run past the option area.
			 */
			if (length < 2)
				return;
			opsize = *ptr++;
			if (opsize < 2) /* "silly options" */
				return;
			if (opsize > length)
				return;	/* don't parse partial options */
			if (opcode == TCPOPT_MPTCP)
				mptcp_parse_option(ptr, opsize, opt_rx);
			/* kind and length bytes were already consumed */
			ptr += opsize - 2;
			length -= opsize;
		}
	}
}

/* Prepare the MP_CAPABLE option for an outgoing SYN.
 *
 * When the subflow attached to @sk requested MPTCP, @opts receives the
 * MPC_SYN suboption together with the local key and *@size is set to
 * TCPOLEN_MPTCP_MPC_SYN. Returns true in that case; otherwise returns
 * false and leaves @size and @opts untouched.
 */
bool mptcp_syn_options(struct sock *sk, unsigned int *size,
		       struct mptcp_out_options *opts)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	if (!subflow->request_mptcp)
		return false;

	pr_debug("local_key=%llu", subflow->local_key);
	opts->suboptions = OPTION_MPTCP_MPC_SYN;
	opts->sndr_key = subflow->local_key;
	*size = TCPOLEN_MPTCP_MPC_SYN;

	return true;
}

/* Process the MPTCP state of a SYN-ACK received on @sk.
 *
 * If MPTCP was requested on this subflow and the peer answered with
 * MP_CAPABLE, mark the subflow MP_CAPABLE and remember the peer's key.
 * Otherwise clear the socket's is_mptcp flag, so that sk_is_mptcp()
 * reports false for it from now on.
 */
void mptcp_rcv_synsent(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	pr_debug("subflow=%p", subflow);

	if (!subflow->request_mptcp || !tp->rx_opt.mptcp.mp_capable) {
		tp->is_mptcp = 0;
		return;
	}

	subflow->mp_capable = 1;
	subflow->remote_key = tp->rx_opt.mptcp.sndr_key;
}

/* Prepare the MP_CAPABLE option for the ACK completing the handshake.
 *
 * @sk:        subflow socket
 * @skb:       packet being built (not used here)
 * @size:      set to TCPOLEN_MPTCP_MPC_ACK when the option is produced
 * @remaining: TCP option space still available to the caller
 * @opts:      receives the MPC_ACK suboption with the local and remote keys
 *
 * The option is produced once per subflow: only when the subflow is
 * MP_CAPABLE and fourth_ack has not been set yet. Returns true when the
 * option was produced; otherwise returns false with @size and @opts
 * untouched.
 */
bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
			       unsigned int *size, unsigned int remaining,
			       struct mptcp_out_options *opts)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	if (!subflow->mp_capable || subflow->fourth_ack)
		return false;

	/* Honour the caller's space budget, as the SYN-ACK path does.
	 * Bail out before touching fourth_ack so the one-shot option is
	 * not marked as sent when it cannot fit in this packet.
	 */
	if (remaining < TCPOLEN_MPTCP_MPC_ACK)
		return false;

	opts->suboptions = OPTION_MPTCP_MPC_ACK;
	opts->sndr_key = subflow->local_key;
	opts->rcvr_key = subflow->remote_key;
	*size = TCPOLEN_MPTCP_MPC_ACK;
	subflow->fourth_ack = 1;
	pr_debug("subflow=%p, local_key=%llu, remote_key=%llu",
		 subflow, subflow->local_key, subflow->remote_key);
	return true;
}

/* Prepare the MP_CAPABLE option for an outgoing SYN-ACK.
 *
 * When the subflow request behind @req is MP_CAPABLE, @opts receives the
 * MPC_SYNACK suboption together with the local key and *@size is set to
 * TCPOLEN_MPTCP_MPC_SYNACK. Returns true in that case; otherwise returns
 * false and leaves @size and @opts untouched.
 */
bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
			  struct mptcp_out_options *opts)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);

	if (!subflow_req->mp_capable)
		return false;

	opts->suboptions = OPTION_MPTCP_MPC_SYNACK;
	opts->sndr_key = subflow_req->local_key;
	*size = TCPOLEN_MPTCP_MPC_SYNACK;
	pr_debug("subflow_req=%p, local_key=%llu",
		 subflow_req, subflow_req->local_key);

	return true;
}

void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
{
if ((OPTION_MPTCP_MPC_SYN |
OPTION_MPTCP_MPC_SYNACK |
OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
u8 len;

if (OPTION_MPTCP_MPC_SYN & opts->suboptions)
len = TCPOLEN_MPTCP_MPC_SYN;
else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions)
len = TCPOLEN_MPTCP_MPC_SYNACK;
else
len = TCPOLEN_MPTCP_MPC_ACK;

Expand Down
Loading

0 comments on commit cec37a6

Please sign in to comment.