Skip to content

Commit 562b1fd

Browse files
haiyangz, kuba-moo
authored and committed
tcp: Set pingpong threshold via sysctl
TCP pingpong threshold is 1 by default. But some applications, like SQL DB, may prefer a higher pingpong threshold to activate delayed acks in quick ack mode for better performance.

The pingpong threshold and related code were changed to 3 in the year 2019 in:
  commit 4a41f45 ("tcp: change pingpong threshold to 3")
And reverted to 1 in the year 2022 in:
  commit 4d8f24e ("Revert "tcp: change pingpong threshold to 3"")

There is no single value that fits all applications. Add net.ipv4.tcp_pingpong_thresh sysctl tunable, so it can be tuned for optimal performance based on the application needs.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://lore.kernel.org/r/1697056244-21888-1-git-send-email-haiyangz@microsoft.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
1 parent 39d08b9 commit 562b1fd

File tree

6 files changed

+39
-6
lines changed

6 files changed

+39
-6
lines changed

Documentation/networking/ip-sysctl.rst

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1183,6 +1183,19 @@ tcp_plb_cong_thresh - INTEGER
11831183

11841184
Default: 128
11851185

1186+
tcp_pingpong_thresh - INTEGER
1187+
The number of estimated data replies sent for estimated incoming data
1188+
requests that must happen before TCP considers that a connection is a
1189+
"ping-pong" (request-response) connection for which delayed
1190+
acknowledgments can provide benefits.
1191+
1192+
This threshold is 1 by default, but some applications may need a higher
1193+
threshold for optimal performance.
1194+
1195+
Possible Values: 1 - 255
1196+
1197+
Default: 1
1198+
11861199
UDP variables
11871200
=============
11881201

include/net/inet_connection_sock.h

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -328,11 +328,10 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
328328

329329
struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
330330

331-
#define TCP_PINGPONG_THRESH 1
332-
333331
static inline void inet_csk_enter_pingpong_mode(struct sock *sk)
334332
{
335-
inet_csk(sk)->icsk_ack.pingpong = TCP_PINGPONG_THRESH;
333+
inet_csk(sk)->icsk_ack.pingpong =
334+
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pingpong_thresh);
336335
}
337336

338337
static inline void inet_csk_exit_pingpong_mode(struct sock *sk)
@@ -342,7 +341,16 @@ static inline void inet_csk_exit_pingpong_mode(struct sock *sk)
342341

343342
static inline bool inet_csk_in_pingpong_mode(struct sock *sk)
344343
{
345-
return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
344+
return inet_csk(sk)->icsk_ack.pingpong >=
345+
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pingpong_thresh);
346+
}
347+
348+
static inline void inet_csk_inc_pingpong_cnt(struct sock *sk)
349+
{
350+
struct inet_connection_sock *icsk = inet_csk(sk);
351+
352+
if (icsk->icsk_ack.pingpong < U8_MAX)
353+
icsk->icsk_ack.pingpong++;
346354
}
347355

348356
static inline bool inet_csk_has_ulp(const struct sock *sk)

include/net/netns/ipv4.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,8 @@ struct netns_ipv4 {
133133
u8 sysctl_tcp_migrate_req;
134134
u8 sysctl_tcp_comp_sack_nr;
135135
u8 sysctl_tcp_backlog_ack_defer;
136+
u8 sysctl_tcp_pingpong_thresh;
137+
136138
int sysctl_tcp_reordering;
137139
u8 sysctl_tcp_retries1;
138140
u8 sysctl_tcp_retries2;

net/ipv4/sysctl_net_ipv4.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1498,6 +1498,14 @@ static struct ctl_table ipv4_net_table[] = {
14981498
.extra1 = SYSCTL_ZERO,
14991499
.extra2 = SYSCTL_ONE,
15001500
},
1501+
{
1502+
.procname = "tcp_pingpong_thresh",
1503+
.data = &init_net.ipv4.sysctl_tcp_pingpong_thresh,
1504+
.maxlen = sizeof(u8),
1505+
.mode = 0644,
1506+
.proc_handler = proc_dou8vec_minmax,
1507+
.extra1 = SYSCTL_ONE,
1508+
},
15011509
{ }
15021510
};
15031511

net/ipv4/tcp_ipv4.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3288,6 +3288,8 @@ static int __net_init tcp_sk_init(struct net *net)
32883288
net->ipv4.sysctl_tcp_syn_linear_timeouts = 4;
32893289
net->ipv4.sysctl_tcp_shrink_window = 0;
32903290

3291+
net->ipv4.sysctl_tcp_pingpong_thresh = 1;
3292+
32913293
return 0;
32923294
}
32933295

net/ipv4/tcp_output.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,10 +170,10 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
170170
tp->lsndtime = now;
171171

172172
/* If it is a reply for ato after last received
173-
* packet, enter pingpong mode.
173+
* packet, increase pingpong count.
174174
*/
175175
if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
176-
inet_csk_enter_pingpong_mode(sk);
176+
inet_csk_inc_pingpong_cnt(sk);
177177
}
178178

179179
/* Account for an ACK we sent. */

0 commit comments

Comments
 (0)