diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 7be9b124235486..d1790c565d0ad1 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -231,7 +231,6 @@ struct tcp_sock {
 /* RTT measurement */
 	u32	srtt_us;	/* smoothed round trip time << 3 in usecs */
 	u32	mdev_us;	/* medium deviation			*/
-	u32	mdev_max_us;	/* maximal mdev for the last rtt period	*/
 	u32	rttvar_us;	/* smoothed mdev_max			*/
 	u32	rtt_seq;	/* sequence number to update rttvar	*/
 	struct  rtt_meas {
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d6c8f4cd080001..a0f66f88315d67 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -680,8 +680,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
 /* Called to compute a smoothed rtt estimate. The data fed to this
  * routine either comes from timestamps, or from segments that were
  * known _not_ to have been retransmitted [see Karn/Partridge
- * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88
- * piece by Van Jacobson.
+ * Proceedings SIGCOMM 87].
  * NOTE: the next three routines used to be one big routine.
  * To save cycles in the RFC 1323 implementation it was better to break
  * it up into three procedures. -- erics
@@ -692,59 +691,21 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
 	long m = mrtt_us; /* RTT */
 	u32 srtt = tp->srtt_us;
 
-	/*	The following amusing code comes from Jacobson's
-	 *	article in SIGCOMM '88. Note that rtt and mdev
-	 *	are scaled versions of rtt and mean deviation.
-	 *	This is designed to be as fast as possible
-	 *	m stands for "measurement".
-	 *
-	 *	On a 1990 paper the rto value is changed to:
-	 *	RTO = rtt + 4 * mdev
-	 *
-	 * Funny. This algorithm seems to be very broken.
-	 * These formulae increase RTO, when it should be decreased, increase
-	 * too slowly, when it should be increased quickly, decrease too quickly
-	 * etc. I guess in BSD RTO takes ONE value, so that it is absolutely
-	 * does not matter how to _calculate_ it. Seems, it was trap
-	 * that VJ failed to avoid. 8)
-	 */
 	if (srtt != 0) {
-		m -= (srtt >> 3);	/* m is now error in rtt est */
-		srtt += m;		/* rtt = 7/8 rtt + 1/8 new */
-		if (m < 0) {
-			m = -m;		/* m is now abs(error) */
-			m -= (tp->mdev_us >> 2);   /* similar update on mdev */
-			/* This is similar to one of Eifel findings.
-			 * Eifel blocks mdev updates when rtt decreases.
-			 * This solution is a bit different: we use finer gain
-			 * for mdev in this case (alpha*beta).
-			 * Like Eifel it also prevents growth of rto,
-			 * but also it limits too fast rto decreases,
-			 * happening in pure Eifel.
-			 */
-			if (m > 0)
-				m >>= 3;
-		} else {
-			m -= (tp->mdev_us >> 2);   /* similar update on mdev */
-		}
-		tp->mdev_us += m;		/* mdev = 3/4 mdev + 1/4 new */
-		if (tp->mdev_us > tp->mdev_max_us) {
-			tp->mdev_max_us = tp->mdev_us;
-			if (tp->mdev_max_us > tp->rttvar_us)
-				tp->rttvar_us = tp->mdev_max_us;
-		}
-		if (after(tp->snd_una, tp->rtt_seq)) {
-			if (tp->mdev_max_us < tp->rttvar_us)
-				tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2;
+		m -= (srtt >> 3);	/* m' = m - srtt / 8 = (R' - SRTT) */
+		srtt += m;		/* srtt = srtt + m' = srtt + m - srtt / 8 */
+		if (m < 0)
+			m = -m;
+		m -= (tp->mdev_us >> 2);	/* m'' = |m'| - mdev / 4 */
+		tp->mdev_us += m;
+		tp->rttvar_us = tp->mdev_us;
+		if (after(tp->snd_una, tp->rtt_seq))
 			tp->rtt_seq = tp->snd_nxt;
-			tp->mdev_max_us = tcp_rto_min_us(sk);
-		}
 	} else {
 		/* no previous measure. */
-		srtt = m << 3;		/* take the measured time to be rtt */
-		tp->mdev_us = m << 1;	/* make sure rto = 3*rtt */
-		tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk));
-		tp->mdev_max_us = tp->rttvar_us;
+		srtt = m << 3;
+		tp->mdev_us = m << 1;
+		tp->rttvar_us = tp->mdev_us;
 		tp->rtt_seq = tp->snd_nxt;
 	}
 	tp->srtt_us = max(1U, srtt);
@@ -798,6 +759,7 @@ static void tcp_update_pacing_rate(struct sock *sk)
  */
 static void tcp_set_rto(struct sock *sk)
 {
+	const u32 min_rto = tcp_rto_min_us(sk);
 	const struct tcp_sock *tp = tcp_sk(sk);
 	/* Old crap is replaced with new one. 8)
 	 *
@@ -809,17 +771,15 @@ static void tcp_set_rto(struct sock *sk)
 	 * is invisible. Actually, Linux-2.4 also generates erratic
 	 * ACKs in some circumstances.
 	 */
-	inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);
-
+	if (((tp->srtt_us >> 3) + tp->rttvar_us) < min_rto)
+		inet_csk(sk)->icsk_rto = usecs_to_jiffies(min_rto);
+	else
+		inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);
 	/* 2. Fixups made earlier cannot be right.
 	 *    If we do not estimate RTO correctly without them,
 	 *    all the algo is pure shit and should be replaced
 	 *    with correct one. It is exactly, which we pretend to do.
 	 */
-
-	/* NOTE: clamping at TCP_RTO_MIN is not required, current algo
-	 * guarantees that rto is higher.
-	 */
 	tcp_bound_rto(sk);
 }
 
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index b617826e24770a..7f59f5b979c704 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -561,7 +561,7 @@ void tcp_init_metrics(struct sock *sk)
 		 * retransmission.
 		 */
 		tp->rttvar_us = jiffies_to_usecs(TCP_TIMEOUT_FALLBACK);
-		tp->mdev_us = tp->mdev_max_us = tp->rttvar_us;
+		tp->mdev_us = tp->rttvar_us;
 
 		inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK;
 	}
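
For anyone checking the arithmetic, here is the new estimator as a small
standalone userspace sketch. This is illustration only, not kernel code:
the names rtt_est, rtt_update and rtt_rto are made up, and a 200000 usec
floor stands in for tcp_rto_min_us(). As in tcp_sock, srtt_us carries
8*SRTT and mdev_us carries 4*MDEV (the usual VJ fixed-point scaling),
which is why (srtt_us >> 3) + rttvar_us in __tcp_set_rto() evaluates to
RTO = SRTT + 4*RTTVAR per RFC 6298.

#include <stdio.h>

/* Standalone sketch of the patched estimator. Like tcp_sock, srtt_us
 * stores 8*SRTT and mdev_us stores 4*MDEV, so SRTT + 4*MDEV is simply
 * (srtt_us >> 3) + mdev_us.
 */
struct rtt_est {
	unsigned int srtt_us;	/* smoothed RTT << 3, in usecs */
	unsigned int mdev_us;	/* mean deviation << 2, in usecs */
	unsigned int rttvar_us;	/* tracks mdev_us after this patch */
};

/* Same arithmetic as the patched tcp_rtt_estimator():
 * SRTT <- 7/8 SRTT + 1/8 m, MDEV <- 3/4 MDEV + 1/4 |m - SRTT|.
 */
static void rtt_update(struct rtt_est *e, long m)
{
	unsigned int srtt = e->srtt_us;

	if (srtt != 0) {
		m -= (long)(srtt >> 3);		/* m' = m - SRTT */
		srtt += m;			/* 8*SRTT += m' */
		if (m < 0)
			m = -m;			/* |m'| */
		m -= (long)(e->mdev_us >> 2);	/* m'' = |m'| - MDEV */
		e->mdev_us += m;		/* 4*MDEV += m'' */
		e->rttvar_us = e->mdev_us;
	} else {
		srtt = m << 3;			/* first sample: SRTT = m */
		e->mdev_us = m << 1;		/* 4*MDEV = 2*m */
		e->rttvar_us = e->mdev_us;
	}
	e->srtt_us = srtt ? srtt : 1;		/* max(1U, srtt) */
}

/* RTO = SRTT + 4*MDEV, floored at min_rto as in the patched
 * tcp_set_rto().
 */
static unsigned int rtt_rto(const struct rtt_est *e, unsigned int min_rto)
{
	unsigned int rto = (e->srtt_us >> 3) + e->rttvar_us;

	return rto < min_rto ? min_rto : rto;
}

int main(void)
{
	struct rtt_est e = { 0, 0, 0 };
	long samples[] = { 100000, 120000, 80000, 300000 };	/* usecs */
	unsigned int i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		rtt_update(&e, samples[i]);
		printf("m=%-6ld srtt=%-6u rttvar=%-6u rto=%u\n",
		       samples[i], e.srtt_us >> 3, e.rttvar_us,
		       rtt_rto(&e, 200000));
	}
	return 0;
}

On the first sample m this gives SRTT = m and 4*MDEV = 2*m, i.e. an
initial RTO of 3*m before the floor, preserving the old "make sure
rto = 3*rtt" start-up behaviour; the min_rto floor in tcp_set_rto()
takes over the lower-bounding that mdev_max_us used to provide inside
the estimator.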