From: Ilpo Järvinen ij@kernel.org
The AccECN option ceb/cep heuristic algorithm is from AccECN spec Appendix A.2.2 to mitigate against false ACE field overflows. Armed with ceb delta from option, delivered bytes, and delivered packets it is possible to estimate how many times ACE field wrapped.
This calculation is necessary only if more than one wrap is possible. Without SACK, delivered bytes and packets are not always trustworthy in which case TCP falls back to the simpler no-or-all wraps ceb algorithm.
Signed-off-by: Ilpo Järvinen ij@kernel.org Signed-off-by: Chia-Yu Chang chia-yu.chang@nokia-bell-labs.com --- include/net/tcp.h | 1 + net/ipv4/tcp_input.c | 36 ++++++++++++++++++++++++++++++++++-- 2 files changed, 35 insertions(+), 2 deletions(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h index 014a22aef25a..120a8288a0a3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -246,6 +246,7 @@ static_assert((1 << ATO_BITS) > TCP_DELACK_MAX); #define TCP_ACCECN_MAXSIZE (TCPOLEN_ACCECN_BASE + \ TCPOLEN_ACCECN_PERFIELD * \ TCP_ACCECN_NUMFIELDS) +#define TCP_ACCECN_SAFETY_SHIFT 1 /* SAFETY_FACTOR in accecn draft */
/* Flags in tp->nonagle */ #define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5a8f5ca9efde..094645e0d9fc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -492,16 +492,19 @@ static u32 __tcp_accecn_process(struct sock *sk, const struct sk_buff *skb, u32 delivered_pkts, u32 delivered_bytes, int flag) { + u32 old_ceb = tcp_sk(sk)->delivered_ecn_bytes[INET_ECN_CE - 1]; const struct tcphdr *th = tcp_hdr(skb); struct tcp_sock *tp = tcp_sk(sk); - u32 delta, safe_delta; + u32 delta, safe_delta, d_ceb; + bool opt_deltas_valid; u32 corrected_ace;
/* Reordered ACK or uncertain due to lack of data to send and ts */ if (!(flag & (FLAG_FORWARD_PROGRESS | FLAG_TS_PROGRESS))) return 0;
- tcp_accecn_process_option(tp, skb, delivered_bytes, flag); + opt_deltas_valid = tcp_accecn_process_option(tp, skb, + delivered_bytes, flag);
if (!(flag & FLAG_SLOWPATH)) { /* AccECN counter might overflow on large ACKs */ @@ -524,6 +527,35 @@ static u32 __tcp_accecn_process(struct sock *sk, const struct sk_buff *skb, safe_delta = delivered_pkts - ((delivered_pkts - delta) & TCP_ACCECN_CEP_ACE_MASK);
+ if (opt_deltas_valid) { + d_ceb = tp->delivered_ecn_bytes[INET_ECN_CE - 1] - old_ceb; + if (!d_ceb) + return delta; + + if ((delivered_pkts >= (TCP_ACCECN_CEP_ACE_MASK + 1) * 2) && + (tcp_is_sack(tp) || + ((1 << inet_csk(sk)->icsk_ca_state) & + (TCPF_CA_Open | TCPF_CA_CWR)))) { + u32 est_d_cep; + + if (delivered_bytes <= d_ceb) + return safe_delta; + + est_d_cep = DIV_ROUND_UP_ULL((u64)d_ceb * + delivered_pkts, + delivered_bytes); + return min(safe_delta, + delta + + (est_d_cep & ~TCP_ACCECN_CEP_ACE_MASK)); + } + + if (d_ceb > delta * tp->mss_cache) + return safe_delta; + if (d_ceb < + safe_delta * tp->mss_cache >> TCP_ACCECN_SAFETY_SHIFT) + return delta; + } + return safe_delta; }