-----Original Message----- From: Chia-Yu Chang (Nokia) Sent: Thursday, June 26, 2025 4:47 PM To: Eric Dumazet edumazet@google.com Cc: pabeni@redhat.com; linux-doc@vger.kernel.org; corbet@lwn.net; horms@kernel.org; dsahern@kernel.org; kuniyu@amazon.com; bpf@vger.kernel.org; netdev@vger.kernel.org; dave.taht@gmail.com; jhs@mojatatu.com; kuba@kernel.org; stephen@networkplumber.org; xiyou.wangcong@gmail.com; jiri@resnulli.us; davem@davemloft.net; andrew+netdev@lunn.ch; donald.hunter@gmail.com; ast@fiberby.net; liuhangbin@gmail.com; shuah@kernel.org; linux-kselftest@vger.kernel.org; ij@kernel.org; ncardwell@google.com; Koen De Schepper (Nokia) koen.de_schepper@nokia-bell-labs.com; g.white@cablelabs.com; ingemar.s.johansson@ericsson.com; mirja.kuehlewind@ericsson.com; cheshire@apple.com; rs.ietf@gmx.at; Jason_Livingood@comcast.com; vidhi_goel@apple.com Subject: RE: [PATCH v9 net-next 09/15] tcp: accecn: AccECN option
-----Original Message----- From: Eric Dumazet edumazet@google.com Sent: Wednesday, June 25, 2025 11:21 AM To: Chia-Yu Chang (Nokia) chia-yu.chang@nokia-bell-labs.com Cc: pabeni@redhat.com; linux-doc@vger.kernel.org; corbet@lwn.net; horms@kernel.org; dsahern@kernel.org; kuniyu@amazon.com; bpf@vger.kernel.org; netdev@vger.kernel.org; dave.taht@gmail.com; jhs@mojatatu.com; kuba@kernel.org; stephen@networkplumber.org; xiyou.wangcong@gmail.com; jiri@resnulli.us; davem@davemloft.net; andrew+netdev@lunn.ch; donald.hunter@gmail.com; ast@fiberby.net; liuhangbin@gmail.com; shuah@kernel.org; linux-kselftest@vger.kernel.org; ij@kernel.org; ncardwell@google.com; Koen De Schepper (Nokia) koen.de_schepper@nokia-bell-labs.com; g.white@cablelabs.com; ingemar.s.johansson@ericsson.com; mirja.kuehlewind@ericsson.com; cheshire@apple.com; rs.ietf@gmx.at; Jason_Livingood@comcast.com; vidhi_goel@apple.com Subject: Re: [PATCH v9 net-next 09/15] tcp: accecn: AccECN option
CAUTION: This is an external email. Please be very careful when clicking links or opening attachments. See the URL nok.it/ext for additional information.
On Sat, Jun 21, 2025 at 12:38 PM chia-yu.chang@nokia-bell-labs.com wrote:
From: Ilpo Järvinen ij@kernel.org
[...]
--- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -316,6 +316,13 @@ struct tcp_info { * in milliseconds, including any * unfinished recovery. */
__u32 tcpi_received_ce; /* # of CE marks received */
__u32 tcpi_delivered_e1_bytes; /* Accurate ECN byte counters */
__u32 tcpi_delivered_e0_bytes;
__u32 tcpi_delivered_ce_bytes;
__u32 tcpi_received_e1_bytes;
__u32 tcpi_received_e0_bytes;
__u32 tcpi_received_ce_bytes;
We need to immediately fill the 32bit hole, otherwise we won't be able to use it in the future.
Hi Eric,
I did not see any holes created after adding these __u32 values, and you can see the pahole result of tcp_info below:
And the pahole result is the same either I compile with ARM32 or 64-bit architecture.
Or you mean we need an extra "__u32 unused;" at the end?
struct tcp_info { __u8 tcpi_state; /* 0 1 */ __u8 tcpi_ca_state; /* 1 1 */ __u8 tcpi_retransmits; /* 2 1 */ __u8 tcpi_probes; /* 3 1 */ __u8 tcpi_backoff; /* 4 1 */ __u8 tcpi_options; /* 5 1 */ __u8 tcpi_snd_wscale:4; /* 6: 0 1 */ __u8 tcpi_rcv_wscale:4; /* 6: 4 1 */ __u8 tcpi_delivery_rate_app_limited:1; /* 7: 0 1 */ __u8 tcpi_fastopen_client_fail:2; /* 7: 1 1 */
/* XXX 5 bits hole, try to pack */ __u32 tcpi_rto; /* 8 4 */ __u32 tcpi_ato; /* 12 4 */ __u32 tcpi_snd_mss; /* 16 4 */ __u32 tcpi_rcv_mss; /* 20 4 */ __u32 tcpi_unacked; /* 24 4 */ __u32 tcpi_sacked; /* 28 4 */ __u32 tcpi_lost; /* 32 4 */ __u32 tcpi_retrans; /* 36 4 */ __u32 tcpi_fackets; /* 40 4 */ __u32 tcpi_last_data_sent; /* 44 4 */ __u32 tcpi_last_ack_sent; /* 48 4 */ __u32 tcpi_last_data_recv; /* 52 4 */ __u32 tcpi_last_ack_recv; /* 56 4 */ __u32 tcpi_pmtu; /* 60 4 */ /* --- cacheline 1 boundary (64 bytes) --- */ __u32 tcpi_rcv_ssthresh; /* 64 4 */ __u32 tcpi_rtt; /* 68 4 */ __u32 tcpi_rttvar; /* 72 4 */ __u32 tcpi_snd_ssthresh; /* 76 4 */ __u32 tcpi_snd_cwnd; /* 80 4 */ __u32 tcpi_advmss; /* 84 4 */ __u32 tcpi_reordering; /* 88 4 */ __u32 tcpi_rcv_rtt; /* 92 4 */ __u32 tcpi_rcv_space; /* 96 4 */ __u32 tcpi_total_retrans; /* 100 4 */ __u64 tcpi_pacing_rate; /* 104 8 */ __u64 tcpi_max_pacing_rate; /* 112 8 */ __u64 tcpi_bytes_acked; /* 120 8 */ /* --- cacheline 2 boundary (128 bytes) --- */ __u64 tcpi_bytes_received; /* 128 8 */ __u32 tcpi_segs_out; /* 136 4 */ __u32 tcpi_segs_in; /* 140 4 */ __u32 tcpi_notsent_bytes; /* 144 4 */ __u32 tcpi_min_rtt; /* 148 4 */ __u32 tcpi_data_segs_in; /* 152 4 */ __u32 tcpi_data_segs_out; /* 156 4 */ __u64 tcpi_delivery_rate; /* 160 8 */ __u64 tcpi_busy_time; /* 168 8 */ __u64 tcpi_rwnd_limited; /* 176 8 */ __u64 tcpi_sndbuf_limited; /* 184 8 */ /* --- cacheline 3 boundary (192 bytes) --- */ __u32 tcpi_delivered; /* 192 4 */ __u32 tcpi_delivered_ce; /* 196 4 */ __u64 tcpi_bytes_sent; /* 200 8 */ __u64 tcpi_bytes_retrans; /* 208 8 */ __u32 tcpi_dsack_dups; /* 216 4 */ __u32 tcpi_reord_seen; /* 220 4 */ __u32 tcpi_rcv_ooopack; /* 224 4 */ __u32 tcpi_snd_wnd; /* 228 4 */ __u32 tcpi_rcv_wnd; /* 232 4 */ __u32 tcpi_rehash; /* 236 4 */ __u16 tcpi_total_rto; /* 240 2 */ __u16 tcpi_total_rto_recoveries; /* 242 2 */ __u32 tcpi_total_rto_time; /* 244 4 */ __u32 tcpi_received_ce; /* 248 4 */ __u32 tcpi_delivered_e1_bytes; /* 252 4 */ /* --- cacheline 4 boundary (256 bytes) --- */ __u32 tcpi_delivered_e0_bytes; /* 256 4 */ __u32 tcpi_delivered_ce_bytes; /* 260 4 */ __u32 tcpi_received_e1_bytes; /* 264 4 */ __u32 tcpi_received_e0_bytes; /* 268 4 */ __u32 tcpi_received_ce_bytes; /* 272 4 */ /* size: 280, cachelines: 5, members: 68 */ /* sum members: 274 */ /* sum bitfield members: 11 bits, bit holes: 1, sum bit holes: 5 bits */ /* padding: 4 */ /* last cacheline: 24 bytes */
}; [...]
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a327fc14b207..66390966cc41 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -70,6 +70,7 @@ #include <linux/sysctl.h> #include <linux/kernel.h> #include <linux/prefetch.h> +#include <linux/bitops.h> #include <net/dst.h> #include <net/tcp.h> #include <net/proto_memory.h> @@ -530,6 +531,139 @@ static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr return false; }
+/* Maps IP ECN field ECT/CE code point to AccECN option field +number, given
- we are sending fields with Accurate ECN Order 1: ECT(1), CE, ECT(0).
- */
+static u8 tcp_ecnfield_to_accecn_optfield(u8 ecnfield) {
switch (ecnfield & INET_ECN_MASK) {
case INET_ECN_NOT_ECT:
return 0; /* AccECN does not send counts of NOT_ECT */
case INET_ECN_ECT_1:
return 1;
case INET_ECN_CE:
return 2;
case INET_ECN_ECT_0:
return 3;
}
return 0;
+}
net/ipv4/tcp_input.c has already 7273 lines....
I would suggest adding all these tcp ecn helpers in a separate include file, and move existing ones.
Sure, I will create another header file including tcp ecn & accecn helpers.
Chia-Yu
On top of my previous reply, I will like to specify that the following ECN functions will also be included in new /include/net/tcp_ecn.h
void tcp_ecn_queue_cwr(), void tcp_ecn_accept_cwr(), and void tcp_ecn_withdraw_cwr()
This is because these functions are also been modified due to the introduction of Accurate ECN.
Does it make sense to you? Or only AccECN function shall be included in the new header?