lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CANn89iKuE7gLXq5qn0PDCKOXxOZdMXXxtW1dA-D0eLjfhwApgQ@mail.gmail.com>
Date: Wed, 25 Jun 2025 01:04:55 -0700
From: Eric Dumazet <edumazet@...gle.com>
To: chia-yu.chang@...ia-bell-labs.com
Cc: pabeni@...hat.com, linux-doc@...r.kernel.org, corbet@....net, 
	horms@...nel.org, dsahern@...nel.org, kuniyu@...zon.com, bpf@...r.kernel.org, 
	netdev@...r.kernel.org, dave.taht@...il.com, jhs@...atatu.com, 
	kuba@...nel.org, stephen@...workplumber.org, xiyou.wangcong@...il.com, 
	jiri@...nulli.us, davem@...emloft.net, andrew+netdev@...n.ch, 
	donald.hunter@...il.com, ast@...erby.net, liuhangbin@...il.com, 
	shuah@...nel.org, linux-kselftest@...r.kernel.org, ij@...nel.org, 
	ncardwell@...gle.com, koen.de_schepper@...ia-bell-labs.com, 
	g.white@...lelabs.com, ingemar.s.johansson@...csson.com, 
	mirja.kuehlewind@...csson.com, cheshire@...le.com, rs.ietf@....at, 
	Jason_Livingood@...cast.com, vidhi_goel@...le.com
Subject: Re: [PATCH v9 net-next 06/15] tcp: accecn: add AccECN rx byte counters

On Sat, Jun 21, 2025 at 12:37 PM <chia-yu.chang@...ia-bell-labs.com> wrote:
>
> From: Ilpo Järvinen <ij@...nel.org>
>
> These three byte counters track IP ECN field payload byte sums for
> all arriving (acceptable) packets for ECT0, ECT1, and CE. The
> AccECN option (added by a later patch in the series) echoes these
> counters back to sender side; therefore, it is placed within the
> group of tcp_sock_write_txrx.
>
> Below are the pahole outcomes before and after this patch, in which
> the group size of tcp_sock_write_txrx is increased from 95 + 4 to
> 107 + 4 and an extra 4-byte hole is created but will be exploited
> in later patches:
>
> [BEFORE THIS PATCH]
> struct tcp_sock {
>     [...]
>     u32                        delivered_ce;         /*  2640     4 */
>     u32                        received_ce;          /*  2644     4 */
>     u32                        app_limited;          /*  2648     4 */
>     u32                        rcv_wnd;              /*  2652     4 */
>     struct tcp_options_received rx_opt;              /*  2656    24 */
>     __cacheline_group_end__tcp_sock_write_txrx[0];   /*  2680     0 */
>
>     [...]
>     /* size: 3264, cachelines: 51, members: 169 */
> }
>
> [AFTER THIS PATCH]
> struct tcp_sock {
>     [...]
>     u32                        delivered_ce;         /*  2640     4 */
>     u32                        received_ce;          /*  2644     4 */
>     u32                        received_ecn_bytes[3];/*  2648    12 */
>     u32                        app_limited;          /*  2660     4 */
>     u32                        rcv_wnd;              /*  2664     4 */
>     struct tcp_options_received rx_opt;              /*  2668    24 */
>     __cacheline_group_end__tcp_sock_write_txrx[0];   /*  2692     0 */
>     /* XXX 4 bytes hole, try to pack */
>
>     [...]
>     /* size: 3264, cachelines: 51, members: 170 */
> }
>
> Signed-off-by: Ilpo Järvinen <ij@...nel.org>
> Signed-off-by: Neal Cardwell <ncardwell@...gle.com>
> Co-developed-by: Chia-Yu Chang <chia-yu.chang@...ia-bell-labs.com>
> Signed-off-by: Chia-Yu Chang <chia-yu.chang@...ia-bell-labs.com>
>
> ---
> v8:
> - Add new helper function tcp_ecn_received_counters_payload()
> ---
>  .../networking/net_cachelines/tcp_sock.rst    |  1 +
>  include/linux/tcp.h                           |  4 ++++
>  include/net/tcp.h                             | 20 +++++++++++++++++-
>  net/ipv4/tcp.c                                |  3 ++-
>  net/ipv4/tcp_input.c                          | 21 +++++++++++++++----
>  net/ipv4/tcp_minisocks.c                      |  2 +-
>  6 files changed, 44 insertions(+), 7 deletions(-)
>
> diff --git a/Documentation/networking/net_cachelines/tcp_sock.rst b/Documentation/networking/net_cachelines/tcp_sock.rst
> index 22ac668fe6c7..804480d39132 100644
> --- a/Documentation/networking/net_cachelines/tcp_sock.rst
> +++ b/Documentation/networking/net_cachelines/tcp_sock.rst
> @@ -102,6 +102,7 @@ u32                           prr_out                 read_mostly         read_m
>  u32                           delivered               read_mostly         read_write          tcp_rate_skb_sent, tcp_newly_delivered(tx);tcp_ack, tcp_rate_gen, tcp_clean_rtx_queue (rx)
>  u32                           delivered_ce            read_mostly         read_write          tcp_rate_skb_sent(tx);tcp_rate_gen(rx)
>  u32                           received_ce             read_mostly         read_write
> +u32[3]                        received_ecn_bytes      read_mostly         read_write
>  u8:4                          received_ce_pending     read_mostly         read_write
>  u8:2                          syn_ect_snt             write_mostly        read_write
>  u8:2                          syn_ect_rcv             read_mostly         read_write
> diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> index 1d8301f2883c..0c2331e186e8 100644
> --- a/include/linux/tcp.h
> +++ b/include/linux/tcp.h
> @@ -306,6 +306,10 @@ struct tcp_sock {
>         u32     delivered;      /* Total data packets delivered incl. rexmits */
>         u32     delivered_ce;   /* Like the above but only ECE marked packets */
>         u32     received_ce;    /* Like the above but for rcvd CE marked pkts */
> +       u32     received_ecn_bytes[3]; /* received byte counters for three ECN
> +                                       * types: INET_ECN_ECT_1, INET_ECN_ECT_0,
> +                                       * and INET_ECN_CE
> +                                       */
>         u32     app_limited;    /* limited until "delivered" reaches this val */
>         u32     rcv_wnd;        /* Current receiver window              */
>  /*
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 4d6325fa3f67..b861941a2213 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -467,7 +467,10 @@ static inline int tcp_accecn_extract_syn_ect(u8 ace)
>  bool tcp_accecn_validate_syn_feedback(struct sock *sk, u8 ace, u8 sent_ect);
>  void tcp_accecn_third_ack(struct sock *sk, const struct sk_buff *skb,
>                           u8 syn_ect_snt);
> -void tcp_ecn_received_counters(struct sock *sk, const struct sk_buff *skb);
> +void tcp_ecn_received_counters_payload(struct sock *sk,
> +                                      const struct sk_buff *skb);
> +void tcp_ecn_received_counters(struct sock *sk, const struct sk_buff *skb,
> +                              u32 payload_len);
>
>  enum tcp_tw_status {
>         TCP_TW_SUCCESS = 0,
> @@ -1035,6 +1038,20 @@ static inline u32 tcp_rsk_tsval(const struct tcp_request_sock *treq)
>   * See draft-ietf-tcpm-accurate-ecn for the latest values.
>   */
>  #define TCP_ACCECN_CEP_INIT_OFFSET 5
> +#define TCP_ACCECN_E1B_INIT_OFFSET 1
> +#define TCP_ACCECN_E0B_INIT_OFFSET 1
> +#define TCP_ACCECN_CEB_INIT_OFFSET 0
> +
> +static inline void __tcp_accecn_init_bytes_counters(int *counter_array)
> +{
> +       BUILD_BUG_ON(INET_ECN_ECT_1 != 0x1);
> +       BUILD_BUG_ON(INET_ECN_ECT_0 != 0x2);
> +       BUILD_BUG_ON(INET_ECN_CE != 0x3);
> +
> +       counter_array[INET_ECN_ECT_1 - 1] = 0;
> +       counter_array[INET_ECN_ECT_0 - 1] = 0;
> +       counter_array[INET_ECN_CE - 1] = 0;
> +}
>
>  /* The highest ECN variant (Accurate ECN, ECN, or no ECN) that is
>   * attemped to be negotiated and requested for incoming connection
> @@ -1053,6 +1070,7 @@ static inline void tcp_accecn_init_counters(struct tcp_sock *tp)
>  {
>         tp->received_ce = 0;
>         tp->received_ce_pending = 0;
> +       __tcp_accecn_init_bytes_counters(tp->received_ecn_bytes);
>  }
>
>  /* State flags for sacked in struct tcp_skb_cb */
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index e6d7b5420c88..0e779de3ce01 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -5122,6 +5122,7 @@ static void __init tcp_struct_check(void)
>         CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, delivered);
>         CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, delivered_ce);
>         CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, received_ce);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, received_ecn_bytes);
>         CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, app_limited);
>         CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_wnd);
>         CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rx_opt);
> @@ -5129,7 +5130,7 @@ static void __init tcp_struct_check(void)
>         /* 32bit arches with 8byte alignment on u64 fields might need padding
>          * before tcp_clock_cache.
>          */
> -       CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 95 + 4);
> +       CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 107 + 4);
>
>         /* RX read-write hotpath cache lines */
>         CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_received);
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index c986452302cb..5c0159cc0083 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -6013,8 +6013,17 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t
>         }
>  }
>
> +void tcp_ecn_received_counters_payload(struct sock *sk,
> +                                      const struct sk_buff *skb)
> +{
> +       const struct tcphdr *th = (const struct tcphdr *)skb->data;
> +
> +       tcp_ecn_received_counters(sk, skb, skb->len - th->doff * 4);
> +}
> +
>  /* Updates Accurate ECN received counters from the received IP ECN field */
> -void tcp_ecn_received_counters(struct sock *sk, const struct sk_buff *skb)
> +void tcp_ecn_received_counters(struct sock *sk, const struct sk_buff *skb,
> +                              u32 payload_len)
>  {
>         u8 ecnfield = TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK;
>         u8 is_ce = INET_ECN_is_ce(ecnfield);
> @@ -6034,6 +6043,9 @@ void tcp_ecn_received_counters(struct sock *sk, const struct sk_buff *skb)
>                 tp->received_ce += pcount;
>                 tp->received_ce_pending = min(tp->received_ce_pending + pcount,
>                                               0xfU);
> +
> +               if (payload_len > 0)
> +                       tp->received_ecn_bytes[ecnfield - 1] += payload_len;
>         }
>  }
>
> @@ -6307,7 +6319,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
>                                         flag |= __tcp_replace_ts_recent(tp,
>                                                                         delta);
>
> -                               tcp_ecn_received_counters(sk, skb);
> +                               tcp_ecn_received_counters(sk, skb, 0);
>
>                                 /* We know that such packets are checksummed
>                                  * on entry.
> @@ -6353,7 +6365,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
>                         /* Bulk data transfer: receiver */
>                         tcp_cleanup_skb(skb);
>                         __skb_pull(skb, tcp_header_len);
> -                       tcp_ecn_received_counters(sk, skb);
> +                       tcp_ecn_received_counters(sk, skb,
> +                                                 len - tcp_header_len);
>                         eaten = tcp_queue_rcv(sk, skb, &fragstolen);
>
>                         tcp_event_data_recv(sk, skb);
> @@ -6400,7 +6413,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
>                         tcp_accecn_third_ack(sk, skb, tp->syn_ect_snt);
>                 tcp_fast_path_on(tp);
>         }
> -       tcp_ecn_received_counters(sk, skb);
> +       tcp_ecn_received_counters_payload(sk, skb);

I missed this from a prior patch, but is it expected to account bytes
even if the packet is dropped ?

>
>         reason = tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT);
>         if ((int)reason < 0) {
> diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
> index 779a206a5ca6..f1e698c323f3 100644
> --- a/net/ipv4/tcp_minisocks.c
> +++ b/net/ipv4/tcp_minisocks.c
> @@ -500,7 +500,7 @@ static void tcp_ecn_openreq_child(struct sock *sk,
>                 tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
>                 tp->syn_ect_snt = treq->syn_ect_snt;
>                 tcp_accecn_third_ack(sk, skb, treq->syn_ect_snt);
> -               tcp_ecn_received_counters(sk, skb);
> +               tcp_ecn_received_counters_payload(sk, skb);
>         } else {
>                 tcp_ecn_mode_set(tp, inet_rsk(req)->ecn_ok ?
>                                      TCP_ECN_MODE_RFC3168 :
> --
> 2.34.1
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ