lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Thu, 26 Oct 2023 11:45:41 +0200
From: Eric Dumazet <edumazet@...gle.com>
To: Coco Li <lixiaoyan@...gle.com>
Cc: Jakub Kicinski <kuba@...nel.org>, Neal Cardwell <ncardwell@...gle.com>, 
	Mubashir Adnan Qureshi <mubashirq@...gle.com>, Paolo Abeni <pabeni@...hat.com>, Andrew Lunn <andrew@...n.ch>, 
	Jonathan Corbet <corbet@....net>, David Ahern <dsahern@...nel.org>, 
	Daniel Borkmann <daniel@...earbox.net>, netdev@...r.kernel.org, Chao Wu <wwchao@...gle.com>, 
	Wei Wang <weiwan@...gle.com>, Pradeep Nemavat <pnemavat@...gle.com>
Subject: Re: [PATCH v4 net-next 4/6] netns-ipv4: reorganize netns_ipv4 fast
 path variables

On Thu, Oct 26, 2023 at 10:20 AM Coco Li <lixiaoyan@...gle.com> wrote:
>
> Reorganize fast path variables in tx-txrx-rx order.
> Fastpath cacheline ends after sysctl_tcp_rmem.
> There are only read-only variables here. (Writes happen on the control
> path and are not considered in this case.)
>
> The data below was generated with pahole on the x86 architecture.
> Fast path variables span cache lines before change: 4
> Fast path variables span cache lines after change: 2
>
> Signed-off-by: Coco Li <lixiaoyan@...gle.com>
> Suggested-by: Eric Dumazet <edumazet@...gle.com>
> Reviewed-by: Wei Wang <weiwan@...gle.com>
> Reviewed-by: David Ahern <dsahern@...nel.org>
> ---
>  fs/proc/proc_net.c       | 39 ++++++++++++++++++++++++++++++++++++
>  include/net/netns/ipv4.h | 43 ++++++++++++++++++++++++++--------------
>  2 files changed, 67 insertions(+), 15 deletions(-)
>
> diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
> index 2ba31b6d68c07..38846be34acd9 100644
> --- a/fs/proc/proc_net.c
> +++ b/fs/proc/proc_net.c
> @@ -344,6 +344,43 @@ const struct file_operations proc_net_operations = {
>         .iterate_shared = proc_tgid_net_readdir,
>  };
>
> +static void __init netns_ipv4_struct_check(void)
> +{
> +       /* TX readonly hotpath cache lines */
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +                                     sysctl_tcp_early_retrans);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +                                     sysctl_tcp_tso_win_divisor);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +                                     sysctl_tcp_tso_rtt_log);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +                                     sysctl_tcp_autocorking);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +                                     sysctl_tcp_min_snd_mss);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +                                     sysctl_tcp_notsent_lowat);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +                                     sysctl_tcp_limit_output_bytes);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +                                     sysctl_tcp_min_rtt_wlen);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +                                     sysctl_tcp_wmem);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +                                     sysctl_ip_fwd_use_pmtu);
> +       /* TXRX readonly hotpath cache lines */
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +                                     sysctl_tcp_moderate_rcvbuf);
> +       /* RX readonly hotpath cache line */
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +                                     sysctl_ip_early_demux);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +                                     sysctl_tcp_early_demux);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +                                     sysctl_tcp_reordering);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +                                     sysctl_tcp_rmem);
> +}
> +
>  static __net_init int proc_net_ns_init(struct net *net)
>  {
>         struct proc_dir_entry *netd, *net_statd;
> @@ -351,6 +388,8 @@ static __net_init int proc_net_ns_init(struct net *net)
>         kgid_t gid;
>         int err;
>
> +       netns_ipv4_struct_check();
> +
>         /*
>          * This PDE acts only as an anchor for /proc/${pid}/net hierarchy.
>          * Corresponding inode (PDE(inode) == net->proc_net) is never
> diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
> index 73f43f6991999..617074fccde68 100644
> --- a/include/net/netns/ipv4.h
> +++ b/include/net/netns/ipv4.h
> @@ -42,6 +42,34 @@ struct inet_timewait_death_row {
>  struct tcp_fastopen_context;
>
>  struct netns_ipv4 {
> +       /* Cacheline organization can be found documented in
> +        * Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst.
> +        * Please update the document when adding new fields.
> +        */
> +
> +       __cacheline_group_begin(netns_ipv4_read);

Same remark here: please use three different groups instead of a single one.

__cacheline_group_begin(tx_path);

> +       /* TX readonly hotpath cache lines */
> +       u8 sysctl_tcp_early_retrans;
> +       u8 sysctl_tcp_tso_win_divisor;
> +       u8 sysctl_tcp_tso_rtt_log;
> +       u8 sysctl_tcp_autocorking;
> +       int sysctl_tcp_min_snd_mss;
> +       unsigned int sysctl_tcp_notsent_lowat;
> +       int sysctl_tcp_limit_output_bytes;
> +       int sysctl_tcp_min_rtt_wlen;
> +       int sysctl_tcp_wmem[3];
> +       u8 sysctl_ip_fwd_use_pmtu;
> +

__cacheline_group_end(tx_path);
__cacheline_group_begin(rxtx_path);
> +       /* TXRX readonly hotpath cache lines */
> +       u8 sysctl_tcp_moderate_rcvbuf;
> +

__cacheline_group_end(rxtx_path);
__cacheline_group_begin(rx_path);

> +       /* RX readonly hotpath cache line */
> +       u8 sysctl_ip_early_demux;
> +       u8 sysctl_tcp_early_demux;
> +       int sysctl_tcp_reordering;
> +       int sysctl_tcp_rmem[3];
> +       __cacheline_group_end(netns_ipv4_read);

__cacheline_group_end(rx_path);


> +
>         struct inet_timewait_death_row tcp_death_row;
>         struct udp_table *udp_table;
>
> @@ -96,17 +124,14 @@ struct netns_ipv4 {
>
>         u8 sysctl_ip_default_ttl;
>         u8 sysctl_ip_no_pmtu_disc;
> -       u8 sysctl_ip_fwd_use_pmtu;
>         u8 sysctl_ip_fwd_update_priority;
>         u8 sysctl_ip_nonlocal_bind;
>         u8 sysctl_ip_autobind_reuse;
>         /* Shall we try to damage output packets if routing dev changes? */
>         u8 sysctl_ip_dynaddr;
> -       u8 sysctl_ip_early_demux;
>  #ifdef CONFIG_NET_L3_MASTER_DEV
>         u8 sysctl_raw_l3mdev_accept;
>  #endif
> -       u8 sysctl_tcp_early_demux;
>         u8 sysctl_udp_early_demux;
>
>         u8 sysctl_nexthop_compat_mode;
> @@ -119,7 +144,6 @@ struct netns_ipv4 {
>         u8 sysctl_tcp_mtu_probing;
>         int sysctl_tcp_mtu_probe_floor;
>         int sysctl_tcp_base_mss;
> -       int sysctl_tcp_min_snd_mss;
>         int sysctl_tcp_probe_threshold;
>         u32 sysctl_tcp_probe_interval;
>
> @@ -135,17 +159,14 @@ struct netns_ipv4 {
>         u8 sysctl_tcp_backlog_ack_defer;
>         u8 sysctl_tcp_pingpong_thresh;
>
> -       int sysctl_tcp_reordering;
>         u8 sysctl_tcp_retries1;
>         u8 sysctl_tcp_retries2;
>         u8 sysctl_tcp_orphan_retries;
>         u8 sysctl_tcp_tw_reuse;
>         int sysctl_tcp_fin_timeout;
> -       unsigned int sysctl_tcp_notsent_lowat;
>         u8 sysctl_tcp_sack;
>         u8 sysctl_tcp_window_scaling;
>         u8 sysctl_tcp_timestamps;
> -       u8 sysctl_tcp_early_retrans;
>         u8 sysctl_tcp_recovery;
>         u8 sysctl_tcp_thin_linear_timeouts;
>         u8 sysctl_tcp_slow_start_after_idle;
> @@ -161,21 +182,13 @@ struct netns_ipv4 {
>         u8 sysctl_tcp_frto;
>         u8 sysctl_tcp_nometrics_save;
>         u8 sysctl_tcp_no_ssthresh_metrics_save;
> -       u8 sysctl_tcp_moderate_rcvbuf;
> -       u8 sysctl_tcp_tso_win_divisor;
>         u8 sysctl_tcp_workaround_signed_windows;
> -       int sysctl_tcp_limit_output_bytes;
>         int sysctl_tcp_challenge_ack_limit;
> -       int sysctl_tcp_min_rtt_wlen;
>         u8 sysctl_tcp_min_tso_segs;
> -       u8 sysctl_tcp_tso_rtt_log;
> -       u8 sysctl_tcp_autocorking;
>         u8 sysctl_tcp_reflect_tos;
>         int sysctl_tcp_invalid_ratelimit;
>         int sysctl_tcp_pacing_ss_ratio;
>         int sysctl_tcp_pacing_ca_ratio;
> -       int sysctl_tcp_wmem[3];
> -       int sysctl_tcp_rmem[3];
>         unsigned int sysctl_tcp_child_ehash_entries;
>         unsigned long sysctl_tcp_comp_sack_delay_ns;
>         unsigned long sysctl_tcp_comp_sack_slack_ns;
> --
> 2.42.0.758.gaed0368e0e-goog
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ