[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CANn89iJaavn3aArjc1LDXDs1wfTZV=hUVnreQu=3Dnde=BOEMQ@mail.gmail.com>
Date: Thu, 26 Oct 2023 11:45:41 +0200
From: Eric Dumazet <edumazet@...gle.com>
To: Coco Li <lixiaoyan@...gle.com>
Cc: Jakub Kicinski <kuba@...nel.org>, Neal Cardwell <ncardwell@...gle.com>,
Mubashir Adnan Qureshi <mubashirq@...gle.com>, Paolo Abeni <pabeni@...hat.com>, Andrew Lunn <andrew@...n.ch>,
Jonathan Corbet <corbet@....net>, David Ahern <dsahern@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>, netdev@...r.kernel.org, Chao Wu <wwchao@...gle.com>,
Wei Wang <weiwan@...gle.com>, Pradeep Nemavat <pnemavat@...gle.com>
Subject: Re: [PATCH v4 net-next 4/6] netns-ipv4: reorganize netns_ipv4 fast path variables
On Thu, Oct 26, 2023 at 10:20 AM Coco Li <lixiaoyan@...gle.com> wrote:
>
> Reorganize fast path variables in tx-txrx-rx order.
> Fastpath cacheline ends after sysctl_tcp_rmem.
> There are only read-only variables here. (write is on the control path
> and not considered in this case)
>
> Below data generated with pahole on x86 architecture.
> Fast path variables span cache lines before change: 4
> Fast path variables span cache lines after change: 2
>
> Signed-off-by: Coco Li <lixiaoyan@...gle.com>
> Suggested-by: Eric Dumazet <edumazet@...gle.com>
> Reviewed-by: Wei Wang <weiwan@...gle.com>
> Reviewed-by: David Ahern <dsahern@...nel.org>
> ---
> fs/proc/proc_net.c | 39 ++++++++++++++++++++++++++++++++++++
> include/net/netns/ipv4.h | 43 ++++++++++++++++++++++++++--------------
> 2 files changed, 67 insertions(+), 15 deletions(-)
>
> diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
> index 2ba31b6d68c07..38846be34acd9 100644
> --- a/fs/proc/proc_net.c
> +++ b/fs/proc/proc_net.c
> @@ -344,6 +344,43 @@ const struct file_operations proc_net_operations = {
> .iterate_shared = proc_tgid_net_readdir,
> };
>
> +static void __init netns_ipv4_struct_check(void)
> +{
> + /* TX readonly hotpath cache lines */
> + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> + sysctl_tcp_early_retrans);
> + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> + sysctl_tcp_tso_win_divisor);
> + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> + sysctl_tcp_tso_rtt_log);
> + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> + sysctl_tcp_autocorking);
> + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> + sysctl_tcp_min_snd_mss);
> + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> + sysctl_tcp_notsent_lowat);
> + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> + sysctl_tcp_limit_output_bytes);
> + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> + sysctl_tcp_min_rtt_wlen);
> + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> + sysctl_tcp_wmem);
> + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> + sysctl_ip_fwd_use_pmtu);
> + /* TXRX readonly hotpath cache lines */
> + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> + sysctl_tcp_moderate_rcvbuf);
> + /* RX readonly hotpath cache line */
> + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> + sysctl_ip_early_demux);
> + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> + sysctl_tcp_early_demux);
> + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> + sysctl_tcp_reordering);
> + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> + sysctl_tcp_rmem);
> +}
> +
> static __net_init int proc_net_ns_init(struct net *net)
> {
> struct proc_dir_entry *netd, *net_statd;
> @@ -351,6 +388,8 @@ static __net_init int proc_net_ns_init(struct net *net)
> kgid_t gid;
> int err;
>
> + netns_ipv4_struct_check();
> +
> /*
> * This PDE acts only as an anchor for /proc/${pid}/net hierarchy.
> * Corresponding inode (PDE(inode) == net->proc_net) is never
> diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
> index 73f43f6991999..617074fccde68 100644
> --- a/include/net/netns/ipv4.h
> +++ b/include/net/netns/ipv4.h
> @@ -42,6 +42,34 @@ struct inet_timewait_death_row {
> struct tcp_fastopen_context;
>
> struct netns_ipv4 {
> + /* Cacheline organization can be found documented in
> + * Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst.
> + * Please update the document when adding new fields.
> + */
> +
> + __cacheline_group_begin(netns_ipv4_read);
Same remark here: please use three different groups instead of a single one.
__cacheline_group_begin(tx_path);
> + /* TX readonly hotpath cache lines */
> + u8 sysctl_tcp_early_retrans;
> + u8 sysctl_tcp_tso_win_divisor;
> + u8 sysctl_tcp_tso_rtt_log;
> + u8 sysctl_tcp_autocorking;
> + int sysctl_tcp_min_snd_mss;
> + unsigned int sysctl_tcp_notsent_lowat;
> + int sysctl_tcp_limit_output_bytes;
> + int sysctl_tcp_min_rtt_wlen;
> + int sysctl_tcp_wmem[3];
> + u8 sysctl_ip_fwd_use_pmtu;
> +
__cacheline_group_end(tx_path);
__cacheline_group_begin(rxtx_path);
> + /* TXRX readonly hotpath cache lines */
> + u8 sysctl_tcp_moderate_rcvbuf;
> +
__cacheline_group_end(rxtx_path);
__cacheline_group_begin(rx_path);
> + /* RX readonly hotpath cache line */
> + u8 sysctl_ip_early_demux;
> + u8 sysctl_tcp_early_demux;
> + int sysctl_tcp_reordering;
> + int sysctl_tcp_rmem[3];
> + __cacheline_group_end(netns_ipv4_read);
__cacheline_group_end(rx_path);
> +
> struct inet_timewait_death_row tcp_death_row;
> struct udp_table *udp_table;
>
> @@ -96,17 +124,14 @@ struct netns_ipv4 {
>
> u8 sysctl_ip_default_ttl;
> u8 sysctl_ip_no_pmtu_disc;
> - u8 sysctl_ip_fwd_use_pmtu;
> u8 sysctl_ip_fwd_update_priority;
> u8 sysctl_ip_nonlocal_bind;
> u8 sysctl_ip_autobind_reuse;
> /* Shall we try to damage output packets if routing dev changes? */
> u8 sysctl_ip_dynaddr;
> - u8 sysctl_ip_early_demux;
> #ifdef CONFIG_NET_L3_MASTER_DEV
> u8 sysctl_raw_l3mdev_accept;
> #endif
> - u8 sysctl_tcp_early_demux;
> u8 sysctl_udp_early_demux;
>
> u8 sysctl_nexthop_compat_mode;
> @@ -119,7 +144,6 @@ struct netns_ipv4 {
> u8 sysctl_tcp_mtu_probing;
> int sysctl_tcp_mtu_probe_floor;
> int sysctl_tcp_base_mss;
> - int sysctl_tcp_min_snd_mss;
> int sysctl_tcp_probe_threshold;
> u32 sysctl_tcp_probe_interval;
>
> @@ -135,17 +159,14 @@ struct netns_ipv4 {
> u8 sysctl_tcp_backlog_ack_defer;
> u8 sysctl_tcp_pingpong_thresh;
>
> - int sysctl_tcp_reordering;
> u8 sysctl_tcp_retries1;
> u8 sysctl_tcp_retries2;
> u8 sysctl_tcp_orphan_retries;
> u8 sysctl_tcp_tw_reuse;
> int sysctl_tcp_fin_timeout;
> - unsigned int sysctl_tcp_notsent_lowat;
> u8 sysctl_tcp_sack;
> u8 sysctl_tcp_window_scaling;
> u8 sysctl_tcp_timestamps;
> - u8 sysctl_tcp_early_retrans;
> u8 sysctl_tcp_recovery;
> u8 sysctl_tcp_thin_linear_timeouts;
> u8 sysctl_tcp_slow_start_after_idle;
> @@ -161,21 +182,13 @@ struct netns_ipv4 {
> u8 sysctl_tcp_frto;
> u8 sysctl_tcp_nometrics_save;
> u8 sysctl_tcp_no_ssthresh_metrics_save;
> - u8 sysctl_tcp_moderate_rcvbuf;
> - u8 sysctl_tcp_tso_win_divisor;
> u8 sysctl_tcp_workaround_signed_windows;
> - int sysctl_tcp_limit_output_bytes;
> int sysctl_tcp_challenge_ack_limit;
> - int sysctl_tcp_min_rtt_wlen;
> u8 sysctl_tcp_min_tso_segs;
> - u8 sysctl_tcp_tso_rtt_log;
> - u8 sysctl_tcp_autocorking;
> u8 sysctl_tcp_reflect_tos;
> int sysctl_tcp_invalid_ratelimit;
> int sysctl_tcp_pacing_ss_ratio;
> int sysctl_tcp_pacing_ca_ratio;
> - int sysctl_tcp_wmem[3];
> - int sysctl_tcp_rmem[3];
> unsigned int sysctl_tcp_child_ehash_entries;
> unsigned long sysctl_tcp_comp_sack_delay_ns;
> unsigned long sysctl_tcp_comp_sack_slack_ns;
> --
> 2.42.0.758.gaed0368e0e-goog
>
Powered by blists - more mailing lists