lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <Y9pWcFdwaa4l6bPP@nanopsycho>
Date:   Wed, 1 Feb 2023 13:09:20 +0100
From:   Jiri Pirko <jiri@...nulli.us>
To:     taoyuan_eddy@...mail.com
Cc:     dev@...nvswitch.org, Pravin B Shelar <pshelar@....org>,
        "David S. Miller" <davem@...emloft.net>,
        Eric Dumazet <edumazet@...gle.com>,
        Jakub Kicinski <kuba@...nel.org>,
        Paolo Abeni <pabeni@...hat.com>, netdev@...r.kernel.org,
        linux-kernel@...r.kernel.org
Subject: Re: [PATCH] net: openvswitch: reduce cpu_used_mask memory consumption

Tue, Jan 31, 2023 at 02:58:22PM CET, taoyuan_eddy@...mail.com wrote:
>From: eddytaoyuan <taoyuan_eddy@...mail.com>
>
>struct cpumask cpu_used_mask is directly embedded in struct sw_flow
>however, its size is hardcoded to CONFIG_NR_CPUS bits, which
>can be as large as 8192 by default; it costs memory and slows down
>ovs_flow_alloc. This fix uses the actual CPU number instead.
>
>Signed-off-by: eddytaoyuan <taoyuan_eddy@...mail.com>

Hmm, I guess that the name should rather be "Eddy Taoyuan"? Please fix,
also in your "From:" header and preferably in your email client.


>---
> net/openvswitch/flow.c       |  6 +++---
> net/openvswitch/flow.h       |  2 +-
> net/openvswitch/flow_table.c | 25 ++++++++++++++++++++++---
> 3 files changed, 26 insertions(+), 7 deletions(-)
>
>diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
>index e20d1a973417..06345cd8c777 100644
>--- a/net/openvswitch/flow.c
>+++ b/net/openvswitch/flow.c
>@@ -107,7 +107,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
> 
> 					rcu_assign_pointer(flow->stats[cpu],
> 							   new_stats);
>-					cpumask_set_cpu(cpu, &flow->cpu_used_mask);
>+					cpumask_set_cpu(cpu, flow->cpu_used_mask);
> 					goto unlock;
> 				}
> 			}
>@@ -135,7 +135,7 @@ void ovs_flow_stats_get(const struct sw_flow *flow,
> 	memset(ovs_stats, 0, sizeof(*ovs_stats));
> 
> 	/* We open code this to make sure cpu 0 is always considered */
>-	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
>+	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, flow->cpu_used_mask)) {
> 		struct sw_flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
> 
> 		if (stats) {
>@@ -159,7 +159,7 @@ void ovs_flow_stats_clear(struct sw_flow *flow)
> 	int cpu;
> 
> 	/* We open code this to make sure cpu 0 is always considered */
>-	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
>+	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, flow->cpu_used_mask)) {
> 		struct sw_flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
> 
> 		if (stats) {
>diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
>index 073ab73ffeaa..b5711aff6e76 100644
>--- a/net/openvswitch/flow.h
>+++ b/net/openvswitch/flow.h
>@@ -229,7 +229,7 @@ struct sw_flow {
> 					 */
> 	struct sw_flow_key key;
> 	struct sw_flow_id id;
>-	struct cpumask cpu_used_mask;
>+	struct cpumask *cpu_used_mask;
> 	struct sw_flow_mask *mask;
> 	struct sw_flow_actions __rcu *sf_acts;
> 	struct sw_flow_stats __rcu *stats[]; /* One for each CPU.  First one
>diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
>index 0a0e4c283f02..c0fdff73272f 100644
>--- a/net/openvswitch/flow_table.c
>+++ b/net/openvswitch/flow_table.c
>@@ -87,11 +87,12 @@ struct sw_flow *ovs_flow_alloc(void)
> 	if (!stats)
> 		goto err;
> 
>+	flow->cpu_used_mask = (struct cpumask *)&(flow->stats[nr_cpu_ids]);
> 	spin_lock_init(&stats->lock);
> 
> 	RCU_INIT_POINTER(flow->stats[0], stats);
> 
>-	cpumask_set_cpu(0, &flow->cpu_used_mask);
>+	cpumask_set_cpu(0, flow->cpu_used_mask);
> 
> 	return flow;
> err:
>@@ -115,7 +116,7 @@ static void flow_free(struct sw_flow *flow)
> 					  flow->sf_acts);
> 	/* We open code this to make sure cpu 0 is always considered */
> 	for (cpu = 0; cpu < nr_cpu_ids;
>-	     cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
>+	     cpu = cpumask_next(cpu, flow->cpu_used_mask)) {
> 		if (flow->stats[cpu])
> 			kmem_cache_free(flow_stats_cache,
> 					(struct sw_flow_stats __force *)flow->stats[cpu]);
>@@ -1194,9 +1195,27 @@ int ovs_flow_init(void)
> 	BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long));
> 	BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
> 
>+        /*
>+         * Simply including 'struct cpumask' in 'struct sw_flow'
>+         * consumes memory unnecessarily large.
>+         * The reason is that compilation option CONFIG_NR_CPUS(default 8192)
>+         * decides the value of NR_CPUS, which in turn decides size of
>+         * 'struct cpumask', which means 1024 bytes are needed for the cpumask
>+         * It affects ovs_flow_alloc performance as well as memory footprint.
>+         * We should use actual CPU count instead of hardcoded value.
>+         *
>+         * To address this, 'cpu_used_mask' is redefined to pointer
>+         * and append a cpumask_size() after 'stat' to hold the actual memory
>+         * of struct cpumask
>+         *
>+         * cpumask APIs like cpumask_next and cpumask_set_cpu have been defined
>+         * to never access bits beyond cpu count by design, thus above change is
>+         * safe even though the actual memory provided is smaller than
>+         * sizeof(struct cpumask)

I don't understand the reason to have this comment in the code. From
what I see, this should be moved to the patch description. Of course
change the mood to imperative when you tell the codebase what to
do.




>+         */
> 	flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
> 				       + (nr_cpu_ids
>-					  * sizeof(struct sw_flow_stats *)),
>+					  * sizeof(struct sw_flow_stats *)) + cpumask_size(),
> 				       0, 0, NULL);
> 	if (flow_cache == NULL)
> 		return -ENOMEM;
>-- 
>2.27.0
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ