[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20250613123629.-XSoQTCu@linutronix.de>
Date: Fri, 13 Jun 2025 14:36:29 +0200
From: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
To: netdev@...r.kernel.org, dev@...nvswitch.org
Cc: "David S. Miller" <davem@...emloft.net>,
Aaron Conole <aconole@...hat.com>,
Eelco Chaudron <echaudro@...hat.com>,
Eric Dumazet <edumazet@...gle.com>, Gal Pressman <gal@...dia.com>,
Ilya Maximets <i.maximets@....org>,
Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>,
Simon Horman <horms@...nel.org>
Subject: [PATCH net] openvswitch: Allocate struct ovs_pcpu_storage dynamically
PERCPU_MODULE_RESERVE defines the maximum size that can by used for the
per-CPU data size used by modules. This is 8KiB.
Commit 035fcdc4d240c ("openvswitch: Merge three per-CPU structures into
one") restructured the per-CPU memory allocation for the module and
moved the separate alloc_percpu() invocations at module init time to a
static per-CPU variable which is allocated by the module loader.
The size of the per-CPU data section for openvswitch is 6488 bytes which
is ~80% of the available per-CPU memory. Together with a few other
modules it is easy to exhaust the available 8KiB of memory.
Allocate ovs_pcpu_storage dynamically at module init time.
Reported-by: Gal Pressman <gal@...dia.com>
Closes: https://lore.kernel.org/all/c401e017-f8db-4f57-a1cd-89beb979a277@nvidia.com
Fixes: 035fcdc4d240c ("openvswitch: Merge three per-CPU structures into one")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
---
Gal, would you please be so kind and check if this works for you?
net/openvswitch/actions.c | 23 +++++++++------------
net/openvswitch/datapath.c | 42 +++++++++++++++++++++++++++++++-------
net/openvswitch/datapath.h | 3 ++-
3 files changed, 47 insertions(+), 21 deletions(-)
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index e7269a3eec79e..3add108340bfd 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -39,16 +39,14 @@
#include "flow_netlink.h"
#include "openvswitch_trace.h"
-DEFINE_PER_CPU(struct ovs_pcpu_storage, ovs_pcpu_storage) = {
- .bh_lock = INIT_LOCAL_LOCK(bh_lock),
-};
+struct ovs_pcpu_storage __percpu *ovs_pcpu_storage;
/* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys'
* space. Return NULL if out of key spaces.
*/
static struct sw_flow_key *clone_key(const struct sw_flow_key *key_)
{
- struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(&ovs_pcpu_storage);
+ struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage);
struct action_flow_keys *keys = &ovs_pcpu->flow_keys;
int level = ovs_pcpu->exec_level;
struct sw_flow_key *key = NULL;
@@ -94,7 +92,7 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
const struct nlattr *actions,
const int actions_len)
{
- struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage.action_fifos);
+ struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage->action_fifos);
struct deferred_action *da;
da = action_fifo_put(fifo);
@@ -755,7 +753,7 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
static int ovs_vport_output(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
- struct ovs_frag_data *data = this_cpu_ptr(&ovs_pcpu_storage.frag_data);
+ struct ovs_frag_data *data = this_cpu_ptr(&ovs_pcpu_storage->frag_data);
struct vport *vport = data->vport;
if (skb_cow_head(skb, data->l2_len) < 0) {
@@ -807,7 +805,7 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb,
unsigned int hlen = skb_network_offset(skb);
struct ovs_frag_data *data;
- data = this_cpu_ptr(&ovs_pcpu_storage.frag_data);
+ data = this_cpu_ptr(&ovs_pcpu_storage->frag_data);
data->dst = skb->_skb_refdst;
data->vport = vport;
data->cb = *OVS_CB(skb);
@@ -1566,16 +1564,15 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb,
clone = clone_flow_key ? clone_key(key) : key;
if (clone) {
int err = 0;
-
if (actions) { /* Sample action */
if (clone_flow_key)
- __this_cpu_inc(ovs_pcpu_storage.exec_level);
+ __this_cpu_inc(ovs_pcpu_storage->exec_level);
err = do_execute_actions(dp, skb, clone,
actions, len);
if (clone_flow_key)
- __this_cpu_dec(ovs_pcpu_storage.exec_level);
+ __this_cpu_dec(ovs_pcpu_storage->exec_level);
} else { /* Recirc action */
clone->recirc_id = recirc_id;
ovs_dp_process_packet(skb, clone);
@@ -1611,7 +1608,7 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb,
static void process_deferred_actions(struct datapath *dp)
{
- struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage.action_fifos);
+ struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage->action_fifos);
/* Do not touch the FIFO in case there is no deferred actions. */
if (action_fifo_is_empty(fifo))
@@ -1642,7 +1639,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
{
int err, level;
- level = __this_cpu_inc_return(ovs_pcpu_storage.exec_level);
+ level = __this_cpu_inc_return(ovs_pcpu_storage->exec_level);
if (unlikely(level > OVS_RECURSION_LIMIT)) {
net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n",
ovs_dp_name(dp));
@@ -1659,6 +1656,6 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
process_deferred_actions(dp);
out:
- __this_cpu_dec(ovs_pcpu_storage.exec_level);
+ __this_cpu_dec(ovs_pcpu_storage->exec_level);
return err;
}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 6a304ae2d959c..b990dc83504f4 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -244,7 +244,7 @@ void ovs_dp_detach_port(struct vport *p)
/* Must be called with rcu_read_lock. */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
- struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(&ovs_pcpu_storage);
+ struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage);
const struct vport *p = OVS_CB(skb)->input_vport;
struct datapath *dp = p->dp;
struct sw_flow *flow;
@@ -299,7 +299,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
* avoided.
*/
if (IS_ENABLED(CONFIG_PREEMPT_RT) && ovs_pcpu->owner != current) {
- local_lock_nested_bh(&ovs_pcpu_storage.bh_lock);
+ local_lock_nested_bh(&ovs_pcpu_storage->bh_lock);
ovs_pcpu->owner = current;
ovs_pcpu_locked = true;
}
@@ -310,7 +310,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
ovs_dp_name(dp), error);
if (ovs_pcpu_locked) {
ovs_pcpu->owner = NULL;
- local_unlock_nested_bh(&ovs_pcpu_storage.bh_lock);
+ local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock);
}
stats_counter = &stats->n_hit;
@@ -689,13 +689,13 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
sf_acts = rcu_dereference(flow->sf_acts);
local_bh_disable();
- local_lock_nested_bh(&ovs_pcpu_storage.bh_lock);
+ local_lock_nested_bh(&ovs_pcpu_storage->bh_lock);
if (IS_ENABLED(CONFIG_PREEMPT_RT))
- this_cpu_write(ovs_pcpu_storage.owner, current);
+ this_cpu_write(ovs_pcpu_storage->owner, current);
err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
if (IS_ENABLED(CONFIG_PREEMPT_RT))
- this_cpu_write(ovs_pcpu_storage.owner, NULL);
- local_unlock_nested_bh(&ovs_pcpu_storage.bh_lock);
+ this_cpu_write(ovs_pcpu_storage->owner, NULL);
+ local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock);
local_bh_enable();
rcu_read_unlock();
@@ -2744,6 +2744,28 @@ static struct drop_reason_list drop_reason_list_ovs = {
.n_reasons = ARRAY_SIZE(ovs_drop_reasons),
};
+static int __init ovs_alloc_percpu_storage(void)
+{
+ unsigned int cpu;
+
+ ovs_pcpu_storage = alloc_percpu(*ovs_pcpu_storage);
+ if (!ovs_pcpu_storage)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu) {
+ struct ovs_pcpu_storage *ovs_pcpu;
+
+ ovs_pcpu = per_cpu_ptr(ovs_pcpu_storage, cpu);
+ local_lock_init(&ovs_pcpu->bh_lock);
+ }
+ return 0;
+}
+
+static void ovs_free_percpu_storage(void)
+{
+ free_percpu(ovs_pcpu_storage);
+}
+
static int __init dp_init(void)
{
int err;
@@ -2753,6 +2775,10 @@ static int __init dp_init(void)
pr_info("Open vSwitch switching datapath\n");
+ err = ovs_alloc_percpu_storage();
+ if (err)
+ goto error;
+
err = ovs_internal_dev_rtnl_link_register();
if (err)
goto error;
@@ -2799,6 +2825,7 @@ static int __init dp_init(void)
error_unreg_rtnl_link:
ovs_internal_dev_rtnl_link_unregister();
error:
+ ovs_free_percpu_storage();
return err;
}
@@ -2813,6 +2840,7 @@ static void dp_cleanup(void)
ovs_vport_exit();
ovs_flow_exit();
ovs_internal_dev_rtnl_link_unregister();
+ ovs_free_percpu_storage();
}
module_init(dp_init);
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 1b5348b0f5594..cfeb817a18894 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -220,7 +220,8 @@ struct ovs_pcpu_storage {
struct task_struct *owner;
local_lock_t bh_lock;
};
-DECLARE_PER_CPU(struct ovs_pcpu_storage, ovs_pcpu_storage);
+
+extern struct ovs_pcpu_storage __percpu *ovs_pcpu_storage;
/**
* enum ovs_pkt_hash_types - hash info to include with a packet
--
2.49.0
Powered by blists - more mailing lists