Message-ID: <20250424080755.272925-5-harry.yoo@oracle.com>
Date: Thu, 24 Apr 2025 17:07:52 +0900
From: Harry Yoo <harry.yoo@...cle.com>
To: Vlastimil Babka <vbabka@...e.cz>, Christoph Lameter <cl@...two.org>,
David Rientjes <rientjes@...gle.com>,
Andrew Morton <akpm@...ux-foundation.org>,
Dennis Zhou <dennis@...nel.org>, Tejun Heo <tj@...nel.org>,
Mateusz Guzik <mjguzik@...il.com>
Cc: Jamal Hadi Salim <jhs@...atatu.com>, Cong Wang <xiyou.wangcong@...il.com>,
Jiri Pirko <jiri@...nulli.us>, Vlad Buslov <vladbu@...dia.com>,
Yevgeny Kliteynik <kliteyn@...dia.com>, Jan Kara <jack@...e.cz>,
Byungchul Park <byungchul@...com>, linux-mm@...ck.org,
netdev@...r.kernel.org, linux-kernel@...r.kernel.org,
Harry Yoo <harry.yoo@...cle.com>
Subject: [RFC PATCH 4/7] net/sched/act_api: use slab ctor/dtor to reduce contention on pcpu alloc

A tc_action object allocates three percpu memory regions and stores
their pointers within the object. Over each object's lifetime, this
requires acquiring and releasing the global lock, pcpu_alloc_mutex,
three times. In workloads that frequently create and destroy TC
filters, this leads to severe contention on that global lock.

By using a slab constructor/destructor pair, the contention on
pcpu_alloc_mutex is shifted to the creation and destruction of slabs
(each of which contains multiple objects), which occur far less
frequently than the allocation and freeing of individual tc_action
objects.
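
To illustrate the pattern in isolation, here is a minimal sketch. It
assumes the sleepable ctor/dtor extension to struct kmem_cache_args
introduced earlier in this series (in current mainline, .ctor returns
void and there is no .dtor); struct foo and all foo_* names are made
up for the example:

	struct foo {
		struct gnet_stats_queue __percpu *stats;
	};

	/* Called once per object when a new slab is populated. */
	static int foo_ctor(void *object)
	{
		struct foo *f = object;

		f->stats = alloc_percpu(struct gnet_stats_queue);
		return f->stats ? 0 : -ENOMEM;
	}

	/* Called once per object when the slab itself is destroyed. */
	static void foo_dtor(void *object)
	{
		struct foo *f = object;

		free_percpu(f->stats);
	}

	static struct kmem_cache *foo_cache;

	static int __init foo_init(void)
	{
		struct kmem_cache_args args = {
			.ctor = foo_ctor,
			.dtor = foo_dtor,
		};

		foo_cache = kmem_cache_create("foo", sizeof(struct foo),
					      &args, SLAB_PANIC);
		return 0;
	}

With this scheme the ctor-initialized percpu pointers must survive
object reuse, so the object free path has to leave them intact for the
dtor rather than freeing them per object; that is what the free_tcf()
change below does.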

When tested with the following commands, a 26% reduction in system
time was observed:
$ cd tools/testing/selftests/tc-testing
$ sudo python3 tdc.py -f ./tc-tests/filters/flower.json -d <NIC name>

Lock contention as measured with `perf lock record/report`:

Before:

                Name   acquired  contended  avg wait  total wait       max wait  min wait
                       15042346   15042346   3.82 us     57.40 s        4.00 ms    316 ns
    pcpu_alloc_mutex   10959650   10959650   7.10 us      1.30 m        3.76 ms    313 ns

After:

                Name   acquired  contended  avg wait  total wait       max wait  min wait
                       15488031   15488031   5.16 us      1.33 m   5124095.50 h    316 ns
    pcpu_alloc_mutex    7695276    7695276   3.39 us     26.07 s        4.03 ms    284 ns

The contention has moved from pcpu_alloc_mutex to other locks, which
are not symbolized and therefore show up with a blank Name in the
output above.
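
For reference, numbers of this shape can be collected along these
lines (one plausible invocation, not necessarily the exact one used
here; -k wait_total sorts by total wait time):

$ sudo perf lock record -- python3 tdc.py -f ./tc-tests/filters/flower.json -d <NIC name>
$ sudo perf lock report -k wait_total
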
Signed-off-by: Harry Yoo <harry.yoo@...cle.com>
---
net/sched/act_api.c | 84 ++++++++++++++++++++++++++++++++--------------
1 file changed, 58 insertions(+), 26 deletions(-)
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 839790043256..60cde766135a 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -112,6 +112,8 @@ struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action,
}
EXPORT_SYMBOL(tcf_action_set_ctrlact);
+static struct kmem_cache *tc_action_cache;
+
/* XXX: For standalone actions, we don't need a RCU grace period either, because
* actions are always connected to filters and filters are already destroyed in
* RCU callbacks, so after a RCU grace period actions are already disconnected
@@ -121,15 +123,15 @@ static void free_tcf(struct tc_action *p)
{
struct tcf_chain *chain = rcu_dereference_protected(p->goto_chain, 1);
- free_percpu(p->cpu_bstats);
- free_percpu(p->cpu_bstats_hw);
- free_percpu(p->cpu_qstats);
tcf_set_action_cookie(&p->user_cookie, NULL);
if (chain)
tcf_chain_put_by_act(chain);
- kfree(p);
+ if (p->cpu_bstats)
+ kmem_cache_free(tc_action_cache, p);
+ else
+ kfree(p);
}
static void offload_action_hw_count_set(struct tc_action *act,
@@ -778,27 +780,20 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
struct tc_action **a, const struct tc_action_ops *ops,
int bind, bool cpustats, u32 flags)
{
- struct tc_action *p = kzalloc(ops->size, GFP_KERNEL);
+ struct tc_action *p;
struct tcf_idrinfo *idrinfo = tn->idrinfo;
int err = -ENOMEM;
+ if (cpustats)
+ p = kmem_cache_alloc(tc_action_cache, GFP_KERNEL);
+ else
+ p = kzalloc(ops->size, GFP_KERNEL);
+
if (unlikely(!p))
return -ENOMEM;
refcount_set(&p->tcfa_refcnt, 1);
if (bind)
atomic_set(&p->tcfa_bindcnt, 1);
-
- if (cpustats) {
- p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync);
- if (!p->cpu_bstats)
- goto err1;
- p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync);
- if (!p->cpu_bstats_hw)
- goto err2;
- p->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
- if (!p->cpu_qstats)
- goto err3;
- }
gnet_stats_basic_sync_init(&p->tcfa_bstats);
gnet_stats_basic_sync_init(&p->tcfa_bstats_hw);
spin_lock_init(&p->tcfa_lock);
@@ -812,7 +807,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
&p->tcfa_rate_est,
&p->tcfa_lock, false, est);
if (err)
- goto err4;
+ goto err;
}
p->idrinfo = idrinfo;
@@ -820,14 +815,11 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
p->ops = ops;
*a = p;
return 0;
-err4:
- free_percpu(p->cpu_qstats);
-err3:
- free_percpu(p->cpu_bstats_hw);
-err2:
- free_percpu(p->cpu_bstats);
-err1:
- kfree(p);
+err:
+ if (cpustats)
+ kmem_cache_free(tc_action_cache, p);
+ else
+ kfree(p);
return err;
}
EXPORT_SYMBOL(tcf_idr_create);
@@ -2270,8 +2262,48 @@ static const struct rtnl_msg_handler tc_action_rtnl_msg_handlers[] __initconst =
.dumpit = tc_dump_action},
};
+static int tcf_action_ctor(void *object)
+{
+ struct tc_action *p = object;
+
+ p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync);
+ if (!p->cpu_bstats)
+ goto err1;
+ p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync);
+ if (!p->cpu_bstats_hw)
+ goto err2;
+ p->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
+ if (!p->cpu_qstats)
+ goto err3;
+ return 0;
+
+err3:
+ free_percpu(p->cpu_bstats_hw);
+err2:
+ free_percpu(p->cpu_bstats);
+err1:
+ return -ENOMEM;
+}
+
+static void tcf_action_dtor(void *object)
+{
+ struct tc_action *p = object;
+
+ free_percpu(p->cpu_bstats);
+ free_percpu(p->cpu_bstats_hw);
+ free_percpu(p->cpu_qstats);
+}
+
static int __init tc_action_init(void)
{
+ struct kmem_cache_args kmem_args = {
+ .ctor = tcf_action_ctor,
+ .dtor = tcf_action_dtor,
+ };
+
+ tc_action_cache = kmem_cache_create("tc_action",
+ sizeof(struct tc_action),
+ &kmem_args, SLAB_PANIC);
rtnl_register_many(tc_action_rtnl_msg_handlers);
return 0;
}
--
2.43.0