[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250722124317.2698497-3-bertrand.wlodarczyk@intel.com>
Date: Tue, 22 Jul 2025 14:43:19 +0200
From: Bertrand Wlodarczyk <bertrand.wlodarczyk@...el.com>
To: tj@...nel.org,
hannes@...xchg.org,
mkoutny@...e.com,
cgroups@...r.kernel.org,
linux-kernel@...r.kernel.org
Cc: shakeel.butt@...ux.dev,
inwardvessel@...il.com,
Bertrand Wlodarczyk <bertrand.wlodarczyk@...el.com>
Subject: [PATCH] cgroup/rstat: move css_rstat_lock outside cpu loop
By moving the lock outside the CPU loop, we reduce the frequency
of costly lock acquisition.
This adjustment significantly improves performance in scenarios where
multiple CPUs concurrently attempt to acquire the lock for
the same css (see the benchmark below: ~3.9x faster in the contended case).
The cpu argument passed to __css_rstat_lock, previously recorded only
by the tracepoints, has been removed because it is no longer used.
---
Results:
QEMU vm
+-------+---------+
| main | patched |
+-------+---------+
| 9.17s | 2.36s |
+-------+---------+
ext4 raw image with debian:
qemu-system-x86_64 -enable-kvm -cpu host -smp 102 -m 16G -kernel linux-cgroup/arch/x86/boot/bzImage -drive file=rootfs.ext4,if=virtio,format=raw -append "rootwait root=/dev/vda console=tty1 console=ttyS0 nokaslr cgroup_enable=memory cgroup_memory=1" -net nic,model=virtio -net user -nographic
Benchmark code: https://gist.github.com/bwlodarcz/c955b36b5667f0167dffcff23953d1da
musl-gcc -o benchmark -static -g3 -DNUM_THREADS=10 -DNUM_ITER=10000 -O2 -Wall benchmark.c -pthread
---
include/trace/events/cgroup.h | 22 ++++++++++------------
kernel/cgroup/rstat.c | 20 +++++++++-----------
2 files changed, 19 insertions(+), 23 deletions(-)
diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h
index ba9229af9a34..eb674eef8b99 100644
--- a/include/trace/events/cgroup.h
+++ b/include/trace/events/cgroup.h
@@ -206,15 +206,14 @@ DEFINE_EVENT(cgroup_event, cgroup_notify_frozen,
DECLARE_EVENT_CLASS(cgroup_rstat,
- TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
+ TP_PROTO(struct cgroup *cgrp, bool contended),
- TP_ARGS(cgrp, cpu, contended),
+ TP_ARGS(cgrp, contended),
TP_STRUCT__entry(
__field( int, root )
__field( int, level )
__field( u64, id )
- __field( int, cpu )
__field( bool, contended )
),
@@ -222,13 +221,12 @@ DECLARE_EVENT_CLASS(cgroup_rstat,
__entry->root = cgrp->root->hierarchy_id;
__entry->id = cgroup_id(cgrp);
__entry->level = cgrp->level;
- __entry->cpu = cpu;
__entry->contended = contended;
),
- TP_printk("root=%d id=%llu level=%d cpu=%d lock contended:%d",
+ TP_printk("root=%d id=%llu level=%d lock contended:%d",
__entry->root, __entry->id, __entry->level,
- __entry->cpu, __entry->contended)
+ __entry->contended)
);
/*
@@ -238,23 +236,23 @@ DECLARE_EVENT_CLASS(cgroup_rstat,
*/
DEFINE_EVENT(cgroup_rstat, cgroup_rstat_lock_contended,
- TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
+ TP_PROTO(struct cgroup *cgrp, bool contended),
- TP_ARGS(cgrp, cpu, contended)
+ TP_ARGS(cgrp, contended)
);
DEFINE_EVENT(cgroup_rstat, cgroup_rstat_locked,
- TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
+ TP_PROTO(struct cgroup *cgrp, bool contended),
- TP_ARGS(cgrp, cpu, contended)
+ TP_ARGS(cgrp, contended)
);
DEFINE_EVENT(cgroup_rstat, cgroup_rstat_unlock,
- TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
+ TP_PROTO(struct cgroup *cgrp, bool contended),
- TP_ARGS(cgrp, cpu, contended)
+ TP_ARGS(cgrp, contended)
);
#endif /* _TRACE_CGROUP_H */
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index c8a48cf83878..dd312fe1896d 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -326,8 +326,7 @@ __bpf_hook_end();
* value -1 is used when obtaining the main lock else this is the CPU
* number processed last.
*/
-static inline void __css_rstat_lock(struct cgroup_subsys_state *css,
- int cpu_in_loop)
+static inline void __css_rstat_lock(struct cgroup_subsys_state *css)
__acquires(ss_rstat_lock(css->ss))
{
struct cgroup *cgrp = css->cgroup;
@@ -337,21 +336,20 @@ static inline void __css_rstat_lock(struct cgroup_subsys_state *css,
lock = ss_rstat_lock(css->ss);
contended = !spin_trylock_irq(lock);
if (contended) {
- trace_cgroup_rstat_lock_contended(cgrp, cpu_in_loop, contended);
+ trace_cgroup_rstat_lock_contended(cgrp, contended);
spin_lock_irq(lock);
}
- trace_cgroup_rstat_locked(cgrp, cpu_in_loop, contended);
+ trace_cgroup_rstat_locked(cgrp, contended);
}
-static inline void __css_rstat_unlock(struct cgroup_subsys_state *css,
- int cpu_in_loop)
+static inline void __css_rstat_unlock(struct cgroup_subsys_state *css)
__releases(ss_rstat_lock(css->ss))
{
struct cgroup *cgrp = css->cgroup;
spinlock_t *lock;
lock = ss_rstat_lock(css->ss);
- trace_cgroup_rstat_unlock(cgrp, cpu_in_loop, false);
+ trace_cgroup_rstat_unlock(cgrp, false);
spin_unlock_irq(lock);
}
@@ -381,11 +379,11 @@ __bpf_kfunc void css_rstat_flush(struct cgroup_subsys_state *css)
return;
might_sleep();
+ __css_rstat_lock(css);
for_each_possible_cpu(cpu) {
struct cgroup_subsys_state *pos;
/* Reacquire for each CPU to avoid disabling IRQs too long */
- __css_rstat_lock(css, cpu);
pos = css_rstat_updated_list(css, cpu);
for (; pos; pos = pos->rstat_flush_next) {
if (is_self) {
@@ -395,10 +393,10 @@ __bpf_kfunc void css_rstat_flush(struct cgroup_subsys_state *css)
} else
pos->ss->css_rstat_flush(pos, cpu);
}
- __css_rstat_unlock(css, cpu);
if (!cond_resched())
cpu_relax();
}
+ __css_rstat_unlock(css);
}
int css_rstat_init(struct cgroup_subsys_state *css)
@@ -685,11 +683,11 @@ void cgroup_base_stat_cputime_show(struct seq_file *seq)
if (cgroup_parent(cgrp)) {
css_rstat_flush(&cgrp->self);
- __css_rstat_lock(&cgrp->self, -1);
+ __css_rstat_lock(&cgrp->self);
bstat = cgrp->bstat;
cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime,
&bstat.cputime.utime, &bstat.cputime.stime);
- __css_rstat_unlock(&cgrp->self, -1);
+ __css_rstat_unlock(&cgrp->self);
} else {
root_cgroup_cputime(&bstat);
}
--
2.49.0
Powered by blists - more mailing lists