[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251010080327.GF4067720@noisy.programming.kicks-ass.net>
Date: Fri, 10 Oct 2025 10:03:27 +0200
From: Peter Zijlstra <peterz@...radead.org>
To: Linus Torvalds <torvalds@...ux-foundation.org>
Cc: Oleg Nesterov <oleg@...hat.com>,
Alexander Viro <viro@...iv.linux.org.uk>,
Boqun Feng <boqun.feng@...il.com>,
David Howells <dhowells@...hat.com>, Ingo Molnar <mingo@...hat.com>,
Li RongQing <lirongqing@...du.com>,
Waiman Long <longman@...hat.com>, Will Deacon <will@...nel.org>,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2 1/4] seqlock: introduce scoped_seqlock_read() and
scoped_seqlock_read_irqsave()
On Thu, Oct 09, 2025 at 03:55:15PM -0700, Linus Torvalds wrote:
> On Thu, 9 Oct 2025 at 15:12, Peter Zijlstra <peterz@...radead.org> wrote:
> >
> > Sure; otoh compiler should be able to tell the same using liveness
> > analysis I suppose, but perhaps they're not *that* clever.
>
> They are that clever, but only if they end up unrolling the loop
> statically. If the loop remains a loop, the two variables end up live
> in the same code.
>
> And while a compiler _could_ in theory still see that they aren't
> actually live in the same _iteration_, I don't think any compiler
> actually ends up being that clever in practice.
>
> So making it a union then hopefully gets the compiler to basically use
> that explicit information.
Right, so I had to use -Os to not make it unroll the thing, but then
indeed, sharing the variable helps it.
> > So I thought they were fine; we handle all the enum cases with 'return'
> > so its impossible to not exit the switch() but the silly compiler was
> > complaining about possible fall-through, so clearly it was getting
> > confused.
>
> Yeah, I found the same thing with the 0/1/2 approach - the compiler
> wouldn't realize that the range was limited until I added a very
> explicit limit check that "shouldn't matter", but did.
>
> This might obviously end up depending on compiler version and other
> random things, but in general the whole value range analysis tends to
> be a pretty fragile thing.
>
> In practice, compilers tend to be good at doing value range analysis
> if they see particular patterns (like initializing it to some value,
> always incrementing it by one, and comparing against another value).
>
> But when it's written more like a state machine like this, it's
> clearly very hit and miss.
I reordered the code, it is happier now.
Anyway, the below seems to generate decent code for
{-O2,-Os}x{gcc-14,clang-22}. Yay for optimizing compilers I suppose :-)
---
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 5ce48eab7a2a..45fab026f7d6 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -1209,4 +1209,83 @@ done_seqretry_irqrestore(seqlock_t *lock, int seq, unsigned long flags)
if (seq & 1)
read_sequnlock_excl_irqrestore(lock, flags);
}
+
+enum ss_state {
+ ss_done = 0,
+ ss_lock,
+ ss_lock_irqsave,
+ ss_lockless,
+};
+
+struct ss_tmp {
+ enum ss_state state;
+ unsigned long data;
+ spinlock_t *lock;
+ spinlock_t *lock_irqsave;
+};
+
+static inline void __scoped_seqlock_cleanup(struct ss_tmp *sst)
+{
+ if (sst->lock)
+ spin_unlock(sst->lock);
+ if (sst->lock_irqsave)
+ spin_unlock_irqrestore(sst->lock, sst->data);
+}
+
+extern void __scoped_seqlock_invalid_target(void);
+extern void __scoped_seqlock_bug(void);
+
+static inline void
+__scoped_seqlock_next(struct ss_tmp *sst, seqlock_t *lock, enum ss_state target)
+{
+ switch (sst->state) {
+ case ss_done:
+ __scoped_seqlock_bug();
+ return;
+
+ case ss_lock:
+ case ss_lock_irqsave:
+ sst->state = ss_done;
+ return;
+
+ case ss_lockless:
+ if (!read_seqretry(lock, sst->data)) {
+ sst->state = ss_done;
+ return;
+ }
+ break;
+ }
+
+ switch (target) {
+ case ss_done:
+ __scoped_seqlock_invalid_target();
+ return;
+
+ case ss_lock:
+ sst->lock = &lock->lock;
+ spin_lock(sst->lock);
+ sst->state = ss_lock;
+ return;
+
+ case ss_lock_irqsave:
+ sst->lock_irqsave = &lock->lock;
+ spin_lock_irqsave(sst->lock, sst->data);
+ sst->state = ss_lock_irqsave;
+ return;
+
+ case ss_lockless:
+ sst->data = read_seqbegin(lock);
+ return;
+ }
+}
+
+#define __scoped_seqlock_read(_seqlock, _target, _s) \
+ for (struct ss_tmp _s __cleanup(__scoped_seqlock_cleanup) = \
+ { .state = ss_lockless, .data = read_seqbegin(_seqlock) }; \
+ _s.state != ss_done; \
+ __scoped_seqlock_next(&_s, _seqlock, _target))
+
+#define scoped_seqlock_read(_seqlock, _target) \
+ __scoped_seqlock_read(_seqlock, _target, __UNIQUE_ID(seqlock))
+
#endif /* __LINUX_SEQLOCK_H */
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 7097de2c8cda..d2b3f987c888 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -313,10 +313,8 @@ static u64 read_sum_exec_runtime(struct task_struct *t)
void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
{
struct signal_struct *sig = tsk->signal;
- u64 utime, stime;
struct task_struct *t;
- unsigned int seq, nextseq;
- unsigned long flags;
+ u64 utime, stime;
/*
* Update current task runtime to account pending time since last
@@ -329,27 +327,19 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
if (same_thread_group(current, tsk))
(void) task_sched_runtime(current);
- rcu_read_lock();
- /* Attempt a lockless read on the first round. */
- nextseq = 0;
- do {
- seq = nextseq;
- flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
+ guard(rcu)();
+ scoped_seqlock_read(&sig->stats_lock, ss_lock_irqsave) {
times->utime = sig->utime;
times->stime = sig->stime;
times->sum_exec_runtime = sig->sum_sched_runtime;
- for_each_thread(tsk, t) {
+ __for_each_thread(sig, t) {
task_cputime(t, &utime, &stime);
times->utime += utime;
times->stime += stime;
times->sum_exec_runtime += read_sum_exec_runtime(t);
}
- /* If lockless access failed, take the lock. */
- nextseq = 1;
- } while (need_seqretry(&sig->stats_lock, seq));
- done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
- rcu_read_unlock();
+ }
}
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
Powered by blists - more mailing lists