linux-kernel - Re: [RFC 2/1] seqlock: make the read_seqbegin_or_lock() API more simple and less error-prone ?

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]

Message-ID: <20251001131337.GC20441@redhat.com>
Date: Wed, 1 Oct 2025 15:13:39 +0200
From: Oleg Nesterov <oleg@...hat.com>
To: Peter Zijlstra <peterz@...radead.org>
Cc: Boqun Feng <boqun.feng@...il.com>, David Howells <dhowells@...hat.com>,
	Ingo Molnar <mingo@...hat.com>, Li RongQing <lirongqing@...du.com>,
	Linus Torvalds <torvalds@...ux-foundation.org>,
	Waiman Long <longman@...hat.com>, Will Deacon <will@...nel.org>,
	linux-kernel@...r.kernel.org
Subject: Re: [RFC 2/1] seqlock: make the read_seqbegin_or_lock() API more
 simple and less error-prone ?

On 10/01, Peter Zijlstra wrote:
> On Sun, Sep 28, 2025 at 06:20:54PM +0200, Oleg Nesterov wrote:
>
> > To simplify, suppose we add the new helper
> >
> > 	static inline int need_seqretry_xxx(seqlock_t *lock, int *seq)
> > 	{
> > 		int ret = !(*seq & 1) && read_seqretry(lock, *seq);
> >
> > 		if (ret)
> > 			++*seq;	/* make this counter odd */
                        ^^^^^^
Hmm. Just
			*seq = 1;
makes more sense.

> How about need_seqretry_or_lock() to stay in theme with
> read_seqbegin_or_lock().

I am fine with any name ;) This one looks good to me.

> > 	#define __XXX(lock, seq, lockless)	\
> > 		for (int lockless = 1, seq; xxx(lock, &seq, lockless); lockless = 0)
> >
> > 	#define XXX(lock)	\
> > 		__XXX(lock, __UNIQUE_ID(seq), __UNIQUE_ID(lockless))
> >
> >
> > ?
>
> Oh gawd, that thing had better not have control flow escape that loop.

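Yes, yes. "continue" is fine, but break/return won't work: leaving the loop
early skips the final xxx() call, which is what rechecks the sequence count
and drops the lock if the locked pass took it.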

> But yes, I suppose something like this is far more useable than the
> current thing.

OK, great. So, modulo naming, how about the patch below?

The new stuff should obviously go to include/linux/seqlock.h; xxx() can
probably be uninlined. thread_group_cputime() is changed as an example.

Oleg.


--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -306,6 +306,35 @@ static u64 read_sum_exec_runtime(struct task_struct *t)
 }
 #endif /* !CONFIG_64BIT */
 
+static inline int xxx(seqlock_t *lock, int lockless, int *seq, unsigned long *flags)
+{
+	if (lockless) {
+		*seq = read_seqbegin(lock);
+		return 1;
+	} else if (*seq & 1) {
+		if (flags)
+			read_sequnlock_excl_irqrestore(lock, *flags);
+		else
+			read_sequnlock_excl(lock);
+		return 0;
+	} else if (read_seqretry(lock, *seq)) {
+		if (flags)
+			read_seqlock_excl_irqsave(lock, *flags);
+		else
+			read_seqlock_excl(lock);
+		*seq = 1;
+		return 1;
+	} else {
+		return 0;
+	}
+}
+
+#define __XXX(lock, lockless, seq, flags)	\
+	for (int lockless = 1, seq; xxx(lock, lockless, &seq, flags); lockless = 0)
+
+#define XXX(lock, flags)	\
+	__XXX(lock, __UNIQUE_ID(lockless), __UNIQUE_ID(seq), flags)
+
 /*
  * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
  * tasks (sum on group iteration) belonging to @tsk's group.
@@ -315,7 +344,6 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 	struct signal_struct *sig = tsk->signal;
 	u64 utime, stime;
 	struct task_struct *t;
-	unsigned int seq, nextseq;
 	unsigned long flags;
 
 	/*
@@ -330,11 +358,7 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 		(void) task_sched_runtime(current);
 
 	rcu_read_lock();
-	/* Attempt a lockless read on the first round. */
-	nextseq = 0;
-	do {
-		seq = nextseq;
-		flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
+	XXX(&sig->stats_lock, &flags) {
 		times->utime = sig->utime;
 		times->stime = sig->stime;
 		times->sum_exec_runtime = sig->sum_sched_runtime;
@@ -345,10 +369,7 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 			times->stime += stime;
 			times->sum_exec_runtime += read_sum_exec_runtime(t);
 		}
-		/* If lockless access failed, take the lock. */
-		nextseq = 1;
-	} while (need_seqretry(&sig->stats_lock, seq));
-	done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
+	}
 	rcu_read_unlock();
 }