linux-kernel - Re: [PATCH 2/2] locking: Apply contention tracepoints in the slow path

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20220328112921.GZ8939@worktop.programming.kicks-ass.net>
Date:   Mon, 28 Mar 2022 13:29:21 +0200
From:   Peter Zijlstra <peterz@...radead.org>
To:     Namhyung Kim <namhyung@...nel.org>
Cc:     Ingo Molnar <mingo@...nel.org>, Will Deacon <will@...nel.org>,
        Waiman Long <longman@...hat.com>,
        Boqun Feng <boqun.feng@...il.com>,
        LKML <linux-kernel@...r.kernel.org>,
        Thomas Gleixner <tglx@...utronix.de>,
        Steven Rostedt <rostedt@...dmis.org>,
        Byungchul Park <byungchul.park@....com>,
        "Paul E. McKenney" <paulmck@...nel.org>,
        Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
        Arnd Bergmann <arnd@...db.de>,
        Radoslaw Burny <rburny@...gle.com>, linux-arch@...r.kernel.org,
        bpf@...r.kernel.org, Hyeonggon Yoo <42.hyeyoo@...il.com>
Subject: Re: [PATCH 2/2] locking: Apply contention tracepoints in the slow
 path

On Tue, Mar 22, 2022 at 11:57:09AM -0700, Namhyung Kim wrote:
> Adding the lock contention tracepoints in various lock function slow
> paths.  Note that each arch can define spinlock differently, I only
> added it only to the generic qspinlock for now.
> 
> Tested-by: Hyeonggon Yoo <42.hyeyoo@...il.com>
> Signed-off-by: Namhyung Kim <namhyung@...nel.org>
> ---
>  kernel/locking/mutex.c        |  3 +++
>  kernel/locking/percpu-rwsem.c |  3 +++
>  kernel/locking/qrwlock.c      |  9 +++++++++
>  kernel/locking/qspinlock.c    |  5 +++++
>  kernel/locking/rtmutex.c      | 11 +++++++++++
>  kernel/locking/rwbase_rt.c    |  3 +++
>  kernel/locking/rwsem.c        |  9 +++++++++
>  kernel/locking/semaphore.c    | 15 ++++++++++++++-
>  8 files changed, 57 insertions(+), 1 deletion(-)

I had conflicts in rwsem.c due to Waiman's patches, but that was simple
enough to resolve. However, I had a good look at the other sites and
ended up with the below...

Yes, I know I'm the one that suggested the percpu thing, but upon
looking again it missed the largest part of percpu_down_write(), which
very much includes that RCU grace period and waiting for the readers to
bugger off

Also, rwbase_rt was missing the entire READ side -- yes, I see that's
also covered by the rtmuex.c part, but that's on a different address and
with different flags, and it's very confusing to not have it annotated.

Anyway, I'll queue this patch with the below folded in for post -rc1.

---

--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -155,7 +155,6 @@ static void percpu_rwsem_wait(struct per
 	}
 	spin_unlock_irq(&sem->waiters.lock);
 
-	trace_contention_begin(sem, LCB_F_PERCPU | (reader ? LCB_F_READ : LCB_F_WRITE));
 	while (wait) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		if (!smp_load_acquire(&wq_entry.private))
@@ -163,7 +162,6 @@ static void percpu_rwsem_wait(struct per
 		schedule();
 	}
 	__set_current_state(TASK_RUNNING);
-	trace_contention_end(sem, 0);
 }
 
 bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
@@ -174,9 +172,11 @@ bool __sched __percpu_down_read(struct p
 	if (try)
 		return false;
 
+	trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_READ);
 	preempt_enable();
 	percpu_rwsem_wait(sem, /* .reader = */ true);
 	preempt_disable();
+	trace_contention_end(sem, 0);
 
 	return true;
 }
@@ -219,6 +219,7 @@ void __sched percpu_down_write(struct pe
 {
 	might_sleep();
 	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
+	trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_WRITE);
 
 	/* Notify readers to take the slow path. */
 	rcu_sync_enter(&sem->rss);
@@ -240,6 +241,7 @@ void __sched percpu_down_write(struct pe
 
 	/* Wait for all active readers to complete. */
 	rcuwait_wait_event(&sem->writer, readers_active_check(sem), TASK_UNINTERRUPTIBLE);
+	trace_contention_end(sem, 0);
 }
 EXPORT_SYMBOL_GPL(percpu_down_write);
 
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -35,7 +35,7 @@ void queued_read_lock_slowpath(struct qr
 	}
 	atomic_sub(_QR_BIAS, &lock->cnts);
 
-	trace_contention_begin(lock, LCB_F_READ | LCB_F_SPIN);
+	trace_contention_begin(lock, LCB_F_SPIN | LCB_F_READ);
 
 	/*
 	 * Put the reader into the wait queue
@@ -67,7 +67,7 @@ void queued_write_lock_slowpath(struct q
 {
 	int cnts;
 
-	trace_contention_begin(lock, LCB_F_WRITE | LCB_F_SPIN);
+	trace_contention_begin(lock, LCB_F_SPIN | LCB_F_WRITE);
 
 	/* Put the writer into the wait queue */
 	arch_spin_lock(&lock->wait_lock);
--- a/kernel/locking/rwbase_rt.c
+++ b/kernel/locking/rwbase_rt.c
@@ -112,6 +112,8 @@ static int __sched __rwbase_read_lock(st
 	 * Reader2 to call up_read(), which might be unbound.
 	 */
 
+	trace_contention_begin(rwb, LCB_F_RT | LCB_F_READ);
+
 	/*
 	 * For rwlocks this returns 0 unconditionally, so the below
 	 * !ret conditionals are optimized out.
@@ -130,6 +132,8 @@ static int __sched __rwbase_read_lock(st
 	raw_spin_unlock_irq(&rtm->wait_lock);
 	if (!ret)
 		rwbase_rtmutex_unlock(rtm);
+
+	trace_contention_end(rwb, ret);
 	return ret;
 }
 
@@ -247,7 +251,7 @@ static int __sched rwbase_write_lock(str
 		goto out_unlock;
 
 	rwbase_set_and_save_current_state(state);
-	trace_contention_begin(rwb, LCB_F_WRITE | LCB_F_RT);
+	trace_contention_begin(rwb, LCB_F_RT | LCB_F_WRITE);
 	for (;;) {
 		/* Optimized out for rwlocks */
 		if (rwbase_signal_pending_state(state, current)) {