Message-Id: <20220427155241.26410-1-dave@stgolabs.net>
Date:   Wed, 27 Apr 2022 08:52:40 -0700
From:   Davidlohr Bueso <dave@...olabs.net>
To:     peterz@...radead.org
Cc:     namhyung@...nel.org, longman@...hat.com, will@...nel.org,
        boqun.feng@...il.com, mingo@...nel.org, dave@...olabs.net,
        linux-kernel@...r.kernel.org
Subject: [PATCH] locking/rwsem: Teach contention tracing about optimistic spinning

Similar to the mutex counterpart, we can further distinguish the
types of contention. As with mutexes, this patch can introduce
multiple _begin() tracepoints paired with a single respective _end().

- The original _begin() for down_write() is moved further up,
right after we know optimistic spinning is bust.

- For the scenario that spins after setting the hand-off bit and
failing to grab the lock, the code is changed to duplicate the
rwsem_try_write_lock() call upon OWNER_NULL, which minimizes the
number of _begin() calls in the wait-loop - it also gets rid of a
goto label, and the logic is neatly encapsulated in the branch.
In such cases the common outcome will be to acquire the lock,
but if it is stolen in that window, this path won't see the
signal_pending() check in the current iteration and will block.
This should be benign, as the check comes in after waking if the
lock still cannot be taken.

Signed-off-by: Davidlohr Bueso <dave@...olabs.net>
---
rtmutexes' top waiter will also do optimistic spinning, but
I don't think it is worth adding tracepoints there as it
is quite minimal, unlike the osq stuff.
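
For reviewers, a compilable userspace model of the resulting
begin/end pairing in the write slowpath may help illustrate the
point. This is only a sketch: the rwsem_* helpers below are trivial
hypothetical stubs, not the kernel implementations, and only the
tracepoint placement mirrors the patch.

/*
 * Userspace model of the begin/end pairing after this patch.
 * The rwsem_* functions are stand-in stubs; the flag values match
 * include/trace/events/lock.h.
 */
#include <stdio.h>
#include <stdbool.h>

#define LCB_F_SPIN	(1U << 0)
#define LCB_F_READ	(1U << 1)
#define LCB_F_WRITE	(1U << 2)
#define LCB_F_RWSEM	(1U << 6)

static void trace_contention_begin(void *sem, unsigned int flags)
{
	printf("contention_begin: sem=%p flags=0x%x\n", sem, flags);
}

static void trace_contention_end(void *sem, int ret)
{
	printf("contention_end:   sem=%p ret=%d\n", sem, ret);
}

/* Stubs: pretend spinning is possible but fails, then the lock is free. */
static bool rwsem_can_spin_on_owner(void *sem) { return true; }
static bool rwsem_optimistic_spin(void *sem)   { return false; }
static bool rwsem_try_write_lock(void *sem)    { return true; }

static void *down_write_slowpath(void *sem)
{
	if (rwsem_can_spin_on_owner(sem)) {
		/* first _begin(): the optimistic spinning phase */
		trace_contention_begin(sem, LCB_F_RWSEM | LCB_F_WRITE | LCB_F_SPIN);
		if (rwsem_optimistic_spin(sem)) {
			trace_contention_end(sem, 0);
			return sem;
		}
	}

	/* second _begin(): the blocking wait phase */
	trace_contention_begin(sem, LCB_F_RWSEM | LCB_F_WRITE);
	while (!rwsem_try_write_lock(sem))
		;	/* sleep/wake loop elided */

	/* a single _end() closes whichever _begin() fired last */
	trace_contention_end(sem, 0);
	return sem;
}

int main(void)
{
	int sem;
	down_write_slowpath(&sem);
	return 0;
}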

 include/trace/events/lock.h |  4 +++-
 kernel/locking/rwsem.c      | 29 +++++++++++++++++++++--------
 2 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/include/trace/events/lock.h b/include/trace/events/lock.h
index 9ebd081e057e..0f68a3e69a9f 100644
--- a/include/trace/events/lock.h
+++ b/include/trace/events/lock.h
@@ -15,6 +15,7 @@
 #define LCB_F_RT	(1U << 3)
 #define LCB_F_PERCPU	(1U << 4)
 #define LCB_F_MUTEX	(1U << 5)
+#define LCB_F_RWSEM	(1U << 6)
 
 
 #ifdef CONFIG_LOCKDEP
@@ -115,7 +116,8 @@ TRACE_EVENT(contention_begin,
 				{ LCB_F_WRITE,		"WRITE" },
 				{ LCB_F_RT,		"RT" },
 				{ LCB_F_PERCPU,		"PERCPU" },
-				{ LCB_F_MUTEX,		"MUTEX" }
+				{ LCB_F_MUTEX,		"MUTEX" },
+				{ LCB_F_RWSEM,		"RWSEM" }
 			  ))
 );
 
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 9d1db4a54d34..c38f990cacea 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -1057,7 +1057,7 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int stat
 	if (!wake_q_empty(&wake_q))
 		wake_up_q(&wake_q);
 
-	trace_contention_begin(sem, LCB_F_READ);
+	trace_contention_begin(sem, LCB_F_RWSEM | LCB_F_READ);
 
 	/* wait to be given the lock */
 	for (;;) {
@@ -1101,9 +1101,14 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
 	DEFINE_WAKE_Q(wake_q);
 
 	/* do optimistic spinning and steal lock if possible */
-	if (rwsem_can_spin_on_owner(sem) && rwsem_optimistic_spin(sem)) {
-		/* rwsem_optimistic_spin() implies ACQUIRE on success */
-		return sem;
+	if (rwsem_can_spin_on_owner(sem)) {
+		trace_contention_begin(sem,
+				       LCB_F_RWSEM | LCB_F_WRITE | LCB_F_SPIN);
+		if (rwsem_optimistic_spin(sem)) {
+			/* rwsem_optimistic_spin() implies ACQUIRE on success */
+			trace_contention_end(sem, 0);
+			return sem;
+		}
 	}
 
 	/*
@@ -1115,6 +1120,8 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
 	waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
 	waiter.handoff_set = false;
 
+	trace_contention_begin(sem, LCB_F_RWSEM | LCB_F_WRITE);
+
 	raw_spin_lock_irq(&sem->wait_lock);
 	rwsem_add_waiter(sem, &waiter);
 
@@ -1137,7 +1144,6 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
 
 	/* wait until we successfully acquire the lock */
 	set_current_state(state);
-	trace_contention_begin(sem, LCB_F_WRITE);
 
 	for (;;) {
 		if (rwsem_try_write_lock(sem, &waiter)) {
@@ -1161,18 +1167,25 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
 		if (waiter.handoff_set) {
 			enum owner_state owner_state;
 
+			trace_contention_begin(sem, LCB_F_RWSEM |
+					       LCB_F_WRITE | LCB_F_SPIN);
 			preempt_disable();
 			owner_state = rwsem_spin_on_owner(sem);
 			preempt_enable();
 
-			if (owner_state == OWNER_NULL)
-				goto trylock_again;
+			if (owner_state == OWNER_NULL) {
+				raw_spin_lock_irq(&sem->wait_lock);
+				if (rwsem_try_write_lock(sem, &waiter))
+					break;
+				raw_spin_unlock_irq(&sem->wait_lock);
+			}
+
+			trace_contention_begin(sem, LCB_F_RWSEM | LCB_F_WRITE);
 		}
 
 		schedule();
 		lockevent_inc(rwsem_sleep_writer);
 		set_current_state(state);
-trylock_again:
 		raw_spin_lock_irq(&sem->wait_lock);
 	}
 	__set_current_state(TASK_RUNNING);
-- 
2.36.0
