lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20200918100112.GN1362448@hirez.programming.kicks-ass.net>
Date:   Fri, 18 Sep 2020 12:01:12 +0200
From:   peterz@...radead.org
To:     Jan Kara <jack@...e.cz>
Cc:     Oleg Nesterov <oleg@...hat.com>, Boaz Harrosh <boaz@...xistor.com>,
        Hou Tao <houtao1@...wei.com>, Ingo Molnar <mingo@...hat.com>,
        Will Deacon <will@...nel.org>, Dennis Zhou <dennis@...nel.org>,
        Tejun Heo <tj@...nel.org>, Christoph Lameter <cl@...ux.com>,
        linux-kernel@...r.kernel.org, linux-fsdevel@...r.kernel.org
Subject: Re: [RFC PATCH] locking/percpu-rwsem: use this_cpu_{inc|dec}() for
 read_count

On Fri, Sep 18, 2020 at 11:07:02AM +0200, Jan Kara wrote:
> If people really wanted to avoid irq-safe inc/dec for archs where it is
> more expensive, one idea I had was that we could add 'read_count_in_irq' to
> percpu_rw_semaphore. So callers in normal context would use read_count and
> callers in irq context would use read_count_in_irq. And the writer side
> would sum over both but we don't care about performance of that one much.

That's not a bad idea... something like the following, I suppose.

(completely untested)

---
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index 5fda40f97fe9..9c847490a86a 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -11,7 +11,7 @@
 
 struct percpu_rw_semaphore {
 	struct rcu_sync		rss;
-	unsigned int __percpu	*read_count;
+	u32 __percpu		*read_count;
 	struct rcuwait		writer;
 	wait_queue_head_t	waiters;
 	atomic_t		block;
@@ -60,7 +60,7 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
 	 * anything we did within this RCU-sched read-size critical section.
 	 */
 	if (likely(rcu_sync_is_idle(&sem->rss)))
-		this_cpu_inc(*sem->read_count);
+		__this_cpu_inc(sem->read_count[0]);
 	else
 		__percpu_down_read(sem, false); /* Unconditional memory barrier */
 	/*
@@ -74,12 +74,16 @@ static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
 {
 	bool ret = true;
 
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+	WARN_ON_ONCE(!in_task());
+#endif
+
 	preempt_disable();
 	/*
 	 * Same as in percpu_down_read().
 	 */
 	if (likely(rcu_sync_is_idle(&sem->rss)))
-		this_cpu_inc(*sem->read_count);
+		__this_cpu_inc(sem->read_count[0]);
 	else
 		ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */
 	preempt_enable();
@@ -98,12 +102,16 @@ static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
 {
 	rwsem_release(&sem->dep_map, _RET_IP_);
 
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+	WARN_ON_ONCE(!in_task());
+#endif
+
 	preempt_disable();
 	/*
 	 * Same as in percpu_down_read().
 	 */
 	if (likely(rcu_sync_is_idle(&sem->rss))) {
-		this_cpu_dec(*sem->read_count);
+		__this_cpu_dec(sem->read_count[0]);
 	} else {
 		/*
 		 * slowpath; reader will only ever wake a single blocked
@@ -115,12 +123,39 @@ static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
 		 * aggregate zero, as that is the only time it matters) they
 		 * will also see our critical section.
 		 */
-		this_cpu_dec(*sem->read_count);
+		__this_cpu_dec(sem->read_count[0]);
 		rcuwait_wake_up(&sem->writer);
 	}
 	preempt_enable();
 }
 
+static inline void __percpu_up_read_irqsafe(struct percpu_rw_semaphore *sem)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	/*
+	 * Same as in percpu_down_read().
+	 */
+	if (likely(rcu_sync_is_idle(&sem->rss))) {
+		__this_cpu_dec(sem->read_count[1]);
+	} else {
+		/*
+		 * slowpath; reader will only ever wake a single blocked
+		 * writer.
+		 */
+		smp_mb(); /* B matches C */
+		/*
+		 * In other words, if they see our decrement (presumably to
+		 * aggregate zero, as that is the only time it matters) they
+		 * will also see our critical section.
+		 */
+		__this_cpu_dec(sem->read_count[1]);
+		rcuwait_wake_up(&sem->writer);
+	}
+	local_irq_restore(flags);
+}
+
 extern void percpu_down_write(struct percpu_rw_semaphore *);
 extern void percpu_up_write(struct percpu_rw_semaphore *);
 
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 70a32a576f3f..00741216a7f6 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -12,7 +12,7 @@
 int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
 			const char *name, struct lock_class_key *key)
 {
-	sem->read_count = alloc_percpu(int);
+	sem->read_count = (u32 *)alloc_percpu(u64);
 	if (unlikely(!sem->read_count))
 		return -ENOMEM;
 
@@ -45,7 +45,7 @@ EXPORT_SYMBOL_GPL(percpu_free_rwsem);
 
 static bool __percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
 {
-	this_cpu_inc(*sem->read_count);
+	__this_cpu_inc(sem->read_count[0]);
 
 	/*
 	 * Due to having preemption disabled the decrement happens on
@@ -71,7 +71,7 @@ static bool __percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
 	if (likely(!atomic_read_acquire(&sem->block)))
 		return true;
 
-	this_cpu_dec(*sem->read_count);
+	__this_cpu_dec(sem->read_count[0]);
 
 	/* Prod writer to re-evaluate readers_active_check() */
 	rcuwait_wake_up(&sem->writer);
@@ -198,7 +198,9 @@ EXPORT_SYMBOL_GPL(__percpu_down_read);
  */
 static bool readers_active_check(struct percpu_rw_semaphore *sem)
 {
-	if (per_cpu_sum(*sem->read_count) != 0)
+	u64 sum = per_cpu_sum(*(u64 *)sem->read_count);
+
+	if (sum + (sum >> 32))
 		return false;
 
 	/*

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ