Provide a down_read()/up_read() variant that keeps preemption disabled over the whole read-side critical section, when possible. This avoids a needless preemption point for constructs such as: percpu_down_read(&global_rwsem); spin_lock(&lock); ... spin_unlock(&lock); percpu_up_read(&global_rwsem); Such a preemption point perturbs timings. In particular, this variant was found to cure a performance regression in a follow-up patch in fs/locks.c. Signed-off-by: Peter Zijlstra (Intel) --- include/linux/percpu-rwsem.h | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -28,7 +28,7 @@ static struct percpu_rw_semaphore name = extern int __percpu_down_read(struct percpu_rw_semaphore *, int); extern void __percpu_up_read(struct percpu_rw_semaphore *); -static inline void percpu_down_read(struct percpu_rw_semaphore *sem) +static inline void percpu_down_read_preempt_disable(struct percpu_rw_semaphore *sem) { might_sleep(); @@ -46,13 +46,19 @@ static inline void percpu_down_read(stru __this_cpu_inc(*sem->read_count); if (unlikely(!rcu_sync_is_idle(&sem->rss))) __percpu_down_read(sem, false); /* Unconditional memory barrier */ - preempt_enable(); + barrier(); /* - * The barrier() from preempt_enable() prevents the compiler from + * The barrier() prevents the compiler from * bleeding the critical section out. */ } +static inline void percpu_down_read(struct percpu_rw_semaphore *sem) +{ + percpu_down_read_preempt_disable(sem); + preempt_enable(); +} + static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem) { int ret = 1; @@ -76,13 +82,13 @@ static inline int percpu_down_read_trylo return ret; } -static inline void percpu_up_read(struct percpu_rw_semaphore *sem) +static inline void percpu_up_read_preempt_enable(struct percpu_rw_semaphore *sem) { /* - * The barrier() in preempt_disable() prevents the compiler from + * The barrier() prevents the compiler from * bleeding the critical section out. 
*/ - preempt_disable(); + barrier(); /* * Same as in percpu_down_read(). */ @@ -95,6 +101,12 @@ static inline void percpu_up_read(struct rwsem_release(&sem->rw_sem.dep_map, 1, _RET_IP_); } +static inline void percpu_up_read(struct percpu_rw_semaphore *sem) +{ + preempt_disable(); + percpu_up_read_preempt_enable(sem); +} + extern void percpu_down_write(struct percpu_rw_semaphore *); extern void percpu_up_write(struct percpu_rw_semaphore *);