The cpu hotplug lock is a rwsem with read-in-write and read-in-read recursion. Implement it as such. Signed-off-by: Peter Zijlstra (Intel) --- include/linux/cpu.h | 6 + include/linux/percpu-rwsem.h | 10 ++- include/linux/sched.h | 4 + init/main.c | 1 kernel/cpu.c | 133 +++++++++++++------------------------------ kernel/fork.c | 2 lib/Kconfig | 5 + 7 files changed, 66 insertions(+), 95 deletions(-) --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -224,6 +224,9 @@ extern struct bus_type cpu_subsys; #ifdef CONFIG_HOTPLUG_CPU /* Stop CPUs going up and down. */ +extern void cpu_hotplug_init(void); +extern void cpu_hotplug_init_task(struct task_struct *p); + extern void cpu_hotplug_begin(void); extern void cpu_hotplug_done(void); extern void get_online_cpus(void); @@ -242,6 +245,9 @@ int cpu_down(unsigned int cpu); #else /* CONFIG_HOTPLUG_CPU */ +static inline void cpu_hotplug_init(void) {} +static inline void cpu_hotplug_init_task(struct task_struct *p) {} + static inline void cpu_hotplug_begin(void) {} static inline void cpu_hotplug_done(void) {} #define get_online_cpus() do { } while (0) --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -20,12 +20,10 @@ extern void __percpu_down_read(struct pe extern bool __percpu_down_read_trylock(struct percpu_rw_semaphore *); extern void __percpu_up_read(struct percpu_rw_semaphore *); -static inline void percpu_down_read(struct percpu_rw_semaphore *sem) +static inline void _percpu_down_read(struct percpu_rw_semaphore *sem) { might_sleep(); - rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 0, _RET_IP_); - preempt_disable(); /* * We are in an RCU-sched read-side critical section, so the writer @@ -46,6 +44,12 @@ static inline void percpu_down_read(stru */ } +static inline void percpu_down_read(struct percpu_rw_semaphore *sem) +{ + rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 0, _RET_IP_); + _percpu_down_read(sem); +} + static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem) { bool ret = 
true; --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1389,6 +1389,10 @@ struct task_struct { unsigned int btrace_seq; #endif +#ifdef CONFIG_HOTPLUG_CPU + int cpuhp_ref; +#endif + unsigned int policy; int nr_cpus_allowed; cpumask_t cpus_allowed; --- a/init/main.c +++ b/init/main.c @@ -588,6 +588,7 @@ asmlinkage __visible void __init start_k sched_clock_postinit(); perf_event_init(); profile_init(); + cpu_hotplug_init(); call_function_init(); WARN(!irqs_disabled(), "Interrupts were enabled early\n"); early_boot_irqs_disabled = false; --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "smpboot.h" @@ -50,7 +51,8 @@ EXPORT_SYMBOL(cpu_notifier_register_done static RAW_NOTIFIER_HEAD(cpu_chain); -/* If set, cpu_up and cpu_down will return -EBUSY and do nothing. +/* + * If set, cpu_up and cpu_down will return -EBUSY and do nothing. * Should always be manipulated under cpu_add_remove_lock */ static int cpu_hotplug_disabled; @@ -58,126 +60,72 @@ static int cpu_hotplug_disabled; #ifdef CONFIG_HOTPLUG_CPU static struct { - struct task_struct *active_writer; - /* wait queue to wake up the active_writer */ - wait_queue_head_t wq; - /* verifies that no writer will get active while readers are active */ - struct mutex lock; - /* - * Also blocks the new readers during - * an ongoing cpu hotplug operation. 
- */ - atomic_t refcount; - -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif -} cpu_hotplug = { - .active_writer = NULL, - .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq), - .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock), -#ifdef CONFIG_DEBUG_LOCK_ALLOC - .dep_map = {.name = "cpu_hotplug.lock" }, -#endif -}; - -/* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */ -#define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map) -#define cpuhp_lock_acquire_tryread() \ - lock_map_acquire_tryread(&cpu_hotplug.dep_map) -#define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map) -#define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map) + struct percpu_rw_semaphore rwsem; + struct task_struct *writer; +} cpu_hotplug = { .writer = &init_task, }; + +void cpu_hotplug_init(void) +{ + percpu_init_rwsem(&cpu_hotplug.rwsem); + cpu_hotplug.writer = NULL; +} +void cpu_hotplug_init_task(struct task_struct *p) +{ + p->cpuhp_ref = 0; +} void get_online_cpus(void) { might_sleep(); - if (cpu_hotplug.active_writer == current) + + /* read in write recursion */ + if (cpu_hotplug.writer == current) + return; + + /* read in read recursion */ + if (current->cpuhp_ref++) return; - cpuhp_lock_acquire_read(); - mutex_lock(&cpu_hotplug.lock); - atomic_inc(&cpu_hotplug.refcount); - mutex_unlock(&cpu_hotplug.lock); + + lock_map_acquire_read(&cpu_hotplug.rwsem.rw_sem.dep_map); + _percpu_down_read(&cpu_hotplug.rwsem); } EXPORT_SYMBOL_GPL(get_online_cpus); bool try_get_online_cpus(void) { - if (cpu_hotplug.active_writer == current) + if (cpu_hotplug.writer == current) return true; - if (!mutex_trylock(&cpu_hotplug.lock)) - return false; - cpuhp_lock_acquire_tryread(); - atomic_inc(&cpu_hotplug.refcount); - mutex_unlock(&cpu_hotplug.lock); - return true; + + if (current->cpuhp_ref++) + return true; + + return percpu_down_read_trylock(&cpu_hotplug.rwsem); } EXPORT_SYMBOL_GPL(try_get_online_cpus); void 
put_online_cpus(void) { - int refcount; - - if (cpu_hotplug.active_writer == current) + if (cpu_hotplug.writer == current) return; - refcount = atomic_dec_return(&cpu_hotplug.refcount); - if (WARN_ON(refcount < 0)) /* try to fix things up */ - atomic_inc(&cpu_hotplug.refcount); - - if (refcount <= 0 && waitqueue_active(&cpu_hotplug.wq)) - wake_up(&cpu_hotplug.wq); - - cpuhp_lock_release(); + if (--current->cpuhp_ref) + return; + percpu_up_read(&cpu_hotplug.rwsem); } EXPORT_SYMBOL_GPL(put_online_cpus); -/* - * This ensures that the hotplug operation can begin only when the - * refcount goes to zero. - * - * Note that during a cpu-hotplug operation, the new readers, if any, - * will be blocked by the cpu_hotplug.lock - * - * Since cpu_hotplug_begin() is always called after invoking - * cpu_maps_update_begin(), we can be sure that only one writer is active. - * - * Note that theoretically, there is a possibility of a livelock: - * - Refcount goes to zero, last reader wakes up the sleeping - * writer. - * - Last reader unlocks the cpu_hotplug.lock. - * - A new reader arrives at this moment, bumps up the refcount. - * - The writer acquires the cpu_hotplug.lock finds the refcount - * non zero and goes to sleep again. - * - * However, this is very difficult to achieve in practice since - * get_online_cpus() not an api which is called all that often. 
- * - */ void cpu_hotplug_begin(void) { - DEFINE_WAIT(wait); - - cpu_hotplug.active_writer = current; - cpuhp_lock_acquire(); - - for (;;) { - mutex_lock(&cpu_hotplug.lock); - prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE); - if (likely(!atomic_read(&cpu_hotplug.refcount))) - break; - mutex_unlock(&cpu_hotplug.lock); - schedule(); - } - finish_wait(&cpu_hotplug.wq, &wait); + percpu_down_write(&cpu_hotplug.rwsem); + cpu_hotplug.writer = current; } void cpu_hotplug_done(void) { - cpu_hotplug.active_writer = NULL; - mutex_unlock(&cpu_hotplug.lock); - cpuhp_lock_release(); + cpu_hotplug.writer = NULL; + percpu_up_write(&cpu_hotplug.rwsem); } /* --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1410,6 +1410,8 @@ static struct task_struct *copy_process( p->sequential_io_avg = 0; #endif + cpu_hotplug_init_task(p); + /* Perform scheduler related setup. Assign this task to a CPU. */ retval = sched_fork(clone_flags, p); if (retval) --- a/lib/Kconfig +++ b/lib/Kconfig @@ -56,6 +56,11 @@ config STMP_DEVICE config PERCPU_RWSEM bool +config PERCPU_RWSEM_HOTPLUG + def_bool y + depends on HOTPLUG_CPU + select PERCPU_RWSEM + config ARCH_USE_CMPXCHG_LOCKREF bool -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/