This patch provides the (Refcount + Waitqueue) implementation for cpu_hotplug "locking". It is analogous to an unfair rwsem.

Signed-off-by: Gautham R Shenoy

Index: current/kernel/cpu.c
===================================================================
--- current.orig/kernel/cpu.c	2006-08-06 23:50:11.000000000 +0530
+++ current/kernel/cpu.c	2006-08-24 15:00:04.000000000 +0530
@@ -14,50 +14,172 @@
 #include
 #include
 #include
-
-/* This protects CPUs going up and down... */
-static DEFINE_MUTEX(cpu_add_remove_lock);
-static DEFINE_MUTEX(cpu_bitmask_lock);
+#include
+#include
+#include
+#include
 
 static __cpuinitdata BLOCKING_NOTIFIER_HEAD(cpu_chain);
 
+/************************************************************************
+ *		A FEW CONTEXT SPECIFIC DEFINITIONS			*
+ * ---------------------------------------------------------------------*
+ * - reader : task which tries to *prevent* a cpu hotplug event.	*
+ *									*
+ * - writer : task which tries to *perform* a cpu hotplug event.	*
+ *									*
+ * - write-operation : cpu hotplug operation.				*
+ *									*
+ ************************************************************************/
+
+ /***********************************************************************
+ *				THE PROTOCOL				*
+ *----------------------------------------------------------------------*
+ *- Analogous to RWSEM, only not so fair.				*
+ *									*
+ *- Readers assume control iff:						*
+ *	a) No other reader has a reference and no writer is writing.	*
+ *		OR							*
+ *	b) At least one reader has a reference.				*
+ *									*
+ *- In any other case, the reader is blocked.				*
+ *									*
+ *- Writer gets to perform a write iff:					*
+ *	*No* reader has a reference and no writer is writing.		*
+ *									*
+ *- In any other case, the writer is blocked.				*
+ *									*
+ *- A writer, on completion, would preferably wake up other waiting	*
+ *  writers over the waiting readers.					*
+ *									*
+ *- The *last* writer wakes up all the waiting readers.		*
+ *									*
+ ************************************************************************/
+
+/************************************************************************
+			USEFUL FLAGS
+*************************************************************************/
+/* System has no writers */
+#define NO_WRITERS		0
+
+/* Some writer is waiting */
+#define WRITER_WAITING		1
+
+/* A writer is performing cpu hotplug */
+#define CPU_HOTPLUG_ONGOING	2
+
+/*************************************************************************
+		( REFCOUNT + WAITQUEUE ) VARIABLES
+**************************************************************************/
+
+static struct {
+	int reader_count;	/* Refcount for the readers */
+	int status;		/* Status of hotplug operation (none, waiting, ongoing) */
+	spinlock_t lock;	/* Serializes access to this struct */
+} cpu_hotplug __cacheline_aligned_in_smp =
+	{0, NO_WRITERS, SPIN_LOCK_UNLOCKED};
+
+/* Waitqueues for readers and writers */
+static __cacheline_aligned_in_smp DECLARE_WAIT_QUEUE_HEAD(read_queue);
+static __cacheline_aligned_in_smp DECLARE_WAIT_QUEUE_HEAD(write_queue);
+
+/********************************************************************
+	MAIN CPU_HOTPLUG (ENABLE / DISABLE / BEGIN / DONE) CODE
+*********************************************************************/
 
 #ifdef CONFIG_HOTPLUG_CPU
-/* Crappy recursive lock-takers in cpufreq! Complain loudly about idiots */
-static struct task_struct *recursive;
-static int recursive_depth;
-
+/** lock_cpu_hotplug : Blocks iff
+	- Hotplug operation is underway.
+*/
 void lock_cpu_hotplug(void)
 {
-	struct task_struct *tsk = current;
-
-	if (tsk == recursive) {
-		static int warnings = 10;
-		if (warnings) {
-			printk(KERN_ERR "Lukewarm IQ detected in hotplug locking\n");
-			WARN_ON(1);
-			warnings--;
-		}
-		recursive_depth++;
+	DECLARE_WAITQUEUE(wait, current);
+	spin_lock(&cpu_hotplug.lock);
+	cpu_hotplug.reader_count++;
+	if (cpu_hotplug.status != CPU_HOTPLUG_ONGOING) {
+		spin_unlock(&cpu_hotplug.lock);
 		return;
 	}
-	mutex_lock(&cpu_bitmask_lock);
-	recursive = tsk;
+	add_wait_queue_exclusive(&read_queue, &wait);
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	spin_unlock(&cpu_hotplug.lock);
+	schedule();
+	remove_wait_queue(&read_queue, &wait);
 }
 EXPORT_SYMBOL_GPL(lock_cpu_hotplug);
 
+/** unlock_cpu_hotplug :
+	- Decrements the reader_count.
+	- If no readers are holding a reference AND there is a writer
+	  waiting, sets the status to CPU_HOTPLUG_ONGOING and wakes up
+	  one of the waiting writers.
+*/
 void unlock_cpu_hotplug(void)
 {
-	WARN_ON(recursive != current);
-	if (recursive_depth) {
-		recursive_depth--;
-		return;
+	spin_lock(&cpu_hotplug.lock);
+	/* This should not happen, but in case it does... */
+	WARN_ON(!cpu_hotplug.reader_count);
+
+	cpu_hotplug.reader_count--;
+	if (!(cpu_hotplug.reader_count) &&
+	    (cpu_hotplug.status == WRITER_WAITING)) {
+		cpu_hotplug.status = CPU_HOTPLUG_ONGOING;
+		wake_up(&write_queue);
 	}
-	mutex_unlock(&cpu_bitmask_lock);
-	recursive = NULL;
+	spin_unlock(&cpu_hotplug.lock);
 }
 EXPORT_SYMBOL_GPL(unlock_cpu_hotplug);
 
+/** cpu_hotplug_begin : Blocks unless
+	No reader has a reference
+	AND
+	Hotplug operation is not ongoing.
+*/
+static void cpu_hotplug_begin(int interruptible)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	spin_lock(&cpu_hotplug.lock);
+	if (!cpu_hotplug.reader_count &&
+	    (cpu_hotplug.status != CPU_HOTPLUG_ONGOING)) {
+		cpu_hotplug.status = CPU_HOTPLUG_ONGOING;
+		spin_unlock(&cpu_hotplug.lock);
+		return;
+	}
+	if (cpu_hotplug.status != CPU_HOTPLUG_ONGOING)
+		cpu_hotplug.status = WRITER_WAITING;
+	add_wait_queue_exclusive(&write_queue, &wait);
+	if (interruptible)
+		set_current_state(TASK_INTERRUPTIBLE);
+	else
+		set_current_state(TASK_UNINTERRUPTIBLE);
+	spin_unlock(&cpu_hotplug.lock);
+	schedule();
+	remove_wait_queue(&write_queue, &wait);
+}
+
+/** cpu_hotplug_done : Performs either one of the following:
+	- Wakes up the next writer, if any.
+	- Resets the flag to NO_WRITERS and wakes up all the
+	  waiting readers, if any.
+*/
+void cpu_hotplug_done(void)
+{
+	spin_lock(&cpu_hotplug.lock);
+
+	if (!list_empty(&write_queue.task_list))
+		/* Another cpu writer! */
+		wake_up(&write_queue);
+	else {
+		/* This is the last writer. Remove the flag */
+		cpu_hotplug.status = NO_WRITERS;
+
+		if (cpu_hotplug.reader_count)
+			wake_up_all(&read_queue); /* All rise! */
+	}
+
+	spin_unlock(&cpu_hotplug.lock);
+}
+
 #endif /* CONFIG_HOTPLUG_CPU */
 
 /* Need to know about CPUs going up/down? */
@@ -114,7 +236,7 @@ int cpu_down(unsigned int cpu)
 	struct task_struct *p;
 	cpumask_t old_allowed, tmp;
 
-	mutex_lock(&cpu_add_remove_lock);
+	cpu_hotplug_begin(1);
 	if (num_online_cpus() == 1) {
 		err = -EBUSY;
 		goto out;
@@ -140,9 +262,7 @@ int cpu_down(unsigned int cpu)
 	cpu_clear(cpu, tmp);
 	set_cpus_allowed(current, tmp);
 
-	mutex_lock(&cpu_bitmask_lock);
 	p = __stop_machine_run(take_cpu_down, NULL, cpu);
-	mutex_unlock(&cpu_bitmask_lock);
 	if (IS_ERR(p)) {
 		/* CPU didn't die: tell everyone.  Can't complain.
 		 */
@@ -180,7 +300,7 @@ out_thread:
 out_allowed:
 	set_cpus_allowed(current, old_allowed);
 out:
-	mutex_unlock(&cpu_add_remove_lock);
+	cpu_hotplug_done();
 	return err;
 }
 #endif /*CONFIG_HOTPLUG_CPU*/
@@ -190,7 +310,7 @@ int __devinit cpu_up(unsigned int cpu)
 	int ret;
 	void *hcpu = (void *)(long)cpu;
 
-	mutex_lock(&cpu_add_remove_lock);
+	cpu_hotplug_begin(1);
 	if (cpu_online(cpu) || !cpu_present(cpu)) {
 		ret = -EINVAL;
 		goto out;
@@ -205,21 +325,18 @@ int __devinit cpu_up(unsigned int cpu)
 	}
 
 	/* Arch-specific enabling code. */
-	mutex_lock(&cpu_bitmask_lock);
 	ret = __cpu_up(cpu);
-	mutex_unlock(&cpu_bitmask_lock);
 	if (ret != 0)
 		goto out_notify;
 	BUG_ON(!cpu_online(cpu));
 
 	/* Now call notifier in preparation. */
 	blocking_notifier_call_chain(&cpu_chain, CPU_ONLINE, hcpu);
-
 out_notify:
 	if (ret != 0)
 		blocking_notifier_call_chain(&cpu_chain,
 				CPU_UP_CANCELED, hcpu);
 out:
-	mutex_unlock(&cpu_add_remove_lock);
+	cpu_hotplug_done();
 	return ret;
 }
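
For reference, a minimal sketch of how the two sides of this interface are meant to be used. walk_online_cpus() below is a hypothetical caller shown only for illustration and is not part of this patch; lock_cpu_hotplug()/unlock_cpu_hotplug() are the reader-side entry points above, and cpu_hotplug_begin()/cpu_hotplug_done() are used internally by cpu_up()/cpu_down():

#include <linux/cpu.h>		/* lock_cpu_hotplug(), unlock_cpu_hotplug() */
#include <linux/cpumask.h>	/* for_each_online_cpu() */

/*
 * Reader side: code that needs a stable view of the online cpu map
 * takes a reader reference for the duration of its walk.  Any number
 * of readers may hold references concurrently; a reader only sleeps
 * while a hotplug (write) operation is actually in progress.
 */
static void walk_online_cpus(void)	/* hypothetical example caller */
{
	int cpu;

	lock_cpu_hotplug();		/* reader_count++, block if hotplug ongoing */
	for_each_online_cpu(cpu) {
		/* 'cpu' cannot be offlined until unlock_cpu_hotplug() */
	}
	unlock_cpu_hotplug();		/* reader_count--, may wake a waiting writer */
}

/*
 * Writer side: cpu_up()/cpu_down() bracket the hotplug operation with
 * cpu_hotplug_begin(1) / cpu_hotplug_done().  cpu_hotplug_begin() waits
 * until reader_count drops to zero and no other writer is active, then
 * marks the operation CPU_HOTPLUG_ONGOING; cpu_hotplug_done() hands the
 * turn to the next waiting writer or, if none, wakes all blocked readers.
 */

The reader path in the common case is just one spinlock round trip and a refcount increment, which is the point of replacing the single cpu_bitmask_lock mutex; only the last writer's cpu_hotplug_done() pays the cost of waking all the queued readers.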