/*
 * Read-Copy Update mechanism for mutual exclusion
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright IBM Corporation, 2001
 *
 * Authors: Dipankar Sarma <dipankar@in.ibm.com>
 *	    Manfred Spraul <manfred@colorfullife.com>
 *
 * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
 * Papers:
 * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
 *
 * For detailed explanation of Read-Copy Update mechanism see -
 *		Documentation/RCU
 *
 * Rewrite based on a global state machine
 * (C) Manfred Spraul <manfred@colorfullife.com>, 2008
 */
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/rcupdate.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <asm/atomic.h>
#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/moduleparam.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/time.h>

#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key rcu_lock_key;
struct lockdep_map rcu_lock_map =
	STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
EXPORT_SYMBOL_GPL(rcu_lock_map);
#endif

/* Definition for rcupdate control block. */
static struct rcu_global_state rcu_global_state_normal = {
	.lock = __SEQLOCK_UNLOCKED(&rcu_global_state_normal.lock),
	.start_immediately = 0,
	.cpus = __RCU_CPUMASK_INIT(&rcu_global_state_normal.cpus)
};

static struct rcu_global_state rcu_global_state_bh = {
	.lock = __SEQLOCK_UNLOCKED(&rcu_global_state_bh.lock),
	.start_immediately = 0,
	.cpus = __RCU_CPUMASK_INIT(&rcu_global_state_bh.cpus)
};

DEFINE_PER_CPU(struct rcu_cpu_state, rcu_cpudata_normal) = { 0L };
DEFINE_PER_CPU(struct rcu_cpu_state, rcu_cpudata_bh) = { 0L };
DEFINE_PER_CPU(struct rcu_cpu_dead, rcu_cpudata_dead) = { 0L };

/*
 * rcu_cpumode:
 *
 * -2 (RCU_CPUMODE_INVALID):
 *	The cpu is offline, it is not accounted for by the state machine.
 *
 * -1 (RCU_CPUMODE_DELAYED):
 *	"normal" rcu behavior: the scheduler and the timer interrupt
 *	check for grace periods, read side critical sections are permitted
 *	everywhere.
 *
 * 0:
 *	This cpu is sitting in the idle thread, with disabled hz timer.
 *
 * > 0:
 *	The cpu is in an interrupt that interrupted a nohz idle thread.
 */
#define RCU_CPUMODE_INVALID	-2
#define RCU_CPUMODE_DELAYED	-1
DEFINE_PER_CPU(int, rcu_cpumode) = { 0L };

int qlowmark = 100;

long rcu_batches_completed(void)
{
	return rcu_global_state_normal.completed;
}

long rcu_batches_completed_bh(void)
{
	return rcu_global_state_bh.completed;
}

/**
 * rcu_state_startcycle - start the next rcu cycle
 * @rgs: global rcu state
 *
 * The function starts the next rcu cycle, either immediately or
 * by setting rgs->start_immediately.
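 *
 * Rough sketch of one complete cycle, as driven by __rcu_state_machine()
 * below: rgs->cpus moves from RCU_STATE_DESTROY to
 * RCU_STATE_DESTROY_AND_COLLECT (each cpu flushes its "old" objects to its
 * dead list and moves "new" to "old"), then to RCU_STATE_GRACE (each cpu
 * reports a quiescent state), and finally back to RCU_STATE_DESTROY, at
 * which point the objects collected at the start of the cycle may be
 * destroyed.  If a cycle is already in progress, only
 * rgs->start_immediately is set and the next cycle starts as soon as the
 * current one completes.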
 */
static void rcu_state_startcycle(struct rcu_global_state *rgs)
{
	unsigned seq;
	int do_real_start;

	BUG_ON(!irqs_disabled());
	do {
		seq = read_seqbegin(&rgs->lock);
		if (rgs->start_immediately == 0) {
			do_real_start = 1;
		} else {
			do_real_start = 0;
			BUG_ON(rcu_cpumask_getstate(&rgs->cpus) == RCU_STATE_DESTROY);
		}
	} while (read_seqretry(&rgs->lock, seq));

	if (do_real_start) {
		write_seqlock(&rgs->lock);
		switch (rcu_cpumask_getstate(&rgs->cpus)) {
		case RCU_STATE_DESTROY_AND_COLLECT:
		case RCU_STATE_GRACE:
			rgs->start_immediately = 1;
			break;
		case RCU_STATE_DESTROY:
			rcu_cpumask_init(&rgs->cpus, RCU_STATE_DESTROY_AND_COLLECT, 1);
			smp_wmb();
			BUG_ON(rgs->start_immediately);
			break;
		default:
			BUG();
		}
		write_sequnlock(&rgs->lock);
	}
}

/*
 * Delay that can occur for synchronize_rcu() callers
 */
#define RCU_MAX_DELAY	(HZ/30+1)

static void rcu_checkqlen(struct rcu_global_state *rgs, struct rcu_cpu_state *rcs, int inc)
{
	BUG_ON(!irqs_disabled());
	if (unlikely(rcs->newqlen == 0)) {
		rcs->timeout = jiffies + RCU_MAX_DELAY;
	}
	if ((rcs->newqlen < qlowmark) && (rcs->newqlen + inc >= qlowmark))
		rcu_state_startcycle(rgs);

	rcs->newqlen += inc;

	/*
	 * This is not really a bug, it might happen when an interrupt calls
	 * call_rcu() while the cpu is in nohz mode.  See rcu_irq_exit().
	 */
	WARN_ON((rcs->newqlen >= qlowmark) &&
		(rcu_cpumask_getstate(&rgs->cpus) == RCU_STATE_DESTROY));
}

static void __call_rcu(struct rcu_head *head, struct rcu_global_state *rgs,
		struct rcu_cpu_state *rcs)
{
	if (rcs->new == NULL) {
		rcs->new = head;
	} else {
		(*rcs->newtail) = head;
	}
	rcs->newtail = &head->next;

	rcu_checkqlen(rgs, rcs, 1);
}

/**
 * call_rcu - Queue an RCU callback for invocation after a grace period.
 * @head: structure to be used for queueing the RCU updates.
 * @func: actual update function to be invoked after the grace period
 *
 * The update function will be invoked some time after a full grace
 * period elapses, in other words after all currently executing RCU
 * read-side critical sections have completed.  RCU read-side critical
 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
 * and may be nested.
 */
void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
	unsigned long flags;

	head->func = func;
	local_irq_save(flags);
	__call_rcu(head, &rcu_global_state_normal, &__get_cpu_var(rcu_cpudata_normal));
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(call_rcu);

/**
 * call_rcu_bh - Queue an RCU callback for invocation after a quicker grace period.
 * @head: structure to be used for queueing the RCU updates.
 * @func: actual update function to be invoked after the grace period
 *
 * The update function will be invoked some time after a full grace
 * period elapses, in other words after all currently executing RCU
 * read-side critical sections have completed.  call_rcu_bh() assumes
 * that the read-side critical sections end on completion of a softirq
 * handler.  This means that read-side critical sections in process
 * context must not be interrupted by softirqs.  This interface is to be
 * used when most of the read-side critical sections are in softirq context.
 * RCU read-side critical sections are delimited by rcu_read_lock() and
 * rcu_read_unlock(), if in interrupt context, or rcu_read_lock_bh()
 * and rcu_read_unlock_bh(), if in process context.  These may be nested.
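 *
 * Illustrative usage sketch (struct foo and foo_free_rcu() are made-up
 * names, not part of this file): embed a struct rcu_head in the protected
 * object and release the object from the callback:
 *
 *	struct foo {
 *		int data;
 *		struct rcu_head rcu;
 *	};
 *
 *	static void foo_free_rcu(struct rcu_head *head)
 *	{
 *		struct foo *f = container_of(head, struct foo, rcu);
 *
 *		kfree(f);
 *	}
 *
 *	(after unlinking f from all reader-visible structures)
 *	call_rcu_bh(&f->rcu, foo_free_rcu);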
 */
void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
	unsigned long flags;

	head->func = func;
	local_irq_save(flags);
	__call_rcu(head, &rcu_global_state_bh, &__get_cpu_var(rcu_cpudata_bh));
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(call_rcu_bh);

#define RCU_BATCH_MIN		100
#define RCU_BATCH_INCFACTOR	2
#define RCU_BATCH_DECFACTOR	4

static void rcu_move_and_raise(struct rcu_cpu_state *rcs, int do_raise)
{
	struct rcu_cpu_dead *rcd = &get_cpu_var(rcu_cpudata_dead);

	BUG_ON(!irqs_disabled());

	/* update batch limit:
	 * - if there are still old entries when new entries are added:
	 *   double the batch count.
	 * - if there are no old entries: reduce it by 25%, but never below 100.
	 */
	if (rcd->deadqlen)
		rcd->batchcount = rcd->batchcount*RCU_BATCH_INCFACTOR;
	else
		rcd->batchcount = rcd->batchcount-rcd->batchcount/RCU_BATCH_DECFACTOR;
	if (rcd->batchcount < RCU_BATCH_MIN)
		rcd->batchcount = RCU_BATCH_MIN;

	if (rcs->old != NULL) {
		if (rcd->dead == NULL) {
			rcd->dead = rcs->old;
		} else {
			(*rcd->deadtail) = rcs->old;
		}
		rcd->deadtail = rcs->oldtail;
		rcd->deadqlen += rcs->oldqlen;
	}

	rcs->old = NULL;
	rcs->oldtail = NULL;
	rcs->oldqlen = 0;

	if (do_raise)
		raise_softirq(RCU_SOFTIRQ);

	put_cpu_var(rcu_cpudata_dead);
}

static void __rcu_state_machine(struct rcu_global_state *rgs, struct rcu_cpu_state *rcs,
		int global_state, int is_quiet, int do_raise, int cpu)
{
	int inc_state;
	unsigned long flags;

	/*
	 * Theoretically, this code should run under read_seqbegin().
	 * But: important changes (i.e. from COLLECT to GRACE,
	 * from GRACE to DESTROY) only happen when all cpus have completed
	 * their work.  If rcu_cpumask_getstate(&rgs->cpus) != rcs->state,
	 * then we haven't completed our work yet.  Thus such a change
	 * cannot happen.
	 * The only change that might happen is a change from
	 * RCU_STATE_DESTROY to RCU_STATE_DESTROY_AND_COLLECT.  We'll notice
	 * that in the next round.
	 * No need for an mb() either - it simply doesn't matter.
	 * Actually: when rcu_state_startcycle() is called, then it's
	 * guaranteed that global_state and rcu_cpumask_getstate(&rgs->cpus)
	 * do not match...
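	 *
	 * Note that only the cpu whose rcu_cpumask_clear_and_test() clears
	 * the last bit takes the write_seqlock() below and advances the
	 * global state; all other cpus merely update their own rcs->state.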
	 */
	local_irq_save(flags);
	if (global_state == RCU_STATE_DESTROY && rcs->newqlen > 0 &&
			time_after(jiffies, rcs->timeout) && do_raise) {
		printk(KERN_ERR " delayed rcu start for %p: %ld entries (cpu %d, ptr %p).\n",
				rgs, rcs->newqlen, cpu, rcs);
		rcu_state_startcycle(rgs);
	}

	inc_state = 0;
	if (global_state != rcs->state) {
		switch (global_state) {
		case RCU_STATE_DESTROY:
			rcs->state = RCU_STATE_DESTROY;
			rcu_move_and_raise(rcs, do_raise);
			break;
		case RCU_STATE_DESTROY_AND_COLLECT:
			rcs->state = RCU_STATE_DESTROY_AND_COLLECT;
			rcu_move_and_raise(rcs, do_raise);
			rcs->old = rcs->new;
			rcs->oldtail = rcs->newtail;
			rcs->oldqlen = rcs->newqlen;
			rcs->new = NULL;
			rcs->newtail = NULL;
			rcs->newqlen = 0;
			rcs->looking = 0;
			if (rcu_cpumask_clear_and_test(&rgs->cpus, cpu))
				inc_state = 1;
			break;
		case RCU_STATE_GRACE:
			if (is_quiet || (rcs->quiet && rcs->looking)) {
				rcs->state = RCU_STATE_GRACE;
				if (rcu_cpumask_clear_and_test(&rgs->cpus, cpu))
					inc_state = 1;
			}
			rcs->quiet = 0;
			rcs->looking = 1;
			break;
		default:
			BUG();
		}
	}
	local_irq_restore(flags);

	if (unlikely(inc_state)) {
		local_irq_save(flags);
		write_seqlock(&rgs->lock);

		BUG_ON(rcu_cpumask_getstate(&rgs->cpus) != rcs->state);
		BUG_ON(global_state != rcu_cpumask_getstate(&rgs->cpus));
		/*
		 * advance the state machine:
		 * - from COLLECT to GRACE
		 * - from GRACE to DESTROY/COLLECT
		 */
		switch (rcu_cpumask_getstate(&rgs->cpus)) {
		case RCU_STATE_DESTROY_AND_COLLECT:
			rcu_cpumask_init(&rgs->cpus, RCU_STATE_GRACE, 1);
			break;
		case RCU_STATE_GRACE:
			rgs->completed++;
			if (rgs->start_immediately) {
				rcu_cpumask_init(&rgs->cpus, RCU_STATE_DESTROY_AND_COLLECT, 1);
			} else {
				rcu_cpumask_init(&rgs->cpus, RCU_STATE_DESTROY, 0);
			}
			rgs->start_immediately = 0;
			break;
		default:
			BUG();
		}
		write_sequnlock(&rgs->lock);
		local_irq_restore(flags);
	}
}

static void rcu_state_machine(struct rcu_global_state *rgs, struct rcu_cpu_state *rcs,
		int is_quiet, int cpu)
{
	int global_state = rcu_cpumask_getstate(&rgs->cpus);

	/* gcc should not optimize away the local variable global_state... */
	barrier();
	__rcu_state_machine(rgs, rcs, global_state, is_quiet, 1, cpu);
}

#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_NO_HZ)

static void __rcu_remove_cpu(struct rcu_global_state *rgs, struct rcu_cpu_state *rcs,
		int cpu)
{
	int global_state;
	unsigned seq;

	BUG_ON(!irqs_disabled());
	/* task 1:
	 * Do the work that the cpu is still supposed to do.
	 * We rely on the lock inside the rcu_cpumask, that guarantees that
	 * we neither do too much nor too little.
	 * But do not raise the softirq, the caller is responsible for
	 * handling the entries still in the queues.
	 */
	global_state = rcu_cpumask_removecpu(&rgs->cpus, cpu);

	/*
	 * ensure that we are not in the middle of updating
	 * rcu_cpumask_getstate(&rgs->cpus): otherwise __rcu_state_machine()
	 * would return with "nothing to do", although
	 * the cpu must do something.
	 */
	do {
		seq = read_seqbegin(&rgs->lock);
	} while (read_seqretry(&rgs->lock, seq));

	__rcu_state_machine(rgs, rcs, global_state, 1, 0, cpu);
}

#endif

#ifdef CONFIG_HOTPLUG_CPU

/**
 * rcu_bulk_add - bulk add new rcu objects.
 * @rgs: global rcu state
 * @rcs: cpu state
 * @h: linked list of rcu objects.
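 * @htail: tail of the @h list, i.e. the address of the last ->next pointer.
 * @len: number of objects on the @h list.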
 *
 * Must be called with enabled local interrupts
 */
static void rcu_bulk_add(struct rcu_global_state *rgs, struct rcu_cpu_state *rcs,
		struct rcu_head *h, struct rcu_head **htail, int len)
{

	BUG_ON(irqs_disabled());

	if (len > 0) {
		local_irq_disable();
		if (rcs->new == NULL) {
			rcs->new = h;
		} else {
			(*rcs->newtail) = h;
		}
		rcs->newtail = htail;

		rcu_checkqlen(rgs, rcs, len);
		local_irq_enable();
	}
}

static void __rcu_offline_cpu(struct rcu_global_state *rgs, struct rcu_cpu_state *this_rcs,
		struct rcu_cpu_state *other_rcs, int cpu)
{
	/*
	 * task 1: Do the work that the other cpu is still supposed to do.
	 */
	__rcu_remove_cpu(rgs, other_rcs, cpu);
	per_cpu(rcu_cpumode, cpu) = RCU_CPUMODE_INVALID;

	/* task 2: move all entries from the dead cpu into the lists of the
	 * current cpu.
	 * locking: The other cpu is dead, thus no locks are required.
	 * Thus it's more or less a bulk call_rcu().
	 * For the sake of simplicity, all objects are treated as "new",
	 * even the objects that are already in "old".
	 */
	rcu_bulk_add(rgs, this_rcs, other_rcs->new, other_rcs->newtail, other_rcs->newqlen);
	rcu_bulk_add(rgs, this_rcs, other_rcs->old, other_rcs->oldtail, other_rcs->oldqlen);
}

static void rcu_offline_cpu(int cpu)
{
	struct rcu_cpu_state *this_rcs_normal = &get_cpu_var(rcu_cpudata_normal);
	struct rcu_cpu_state *this_rcs_bh = &get_cpu_var(rcu_cpudata_bh);
	struct rcu_cpu_dead *this_rcd, *other_rcd;

	BUG_ON(irqs_disabled());

	/* step 1: move new & old lists, clear cpu bitmask */
	__rcu_offline_cpu(&rcu_global_state_normal, this_rcs_normal,
			&per_cpu(rcu_cpudata_normal, cpu), cpu);
	__rcu_offline_cpu(&rcu_global_state_bh, this_rcs_bh,
			&per_cpu(rcu_cpudata_bh, cpu), cpu);
	put_cpu_var(rcu_cpudata_normal);
	put_cpu_var(rcu_cpudata_bh);

	/* step 2: move dead list */
	this_rcd = &get_cpu_var(rcu_cpudata_dead);
	other_rcd = &per_cpu(rcu_cpudata_dead, cpu);

	if (other_rcd->dead != NULL) {
		local_irq_disable();
		if (this_rcd->dead == NULL) {
			this_rcd->dead = other_rcd->dead;
		} else {
			(*this_rcd->deadtail) = other_rcd->dead;
		}
		this_rcd->deadtail = other_rcd->deadtail;
		this_rcd->deadqlen += other_rcd->deadqlen;
		local_irq_enable();
	}

	put_cpu_var(rcu_cpudata_dead);

	BUG_ON(rcu_needs_cpu(cpu));
}

#else

static void rcu_offline_cpu(int cpu)
{
}

#endif

static int __rcu_pending(struct rcu_global_state *rgs, struct rcu_cpu_state *rcs)
{
	/*
	 * This cpu must do something for the state machine.
	 */
	if (rcu_cpumask_getstate(&rgs->cpus) != rcs->state)
		return 1;
	/*
	 * The state machine is stopped and the current
	 * cpu has outstanding rcu callbacks
	 */
	if (rcs->state == RCU_STATE_DESTROY && rcs->newqlen)
		return 1;

	return 0;
}

/**
 * rcu_pending - check for pending rcu related work.
 * @cpu: cpu to check.
 *
 * Check to see if there is any immediate RCU-related work to be done
 * by the current CPU, returning 1 if so.  This function is part of the
 * RCU implementation; it is -not- an exported member of the RCU API.
 *
 * This function is inherently racy: If it returns 1, then there is something
 * to do.  If it returns 0, then there was nothing to do.  It's possible that
 * by the time rcu_pending() returns, there is now something to do.
 */
int rcu_pending(int cpu)
{
	return __rcu_pending(&rcu_global_state_normal, &per_cpu(rcu_cpudata_normal, cpu)) ||
		__rcu_pending(&rcu_global_state_bh, &per_cpu(rcu_cpudata_bh, cpu));
}

static int __rcu_needs_cpu(struct rcu_global_state *rgs, struct rcu_cpu_state *rcs)
{
	if (rcs->new)
		return 1;
	if (rcs->old)
		return 1;
	return 0;
}

/**
 * rcu_needs_cpu - check for outstanding rcu work.
 * @cpu: cpu to check.
 *
 * Check to see if any future RCU-related work will need to be done
 * by @cpu, even if none need be done immediately, returning
 * 1 if so.  This function is part of the RCU implementation; it is -not-
 * an exported member of the RCU API.
 *
 * Locking only works properly if the function is called for the current
 * cpu and with disabled local interrupts.  It's a prerequisite for
 * rcu_nohz_enter() that rcu_needs_cpu() returns 0.  Local interrupts must
 * not be enabled in between, otherwise a softirq could call call_rcu().
 *
 * Note: rcu_needs_cpu() can be 0 (cpu not needed) even though rcu_pending()
 * returns 1.  This means that the outstanding work can be completed by
 * either the CPU_DEAD callback or rcu_enter_nohz().
 */
int rcu_needs_cpu(int cpu)
{
	int ret;

	BUG_ON(!irqs_disabled());

	ret = __rcu_needs_cpu(&rcu_global_state_normal, &per_cpu(rcu_cpudata_normal, cpu)) ||
		__rcu_needs_cpu(&rcu_global_state_bh, &per_cpu(rcu_cpudata_bh, cpu)) ||
		(per_cpu(rcu_cpudata_dead, cpu).deadqlen > 0);

	printk(KERN_ERR " rcu_needs cpu %d: %d.\n", cpu, ret);

	return ret;
}

/**
 * rcu_check_callbacks - external entry point for grace period checking
 * @cpu: cpu id.
 * @user: user space was interrupted.
 *
 * Top-level function driving RCU grace-period detection, normally
 * invoked from the scheduler-clock interrupt.  This function simply
 * increments counters that are read only from softirq by this same
 * CPU, so there are no memory barriers required.
 *
 * This function can run with disabled local interrupts, thus all
 * callees must use local_irq_save().
 */
void rcu_check_callbacks(int cpu, int user)
{
	if (user ||
	    (idle_cpu(cpu) && !in_softirq() &&
				hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
		/*
		 * Get here if this CPU took its interrupt from user
		 * mode or from the idle loop, and if this is not a
		 * nested interrupt.  In this case, the CPU is in
		 * a quiescent state, so count it.
		 */
		rcu_state_machine(&rcu_global_state_normal, &per_cpu(rcu_cpudata_normal, cpu), 1, cpu);
		rcu_state_machine(&rcu_global_state_bh, &per_cpu(rcu_cpudata_bh, cpu), 1, cpu);

	} else if (!in_softirq()) {
		/*
		 * Get here if this CPU did not take its interrupt from
		 * softirq, in other words, if it is not interrupting
		 * a rcu_bh read-side critical section.  This is an _bh
		 * critical section, so count it.
		 */
		rcu_state_machine(&rcu_global_state_normal, &per_cpu(rcu_cpudata_normal, cpu), 0, cpu);
		rcu_state_machine(&rcu_global_state_bh, &per_cpu(rcu_cpudata_bh, cpu), 1, cpu);
	} else {
		/*
		 * We are interrupting something.  Nevertheless - check if
		 * we should collect rcu objects.  This can be done from
		 * arbitrary context.
		 */
		rcu_state_machine(&rcu_global_state_normal, &per_cpu(rcu_cpudata_normal, cpu), 0, cpu);
		rcu_state_machine(&rcu_global_state_bh, &per_cpu(rcu_cpudata_bh, cpu), 0, cpu);
	}
}

/*
 * Invoke the completed RCU callbacks.
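 *
 * At most rcd->batchcount callbacks are invoked per softirq run; if
 * entries remain on the dead list afterwards, the softirq is raised
 * again, so that a long list of callbacks cannot monopolize the cpu.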
 */
static void rcu_do_batch(struct rcu_cpu_dead *rcd)
{
	struct rcu_head *list;
	int i, count;

	if (!rcd->deadqlen)
		return;

	/* step 1: pull up to rcd->batchcount objects */
	BUG_ON(irqs_disabled());
	local_irq_disable();

	if (rcd->deadqlen > rcd->batchcount) {
		struct rcu_head *walk;

		list = rcd->dead;
		count = rcd->batchcount;

		walk = rcd->dead;
		for (i = 0; i < count; i++)
			walk = walk->next;
		rcd->dead = walk;

	} else {
		list = rcd->dead;
		count = rcd->deadqlen;
		rcd->dead = NULL;
		rcd->deadtail = NULL;
	}
	rcd->deadqlen -= count;
	BUG_ON(rcd->deadqlen < 0);

	local_irq_enable();

	/* step 2: call the rcu callbacks */
	for (i = 0; i < count; i++) {
		struct rcu_head *next;

		next = list->next;
		prefetch(next);
		list->func(list);
		list = next;
	}

	/* step 3: if still entries left, raise the softirq again */
	if (rcd->deadqlen)
		raise_softirq(RCU_SOFTIRQ);
}

static void rcu_process_callbacks(struct softirq_action *unused)
{
	rcu_do_batch(&get_cpu_var(rcu_cpudata_dead));
	put_cpu_var(rcu_cpudata_dead);
}

static void __rcu_add_cpu(struct rcu_global_state *rgs, struct rcu_cpu_state *rcs, int cpu)
{
	rcs->state = rcu_cpumask_addcpu(&rgs->cpus, cpu);
}

#ifdef CONFIG_NO_HZ

void rcu_enter_nohz(void)
{
	int cpu = smp_processor_id();
	int *pmode;

	/*
	 * Calls to call_rcu() between rcu_needs_cpu() and rcu_enter_nohz()
	 * are not permitted.
	 * Thus both must be called with disabled local interrupts,
	 * without enabling the interrupts in between.
	 *
	 * Note: disabling interrupts only prevents call_rcu().
	 * It can obviously happen that another cpu forwards
	 * the state machine.  That doesn't hurt: __rcu_remove_cpu()
	 * does the work that we need to do.
	 */
	BUG_ON(!irqs_disabled());

	pmode = &get_cpu_var(rcu_cpumode);
	BUG_ON(*pmode != RCU_CPUMODE_DELAYED);
	*pmode = 0;
	put_cpu_var(rcu_cpumode);

	__rcu_remove_cpu(&rcu_global_state_normal, &get_cpu_var(rcu_cpudata_normal), cpu);
	put_cpu_var(rcu_cpudata_normal);
	__rcu_remove_cpu(&rcu_global_state_bh, &get_cpu_var(rcu_cpudata_bh), cpu);
	put_cpu_var(rcu_cpudata_bh);

	BUG_ON(rcu_needs_cpu(cpu));

	printk(KERN_ERR " enter_nohz %d.\n", cpu);
}

void rcu_exit_nohz(void)
{
	int cpu = smp_processor_id();
	int *pmode;

	BUG_ON(!irqs_disabled());

	pmode = &get_cpu_var(rcu_cpumode);
	BUG_ON(*pmode != 0);
	*pmode = RCU_CPUMODE_DELAYED;
	put_cpu_var(rcu_cpumode);

	__rcu_add_cpu(&rcu_global_state_normal, &get_cpu_var(rcu_cpudata_normal), cpu);
	put_cpu_var(rcu_cpudata_normal);
	__rcu_add_cpu(&rcu_global_state_bh, &get_cpu_var(rcu_cpudata_bh), cpu);
	put_cpu_var(rcu_cpudata_bh);

	printk(KERN_ERR " exit_nohz %d.\n", cpu);
}

void rcu_irq_enter(void)
{
	int *pmode;

	BUG_ON(!irqs_disabled());

	pmode = &get_cpu_var(rcu_cpumode);
	if (unlikely(*pmode != RCU_CPUMODE_DELAYED)) {
		printk(KERN_ERR " irq enter %d, %d.\n", smp_processor_id(), *pmode);
		/* FIXME:
		 * This code is not NMI safe, especially:
		 * __rcu_add_cpu() acquires spinlocks.
		 */
		if (*pmode == 0) {
			int cpu = smp_processor_id();

			__rcu_add_cpu(&rcu_global_state_normal, &get_cpu_var(rcu_cpudata_normal), cpu);
			put_cpu_var(rcu_cpudata_normal);
			__rcu_add_cpu(&rcu_global_state_bh, &get_cpu_var(rcu_cpudata_bh), cpu);
			put_cpu_var(rcu_cpudata_bh);
		}
		(*pmode)++;
	}
	put_cpu_var(rcu_cpumode);
}

void rcu_irq_exit(void)
{
	int *pmode;

	BUG_ON(!irqs_disabled());

	pmode = &get_cpu_var(rcu_cpumode);
	if (unlikely(*pmode != RCU_CPUMODE_DELAYED)) {
		printk(KERN_ERR " irq exit %d, %d.\n", smp_processor_id(), *pmode);

		(*pmode)--;

		if (*pmode == 0) {
			int cpu = smp_processor_id();
			/* FIXME:
			 * This code is not NMI safe, especially:
			 * __rcu_remove_cpu() acquires spinlocks.
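			 * The same limitation applies to __rcu_add_cpu()
			 * in rcu_irq_enter() above.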
			 */

			/*
			 * task 1: remove us from the list of cpus that might
			 * be inside critical sections and inform the global
			 * state machine that we are outside any read side
			 * critical sections.
			 */
			__rcu_remove_cpu(&rcu_global_state_normal, &per_cpu(rcu_cpudata_normal, cpu), cpu);
			__rcu_remove_cpu(&rcu_global_state_bh, &per_cpu(rcu_cpudata_bh, cpu), cpu);

			if (rcu_needs_cpu(cpu)) {
				/*
				 * task 2: Someone did a call_rcu() in the
				 * interrupt.  Duh, we've lost.  Force a
				 * reschedule, that leaves nohz mode.
				 *
				 * FIXME: double check that this really works.
				 *
				 * Note: This can race: our call_rcu() might
				 * have set start_immediately.  But: that
				 * start might happen before we re-add
				 * ourself to the global cpu mask.  Then we
				 * would not take part in the global cycle -
				 * and we would not set start_immediately
				 * again, either.  The timeout would ensure
				 * forward progress, thus it's not that bad.
				 */
				printk(KERN_ERR " irq exit %d - need resched.\n", cpu);
				set_need_resched();
			}
		}
	}
	put_cpu_var(rcu_cpumode);
}

#endif /* CONFIG_NO_HZ */

static void rcu_init_percpu_data(struct rcu_global_state *rgs, struct rcu_cpu_state *rcs, int cpu)
{
	__rcu_add_cpu(rgs, rcs, cpu);

	rcs->new = rcs->old = NULL;
	rcs->newqlen = rcs->oldqlen = 0;
}

static void __cpuinit rcu_online_cpu(int cpu)
{
	rcu_init_percpu_data(&rcu_global_state_normal, &per_cpu(rcu_cpudata_normal, cpu), cpu);
	rcu_init_percpu_data(&rcu_global_state_bh, &per_cpu(rcu_cpudata_bh, cpu), cpu);

	per_cpu(rcu_cpumode, cpu) = RCU_CPUMODE_DELAYED;

	per_cpu(rcu_cpudata_dead, cpu).dead = NULL;
	per_cpu(rcu_cpudata_dead, cpu).deadqlen = 0;
	per_cpu(rcu_cpudata_dead, cpu).batchcount = RCU_BATCH_MIN;

	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
}

static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
				unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		rcu_online_cpu(cpu);
		break;
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
		/*
		 * During CPU_UP_PREPARE, the cpu is fully accounted for
		 * and added into the rcu_cpumask.  Thus it must be properly
		 * removed if the CPU_UP failed.
		 * Therefore CPU_UP_CANCELED is equivalent to CPU_DEAD.
		 */
		/* fall-through */
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		rcu_offline_cpu(cpu);
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata rcu_nb = {
	.notifier_call	= rcu_cpu_notify,
};

/*
 * Initializes rcu mechanism.  Assumed to be called early.
 * That is before local timer(SMP) or jiffie timer (uniproc) is setup.
 * Note that rcu_qsctr and friends are implicitly
 * initialized due to the choice of ``0'' for RCU_CTR_INVALID.
 */
void __init __rcu_init(void)
{
	rcu_cpumask_init(&rcu_global_state_normal.cpus, RCU_STATE_DESTROY, 0);
	rcu_cpumask_init(&rcu_global_state_bh.cpus, RCU_STATE_DESTROY, 0);
	rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
			(void *)(long)smp_processor_id());
	/* Register notifier for non-boot CPUs */
	register_cpu_notifier(&rcu_nb);
}

module_param(qlowmark, int, 0);