/*
 * rcuclassic.c: user-level prototype of hierarchical classic RCU.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (c) 2008 Paul E. McKenney, IBM Corporation.
 */

#define CONFIG_RCU_FANOUT 3
#define NR_CPUS 5
/* #define CONFIG_RCU_FANOUT_EXACT */

#include <stdio.h>
#include "api.h"
#include "rcuclassic.h"

/* Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT. */

#define MAX_RCU_LEVELS 3
#if NR_CPUS <= CONFIG_RCU_FANOUT
#define NUM_RCU_LEVELS	1
#define NUM_RCU_LEVEL_1	1
#define NUM_RCU_LEVEL_2	NR_CPUS
#define NUM_RCU_LEVEL_3	0
#define NUM_RCU_LEVEL_4	0
#define NUM_RCU_NODES	NUM_RCU_LEVEL_1
#elif NR_CPUS <= CONFIG_RCU_FANOUT * CONFIG_RCU_FANOUT
#define NUM_RCU_LEVELS	2
#define NUM_RCU_LEVEL_1	1
#define NUM_RCU_LEVEL_2 \
	(((NR_CPUS) + (CONFIG_RCU_FANOUT) - 1) / (CONFIG_RCU_FANOUT))
#define NUM_RCU_LEVEL_3	NR_CPUS
#define NUM_RCU_LEVEL_4	0
#define NUM_RCU_NODES \
	((NUM_RCU_LEVEL_1) + (NUM_RCU_LEVEL_2))
#elif NR_CPUS <= CONFIG_RCU_FANOUT * CONFIG_RCU_FANOUT * CONFIG_RCU_FANOUT
#define NUM_RCU_LEVELS	3
#define RCU_FANOUT_SQ	((CONFIG_RCU_FANOUT) * (CONFIG_RCU_FANOUT))
#define NUM_RCU_LEVEL_1	1
#define NUM_RCU_LEVEL_2 \
	(((NR_CPUS) + (RCU_FANOUT_SQ) - 1) / (RCU_FANOUT_SQ))
#define NUM_RCU_LEVEL_3 \
	(((NR_CPUS) + (CONFIG_RCU_FANOUT) - 1) / (CONFIG_RCU_FANOUT))
#define NUM_RCU_LEVEL_4	NR_CPUS
#define NUM_RCU_NODES \
	((NUM_RCU_LEVEL_1) + \
	 (NUM_RCU_LEVEL_2) + \
	 (NUM_RCU_LEVEL_3))
#else
#error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
#endif

/* Data structure definitions. */

/*
 * Definition for node within the RCU grace-period-detection hierarchy.
 */
struct rcu_node {
	spinlock_t lock;
	long	qsmask;		/* CPUs or groups that need to switch in */
				/*  order for current grace period to proceed.*/
	long	qsmaskinit;	/* Per-GP initialization for qsmask. */
	int	grplo;		/* lowest-numbered CPU or group here. */
	int	grphi;		/* highest-numbered CPU or group here. */
	char	grpnum;		/* CPU/group number for next level up. */
	char	level;		/* root is at level 0. */
	struct rcu_node *parent;
} ____cacheline_internodealigned_in_smp;

/*
 * RCU global state, including node hierarchy.  This hierarchy is
 * represented in "heap" form in a dense array.  The root (first level)
 * of the hierarchy is in ->node[0] (referenced by ->level[0]), the second
 * level in ->node[1] through ->node[m] (->node[1] referenced by ->level[1]),
 * and the third level in ->node[m+1] and following (->node[m+1] referenced
 * by ->level[2]).  The number of levels is determined by the number of
 * CPUs and by CONFIG_RCU_FANOUT.  Small systems will have a "hierarchy"
 * consisting of a single rcu_node.
 */
struct rcu_state {
	struct rcu_node node[NUM_RCU_NODES];	/* Hierarchy. */
	struct rcu_node *level[NUM_RCU_LEVELS];	/* Hierarchy levels. */
	int levelcnt[MAX_RCU_LEVELS + 1];	/* # nodes in each level. */
	int levelspread[NUM_RCU_LEVELS];	/* kids/node in each level. */

	/* The following fields are guarded by the root rcu_node's lock. */

	char	signaled ____cacheline_internodealigned_in_smp;
						/* sent GP-kick IPIs? */
	int	gpnum;		/* Current gp number. */
	int	completed;	/* # of last completed gp. */
};
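
/*
 * For concreteness: with the settings at the top of this file (NR_CPUS=5,
 * CONFIG_RCU_FANOUT=3, CONFIG_RCU_FANOUT_EXACT undefined), the macros above
 * select a two-level hierarchy with NUM_RCU_LEVELS=2, ->levelcnt={1,2,5,0},
 * and NUM_RCU_NODES=3.  Once rcu_init_one() and rcu_init_levelspread()
 * below have run, the dense ->node[] array is laid out as:
 *
 *	node[0]: root, fanout 2, children are the two leaves
 *	node[1]: leaf, fanout 3, grplo=0, grphi=2 (CPUs 0-2), grpnum=0
 *	node[2]: leaf, fanout 3, grplo=3, grphi=4 (CPUs 3-4), grpnum=1
 *
 * with ->level[0] pointing at node[0] and ->level[1] at node[1].
 */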
#define RCU_STATE_INITIALIZER(name) { \
	.node = { { \
		.lock = __SPIN_LOCK_UNLOCKED(&name.node[0].lock), \
		.qsmask = 0, \
	} }, \
	.level = { &name.node[0] }, \
	.levelcnt = { \
		NUM_RCU_LEVEL_1,  /* root of hierarchy. */ \
		NUM_RCU_LEVEL_2, \
		NUM_RCU_LEVEL_3, \
		NUM_RCU_LEVEL_4,  /* == MAX_RCU_LEVELS */ \
	}, \
	.gpnum = -300, \
	.completed = -300, \
}

struct rcu_state rcu_state = RCU_STATE_INITIALIZER(rcu_state);
/* @@@ DEFINE_PER_CPU(struct rcu_data, rcu_data); */

struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
/* @@@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); */

static int blimit = 10;
static int qhimark = 10000;
static int qlowmark = 100;

/*
 * Does the current CPU require a not-yet-started grace period?
 */
static inline int
cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
{
	return *rdp->nxttail[RCU_DONE_TAIL] &&
	       ACCESS_ONCE(rsp->completed) == ACCESS_ONCE(rsp->gpnum);
}

/*
 * Return the root node of the specified rcu_state structure.
 */
static inline struct rcu_node *rcu_get_root(struct rcu_state *rsp)
{
	return &rsp->node[0];
}

/*
 * Compute the per-level fanout, either using the exact fanout specified
 * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT.
 */
#ifdef CONFIG_RCU_FANOUT_EXACT
void rcu_init_levelspread(struct rcu_state *rsp)
{
	int i;

	for (i = NUM_RCU_LEVELS - 1; i >= 0; i--)
		rsp->levelspread[i] = CONFIG_RCU_FANOUT;
}
#else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
void rcu_init_levelspread(struct rcu_state *rsp)
{
	int ccur;
	int cprv;
	int i;

	cprv = NR_CPUS;
	for (i = NUM_RCU_LEVELS - 1; i >= 0; i--) {
		ccur = rsp->levelcnt[i];
		rsp->levelspread[i] = (cprv + ccur - 1) / ccur;
		cprv = ccur;
	}
}
#endif /* #else #ifdef CONFIG_RCU_FANOUT_EXACT */

/*
 * When a given CPU first becomes aware of a grace period, it knows
 * that all of its pre-existing callbacks will be covered by the next
 * grace period.
 *
 * Similarly, if a given CPU has not yet let RCU know that it passed
 * through a quiescent state for the current grace period, then that
 * CPU knows that all of its callbacks may safely be invoked at the
 * end of the next grace period.
 */
static inline void rcu_next_callbacks_are_ready(struct rcu_data *rdp)
{
	rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
}

/*
 * Update local state to record the newly noticed grace period.
 */
static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp)
{
	rdp->qs_pending = 1;
	rdp->passed_quiesc = 0;
	rdp->gpnum = rsp->gpnum;
}

/*
 * Did a new RCU grace period start since we last checked?  Update
 * local state appropriately if so.
 */
static int
check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp)
{
	unsigned long flags;

	local_irq_save(flags);
	if (rdp->gpnum != rsp->gpnum) {
		note_new_gpnum(rsp, rdp);
		local_irq_restore(flags);
		return 1;
	}
	local_irq_restore(flags);
	return 0;
}
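
/*
 * A note on the grace-period numbering used below: rcu_start_gp() increments
 * rsp->gpnum when it starts a new grace period, and cpu_quiet() sets
 * rsp->completed equal to rsp->gpnum once the last quiescent state has been
 * reported.  Therefore rsp->completed == rsp->gpnum means that no grace
 * period is in progress, while rsp->completed == rsp->gpnum - 1 means that
 * one is; cpu_needs_another_gp() above tests the former condition.  The
 * per-CPU ->gpnum and ->completed snapshots lag the global values until
 * check_for_new_grace_period() and rcu_process_gp_end() catch them up.
 */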
/*
 * Start a new RCU grace period if warranted, re-initializing the hierarchy
 * in preparation for detecting the next grace period.  The caller must hold
 * the root node's ->lock, which is released before return.  Hard irqs must
 * be disabled.
 */
static void rcu_start_gp(struct rcu_state *rsp, struct rcu_data *rdp)
{
	struct rcu_node *rnp = rcu_get_root(rsp);
	struct rcu_node *rnp_cur;
	struct rcu_node *rnp_end;
	struct rcu_node *rnp_stack[NUM_RCU_LEVELS];

	if (!cpu_needs_another_gp(rsp, rdp)) {
		/*
		 * Either there is no need to detect any more grace periods
		 * at the moment, or we are already in the process of
		 * detecting one.  Either way, we should not start a new
		 * RCU grace period, so drop the lock and exit.
		 */
		spin_unlock(&rnp->lock);
		return;
	}

	/* Advance to a new grace period. */

	rsp->gpnum++;
	note_new_gpnum(rsp, rdp);

	/*
	 * Because we are first, we know that all our callbacks will
	 * be covered by this upcoming grace period, even the ones
	 * that were registered arbitrarily recently.
	 */
	rcu_next_callbacks_are_ready(rdp);
	rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];

	/* Special-case the common single-level case. */

	if (NUM_RCU_NODES == 1) {
		rnp->qsmask = rnp->qsmaskinit;
		spin_unlock(&rnp->lock);
		return;
	}

	spin_unlock(&rnp->lock);

	/*
	 * Set all the quiescent-state-needed bits in all the non-leaf
	 * RCU nodes.  This operation relies on the layout of the
	 * hierarchy within the rsp->node[] array.  Note that other
	 * CPUs will access only the leaves of the hierarchy, which
	 * still indicate that no grace period is in progress.
	 *
	 * We therefore do not need to hold any locks.  Any required
	 * memory barriers will be supplied by the locks guarding the
	 * leaf rcu_nodes in the hierarchy.
	 */

	rnp_end = rsp->level[NUM_RCU_LEVELS - 1];
	for (rnp_cur = &rsp->node[0]; rnp_cur < rnp_end; rnp_cur++)
		rnp_cur->qsmask = rnp_cur->qsmaskinit;

	/*
	 * Now set up the leaf nodes.  Here we must be careful.  First,
	 * we need to hold the lock in order to exclude other CPUs, which
	 * might be contending for the leaf nodes' locks.  Second, as
	 * soon as we initialize a given leaf node, its CPUs might run
	 * up the rest of the hierarchy.  Third, CPUs might be coming
	 * online and going offline during this time.  We must therefore
	 * acquire locks for each node that we touch during this stage.
	 *
	 * Note that the grace period cannot complete until we finish
	 * the initialization process, as there will be at least one
	 * qsmask bit set in the root node until that time, namely the
	 * one corresponding to this CPU.
	 */

	rnp_end = &rsp->node[NUM_RCU_NODES];
	rnp_cur = rsp->level[NUM_RCU_LEVELS - 1];
	for (; rnp_cur < rnp_end; rnp_cur++) {
		spin_lock(&rnp_cur->lock);
		rnp_cur->qsmask = rnp_cur->qsmaskinit;
		spin_unlock(&rnp_cur->lock);
	}
}

/*
 * Advance this CPU's callbacks after the end of an RCU grace period.
 */
static void rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp)
{
	long completed_snap;
	unsigned long flags;

	local_irq_save(flags);
	completed_snap = ACCESS_ONCE(rsp->completed);

	/* Did another grace period end? */

	if (rdp->completed != completed_snap) {

		/* Advance callbacks.  No harm if list empty. */

		rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL];
		rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL];
		rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];

		/* Remember that we saw this grace-period completion. */

		rdp->completed = completed_snap;
	}
	local_irq_restore(flags);
}
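
/*
 * The per-CPU callback list advanced above (and drained by rcu_do_batch()
 * below) is a single linked list, rdp->nxtlist, carved into segments by the
 * tail pointers in rdp->nxttail[]:
 *
 *	nxttail[RCU_DONE_TAIL]:		end of callbacks ready to invoke
 *	nxttail[RCU_WAIT_TAIL]:		end of callbacks waiting for the
 *					current grace period
 *	nxttail[RCU_NEXT_READY_TAIL]:	end of callbacks that will wait for
 *					the next grace period
 *	nxttail[RCU_NEXT_TAIL]:		end of the list
 *
 * __call_rcu() appends new callbacks at *nxttail[RCU_NEXT_TAIL], and each
 * completed grace period lets rcu_process_gp_end() promote the segments one
 * step toward RCU_DONE_TAIL.
 */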
/*
 * Record a quiescent state for the specified CPU.  Note that a CPU
 * going offline counts as a quiescent state.
 */
static void cpu_quiet(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
{
	long mask;
	struct rcu_node *rnp;

	rnp = rdp->mynode;
	spin_lock(&rnp->lock);
	mask = 1L << (cpu - rnp->grplo);
	for (;;) {
		if (!(rnp->qsmask & mask)) {

			/* Our bit has already been cleared, so done. */

			spin_unlock(&rnp->lock);
			return;
		}
		rnp->qsmask &= ~mask;
		if (rnp->qsmask != 0) {

			/* Other bits still set at this level, so done. */

			spin_unlock(&rnp->lock);
			return;
		}
		mask = 1L << rnp->grpnum;
		if (rnp->parent == NULL) {

			/* No more levels. */

			break;
		}
		spin_unlock(&rnp->lock);
		rnp = rnp->parent;
		spin_lock(&rnp->lock);
	}

	/*
	 * Get here if we are the last CPU to pass through a quiescent
	 * state for this grace period.  Clean up and let rcu_start_gp()
	 * start up the next grace period if one is needed.  Note that
	 * we still hold rnp->lock, as required by rcu_start_gp().
	 */
	rsp->completed = rsp->gpnum;
/*&&&&*/printf("cpu_quiet: end of grace period detected by %d.\n", rdp->cpu);
	rcu_process_gp_end(rsp, rdp);
	rcu_start_gp(rsp, rdp);
}

/*
 * Check to see if there is a new grace period of which this CPU
 * is not yet aware, and if so, set up local rcu_data state for it.
 * Otherwise, see if this CPU has just passed through its first
 * quiescent state for this grace period, and record that fact if so.
 */
static void
rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
{
	/* If there is now a new grace period, record and return. */

	if (check_for_new_grace_period(rsp, rdp))
		return;

	/* Did this CPU already do its part for the current grace period? */

	if (!rdp->qs_pending)
		return;

	/*
	 * Was there a quiescent state since the beginning of the grace
	 * period?  If not, then exit and wait for the next call.
	 */
	if (!rdp->passed_quiesc)
		return;

	/*
	 * Say we did our quiescent state, and set up to process all
	 * currently pending callbacks at the end of the next grace
	 * period.
	 */
	rdp->qs_pending = 0;
	rcu_next_callbacks_are_ready(rdp);
	cpu_quiet(rdp->cpu, rsp, rdp);

	/*
	 * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
	 * during cpu startup.  Ignore the quiescent state.  @@@ fixed???
	 */
}
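
/*
 * Worked example of the bit-clearing walk in cpu_quiet() above, using the
 * default NR_CPUS=5, CONFIG_RCU_FANOUT=3 geometry: suppose CPU 4 reports
 * its quiescent state.  Its leaf is node[2] (grplo=3), so it clears bit
 * 4-3=1 in node[2]->qsmask.  If that leaves node[2]->qsmask zero, the
 * leaf's own bit (grpnum=1) is cleared in the root's ->qsmask.  If the
 * root's mask is then also zero, CPU 4 was the last CPU to check in for
 * this grace period, so rsp->completed is advanced to rsp->gpnum and
 * rcu_start_gp() is invoked to start the next grace period if one is
 * needed.
 */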
#ifdef CONFIG_HOTPLUG_CPU

/*
 * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy
 * and move all callbacks from the outgoing CPU to the current one.
 */
static void
__rcu_offline_cpu(int cpu, struct rcu_state *rsp, struct rcu_data *rdp,
		  struct rcu_data *rdp_me)
{
	int i;
	long mask;
	struct rcu_node *rnp;

	/* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */

	rnp = rdp->mynode;
	spin_lock(&rnp->lock);
	mask = 1L << (cpu - rnp->grplo);
	for (;;) {
		rnp->qsmaskinit &= ~mask;
		if (rnp->qsmaskinit != 0) {
			spin_unlock(&rnp->lock);
			break;
		}
		mask = 1L << rnp->grpnum;
		spin_unlock(&rnp->lock);
		rnp = rnp->parent;
		if (rnp == NULL)
			break;
		spin_lock(&rnp->lock);
	}

	/* Being offline is a quiescent state, so go record it. */

	cpu_quiet(cpu, rsp, rdp);

	/*
	 * Move callbacks from the outgoing CPU to the running CPU.
	 * Note that the outgoing CPU is now quiescent, so it is now
	 * (uncharacteristically) safe to access its rcu_data structure.
	 * Note also that we must carefully retain the order of the
	 * outgoing CPU's callbacks in order for rcu_barrier() to work
	 * correctly.  Finally, note that we start all the callbacks
	 * afresh, even those that have passed through a grace period
	 * and are therefore ready to invoke.  The theory is that hotplug
	 * events are rare, and that if they are frequent enough to
	 * indefinitely delay callbacks, you have far worse things to
	 * be worrying about.
	 *
	 * We disable irqs to prevent races with call_rcu() invoked
	 * from interrupt handlers.
	 */
	if (rdp->nxtlist != NULL) {
		local_irq_disable();
		*rdp_me->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist;
		rdp_me->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
		rdp->nxtlist = NULL;
		for (i = 0; i < RCU_NEXT_SIZE; i++)
			rdp->nxttail[i] = &rdp->nxtlist;
		local_irq_enable();
	}
}

/*
 * Remove the specified CPU from the RCU hierarchy and move any pending
 * callbacks that it might have to the current CPU.  This code assumes
 * that at least one CPU in the system will remain running at all times.
 * Any attempt to offline -all- CPUs is likely to strand RCU callbacks.
 */
static void rcu_offline_cpu(int cpu) /* !HOTPLUG_CPU @@@ */
{
	struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
	struct rcu_data *rdp_me = &__get_cpu_var(rcu_data);
	struct rcu_data *bh_rdp = &per_cpu(rcu_bh_data, cpu);
	struct rcu_data *bh_rdp_me = &__get_cpu_var(rcu_bh_data);

	__rcu_offline_cpu(cpu, &rcu_state, rdp, rdp_me);
	__rcu_offline_cpu(cpu, &rcu_bh_state, bh_rdp, bh_rdp_me);
}

#else /* #ifdef CONFIG_HOTPLUG_CPU */

static inline void rcu_offline_cpu(int cpu)
{
}

#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */

/*
 * Invoke any RCU callbacks that have made it to the end of their grace
 * period.
 */
static void rcu_do_batch(struct rcu_data *rdp)
{
	struct rcu_head *next, *list, **tail;
	int count;

	/* If no callbacks are ready, just return. */

	if (&rdp->nxtlist == rdp->nxttail[RCU_DONE_TAIL])
		return;

	/*
	 * Extract the list of ready callbacks, disabling to prevent
	 * races with call_rcu() from interrupt handlers.
	 */
	local_irq_disable();
	list = rdp->nxtlist;
	rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
	*rdp->nxttail[RCU_DONE_TAIL] = NULL;
	tail = rdp->nxttail[RCU_DONE_TAIL];
	for (count = RCU_NEXT_SIZE - 1; count >= 0; count--)
		if (rdp->nxttail[count] == rdp->nxttail[RCU_DONE_TAIL])
			rdp->nxttail[count] = &rdp->nxtlist;
	local_irq_enable();

	/* Invoke callbacks. */

	count = 0;
	while (list) {
		next = list->next;
		prefetch(next);
		list->func(list);
		list = next;
		if (++count >= rdp->blimit)
			break;
	}

	/* Update count, and requeue any remaining callbacks. */

	local_irq_disable();
	rdp->qlen -= count;
	if (list != NULL) {
		*tail = rdp->nxtlist;
		rdp->nxtlist = list;
		for (count = 0; count < RCU_NEXT_SIZE; count++)
			if (&rdp->nxtlist == rdp->nxttail[count])
				rdp->nxttail[count] = tail;
			else
				break;
	}
	local_irq_enable();

	/* Reinstate batch limit if we have worked down the excess. */

	if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
		rdp->blimit = blimit;

	/* Re-raise the RCU softirq if there are callbacks remaining. */

	if (&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL])
		raise_rcu_softirq();
}

/*
 * This does the RCU processing work from softirq context.
 */
static void
__rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
{
	/*
	 * Advance callbacks in response to end of earlier grace
	 * period that some other CPU ended.
	 */
	rcu_process_gp_end(rsp, rdp);

	/* Update RCU state based on any recent quiescent states. */

	rcu_check_quiescent_state(rsp, rdp);

	/* Does this CPU require a not-yet-started grace period? */

	if (cpu_needs_another_gp(rsp, rdp)) {
		spin_lock(&rcu_get_root(rsp)->lock);
		rcu_start_gp(rsp, rdp); /* releases root node's ->lock. */
	}

	rcu_do_batch(rdp);
}
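
/*
 * A note on callback throttling in rcu_do_batch() above: each pass normally
 * invokes at most rdp->blimit callbacks (initially 10), requeues the rest,
 * and re-raises the softirq.  If __call_rcu() below ever sees the queue
 * length ->qlen exceed qhimark (10000), it lifts the limit by setting
 * rdp->blimit to INT_MAX; once the queue has been worked back down to
 * qlowmark (100), rcu_do_batch() restores the normal limit of blimit (10).
 */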
static void rcu_process_callbacks(struct softirq_action *unused)
{
	/*
	 * Memory references from any prior RCU read-side critical sections
	 * executed by the interrupted code must be seen before any RCU
	 * grace-period manipulations below.
	 */
	smp_mb(); /* See above block comment. */

	__rcu_process_callbacks(&rcu_state, &__get_cpu_var(rcu_data));
	__rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));

	/*
	 * Memory references from any later RCU read-side critical sections
	 * executed by the interrupted code must be seen after any RCU
	 * grace-period manipulations above.
	 */
	smp_mb(); /* See above block comment. */
}

/*
 * Check to see if there is any immediate RCU-related work to be done
 * by the current CPU, for the specified type of RCU, returning 1 if so.
 * The checks are in order of increasing expense: checks that can be
 * carried out against CPU-local state are performed first.
 */
static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
{
	/* Check for CPU stalls, if enabled. */
	/* @@@ check_cpu_stall(rsp, rdp); @@@ */

	/* Is the RCU core waiting for a quiescent state from this CPU? */
	if (rdp->qs_pending)
		return 1;

	/* Does this CPU have finished callbacks to invoke? */
	if (rdp->nxttail[RCU_DONE_TAIL] != &rdp->nxtlist)
		return 1;

	/* Are there callbacks waiting for a GP that needs to be started? */
	if (cpu_needs_another_gp(rsp, rdp))
		return 1;

	/* Has another RCU grace period been detected? */
	if (ACCESS_ONCE(rsp->completed) != rdp->completed)
		return 1;

	/* nothing to do */
	return 0;
}

/*
 * Check to see if there is any immediate RCU-related work to be done
 * by the current CPU, returning 1 if so.  This function is part of the
 * RCU implementation; it is -not- an exported member of the RCU API.
 */
int rcu_pending(int cpu)
{
	return __rcu_pending(&rcu_state, &per_cpu(rcu_data, cpu)) ||
	       __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu));
}

/*
 * Check to see if this CPU is in a non-context-switch quiescent state
 * (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
 * Also schedule the RCU softirq handler.
 *
 * This function must be called with hardirqs disabled.  It is normally
 * invoked from the scheduling-clock interrupt.  If rcu_pending() returns
 * false, there is no point in invoking rcu_check_callbacks().
 */
void rcu_check_callbacks(int cpu, int user)
{
	if (user ||
	    (idle_cpu(cpu) && !in_softirq() &&
	     hardirq_count() <= (1 << HARDIRQ_SHIFT))) {

		/*
		 * Get here if this CPU took its interrupt from user
		 * mode or from the idle loop, and if this is not a
		 * nested interrupt.  In this case, the CPU is in
		 * a quiescent state, so count it.
		 *
		 * Also do a memory barrier.  This is needed to handle
		 * the case where writes from a preempt-disable section
		 * of code get reordered into schedule() by this CPU's
		 * write buffer.  The memory barrier makes sure that
		 * the rcu_qsctr_inc() and rcu_bh_qsctr_inc() are seen
		 * by other CPUs to happen after any such write.
		 */
		smp_mb(); /* See above block comment. */
		rcu_qsctr_inc(cpu);
		rcu_bh_qsctr_inc(cpu);

	} else if (!in_softirq()) {

		/*
		 * Get here if this CPU did not take its interrupt from
		 * softirq, in other words, if it is not interrupting
		 * a rcu_bh read-side critical section.  This is an _bh
		 * critical section, so count it.  The memory barrier
		 * is needed for the same reason as is the above one.
		 */
		smp_mb(); /* See above block comment. */
		rcu_bh_qsctr_inc(cpu);
	}
	raise_rcu_softirq();
}

static void
__call_rcu(struct rcu_head *head, struct rcu_state *rsp, struct rcu_data *rdp)
{
	smp_mb(); /* Ensure RCU update seen before callback registry. */

	/*
	 * Opportunistically note grace-period endings and beginnings.
	 * Note that we might see a beginning right after we see an
	 * end, but never vice versa, since this CPU has to pass through
	 * a quiescent state betweentimes.
	 */
	rcu_process_gp_end(rsp, rdp);
	check_for_new_grace_period(rsp, rdp);

	*rdp->nxttail[RCU_NEXT_TAIL] = head;
	rdp->nxttail[RCU_NEXT_TAIL] = &head->next;

	if (unlikely(++rdp->qlen > qhimark)) {
		rdp->blimit = INT_MAX;
		/* @@@ force_quiescent_state(rsp, rdp); */
	}
}

/**
 * call_rcu - Queue an RCU callback for invocation after a grace period.
 * @head: structure to be used for queueing the RCU updates.
 * @func: actual update function to be invoked after the grace period
 *
 * The update function will be invoked some time after a full grace
 * period elapses, in other words after all currently executing RCU
 * read-side critical sections have completed.  RCU read-side critical
 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
 * and may be nested.
 */
void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
	unsigned long flags;

	head->func = func;
	head->next = NULL;
	local_irq_save(flags);
	__call_rcu(head, &rcu_state, &__get_cpu_var(rcu_data));
	local_irq_restore(flags);
}
/* @@@ EXPORT_SYMBOL_GPL(call_rcu); */

/**
 * call_rcu_bh - Queue an RCU callback for invocation after a quicker grace period.
 * @head: structure to be used for queueing the RCU updates.
 * @func: actual update function to be invoked after the grace period
 *
 * The update function will be invoked some time after a full grace
 * period elapses, in other words after all currently executing RCU
 * read-side critical sections have completed.  call_rcu_bh() assumes
 * that the read-side critical sections end on completion of a softirq
 * handler.  This means that read-side critical sections in process
 * context must not be interrupted by softirqs.  This interface is to be
 * used when most of the read-side critical sections are in softirq context.
 * RCU read-side critical sections are delimited by:
 *  - rcu_read_lock() and rcu_read_unlock(), if in interrupt context, or
 *  - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
 * These may be nested.
 */
void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
	unsigned long flags;

	head->func = func;
	head->next = NULL;
	local_irq_save(flags);
	__call_rcu(head, &rcu_bh_state, &__get_cpu_var(rcu_bh_data));
	local_irq_restore(flags);
}
/* @@@ EXPORT_SYMBOL_GPL(call_rcu_bh); */
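
/*
 * Illustrative use of call_rcu() above.  The names "foo", foo_cb(), and
 * foo_retire() are hypothetical examples only, not part of the RCU
 * implementation: they sketch the usual pattern of embedding a struct
 * rcu_head in an update-side data structure and deferring its reclamation
 * until a grace period has elapsed.
 */
struct foo {
	struct rcu_head rh;	/* First field, so &fp->rh aliases fp. */
	int data;
};

static void foo_cb(struct rcu_head *rhp)
{
	struct foo *fp = (struct foo *)rhp; /* Valid: rh is the first field. */

	/* A real callback would typically free fp; just report instead. */
	printf("grace period elapsed for foo with data %d\n", fp->data);
}

/* Queue fp for deferred processing once all pre-existing readers finish. */
static void foo_retire(struct foo *fp)
{
	/* Caller must already have unlinked fp from reader-visible paths. */
	call_rcu(&fp->rh, foo_cb);
}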
/*
 * Initialize a CPU's per-CPU RCU data.  We take this "scorched earth"
 * approach so that we don't have to worry about how long the CPU has
 * been gone, or whether it ever was online previously.  We do trust the
 * ->mynode field, as it is constant for a given struct rcu_data and
 * initialized during early boot.
 *
 * Note that only one online or offline event can be happening at a given
 * time.  Note also that we can accept some slop in the rsp->completed
 * access due to the fact that this CPU cannot possibly have any RCU
 * callbacks in flight yet.
 */
static void
rcu_init_percpu_data(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
{
	long completed_snap;
	int i;
	long mask;
	struct rcu_node *rnp = rdp->mynode;

	spin_lock(&rnp->lock);
	completed_snap = ACCESS_ONCE(rsp->completed);
	memset(rdp, 0, sizeof(*rdp));
	rdp->completed = completed_snap;
	rdp->gpnum = completed_snap;
	rdp->passed_quiesc = 1;
	rdp->qs_pending = 0;
	rdp->mynode = rnp;
	for (i = 0; i < RCU_NEXT_SIZE; i++)
		rdp->nxttail[i] = &rdp->nxtlist;
	rdp->blimit = /* @@@ blimit */ 10;
	rdp->cpu = cpu;

	/* Add CPU to rcu_node bitmasks. */

	mask = 1L << (cpu - rnp->grplo);
	for (;;) {
		rnp->qsmaskinit |= mask;
		mask = 1L << rnp->grpnum;
		spin_unlock(&rnp->lock);
		rnp = rnp->parent;
		if ((rnp == NULL) || !!(rnp->qsmaskinit & mask))
			break;
		spin_lock(&rnp->lock);
	}
}

static void __cpuinit rcu_online_cpu(int cpu)
{
	struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
	struct rcu_data *bh_rdp = &per_cpu(rcu_bh_data, cpu);

	rcu_init_percpu_data(cpu, &rcu_state, rdp);
	rcu_init_percpu_data(cpu, &rcu_bh_state, bh_rdp);
	/* open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL); @@@ */
}

/*
 * Handle CPU online/offline notification events.
 */
static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
				    unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		rcu_online_cpu(cpu);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		rcu_offline_cpu(cpu);
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

/*
 * Helper function for rcu_init() that initializes one rcu_state structure.
 */
static void __init rcu_init_one(struct rcu_state *rsp)
{
	int i;
	int j;
	struct rcu_node *rnp;

	/* Initialize the level-tracking arrays. */

	for (i = 1; i < NUM_RCU_LEVELS; i++)
		rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];
	rcu_init_levelspread(rsp);

	/* Initialize the elements themselves, starting from the leaves. */

	for (i = NUM_RCU_LEVELS - 1; i > 0; i--) {
		rnp = rsp->level[i];
		for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
			spin_lock_init(&rnp->lock);
			rnp->qsmask = rsp->node[0].qsmask;
			rnp->grplo = j * rsp->levelspread[i];
			rnp->grphi = (j + 1) * rsp->levelspread[i] - 1;
			if (rnp->grphi >= rsp->levelcnt[i + 1])
				rnp->grphi = rsp->levelcnt[i + 1] - 1;
			rnp->qsmaskinit = 0;
			if (i != NUM_RCU_LEVELS - 1)
				rnp->grplo = rnp->grphi = 0;
			rnp->grpnum = j % rsp->levelspread[i - 1];
			rnp->level = i;
			rnp->parent = rsp->level[i - 1] +
				      j / rsp->levelspread[i - 1];
		}
	}

	/* Initialize the root of the hierarchy. */

	rsp->node[0].qsmaskinit = 0;
	rsp->node[0].grpnum = -1;
	rsp->signaled = 0;
}

/*
 * Helper macro for rcu_init().  To be used nowhere else!
 * Assigns leaf node pointers into each CPU's rcu_data structure.
 */
#define RCU_DATA_PTR_INIT(rsp, rcu_data) \
do { \
	rnp = (rsp)->level[NUM_RCU_LEVELS - 1]; \
	j = 0; \
	for_each_possible_cpu(i) { \
		if (i > rnp[j].grphi) \
			j++; \
		per_cpu(rcu_data, i).mynode = &rnp[j]; \
	} \
} while (0)

static struct notifier_block __cpuinitdata rcu_nb = {
	.notifier_call	= rcu_cpu_notify,
};

static void __init rcu_init(void)
{
	int i;			/* All used by RCU_DATA_PTR_INIT(). */
	int j;
	struct rcu_node *rnp;

	rcu_init_one(&rcu_state);
	RCU_DATA_PTR_INIT(&rcu_state, rcu_data);
	rcu_init_one(&rcu_bh_state);
	RCU_DATA_PTR_INIT(&rcu_bh_state, rcu_bh_data);

	for_each_online_cpu(i)
		rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long)i);
#if 0 /* @@@ */
	/* Register notifier for non-boot CPUs */
	register_cpu_notifier(&rcu_nb);
#endif /* @@@ #if 0 */
}