From 5a6c7ea8b91c21631d47ce767b1aaf5141967a52 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?=
Date: Sat, 11 May 2013 18:58:20 +0200
Subject: [PATCH] Revert "rcu: Make RCU_FAST_NO_HZ take advantage of numbered
 callbacks"

This reverts commit c0f4dfd4f90f1667d234d21f15153ea09a2eaa66.

Conflicts:
	kernel/rcutree_plugin.h
---
 Documentation/kernel-parameters.txt |   28 +--
 include/linux/rcupdate.h            |    1 -
 init/Kconfig                        |   17 +-
 kernel/rcutree.c                    |   28 ++-
 kernel/rcutree.h                    |   12 +-
 kernel/rcutree_plugin.h             |  374 +++++++++++++++++++++++++----------
 kernel/rcutree_trace.c              |    2 +
 7 files changed, 313 insertions(+), 149 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index c3bfacb..424c319 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2561,17 +2561,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			leaf rcu_node structure.  Useful for very large
 			systems.
 
-	rcutree.jiffies_till_first_fqs= [KNL,BOOT]
-			Set delay from grace-period initialization to
-			first attempt to force quiescent states.
-			Units are jiffies, minimum value is zero,
-			and maximum value is HZ.
-
-	rcutree.jiffies_till_next_fqs= [KNL,BOOT]
-			Set delay between subsequent attempts to force
-			quiescent states.  Units are jiffies, minimum
-			value is one, and maximum value is HZ.
-
 	rcutree.qhimark= [KNL,BOOT]
 			Set threshold of queued RCU callbacks over which
 			batch limiting is disabled.
@@ -2586,15 +2575,16 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	rcutree.rcu_cpu_stall_timeout= [KNL,BOOT]
 			Set timeout for RCU CPU stall warning messages.
 
-	rcutree.rcu_idle_gp_delay= [KNL,BOOT]
-			Set wakeup interval for idle CPUs that have
-			RCU callbacks (RCU_FAST_NO_HZ=y).
+	rcutree.jiffies_till_first_fqs= [KNL,BOOT]
+			Set delay from grace-period initialization to
+			first attempt to force quiescent states.
+			Units are jiffies, minimum value is zero,
+			and maximum value is HZ.
 
-	rcutree.rcu_idle_lazy_gp_delay= [KNL,BOOT]
-			Set wakeup interval for idle CPUs that have
-			only "lazy" RCU callbacks (RCU_FAST_NO_HZ=y).
-			Lazy RCU callbacks are those which RCU can
-			prove do nothing more than free memory.
+	rcutree.jiffies_till_next_fqs= [KNL,BOOT]
+			Set delay between subsequent attempts to force
+			quiescent states.  Units are jiffies, minimum
+			value is one, and maximum value is HZ.
 
 	rcutorture.fqs_duration= [KNL,BOOT]
 			Set duration of force_quiescent_state bursts.
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 4ccd68e..8e0948c 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -80,7 +80,6 @@ extern void do_trace_rcu_torture_read(char *rcutorturename,
 #define UINT_CMP_LT(a, b)	(UINT_MAX / 2 < (a) - (b))
 #define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))
 #define ULONG_CMP_LT(a, b)	(ULONG_MAX / 2 < (a) - (b))
-#define ulong2long(a)		(*(long *)(&(a)))
 
 /* Exported common interfaces */
 
diff --git a/init/Kconfig b/init/Kconfig
index 9d3a788..94c4e55 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -579,16 +579,13 @@ config RCU_FAST_NO_HZ
 	depends on NO_HZ_COMMON && SMP
 	default n
 	help
-	  This option permits CPUs to enter dynticks-idle state even if
-	  they have RCU callbacks queued, and prevents RCU from waking
-	  these CPUs up more than roughly once every four jiffies (by
-	  default, you can adjust this using the rcutree.rcu_idle_gp_delay
-	  parameter), thus improving energy efficiency.  On the other
-	  hand, this option increases the duration of RCU grace periods,
-	  for example, slowing down synchronize_rcu().
-
-	  Say Y if energy efficiency is critically important, and you
-	  don't care about increased grace-period durations.
+	  This option causes RCU to attempt to accelerate grace periods in
+	  order to allow CPUs to enter dynticks-idle state more quickly.
+	  On the other hand, this option increases the overhead of the
+	  dynticks-idle checking, thus degrading scheduling latency.
+
+	  Say Y if energy efficiency is critically important, and you don't
+	  care about real-time response.
 
 	  Say N if you are unsure.
 
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 16ea679..0d152ab 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -2775,27 +2775,19 @@ static int rcu_pending(int cpu)
 }
 
 /*
- * Return true if the specified CPU has any callback.  If all_lazy is
- * non-NULL, store an indication of whether all callbacks are lazy.
- * (If there are no callbacks, all of them are deemed to be lazy.)
+ * Check to see if any future RCU-related work will need to be done
+ * by the current CPU, even if none need be done immediately, returning
+ * 1 if so.
  */
-static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy)
+static int rcu_cpu_has_callbacks(int cpu)
 {
-	bool al = true;
-	bool hc = false;
-	struct rcu_data *rdp;
 	struct rcu_state *rsp;
 
-	for_each_rcu_flavor(rsp) {
-		rdp = per_cpu_ptr(rsp->rda, cpu);
-		if (rdp->qlen != rdp->qlen_lazy)
-			al = false;
-		if (rdp->nxtlist)
-			hc = true;
-	}
-	if (all_lazy)
-		*all_lazy = al;
-	return hc;
+	/* RCU callbacks either ready or pending? */
+	for_each_rcu_flavor(rsp)
+		if (per_cpu_ptr(rsp->rda, cpu)->nxtlist)
+			return 1;
+	return 0;
 }
 
 /*
@@ -3014,6 +3006,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
 	rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
 	atomic_set(&rdp->dynticks->dynticks,
 		   (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
+	rcu_prepare_for_idle_init(cpu);
 	raw_spin_unlock(&rnp->lock);		/* irqs remain disabled. */
 
 	/* Add CPU to rcu_node bitmasks. */
@@ -3082,6 +3075,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 	case CPU_DYING_FROZEN:
 		for_each_rcu_flavor(rsp)
 			rcu_cleanup_dying_cpu(rsp);
+		rcu_cleanup_after_idle(cpu);
 		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index da77a8f..e628db5 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -88,13 +88,18 @@ struct rcu_dynticks {
 	int dynticks_nmi_nesting;   /* Track NMI nesting level. */
 	atomic_t dynticks;	    /* Even value for idle, else odd. */
 #ifdef CONFIG_RCU_FAST_NO_HZ
-	bool all_lazy;		    /* Are all CPU's CBs lazy? */
+	int dyntick_drain;	    /* Prepare-for-idle state variable. */
+	unsigned long dyntick_holdoff;
+				    /* No retries for the jiffy of failure. */
+	struct timer_list idle_gp_timer;
+				    /* Wake up CPU sleeping with callbacks. */
+	unsigned long idle_gp_timer_expires;
+				    /* When to wake up CPU (for repost). */
+	bool idle_first_pass;	    /* First pass of attempt to go idle? */
 	unsigned long nonlazy_posted;
 				    /* # times non-lazy CBs posted to CPU. */
 	unsigned long nonlazy_posted_snap;
 				    /* idle-period nonlazy_posted snapshot. */
-	unsigned long last_accelerate;
-				    /* Last jiffy CBs were accelerated. */
 	int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
 };
@@ -518,6 +523,7 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
 						 struct rcu_node *rnp);
 #endif /* #ifdef CONFIG_RCU_BOOST */
 static void __cpuinit rcu_prepare_kthreads(int cpu);
+static void rcu_prepare_for_idle_init(int cpu);
 static void rcu_cleanup_after_idle(int cpu);
 static void rcu_prepare_for_idle(int cpu);
 static void rcu_idle_count_callbacks_posted(void);
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 170814d..76f79fa 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1544,7 +1544,14 @@ static void __cpuinit rcu_prepare_kthreads(int cpu)
 int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
 {
 	*delta_jiffies = ULONG_MAX;
-	return rcu_cpu_has_callbacks(cpu, NULL);
+	return rcu_cpu_has_callbacks(cpu);
+}
+
+/*
+ * Because we do not have RCU_FAST_NO_HZ, don't bother initializing for it.
+ */
+static void rcu_prepare_for_idle_init(int cpu)
+{
 }
 
 /*
@@ -1581,6 +1588,16 @@ static void rcu_idle_count_callbacks_posted(void)
 *
 * The following three preprocessor symbols control this state machine:
 *
+ * RCU_IDLE_FLUSHES gives the maximum number of times that we will attempt
+ *	to satisfy RCU.  Beyond this point, it is better to incur a periodic
+ *	scheduling-clock interrupt than to loop through the state machine
+ *	at full power.
+ * RCU_IDLE_OPT_FLUSHES gives the number of RCU_IDLE_FLUSHES that are
+ *	optional if RCU does not need anything immediately from this
+ *	CPU, even if this CPU still has RCU callbacks queued.  The first
+ *	times through the state machine are mandatory: we need to give
+ *	the state machine a chance to communicate a quiescent state
+ *	to the RCU core.
 * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted
 *	to sleep in dyntick-idle mode with RCU callbacks pending.  This
 *	is sized to be roughly one RCU grace period.  Those energy-efficiency
@@ -1596,9 +1613,15 @@ static void rcu_idle_count_callbacks_posted(void)
 * adjustment, they can be converted into kernel config parameters, though
 * making the state machine smarter might be a better option.
 */
+#define RCU_IDLE_FLUSHES 5		/* Number of dyntick-idle tries. */
+#define RCU_IDLE_OPT_FLUSHES 3		/* Optional dyntick-idle tries. */
 #define RCU_IDLE_GP_DELAY 4		/* Roughly one grace period. */
 #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ)	/* Roughly six seconds. */
 
+static int rcu_idle_flushes = RCU_IDLE_FLUSHES;
+module_param(rcu_idle_flushes, int, 0644);
+static int rcu_idle_opt_flushes = RCU_IDLE_OPT_FLUSHES;
+module_param(rcu_idle_opt_flushes, int, 0644);
 static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY;
 module_param(rcu_idle_gp_delay, int, 0644);
 static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY;
@@ -1607,97 +1630,178 @@ module_param(rcu_idle_lazy_gp_delay, int, 0644);
 extern int tick_nohz_enabled;
 
 /*
- * Try to advance callbacks for all flavors of RCU on the current CPU.
- * Afterwards, if there are any callbacks ready for immediate invocation,
- * return true.
+ * Does the specified flavor of RCU have non-lazy callbacks pending on
+ * the specified CPU?  Both RCU flavor and CPU are specified by the
+ * rcu_data structure.
  */
-static bool rcu_try_advance_all_cbs(void)
+static bool __rcu_cpu_has_nonlazy_callbacks(struct rcu_data *rdp)
 {
-	bool cbs_ready = false;
-	struct rcu_data *rdp;
-	struct rcu_node *rnp;
-	struct rcu_state *rsp;
+	return rdp->qlen != rdp->qlen_lazy;
+}
 
-	for_each_rcu_flavor(rsp) {
-		rdp = this_cpu_ptr(rsp->rda);
-		rnp = rdp->mynode;
+#ifdef CONFIG_TREE_PREEMPT_RCU
 
-		/*
-		 * Don't bother checking unless a grace period has
-		 * completed since we last checked and there are
-		 * callbacks not yet ready to invoke.
-		 */
-		if (rdp->completed != rnp->completed &&
-		    rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL])
-			rcu_process_gp_end(rsp, rdp);
+/*
+ * Are there non-lazy RCU-preempt callbacks?  (There cannot be if there
+ * is no RCU-preempt in the kernel.)
+ */
+static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu)
+{
+	struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
 
-		if (cpu_has_callbacks_ready_to_invoke(rdp))
-			cbs_ready = true;
-	}
-	return cbs_ready;
+	return __rcu_cpu_has_nonlazy_callbacks(rdp);
+}
+
+#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
+
+static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu)
+{
+	return 0;
 }
 
+#endif /* else #ifdef CONFIG_TREE_PREEMPT_RCU */
+
 /*
- * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
- * to invoke.  If the CPU has callbacks, try to advance them.  Tell the
- * caller to set the timeout based on whether or not there are non-lazy
- * callbacks.
+ * Does any flavor of RCU have non-lazy callbacks on the specified CPU?
+ */
+static bool rcu_cpu_has_nonlazy_callbacks(int cpu)
+{
+	return __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_sched_data, cpu)) ||
+	       __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_bh_data, cpu)) ||
+	       rcu_preempt_cpu_has_nonlazy_callbacks(cpu);
+}
+
+/*
+ * Allow the CPU to enter dyntick-idle mode if either: (1) There are no
+ * callbacks on this CPU, (2) this CPU has not yet attempted to enter
+ * dyntick-idle mode, or (3) this CPU is in the process of attempting to
+ * enter dyntick-idle mode.  Otherwise, if we have recently tried and failed
+ * to enter dyntick-idle mode, we refuse to try to enter it.  After all,
+ * it is better to incur scheduling-clock interrupts than to spin
+ * continuously for the same time duration!
  *
- * The caller must have disabled interrupts.
+ * The delta_jiffies argument is used to store the time when RCU is
+ * going to need the CPU again if it still has callbacks.  The reason
+ * for this is that rcu_prepare_for_idle() might need to post a timer,
+ * but if so, it will do so after tick_nohz_stop_sched_tick() has set
+ * the wakeup time for this CPU.  This means that RCU's timer can be
+ * delayed until the wakeup time, which defeats the purpose of posting
+ * a timer.
  */
-int rcu_needs_cpu(int cpu, unsigned long *dj)
+int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
 {
 	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
 
-	/* Snapshot to detect later posting of non-lazy callback. */
-	rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
-
+	/* Flag a new idle sojourn to the idle-entry state machine. */
+	rdtp->idle_first_pass = 1;
 	/* If no callbacks, RCU doesn't need the CPU. */
-	if (!rcu_cpu_has_callbacks(cpu, &rdtp->all_lazy)) {
-		*dj = ULONG_MAX;
+	if (!rcu_cpu_has_callbacks(cpu)) {
+		*delta_jiffies = ULONG_MAX;
 		return 0;
 	}
-
-	/* Attempt to advance callbacks. */
-	if (rcu_try_advance_all_cbs()) {
-		/* Some ready to invoke, so initiate later invocation. */
-		invoke_rcu_core();
+	if (rdtp->dyntick_holdoff == jiffies) {
+		/* RCU recently tried and failed, so don't try again. */
+		*delta_jiffies = 1;
 		return 1;
 	}
-	rdtp->last_accelerate = jiffies;
-
-	/* Request timer delay depending on laziness, and round. */
-	if (rdtp->all_lazy) {
-		*dj = round_up(rcu_idle_gp_delay + jiffies,
-			       rcu_idle_gp_delay) - jiffies;
+	/* Set up for the possibility that RCU will post a timer. */
+	if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
+		*delta_jiffies = round_up(rcu_idle_gp_delay + jiffies,
+					  rcu_idle_gp_delay) - jiffies;
 	} else {
-		*dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies;
+		*delta_jiffies = jiffies + rcu_idle_lazy_gp_delay;
+		*delta_jiffies = round_jiffies(*delta_jiffies) - jiffies;
 	}
 	return 0;
 }
 
 /*
- * Prepare a CPU for idle from an RCU perspective.  The first major task
- * is to sense whether nohz mode has been enabled or disabled via sysfs.
- * The second major task is to check to see if a non-lazy callback has
- * arrived at a CPU that previously had only lazy callbacks.  The third
- * major task is to accelerate (that is, assign grace-period numbers to)
- * any recently arrived callbacks.
+ * Handler for smp_call_function_single().  The only point of this
+ * handler is to wake the CPU up, so the handler does only tracing.
+ */
+void rcu_idle_demigrate(void *unused)
+{
+	trace_rcu_prep_idle("Demigrate");
+}
+
+/*
+ * Timer handler used to force CPU to start pushing its remaining RCU
+ * callbacks in the case where it entered dyntick-idle mode with callbacks
+ * pending.  The handler doesn't really need to do anything because the
+ * real work is done upon re-entry to idle, or by the next scheduling-clock
+ * interrupt should idle not be re-entered.
+ *
+ * One special case: the timer gets migrated without awakening the CPU
+ * on which the timer was scheduled.  In this case, we must wake up
+ * that CPU.  We do so with smp_call_function_single().
+ */
+static void rcu_idle_gp_timer_func(unsigned long cpu_in)
+{
+	int cpu = (int)cpu_in;
+
+	trace_rcu_prep_idle("Timer");
+	if (cpu != smp_processor_id())
+		smp_call_function_single(cpu, rcu_idle_demigrate, NULL, 0);
+	else
+		WARN_ON_ONCE(1); /* Getting here can hang the system... */
+}
+
+/*
+ * Initialize the timer used to pull CPUs out of dyntick-idle mode.
+ */
+static void rcu_prepare_for_idle_init(int cpu)
+{
+	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
+
+	rdtp->dyntick_holdoff = jiffies - 1;
+	setup_timer(&rdtp->idle_gp_timer, rcu_idle_gp_timer_func, cpu);
+	rdtp->idle_gp_timer_expires = jiffies - 1;
+	rdtp->idle_first_pass = 1;
+}
+
+/*
+ * Clean up for exit from idle.  Because we are exiting from idle, there
+ * is no longer any point to ->idle_gp_timer, so cancel it.  This will
+ * do nothing if this timer is not active, so just cancel it unconditionally.
+ */
+static void rcu_cleanup_after_idle(int cpu)
+{
+	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
+
+	del_timer(&rdtp->idle_gp_timer);
+	trace_rcu_prep_idle("Cleanup after idle");
+	rdtp->tick_nohz_enabled_snap = ACCESS_ONCE(tick_nohz_enabled);
+}
+
+/*
+ * Check to see if any RCU-related work can be done by the current CPU,
+ * and if so, schedule a softirq to get it done.  This function is part
+ * of the RCU implementation; it is -not- an exported member of the RCU API.
+ *
+ * The idea is for the current CPU to clear out all work required by the
+ * RCU core for the current grace period, so that this CPU can be permitted
+ * to enter dyntick-idle mode.  In some cases, it will need to be awakened
+ * at the end of the grace period by whatever CPU ends the grace period.
+ * This allows CPUs to go dyntick-idle more quickly, and to reduce the
+ * number of wakeups by a modest integer factor.
+ *
+ * Because it is not legal to invoke rcu_process_callbacks() with irqs
+ * disabled, we do one pass of force_quiescent_state(), then do an
+ * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked
+ * later.  The ->dyntick_drain field controls the sequencing.
 *
 * The caller must have disabled interrupts.
 */
 static void rcu_prepare_for_idle(int cpu)
 {
-	struct rcu_data *rdp;
+	struct timer_list *tp;
 	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
-	struct rcu_node *rnp;
-	struct rcu_state *rsp;
 	int tne;
 
 	/* Handle nohz enablement switches conservatively. */
 	tne = ACCESS_ONCE(tick_nohz_enabled);
 	if (tne != rdtp->tick_nohz_enabled_snap) {
-		if (rcu_cpu_has_callbacks(cpu, NULL))
+		if (rcu_cpu_has_callbacks(cpu))
 			invoke_rcu_core(); /* force nohz to see update. */
 		rdtp->tick_nohz_enabled_snap = tne;
 		return;
@@ -1705,56 +1809,125 @@ static void rcu_prepare_for_idle(int cpu)
 	}
 	if (!tne)
 		return;
 
-	/* If this is a no-CBs CPU, no callbacks, just return. */
-	if (rcu_is_nocb_cpu(cpu))
+	/* Adaptive-tick mode, where usermode execution is idle to RCU. */
+	if (!is_idle_task(current)) {
+		rdtp->dyntick_holdoff = jiffies - 1;
+		if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
+			trace_rcu_prep_idle("User dyntick with callbacks");
+			rdtp->idle_gp_timer_expires =
+				round_up(jiffies + rcu_idle_gp_delay,
+					 rcu_idle_gp_delay);
+		} else if (rcu_cpu_has_callbacks(cpu)) {
+			rdtp->idle_gp_timer_expires =
+				round_jiffies(jiffies + rcu_idle_lazy_gp_delay);
+			trace_rcu_prep_idle("User dyntick with lazy callbacks");
+		} else {
+			return;
+		}
+		tp = &rdtp->idle_gp_timer;
+		mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
 		return;
+	}
 
 	/*
-	 * If a non-lazy callback arrived at a CPU having only lazy
-	 * callbacks, invoke RCU core for the side-effect of recalculating
-	 * idle duration on re-entry to idle.
+	 * If this is an idle re-entry, for example, due to use of
+	 * RCU_NONIDLE() or the new idle-loop tracing API within the idle
+	 * loop, then don't take any state-machine actions, unless the
+	 * momentary exit from idle queued additional non-lazy callbacks.
+	 * Instead, repost the ->idle_gp_timer if this CPU has callbacks
+	 * pending.
 	 */
-	if (rdtp->all_lazy &&
-	    rdtp->nonlazy_posted != rdtp->nonlazy_posted_snap) {
-		invoke_rcu_core();
+	if (!rdtp->idle_first_pass &&
+	    (rdtp->nonlazy_posted == rdtp->nonlazy_posted_snap)) {
+		if (rcu_cpu_has_callbacks(cpu)) {
+			tp = &rdtp->idle_gp_timer;
+			mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
+		}
 		return;
 	}
+	rdtp->idle_first_pass = 0;
+	rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted - 1;
 
 	/*
-	 * If we have not yet accelerated this jiffy, accelerate all
-	 * callbacks on this CPU.
+	 * If there are no callbacks on this CPU, enter dyntick-idle mode.
+	 * Also reset state to avoid prejudicing later attempts.
 	 */
-	if (rdtp->last_accelerate == jiffies)
+	if (!rcu_cpu_has_callbacks(cpu)) {
+		rdtp->dyntick_holdoff = jiffies - 1;
+		rdtp->dyntick_drain = 0;
+		trace_rcu_prep_idle("No callbacks");
 		return;
-	rdtp->last_accelerate = jiffies;
-	for_each_rcu_flavor(rsp) {
-		rdp = per_cpu_ptr(rsp->rda, cpu);
-		if (!*rdp->nxttail[RCU_DONE_TAIL])
-			continue;
-		rnp = rdp->mynode;
-		raw_spin_lock(&rnp->lock); /* irqs already disabled. */
-		rcu_accelerate_cbs(rsp, rnp, rdp);
-		raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
 	}
-}
 
-/*
- * Clean up for exit from idle.  Attempt to advance callbacks based on
- * any grace periods that elapsed while the CPU was idle, and if any
- * callbacks are now ready to invoke, initiate invocation.
- */
-static void rcu_cleanup_after_idle(int cpu)
-{
-	struct rcu_data *rdp;
-	struct rcu_state *rsp;
+	/*
+	 * If in holdoff mode, just return.  We will presumably have
+	 * refrained from disabling the scheduling-clock tick.
+	 */
+	if (rdtp->dyntick_holdoff == jiffies) {
+		trace_rcu_prep_idle("In holdoff");
+		return;
+	}
 
-	if (rcu_is_nocb_cpu(cpu))
+	/* Check and update the ->dyntick_drain sequencing. */
+	if (rdtp->dyntick_drain <= 0) {
+		/* First time through, initialize the counter. */
+		rdtp->dyntick_drain = rcu_idle_flushes;
+	} else if (rdtp->dyntick_drain <= rcu_idle_opt_flushes &&
+		   !rcu_pending(cpu) &&
+		   !local_softirq_pending()) {
+		/* Can we go dyntick-idle despite still having callbacks? */
+		rdtp->dyntick_drain = 0;
+		rdtp->dyntick_holdoff = jiffies;
+		if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
+			trace_rcu_prep_idle("Dyntick with callbacks");
+			rdtp->idle_gp_timer_expires =
+				round_up(jiffies + rcu_idle_gp_delay,
+					 rcu_idle_gp_delay);
+		} else {
+			rdtp->idle_gp_timer_expires =
+				round_jiffies(jiffies + rcu_idle_lazy_gp_delay);
+			trace_rcu_prep_idle("Dyntick with lazy callbacks");
+		}
+		tp = &rdtp->idle_gp_timer;
+		mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
+		rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
+		return; /* Nothing more to do immediately. */
+	} else if (--(rdtp->dyntick_drain) <= 0) {
+		/* We have hit the limit, so time to give up. */
+		rdtp->dyntick_holdoff = jiffies;
+		trace_rcu_prep_idle("Begin holdoff");
+		invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */
 		return;
-	rcu_try_advance_all_cbs();
-	for_each_rcu_flavor(rsp) {
-		rdp = per_cpu_ptr(rsp->rda, cpu);
-		if (cpu_has_callbacks_ready_to_invoke(rdp))
-			invoke_rcu_core();
+	}
+
+	/*
+	 * Do one step of pushing the remaining RCU callbacks through
+	 * the RCU core state machine.
+	 */
+#ifdef CONFIG_TREE_PREEMPT_RCU
+	if (per_cpu(rcu_preempt_data, cpu).nxtlist) {
+		rcu_preempt_qs(cpu);
+		force_quiescent_state(&rcu_preempt_state);
+	}
+#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
+	if (per_cpu(rcu_sched_data, cpu).nxtlist) {
+		rcu_sched_qs(cpu);
+		force_quiescent_state(&rcu_sched_state);
+	}
+	if (per_cpu(rcu_bh_data, cpu).nxtlist) {
+		rcu_bh_qs(cpu);
+		force_quiescent_state(&rcu_bh_state);
+	}
+
+	/*
+	 * If RCU callbacks are still pending, RCU still needs this CPU.
+	 * So try forcing the callbacks through the grace period.
+	 */
+	if (rcu_cpu_has_callbacks(cpu)) {
+		trace_rcu_prep_idle("More callbacks");
+		invoke_rcu_core();
+	} else {
+		trace_rcu_prep_idle("Callbacks drained");
 	}
 }
@@ -1862,13 +2035,16 @@ early_initcall(rcu_register_oom_notifier);
 static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
 {
 	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
-	unsigned long nlpd = rdtp->nonlazy_posted - rdtp->nonlazy_posted_snap;
+	struct timer_list *tltp = &rdtp->idle_gp_timer;
+	char c;
 
-	sprintf(cp, "last_accelerate: %04lx/%04lx, nonlazy_posted: %ld, %c%c",
-		rdtp->last_accelerate & 0xffff, jiffies & 0xffff,
-		ulong2long(nlpd),
-		rdtp->all_lazy ? 'L' : '.',
-		rdtp->tick_nohz_enabled_snap ? '.' : 'D');
+	c = rdtp->dyntick_holdoff == jiffies ? 'H' : '.';
+	if (timer_pending(tltp))
+		sprintf(cp, "drain=%d %c timer=%lu",
+			rdtp->dyntick_drain, c, tltp->expires - jiffies);
+	else
+		sprintf(cp, "drain=%d %c timer not pending",
+			rdtp->dyntick_drain, c);
 }
 
 #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index cf6c174..93f8e8f 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -46,6 +46,8 @@
 #define RCU_TREE_NONCORE
 #include "rcutree.h"
 
+#define ulong2long(a) (*(long *)(&(a)))
+
 static int r_open(struct inode *inode, struct file *file,
 					const struct seq_operations *op)
 {
-- 
1.7.10.4
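
A note for reviewers on the wakeup rounding in the restored rcu_needs_cpu():
the expression round_up(rcu_idle_gp_delay + jiffies, rcu_idle_gp_delay) -
jiffies rounds each idle CPU's wakeup up to the next multiple of
rcu_idle_gp_delay, so CPUs that go idle at slightly different jiffies
converge on the same wakeup point. The following standalone userspace
sketch (not part of the patch; this round_up() is a simplified stand-in
for the kernel macro, which requires a power-of-two step) demonstrates
the batching:

/*
 * Standalone sketch of the wakeup batching used by rcu_needs_cpu().
 * Idle CPUs round their wakeup up to the next multiple of
 * RCU_IDLE_GP_DELAY, so nearby idle entries share one wakeup jiffy.
 */
#include <stdio.h>

#define RCU_IDLE_GP_DELAY 4	/* Roughly one grace period, in jiffies. */

/* Round v up to the next multiple of step (simplified stand-in). */
static unsigned long round_up(unsigned long v, unsigned long step)
{
	return ((v + step - 1) / step) * step;
}

int main(void)
{
	unsigned long jiffies;

	for (jiffies = 1000; jiffies < 1008; jiffies++) {
		unsigned long dj = round_up(RCU_IDLE_GP_DELAY + jiffies,
					    RCU_IDLE_GP_DELAY) - jiffies;
		printf("idle at %lu -> wake at %lu (delta %lu)\n",
		       jiffies, jiffies + dj, dj);
	}
	return 0;
}

CPUs idling at jiffies 1001 through 1004 all compute a wakeup of 1008,
which is the "reduce the number of wakeups" effect the comments above
rcu_prepare_for_idle() describe.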
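
The ->dyntick_drain/->dyntick_holdoff sequencing that this revert restores
in rcu_prepare_for_idle() can be hard to follow inside the diff. Below is a
toy single-CPU model of the mandatory-flush/optional-flush/holdoff flow;
it is userspace C, all identifiers are local stand-ins, and the kernel's
rcu_pending() check is faked with a simple countdown:

/*
 * Toy model of the idle-entry sequencing: a few mandatory state-machine
 * passes, then optional passes that may enter dyntick-idle if RCU needs
 * nothing more, then a holdoff once the flush budget is exhausted.
 */
#include <stdbool.h>
#include <stdio.h>

#define RCU_IDLE_FLUSHES 5	/* Max state-machine passes per idle entry. */
#define RCU_IDLE_OPT_FLUSHES 3	/* Passes that are optional if RCU is idle. */

static int dyntick_drain;
static unsigned long dyntick_holdoff = (unsigned long)-1;
static int fake_work = 2;	/* Pretend RCU stays busy for two checks. */

static bool rcu_pending_stub(void)
{
	return fake_work-- > 0;	/* Stand-in for rcu_pending(cpu). */
}

/* One pass of the idle-entry decision; returns true once we may go idle. */
static bool prepare_for_idle(unsigned long jiffies)
{
	if (dyntick_holdoff == jiffies) {
		printf("in holdoff, keep the tick\n");
		return false;
	}
	if (dyntick_drain <= 0) {
		dyntick_drain = RCU_IDLE_FLUSHES;	/* First pass. */
	} else if (dyntick_drain <= RCU_IDLE_OPT_FLUSHES &&
		   !rcu_pending_stub()) {
		dyntick_drain = 0;	/* Go idle despite callbacks. */
		printf("dyntick-idle with callbacks, timer posted\n");
		return true;
	} else if (--dyntick_drain <= 0) {
		dyntick_holdoff = jiffies;	/* Budget spent; give up. */
		printf("begin holdoff\n");
		return false;
	}
	printf("flush pass, drain=%d\n", dyntick_drain);
	return false;
}

int main(void)
{
	unsigned long jiffies = 1000;
	int pass;

	for (pass = 0; pass < RCU_IDLE_FLUSHES + 2; pass++)
		if (prepare_for_idle(jiffies))
			break;
	return 0;
}

With two busy checks, the model prints flush passes draining from 5 down
to 1 and then enters dyntick-idle; make fake_work large enough and it hits
the limit and begins holdoff instead, mirroring the "Begin holdoff"
tracepoint in the patch.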
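
Finally, the ulong2long() macro that this revert moves from rcupdate.h
into rcutree_trace.c simply reinterprets an unsigned long as signed, so
wrapped-around differences print as small negative numbers rather than
huge unsigned ones. A minimal standalone illustration (not part of the
patch; the variable names are invented for the example):

/* Reinterpret an unsigned long as signed for printing deltas. */
#include <stdio.h>

#define ulong2long(a) (*(long *)(&(a)))

int main(void)
{
	unsigned long completed = 100, gpnum = 103;
	unsigned long delta = completed - gpnum;	/* Wraps to a huge value. */

	printf("unsigned: %lu, reinterpreted: %ld\n",
	       delta, ulong2long(delta));	/* Prints ... -3. */
	return 0;
}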