[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20120803185146.GE2474@linux.vnet.ibm.com>
Date: Fri, 3 Aug 2012 11:51:46 -0700
From: "Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
To: Frederic Weisbecker <fweisbec@...il.com>
Cc: LKML <linux-kernel@...r.kernel.org>,
Alessio Igor Bogani <abogani@...nel.org>,
Andrew Morton <akpm@...ux-foundation.org>,
Avi Kivity <avi@...hat.com>,
Chris Metcalf <cmetcalf@...era.com>,
Christoph Lameter <cl@...ux.com>,
Geoff Levand <geoff@...radead.org>,
Gilad Ben Yossef <gilad@...yossef.com>,
Hakan Akkan <hakanakkan@...il.com>,
"H. Peter Anvin" <hpa@...or.com>, Ingo Molnar <mingo@...nel.org>,
Kevin Hilman <khilman@...com>,
Max Krasnyansky <maxk@...lcomm.com>,
Peter Zijlstra <peterz@...radead.org>,
Stephen Hemminger <shemminger@...tta.com>,
Steven Rostedt <rostedt@...dmis.org>,
Sven-Thorsten Dietrich <thebigcorporation@...il.com>,
Thomas Gleixner <tglx@...utronix.de>
Subject: Re: [PATCH 1/5] code_domain: New code domain tracking subsystem
On Fri, Aug 03, 2012 at 05:02:21PM +0200, Frederic Weisbecker wrote:
> Create a new subsystem that handles the probing on kernel
> boundaries to keep track of the transitions between code domains
> with two basic initial domains: user or kernel.
>
> This is an abstraction of some RCU code that use it to implement
> its userspace extended quiescent state.
>
> We need to pull this up from RCU into this new level of indirection
> because this probing is also going to be used to implement an "on
> demand" generic virtual cputime accounting. A necessary step to
> shutdown the tick while still accounting the cputime.
From an RCU viewpoint:
Reviewed-by: Paul E. McKenney <paulmck@...ux.vnet.ibm.com>
> Signed-off-by: Frederic Weisbecker <fweisbec@...il.com>
> Cc: Alessio Igor Bogani <abogani@...nel.org>
> Cc: Andrew Morton <akpm@...ux-foundation.org>
> Cc: Avi Kivity <avi@...hat.com>
> Cc: Chris Metcalf <cmetcalf@...era.com>
> Cc: Christoph Lameter <cl@...ux.com>
> Cc: Geoff Levand <geoff@...radead.org>
> Cc: Gilad Ben Yossef <gilad@...yossef.com>
> Cc: Hakan Akkan <hakanakkan@...il.com>
> Cc: H. Peter Anvin <hpa@...or.com>
> Cc: Ingo Molnar <mingo@...nel.org>
> Cc: Kevin Hilman <khilman@...com>
> Cc: Max Krasnyansky <maxk@...lcomm.com>
> Cc: Paul E. McKenney <paulmck@...ux.vnet.ibm.com>
> Cc: Peter Zijlstra <peterz@...radead.org>
> Cc: Stephen Hemminger <shemminger@...tta.com>
> Cc: Steven Rostedt <rostedt@...dmis.org>
> Cc: Sven-Thorsten Dietrich <thebigcorporation@...il.com>
> Cc: Thomas Gleixner <tglx@...utronix.de>
> ---
> arch/Kconfig | 12 +++---
> arch/x86/Kconfig | 2 +-
> arch/x86/include/asm/{rcu.h => code_domain.h} | 12 +++---
> arch/x86/kernel/ptrace.c | 6 +-
> arch/x86/kernel/signal.c | 5 +-
> arch/x86/kernel/traps.c | 2 +-
> arch/x86/mm/fault.c | 2 +-
> include/linux/code_domain.h | 18 ++++++++
> include/linux/rcupdate.h | 2 -
> include/linux/sched.h | 8 ---
> init/Kconfig | 24 ++++++----
> kernel/Makefile | 1 +
> kernel/code_domain_tracking.c | 59 +++++++++++++++++++++++++
> kernel/rcutree.c | 42 +-----------------
> kernel/sched/core.c | 9 ++--
> 15 files changed, 121 insertions(+), 83 deletions(-)
> rename arch/x86/include/asm/{rcu.h => code_domain.h} (53%)
> create mode 100644 include/linux/code_domain.h
> create mode 100644 kernel/code_domain_tracking.c
>
> diff --git a/arch/Kconfig b/arch/Kconfig
> index d891c62..2ce2a2f 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -277,14 +277,14 @@ config SECCOMP_FILTER
> config HAVE_VIRT_CPU_ACCOUNTING
> bool
>
> -config HAVE_RCU_USER_QS
> +config HAVE_CODE_DOMAIN_TRACKING
> bool
> help
> - Provide kernel entry/exit hooks necessary for userspace
> + Provide kernel boundaries probing necessary for userspace
> RCU extended quiescent state. Syscalls need to be wrapped inside
> - rcu_user_exit()-rcu_user_enter() through the slow path using
> - TIF_NOHZ flag. Exceptions handlers must be wrapped as well. Irqs
> - are already protected inside rcu_irq_enter/rcu_irq_exit() but
> - preemption or signal handling on irq exit still need to be protected.
> + user_exit()-user_enter() through the slow path using TIF_NOHZ flag.
> + Exceptions handlers must be wrapped as well. Irqs are already
> + protected inside rcu_irq_enter/rcu_irq_exit() but preemption or
> + signal handling on irq exit still need to be protected.
>
> source "kernel/gcov/Kconfig"
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 38dfcc2..cc9bf3e 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -95,7 +95,7 @@ config X86
> select KTIME_SCALAR if X86_32
> select GENERIC_STRNCPY_FROM_USER
> select GENERIC_STRNLEN_USER
> - select HAVE_RCU_USER_QS if X86_64
> + select HAVE_CODE_DOMAIN_TRACKING if X86_64
>
> config INSTRUCTION_DECODER
> def_bool (KPROBES || PERF_EVENTS || UPROBES)
> diff --git a/arch/x86/include/asm/rcu.h b/arch/x86/include/asm/code_domain.h
> similarity index 53%
> rename from arch/x86/include/asm/rcu.h
> rename to arch/x86/include/asm/code_domain.h
> index 439815b..e245152 100644
> --- a/arch/x86/include/asm/rcu.h
> +++ b/arch/x86/include/asm/code_domain.h
> @@ -1,19 +1,19 @@
> -#ifndef _ASM_X86_RCU_H
> -#define _ASM_X86_RCU_H
> +#ifndef _ASM_X86_CODE_DOMAIN_H
> +#define _ASM_X86_CODE_DOMAIN_H
>
> -#include <linux/rcupdate.h>
> +#include <linux/code_domain.h>
> #include <asm/ptrace.h>
>
> static inline void exception_enter(struct pt_regs *regs)
> {
> - rcu_user_exit();
> + user_exit();
> }
>
> static inline void exception_exit(struct pt_regs *regs)
> {
> -#ifdef CONFIG_RCU_USER_QS
> +#ifdef CONFIG_CODE_DOMAIN_TRACKING
> if (user_mode(regs))
> - rcu_user_enter();
> + user_enter();
> #endif
> }
>
> diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
> index 9f94f8e..5bc2e50 100644
> --- a/arch/x86/kernel/ptrace.c
> +++ b/arch/x86/kernel/ptrace.c
> @@ -21,7 +21,7 @@
> #include <linux/signal.h>
> #include <linux/perf_event.h>
> #include <linux/hw_breakpoint.h>
> -#include <linux/rcupdate.h>
> +#include <linux/code_domain.h>
>
> #include <asm/uaccess.h>
> #include <asm/pgtable.h>
> @@ -1464,7 +1464,7 @@ long syscall_trace_enter(struct pt_regs *regs)
> {
> long ret = 0;
>
> - rcu_user_exit();
> + user_exit();
>
> /*
> * If we stepped into a sysenter/syscall insn, it trapped in
> @@ -1530,5 +1530,5 @@ void syscall_trace_leave(struct pt_regs *regs)
> if (step || test_thread_flag(TIF_SYSCALL_TRACE))
> tracehook_report_syscall_exit(regs, step);
>
> - rcu_user_enter();
> + user_enter();
> }
> diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
> index 5cc2579..fc3e12c 100644
> --- a/arch/x86/kernel/signal.c
> +++ b/arch/x86/kernel/signal.c
> @@ -19,6 +19,7 @@
> #include <linux/uaccess.h>
> #include <linux/user-return-notifier.h>
> #include <linux/uprobes.h>
> +#include <linux/code_domain.h>
>
> #include <asm/processor.h>
> #include <asm/ucontext.h>
> @@ -776,7 +777,7 @@ static void do_signal(struct pt_regs *regs)
> void
> do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
> {
> - rcu_user_exit();
> + user_exit();
>
> #ifdef CONFIG_X86_MCE
> /* notify userspace of pending MCEs */
> @@ -804,7 +805,7 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
> clear_thread_flag(TIF_IRET);
> #endif /* CONFIG_X86_32 */
>
> - rcu_user_enter();
> + user_enter();
> }
>
> void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
> index 9b8195b..2d1fe02 100644
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -52,7 +52,7 @@
> #include <asm/i387.h>
> #include <asm/fpu-internal.h>
> #include <asm/mce.h>
> -#include <asm/rcu.h>
> +#include <asm/code_domain.h>
>
> #include <asm/mach_traps.h>
>
> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
> index 7dde46d..be026ea 100644
> --- a/arch/x86/mm/fault.c
> +++ b/arch/x86/mm/fault.c
> @@ -18,7 +18,7 @@
> #include <asm/pgalloc.h> /* pgd_*(), ... */
> #include <asm/kmemcheck.h> /* kmemcheck_*(), ... */
> #include <asm/fixmap.h> /* VSYSCALL_START */
> -#include <asm/rcu.h> /* exception_enter(), ... */
> +#include <asm/code_domain.h> /* exception_enter(), ... */
>
> /*
> * Page fault error code bits:
> diff --git a/include/linux/code_domain.h b/include/linux/code_domain.h
> new file mode 100644
> index 0000000..5d4513d
> --- /dev/null
> +++ b/include/linux/code_domain.h
> @@ -0,0 +1,18 @@
> +#ifndef _LINUX_CODE_DOMAIN_TRACKING_H
> +#define _LINUX_CODE_DOMAIN_TRACKING_H
> +
> +#ifdef CONFIG_CODE_DOMAIN_TRACKING
> +#include <linux/sched.h>
> +
> +extern void user_enter(void);
> +extern void user_exit(void);
> +extern void code_domain_task_switch(struct task_struct *prev,
> + struct task_struct *next);
> +#else
> +static inline void user_enter(void) { }
> +static inline void user_exit(void) { }
> +static inline void code_domain_task_switch(struct task_struct *prev,
> + struct task_struct *next) { }
> +#endif /* !CONFIG_CODE_DOMAIN_TRACKING */
> +
> +#endif
> diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
> index 1fc0a0e..e411117 100644
> --- a/include/linux/rcupdate.h
> +++ b/include/linux/rcupdate.h
> @@ -197,8 +197,6 @@ extern void rcu_user_enter(void);
> extern void rcu_user_exit(void);
> extern void rcu_user_enter_irq(void);
> extern void rcu_user_exit_irq(void);
> -extern void rcu_user_hooks_switch(struct task_struct *prev,
> - struct task_struct *next);
> #else
> static inline void rcu_user_enter(void) { }
> static inline void rcu_user_exit(void) { }
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 30105f4..7b7a438 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1899,14 +1899,6 @@ static inline void rcu_copy_process(struct task_struct *p)
>
> #endif
>
> -static inline void rcu_switch(struct task_struct *prev,
> - struct task_struct *next)
> -{
> -#ifdef CONFIG_RCU_USER_QS
> - rcu_user_hooks_switch(prev, next);
> -#endif
> -}
> -
> #ifdef CONFIG_SMP
> extern void do_set_cpus_allowed(struct task_struct *p,
> const struct cpumask *new_mask);
> diff --git a/init/Kconfig b/init/Kconfig
> index cc1d581..e2854a0 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -404,6 +404,19 @@ config AUDIT_LOGINUID_IMMUTABLE
> source "kernel/irq/Kconfig"
> source "kernel/time/Kconfig"
>
> +config CODE_DOMAIN_TRACKING
> + bool
> +
> +config CODE_DOMAIN_TRACKING_FORCE
> + bool "Force kernel boundaries probing"
> + depends on CODE_DOMAIN_TRACKING
> + help
> + Set the probes in user/kernel boundaries by default in order to
> + test the features that rely on it such as userspace RCU extended
> + quiescent states.
> + This test is there for debugging until we have a real user like a
> + full adaptive nohz option.
> +
> menu "RCU Subsystem"
>
> choice
> @@ -456,7 +469,8 @@ config PREEMPT_RCU
>
> config RCU_USER_QS
> bool "Consider userspace as in RCU extended quiescent state"
> - depends on HAVE_RCU_USER_QS && SMP
> + depends on HAVE_CODE_DOMAIN_TRACKING && SMP
> + select CODE_DOMAIN_TRACKING
> help
> This option sets hooks on kernel / userspace boundaries and
> puts RCU in extended quiescent state when the CPU runs in
> @@ -464,14 +478,6 @@ config RCU_USER_QS
> excluded from the global RCU state machine and thus doesn't
> to keep the timer tick on for RCU.
>
> -config RCU_USER_QS_FORCE
> - bool "Force userspace extended QS by default"
> - depends on RCU_USER_QS
> - help
> - Set the hooks in user/kernel boundaries by default in order to
> - test this feature that treats userspace as an extended quiescent
> - state until we have a real user like a full adaptive nohz option.
> -
> config RCU_FANOUT
> int "Tree-based hierarchical RCU fanout value"
> range 2 64 if 64BIT
> diff --git a/kernel/Makefile b/kernel/Makefile
> index c0cc67a..86bc293 100644
> --- a/kernel/Makefile
> +++ b/kernel/Makefile
> @@ -110,6 +110,7 @@ obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
> obj-$(CONFIG_PADATA) += padata.o
> obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
> obj-$(CONFIG_JUMP_LABEL) += jump_label.o
> +obj-$(CONFIG_CODE_DOMAIN_TRACKING) += code_domain_tracking.o
>
> $(obj)/configs.o: $(obj)/config_data.h
>
> diff --git a/kernel/code_domain_tracking.c b/kernel/code_domain_tracking.c
> new file mode 100644
> index 0000000..8332c76
> --- /dev/null
> +++ b/kernel/code_domain_tracking.c
> @@ -0,0 +1,59 @@
> +#include <linux/code_domain.h>
> +#include <linux/rcupdate.h>
> +#include <linux/sched.h>
> +#include <linux/percpu.h>
> +
> +struct code_domain_tracking {
> + /*
> + * When tracking_active is false, hooks are not
> + * set to minimize overhead: TIF flags are cleared
> + * and calls to user_enter/exit are ignored. This
> + * may be further optimized using static keys.
> + */
> + bool tracking_active;
> + enum {
> + IN_KERNEL = 0,
> + IN_USER,
> + } state;
> +};
> +
> +DEFINE_PER_CPU(struct code_domain_tracking, code_domain) = {
> +#ifdef CONFIG_CODE_DOMAIN_TRACKING_FORCE
> + .tracking_active = true,
> +#endif
> +};
> +
> +void user_enter(void)
> +{
> + unsigned long flags;
> +
> + WARN_ON_ONCE(!current->mm);
> + local_irq_save(flags);
> + if (__this_cpu_read(code_domain.tracking_active) &&
> + __this_cpu_read(code_domain.state) != IN_USER) {
> + __this_cpu_write(code_domain.state, IN_USER);
> + rcu_user_enter();
> + }
> + local_irq_restore(flags);
> +}
> +
> +void user_exit(void)
> +{
> + unsigned long flags;
> +
> + local_irq_save(flags);
> + if (__this_cpu_read(code_domain.state) == IN_USER) {
> + __this_cpu_write(code_domain.state, IN_KERNEL);
> + rcu_user_exit();
> + }
> + local_irq_restore(flags);
> +}
> +
> +void code_domain_task_switch(struct task_struct *prev,
> + struct task_struct *next)
> +{
> + if (__this_cpu_read(code_domain.tracking_active)) {
> + clear_tsk_thread_flag(prev, TIF_NOHZ);
> + set_tsk_thread_flag(next, TIF_NOHZ);
> + }
> +}
> diff --git a/kernel/rcutree.c b/kernel/rcutree.c
> index 318d00e..f6a24cb 100644
> --- a/kernel/rcutree.c
> +++ b/kernel/rcutree.c
> @@ -212,9 +212,6 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch);
> DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
> .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
> .dynticks = ATOMIC_INIT(1),
> -#if defined(CONFIG_RCU_USER_QS) && !defined(CONFIG_RCU_USER_QS_FORCE)
> - .ignore_user_qs = true,
> -#endif
> };
>
> static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */
> @@ -448,18 +445,7 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter);
> */
> void rcu_user_enter(void)
> {
> - unsigned long flags;
> - struct rcu_dynticks *rdtp;
> -
> - WARN_ON_ONCE(!current->mm);
> -
> - local_irq_save(flags);
> - rdtp = &__get_cpu_var(rcu_dynticks);
> - if (!rdtp->ignore_user_qs && !rdtp->in_user) {
> - rdtp->in_user = true;
> - rcu_eqs_enter(1);
> - }
> - local_irq_restore(flags);
> + rcu_eqs_enter(1);
> }
> EXPORT_SYMBOL_GPL(rcu_user_enter);
>
> @@ -597,16 +583,7 @@ EXPORT_SYMBOL_GPL(rcu_idle_exit);
> */
> void rcu_user_exit(void)
> {
> - unsigned long flags;
> - struct rcu_dynticks *rdtp;
> -
> - local_irq_save(flags);
> - rdtp = &__get_cpu_var(rcu_dynticks);
> - if (rdtp->in_user) {
> - rdtp->in_user = false;
> - rcu_eqs_exit(1);
> - }
> - local_irq_restore(flags);
> + rcu_eqs_exit(1);
> }
> EXPORT_SYMBOL_GPL(rcu_user_exit);
>
> @@ -730,21 +707,6 @@ int rcu_is_cpu_idle(void)
> }
> EXPORT_SYMBOL(rcu_is_cpu_idle);
>
> -#ifdef CONFIG_RCU_USER_QS
> -void rcu_user_hooks_switch(struct task_struct *prev,
> - struct task_struct *next)
> -{
> - struct rcu_dynticks *rdtp;
> -
> - /* Interrupts are disabled in context switch */
> - rdtp = &__get_cpu_var(rcu_dynticks);
> - if (!rdtp->ignore_user_qs) {
> - clear_tsk_thread_flag(prev, TIF_NOHZ);
> - set_tsk_thread_flag(next, TIF_NOHZ);
> - }
> -}
> -#endif /* #ifdef CONFIG_RCU_USER_QS */
> -
> #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
>
> /*
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 94a4894..64bb370 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -72,6 +72,7 @@
> #include <linux/slab.h>
> #include <linux/init_task.h>
> #include <linux/binfmts.h>
> +#include <linux/code_domain.h>
>
> #include <asm/switch_to.h>
> #include <asm/tlb.h>
> @@ -1925,8 +1926,8 @@ context_switch(struct rq *rq, struct task_struct *prev,
> spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
> #endif
>
> + code_domain_task_switch(prev, next);
> /* Here we just switch the register state and the stack. */
> - rcu_switch(prev, next);
> switch_to(prev, next, prev);
>
> barrier();
> @@ -2920,9 +2921,9 @@ EXPORT_SYMBOL(schedule);
>
> asmlinkage void __sched schedule_user(void)
> {
> - rcu_user_exit();
> + user_exit();
> schedule();
> - rcu_user_enter();
> + user_enter();
> }
>
> /**
> @@ -3026,7 +3027,7 @@ asmlinkage void __sched preempt_schedule_irq(void)
> /* Catch callers which need to be fixed */
> BUG_ON(ti->preempt_count || !irqs_disabled());
>
> - rcu_user_exit();
> + user_exit();
> do {
> add_preempt_count(PREEMPT_ACTIVE);
> local_irq_enable();
> --
> 1.7.5.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists