[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CALCETrW3NZmWEffa+G+c40zcuPcBAzckfn47bLh8-L2G+SfV=g@mail.gmail.com>
Date: Wed, 10 Dec 2014 10:38:41 -0800
From: Andy Lutomirski <luto@...capital.net>
To: Shaohua Li <shli@...com>
Cc: "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
X86 ML <x86@...nel.org>, kernel-team@...com,
"H. Peter Anvin" <hpa@...or.com>, Ingo Molnar <mingo@...hat.com>
Subject: Re: [PATCH 2/3] X86: add a generic API to let vdso code detect
context switch
On Sun, Dec 7, 2014 at 7:03 PM, Shaohua Li <shli@...com> wrote:
> vdso code can't disable preempt, so it can be preempted at any time.
> This makes a challenge to implement specific features. This patch adds a
> generic API to let vdso code detect context switch.
>
> With this patch, every cpu maintains a context switch count. The upper
> bits of the count is the logical cpu id, so the count can't be identical
> for any cpu. The low bits of the count will be increased for each
> context switch. For a x86_64 cpu with 4096 cpus, the context switch will
> be overflowed for 2^(64 - 12) context switch, which is a long time and can be
> ignored. The change of the count in giving time can be used to detect if
> context switch occurs.
Why do you need those high bits? I don't understand how you could
possibly confuse one cpu's count with another's unless you fail to
make sure that you're reading the same address both times.
That being said, I don't like this patch. I'm not sure I have a much
better idea, though. More thoughts in the 0/0 email to follow.
--Andy
>
> Cc: Andy Lutomirski <luto@...capital.net>
> Cc: H. Peter Anvin <hpa@...or.com>
> Cc: Ingo Molnar <mingo@...hat.com>
> Signed-off-by: Shaohua Li <shli@...com>
> ---
> arch/x86/Kconfig | 4 ++++
> arch/x86/include/asm/vdso.h | 34 ++++++++++++++++++++++++++++++++++
> arch/x86/include/asm/vvar.h | 6 ++++++
> arch/x86/kernel/asm-offsets.c | 6 ++++++
> arch/x86/vdso/vclock_gettime.c | 12 ++++++++++++
> arch/x86/vdso/vma.c | 6 ++++++
> kernel/sched/core.c | 5 +++++
> 7 files changed, 73 insertions(+)
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 41a503c..4978e31 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -1920,6 +1920,10 @@ config COMPAT_VDSO
> If unsure, say N: if you are compiling your own kernel, you
> are unlikely to be using a buggy version of glibc.
>
> +config VDSO_CS_DETECT
> + def_bool y
> + depends on X86_64
> +
> config CMDLINE_BOOL
> bool "Built-in kernel command line"
> ---help---
> diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
> index 8021bd2..42d6d2c 100644
> --- a/arch/x86/include/asm/vdso.h
> +++ b/arch/x86/include/asm/vdso.h
> @@ -4,6 +4,7 @@
> #include <asm/page_types.h>
> #include <linux/linkage.h>
> #include <linux/init.h>
> +#include <generated/bounds.h>
>
> #ifndef __ASSEMBLER__
>
> @@ -49,6 +50,39 @@ extern const struct vdso_image *selected_vdso32;
>
> extern void __init init_vdso_image(const struct vdso_image *image);
>
> +#ifdef CONFIG_VDSO_CS_DETECT
> +struct vdso_percpu_data {
> + /* layout: | cpu ID | context switch count | */
> + unsigned long cpu_cs_count;
> +} ____cacheline_aligned;
> +
> +struct vdso_data {
> + int dummy;
> + struct vdso_percpu_data vpercpu[0];
> +};
> +extern struct vdso_data vdso_data;
> +
> +#ifdef CONFIG_SMP
> +#define VDSO_CS_COUNT_BITS \
> + (sizeof(unsigned long) * 8 - NR_CPUS_BITS)
> +static inline void vdso_inc_cpu_cs_count(int cpu)
> +{
> + unsigned long cs = vdso_data.vpercpu[cpu].cpu_cs_count;
> + cs++;
> + cs &= (1L << VDSO_CS_COUNT_BITS) - 1;
> + vdso_data.vpercpu[cpu].cpu_cs_count = cs |
> + (((unsigned long)cpu) << VDSO_CS_COUNT_BITS);
> + smp_wmb();
> +}
> +#else
> +static inline void vdso_inc_cpu_cs_count(int cpu)
> +{
> + vdso_data.vpercpu[cpu].cpu_cs_count++;
> + smp_wmb();
> +}
> +#endif
> +#endif
> +
> #endif /* __ASSEMBLER__ */
>
> #endif /* _ASM_X86_VDSO_H */
> diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
> index fcbe621..394ab65 100644
> --- a/arch/x86/include/asm/vvar.h
> +++ b/arch/x86/include/asm/vvar.h
> @@ -47,6 +47,12 @@ extern char __vvar_page;
> DECLARE_VVAR(0, volatile unsigned long, jiffies)
> DECLARE_VVAR(16, int, vgetcpu_mode)
> DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
> +#if defined(CONFIG_VDSO_CS_DETECT) && defined(CONFIG_X86_64)
> +/*
> + * this one needs to be last because it ends with a per-cpu array.
> + */
> +DECLARE_VVAR(320, struct vdso_data, vdso_data)
> +#endif
> /*
> * you must update VVAR_TOTAL_SIZE to reflect all of the variables we're
> * stuffing into the vvar area. Don't change any of the above without
> diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
> index 0ab31a9..7321cdc 100644
> --- a/arch/x86/kernel/asm-offsets.c
> +++ b/arch/x86/kernel/asm-offsets.c
> @@ -17,6 +17,7 @@
> #include <asm/bootparam.h>
> #include <asm/suspend.h>
> #include <asm/vgtod.h>
> +#include <asm/vdso.h>
>
> #ifdef CONFIG_XEN
> #include <xen/interface/xen.h>
> @@ -74,6 +75,11 @@ void common(void) {
> DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
>
> BLANK();
> +#ifdef CONFIG_VDSO_CS_DETECT
> + DEFINE(VVAR_TOTAL_SIZE, ALIGN(320 + sizeof(struct vdso_data)
> + + sizeof(struct vdso_percpu_data) * CONFIG_NR_CPUS, PAGE_SIZE));
> +#else
> DEFINE(VVAR_TOTAL_SIZE,
> ALIGN(128 + sizeof(struct vsyscall_gtod_data), PAGE_SIZE));
> +#endif
> }
> diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
> index 9793322..438b3be 100644
> --- a/arch/x86/vdso/vclock_gettime.c
> +++ b/arch/x86/vdso/vclock_gettime.c
> @@ -17,6 +17,8 @@
> #include <asm/vvar.h>
> #include <asm/unistd.h>
> #include <asm/msr.h>
> +#include <asm/vdso.h>
> +#include <asm/vsyscall.h>
> #include <linux/math64.h>
> #include <linux/time.h>
>
> @@ -289,6 +291,16 @@ notrace static void do_monotonic_coarse(struct timespec *ts)
> } while (unlikely(gtod_read_retry(gtod, seq)));
> }
>
> +#if defined(CONFIG_VDSO_CS_DETECT) && defined(CONFIG_X86_64)
> +notrace unsigned long __vdso_get_context_switch_count(void)
> +{
> + int cpu = __getcpu() & VGETCPU_CPU_MASK;
> +
> + smp_rmb();
> + return VVAR(vdso_data).vpercpu[cpu].cpu_cs_count;
> +}
> +#endif
> +
> notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
> {
> switch (clock) {
> diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
> index fc37067..400e454 100644
> --- a/arch/x86/vdso/vma.c
> +++ b/arch/x86/vdso/vma.c
> @@ -22,6 +22,8 @@
> unsigned int __read_mostly vdso64_enabled = 1;
>
> extern unsigned short vdso_sync_cpuid;
> +
> +DEFINE_VVAR(struct vdso_data, vdso_data);
> #endif
>
> void __init init_vdso_image(const struct vdso_image *image)
> @@ -42,6 +44,10 @@ void __init init_vdso_image(const struct vdso_image *image)
> #if defined(CONFIG_X86_64)
> static int __init init_vdso(void)
> {
> + int cpu;
> + for (cpu =0; cpu < CONFIG_NR_CPUS; cpu++)
> + vdso_inc_cpu_cs_count(cpu);
> +
> init_vdso_image(&vdso_image_64);
>
> #ifdef CONFIG_X86_X32_ABI
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 89e7283..5e4100a 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -2238,6 +2238,11 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
> {
> struct mm_struct *mm = rq->prev_mm;
> long prev_state;
> +#ifdef CONFIG_VDSO_CS_DETECT
> + int cpu = smp_processor_id();
> +
> + vdso_inc_cpu_cs_count(cpu);
> +#endif
>
> rq->prev_mm = NULL;
>
> --
> 1.8.1
>
--
Andy Lutomirski
AMA Capital Management, LLC
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists