[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <7a8b31b1-c597-57d6-2721-2c06388def39@oracle.com>
Date: Wed, 25 Jan 2017 14:26:21 -0500
From: Boris Ostrovsky <boris.ostrovsky@...cle.com>
To: Joao Martins <joao.m.martins@...cle.com>,
linux-kernel@...r.kernel.org, xen-devel@...ts.xenproject.org
Cc: Thomas Gleixner <tglx@...utronix.de>,
Ingo Molnar <mingo@...hat.com>,
"H. Peter Anvin" <hpa@...or.com>, x86@...nel.org,
Juergen Gross <jgross@...e.com>,
Andy Lutomirski <luto@...capital.net>
Subject: Re: [PATCH v1 2/3] x86/xen/time: setup vcpu 0 time info page
On 01/25/2017 12:33 PM, Joao Martins wrote:
> In order to support pvclock vdso on xen we need to setup the time
> info page for vcpu 0 and register the page with Xen using the
> VCPUOP_register_vcpu_time_memory_area hypercall. This hypercall
> will also forcefully update the pvti which will set some of the
> necessary flags for vdso. Afterwards we check if it supports the
> PVCLOCK_TSC_STABLE_BIT flag which is mandatory for having
> vdso/vsyscall support. And if so, it will set the cpu 0 pvti that
> will be later on used when mapping the vdso image.
>
> The xen headers are also updated to include the new hypercall for
> registering the secondary vcpu_time_info struct.
>
> Signed-off-by: Joao Martins <joao.m.martins@...cle.com>
> ---
> Changes since RFC:
> (Comments from Boris and David)
> * Remove Kconfig option
> * Use get_zeroed_page/free/page
> * Remove the hypercall availability check
> * Unregister pvti with arg.addr.v = NULL if stable bit isn't supported.
> (New)
> * Set secondary copy on restore such that it works on migration.
> * Drop global xen_clock variable and stash it locally on
> xen_setup_vsyscall_time_info.
> * WARN_ON(ret) if we fail to unregister the pvti.
> ---
> arch/x86/xen/enlighten.c | 2 ++
> arch/x86/xen/time.c | 51 ++++++++++++++++++++++++++++++++++++++++++++
> arch/x86/xen/xen-ops.h | 1 +
> include/xen/interface/vcpu.h | 28 ++++++++++++++++++++++++
> 4 files changed, 82 insertions(+)
>
> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
> index 51ef952..15d271d 100644
> --- a/arch/x86/xen/enlighten.c
> +++ b/arch/x86/xen/enlighten.c
> @@ -270,6 +270,8 @@ void xen_vcpu_restore(void)
This is called for PV only. What about HVM?
> HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL))
> BUG();
> }
> +
> + xen_setup_vsyscall_time_info(0);
Do we need to tear down time memory area on VCPU suspend?
> }
>
> static void __init xen_banner(void)
> diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
> index 1e69956..e90f703 100644
> --- a/arch/x86/xen/time.c
> +++ b/arch/x86/xen/time.c
> @@ -367,6 +367,56 @@ static const struct pv_time_ops xen_time_ops __initconst = {
> .steal_clock = xen_steal_clock,
> };
>
> +int xen_setup_vsyscall_time_info(int cpu)
> +{
> + struct pvclock_vsyscall_time_info *xen_clock;
> + struct vcpu_register_time_memory_area t;
> + struct pvclock_vcpu_time_info *pvti;
> + unsigned long addr;
> + u8 flags;
> + int ret;
> +
> + addr = get_zeroed_page(GFP_KERNEL);
> + if (!addr)
> + return -ENOMEM;
> +
> + xen_clock = (struct pvclock_vsyscall_time_info *) addr;
> + memset(xen_clock, 0, PAGE_SIZE);
You don't really need addr and there is no reason to memset the page to
zero, given that you got it with get_zeroed_page().
> +
> + t.addr.v = &xen_clock->pvti;
> +
> + ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area,
> + cpu, &t);
> +
> + if (ret) {
> + pr_debug("xen: cannot register vcpu_time_info err %d\n", ret);
pr_warn() would be more appropriate I think. You also have blank line
before 'if'.
> + free_page(addr);
> + return ret;
> + }
> +
> + pvti = &xen_clock->pvti;
> + flags = pvti->flags;
I don't think you need these, given that you only reference flags once
below.
> +
> + if (!(flags & PVCLOCK_TSC_STABLE_BIT)) {
> + t.addr.v = NULL;
> + ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area,
> + cpu, &t);
> + if (!ret)
> + free_page(addr);
> +
> + WARN_ON(ret);
Did someone ask for WARN_ON()? (from your changelog it looks like it
could have been either David or me). I think pr_warn() is sufficient,
just like above.
> + pr_debug("xen: VCLOCK_PVCLOCK not supported\n");
pr_notice()
-boris
> + return -ENOTSUPP;
> + }
> +
> + pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
> + pvclock_set_pvti_cpu0_va(xen_clock);
> +
> + xen_clocksource.archdata.vclock_mode = VCLOCK_PVCLOCK;
> +
> + return 0;
> +}
> +
> static void __init xen_time_init(void)
> {
> int cpu = smp_processor_id();
> @@ -393,6 +443,7 @@ static void __init xen_time_init(void)
> setup_force_cpu_cap(X86_FEATURE_TSC);
>
> xen_setup_runstate_info(cpu);
> + xen_setup_vsyscall_time_info(cpu);
> xen_setup_timer(cpu);
> xen_setup_cpu_clockevents();
>
> diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
> index ac0a2b0..4036d15 100644
> --- a/arch/x86/xen/xen-ops.h
> +++ b/arch/x86/xen/xen-ops.h
> @@ -66,6 +66,7 @@ void __init xen_vmalloc_p2m_tree(void);
> void xen_init_irq_ops(void);
> void xen_setup_timer(int cpu);
> void xen_setup_runstate_info(int cpu);
> +int xen_setup_vsyscall_time_info(int cpu);
> void xen_teardown_timer(int cpu);
> u64 xen_clocksource_read(void);
> void xen_setup_cpu_clockevents(void);
> diff --git a/include/xen/interface/vcpu.h b/include/xen/interface/vcpu.h
> index 98188c8..8da788c 100644
> --- a/include/xen/interface/vcpu.h
> +++ b/include/xen/interface/vcpu.h
> @@ -178,4 +178,32 @@ DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_vcpu_info);
>
> /* Send an NMI to the specified VCPU. @extra_arg == NULL. */
> #define VCPUOP_send_nmi 11
> +
> +/*
> + * Register a memory location to get a secondary copy of the vcpu time
> + * parameters. The master copy still exists as part of the vcpu shared
> + * memory area, and this secondary copy is updated whenever the master copy
> + * is updated (and using the same versioning scheme for synchronisation).
> + *
> + * The intent is that this copy may be mapped (RO) into userspace so
> + * that usermode can compute system time using the time info and the
> + * tsc. Usermode will see an array of vcpu_time_info structures, one
> + * for each vcpu, and choose the right one by an existing mechanism
> + * which allows it to get the current vcpu number (such as via a
> + * segment limit). It can then apply the normal algorithm to compute
> + * system time from the tsc.
> + *
> + * @extra_arg == pointer to vcpu_register_time_info_memory_area structure.
> + */
> +#define VCPUOP_register_vcpu_time_memory_area 13
> +DEFINE_GUEST_HANDLE_STRUCT(vcpu_time_info_t);
> +struct vcpu_register_time_memory_area {
> + union {
> + GUEST_HANDLE(vcpu_time_info_t) h;
> + struct pvclock_vcpu_time_info *v;
> + uint64_t p;
> + } addr;
> +};
> +DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_time_memory_area_t);
> +
> #endif /* __XEN_PUBLIC_VCPU_H__ */
Powered by blists - more mailing lists