[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <ca8c2155-707d-14f0-d818-053e5e57f8bd@virtuozzo.com>
Date: Mon, 28 Aug 2017 10:28:06 +0300
From: Denis Plotnikov <dplotnikov@...tuozzo.com>
To: Paolo Bonzini <pbonzini@...hat.com>,
Thomas Gleixner <tglx@...utronix.de>
Cc: John Stultz <john.stultz@...aro.org>,
Radim Krcmar <rkrcmar@...hat.com>,
kvm list <kvm@...r.kernel.org>, Ingo Molnar <mingo@...hat.com>,
"H. Peter Anvin" <hpa@...or.com>,
lkml <linux-kernel@...r.kernel.org>, x86@...nel.org,
rkagan@...tuozzo.com, den@...tuozzo.com,
Marcelo Tosatti <mtosatti@...hat.com>,
Peter Zijlstra <peterz@...radead.org>
Subject: Re: [PATCH v4 00/10] make L2's kvm-clock stable, get rid of
pvclock_gtod_copy in KVM
On 24.08.2017 11:00, Paolo Bonzini wrote:
> On 23/08/2017 18:02, Paolo Bonzini wrote:
>>
>> More duct tape would have been just:
>>
>> - if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
>> + mode = READ_ONCE(pvclock_gtod_data.clock.vclock_mode);
>> + if (mode != VCLOCK_TSC &&
>> + (mode != VCLOCK_PVCLOCK || !pvclock_nested_virt_magic())
>> return false;
>>
>> - return do_realtime(ts, cycle_now) == VCLOCK_TSC;
>> + switch (mode) {
>> + case VCLOCK_TSC:
>> + return do_realtime_tsc(ts, cycle_now);
>> + case VCLOCK_PVCLOCK:
>> + return do_realtime_pvclock(ts, cycle_now);
>> + }
>>
>> Nested virtualization does need a clocksource change notifier on top,
>> but we can cross that bridge later. Maybe Denis can post just those
>> patches to begin with.
>
> For what it's worth, this is all that's needed (with patches 1-2-3-4-5-7)
> to support kvmclock on top of Hyper-V clock. It's trivial.
>
> Even if we could add paravirtualization magic to KVM live migration, we
> certainly couldn't do that for other hypervisors.
>
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index 5b882cc0c0e9..3bab935b021a 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -46,10 +46,24 @@ static u64 read_hv_clock_tsc(struct clocksource *arg)
> return current_tick;
> }
>
> +static bool read_hv_clock_tsc_with_stamp(struct clocksource *arg,
> + u64 *cycles, u64 *cycles_stamp)
> +{
> + *cycles = __hv_read_tsc_page(tsc_pg, &cycles_stamp);
> +
> + if (*cycles == U64_MAX) {
> + *cycles = rdmsrl(HV_X64_MSR_TIME_REF_COUNT);
> + return false;
> + }
> +
> + return true;
> +}
> +
> static struct clocksource hyperv_cs_tsc = {
> .name = "hyperv_clocksource_tsc_page",
> .rating = 400,
> .read = read_hv_clock_tsc,
> + .read_with_stamp = read_hv_clock_tsc_with_stamp,
> .mask = CLOCKSOURCE_MASK(64),
> .flags = CLOCK_SOURCE_IS_CONTINUOUS,
> };
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 2b58c8c1eeaa..5aff66e9fff7 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -176,9 +176,9 @@ void hyperv_cleanup(void);
> #endif
> #ifdef CONFIG_HYPERV_TSCPAGE
> struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
> -static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
> +static inline u64 __hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg, u64 *cur_tsc)
> {
> - u64 scale, offset, cur_tsc;
> + u64 scale, offset;
> u32 sequence;
>
> /*
> @@ -209,7 +209,7 @@ static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
>
> scale = READ_ONCE(tsc_pg->tsc_scale);
> offset = READ_ONCE(tsc_pg->tsc_offset);
> - cur_tsc = rdtsc_ordered();
> + *cur_tsc = rdtsc_ordered();
>
> /*
> * Make sure we read sequence after we read all other values
> @@ -219,9 +219,14 @@ static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
>
> } while (READ_ONCE(tsc_pg->tsc_sequence) != sequence);
>
> - return mul_u64_u64_shr(cur_tsc, scale, 64) + offset;
> + return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset;
> }
>
> +static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
> +{
> + u64 cur_tsc;
> + return __hv_read_tsc_page(tsc_pg, &cur_tsc);
> +}
> #else
> static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
> {
>
>
> Denis, could you try redoing patch 7 to use the pvclock_gtod_notifier
> instead of the new one you're adding, and only send that first part? I
> think it's a worthwhile cleanup anyway, so let's start with that.
>
> Paolo
>
Ok, I'll do that
Denis
Powered by blists - more mailing lists