[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <alpine.DEB.2.20.1709252322120.2418@nanos>
Date: Tue, 26 Sep 2017 00:00:27 +0200 (CEST)
From: Thomas Gleixner <tglx@...utronix.de>
To: Denis Plotnikov <dplotnikov@...tuozzo.com>
cc: pbonzini@...hat.com, rkrcmar@...hat.com, kvm@...r.kernel.org,
john.stultz@...aro.org, mingo@...hat.com, hpa@...or.com,
linux-kernel@...r.kernel.org, x86@...nel.org, rkagan@...tuozzo.com,
den@...tuozzo.com
Subject: Re: [PATCH v5 1/6] timekeeper: introduce extended clocksource reading
callback
On Wed, 30 Aug 2017, Denis Plotnikov wrote:
> The callback provides extended information about just read
> clocksource value.
>
> It's going to be used in cases when detailed system information
> needed for further time related values calculation, e.g in KVM
> masterclock settings calculation.
>
> Signed-off-by: Denis Plotnikov <dplotnikov@...tuozzo.com>
> ---
> include/linux/clocksource.h | 11 ++++++++++-
> 1 file changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
> index a78cb18..786a522 100644
> --- a/include/linux/clocksource.h
> +++ b/include/linux/clocksource.h
> @@ -48,7 +48,14 @@ struct module;
> * 400-499: Perfect
> * The ideal clocksource. A must-use where
> * available.
> - * @read: returns a cycle value, passes clocksource as argument
> + * @read: returns a cycle value (might be not quite cycles:
> + * see pvclock) passes clocksource as argument
> + * @read_with_stamp: saves cycles value (got from timekeeper) and cycles
> + * stamp (got from hardware counter value and used by
> + * timekeeper to calculate the cycles value) to
> + * corresponding input pointers return true if cycles
> + * stamp holds real cycles and false if it holds some
> + * time derivative value
Neither the changelog nor this comment make any sense. Not to talk about
the actual TSC side implementation which does the same as tsc_read() - it
actually uses tsc_read() - but stores the same value twice and
unconditionally returns true.
I have no idea why you need this extra voodoo function if you can achieve
the same thing with a simple property bit in clocksource->flags.
Neither do I understand the rest of the magic you introduce in the snapshot
code:
> + if (clock->read_with_stamp)
> + systime_snapshot->cycles_valid =
> + clock->read_with_stamp(
> + clock, &now, &systime_snapshot->cycles);
> + else {
> + systime_snapshot->cycles_valid = false;
> + now = clock->read(clock);
> + systime_snapshot->cycles = now;
> + }
The only difference between the two code pathes is the effect on
systime_snapshot->cycles_valid. The explanation of that bit is not making
much sense either.
+ * @cycles_valid: The flag is true when @cycles contain actual
+ * number of cycles instead some cycle derivative
Why the heck would cycles be something different than what clock->read()
returns?
What you really want to convey is the information whether
now = tk_clock_read(&tk->tkr_mono);
is a value which you can use for your pvclock correlation or not.
Unless I'm missing something important all of this can be achieved with a
minimal and actually understandable patch. See below.
Thanks,
tglx
8<------------------
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1045,7 +1045,8 @@ static struct clocksource clocksource_ts
.read = read_tsc,
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS |
- CLOCK_SOURCE_MUST_VERIFY,
+ CLOCK_SOURCE_MUST_VERIFY |
+ CLOCK_SOURCE_VALID_FOR_PVCLOCK_UPDATE,
.archdata = { .vclock_mode = VCLOCK_TSC },
.resume = tsc_resume,
.mark_unstable = tsc_cs_mark_unstable,
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -118,7 +118,9 @@ struct clocksource {
#define CLOCK_SOURCE_VALID_FOR_HRES 0x20
#define CLOCK_SOURCE_UNSTABLE 0x40
#define CLOCK_SOURCE_SUSPEND_NONSTOP 0x80
-#define CLOCK_SOURCE_RESELECT 0x100
+#define CLOCK_SOURCE_VALID_FOR_PVCLOCK_UPDATE 0x100
+
+#define CLOCK_SOURCE_RESELECT 0x200
/* simplify initialization of mask field */
#define CLOCKSOURCE_MASK(bits) GENMASK_ULL((bits) - 1, 0)
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -285,6 +285,8 @@ extern void ktime_get_raw_and_real_ts64(
* @raw: Monotonic raw system time
* @clock_was_set_seq: The sequence number of clock was set events
* @cs_was_changed_seq: The sequence number of clocksource change events
+ * @valid_for_pvclock_update: @cycles is from a clocksource which
+ * can be used for pvclock updates
*/
struct system_time_snapshot {
u64 cycles;
@@ -292,6 +294,7 @@ struct system_time_snapshot {
ktime_t raw;
unsigned int clock_was_set_seq;
u8 cs_was_changed_seq;
+ bool valid_for_pvclock_update;
};
/*
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -948,7 +948,7 @@ time64_t __ktime_get_real_seconds(void)
void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
{
struct timekeeper *tk = &tk_core.timekeeper;
- unsigned long seq;
+ unsigned long seq, clk_flags;
ktime_t base_raw;
ktime_t base_real;
u64 nsec_raw;
@@ -960,6 +960,7 @@ void ktime_get_snapshot(struct system_ti
do {
seq = read_seqcount_begin(&tk_core.seq);
now = tk_clock_read(&tk->tkr_mono);
+ clk_flags = tk->tkr_mono.clock->flags;
systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
base_real = ktime_add(tk->tkr_mono.base,
@@ -970,6 +971,8 @@ void ktime_get_snapshot(struct system_ti
} while (read_seqcount_retry(&tk_core.seq, seq));
systime_snapshot->cycles = now;
+ systime_snapshot->valid_for_pvclock_update =
+ clk_flags & CLOCK_SOURCE_VALID_FOR_PVCLOCK_UPDATE,
systime_snapshot->real = ktime_add_ns(base_real, nsec_real);
systime_snapshot->raw = ktime_add_ns(base_raw, nsec_raw);
}
Powered by blists - more mailing lists