lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <54089DDE.6010004@redhat.com>
Date:	Thu, 04 Sep 2014 19:14:06 +0200
From:	Paolo Bonzini <pbonzini@...hat.com>
To:	Chris J Arges <chris.j.arges@...onical.com>,
	linux-kernel@...r.kernel.org
CC:	kvm@...r.kernel.org, Thomas Gleixner <tglx@...utronix.de>,
	John Stultz <john.stultz@...aro.org>
Subject: Re: [PATCH] KVM: x86: fix kvmclock breakage from timers branch merge

Il 04/09/2014 18:00, Chris J Arges ha scritto:
> Uptime:
>  15:58:02 up  1:00,  1 user,  load average: 0.59, 0.60, 0.31
> 
> Here is the output:
> 
> ./x86-run x86/kvmclock_test.flat -smp 2 --append "10000000 `date +%s`"
> qemu-system-x86_64 -enable-kvm -device pc-testdev -device
> isa-debug-exit,iobase=0xf4,iosize=0x4 -display none -serial stdio
> -device pci-testdev -kernel x86/kvmclock_test.flat -smp 2 --append
> 10000000 1409846210
> enabling apic
> enabling apic
> kvm-clock: cpu 0, msr 0x:44d4c0
> kvm-clock: cpu 0, msr 0x:44d4c0
> Wallclock test, threshold 5
> Seconds get from host:     1409846210
> Seconds get from kvmclock: 2819688866
> Offset:                    1409842656

With kvm/queue this would have been roughly -3600, now it's 
host_wallclock-3600.  So the patch hasn't fixed the -3600 part for you.

Can you try applying this patch on top of 3.16?  This is my backport of
Thomas's patch.  If this works for you, we "only" have to find out how
to compute boot_ns and nsec_base in the new timekeeping world order of
3.17...

Thomas, do you have any ideas?  Every time a VM is started, the kvmclock
starts at the boot time of the host, instead of the current wallclock time.

Paolo

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d38abc81db65..70de23f1de51 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1020,9 +1020,8 @@ struct pvclock_gtod_data {
 		u32	shift;
 	} clock;
 
-	/* open coded 'struct timespec' */
-	u64		monotonic_time_snsec;
-	time_t		monotonic_time_sec;
+	u64		boot_ns;
+	u64		nsec_base;
 };
 
 static struct pvclock_gtod_data pvclock_gtod_data;
@@ -1030,6 +1029,12 @@ static struct pvclock_gtod_data pvclock_gtod_data;
 static void update_pvclock_gtod(struct timekeeper *tk)
 {
 	struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
+	u64 boot_ns;
+
+	boot_ns = timespec_to_ns(&tk->total_sleep_time)
+		+ tk->wall_to_monotonic.tv_sec * (u64)NSEC_PER_SEC
+		+ tk->wall_to_monotonic.tv_nsec
+		+ tk->xtime_sec * (u64)NSEC_PER_SEC;
 
 	write_seqcount_begin(&vdata->seq);
 
@@ -1040,17 +1044,8 @@ static void update_pvclock_gtod(struct timekeeper *tk)
 	vdata->clock.mult		= tk->mult;
 	vdata->clock.shift		= tk->shift;
 
-	vdata->monotonic_time_sec	= tk->xtime_sec
-					+ tk->wall_to_monotonic.tv_sec;
-	vdata->monotonic_time_snsec	= tk->xtime_nsec
-					+ (tk->wall_to_monotonic.tv_nsec
-						<< tk->shift);
-	while (vdata->monotonic_time_snsec >=
-					(((u64)NSEC_PER_SEC) << tk->shift)) {
-		vdata->monotonic_time_snsec -=
-					((u64)NSEC_PER_SEC) << tk->shift;
-		vdata->monotonic_time_sec++;
-	}
+	vdata->boot_ns                  = boot_ns;
+	vdata->nsec_base		= tk->xtime_nsec;
 
 	write_seqcount_end(&vdata->seq);
 }
@@ -1414,23 +1409,22 @@ static inline u64 vgettsc(cycle_t *cycle_now)
 	return v * gtod->clock.mult;
 }
 
-static int do_monotonic(struct timespec *ts, cycle_t *cycle_now)
+static int do_monotonic_boot(s64 *t, cycle_t *cycle_now)
 {
+	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
 	unsigned long seq;
-	u64 ns;
 	int mode;
-	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+	u64 ns;
 
-	ts->tv_nsec = 0;
 	do {
 		seq = read_seqcount_begin(&gtod->seq);
 		mode = gtod->clock.vclock_mode;
-		ts->tv_sec = gtod->monotonic_time_sec;
-		ns = gtod->monotonic_time_snsec;
+		ns = gtod->nsec_base;
 		ns += vgettsc(cycle_now);
 		ns >>= gtod->clock.shift;
+		ns += gtod->boot_ns;
 	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
-	timespec_add_ns(ts, ns);
+	*t = ns;
 
 	return mode;
 }
@@ -1438,19 +1432,11 @@ static int do_monotonic(struct timespec *ts, cycle_t *cycle_now)
 /* returns true if host is using tsc clocksource */
 static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now)
 {
-	struct timespec ts;
-
 	/* checked again under seqlock below */
 	if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
 		return false;
 
-	if (do_monotonic(&ts, cycle_now) != VCLOCK_TSC)
-		return false;
-
-	monotonic_to_bootbased(&ts);
-	*kernel_ns = timespec_to_ns(&ts);
-
-	return true;
+	return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC;
 }
 #endif
 


> My observation is that the kvmclock value seems to be positively biased
> by the boot_ns value.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ