lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20070617151452.GA21971@dreamland.darkstar.lan>
Date:	Sun, 17 Jun 2007 17:14:52 +0200
From:	Luca Tettamanti <kronos.it@...il.com>
To:	Avi Kivity <avi@...ranet.com>
Cc:	kvm-devel@...ts.sourceforge.net, linux-kernel@...r.kernel.org
Subject: Re: [kvm-devel] [BUG] Oops with KVM-27

Il Sat, Jun 16, 2007 at 10:43:23AM +0300, Avi Kivity ha scritto: 
> Luca Tettamanti wrote:
> > Il Fri, Jun 15, 2007 at 12:06:50PM +0300, Avi Kivity ha scritto: 
> >   
> >>> After a bit of thinking: it's correct but removes an optimization;
> >>> furthermore it may miss other instructions that write to memory mapped
> >>> areas.
> >>> A more proper fix should be "force the writeback if dst.ptr is in some
> >>> kind of mmio area".
> >>>
> >>>       
> >> I think we can just disable the optimization, and (in a separate patch)
> >> add it back in emulator_write_phys(), where we know we're modifying
> >> memory and not hitting mmio.
> >>     
> >
> > Problem is that in emulator_write_phys() we've already lost track of the
> > previous (dst.orig_val) value. I moved up the decision in
> >   
> 
> Actually we haven't; just before the memcpy(), we can put a memcmp() to
> guard the kvm_mmu_pte_write(), which is the really expensive operation,
> especially with guest smp.

Yup, but it seemed wasteful to map (at least when highmem is in use) a
page just to check for something that we already knew. That was a
preemptive optmization though, I haven't actually benchmarked the cost
of setting up the mapping ;-)

[...]
> I think we can simply remove the if ().  For the register case, the
> check is more expensive that the write; for mmio, we don't want it; and
> for memory writes, we can put it in emulator_write_phys().

Ok, this way it's simpler. How does this look:

--- a/kernel/x86_emulate.c	2007-06-15 21:13:51.000000000 +0200
+++ b/kernel/x86_emulate.c	2007-06-17 16:57:50.000000000 +0200
@@ -1057,40 +1057,38 @@
 	}
 
 writeback:
-	if ((d & Mov) || (dst.orig_val != dst.val)) {
-		switch (dst.type) {
-		case OP_REG:
-			/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
-			switch (dst.bytes) {
-			case 1:
-				*(u8 *)dst.ptr = (u8)dst.val;
-				break;
-			case 2:
-				*(u16 *)dst.ptr = (u16)dst.val;
-				break;
-			case 4:
-				*dst.ptr = (u32)dst.val;
-				break;	/* 64b: zero-ext */
-			case 8:
-				*dst.ptr = dst.val;
-				break;
-			}
+	switch (dst.type) {
+	case OP_REG:
+		/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
+		switch (dst.bytes) {
+		case 1:
+			*(u8 *)dst.ptr = (u8)dst.val;
 			break;
-		case OP_MEM:
-			if (lock_prefix)
-				rc = ops->cmpxchg_emulated((unsigned long)dst.
-							   ptr, &dst.orig_val,
-							   &dst.val, dst.bytes,
-							   ctxt);
-			else
-				rc = ops->write_emulated((unsigned long)dst.ptr,
-							 &dst.val, dst.bytes,
-							 ctxt);
-			if (rc != 0)
-				goto done;
-		default:
+		case 2:
+			*(u16 *)dst.ptr = (u16)dst.val;
+			break;
+		case 4:
+			*dst.ptr = (u32)dst.val;
+			break;	/* 64b: zero-ext */
+		case 8:
+			*dst.ptr = dst.val;
 			break;
 		}
+		break;
+	case OP_MEM:
+		if (lock_prefix)
+			rc = ops->cmpxchg_emulated((unsigned long)dst.
+						   ptr, &dst.orig_val,
+						   &dst.val, dst.bytes,
+						   ctxt);
+		else
+			rc = ops->write_emulated((unsigned long)dst.ptr,
+						 &dst.val, dst.bytes,
+						 ctxt);
+		if (rc != 0)
+			goto done;
+	default:
+		break;
 	}
 
 	/* Commit shadow register state. */

--- a/kernel/kvm_main.c	2007-06-15 21:18:08.000000000 +0200
+++ b/kernel/kvm_main.c	2007-06-17 16:59:33.000000000 +0200
@@ -1139,8 +1139,10 @@
 		return 0;
 	mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT);
 	virt = kmap_atomic(page, KM_USER0);
-	kvm_mmu_pte_write(vcpu, gpa, virt + offset, val, bytes);
-	memcpy(virt + offset_in_page(gpa), val, bytes);
+	if (memcmp(virt + offset_in_page(gpa), val, bytes)) {
+		kvm_mmu_pte_write(vcpu, gpa, virt + offset, val, bytes);
+		memcpy(virt + offset_in_page(gpa), val, bytes);
+	}
 	kunmap_atomic(virt, KM_USER0);
 	return 1;
 }



Luca
-- 
Runtime error 6D at f000:a12f : user incompetente
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ