Date:   Wed, 13 Dec 2017 13:13:57 +0100
From:   Paolo Bonzini <pbonzini@...hat.com>
To:     linux-kernel@...r.kernel.org, kvm@...r.kernel.org
Cc:     Jim Mattson <jmattson@...gle.com>,
        Wanpeng Li <wanpeng.li@...mail.com>
Subject: [PATCH] KVM: VMX: optimize shadow VMCS copying

Because all fields can be read/written with a single vmread/vmwrite on
64-bit kernels, the switch statements in copy_vmcs12_to_shadow and
copy_shadow_to_vmcs12 are unnecessary.
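
For reference, this works because the raw accessors operate on an
unsigned long, which on x86_64 is as wide as the widest field.  A
minimal sketch of the idea (the real __vmcs_readl in vmx.c uses
guarded inline asm with error handling; this is only a simplification):

	/*
	 * Simplified sketch, not the exact vmx.c code: VMREAD moves
	 * the whole field into a general-purpose register, so on a
	 * 64-bit kernel "unsigned long" covers 16-bit, 32-bit, 64-bit
	 * and natural-width fields alike and no switch on the field
	 * type is needed.
	 */
	static __always_inline unsigned long __vmcs_readl(unsigned long field)
	{
		unsigned long value;

		asm volatile("vmread %1, %0" : "=r"(value) : "r"(field) : "cc");
		return value;
	}

__vmcs_writel is the analogous single-VMWRITE store.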

This patch removes those switch statements; on 32-bit kernels, the
two 32-bit halves of each 64-bit field are copied separately, which
keeps all the complicated #ifdef-ery in init_vmcs_shadow_fields.  The
disadvantage is that the high halves of 64-bit fields have to be
listed separately in shadow_read_only/read_write_fields, but such
fields are few and the arrays can be validated when building the
VMREAD and VMWRITE bitmaps.  This saves a few hundred clock cycles
per nested vmexit.
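
For context, the separate listing is cheap to validate because of how
VMCS fields are encoded: bit 0 of the encoding selects the high half
of a 64-bit field, so the _HIGH access is always the base encoding
plus one.  From arch/x86/include/asm/vmx.h, for example:

	GUEST_PHYSICAL_ADDRESS      = 0x00002400,
	GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,

A 64-bit entry is therefore well-formed exactly when the next array
element is field + 1, and on 64-bit kernels the odd (high) encodings
can simply be skipped when compacting the arrays.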

However, there is still a "switch" in vmcs12_read_any and
vmcs12_write_any.  So, while at it, this patch also reorders the
fields by type, hoping that the branch predictor appreciates it.
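
For reference, the field type comes straight from the encoding
(bits 14:13), so the switch is cheap but still a branch; at the time
of this patch, vmcs_field_type() in vmx.c is essentially:

	static inline int vmcs_field_type(unsigned long field)
	{
		if (0x1 & field)	/* the *_HIGH fields are all 32 bit */
			return VMCS_FIELD_TYPE_U32;
		return (field >> 13) & 0x3;
	}

With the arrays sorted by type, vmcs12_read_any and vmcs12_write_any
take the same branch for long runs of consecutive fields instead of
bouncing between the cases.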

Cc: Jim Mattson <jmattson@...gle.com>
Cc: Wanpeng Li <wanpeng.li@...mail.com>
Signed-off-by: Paolo Bonzini <pbonzini@...hat.com>
---
 arch/x86/kvm/vmx.c | 135 ++++++++++++++++++++++++-----------------------------
 1 file changed, 61 insertions(+), 74 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9fadba5338fd..3d3b0ae0da63 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -727,49 +727,59 @@ static struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
 	 * (e.g. force a sync if VM_INSTRUCTION_ERROR is modified
 	 * by nested_vmx_failValid)
 	 */
+	/* 32-bit */
 	VM_EXIT_REASON,
 	VM_EXIT_INTR_INFO,
 	VM_EXIT_INSTRUCTION_LEN,
 	IDT_VECTORING_INFO_FIELD,
 	IDT_VECTORING_ERROR_CODE,
 	VM_EXIT_INTR_ERROR_CODE,
+
+	/* Natural width */
 	EXIT_QUALIFICATION,
 	GUEST_LINEAR_ADDRESS,
-	GUEST_PHYSICAL_ADDRESS
+
+	/* 64-bit */
+	GUEST_PHYSICAL_ADDRESS,
+	GUEST_PHYSICAL_ADDRESS_HIGH,
 };
 static int max_shadow_read_only_fields =
 	ARRAY_SIZE(shadow_read_only_fields);
 
 static unsigned long shadow_read_write_fields[] = {
+	/* 16-bit */
+	GUEST_CS_SELECTOR,
+	GUEST_INTR_STATUS,
+	GUEST_PML_INDEX,
+	HOST_FS_SELECTOR,
+	HOST_GS_SELECTOR,
+
+	/* 32-bit */
+	CPU_BASED_VM_EXEC_CONTROL,
+	EXCEPTION_BITMAP,
+	VM_ENTRY_EXCEPTION_ERROR_CODE,
+	VM_ENTRY_INTR_INFO_FIELD,
+	VM_ENTRY_INSTRUCTION_LEN,
 	TPR_THRESHOLD,
+	GUEST_CS_LIMIT,
+	GUEST_CS_AR_BYTES,
+	GUEST_INTERRUPTIBILITY_INFO,
+	VMX_PREEMPTION_TIMER_VALUE,
+
+	/* Natural width */
 	GUEST_RIP,
 	GUEST_RSP,
 	GUEST_CR0,
 	GUEST_CR3,
 	GUEST_CR4,
-	GUEST_INTERRUPTIBILITY_INFO,
 	GUEST_RFLAGS,
-	GUEST_CS_SELECTOR,
-	GUEST_CS_AR_BYTES,
-	GUEST_CS_LIMIT,
 	GUEST_CS_BASE,
 	GUEST_ES_BASE,
-	GUEST_PML_INDEX,
-	GUEST_INTR_STATUS,
-	VMX_PREEMPTION_TIMER_VALUE,
 	CR0_GUEST_HOST_MASK,
 	CR0_READ_SHADOW,
 	CR4_READ_SHADOW,
-	EXCEPTION_BITMAP,
-	CPU_BASED_VM_EXEC_CONTROL,
-	VM_ENTRY_EXCEPTION_ERROR_CODE,
-	VM_ENTRY_INTR_INFO_FIELD,
-	VM_ENTRY_INSTRUCTION_LEN,
-	VM_ENTRY_EXCEPTION_ERROR_CODE,
 	HOST_FS_BASE,
 	HOST_GS_BASE,
-	HOST_FS_SELECTOR,
-	HOST_GS_SELECTOR
 };
 static int max_shadow_read_write_fields =
 	ARRAY_SIZE(shadow_read_write_fields);
@@ -4068,15 +4078,39 @@ static void init_vmcs_shadow_fields(void)
 {
 	int i, j;
 
-	/* No checks for read only fields yet */
+	for (i = j = 0; i < max_shadow_read_only_fields; i++) {
+		u16 field = shadow_read_only_fields[i];
+		if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64 &&
+		    (i + 1 == max_shadow_read_only_fields ||
+		     shadow_read_only_fields[i + 1] != field + 1))
+			pr_err("Missing field from shadow_read_only_field %x\n",
+			       field + 1);
+
+		clear_bit(field, vmx_vmread_bitmap);
+#ifdef CONFIG_X86_64
+		if (field & 1)
+			continue;
+#endif
+		if (j < i)
+			shadow_read_only_fields[j] = field;
+		j++;
+	}
+	max_shadow_read_only_fields = j;
 
 	for (i = j = 0; i < max_shadow_read_write_fields; i++) {
+		u16 field = shadow_read_write_fields[i];
+		if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64 &&
+		    (i + 1 == max_shadow_read_write_fields ||
+		     shadow_read_write_fields[i + 1] != field + 1))
+			pr_err("Missing field from shadow_read_write_field %x\n",
+			       field + 1);
+
 		/*
 		 * PML and the preemption timer can be emulated, but the
 		 * processor cannot vmwrite to fields that don't exist
 		 * on bare metal.
 		 */
-		switch (shadow_read_write_fields[i]) {
+		switch (field) {
 		case GUEST_PML_INDEX:
 			if (!cpu_has_vmx_pml())
 				continue;
@@ -4093,31 +4127,17 @@ static void init_vmcs_shadow_fields(void)
 			break;
 		}
 
+		clear_bit(field, vmx_vmwrite_bitmap);
+		clear_bit(field, vmx_vmread_bitmap);
+#ifdef CONFIG_X86_64
+		if (field & 1)
+			continue;
+#endif
 		if (j < i)
-			shadow_read_write_fields[j] =
-				shadow_read_write_fields[i];
+			shadow_read_write_fields[j] = field;
 		j++;
 	}
 	max_shadow_read_write_fields = j;
-
-	/* shadowed fields guest access without vmexit */
-	for (i = 0; i < max_shadow_read_write_fields; i++) {
-		unsigned long field = shadow_read_write_fields[i];
-
-		clear_bit(field, vmx_vmwrite_bitmap);
-		clear_bit(field, vmx_vmread_bitmap);
-		if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64) {
-			clear_bit(field + 1, vmx_vmwrite_bitmap);
-			clear_bit(field + 1, vmx_vmread_bitmap);
-		}
-	}
-	for (i = 0; i < max_shadow_read_only_fields; i++) {
-		unsigned long field = shadow_read_only_fields[i];
-
-		clear_bit(field, vmx_vmread_bitmap);
-		if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64)
-			clear_bit(field + 1, vmx_vmread_bitmap);
-	}
 }
 
 static __init int alloc_kvm_area(void)
@@ -7820,23 +7840,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
 
 	for (i = 0; i < num_fields; i++) {
 		field = fields[i];
-		switch (vmcs_field_type(field)) {
-		case VMCS_FIELD_TYPE_U16:
-			field_value = vmcs_read16(field);
-			break;
-		case VMCS_FIELD_TYPE_U32:
-			field_value = vmcs_read32(field);
-			break;
-		case VMCS_FIELD_TYPE_U64:
-			field_value = vmcs_read64(field);
-			break;
-		case VMCS_FIELD_TYPE_NATURAL_WIDTH:
-			field_value = vmcs_readl(field);
-			break;
-		default:
-			WARN_ON(1);
-			continue;
-		}
+		field_value = __vmcs_readl(field);
 		vmcs12_write_any(&vmx->vcpu, field, field_value);
 	}
 
@@ -7867,24 +7871,7 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
 		for (i = 0; i < max_fields[q]; i++) {
 			field = fields[q][i];
 			vmcs12_read_any(&vmx->vcpu, field, &field_value);
-
-			switch (vmcs_field_type(field)) {
-			case VMCS_FIELD_TYPE_U16:
-				vmcs_write16(field, (u16)field_value);
-				break;
-			case VMCS_FIELD_TYPE_U32:
-				vmcs_write32(field, (u32)field_value);
-				break;
-			case VMCS_FIELD_TYPE_U64:
-				vmcs_write64(field, (u64)field_value);
-				break;
-			case VMCS_FIELD_TYPE_NATURAL_WIDTH:
-				vmcs_writel(field, (long)field_value);
-				break;
-			default:
-				WARN_ON(1);
-				break;
-			}
+			__vmcs_writel(field, field_value);
 		}
 	}
 
-- 
1.8.3.1
