lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <4C827CE1-CF87-45BE-BE84-42ABDEA3DE8D@oracle.com>
Date:   Tue, 19 Nov 2019 20:58:32 +0200
From:   Liran Alon <liran.alon@...cle.com>
To:     Sean Christopherson <sean.j.christopherson@...el.com>
Cc:     Wanpeng Li <kernellwp@...il.com>, linux-kernel@...r.kernel.org,
        kvm@...r.kernel.org, Paolo Bonzini <pbonzini@...hat.com>,
        Radim Krčmář <rkrcmar@...hat.com>,
        Vitaly Kuznetsov <vkuznets@...hat.com>,
        Wanpeng Li <wanpengli@...cent.com>,
        Jim Mattson <jmattson@...gle.com>,
        Joerg Roedel <joro@...tes.org>
Subject: Re: [PATCH v2 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI
 fastpath



> On 19 Nov 2019, at 20:36, Sean Christopherson <sean.j.christopherson@...el.com> wrote:
> 
> On Tue, Nov 19, 2019 at 02:36:28PM +0800, Wanpeng Li wrote:
>> From: Wanpeng Li <wanpengli@...cent.com>
>> 
>> ICR and TSCDEADLINE MSRs write cause the main MSRs write vmexits in 
>> our product observation, multicast IPIs are not as common as unicast 
>> IPI like RESCHEDULE_VECTOR and CALL_FUNCTION_SINGLE_VECTOR etc.
>> 
>> This patch tries to optimize x2apic physical destination mode, fixed 
>> delivery mode single target IPI by delivering IPI to receiver as soon 
>> as possible after sender writes ICR vmexit to avoid various checks 
>> when possible, especially when running guest w/ --overcommit cpu-pm=on
>> or guest can keep running, IPI can be injected to target vCPU by 
>> posted-interrupt immediately.
>> 
>> Testing on Xeon Skylake server:
>> 
>> The virtual IPI latency from sender send to receiver receive reduces 
>> more than 200+ cpu cycles.
>> 
>> Signed-off-by: Wanpeng Li <wanpengli@...cent.com>
>> ---
>> v1 -> v2:
>> * add tracepoint
>> * Instead of a separate vcpu->fast_vmexit, set exit_reason
>>   to vmx->exit_reason to -1 if the fast path succeeds.
>> * move the "kvm_skip_emulated_instruction(vcpu)" to vmx_handle_exit
>> * moving the handling into vmx_handle_exit_irqoff()
>> 
>> arch/x86/include/asm/kvm_host.h |  4 ++--
>> arch/x86/include/uapi/asm/vmx.h |  1 +
>> arch/x86/kvm/svm.c              |  4 ++--
>> arch/x86/kvm/vmx/vmx.c          | 40 +++++++++++++++++++++++++++++++++++++---
>> arch/x86/kvm/x86.c              |  5 +++--
>> 5 files changed, 45 insertions(+), 9 deletions(-)
>> 
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index 898ab9e..0daafa9 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -1084,7 +1084,7 @@ struct kvm_x86_ops {
>> 	void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr);
>> 
>> 	void (*run)(struct kvm_vcpu *vcpu);
>> -	int (*handle_exit)(struct kvm_vcpu *vcpu);
>> +	int (*handle_exit)(struct kvm_vcpu *vcpu, u32 *vcpu_exit_reason);
>> 	int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
>> 	void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
>> 	u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
>> @@ -1134,7 +1134,7 @@ struct kvm_x86_ops {
>> 	int (*check_intercept)(struct kvm_vcpu *vcpu,
>> 			       struct x86_instruction_info *info,
>> 			       enum x86_intercept_stage stage);
>> -	void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
>> +	void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu, u32 *vcpu_exit_reason);
>> 	bool (*mpx_supported)(void);
>> 	bool (*xsaves_supported)(void);
>> 	bool (*umip_emulated)(void);
>> diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
>> index 3eb8411..b33c6e1 100644
>> --- a/arch/x86/include/uapi/asm/vmx.h
>> +++ b/arch/x86/include/uapi/asm/vmx.h
>> @@ -88,6 +88,7 @@
>> #define EXIT_REASON_XRSTORS             64
>> #define EXIT_REASON_UMWAIT              67
>> #define EXIT_REASON_TPAUSE              68
>> +#define EXIT_REASON_NEED_SKIP_EMULATED_INSN -1
>> 
>> #define VMX_EXIT_REASONS \
>> 	{ EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
> 
> Rather than pass a custom exit reason around, can we simply handle *all*
> x2apic ICR writes during handle_exit_irqoff() for both VMX and SVM?  The
> only risk I can think of is that KVM could stall too long before enabling
> IRQs.
> 

I agree that if it doesn’t cause to run with interrupts disabled then this is a nicer approach.
However, I think we may generalise a bit this patch to a clear code-path where accelerated exit handling
should be put. See my other reply in this email thread and tell me what you think:
https://www.spinics.net/lists/kernel/msg3322282.html

-Liran

> 
> From 1ea8ff1aa766928c869ef7c1eb437fe4f7b8daf9 Mon Sep 17 00:00:00 2001
> From: Sean Christopherson <sean.j.christopherson@...el.com>
> Date: Tue, 19 Nov 2019 09:50:42 -0800
> Subject: [PATCH] KVM: x86: Add a fast path for sending virtual IPIs in x2APIC
> mode
> 
> Add a fast path to handle writes to the ICR when the local APIC is
> emulated in the kernel and x2APIC is enabled.  The fast path is invoked
> at ->handle_exit_irqoff() to emulate only the effect of the ICR write
> itself, i.e. the sending of IPIs.  Sending IPIs early in the VM-Exit
> flow reduces the latency of virtual IPIs by avoiding the expensive bits
> of transitioning from guest to host, e.g. reacquiring KVM's SRCU lock.
> 
> Suggested-by: Wanpeng Li <wanpengli@...cent.com>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@...el.com>
> ---
> arch/x86/kvm/emulate.c |  1 -
> arch/x86/kvm/lapic.c   |  5 +++--
> arch/x86/kvm/lapic.h   | 25 +++++++++++++++++++++++++
> arch/x86/kvm/svm.c     |  3 +++
> arch/x86/kvm/vmx/vmx.c |  2 ++
> 5 files changed, 33 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index 952d1a4f4d7e..8313234e7d64 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -19,7 +19,6 @@
>  */
> 
> #include <linux/kvm_host.h>
> -#include "kvm_cache_regs.h"
> #include <asm/kvm_emulate.h>
> #include <linux/stringify.h>
> #include <asm/debugreg.h>
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 452cedd6382b..0f02820332d4 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -2655,9 +2655,10 @@ int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> 	if (reg == APIC_ICR2)
> 		return 1;
> 
> -	/* if this is ICR write vector before command */
> +	/* ICR writes are handled early by kvm_x2apic_fast_icr_write(). */
> 	if (reg == APIC_ICR)
> -		kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
> +		return 0;
> +
> 	return kvm_lapic_reg_write(apic, reg, (u32)data);
> }
> 
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index c1d77436126a..19fd2734d9e6 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -6,6 +6,8 @@
> 
> #include <linux/kvm_host.h>
> 
> +#include "kvm_cache_regs.h"
> +
> #define KVM_APIC_INIT		0
> #define KVM_APIC_SIPI		1
> #define KVM_APIC_LVT_NUM	6
> @@ -245,4 +247,27 @@ static inline enum lapic_mode kvm_apic_mode(u64 apic_base)
> 	return apic_base & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
> }
> 
> +/*
> + * Fast path for sending virtual IPIs immediately after VM-Exit.  Fault
> + * detection and injection, e.g. if x2apic is disabled, tracing and/or skipping
> + * of the emulated instruction are all handled in the standard WRMSR path,
> + * kvm_emulate_wrmsr().
> + */
> +static inline void kvm_x2apic_fast_icr_write(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_lapic *apic = vcpu->arch.apic;
> +	u64 data;
> +
> +	if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
> +		return;
> +
> +	if (kvm_rcx_read(vcpu) != (APIC_BASE_MSR + (APIC_ICR >> 4)))
> +		return;
> +
> +	data = kvm_read_edx_eax(vcpu);
> +
> +	kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
> +	WARN_ON_ONCE(kvm_lapic_reg_write(apic, APIC_ICR, (u32)data));
> +}
> +
> #endif
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index d02a73a48461..713510210b29 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -6189,7 +6189,10 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
> 
> static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
> {
> +	struct vcpu_svm *svm = to_svm(vcpu);
> 
> +	if (svm->vmcb->control.exit_code && svm->vmcb->control.exit_info_1)
> +		kvm_x2apic_fast_icr_write(vcpu);
> }
> 
> static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 621142e55e28..82412c4085fc 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -6231,6 +6231,8 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
> 		handle_external_interrupt_irqoff(vcpu);
> 	else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)
> 		handle_exception_nmi_irqoff(vmx);
> +	else if (vmx->exit_reason == EXIT_REASON_MSR_WRITE)
> +		kvm_x2apic_fast_icr_write(vcpu);
> }
> 
> static bool vmx_has_emulated_msr(int index)
> -- 
> 2.24.0
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ