[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <09CD3BD3-1F5E-48DA-82ED-58E3196DBD83@oracle.com>
Date: Tue, 19 Nov 2019 14:11:41 +0200
From: Liran Alon <liran.alon@...cle.com>
To: Wanpeng Li <kernellwp@...il.com>
Cc: linux-kernel@...r.kernel.org, kvm@...r.kernel.org,
Paolo Bonzini <pbonzini@...hat.com>,
Radim Krčmář <rkrcmar@...hat.com>,
Sean Christopherson <sean.j.christopherson@...el.com>,
Vitaly Kuznetsov <vkuznets@...hat.com>,
Wanpeng Li <wanpengli@...cent.com>,
Jim Mattson <jmattson@...gle.com>,
Joerg Roedel <joro@...tes.org>
Subject: Re: [PATCH v2 1/2] KVM: VMX: FIXED+PHYSICAL mode single target IPI
fastpath
> On 19 Nov 2019, at 8:36, Wanpeng Li <kernellwp@...il.com> wrote:
>
> From: Wanpeng Li <wanpengli@...cent.com>
>
> ICR and TSCDEADLINE MSRs write cause the main MSRs write vmexits in
> our product observation, multicast IPIs are not as common as unicast
> IPI like RESCHEDULE_VECTOR and CALL_FUNCTION_SINGLE_VECTOR etc.
>
> This patch tries to optimize x2apic physical destination mode, fixed
> delivery mode single target IPI by delivering IPI to receiver as soon
> as possible after sender writes ICR vmexit to avoid various checks
> when possible, especially when running guest w/ --overcommit cpu-pm=on
> or guest can keep running, IPI can be injected to target vCPU by
> posted-interrupt immediately.
>
> Testing on Xeon Skylake server:
>
> The virtual IPI latency from sender send to receiver receive reduces
> more than 200+ cpu cycles.
>
> Signed-off-by: Wanpeng Li <wanpengli@...cent.com>
> ---
> v1 -> v2:
> * add tracepoint
> * Instead of a separate vcpu->fast_vmexit, set exit_reason
> to vmx->exit_reason to -1 if the fast path succeeds.
> * move the "kvm_skip_emulated_instruction(vcpu)" to vmx_handle_exit
> * moving the handling into vmx_handle_exit_irqoff()
>
> arch/x86/include/asm/kvm_host.h | 4 ++--
> arch/x86/include/uapi/asm/vmx.h | 1 +
> arch/x86/kvm/svm.c | 4 ++--
> arch/x86/kvm/vmx/vmx.c | 40 +++++++++++++++++++++++++++++++++++++---
> arch/x86/kvm/x86.c | 5 +++--
> 5 files changed, 45 insertions(+), 9 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 898ab9e..0daafa9 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1084,7 +1084,7 @@ struct kvm_x86_ops {
> void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr);
>
> void (*run)(struct kvm_vcpu *vcpu);
> - int (*handle_exit)(struct kvm_vcpu *vcpu);
> + int (*handle_exit)(struct kvm_vcpu *vcpu, u32 *vcpu_exit_reason);
> int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
> void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
> u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
> @@ -1134,7 +1134,7 @@ struct kvm_x86_ops {
> int (*check_intercept)(struct kvm_vcpu *vcpu,
> struct x86_instruction_info *info,
> enum x86_intercept_stage stage);
> - void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
> + void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu, u32 *vcpu_exit_reason);
> bool (*mpx_supported)(void);
> bool (*xsaves_supported)(void);
> bool (*umip_emulated)(void);
> diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
> index 3eb8411..b33c6e1 100644
> --- a/arch/x86/include/uapi/asm/vmx.h
> +++ b/arch/x86/include/uapi/asm/vmx.h
> @@ -88,6 +88,7 @@
> #define EXIT_REASON_XRSTORS 64
> #define EXIT_REASON_UMWAIT 67
> #define EXIT_REASON_TPAUSE 68
> +#define EXIT_REASON_NEED_SKIP_EMULATED_INSN -1
>
> #define VMX_EXIT_REASONS \
> { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index d02a73a..c8e063a 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -4929,7 +4929,7 @@ static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
> *info2 = control->exit_info_2;
> }
>
> -static int handle_exit(struct kvm_vcpu *vcpu)
> +static int handle_exit(struct kvm_vcpu *vcpu, u32 *vcpu_exit_reason)
> {
> struct vcpu_svm *svm = to_svm(vcpu);
> struct kvm_run *kvm_run = vcpu->run;
> @@ -6187,7 +6187,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
> return ret;
> }
>
> -static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
> +static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu, u32 *vcpu_exit_reason)
> {
>
> }
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 621142e5..b98198d 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -5792,7 +5792,7 @@ void dump_vmcs(void)
> * The guest has exited. See if we can fix it or if we need userspace
> * assistance.
> */
> -static int vmx_handle_exit(struct kvm_vcpu *vcpu)
> +static int vmx_handle_exit(struct kvm_vcpu *vcpu, u32 *vcpu_exit_reason)
vmx_handle_exit() should get second parameter by value and not by pointer. As it doesn’t need to modify it.
I would also rename parameter to “accel_exit_completion” to indicate this is additional work that needs to happen to complete accelerated-exit handling.
This parameter should be an enum that currently only have 2 values: ACCEL_EXIT_NONE and ACCEL_EXIT_SKIP_EMUL_INS.
> {
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> u32 exit_reason = vmx->exit_reason;
> @@ -5878,7 +5878,10 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
> }
> }
>
> - if (exit_reason < kvm_vmx_max_exit_handlers
> + if (*vcpu_exit_reason == EXIT_REASON_NEED_SKIP_EMULATED_INSN) {
> + kvm_skip_emulated_instruction(vcpu);
> + return 1;
> + } else if (exit_reason < kvm_vmx_max_exit_handlers
> && kvm_vmx_exit_handlers[exit_reason]) {
> #ifdef CONFIG_RETPOLINE
> if (exit_reason == EXIT_REASON_MSR_WRITE)
> @@ -6223,7 +6226,36 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
> }
> STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff);
>
> -static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
> +static u32 handle_ipi_fastpath(struct kvm_vcpu *vcpu)
> +{
> + u32 index;
> + u64 data;
> + int ret = 0;
> +
> + if (lapic_in_kernel(vcpu) && apic_x2apic_mode(vcpu->arch.apic)) {
> + /*
> + * fastpath to IPI target, FIXED+PHYSICAL which is popular
> + */
> + index = kvm_rcx_read(vcpu);
> + data = kvm_read_edx_eax(vcpu);
> +
> + if (((index - APIC_BASE_MSR) << 4 == APIC_ICR) &&
> + ((data & KVM_APIC_DEST_MASK) == APIC_DEST_PHYSICAL) &&
> + ((data & APIC_MODE_MASK) == APIC_DM_FIXED)) {
> +
> + trace_kvm_msr_write(index, data);
On a standard EXIT_REASON_MSR_WRITE VMExit, this trace will be printed only after LAPIC emulation logic happens.
You should preserve same ordering.
> + kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR2, (u32)(data >> 32));
> + ret = kvm_lapic_reg_write(vcpu->arch.apic, APIC_ICR, (u32)data);
> +
> + if (ret == 0)
> + return EXIT_REASON_NEED_SKIP_EMULATED_INSN;
> + }
> + }
> +
> + return ret;
> +}
Maybe it would be more elegant to modify this function as follows?
static int handle_accel_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
if (lapic_in_kernel(vcpu) && apic_x2apic_mode(vcpu->arch.apic) &&
((data & KVM_APIC_DEST_MASK) == APIC_DEST_PHYSICAL) &&
((data & APIC_MODE_MASK) == APIC_DM_FIXED)) {
kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR2, (u32)(data >> 32));
return kvm_lapic_reg_write(vcpu->arch.apic, APIC_ICR, (u32)data);
}
return 1;
}
static enum accel_exit_completion handle_accel_set_msr_irqoff(struct kvm_vcpu *vcpu)
{
u32 msr = kvm_rcx_read(vcpu);
u64 data = kvm_read_edx_eax(vcpu);
int ret = 0;
switch (msr) {
case APIC_BASE_MSR + (APIC_ICR >> 4):
ret = handle_accel_set_x2apic_icr_irqoff(vcpu, msr, data);
break;
default:
return ACCEL_EXIT_NONE;
}
if (!ret) {
trace_kvm_msr_write(msr, data);
return ACCEL_EXIT_SKIP_EMUL_INS;
}
return ACCEL_EXIT_NONE;
}
> +
> +static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu, u32 *exit_reason)
> {
> struct vcpu_vmx *vmx = to_vmx(vcpu);
>
> @@ -6231,6 +6263,8 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
> handle_external_interrupt_irqoff(vcpu);
> else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)
> handle_exception_nmi_irqoff(vmx);
> + else if (vmx->exit_reason == EXIT_REASON_MSR_WRITE)
> + *exit_reason = handle_ipi_fastpath(vcpu);
1) This case requires a comment as the only reason it is called here is an optimisation.
In contrast to the other cases which must be called before interrupts are enabled on the host.
2) I would rename handler to handle_accel_set_msr_irqoff().
To signal this handler runs with host interrupts disabled and to make it a general place for accelerating WRMSRs in case we would require more in the future.
-Liran
> }
>
> static bool vmx_has_emulated_msr(int index)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 991dd01..a53bce3 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7981,6 +7981,7 @@ EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);
> static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> {
> int r;
> + u32 exit_reason = 0;
> bool req_int_win =
> dm_request_for_irq_injection(vcpu) &&
> kvm_cpu_accept_dm_intr(vcpu);
> @@ -8226,7 +8227,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> vcpu->mode = OUTSIDE_GUEST_MODE;
> smp_wmb();
>
> - kvm_x86_ops->handle_exit_irqoff(vcpu);
> + kvm_x86_ops->handle_exit_irqoff(vcpu, &exit_reason);
>
> /*
> * Consume any pending interrupts, including the possible source of
> @@ -8270,7 +8271,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> kvm_lapic_sync_from_vapic(vcpu);
>
> vcpu->arch.gpa_available = false;
> - r = kvm_x86_ops->handle_exit(vcpu);
> + r = kvm_x86_ops->handle_exit(vcpu, &exit_reason);
> return r;
>
> cancel_injection:
> --
> 2.7.4
>
Powered by blists - more mailing lists