linux-kernel - Re: [PART1 RFC v3 07/12] svm: Add interrupt injection via AVIC

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]

Message-ID: <56EBD6FE.2050807@redhat.com>
Date:	Fri, 18 Mar 2016 11:22:54 +0100
From:	Paolo Bonzini <pbonzini@...hat.com>
To:	Suravee Suthikulpanit <Suravee.Suthikulpanit@....com>,
	rkrcmar@...hat.com, joro@...tes.org, bp@...en8.de, gleb@...nel.org,
	alex.williamson@...hat.com
Cc:	kvm@...r.kernel.org, linux-kernel@...r.kernel.org, wei@...hat.com,
	sherry.hurwitz@....com
Subject: Re: [PART1 RFC v3 07/12] svm: Add interrupt injection via AVIC



On 18/03/2016 07:09, Suravee Suthikulpanit wrote:
> This patch introduces a new mechanism to inject interrupt using AVIC.
> Since VINTR is not supported when enable AVIC, we need to inject
> interrupt via APIC backing page instead.
> 
> This patch also adds support for AVIC doorbell, which is used by
> KVM to signal a running vcpu to check IRR for injected interrupts.
> 
> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@....com>

Looks good, but I think it breaks nested virtualization.  See below.

> ---
>  arch/x86/kvm/svm.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 44 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 621c948..8e31ad3 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -71,6 +71,8 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
>  #define SVM_FEATURE_DECODE_ASSIST  (1 <<  7)
>  #define SVM_FEATURE_PAUSE_FILTER   (1 << 10)
>  
> +#define SVM_AVIC_DOORBELL	0xc001011b
> +
>  #define NESTED_EXIT_HOST	0	/* Exit handled on host level */
>  #define NESTED_EXIT_DONE	1	/* Exit caused nested vmexit  */
>  #define NESTED_EXIT_CONTINUE	2	/* Further checks needed      */
> @@ -217,6 +219,8 @@ static bool npt_enabled = true;
>  static bool npt_enabled;
>  #endif
>  
> +static struct kvm_x86_ops svm_x86_ops;
> +
>  /* allow nested paging (virtualized MMU) for all guests */
>  static int npt = true;
>  module_param(npt, int, S_IRUGO);
> @@ -291,6 +295,18 @@ static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
>  	return container_of(vcpu, struct vcpu_svm, vcpu);
>  }
>  
> +static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu)
> +{
> +	struct vcpu_svm *svm = to_svm(vcpu);
> +	u64 *entry;
> +
> +	entry = svm->avic_phy_apic_id_cache;
> +	if (!entry)
> +		return false;
> +
> +	return (READ_ONCE(*entry) & AVIC_PHY_APIC_ID__IS_RUN_MSK);

AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK

> +}
> +
>  static void recalc_intercepts(struct vcpu_svm *svm)
>  {
>  	struct vmcb_control_area *c, *h;
> @@ -964,6 +980,8 @@ static __init int svm_hardware_setup(void)
>  
>  	if (avic) {
>  		printk(KERN_INFO "kvm: AVIC enabled\n");
> +	} else {
> +		svm_x86_ops.deliver_posted_interrupt = NULL;
>  	}
>  
>  	return 0;
> @@ -2877,8 +2895,10 @@ static int clgi_interception(struct vcpu_svm *svm)
>  	disable_gif(svm);
>  
>  	/* After a CLGI no interrupts should come */
> -	svm_clear_vintr(svm);
> -	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
> +	if (!svm_vcpu_avic_enabled(svm)) {
> +		svm_clear_vintr(svm);
> +		svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
> +	}

This is for nested virtualization.  Unless you support nested AVIC, the
L2 guest should run without AVIC (i.e. IsRunning should be false) and
use the old VINTR mechanism.

>  	mark_dirty(svm->vmcb, VMCB_INTR);
>  
> @@ -3453,6 +3473,9 @@ static int msr_interception(struct vcpu_svm *svm)
>  
>  static int interrupt_window_interception(struct vcpu_svm *svm)
>  {
> +	if (svm_vcpu_avic_enabled(svm))
> +		BUG();

Please WARN and return, but I suspect this may also need some changes
for nested virtualization.

>  	kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
>  	svm_clear_vintr(svm);
>  	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
> @@ -3767,6 +3790,7 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
>  {
>  	struct vmcb_control_area *control;
>  
> +	/* The following fields are ignored when AVIC is enabled */
>  	control = &svm->vmcb->control;
>  	control->int_vector = irq;
>  	control->int_ctl &= ~V_INTR_PRIO_MASK;
> @@ -3844,6 +3868,20 @@ static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu)
>  	return;
>  }
>  
> +static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
> +{
> +	struct vcpu_svm *svm = to_svm(vcpu);
> +
> +	kvm_lapic_set_vector(vec, svm->vcpu.arch.apic->regs + APIC_IRR);
> +	smp_mb__after_atomic();
> +
> +	if (avic_vcpu_is_running(vcpu))
> +		wrmsrl(SVM_AVIC_DOORBELL,
> +		       __default_cpu_present_to_apicid(vcpu->cpu));
> +	else
> +		kvm_vcpu_wake_up(vcpu);
> +}
> +
>  static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
> @@ -3904,6 +3942,9 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
>  	 * get that intercept, this function will be called again though and
>  	 * we'll get the vintr intercept.
>  	 */
> +	if (svm_vcpu_avic_enabled(svm))
> +		return;

Same here.

>  	if (gif_set(svm) && nested_svm_intr(svm)) {
>  		svm_set_vintr(svm);
>  		svm_inject_irq(svm, 0x0);
> @@ -4638,6 +4679,7 @@ static struct kvm_x86_ops svm_x86_ops = {
>  	.sched_in = svm_sched_in,
>  
>  	.pmu_ops = &amd_pmu_ops,
> +	.deliver_posted_interrupt = svm_deliver_avic_intr,
>  };
>  
>  static int __init svm_init(void)
>