linux-kernel - Re: [PATCH v5 6/8] KVM: VMX: enable IPI virtualization

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <YeCeCYY2UTL/T1Tv@google.com>
Date:   Thu, 13 Jan 2022 21:47:53 +0000
From:   Sean Christopherson <seanjc@...gle.com>
To:     Zeng Guang <guang.zeng@...el.com>
Cc:     Paolo Bonzini <pbonzini@...hat.com>,
        Vitaly Kuznetsov <vkuznets@...hat.com>,
        Wanpeng Li <wanpengli@...cent.com>,
        Jim Mattson <jmattson@...gle.com>,
        Joerg Roedel <joro@...tes.org>, kvm@...r.kernel.org,
        Dave Hansen <dave.hansen@...ux.intel.com>,
        Tony Luck <tony.luck@...el.com>,
        Kan Liang <kan.liang@...ux.intel.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
        "H. Peter Anvin" <hpa@...or.com>,
        Kim Phillips <kim.phillips@....com>,
        Jarkko Sakkinen <jarkko@...nel.org>,
        Jethro Beekman <jethro@...tanix.com>,
        Kai Huang <kai.huang@...el.com>, x86@...nel.org,
        linux-kernel@...r.kernel.org, Robert Hu <robert.hu@...el.com>,
        Gao Chao <chao.gao@...el.com>
Subject: Re: [PATCH v5 6/8] KVM: VMX: enable IPI virtualization

On Fri, Dec 31, 2021, Zeng Guang wrote:
> +/* Tertiary Processor-Based VM-Execution Controls, word 3 */
> +#define VMX_FEATURE_IPI_VIRT		(3*32 +  4) /* "" Enable IPI virtualization */
>  #endif /* _ASM_X86_VMXFEATURES_H */
> diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
> index 38d414f64e61..78b0525dd991 100644
> --- a/arch/x86/kvm/vmx/capabilities.h
> +++ b/arch/x86/kvm/vmx/capabilities.h
> @@ -12,6 +12,7 @@ extern bool __read_mostly enable_ept;
>  extern bool __read_mostly enable_unrestricted_guest;
>  extern bool __read_mostly enable_ept_ad_bits;
>  extern bool __read_mostly enable_pml;
> +extern bool __read_mostly enable_ipiv;
>  extern int __read_mostly pt_mode;
>  
>  #define PT_MODE_SYSTEM		0
> @@ -283,6 +284,12 @@ static inline bool cpu_has_vmx_apicv(void)
>  		cpu_has_vmx_posted_intr();
>  }
>  
> +static inline bool cpu_has_vmx_ipiv(void)
> +{
> +	return vmcs_config.cpu_based_3rd_exec_ctrl &
> +		TERTIARY_EXEC_IPI_VIRT;

Unnecessary newline, that fits on a single line.

> +}
> +
>  static inline bool cpu_has_vmx_flexpriority(void)
>  {
>  	return cpu_has_vmx_tpr_shadow() &&
> diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
> index 1c94783b5a54..bd9c9a89726a 100644
> --- a/arch/x86/kvm/vmx/posted_intr.c
> +++ b/arch/x86/kvm/vmx/posted_intr.c
> @@ -85,11 +85,16 @@ static bool vmx_can_use_vtd_pi(struct kvm *kvm)
>  		irq_remapping_cap(IRQ_POSTING_CAP);
>  }
>  
> +static bool vmx_can_use_ipiv_pi(struct kvm *kvm)
> +{
> +	return irqchip_in_kernel(kvm) && enable_apicv && enable_ipiv;

enable_ipiv should be cleared if !enable_apicv, i.e. the enable_apicv check
here should be unnecessary.

> +}
> +
>  void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
>  {
>  	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
>  
> -	if (!vmx_can_use_vtd_pi(vcpu->kvm))
> +	if (!(vmx_can_use_ipiv_pi(vcpu->kvm) || vmx_can_use_vtd_pi(vcpu->kvm)))

Purely because I am beyond terrible at reading !(A || B) and !(A && B), can we
write this as:

	if (!vmx_can_use_ipiv_pi(vcpu->kvm) && !vmx_can_use_vtd_pi(vcpu->kvm))
		return;

Or better, add a helper.  We could even drop vmx_can_use_ipiv_pi() altogether, e.g.

static bool vmx_can_use_posted_interrupts(struct kvm *kvm)
{
	return irqchip_in_kernel(kvm) &&
	       (enable_ipiv || vmx_can_use_vtd_pi(kvm));
}

Or with both helpers:

static bool vmx_can_use_posted_interrupts(struct kvm *kvm)
{
	return vmx_can_use_ipiv_pi(kvm) || vmx_can_use_vtd_pi(kvm);
}

I don't think I have a strong preference over whether or not to drop 
vmx_can_use_ipiv_pi().  I think it's marginally easier to read with the extra
helper?

>  		return;
>  
>  	/* Set SN when the vCPU is preempted */
> @@ -147,7 +152,7 @@ int pi_pre_block(struct kvm_vcpu *vcpu)
>  	struct pi_desc old, new;
>  	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
>  
> -	if (!vmx_can_use_vtd_pi(vcpu->kvm))
> +	if (!(vmx_can_use_ipiv_pi(vcpu->kvm) || vmx_can_use_vtd_pi(vcpu->kvm)))
>  		return 0;
>  
>  	WARN_ON(irqs_disabled());
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 5716db9704c0..2e65464d6dee 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -104,6 +104,9 @@ module_param(fasteoi, bool, S_IRUGO);
>  
>  module_param(enable_apicv, bool, S_IRUGO);
>  
> +bool __read_mostly enable_ipiv = true;
> +module_param(enable_ipiv, bool, 0444);
> +
>  /*
>   * If nested=1, nested virtualization is supported, i.e., guests may use
>   * VMX and be a hypervisor for its own guests. If nested=0, guests may not
> @@ -224,6 +227,11 @@ static const struct {
>  };
>  
>  #define L1D_CACHE_ORDER 4
> +
> +/* PID(Posted-Interrupt Descriptor)-pointer table entry is 64-bit long */
> +#define MAX_PID_TABLE_ORDER get_order(KVM_MAX_VCPU_IDS * sizeof(u64))
> +#define PID_TABLE_ENTRY_VALID 1
> +
>  static void *vmx_l1d_flush_pages;
>  
>  static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
> @@ -2504,7 +2512,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
>  	}
>  
>  	if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_TERTIARY_CONTROLS) {
> -		u64 opt3 = 0;
> +		u64 opt3 = TERTIARY_EXEC_IPI_VIRT;
>  		u64 min3 = 0;
>  
>  		if (adjust_vmx_controls_64(min3, opt3,
> @@ -3841,6 +3849,8 @@ static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu)
>  		vmx_enable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_RW);
>  		vmx_disable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_EOI), MSR_TYPE_W);
>  		vmx_disable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W);
> +		vmx_set_intercept_for_msr(vcpu, X2APIC_MSR(APIC_ICR),
> +				MSR_TYPE_RW, !enable_ipiv);

Please align this, e.g.

		vmx_set_intercept_for_msr(vcpu, X2APIC_MSR(APIC_ICR),
					  MSR_TYPE_RW, !enable_ipiv);

though I think I'd actually prefer we do:


		if (enable_ipiv)
			vmx_disable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_ICR), MSR_TYPE_RW);

and just let it poke out.  That makes it much more obvious that interception is
disabled when IPI virtualization is enabled.  Using vmx_set_intercept_for_msr()
implies that it could go either way, but that's not true as vmx_reset_x2apic_msrs()
sets the bitmap to intercept all x2APIC MSRs.

>  	}
>  }
>