lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <s2he4kulkeylkrv5n7r5vb7uyr72lvv7yajh5ln67d3zjwrnai@e4stv2pkh7c4>
Date: Mon, 15 Dec 2025 18:51:03 +0000
From: Yosry Ahmed <yosry.ahmed@...ux.dev>
To: Kevin Cheng <chengkev@...gle.com>
Cc: seanjc@...gle.com, pbonzini@...hat.com, jmattson@...gle.com, 
	kvm@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2] KVM: SVM: Don't allow L1 intercepts for instructions
 not advertised

On Mon, Dec 15, 2025 at 04:07:10PM +0000, Kevin Cheng wrote:
> If a feature is not advertised in the guest's CPUID, prevent L1 from
> intercepting the unsupported instructions by clearing the corresponding
> intercept in KVM's cached vmcb12.
> 
> When an L2 guest executes an instruction that is not advertised to L1,
> we expect a #UD exception to be injected by L0. However, the nested svm
> exit handler first checks if the instruction intercept is set in vmcb12,
> and if so, synthesizes an exit from L2 to L1 instead of a #UD exception.
> If a feature is not advertised, the L1 intercept should be ignored.
> 
> While creating KVM's cached vmcb12, sanitize the intercepts for
> instructions that are not advertised in the guest CPUID. This
> effectively ignores the L1 intercept on nested vm exit handling.

Nit: It also ignores the L1 intercept when computing the intercepts in
VMCB02, so if L0 (for some reason) does not intercept the instruction,
KVM won't intercept it at all.

I don't think this should happen because KVM should always intercept
unsupported instructions to inject a #UD, unless they are not supported
by HW, in which case I believe the HW will inject the #UD for us.

> 
> Signed-off-by: Kevin Cheng <chengkev@...gle.com>

Maybe also this since Sean contributed code to the patch in his last
review?

Co-developed-by: Sean Christopherson <seanjc@...gle.com>
Signed-off-by: Sean Christopherson <seanjc@...gle.com>

Otherwise looks good to me, FWIW:
Reviewed-by: Yosry Ahmed <yosry.ahmed@...ux.dev>

> ---
> v1 -> v2:
>   - Removed nested_intercept_mask which was a bit mask for nested
>     intercepts to ignore.
>   - Now sanitizing intercepts every time cached vmcb12 is created
>   - New wrappers for vmcb set/clear intercept functions
>   - Added macro functions for vmcb12 intercept sanitizing
>   - All changes suggested by Sean. Thanks!
>   - https://lore.kernel.org/all/20251205070630.4013452-1-chengkev@google.com/
> 
>  arch/x86/kvm/svm/nested.c | 19 +++++++++++++++++++
>  arch/x86/kvm/svm/svm.h    | 35 +++++++++++++++++++++++++++--------
>  2 files changed, 46 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index c81005b245222..5ffc12a315ec7 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -403,6 +403,19 @@ static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu)
>  	return __nested_vmcb_check_controls(vcpu, ctl);
>  }
> 
> +/*
> + * If a feature is not advertised to L1, clear the corresponding vmcb12
> + * intercept.
> + */
> +#define __nested_svm_sanitize_intercept(__vcpu, __control, fname, iname)	\
> +do {										\
> +	if (!guest_cpu_cap_has(__vcpu, X86_FEATURE_##fname))			\
> +		vmcb12_clr_intercept(__control, INTERCEPT_##iname);		\
> +} while (0)
> +
> +#define nested_svm_sanitize_intercept(__vcpu, __control, name)			\
> +	__nested_svm_sanitize_intercept(__vcpu, __control, name, name)
> +
>  static
>  void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu,
>  					 struct vmcb_ctrl_area_cached *to,
> @@ -413,6 +426,12 @@ void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu,
>  	for (i = 0; i < MAX_INTERCEPT; i++)
>  		to->intercepts[i] = from->intercepts[i];
> 
> +	__nested_svm_sanitize_intercept(vcpu, to, XSAVE, XSETBV);
> +	nested_svm_sanitize_intercept(vcpu, to, INVPCID);
> +	nested_svm_sanitize_intercept(vcpu, to, RDTSCP);
> +	nested_svm_sanitize_intercept(vcpu, to, SKINIT);
> +	nested_svm_sanitize_intercept(vcpu, to, RDPRU);
> +
>  	to->iopm_base_pa        = from->iopm_base_pa;
>  	to->msrpm_base_pa       = from->msrpm_base_pa;
>  	to->tsc_offset          = from->tsc_offset;
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 9e151dbdef25d..7a8c92c4de2fb 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -434,28 +434,47 @@ static __always_inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
>   */
>  #define SVM_REGS_LAZY_LOAD_SET	(1 << VCPU_EXREG_PDPTR)
> 
> -static inline void vmcb_set_intercept(struct vmcb_control_area *control, u32 bit)
> +static inline void __vmcb_set_intercept(unsigned long *intercepts, u32 bit)
>  {
>  	WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
> -	__set_bit(bit, (unsigned long *)&control->intercepts);
> +	__set_bit(bit, intercepts);
>  }
> 
> -static inline void vmcb_clr_intercept(struct vmcb_control_area *control, u32 bit)
> +static inline void __vmcb_clr_intercept(unsigned long *intercepts, u32 bit)
>  {
>  	WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
> -	__clear_bit(bit, (unsigned long *)&control->intercepts);
> +	__clear_bit(bit, intercepts);
>  }
> 
> -static inline bool vmcb_is_intercept(struct vmcb_control_area *control, u32 bit)
> +static inline bool __vmcb_is_intercept(unsigned long *intercepts, u32 bit)
>  {
>  	WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
> -	return test_bit(bit, (unsigned long *)&control->intercepts);
> +	return test_bit(bit, intercepts);
> +}
> +
> +static inline void vmcb_set_intercept(struct vmcb_control_area *control, u32 bit)
> +{
> +	__vmcb_set_intercept((unsigned long *)&control->intercepts, bit);
> +}
> +
> +static inline void vmcb_clr_intercept(struct vmcb_control_area *control, u32 bit)
> +{
> +	__vmcb_clr_intercept((unsigned long *)&control->intercepts, bit);
> +}
> +
> +static inline bool vmcb_is_intercept(struct vmcb_control_area *control, u32 bit)
> +{
> +	return __vmcb_is_intercept((unsigned long *)&control->intercepts, bit);
> +}
> +
> +static inline void vmcb12_clr_intercept(struct vmcb_ctrl_area_cached *control, u32 bit)
> +{
> +	__vmcb_clr_intercept((unsigned long *)&control->intercepts, bit);
>  }
> 
>  static inline bool vmcb12_is_intercept(struct vmcb_ctrl_area_cached *control, u32 bit)
>  {
> -	WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
> -	return test_bit(bit, (unsigned long *)&control->intercepts);
> +	return __vmcb_is_intercept((unsigned long *)&control->intercepts, bit);
>  }
> 
>  static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit)
> --
> 2.52.0.239.gd5f0c6e74e-goog
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ