Message-ID: <s2he4kulkeylkrv5n7r5vb7uyr72lvv7yajh5ln67d3zjwrnai@e4stv2pkh7c4>
Date: Mon, 15 Dec 2025 18:51:03 +0000
From: Yosry Ahmed <yosry.ahmed@...ux.dev>
To: Kevin Cheng <chengkev@...gle.com>
Cc: seanjc@...gle.com, pbonzini@...hat.com, jmattson@...gle.com,
kvm@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2] KVM: SVM: Don't allow L1 intercepts for instructions
not advertised
On Mon, Dec 15, 2025 at 04:07:10PM +0000, Kevin Cheng wrote:
> If a feature is not advertised in the guest's CPUID, prevent L1 from
> intercepting the unsupported instructions by clearing the corresponding
> intercept in KVM's cached vmcb12.
>
> When an L2 guest executes an instruction that is not advertised to L1,
> we expect a #UD exception to be injected by L0. However, the nested svm
> exit handler first checks if the instruction intercept is set in vmcb12,
> and if so, synthesizes an exit from L2 to L1 instead of a #UD exception.
> If a feature is not advertised, the L1 intercept should be ignored.
>
> While creating KVM's cached vmcb12, sanitize the intercepts for
> instructions that are not advertised in the guest CPUID. This
> effectively ignores the L1 intercept during nested VM-Exit handling.
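For context, a simplified sketch (assumed helper name, not the literal
nested.c code) of the check this relies on: the nested exit handler consults
the *cached* vmcb12 intercepts, so clearing a bit in the cache means the exit
is not forwarded to L1 and KVM's normal handling runs instead, injecting the
#UD:

	static int sketch_nested_intercept(struct vcpu_svm *svm, u32 exit_code)
	{
		/* Cached (and, with this patch, sanitized) copy of L1's vmcb12 controls. */
		struct vmcb_ctrl_area_cached *ctl = &svm->nested.ctl;

		if (vmcb12_is_intercept(ctl, exit_code))
			return NESTED_EXIT_DONE;	/* synthesize a VMEXIT to L1 */

		return NESTED_EXIT_HOST;		/* handled by L0, e.g. inject #UD */
	}
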
Nit: It also ignores the L1 intercept when computing the intercepts in
VMCB02, so if L0 (for some reason) does not intercept the instruction,
KVM won't intercept it at all.
I don't think that case can actually happen, though: KVM should always
intercept unsupported instructions so it can inject a #UD, unless they are
not supported by the HW at all, in which case I believe the HW will inject
the #UD for us.
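To illustrate the nit, the vmcb02 intercepts are roughly the union of L0's
vmcb01 intercepts and the cached vmcb12 ones. A hedged sketch (assumed
function name, not the exact recalc_intercepts() code):

	static void sketch_merge_intercepts(struct vmcb_control_area *vmcb02,
					    const struct vmcb_control_area *vmcb01,
					    const struct vmcb_ctrl_area_cached *vmcb12)
	{
		int i;

		/* L0's intercepts OR'd with the (now sanitized) cached L1 intercepts. */
		for (i = 0; i < MAX_INTERCEPT; i++)
			vmcb02->intercepts[i] = vmcb01->intercepts[i] |
						vmcb12->intercepts[i];
	}

So as long as vmcb01 keeps the intercept set for unsupported instructions,
the sanitized cache changes nothing for vmcb02; the nit only matters in the
theoretical case where it doesn't.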
>
> Signed-off-by: Kevin Cheng <chengkev@...gle.com>
Maybe also add these tags, since Sean contributed code to the patch in his
last review?
Co-developed-by: Sean Christopherson <seanjc@...gle.com>
Signed-off-by: Sean Christopherson <seanjc@...gle.com>
Otherwise looks good to me, FWIW:
Reviewed-by: Yosry Ahmed <yosry.ahmed@...ux.dev>
> ---
> v1 -> v2:
> - Removed nested_intercept_mask which was a bit mask for nested
> intercepts to ignore.
> - Now sanitizing intercepts every time cached vmcb12 is created
> - New wrappers for vmcb set/clear intercept functions
> - Added macro functions for vmcb12 intercept sanitizing
> - All changes suggested by Sean. Thanks!
> - https://lore.kernel.org/all/20251205070630.4013452-1-chengkev@google.com/
>
> arch/x86/kvm/svm/nested.c | 19 +++++++++++++++++++
> arch/x86/kvm/svm/svm.h | 35 +++++++++++++++++++++++++++--------
> 2 files changed, 46 insertions(+), 8 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index c81005b245222..5ffc12a315ec7 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -403,6 +403,19 @@ static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu)
> return __nested_vmcb_check_controls(vcpu, ctl);
> }
>
> +/*
> + * If a feature is not advertised to L1, clear the corresponding vmcb12
> + * intercept.
> + */
> +#define __nested_svm_sanitize_intercept(__vcpu, __control, fname, iname) \
> +do { \
> + if (!guest_cpu_cap_has(__vcpu, X86_FEATURE_##fname)) \
> + vmcb12_clr_intercept(__control, INTERCEPT_##iname); \
> +} while (0)
> +
> +#define nested_svm_sanitize_intercept(__vcpu, __control, name) \
> + __nested_svm_sanitize_intercept(__vcpu, __control, name, name)
> +
> static
> void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu,
> struct vmcb_ctrl_area_cached *to,
> @@ -413,6 +426,12 @@ void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu,
> for (i = 0; i < MAX_INTERCEPT; i++)
> to->intercepts[i] = from->intercepts[i];
>
> + __nested_svm_sanitize_intercept(vcpu, to, XSAVE, XSETBV);
> + nested_svm_sanitize_intercept(vcpu, to, INVPCID);
> + nested_svm_sanitize_intercept(vcpu, to, RDTSCP);
> + nested_svm_sanitize_intercept(vcpu, to, SKINIT);
> + nested_svm_sanitize_intercept(vcpu, to, RDPRU);
> +
> to->iopm_base_pa = from->iopm_base_pa;
> to->msrpm_base_pa = from->msrpm_base_pa;
> to->tsc_offset = from->tsc_offset;
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 9e151dbdef25d..7a8c92c4de2fb 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -434,28 +434,47 @@ static __always_inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
> */
> #define SVM_REGS_LAZY_LOAD_SET (1 << VCPU_EXREG_PDPTR)
>
> -static inline void vmcb_set_intercept(struct vmcb_control_area *control, u32 bit)
> +static inline void __vmcb_set_intercept(unsigned long *intercepts, u32 bit)
> {
> WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
> - __set_bit(bit, (unsigned long *)&control->intercepts);
> + __set_bit(bit, intercepts);
> }
>
> -static inline void vmcb_clr_intercept(struct vmcb_control_area *control, u32 bit)
> +static inline void __vmcb_clr_intercept(unsigned long *intercepts, u32 bit)
> {
> WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
> - __clear_bit(bit, (unsigned long *)&control->intercepts);
> + __clear_bit(bit, intercepts);
> }
>
> -static inline bool vmcb_is_intercept(struct vmcb_control_area *control, u32 bit)
> +static inline bool __vmcb_is_intercept(unsigned long *intercepts, u32 bit)
> {
> WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
> - return test_bit(bit, (unsigned long *)&control->intercepts);
> + return test_bit(bit, intercepts);
> +}
> +
> +static inline void vmcb_set_intercept(struct vmcb_control_area *control, u32 bit)
> +{
> + __vmcb_set_intercept((unsigned long *)&control->intercepts, bit);
> +}
> +
> +static inline void vmcb_clr_intercept(struct vmcb_control_area *control, u32 bit)
> +{
> + __vmcb_clr_intercept((unsigned long *)&control->intercepts, bit);
> +}
> +
> +static inline bool vmcb_is_intercept(struct vmcb_control_area *control, u32 bit)
> +{
> + return __vmcb_is_intercept((unsigned long *)&control->intercepts, bit);
> +}
> +
> +static inline void vmcb12_clr_intercept(struct vmcb_ctrl_area_cached *control, u32 bit)
> +{
> + __vmcb_clr_intercept((unsigned long *)&control->intercepts, bit);
> }
>
> static inline bool vmcb12_is_intercept(struct vmcb_ctrl_area_cached *control, u32 bit)
> {
> - WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
> - return test_bit(bit, (unsigned long *)&control->intercepts);
> + return __vmcb_is_intercept((unsigned long *)&control->intercepts, bit);
> }
>
> static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit)
> --
> 2.52.0.239.gd5f0c6e74e-goog
>