[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAE6NW_bTM0cpYcwu-wj+PHUCnkuYs3WpejPr1or+9+PVg1-byA@mail.gmail.com>
Date: Mon, 15 Dec 2025 14:28:17 -0500
From: Kevin Cheng <chengkev@...gle.com>
To: Yosry Ahmed <yosry.ahmed@...ux.dev>
Cc: seanjc@...gle.com, pbonzini@...hat.com, jmattson@...gle.com,
kvm@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2] KVM: SVM: Don't allow L1 intercepts for instructions
not advertised
On Mon, Dec 15, 2025 at 1:51 PM Yosry Ahmed <yosry.ahmed@...ux.dev> wrote:
>
> On Mon, Dec 15, 2025 at 04:07:10PM +0000, Kevin Cheng wrote:
> > If a feature is not advertised in the guest's CPUID, prevent L1 from
> > intercepting the unsupported instructions by clearing the corresponding
> > intercept in KVM's cached vmcb12.
> >
> > When an L2 guest executes an instruction that is not advertised to L1,
> > we expect a #UD exception to be injected by L0. However, the nested svm
> > exit handler first checks if the instruction intercept is set in vmcb12,
> > and if so, synthesizes an exit from L2 to L1 instead of a #UD exception.
> > If a feature is not advertised, the L1 intercept should be ignored.
> >
> > While creating KVM's cached vmcb12, sanitize the intercepts for
> > instructions that are not advertised in the guest CPUID. This
> > effectively ignores the L1 intercept on nested vm exit handling.
>
> Nit: It also ignores the L1 intercept when computing the intercepts in
> VMCB02, so if L0 (for some reason) does not intercept the instruction,
> KVM won't intercept it at all.
>
> I don't think this should happen because KVM should always intercept
> unsupported instructions to inject a #UD, unless they are not supported
> by HW, in which case I believe the HW will inject the #UD for us.
>
> >
> > Signed-off-by: Kevin Cheng <chengkev@...gle.com>
>
> Maybe also this since Sean contributed code to the patch in his last
> review?
>
> Co-developed-by: Sean Christopherson <seanjc@...gle.com>
> Signed-off-by: Sean Christopherson <seanjc@...gle.com>
>
I promise I didn't mean to leave that out haha. Sorry Sean! Done.
> Otherwise looks good to me, FWIW:
> Reviewed-by: Yosry Ahmed <yosry.ahmed@...ux.dev>
>
> > ---
> > v1 -> v2:
> > - Removed nested_intercept_mask which was a bit mask for nested
> > intercepts to ignore.
> > - Now sanitizing intercepts every time cached vmcb12 is created
> > - New wrappers for vmcb set/clear intercept functions
> > - Added macro functions for vmcb12 intercept sanitizing
> > - All changes suggested by Sean. Thanks!
> > - https://lore.kernel.org/all/20251205070630.4013452-1-chengkev@google.com/
> >
> > arch/x86/kvm/svm/nested.c | 19 +++++++++++++++++++
> > arch/x86/kvm/svm/svm.h | 35 +++++++++++++++++++++++++++--------
> > 2 files changed, 46 insertions(+), 8 deletions(-)
> >
> > diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> > index c81005b245222..5ffc12a315ec7 100644
> > --- a/arch/x86/kvm/svm/nested.c
> > +++ b/arch/x86/kvm/svm/nested.c
> > @@ -403,6 +403,19 @@ static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu)
> > return __nested_vmcb_check_controls(vcpu, ctl);
> > }
> >
> > +/*
> > + * If a feature is not advertised to L1, clear the corresponding vmcb12
> > + * intercept.
> > + */
> > +#define __nested_svm_sanitize_intercept(__vcpu, __control, fname, iname) \
> > +do { \
> > + if (!guest_cpu_cap_has(__vcpu, X86_FEATURE_##fname)) \
> > + vmcb12_clr_intercept(__control, INTERCEPT_##iname); \
> > +} while (0)
> > +
> > +#define nested_svm_sanitize_intercept(__vcpu, __control, name) \
> > + __nested_svm_sanitize_intercept(__vcpu, __control, name, name)
> > +
> > static
> > void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu,
> > struct vmcb_ctrl_area_cached *to,
> > @@ -413,6 +426,12 @@ void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu,
> > for (i = 0; i < MAX_INTERCEPT; i++)
> > to->intercepts[i] = from->intercepts[i];
> >
> > + __nested_svm_sanitize_intercept(vcpu, to, XSAVE, XSETBV);
> > + nested_svm_sanitize_intercept(vcpu, to, INVPCID);
> > + nested_svm_sanitize_intercept(vcpu, to, RDTSCP);
> > + nested_svm_sanitize_intercept(vcpu, to, SKINIT);
> > + nested_svm_sanitize_intercept(vcpu, to, RDPRU);
> > +
> > to->iopm_base_pa = from->iopm_base_pa;
> > to->msrpm_base_pa = from->msrpm_base_pa;
> > to->tsc_offset = from->tsc_offset;
> > diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> > index 9e151dbdef25d..7a8c92c4de2fb 100644
> > --- a/arch/x86/kvm/svm/svm.h
> > +++ b/arch/x86/kvm/svm/svm.h
> > @@ -434,28 +434,47 @@ static __always_inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
> > */
> > #define SVM_REGS_LAZY_LOAD_SET (1 << VCPU_EXREG_PDPTR)
> >
> > -static inline void vmcb_set_intercept(struct vmcb_control_area *control, u32 bit)
> > +static inline void __vmcb_set_intercept(unsigned long *intercepts, u32 bit)
> > {
> > WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
> > - __set_bit(bit, (unsigned long *)&control->intercepts);
> > + __set_bit(bit, intercepts);
> > }
> >
> > -static inline void vmcb_clr_intercept(struct vmcb_control_area *control, u32 bit)
> > +static inline void __vmcb_clr_intercept(unsigned long *intercepts, u32 bit)
> > {
> > WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
> > - __clear_bit(bit, (unsigned long *)&control->intercepts);
> > + __clear_bit(bit, intercepts);
> > }
> >
> > -static inline bool vmcb_is_intercept(struct vmcb_control_area *control, u32 bit)
> > +static inline bool __vmcb_is_intercept(unsigned long *intercepts, u32 bit)
> > {
> > WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
> > - return test_bit(bit, (unsigned long *)&control->intercepts);
> > + return test_bit(bit, intercepts);
> > +}
> > +
> > +static inline void vmcb_set_intercept(struct vmcb_control_area *control, u32 bit)
> > +{
> > + __vmcb_set_intercept((unsigned long *)&control->intercepts, bit);
> > +}
> > +
> > +static inline void vmcb_clr_intercept(struct vmcb_control_area *control, u32 bit)
> > +{
> > + __vmcb_clr_intercept((unsigned long *)&control->intercepts, bit);
> > +}
> > +
> > +static inline bool vmcb_is_intercept(struct vmcb_control_area *control, u32 bit)
> > +{
> > + return __vmcb_is_intercept((unsigned long *)&control->intercepts, bit);
> > +}
> > +
> > +static inline void vmcb12_clr_intercept(struct vmcb_ctrl_area_cached *control, u32 bit)
> > +{
> > + __vmcb_clr_intercept((unsigned long *)&control->intercepts, bit);
> > }
> >
> > static inline bool vmcb12_is_intercept(struct vmcb_ctrl_area_cached *control, u32 bit)
> > {
> > - WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
> > - return test_bit(bit, (unsigned long *)&control->intercepts);
> > + return __vmcb_is_intercept((unsigned long *)&control->intercepts, bit);
> > }
> >
> > static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit)
> > --
> > 2.52.0.239.gd5f0c6e74e-goog
> >
Powered by blists - more mailing lists