Message-ID: <10d91fe6-c5b7-fbdb-f956-bce7b2e77221@amd.com>
Date: Tue, 23 Sep 2025 09:47:11 -0500
From: Tom Lendacky <thomas.lendacky@....com>
To: Neeraj Upadhyay <Neeraj.Upadhyay@....com>, kvm@...r.kernel.org,
seanjc@...gle.com, pbonzini@...hat.com
Cc: linux-kernel@...r.kernel.org, nikunj@....com, Santosh.Shukla@....com,
Vasant.Hegde@....com, Suravee.Suthikulpanit@....com, bp@...en8.de,
David.Kaplan@....com, huibo.wang@....com, naveen.rao@....com,
tiala@...rosoft.com
Subject: Re: [RFC PATCH v2 06/17] KVM: SVM: Implement interrupt injection for
Secure AVIC
On 9/23/25 00:03, Neeraj Upadhyay wrote:
> For AMD SEV-SNP guests with Secure AVIC, the virtual APIC state is
> not visible to KVM and is managed by the hardware. This renders the
> traditional interrupt injection mechanism, which directly modifies
> guest state, unusable. Instead, interrupt delivery must be mediated
> through a new interface in the VMCB. Implement support for this
> mechanism.
>
> First, new VMCB control fields, requested_irr and update_irr, are
> defined to allow KVM to communicate pending interrupts to the hardware
> before VMRUN.
>
> Hook the core interrupt injection path, svm_inject_irq(). Instead of
> injecting directly, transfer pending interrupts from KVM's software
> IRR to the new requested_irr VMCB field and delegate final delivery
> to the hardware.
>
> Since the hardware is now responsible for the timing and delivery of
> interrupts to the guest (including managing the guest's RFLAGS.IF and
> vAPIC state), bypass the standard KVM interrupt window checks in
> svm_interrupt_allowed() and svm_enable_irq_window(). Similarly, interrupt
> re-injection is handled by the hardware and requires no explicit KVM
> involvement.
>
> Finally, update the logic for detecting pending interrupts. Add the
> vendor op, protected_apic_has_interrupt(), to check only KVM's software
> vAPIC IRR state.
>
> Co-developed-by: Kishon Vijay Abraham I <kvijayab@....com>
> Signed-off-by: Kishon Vijay Abraham I <kvijayab@....com>
> Signed-off-by: Neeraj Upadhyay <Neeraj.Upadhyay@....com>
> ---
> arch/x86/include/asm/svm.h | 8 +++++--
> arch/x86/kvm/lapic.c | 17 ++++++++++++---
> arch/x86/kvm/svm/sev.c | 44 ++++++++++++++++++++++++++++++++++++++
> arch/x86/kvm/svm/svm.c | 13 +++++++++++
> arch/x86/kvm/svm/svm.h | 4 ++++
> arch/x86/kvm/x86.c | 15 ++++++++++++-
> 6 files changed, 95 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
> index ab3d55654c77..0faf262f9f9f 100644
> --- a/arch/x86/include/asm/svm.h
> +++ b/arch/x86/include/asm/svm.h
> @@ -162,10 +162,14 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
> u64 vmsa_pa; /* Used for an SEV-ES guest */
> u8 reserved_8[16];
> u16 bus_lock_counter; /* Offset 0x120 */
> - u8 reserved_9[22];
> + u8 reserved_9[18];
> + u8 update_irr; /* Offset 0x134 */
The APM has this as a 4-byte field.
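Presumably the extra padding then goes away, i.e. something along the lines
of (not checked against the APM offsets):

	u8 reserved_9[18];
	u32 update_irr;			/* Offset 0x134 */
	u64 allowed_sev_features;	/* Offset 0x138 */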
> + u8 reserved_10[3];
> u64 allowed_sev_features; /* Offset 0x138 */
> u64 guest_sev_features; /* Offset 0x140 */
> - u8 reserved_10[664];
> + u8 reserved_11[8];
> + u32 requested_irr[8]; /* Offset 0x150 */
> + u8 reserved_12[624];
> /*
> * Offset 0x3e0, 32 bytes reserved
> * for use by hypervisor/software.
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 5fc437341e03..3199c7c6db05 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -2938,11 +2938,22 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
> if (!kvm_apic_present(vcpu))
> return -1;
>
> - if (apic->guest_apic_protected)
> + if (!apic->guest_apic_protected) {
> + __apic_update_ppr(apic, &ppr);
> + return apic_has_interrupt_for_ppr(apic, ppr);
> + }
> +
> + if (!apic->prot_apic_intr_inject)
> return -1;
>
> - __apic_update_ppr(apic, &ppr);
> - return apic_has_interrupt_for_ppr(apic, ppr);
> + /*
> + * For guest-protected virtual APIC, hardware manages the virtual
> + * PPR and interrupt delivery to the guest. So, checking the KVM
> + * managed virtual APIC's APIC_IRR state for any pending vectors
> + * is the only thing required here.
> + */
> + return apic_search_irr(apic);
Just a thought, but I wonder if this would look cleaner by doing:
	if (apic->guest_apic_protected) {
		if (!apic->prot_apic_intr_inject)
			return -1;

		/*
		 * For guest-protected ...
		 */
		return apic_search_irr(apic);
	}

	__apic_update_ppr(apic, &ppr);
	return apic_has_interrupt_for_ppr(apic, ppr);
> +
> }
> EXPORT_SYMBOL_GPL(kvm_apic_has_interrupt);
>
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index afe4127a1918..78cefc14a2ee 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -28,6 +28,7 @@
> #include <asm/debugreg.h>
> #include <asm/msr.h>
> #include <asm/sev.h>
> +#include <asm/apic.h>
>
> #include "mmu.h"
> #include "x86.h"
> @@ -35,6 +36,7 @@
> #include "svm_ops.h"
> #include "cpuid.h"
> #include "trace.h"
> +#include "lapic.h"
>
> #define GHCB_VERSION_MAX 2ULL
> #define GHCB_VERSION_DEFAULT 2ULL
> @@ -5064,3 +5066,45 @@ void sev_free_decrypted_vmsa(struct kvm_vcpu *vcpu, struct vmcb_save_area *vmsa)
>
> free_page((unsigned long)vmsa);
> }
> +
> +void sev_savic_set_requested_irr(struct vcpu_svm *svm, bool reinjected)
> +{
> + unsigned int i, vec, vec_pos, vec_start;
> + struct kvm_lapic *apic;
> + bool has_interrupts;
> + u32 val;
> +
> + /* Secure AVIC HW takes care of re-injection */
> + if (reinjected)
> + return;
> +
> + apic = svm->vcpu.arch.apic;
> + has_interrupts = false;
> +
> + for (i = 0; i < ARRAY_SIZE(svm->vmcb->control.requested_irr); i++) {
> + val = apic_get_reg(apic->regs, APIC_IRR + i * 0x10);
> + if (!val)
> + continue;
Add a blank line here.
> + has_interrupts = true;
> + svm->vmcb->control.requested_irr[i] |= val;
Add a blank line here.
> + vec_start = i * 32;
Move this line to just below the comment.
> + /*
> + * Clear each vector one by one to avoid race with concurrent
> + * APIC_IRR updates from the deliver_interrupt() path.
> + */
> + do {
> + vec_pos = __ffs(val);
> + vec = vec_start + vec_pos;
> + apic_clear_vector(vec, apic->regs + APIC_IRR);
> + val = val & ~BIT(vec_pos);
> + } while (val);
Would the following be cleaner?
	for_each_set_bit(vec_pos, &val, 32)
		apic_clear_vector(vec_start + vec_pos, apic->regs + APIC_IRR);
Might have to make "val" an unsigned long, though, and not sure how that
affects OR'ing it into requested_irr.
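FWIW, a rough, untested sketch of the loop with "val" as a local unsigned
long (the u32 cast for the OR and dropping the "vec" local are just my
assumptions):

	for (i = 0; i < ARRAY_SIZE(svm->vmcb->control.requested_irr); i++) {
		unsigned long val = apic_get_reg(apic->regs, APIC_IRR + i * 0x10);

		if (!val)
			continue;

		has_interrupts = true;
		svm->vmcb->control.requested_irr[i] |= (u32)val;

		/*
		 * Still clears one vector at a time, so the race with
		 * deliver_interrupt() is handled the same way as before.
		 */
		vec_start = i * 32;
		for_each_set_bit(vec_pos, &val, 32)
			apic_clear_vector(vec_start + vec_pos,
					  apic->regs + APIC_IRR);
	}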
> + }
> +
> + if (has_interrupts)
> + svm->vmcb->control.update_irr |= BIT(0);
> +}
> +
> +bool sev_savic_has_pending_interrupt(struct kvm_vcpu *vcpu)
> +{
> + return kvm_apic_has_interrupt(vcpu) != -1;
> +}
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 064ec98d7e67..7811a87bc111 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -52,6 +52,8 @@
> #include "svm.h"
> #include "svm_ops.h"
>
> +#include "lapic.h"
Is this include really needed?
> +
> #include "kvm_onhyperv.h"
> #include "svm_onhyperv.h"
>
> @@ -3689,6 +3691,9 @@ static void svm_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
> struct vcpu_svm *svm = to_svm(vcpu);
> u32 type;
>
> + if (sev_savic_active(vcpu->kvm))
> + return sev_savic_set_requested_irr(svm, reinjected);
> +
> if (vcpu->arch.interrupt.soft) {
> if (svm_update_soft_interrupt_rip(vcpu))
> return;
> @@ -3870,6 +3875,9 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
> {
> struct vcpu_svm *svm = to_svm(vcpu);
>
> + if (sev_savic_active(vcpu->kvm))
> + return 1;
Maybe just add a comment above this about why you always return 1 for
Secure AVIC.
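Something like, roughly (wording based on the commit message):

	/*
	 * With Secure AVIC, the hardware tracks the guest's RFLAGS.IF and
	 * the interrupt window, and delivers the interrupt once the guest
	 * can accept it, so injection is always allowed from KVM's side.
	 */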
> +
> if (svm->nested.nested_run_pending)
> return -EBUSY;
>
> @@ -3890,6 +3898,9 @@ static void svm_enable_irq_window(struct kvm_vcpu *vcpu)
> {
> struct vcpu_svm *svm = to_svm(vcpu);
>
> + if (sev_savic_active(vcpu->kvm))
> + return;
Ditto here on the comment.
> +
> /*
> * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes
> * 1, because that's a separate STGI/VMRUN intercept. The next time we
> @@ -5132,6 +5143,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
> .apicv_post_state_restore = avic_apicv_post_state_restore,
> .required_apicv_inhibits = AVIC_REQUIRED_APICV_INHIBITS,
>
> + .protected_apic_has_interrupt = sev_savic_has_pending_interrupt,
> +
> .get_exit_info = svm_get_exit_info,
> .get_entry_info = svm_get_entry_info,
>
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 1090a48adeda..60dc424d62c4 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -873,6 +873,8 @@ static inline bool sev_savic_active(struct kvm *kvm)
> {
> return to_kvm_sev_info(kvm)->vmsa_features & SVM_SEV_FEAT_SECURE_AVIC;
> }
> +void sev_savic_set_requested_irr(struct vcpu_svm *svm, bool reinjected);
> +bool sev_savic_has_pending_interrupt(struct kvm_vcpu *vcpu);
> #else
> static inline struct page *snp_safe_alloc_page_node(int node, gfp_t gfp)
> {
> @@ -910,6 +912,8 @@ static inline struct vmcb_save_area *sev_decrypt_vmsa(struct kvm_vcpu *vcpu)
> return NULL;
> }
> static inline void sev_free_decrypted_vmsa(struct kvm_vcpu *vcpu, struct vmcb_save_area *vmsa) {}
> +static inline void sev_savic_set_requested_irr(struct vcpu_svm *svm, bool reinjected) {}
> +static inline bool sev_savic_has_pending_interrupt(struct kvm_vcpu *vcpu) { return false; }
> #endif
>
> /* vmenter.S */
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 33fba801b205..65ebdc6deb92 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -10369,7 +10369,20 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
> if (r < 0)
> goto out;
> if (r) {
> - int irq = kvm_cpu_get_interrupt(vcpu);
> + int irq;
> +
> + /*
> + * Do not ack the interrupt here for guest-protected VAPIC
> + * which requires interrupt injection to the guest.
Maybe a bit more detail about why you don't want to do the ACK?
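Perhaps something along these lines, if that's the actual reason:

	/*
	 * Do not ack the interrupt: kvm_cpu_get_interrupt() would move the
	 * vector from KVM's IRR to ISR, but with a guest-protected vAPIC
	 * the ISR/PPR are managed by hardware, and the vector must stay in
	 * KVM's IRR so that ->inject_irq can transfer it to requested_irr
	 * (and clear it) before VMRUN.
	 */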
Thanks,
Tom
> + *
> + * ->inject_irq reads the KVM's VAPIC's APIC_IRR state and
> + * clears it.
> + */
> + if (vcpu->arch.apic->guest_apic_protected &&
> + vcpu->arch.apic->prot_apic_intr_inject)
> + irq = kvm_apic_has_interrupt(vcpu);
> + else
> + irq = kvm_cpu_get_interrupt(vcpu);
>
> if (!WARN_ON_ONCE(irq == -1)) {
> kvm_queue_interrupt(vcpu, irq, false);