Message-ID: <10d91fe6-c5b7-fbdb-f956-bce7b2e77221@amd.com>
Date: Tue, 23 Sep 2025 09:47:11 -0500
From: Tom Lendacky <thomas.lendacky@....com>
To: Neeraj Upadhyay <Neeraj.Upadhyay@....com>, kvm@...r.kernel.org,
seanjc@...gle.com, pbonzini@...hat.com
Cc: linux-kernel@...r.kernel.org, nikunj@....com, Santosh.Shukla@....com,
Vasant.Hegde@....com, Suravee.Suthikulpanit@....com, bp@...en8.de,
David.Kaplan@....com, huibo.wang@....com, naveen.rao@....com,
tiala@...rosoft.com
Subject: Re: [RFC PATCH v2 06/17] KVM: SVM: Implement interrupt injection for
Secure AVIC
On 9/23/25 00:03, Neeraj Upadhyay wrote:
> For AMD SEV-SNP guests with Secure AVIC, the virtual APIC state is
> not visible to KVM and is managed by the hardware. This renders the
> traditional interrupt injection mechanism, which directly modifies
> guest state, unusable. Instead, interrupt delivery must be mediated
> through a new interface in the VMCB. Implement support for this
> mechanism.
>
> First, new VMCB control fields, requested_irr and update_irr, are
> defined to allow KVM to communicate pending interrupts to the hardware
> before VMRUN.
>
> Hook the core interrupt injection path, svm_inject_irq(). Instead of
> injecting directly, transfer pending interrupts from KVM's software
> IRR to the new requested_irr VMCB field and delegate final delivery
> to the hardware.
>
> Since the hardware is now responsible for the timing and delivery of
> interrupts to the guest (including managing the guest's RFLAGS.IF and
> vAPIC state), bypass the standard KVM interrupt window checks in
> svm_interrupt_allowed() and svm_enable_irq_window(). Similarly, interrupt
> re-injection is handled by the hardware and requires no explicit KVM
> involvement.
>
> Finally, update the logic for detecting pending interrupts. Add the
> vendor op, protected_apic_has_interrupt(), to check only KVM's software
> vAPIC IRR state.
>
> Co-developed-by: Kishon Vijay Abraham I <kvijayab@....com>
> Signed-off-by: Kishon Vijay Abraham I <kvijayab@....com>
> Signed-off-by: Neeraj Upadhyay <Neeraj.Upadhyay@....com>
> ---
> arch/x86/include/asm/svm.h | 8 +++++--
> arch/x86/kvm/lapic.c | 17 ++++++++++++---
> arch/x86/kvm/svm/sev.c | 44 ++++++++++++++++++++++++++++++++++++++
> arch/x86/kvm/svm/svm.c | 13 +++++++++++
> arch/x86/kvm/svm/svm.h | 4 ++++
> arch/x86/kvm/x86.c | 15 ++++++++++++-
> 6 files changed, 95 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
> index ab3d55654c77..0faf262f9f9f 100644
> --- a/arch/x86/include/asm/svm.h
> +++ b/arch/x86/include/asm/svm.h
> @@ -162,10 +162,14 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
> u64 vmsa_pa; /* Used for an SEV-ES guest */
> u8 reserved_8[16];
> u16 bus_lock_counter; /* Offset 0x120 */
> - u8 reserved_9[22];
> + u8 reserved_9[18];
> + u8 update_irr; /* Offset 0x134 */
The APM has this as a 4-byte field.
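Presumably the extra padding then goes away, i.e. something along the lines
of (not checked against the APM offsets):

	u8 reserved_9[18];
	u32 update_irr;			/* Offset 0x134 */
	u64 allowed_sev_features;	/* Offset 0x138 */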
> + u8 reserved_10[3];
> u64 allowed_sev_features; /* Offset 0x138 */
> u64 guest_sev_features; /* Offset 0x140 */
> - u8 reserved_10[664];
> + u8 reserved_11[8];
> + u32 requested_irr[8]; /* Offset 0x150 */
> + u8 reserved_12[624];
> /*
> * Offset 0x3e0, 32 bytes reserved
> * for use by hypervisor/software.
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 5fc437341e03..3199c7c6db05 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -2938,11 +2938,22 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
> if (!kvm_apic_present(vcpu))
> return -1;
>
> - if (apic->guest_apic_protected)
> + if (!apic->guest_apic_protected) {
> + __apic_update_ppr(apic, &ppr);
> + return apic_has_interrupt_for_ppr(apic, ppr);
> + }
> +
> + if (!apic->prot_apic_intr_inject)
> return -1;
>
> - __apic_update_ppr(apic, &ppr);
> - return apic_has_interrupt_for_ppr(apic, ppr);
> + /*
> + * For guest-protected virtual APIC, hardware manages the virtual
> + * PPR and interrupt delivery to the guest. So, checking the KVM
> + * managed virtual APIC's APIC_IRR state for any pending vectors
> + * is the only thing required here.
> + */
> + return apic_search_irr(apic);
Just a thought, but I wonder if this would look cleaner by doing:
	if (apic->guest_apic_protected) {
		if (!apic->prot_apic_intr_inject)
			return -1;

		/*
		 * For guest-protected ...
		 */
		return apic_search_irr(apic);
	}

	__apic_update_ppr(apic, &ppr);
	return apic_has_interrupt_for_ppr(apic, ppr);
> +
> }
> EXPORT_SYMBOL_GPL(kvm_apic_has_interrupt);
>
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index afe4127a1918..78cefc14a2ee 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -28,6 +28,7 @@
> #include <asm/debugreg.h>
> #include <asm/msr.h>
> #include <asm/sev.h>
> +#include <asm/apic.h>
>
> #include "mmu.h"
> #include "x86.h"
> @@ -35,6 +36,7 @@
> #include "svm_ops.h"
> #include "cpuid.h"
> #include "trace.h"
> +#include "lapic.h"
>
> #define GHCB_VERSION_MAX 2ULL
> #define GHCB_VERSION_DEFAULT 2ULL
> @@ -5064,3 +5066,45 @@ void sev_free_decrypted_vmsa(struct kvm_vcpu *vcpu, struct vmcb_save_area *vmsa)
>
> free_page((unsigned long)vmsa);
> }
> +
> +void sev_savic_set_requested_irr(struct vcpu_svm *svm, bool reinjected)
> +{
> + unsigned int i, vec, vec_pos, vec_start;
> + struct kvm_lapic *apic;
> + bool has_interrupts;
> + u32 val;
> +
> + /* Secure AVIC HW takes care of re-injection */
> + if (reinjected)
> + return;
> +
> + apic = svm->vcpu.arch.apic;
> + has_interrupts = false;
> +
> + for (i = 0; i < ARRAY_SIZE(svm->vmcb->control.requested_irr); i++) {
> + val = apic_get_reg(apic->regs, APIC_IRR + i * 0x10);
> + if (!val)
> + continue;
Add a blank line here.
> + has_interrupts = true;
> + svm->vmcb->control.requested_irr[i] |= val;
Add a blank line here.
> + vec_start = i * 32;
Move this line to just below the comment.
> + /*
> + * Clear each vector one by one to avoid race with concurrent
> + * APIC_IRR updates from the deliver_interrupt() path.
> + */
> + do {
> + vec_pos = __ffs(val);
> + vec = vec_start + vec_pos;
> + apic_clear_vector(vec, apic->regs + APIC_IRR);
> + val = val & ~BIT(vec_pos);
> + } while (val);
Would the following be cleaner?
	for_each_set_bit(vec_pos, &val, 32)
		apic_clear_vector(vec_start + vec_pos, apic->regs + APIC_IRR);
Might have to make "val" an unsigned long, though, and not sure how that
affects OR'ing it into requested_irr.
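FWIW, a rough, untested sketch of the loop with "val" as a local unsigned
long (the u32 cast for the OR and dropping the "vec" local are just my
assumptions):

	for (i = 0; i < ARRAY_SIZE(svm->vmcb->control.requested_irr); i++) {
		unsigned long val = apic_get_reg(apic->regs, APIC_IRR + i * 0x10);

		if (!val)
			continue;

		has_interrupts = true;
		svm->vmcb->control.requested_irr[i] |= (u32)val;

		/*
		 * Still clears one vector at a time, so the race with
		 * deliver_interrupt() is handled the same way as before.
		 */
		vec_start = i * 32;
		for_each_set_bit(vec_pos, &val, 32)
			apic_clear_vector(vec_start + vec_pos,
					  apic->regs + APIC_IRR);
	}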
> + }
> +
> + if (has_interrupts)
> + svm->vmcb->control.update_irr |= BIT(0);
> +}
> +
> +bool sev_savic_has_pending_interrupt(struct kvm_vcpu *vcpu)
> +{
> + return kvm_apic_has_interrupt(vcpu) != -1;
> +}
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 064ec98d7e67..7811a87bc111 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -52,6 +52,8 @@
> #include "svm.h"
> #include "svm_ops.h"
>
> +#include "lapic.h"
Is this include really needed?
> +
> #include "kvm_onhyperv.h"
> #include "svm_onhyperv.h"
>
> @@ -3689,6 +3691,9 @@ static void svm_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
> struct vcpu_svm *svm = to_svm(vcpu);
> u32 type;
>
> + if (sev_savic_active(vcpu->kvm))
> + return sev_savic_set_requested_irr(svm, reinjected);
> +
> if (vcpu->arch.interrupt.soft) {
> if (svm_update_soft_interrupt_rip(vcpu))
> return;
> @@ -3870,6 +3875,9 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
> {
> struct vcpu_svm *svm = to_svm(vcpu);
>
> + if (sev_savic_active(vcpu->kvm))
> + return 1;
Maybe just add a comment above this about why you always return 1 for
Secure AVIC.
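Something like, roughly (wording based on the commit message):

	/*
	 * With Secure AVIC, the hardware tracks the guest's RFLAGS.IF and
	 * the interrupt window, and delivers the interrupt once the guest
	 * can accept it, so injection is always allowed from KVM's side.
	 */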
> +
> if (svm->nested.nested_run_pending)
> return -EBUSY;
>
> @@ -3890,6 +3898,9 @@ static void svm_enable_irq_window(struct kvm_vcpu *vcpu)
> {
> struct vcpu_svm *svm = to_svm(vcpu);
>
> + if (sev_savic_active(vcpu->kvm))
> + return;
Ditto here on the comment.
> +
> /*
> * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes
> * 1, because that's a separate STGI/VMRUN intercept. The next time we
> @@ -5132,6 +5143,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
> .apicv_post_state_restore = avic_apicv_post_state_restore,
> .required_apicv_inhibits = AVIC_REQUIRED_APICV_INHIBITS,
>
> + .protected_apic_has_interrupt = sev_savic_has_pending_interrupt,
> +
> .get_exit_info = svm_get_exit_info,
> .get_entry_info = svm_get_entry_info,
>
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 1090a48adeda..60dc424d62c4 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -873,6 +873,8 @@ static inline bool sev_savic_active(struct kvm *kvm)
> {
> return to_kvm_sev_info(kvm)->vmsa_features & SVM_SEV_FEAT_SECURE_AVIC;
> }
> +void sev_savic_set_requested_irr(struct vcpu_svm *svm, bool reinjected);
> +bool sev_savic_has_pending_interrupt(struct kvm_vcpu *vcpu);
> #else
> static inline struct page *snp_safe_alloc_page_node(int node, gfp_t gfp)
> {
> @@ -910,6 +912,8 @@ static inline struct vmcb_save_area *sev_decrypt_vmsa(struct kvm_vcpu *vcpu)
> return NULL;
> }
> static inline void sev_free_decrypted_vmsa(struct kvm_vcpu *vcpu, struct vmcb_save_area *vmsa) {}
> +static inline void sev_savic_set_requested_irr(struct vcpu_svm *svm, bool reinjected) {}
> +static inline bool sev_savic_has_pending_interrupt(struct kvm_vcpu *vcpu) { return false; }
> #endif
>
> /* vmenter.S */
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 33fba801b205..65ebdc6deb92 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -10369,7 +10369,20 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
> if (r < 0)
> goto out;
> if (r) {
> - int irq = kvm_cpu_get_interrupt(vcpu);
> + int irq;
> +
> + /*
> + * Do not ack the interrupt here for guest-protected VAPIC
> + * which requires interrupt injection to the guest.
Maybe a bit more detail about why you don't want to do the ACK?
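Perhaps something along these lines, if that's the actual reason:

	/*
	 * Do not ack the interrupt: kvm_cpu_get_interrupt() would move the
	 * vector from KVM's IRR to ISR, but with a guest-protected vAPIC
	 * the ISR/PPR are managed by hardware, and the vector must stay in
	 * KVM's IRR so that ->inject_irq can transfer it to requested_irr
	 * (and clear it) before VMRUN.
	 */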
Thanks,
Tom
> + *
> + * ->inject_irq reads the KVM's VAPIC's APIC_IRR state and
> + * clears it.
> + */
> + if (vcpu->arch.apic->guest_apic_protected &&
> + vcpu->arch.apic->prot_apic_intr_inject)
> + irq = kvm_apic_has_interrupt(vcpu);
> + else
> + irq = kvm_cpu_get_interrupt(vcpu);
>
> if (!WARN_ON_ONCE(irq == -1)) {
> kvm_queue_interrupt(vcpu, irq, false);