[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250523010004.3240643-3-seanjc@google.com>
Date: Thu, 22 May 2025 17:59:07 -0700
From: Sean Christopherson <seanjc@...gle.com>
To: Sean Christopherson <seanjc@...gle.com>, Paolo Bonzini <pbonzini@...hat.com>,
Joerg Roedel <joro@...tes.org>, David Woodhouse <dwmw2@...radead.org>,
Lu Baolu <baolu.lu@...ux.intel.com>
Cc: kvm@...r.kernel.org, iommu@...ts.linux.dev, linux-kernel@...r.kernel.org,
Sairaj Kodilkar <sarunkod@....com>, Vasant Hegde <vasant.hegde@....com>,
Maxim Levitsky <mlevitsk@...hat.com>, Joao Martins <joao.m.martins@...cle.com>,
Francesco Lavra <francescolavra.fl@...il.com>, David Matlack <dmatlack@...gle.com>
Subject: [PATCH v2 02/59] KVM: SVM: Track per-vCPU IRTEs using
kvm_kernel_irqfd structure
Track the IRTEs that are posting to an SVM vCPU via the associated irqfd
structure and GSI routing instead of dynamically allocating a separate
data structure. In addition to eliminating an atomic allocation, this
will allow hoisting much of the IRTE update logic to common x86.
Cc: Sairaj Kodilkar <sarunkod@....com>
Signed-off-by: Sean Christopherson <seanjc@...gle.com>
---
arch/x86/kvm/svm/avic.c | 71 +++++++++++++++------------------------
arch/x86/kvm/svm/svm.h | 10 +++---
include/linux/kvm_irqfd.h | 3 ++
3 files changed, 36 insertions(+), 48 deletions(-)
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index adacf00d6664..d33c01379421 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -75,14 +75,6 @@ static bool next_vm_id_wrapped = 0;
static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
bool x2avic_enabled;
-/*
- * This is a wrapper of struct amd_iommu_ir_data.
- */
-struct amd_svm_iommu_ir {
- struct list_head node; /* Used by SVM for per-vcpu ir_list */
- void *data; /* Storing pointer to struct amd_ir_data */
-};
-
static void avic_activate_vmcb(struct vcpu_svm *svm)
{
struct vmcb *vmcb = svm->vmcb01.ptr;
@@ -746,8 +738,8 @@ static int avic_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
{
int ret = 0;
unsigned long flags;
- struct amd_svm_iommu_ir *ir;
struct vcpu_svm *svm = to_svm(vcpu);
+ struct kvm_kernel_irqfd *irqfd;
if (!kvm_arch_has_assigned_device(vcpu->kvm))
return 0;
@@ -761,11 +753,11 @@ static int avic_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
if (list_empty(&svm->ir_list))
goto out;
- list_for_each_entry(ir, &svm->ir_list, node) {
+ list_for_each_entry(irqfd, &svm->ir_list, vcpu_list) {
if (activate)
- ret = amd_iommu_activate_guest_mode(ir->data);
+ ret = amd_iommu_activate_guest_mode(irqfd->irq_bypass_data);
else
- ret = amd_iommu_deactivate_guest_mode(ir->data);
+ ret = amd_iommu_deactivate_guest_mode(irqfd->irq_bypass_data);
if (ret)
break;
}
@@ -774,27 +766,30 @@ static int avic_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
return ret;
}
-static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
+static void svm_ir_list_del(struct vcpu_svm *svm,
+ struct kvm_kernel_irqfd *irqfd,
+ struct amd_iommu_pi_data *pi)
{
unsigned long flags;
- struct amd_svm_iommu_ir *cur;
+ struct kvm_kernel_irqfd *cur;
spin_lock_irqsave(&svm->ir_list_lock, flags);
- list_for_each_entry(cur, &svm->ir_list, node) {
- if (cur->data != pi->ir_data)
+ list_for_each_entry(cur, &svm->ir_list, vcpu_list) {
+ if (cur->irq_bypass_data != pi->ir_data)
continue;
- list_del(&cur->node);
- kfree(cur);
+ if (WARN_ON_ONCE(cur != irqfd))
+ continue;
+ list_del(&irqfd->vcpu_list);
break;
}
spin_unlock_irqrestore(&svm->ir_list_lock, flags);
}
-static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
+static int svm_ir_list_add(struct vcpu_svm *svm,
+ struct kvm_kernel_irqfd *irqfd,
+ struct amd_iommu_pi_data *pi)
{
- int ret = 0;
unsigned long flags;
- struct amd_svm_iommu_ir *ir;
u64 entry;
if (WARN_ON_ONCE(!pi->ir_data))
@@ -811,25 +806,14 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
struct vcpu_svm *prev_svm;
- if (!prev_vcpu) {
- ret = -EINVAL;
- goto out;
- }
+ if (!prev_vcpu)
+ return -EINVAL;
prev_svm = to_svm(prev_vcpu);
- svm_ir_list_del(prev_svm, pi);
+ svm_ir_list_del(prev_svm, irqfd, pi);
}
- /**
- * Allocating new amd_iommu_pi_data, which will get
- * add to the per-vcpu ir_list.
- */
- ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_ATOMIC | __GFP_ACCOUNT);
- if (!ir) {
- ret = -ENOMEM;
- goto out;
- }
- ir->data = pi->ir_data;
+ irqfd->irq_bypass_data = pi->ir_data;
spin_lock_irqsave(&svm->ir_list_lock, flags);
@@ -844,10 +828,9 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
amd_iommu_update_ga(entry & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK,
true, pi->ir_data);
- list_add(&ir->node, &svm->ir_list);
+ list_add(&irqfd->vcpu_list, &svm->ir_list);
spin_unlock_irqrestore(&svm->ir_list_lock, flags);
-out:
- return ret;
+ return 0;
}
/*
@@ -951,7 +934,7 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
* scheduling information in IOMMU irte.
*/
if (!ret && pi.is_guest_mode)
- svm_ir_list_add(svm, &pi);
+ svm_ir_list_add(svm, irqfd, &pi);
}
if (!ret && svm) {
@@ -992,7 +975,7 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
vcpu = kvm_get_vcpu_by_id(kvm, id);
if (vcpu)
- svm_ir_list_del(to_svm(vcpu), &pi);
+ svm_ir_list_del(to_svm(vcpu), irqfd, &pi);
}
}
out:
@@ -1004,8 +987,8 @@ static inline int
avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
{
int ret = 0;
- struct amd_svm_iommu_ir *ir;
struct vcpu_svm *svm = to_svm(vcpu);
+ struct kvm_kernel_irqfd *irqfd;
lockdep_assert_held(&svm->ir_list_lock);
@@ -1019,8 +1002,8 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
if (list_empty(&svm->ir_list))
return 0;
- list_for_each_entry(ir, &svm->ir_list, node) {
- ret = amd_iommu_update_ga(cpu, r, ir->data);
+ list_for_each_entry(irqfd, &svm->ir_list, vcpu_list) {
+ ret = amd_iommu_update_ga(cpu, r, irqfd->irq_bypass_data);
if (ret)
return ret;
}
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index b35fce30d923..cc27877d69ae 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -310,10 +310,12 @@ struct vcpu_svm {
u64 *avic_physical_id_cache;
/*
- * Per-vcpu list of struct amd_svm_iommu_ir:
- * This is used mainly to store interrupt remapping information used
- * when update the vcpu affinity. This avoids the need to scan for
- * IRTE and try to match ga_tag in the IOMMU driver.
+ * Per-vCPU list of irqfds that are eligible to post IRQs directly to
+ * the vCPU (a.k.a. device posted IRQs, a.k.a. IRQ bypass). The list
+ * is used to reconfigure IRTEs when the vCPU is loaded/put (to set the
+ * target pCPU), when AVIC is toggled on/off (to (de)activate bypass),
+ * and if the irqfd becomes ineligible for posting (to put the IRTE
+ * back into remapped mode).
*/
struct list_head ir_list;
spinlock_t ir_list_lock;
diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h
index 8ad43692e3bb..6510a48e62aa 100644
--- a/include/linux/kvm_irqfd.h
+++ b/include/linux/kvm_irqfd.h
@@ -59,6 +59,9 @@ struct kvm_kernel_irqfd {
struct work_struct shutdown;
struct irq_bypass_consumer consumer;
struct irq_bypass_producer *producer;
+
+ struct list_head vcpu_list;
+ void *irq_bypass_data;
};
#endif /* __LINUX_KVM_IRQFD_H */
--
2.49.0.1151.ga128411c76-goog
Powered by blists - more mailing lists