[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250401161106.790710-8-pbonzini@redhat.com>
Date: Tue, 1 Apr 2025 18:10:44 +0200
From: Paolo Bonzini <pbonzini@...hat.com>
To: linux-kernel@...r.kernel.org,
kvm@...r.kernel.org
Cc: roy.hopkins@...e.com,
seanjc@...gle.com,
thomas.lendacky@....com,
ashish.kalra@....com,
michael.roth@....com,
jroedel@...e.de,
nsaenz@...zon.com,
anelkz@...zon.de,
James.Bottomley@...senPartnership.com
Subject: [PATCH 07/29] KVM: do not use online_vcpus to test vCPU validity
Different planes can initialize their vCPUs separately, therefore there is
no single online_vcpus value that can be used to test that a vCPU has
indeed been fully initialized.
Use the shiny new plane field instead, initializing it to an invalid value
(-1) while the vCPU is visible in the xarray but may still disappear if
the creation fails.
Signed-off-by: Paolo Bonzini <pbonzini@...hat.com>
---
arch/x86/kvm/i8254.c | 3 ++-
include/linux/kvm_host.h | 23 ++++++-----------------
virt/kvm/kvm_main.c | 20 +++++++++++++-------
3 files changed, 21 insertions(+), 25 deletions(-)
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index d7ab8780ab9e..e3a3e7b90c26 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -260,9 +260,10 @@ static void pit_do_work(struct kthread_work *work)
* VCPUs and only when LVT0 is in NMI mode. The interrupt can
* also be simultaneously delivered through PIC and IOAPIC.
*/
- if (atomic_read(&kvm->arch.vapics_in_nmi_mode) > 0)
+ if (atomic_read(&kvm->arch.vapics_in_nmi_mode) > 0) {
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_apic_nmi_wd_deliver(vcpu);
+ }
}
static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4d408d1d5ccc..0db27814294f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -992,27 +992,16 @@ static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
{
- int num_vcpus = atomic_read(&kvm->online_vcpus);
-
- /*
- * Explicitly verify the target vCPU is online, as the anti-speculation
- * logic only limits the CPU's ability to speculate, e.g. given a "bad"
- * index, clamping the index to 0 would return vCPU0, not NULL.
- */
- if (i >= num_vcpus)
+ struct kvm_vcpu *vcpu = xa_load(&kvm->vcpu_array, i);
+ if (vcpu && unlikely(vcpu->plane == -1))
return NULL;
- i = array_index_nospec(i, num_vcpus);
-
- /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu. */
- smp_rmb();
- return xa_load(&kvm->vcpu_array, i);
+ return vcpu;
}
-#define kvm_for_each_vcpu(idx, vcpup, kvm) \
- if (atomic_read(&kvm->online_vcpus)) \
- xa_for_each_range(&kvm->vcpu_array, idx, vcpup, 0, \
- (atomic_read(&kvm->online_vcpus) - 1))
+#define kvm_for_each_vcpu(idx, vcpup, kvm) \
+ xa_for_each(&kvm->vcpu_array, idx, vcpup) \
+ if ((vcpup)->plane == -1) ; else \
static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
{
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e343905e46d8..eba02cb7cc57 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4186,6 +4186,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
goto unlock_vcpu_destroy;
}
+ /*
+ * Store an invalid plane number until fully initialized. xa_insert() has
+ * release semantics, which ensures the write is visible to kvm_get_vcpu().
+ */
+ vcpu->plane = -1;
vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus);
r = xa_insert(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, GFP_KERNEL_ACCOUNT);
WARN_ON_ONCE(r == -EBUSY);
@@ -4195,7 +4200,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
/*
* Now it's all set up, let userspace reach it. Grab the vCPU's mutex
* so that userspace can't invoke vCPU ioctl()s until the vCPU is fully
- * visible (per online_vcpus), e.g. so that KVM doesn't get tricked
+ * visible (valid vcpu->plane), e.g. so that KVM doesn't get tricked
* into a NULL-pointer dereference because KVM thinks the _current_
* vCPU doesn't exist. As a bonus, taking vcpu->mutex ensures lockdep
* knows it's taken *inside* kvm->lock.
@@ -4206,12 +4211,13 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
if (r < 0)
goto kvm_put_xa_erase;
- /*
- * Pairs with smp_rmb() in kvm_get_vcpu. Store the vcpu
- * pointer before kvm->online_vcpu's incremented value.
- */
- smp_wmb();
atomic_inc(&kvm->online_vcpus);
+
+ /*
+ * Pairs with xa_load() in kvm_get_vcpu, ensuring that online_vcpus
+ * is updated before vcpu->plane.
+ */
+ smp_store_release(&vcpu->plane, 0);
mutex_unlock(&vcpu->mutex);
mutex_unlock(&kvm->lock);
@@ -4355,7 +4361,7 @@ static int kvm_wait_for_vcpu_online(struct kvm_vcpu *vcpu)
* In practice, this happy path will always be taken, as a well-behaved
* VMM will never invoke a vCPU ioctl() before KVM_CREATE_VCPU returns.
*/
- if (likely(vcpu->vcpu_idx < atomic_read(&kvm->online_vcpus)))
+ if (likely(vcpu->plane != -1))
return 0;
/*
--
2.49.0
Powered by blists - more mailing lists