Message-ID: <20250401161106.790710-14-pbonzini@redhat.com>
Date: Tue, 1 Apr 2025 18:10:50 +0200
From: Paolo Bonzini <pbonzini@...hat.com>
To: linux-kernel@...r.kernel.org,
kvm@...r.kernel.org
Cc: roy.hopkins@...e.com,
seanjc@...gle.com,
thomas.lendacky@....com,
ashish.kalra@....com,
michael.roth@....com,
jroedel@...e.de,
nsaenz@...zon.com,
anelkz@...zon.de,
James.Bottomley@...senPartnership.com
Subject: [PATCH 13/29] KVM: implement vCPU creation for extra planes
For userspace to have fun with planes it is probably useful to let it
create vCPUs on the non-zero planes as well. Each such vCPU has its own
struct kvm_vcpu, and its file descriptor behaves like a regular vCPU
file descriptor, except that it only accepts a small subset of ioctls
(mostly get/set) and it shares some of the plane-0 vCPU's backing
resources, notably vcpu->run.
TODO: prefault might be useful on non-default planes as well?
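A minimal sketch of the intended userspace flow, assuming the uAPI
introduced below (KVM_CREATE_PLANE takes a plane id on the VM fd,
KVM_CREATE_VCPU_PLANE takes the plane-0 vCPU fd on the plane fd);
the helper name and the plane id are illustrative, error checking
elided:

  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  static int create_mirror_vcpu(int kvm_fd)
  {
          int vm_fd    = ioctl(kvm_fd, KVM_CREATE_VM, 0);
          int vcpu0_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);  /* plane 0 */
          int plane_fd = ioctl(vm_fd, KVM_CREATE_PLANE, 1); /* plane 1 */

          /*
           * The returned fd shares vcpu->run with vcpu0_fd, cannot be
           * mmap()ed, and accepts only the get/set subset of vCPU
           * ioctls; everything else, including KVM_RUN, fails with
           * -EINVAL.
           */
          return ioctl(plane_fd, KVM_CREATE_VCPU_PLANE, vcpu0_fd);
  }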
Signed-off-by: Paolo Bonzini <pbonzini@...hat.com>
---
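Note on the locking.rst change below: this is only a sketch of how a
hypothetical in-kernel path would take both mutexes in the documented
order (the plane0_vcpu/planeN_vcpu names and the lockdep subclass are
illustrative, not taken from this series):

  /*
   * Plane 0's vcpu->mutex is taken outside the mutex of the vCPU with
   * the same id on another plane; mutex_lock_nested() tells lockdep
   * that acquiring a second lock of the same class is intentional.
   */
  mutex_lock(&plane0_vcpu->mutex);
  mutex_lock_nested(&planeN_vcpu->mutex, SINGLE_DEPTH_NESTING);
  /* ... operate on both planes' vCPU state ... */
  mutex_unlock(&planeN_vcpu->mutex);
  mutex_unlock(&plane0_vcpu->mutex);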
Documentation/virt/kvm/locking.rst | 3 +
include/linux/kvm_host.h | 4 +-
include/uapi/linux/kvm.h | 1 +
virt/kvm/kvm_main.c | 167 +++++++++++++++++++++++------
4 files changed, 142 insertions(+), 33 deletions(-)
diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst
index ae8bce7fecbe..ad22344deb28 100644
--- a/Documentation/virt/kvm/locking.rst
+++ b/Documentation/virt/kvm/locking.rst
@@ -26,6 +26,9 @@ The acquisition orders for mutexes are as follows:
are taken on the waiting side when modifying memslots, so MMU notifiers
must not take either kvm->slots_lock or kvm->slots_arch_lock.
+- when VMs have multiple planes, vcpu->mutex for plane 0 can be taken
+ outside vcpu->mutex for the same id on another plane
+
cpus_read_lock() vs kvm_lock:
- Taking cpus_read_lock() outside of kvm_lock is problematic, despite that
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b511aed2de8e..99fd90c5d71b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -343,6 +343,9 @@ struct kvm_vcpu {
struct mutex mutex;
+ /* Only valid on plane 0 */
+ bool wants_to_run;
+
/* Shared for all planes */
struct kvm_run *run;
@@ -388,7 +391,6 @@ struct kvm_vcpu {
bool dy_eligible;
} spin_loop;
#endif
- bool wants_to_run;
bool preempted;
bool ready;
bool scheduled_out;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 96d25c7fa18f..24fa002cd7c1 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1691,5 +1691,6 @@ struct kvm_pre_fault_memory {
};
#define KVM_CREATE_PLANE _IO(KVMIO, 0xd6)
+#define KVM_CREATE_VCPU_PLANE _IO(KVMIO, 0xd7)
#endif /* __LINUX_KVM_H */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 863fd80ddfbe..06fa2a6ad96f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -438,11 +438,11 @@ void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
}
#endif
-static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
+static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm_plane *plane, unsigned id)
{
mutex_init(&vcpu->mutex);
vcpu->cpu = -1;
- vcpu->kvm = kvm;
+ vcpu->kvm = plane->kvm;
vcpu->vcpu_id = id;
vcpu->pid = NULL;
rwlock_init(&vcpu->pid_lock);
@@ -459,8 +459,13 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
vcpu->last_used_slot = NULL;
/* Fill the stats id string for the vcpu */
- snprintf(vcpu->stats_id, sizeof(vcpu->stats_id), "kvm-%d/vcpu-%d",
- task_pid_nr(current), id);
+ if (plane->plane) {
+ snprintf(vcpu->stats_id, sizeof(vcpu->stats_id), "kvm-%d/vcpu-%d:%d",
+ task_pid_nr(current), id, plane->plane);
+ } else {
+ snprintf(vcpu->stats_id, sizeof(vcpu->stats_id), "kvm-%d/vcpu-%d",
+ task_pid_nr(current), id);
+ }
}
static void kvm_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -475,7 +480,9 @@ static void kvm_vcpu_destroy(struct kvm_vcpu *vcpu)
*/
put_pid(vcpu->pid);
- free_page((unsigned long)vcpu->run);
+ if (!vcpu->plane)
+ free_page((unsigned long)vcpu->run);
+
kmem_cache_free(kvm_vcpu_cache, vcpu);
}
@@ -4026,6 +4033,9 @@ static vm_fault_t kvm_vcpu_fault(struct vm_fault *vmf)
struct kvm_vcpu *vcpu = vmf->vma->vm_file->private_data;
struct page *page;
+ if (vcpu->plane)
+ return VM_FAULT_SIGBUS;
+
if (vmf->pgoff == 0)
page = virt_to_page(vcpu->run);
#ifdef CONFIG_X86
@@ -4113,7 +4123,10 @@ static void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
if (!debugfs_initialized())
return;
- snprintf(dir_name, sizeof(dir_name), "vcpu%d", vcpu->vcpu_id);
+ if (vcpu->plane)
+ snprintf(dir_name, sizeof(dir_name), "vcpu%d:%d", vcpu->vcpu_id, vcpu->plane);
+ else
+ snprintf(dir_name, sizeof(dir_name), "vcpu%d", vcpu->vcpu_id);
debugfs_dentry = debugfs_create_dir(dir_name,
vcpu->kvm->debugfs_dentry);
debugfs_create_file("pid", 0444, debugfs_dentry, vcpu,
@@ -4126,9 +4139,10 @@ static void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
/*
* Creates some virtual cpus. Good luck creating more than one.
*/
-static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
+static int kvm_vm_ioctl_create_vcpu(struct kvm_plane *plane, struct kvm_vcpu *plane0_vcpu, unsigned long id)
{
int r;
+ struct kvm *kvm = plane->kvm;
struct kvm_vcpu *vcpu;
struct page *page;
@@ -4165,24 +4179,33 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
goto vcpu_decrement;
}
- BUILD_BUG_ON(sizeof(struct kvm_run) > PAGE_SIZE);
- page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
- if (!page) {
- r = -ENOMEM;
- goto vcpu_free;
- }
- vcpu->run = page_address(page);
+ if (plane->plane) {
+ page = NULL;
+ vcpu->run = plane0_vcpu->run;
+ } else {
+ WARN_ON(plane0_vcpu != NULL);
+ plane0_vcpu = vcpu;
- if (kvm->dirty_ring_size) {
- r = kvm_dirty_ring_alloc(kvm, &vcpu->__dirty_ring,
- id, kvm->dirty_ring_size);
- if (r)
- goto vcpu_free_run_page;
+ BUILD_BUG_ON(sizeof(struct kvm_run) > PAGE_SIZE);
+ page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ if (!page) {
+ r = -ENOMEM;
+ goto vcpu_free;
+ }
+ vcpu->run = page_address(page);
+
+ if (kvm->dirty_ring_size) {
+ r = kvm_dirty_ring_alloc(kvm, &vcpu->__dirty_ring,
+ id, kvm->dirty_ring_size);
+ if (r)
+ goto vcpu_free_run_page;
+ }
}
- vcpu->plane0 = vcpu;
- vcpu->stat = &vcpu->__stat;
- kvm_vcpu_init(vcpu, kvm, id);
+ vcpu->plane0 = plane0_vcpu;
+ vcpu->stat = &plane0_vcpu->__stat;
+ vcpu->dirty_ring = &plane0_vcpu->__dirty_ring;
+ kvm_vcpu_init(vcpu, plane, id);
r = kvm_arch_vcpu_create(vcpu);
if (r)
@@ -4190,7 +4213,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
mutex_lock(&kvm->lock);
- if (kvm_get_vcpu_by_id(kvm, id)) {
+ if (kvm_get_plane_vcpu_by_id(plane, id)) {
r = -EEXIST;
goto unlock_vcpu_destroy;
}
@@ -4200,8 +4223,13 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
* release semantics, which ensures the write is visible to kvm_get_vcpu().
*/
vcpu->plane = -1;
- vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus);
- r = xa_insert(&kvm->planes[0]->vcpu_array, vcpu->vcpu_idx, vcpu, GFP_KERNEL_ACCOUNT);
+ if (plane->plane)
+ vcpu->vcpu_idx = plane0_vcpu->vcpu_idx;
+ else
+ vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus);
+
+ r = xa_insert(&plane->vcpu_array, vcpu->vcpu_idx,
+ vcpu, GFP_KERNEL_ACCOUNT);
WARN_ON_ONCE(r == -EBUSY);
if (r)
goto unlock_vcpu_destroy;
@@ -4220,13 +4248,14 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
if (r < 0)
goto kvm_put_xa_erase;
- atomic_inc(&kvm->online_vcpus);
+ if (!plane->plane)
+ atomic_inc(&kvm->online_vcpus);
/*
* Pairs with xa_load() in kvm_get_vcpu, ensuring that online_vcpus
* is updated before vcpu->plane.
*/
- smp_store_release(&vcpu->plane, 0);
+ smp_store_release(&vcpu->plane, plane->plane);
mutex_unlock(&vcpu->mutex);
mutex_unlock(&kvm->lock);
@@ -4237,14 +4266,15 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id)
kvm_put_xa_erase:
mutex_unlock(&vcpu->mutex);
kvm_put_kvm_no_destroy(kvm);
- xa_erase(&kvm->planes[0]->vcpu_array, vcpu->vcpu_idx);
+ xa_erase(&plane->vcpu_array, vcpu->vcpu_idx);
unlock_vcpu_destroy:
mutex_unlock(&kvm->lock);
kvm_arch_vcpu_destroy(vcpu);
vcpu_free_dirty_ring:
kvm_dirty_ring_free(&vcpu->__dirty_ring);
vcpu_free_run_page:
- free_page((unsigned long)vcpu->run);
+ if (page)
+ __free_page(page);
vcpu_free:
kmem_cache_free(kvm_vcpu_cache, vcpu);
vcpu_decrement:
@@ -4406,6 +4436,35 @@ static int kvm_plane_ioctl_check_extension(struct kvm_plane *plane, long arg)
}
}
+static int kvm_plane_ioctl_create_vcpu(struct kvm_plane *plane, long arg)
+{
+ int r = -EINVAL;
+ struct file *file;
+ struct kvm_vcpu *vcpu;
+ int fd;
+
+ if (arg != (int)arg)
+ return -EBADF;
+
+ fd = arg;
+ file = fget(fd);
+ if (!file)
+ return -EBADF;
+
+ if (file->f_op != &kvm_vcpu_fops)
+ goto err;
+
+ vcpu = file->private_data;
+ if (vcpu->kvm != plane->kvm)
+ goto err;
+
+ r = kvm_vm_ioctl_create_vcpu(plane, vcpu, vcpu->vcpu_id);
+
+err:
+ fput(file);
+ return r;
+}
+
static long __kvm_plane_ioctl(struct kvm_plane *plane, unsigned int ioctl,
unsigned long arg)
{
@@ -4432,6 +4491,8 @@ static long __kvm_plane_ioctl(struct kvm_plane *plane, unsigned int ioctl,
#endif
case KVM_CHECK_EXTENSION:
return kvm_plane_ioctl_check_extension(plane, arg);
+ case KVM_CREATE_VCPU_PLANE:
+ return kvm_plane_ioctl_create_vcpu(plane, arg);
default:
return -ENOTTY;
}
@@ -4463,6 +4524,44 @@ static struct file_operations kvm_plane_fops = {
};
+static inline bool kvm_arch_is_vcpu_plane_ioctl(unsigned ioctl)
+{
+ switch (ioctl) {
+ case KVM_GET_DEBUGREGS:
+ case KVM_SET_DEBUGREGS:
+ case KVM_GET_FPU:
+ case KVM_SET_FPU:
+ case KVM_GET_LAPIC:
+ case KVM_SET_LAPIC:
+ case KVM_GET_MSRS:
+ case KVM_SET_MSRS:
+ case KVM_GET_NESTED_STATE:
+ case KVM_SET_NESTED_STATE:
+ case KVM_GET_ONE_REG:
+ case KVM_SET_ONE_REG:
+ case KVM_GET_REGS:
+ case KVM_SET_REGS:
+ case KVM_GET_SREGS:
+ case KVM_SET_SREGS:
+ case KVM_GET_SREGS2:
+ case KVM_SET_SREGS2:
+ case KVM_GET_VCPU_EVENTS:
+ case KVM_SET_VCPU_EVENTS:
+ case KVM_GET_XCRS:
+ case KVM_SET_XCRS:
+ case KVM_GET_XSAVE:
+ case KVM_GET_XSAVE2:
+ case KVM_SET_XSAVE:
+
+ case KVM_GET_REG_LIST:
+ case KVM_TRANSLATE:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
static long kvm_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -4475,6 +4574,9 @@ static long kvm_vcpu_ioctl(struct file *filp,
if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_dead)
return -EIO;
+ if (vcpu->plane && !kvm_arch_is_vcpu_plane_ioctl(ioctl))
+ return -EINVAL;
+
if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
return -EINVAL;
@@ -4958,7 +5060,7 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
case KVM_CAP_PLANES:
if (kvm)
return kvm_arch_nr_vcpu_planes(kvm);
- return KVM_MAX_PLANES;
+ return KVM_MAX_VCPU_PLANES;
case KVM_CAP_PLANES_FPU:
return kvm_arch_planes_share_fpu(kvm);
#endif
@@ -5201,7 +5303,8 @@ static int kvm_vm_ioctl_create_plane(struct kvm *kvm, unsigned id)
struct file *file;
int r, fd;
- if (id >= KVM_MAX_VCPU_PLANES)
+ if (id >= kvm_arch_nr_vcpu_planes(kvm) ||
+ WARN_ON_ONCE(id >= KVM_MAX_VCPU_PLANES))
return -EINVAL;
guard(mutex)(&kvm->lock);
@@ -5259,7 +5362,7 @@ static long kvm_vm_ioctl(struct file *filp,
r = kvm_vm_ioctl_create_plane(kvm, arg);
break;
case KVM_CREATE_VCPU:
- r = kvm_vm_ioctl_create_vcpu(kvm, arg);
+ r = kvm_vm_ioctl_create_vcpu(kvm->planes[0], NULL, arg);
break;
case KVM_ENABLE_CAP: {
struct kvm_enable_cap cap;
--
2.49.0