[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1486552228-26798-1-git-send-email-pbonzini@redhat.com>
Date: Wed, 8 Feb 2017 12:10:28 +0100
From: Paolo Bonzini <pbonzini@...hat.com>
To: linux-kernel@...r.kernel.org, kvm@...r.kernel.org
Subject: [RFC PATCH] KVM: race-free exit from KVM_RUN without POSIX signals
The purpose of the KVM_SET_SIGNAL_MASK API is to let userspace "kick"
a VCPU out of KVM_RUN through a POSIX signal. A signal is attached
to a dummy signal handler; by blocking the signal outside KVM_RUN and
unblocking it inside, this possible race is closed:
VCPU thread service thread
--------------------------------------------------------------
check flag
set flag
raise signal
(signal handler does nothing)
KVM_RUN
However, one issue with KVM_SET_SIGNAL_MASK is that it has to take
tsk->sighand->siglock on every KVM_RUN. This lock is often on a
remote NUMA node, because it is on the node of a thread's creator.
Taking this lock can be very expensive if there are many userspace
exits (as is the case for SMP Windows VMs without Hyper-V reference
time counter).
As an alternative, we can put the flag directly in kvm_run so that
KVM can see it:
VCPU thread service thread
--------------------------------------------------------------
raise signal
signal handler
set run->immediate_exit
KVM_RUN
check run->immediate_exit
Signed-off-by: Paolo Bonzini <pbonzini@...hat.com>
---
Documentation/virtual/kvm/api.txt | 13 ++++++++++++-
include/uapi/linux/kvm.h | 1 +
virt/kvm/kvm_main.c | 14 +++++++++++---
3 files changed, 24 insertions(+), 4 deletions(-)
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index e4f2cdcf78eb..925b1b6be073 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3389,7 +3389,18 @@ struct kvm_run {
Request that KVM_RUN return when it becomes possible to inject external
interrupts into the guest. Useful in conjunction with KVM_INTERRUPT.
- __u8 padding1[7];
+ __u8 immediate_exit;
+
+This field is polled once when KVM_RUN starts; if non-zero, KVM_RUN
+clears the field and exits immediately, returning -EINTR. In the common
+scenario where a signal is used to "kick" a VCPU out of KVM_RUN, this
+field can be used as an alternative to KVM_SET_SIGNAL_MASK: instead of
+blocking the signal outside KVM_RUN, the signal handler should set
+run->immediate_exit to a nonzero value.
+
+This field is only available if KVM_CAP_IMMEDIATE_EXIT is.
+
+ __u8 padding1[6];
/* out */
__u32 exit_reason;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 7964b970b9ad..0aa43b1e5b1d 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -881,6 +881,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_SPAPR_RESIZE_HPT 133
#define KVM_CAP_PPC_MMU_RADIX 134
#define KVM_CAP_PPC_MMU_HASH_V3 135
+#define KVM_CAP_IMMEDIATE_EXIT 136
#ifdef KVM_CAP_IRQ_ROUTING
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 482612b4e496..5abb8df00db1 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2550,7 +2550,8 @@ static long kvm_vcpu_ioctl(struct file *filp,
if (r)
return r;
switch (ioctl) {
- case KVM_RUN:
+ case KVM_RUN: {
+ struct kvm_run *run;
r = -EINVAL;
if (arg)
goto out;
@@ -2564,9 +2565,15 @@ static long kvm_vcpu_ioctl(struct file *filp,
synchronize_rcu();
put_pid(oldpid);
}
- r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
- trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
+ run = vcpu->run;
+ if (run->immediate_exit) {
+ WRITE_ONCE(run->immediate_exit, 0);
+ return -EINTR;
+ }
+ r = kvm_arch_vcpu_ioctl_run(vcpu, run);
+ trace_kvm_userspace_exit(run->exit_reason, r);
break;
+ }
case KVM_GET_REGS: {
struct kvm_regs *kvm_regs;
@@ -2927,6 +2934,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
#endif
case KVM_CAP_IOEVENTFD_ANY_LENGTH:
case KVM_CAP_CHECK_EXTENSION_VM:
+ case KVM_CAP_IMMEDIATE_EXIT:
return 1;
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
case KVM_CAP_IRQ_ROUTING:
--
1.8.3.1
Powered by blists - more mailing lists