[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <87muadnn1t.fsf@vitty.brq.redhat.com>
Date: Fri, 24 Jan 2020 11:53:02 +0100
From: Vitaly Kuznetsov <vkuznets@...hat.com>
To: Paolo Bonzini <pbonzini@...hat.com>,
Sean Christopherson <sean.j.christopherson@...el.com>,
Jim Mattson <jmattson@...gle.com>
Cc: linmiaohe <linmiaohe@...wei.com>, kvm list <kvm@...r.kernel.org>,
LKML <linux-kernel@...r.kernel.org>,
the arch/x86 maintainers <x86@...nel.org>,
Radim Krčmář <rkrcmar@...hat.com>,
Wanpeng Li <wanpengli@...cent.com>,
Joerg Roedel <joro@...tes.org>,
Thomas Gleixner <tglx@...utronix.de>,
Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
"H . Peter Anvin" <hpa@...or.com>
Subject: Re: [PATCH] KVM: nVMX: set rflags to specify success in handle_invvpid() default case
Sean Christopherson <sean.j.christopherson@...el.com> writes:
> On Thu, Jan 23, 2020 at 10:22:24AM -0800, Jim Mattson wrote:
>> On Thu, Jan 23, 2020 at 1:54 AM Paolo Bonzini <pbonzini@...hat.com> wrote:
>> >
>> > On 23/01/20 10:45, Vitaly Kuznetsov wrote:
>> > >>> SDM says that "If an
>> > >>> unsupported INVVPID type is specified, the instruction fails." and this
>> > >>> is similar to INVEPT and I decided to check what handle_invept()
>> > >>> does. Well, it does BUG_ON().
>> > >>>
>> > >>> Are we doing the right thing in any of these cases?
>> > >>
>> > >> Yes, both INVEPT and INVVPID catch this earlier.
>> > >>
>> > >> So I'm leaning towards not applying Miaohe's patch.
>> > >
>> > > Well, we may at least want to converge on BUG_ON() for both
>> > > handle_invvpid()/handle_invept(), there's no need for them to differ.
>> >
>> > WARN_ON_ONCE + nested_vmx_failValid would probably be better, if we
>> > really want to change this.
>> >
>> > Paolo
>>
>> In both cases, something is seriously wrong. The only plausible
>> explanations are compiler error or hardware failure. It would be nice
>> to handle *all* such failures with a KVM_INTERNAL_ERROR exit to
>> userspace. (I'm also thinking of situations like getting a VM-exit for
>> INIT.)
>
> Ya. Vitaly and I had a similar discussion[*]. The idea we tossed around
> was to also mark the VM as having encountered a KVM/hardware bug so that
> the VM is effectively dead. That would also allow gracefully handling bugs
> that are detected deep in the stack, i.e. can't simply return 0 to get out
> to userspace.
Yea, I was thinking about introducing a big hammer which would stop the
whole VM as soon as possible to make it easier to debug such
situations. Something like (not really tested):
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cf917139de6b..5476f88c9ada 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8001,6 +8001,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
bool req_immediate_exit = false;
if (kvm_request_pending(vcpu)) {
+ /* INTERROR check should always come first */
+ if (kvm_check_request(KVM_REQ_INTERROR, vcpu)) {
+ if (vcpu->run->exit_reason != KVM_EXIT_INTERNAL_ERROR) {
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_OTHERCPU;
+ }
+ r = 0;
+ goto out;
+ }
if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) {
if (unlikely(!kvm_x86_ops->get_vmcs12_pages(vcpu))) {
r = 0;
@@ -8510,6 +8519,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_sigset_activate(vcpu);
kvm_load_guest_fpu(vcpu);
+ if (unlikely(vcpu->kvm->vm_bugged)) {
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ /* Refuse to run a bugged VM; maybe add a suberror for 'attempted to run a vCPU of a bugged VM'? */
+ r = 0;
+ goto out;
+ }
+
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
if (kvm_run->immediate_exit) {
r = -EINTR;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 538c25e778c0..d003be5fcf42 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -146,6 +146,7 @@ static inline bool is_error_page(struct page *page)
#define KVM_REQ_MMU_RELOAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_PENDING_TIMER 2
#define KVM_REQ_UNHALT 3
+#define KVM_REQ_INTERROR (4 | KVM_REQUEST_WAIT)
#define KVM_REQUEST_ARCH_BASE 8
#define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \
@@ -501,6 +502,9 @@ struct kvm {
struct srcu_struct srcu;
struct srcu_struct irq_srcu;
pid_t userspace_pid;
+
+ /* VM caused internal KVM error */
+ bool vm_bugged;
};
#define kvm_err(fmt, ...) \
@@ -613,6 +617,7 @@ static inline void kvm_irqfd_exit(void)
int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
struct module *module);
void kvm_exit(void);
+void kvm_vm_bug(struct kvm_vcpu *vcpu, u32 error);
void kvm_get_kvm(struct kvm *kvm);
void kvm_put_kvm(struct kvm *kvm);
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index f0a16b4adbbd..62505161ae98 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -246,6 +246,8 @@ struct kvm_hyperv_exit {
#define KVM_INTERNAL_ERROR_DELIVERY_EV 3
/* Encounter unexpected vm-exit reason */
#define KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON 4
+/* Some other vCPU caused internal KVM error */
+#define KVM_INTERNAL_ERROR_OTHERCPU 5
/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
struct kvm_run {
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 00268290dcbd..4cc268d57714 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4446,6 +4446,18 @@ void kvm_exit(void)
}
EXPORT_SYMBOL_GPL(kvm_exit);
+void kvm_vm_bug(struct kvm_vcpu *vcpu, u32 error)
+{
+ vcpu->kvm->vm_bugged = true;
+
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror = error;
+ /* We can also pass ndata/data ... */
+
+ kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_INTERROR);
+}
+EXPORT_SYMBOL_GPL(kvm_vm_bug);
+
struct kvm_vm_worker_thread_context {
struct kvm *kvm;
struct task_struct *parent;
If you guys like the idea in general, I can prepare patches.
--
Vitaly
Powered by blists - more mailing lists