Message-ID: <ZyLWMGcgj76YizSw@google.com>
Date: Thu, 31 Oct 2024 07:54:45 -0700
From: Sean Christopherson <seanjc@...gle.com>
To: Kai Huang <kai.huang@...el.com>
Cc: "binbin.wu@...ux.intel.com" <binbin.wu@...ux.intel.com>,
"yuan.yao@...ux.intel.com" <yuan.yao@...ux.intel.com>, "kvm@...r.kernel.org" <kvm@...r.kernel.org>,
"pbonzini@...hat.com" <pbonzini@...hat.com>, Xiaoyao Li <xiaoyao.li@...el.com>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>, Rick Edgecombe <rick.p.edgecombe@...el.com>,
Isaku Yamahata <isaku.yamahata@...el.com>
Subject: Re: [PATCH v3 1/2] KVM: x86: Check hypercall's exit to userspace generically
On Thu, Oct 31, 2024, Kai Huang wrote:
> > @@ -10111,12 +10111,21 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
> > cpl = kvm_x86_call(get_cpl)(vcpu);
> >
> > ret = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl);
> > - if (nr == KVM_HC_MAP_GPA_RANGE && !ret)
> > - /* MAP_GPA tosses the request to the user space. */
> > + if (!ret)
> > return 0;
> >
> > - if (!op_64_bit)
> > + /*
> > + * KVM's ABI with the guest is that '0' is success, and any other value
> > + * is an error code. Internally, '0' == exit to userspace (see above)
> > + * and '1' == success, as KVM's de facto standard return codes are that
> > + * plus -errno == failure. Explicitly check for '1' as some PV error
> > + * codes are positive values.
> > + */
> > + if (ret == 1)
> > + ret = 0;
> > + else if (!op_64_bit)
> > ret = (u32)ret;
> > +
> > kvm_rax_write(vcpu, ret);
> >
> > return kvm_skip_emulated_instruction(vcpu);
> >
>
> It appears below two cases are not covered correctly?
>
> #ifdef CONFIG_X86_64
> case KVM_HC_CLOCK_PAIRING:
> ret = kvm_pv_clock_pairing(vcpu, a0, a1);
> break;
> #endif
> case KVM_HC_SEND_IPI:
> if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SEND_IPI))
> break;
>
> ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
> break;
>
> Looking at the code, seems at least for KVM_HC_CLOCK_PAIRING,
> kvm_pv_clock_pairing() returns 0 on success, and the upstream behaviour is not
> routing to userspace but writing 0 to vcpu's RAX and returning to guest. With
> the change above, it immediately returns to userspace w/o writing 0 to RAX.
>
> For KVM_HC_SEND_IPI, seems the upstream behaviour is the return value of
> kvm_pv_send_ipi() is always written to vcpu's RAX reg, and we always just go
> back to guest. With your change, the behaviour will be changed to:
>
> 1) when ret == 0, exit to userspace w/o writing 0 to vcpu's RAX,
> 2) when ret == 1, it is converted to 0 and then written to RAX.
>
> This doesn't look safe.
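(For reference, a compact view of the v3 flow described above, i.e. the quoted hunk as applied; illustration only, not code from the series:)

	ret = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl);
	if (!ret)		/* also true for a successful (0) kvm_pv_clock_pairing() */
		return 0;	/* exits to userspace without ever writing RAX */

	if (ret == 1)
		ret = 0;	/* a 1 from e.g. kvm_pv_send_ipi() becomes 0 in RAX */
	else if (!op_64_bit)
		ret = (u32)ret;

	kvm_rax_write(vcpu, ret);
	return kvm_skip_emulated_instruction(vcpu);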
Drat, I managed to space on the cases that didn't explicitly set '0'. Hrm.
My other idea was have an out-param to separate the return code intended for KVM
from the return code intended for the guest. I generally dislike out-params, but
trying to juggle a return value that multiplexes guest and host values seems like
an even worse idea.
Also completely untested...
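The idea is that the helper's return value becomes purely a KVM-internal status (1 == resume the guest, 0 == exit to userspace, in line with KVM's usual return codes), while the guest-visible value is handed back through the out-param. A sketch of the resulting caller flow, i.e. the diff below as applied (same caveat, untested):

	r = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl, &ret);
	if (r <= 0)		/* 0 == exit to userspace (a negative value would be an error) */
		return r;

	/* r == 1: handled in KVM, 'ret' holds the value the guest sees in RAX. */
	if (!op_64_bit)
		ret = (u32)ret;
	kvm_rax_write(vcpu, ret);
	return kvm_skip_emulated_instruction(vcpu);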
---
arch/x86/include/asm/kvm_host.h | 8 +++----
arch/x86/kvm/x86.c | 41 +++++++++++++++------------------
2 files changed, 23 insertions(+), 26 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6d9f763a7bb9..226df5c56811 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -2179,10 +2179,10 @@ static inline void kvm_clear_apicv_inhibit(struct kvm *kvm,
kvm_set_or_clear_apicv_inhibit(kvm, reason, false);
}
-unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
- unsigned long a0, unsigned long a1,
- unsigned long a2, unsigned long a3,
- int op_64_bit, int cpl);
+int __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
+ unsigned long a0, unsigned long a1,
+ unsigned long a2, unsigned long a3,
+ int op_64_bit, int cpl, unsigned long *ret);
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e09daa3b157c..e9ae09f1b45b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9998,13 +9998,11 @@ static int complete_hypercall_exit(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}
-unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
- unsigned long a0, unsigned long a1,
- unsigned long a2, unsigned long a3,
- int op_64_bit, int cpl)
+int __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
+ unsigned long a0, unsigned long a1,
+ unsigned long a2, unsigned long a3,
+ int op_64_bit, int cpl, unsigned long *ret)
{
- unsigned long ret;
-
trace_kvm_hypercall(nr, a0, a1, a2, a3);
if (!op_64_bit) {
@@ -10016,15 +10014,15 @@ unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
}
if (cpl) {
- ret = -KVM_EPERM;
+ *ret = -KVM_EPERM;
goto out;
}
- ret = -KVM_ENOSYS;
+ *ret = -KVM_ENOSYS;
switch (nr) {
case KVM_HC_VAPIC_POLL_IRQ:
- ret = 0;
+ *ret = 0;
break;
case KVM_HC_KICK_CPU:
if (!guest_pv_has(vcpu, KVM_FEATURE_PV_UNHALT))
@@ -10032,36 +10030,36 @@ unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
kvm_pv_kick_cpu_op(vcpu->kvm, a1);
kvm_sched_yield(vcpu, a1);
- ret = 0;
+ *ret = 0;
break;
#ifdef CONFIG_X86_64
case KVM_HC_CLOCK_PAIRING:
- ret = kvm_pv_clock_pairing(vcpu, a0, a1);
+ *ret = kvm_pv_clock_pairing(vcpu, a0, a1);
break;
#endif
case KVM_HC_SEND_IPI:
if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SEND_IPI))
break;
- ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
+ *ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
break;
case KVM_HC_SCHED_YIELD:
if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SCHED_YIELD))
break;
kvm_sched_yield(vcpu, a0);
- ret = 0;
+ *ret = 0;
break;
case KVM_HC_MAP_GPA_RANGE: {
u64 gpa = a0, npages = a1, attrs = a2;
- ret = -KVM_ENOSYS;
+ *ret = -KVM_ENOSYS;
if (!user_exit_on_hypercall(vcpu->kvm, KVM_HC_MAP_GPA_RANGE))
break;
if (!PAGE_ALIGNED(gpa) || !npages ||
gpa_to_gfn(gpa) + npages <= gpa_to_gfn(gpa)) {
- ret = -KVM_EINVAL;
+ *ret = -KVM_EINVAL;
break;
}
@@ -10080,13 +10078,13 @@ unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
return 0;
}
default:
- ret = -KVM_ENOSYS;
+ *ret = -KVM_ENOSYS;
break;
}
out:
++vcpu->stat.hypercalls;
- return ret;
+ return 1;
}
EXPORT_SYMBOL_GPL(__kvm_emulate_hypercall);
@@ -10094,7 +10092,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
unsigned long nr, a0, a1, a2, a3, ret;
int op_64_bit;
- int cpl;
+ int cpl, r;
if (kvm_xen_hypercall_enabled(vcpu->kvm))
return kvm_xen_hypercall(vcpu);
@@ -10110,10 +10108,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
op_64_bit = is_64_bit_hypercall(vcpu);
cpl = kvm_x86_call(get_cpl)(vcpu);
- ret = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl);
- if (nr == KVM_HC_MAP_GPA_RANGE && !ret)
- /* MAP_GPA tosses the request to the user space. */
- return 0;
+ r = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl, &ret);
+ if (r <= 0)
+ return r;
if (!op_64_bit)
ret = (u32)ret;
base-commit: 675248928970d33f7fc8ca9851a170c98f4f1c4f
--