[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <9c55087b-e529-46cd-8678-51975a9acc71@linux.intel.com>
Date: Fri, 1 Nov 2024 10:25:43 +0800
From: Binbin Wu <binbin.wu@...ux.intel.com>
To: Sean Christopherson <seanjc@...gle.com>, Kai Huang <kai.huang@...el.com>
Cc: "yuan.yao@...ux.intel.com" <yuan.yao@...ux.intel.com>,
"kvm@...r.kernel.org" <kvm@...r.kernel.org>,
"pbonzini@...hat.com" <pbonzini@...hat.com>,
Xiaoyao Li <xiaoyao.li@...el.com>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
Rick Edgecombe <rick.p.edgecombe@...el.com>,
Isaku Yamahata <isaku.yamahata@...el.com>
Subject: Re: [PATCH v3 1/2] KVM: x86: Check hypercall's exit to userspace
generically
On 10/31/2024 10:54 PM, Sean Christopherson wrote:
> My other idea was have an out-param to separate the return code intended for KVM
> from the return code intended for the guest. I generally dislike out-params, but
> trying to juggle a return value that multiplexes guest and host values seems like
> an even worse idea.
>
> Also completely untested...
>
> ---
> arch/x86/include/asm/kvm_host.h | 8 +++----
> arch/x86/kvm/x86.c | 41 +++++++++++++++------------------
> 2 files changed, 23 insertions(+), 26 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 6d9f763a7bb9..226df5c56811 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -2179,10 +2179,10 @@ static inline void kvm_clear_apicv_inhibit(struct kvm *kvm,
> kvm_set_or_clear_apicv_inhibit(kvm, reason, false);
> }
>
> -unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
> - unsigned long a0, unsigned long a1,
> - unsigned long a2, unsigned long a3,
> - int op_64_bit, int cpl);
> +int __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
> + unsigned long a0, unsigned long a1,
> + unsigned long a2, unsigned long a3,
> + int op_64_bit, int cpl, unsigned long *ret);
> int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
>
> int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index e09daa3b157c..e9ae09f1b45b 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -9998,13 +9998,11 @@ static int complete_hypercall_exit(struct kvm_vcpu *vcpu)
> return kvm_skip_emulated_instruction(vcpu);
> }
>
> -unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
> - unsigned long a0, unsigned long a1,
> - unsigned long a2, unsigned long a3,
> - int op_64_bit, int cpl)
> +int __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
> + unsigned long a0, unsigned long a1,
> + unsigned long a2, unsigned long a3,
> + int op_64_bit, int cpl, unsigned long *ret)
> {
> - unsigned long ret;
> -
> trace_kvm_hypercall(nr, a0, a1, a2, a3);
>
> if (!op_64_bit) {
> @@ -10016,15 +10014,15 @@ unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
> }
>
> if (cpl) {
> - ret = -KVM_EPERM;
> + *ret = -KVM_EPERM;
> goto out;
> }
>
> - ret = -KVM_ENOSYS;
> + *ret = -KVM_ENOSYS;
>
> switch (nr) {
> case KVM_HC_VAPIC_POLL_IRQ:
> - ret = 0;
> + *ret = 0;
> break;
> case KVM_HC_KICK_CPU:
> if (!guest_pv_has(vcpu, KVM_FEATURE_PV_UNHALT))
> @@ -10032,36 +10030,36 @@ unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
>
> kvm_pv_kick_cpu_op(vcpu->kvm, a1);
> kvm_sched_yield(vcpu, a1);
> - ret = 0;
> + *ret = 0;
> break;
> #ifdef CONFIG_X86_64
> case KVM_HC_CLOCK_PAIRING:
> - ret = kvm_pv_clock_pairing(vcpu, a0, a1);
> + *ret = kvm_pv_clock_pairing(vcpu, a0, a1);
> break;
> #endif
> case KVM_HC_SEND_IPI:
> if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SEND_IPI))
> break;
>
> - ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
> + *ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
> break;
> case KVM_HC_SCHED_YIELD:
> if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SCHED_YIELD))
> break;
>
> kvm_sched_yield(vcpu, a0);
> - ret = 0;
> + *ret = 0;
> break;
> case KVM_HC_MAP_GPA_RANGE: {
> u64 gpa = a0, npages = a1, attrs = a2;
>
> - ret = -KVM_ENOSYS;
> + *ret = -KVM_ENOSYS;
> if (!user_exit_on_hypercall(vcpu->kvm, KVM_HC_MAP_GPA_RANGE))
> break;
>
> if (!PAGE_ALIGNED(gpa) || !npages ||
> gpa_to_gfn(gpa) + npages <= gpa_to_gfn(gpa)) {
> - ret = -KVM_EINVAL;
> + *ret = -KVM_EINVAL;
> break;
> }
Doesn't *ret need to be set to 0 in this case before returning 0 to the caller?
>
> @@ -10080,13 +10078,13 @@ unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
> return 0;
> }
> default:
> - ret = -KVM_ENOSYS;
> + *ret = -KVM_ENOSYS;
> break;
> }
>
> out:
> ++vcpu->stat.hypercalls;
> - return ret;
> + return 1;
> }
> EXPORT_SYMBOL_GPL(__kvm_emulate_hypercall);
>
> @@ -10094,7 +10092,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
> {
> unsigned long nr, a0, a1, a2, a3, ret;
> int op_64_bit;
> - int cpl;
> + int cpl, r;
>
> if (kvm_xen_hypercall_enabled(vcpu->kvm))
> return kvm_xen_hypercall(vcpu);
> @@ -10110,10 +10108,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
> op_64_bit = is_64_bit_hypercall(vcpu);
> cpl = kvm_x86_call(get_cpl)(vcpu);
>
> - ret = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl);
> - if (nr == KVM_HC_MAP_GPA_RANGE && !ret)
> - /* MAP_GPA tosses the request to the user space. */
> - return 0;
> + r = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl, &ret);
> + if (r <= r)
There is a typo here.
I guess it was meant to be "if (r <= ret)"?
So the combinations will be:
-----------------------------------------------------------------------------
 case | r | ret       | r <= ret | result
------|---|-----------|----------|-------------------------------------------
 1    | 0 | 0         | true     | return r, which is 0, exit to userspace
------|---|-----------|----------|-------------------------------------------
 2    | 1 | 0         | false    | set vcpu's RAX and return back to guest
------|---|-----------|----------|-------------------------------------------
 3    | 1 | -KVM_Exxx | false    | set vcpu's RAX and return back to guest
------|---|-----------|----------|-------------------------------------------
 4    | 1 | Positive  | true     | return r, which is 1,
      |   | N         |          | back to guest without setting vcpu's RAX
-----------------------------------------------------------------------------
KVM_HC_SEND_IPI, which calls kvm_pv_send_ipi(), can hit case 4, which will
return to the guest without setting RAX. This differs from the current behavior.
r can be 0 only if no error is detected during the pre-checks.
I think the code can just check whether r is 0 or not.
I.e.,
@@ -10094,7 +10092,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
unsigned long nr, a0, a1, a2, a3, ret;
int op_64_bit;
- int cpl;
+ int cpl, r;
if (kvm_xen_hypercall_enabled(vcpu->kvm))
return kvm_xen_hypercall(vcpu);
@@ -10110,10 +10108,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
op_64_bit = is_64_bit_hypercall(vcpu);
cpl = kvm_x86_call(get_cpl)(vcpu);
- ret = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl);
- if (nr == KVM_HC_MAP_GPA_RANGE && !ret)
- /* MAP_GPA tosses the request to the user space. */
- return 0;
+ r = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl, &ret);
+ if (!r)
+ return 0;
if (!op_64_bit)
ret = (u32)ret;
> + return r;
>
> if (!op_64_bit)
> ret = (u32)ret;
>
> base-commit: 675248928970d33f7fc8ca9851a170c98f4f1c4f
Powered by blists - more mailing lists