linux-kernel - Re: [PATCH v3 1/2] KVM: x86: Check hypercall's exit to userspace generically

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <9c55087b-e529-46cd-8678-51975a9acc71@linux.intel.com>
Date: Fri, 1 Nov 2024 10:25:43 +0800
From: Binbin Wu <binbin.wu@...ux.intel.com>
To: Sean Christopherson <seanjc@...gle.com>, Kai Huang <kai.huang@...el.com>
Cc: "yuan.yao@...ux.intel.com" <yuan.yao@...ux.intel.com>,
 "kvm@...r.kernel.org" <kvm@...r.kernel.org>,
 "pbonzini@...hat.com" <pbonzini@...hat.com>,
 Xiaoyao Li <xiaoyao.li@...el.com>,
 "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
 Rick Edgecombe <rick.p.edgecombe@...el.com>,
 Isaku Yamahata <isaku.yamahata@...el.com>
Subject: Re: [PATCH v3 1/2] KVM: x86: Check hypercall's exit to userspace
 generically




On 10/31/2024 10:54 PM, Sean Christopherson wrote:
> My other idea was have an out-param to separate the return code intended for KVM
> from the return code intended for the guest.  I generally dislike out-params, but
> trying to juggle a return value that multiplexes guest and host values seems like
> an even worse idea.
>
> Also completely untested...
>
> ---
>   arch/x86/include/asm/kvm_host.h |  8 +++----
>   arch/x86/kvm/x86.c              | 41 +++++++++++++++------------------
>   2 files changed, 23 insertions(+), 26 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 6d9f763a7bb9..226df5c56811 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -2179,10 +2179,10 @@ static inline void kvm_clear_apicv_inhibit(struct kvm *kvm,
>   	kvm_set_or_clear_apicv_inhibit(kvm, reason, false);
>   }
>   
> -unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
> -				      unsigned long a0, unsigned long a1,
> -				      unsigned long a2, unsigned long a3,
> -				      int op_64_bit, int cpl);
> +int __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
> +			    unsigned long a0, unsigned long a1,
> +			    unsigned long a2, unsigned long a3,
> +			    int op_64_bit, int cpl, unsigned long *ret);
>   int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
>   
>   int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index e09daa3b157c..e9ae09f1b45b 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -9998,13 +9998,11 @@ static int complete_hypercall_exit(struct kvm_vcpu *vcpu)
>   	return kvm_skip_emulated_instruction(vcpu);
>   }
>   
> -unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
> -				      unsigned long a0, unsigned long a1,
> -				      unsigned long a2, unsigned long a3,
> -				      int op_64_bit, int cpl)
> +int __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
> +			    unsigned long a0, unsigned long a1,
> +			    unsigned long a2, unsigned long a3,
> +			    int op_64_bit, int cpl, unsigned long *ret)
>   {
> -	unsigned long ret;
> -
>   	trace_kvm_hypercall(nr, a0, a1, a2, a3);
>   
>   	if (!op_64_bit) {
> @@ -10016,15 +10014,15 @@ unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
>   	}
>   
>   	if (cpl) {
> -		ret = -KVM_EPERM;
> +		*ret = -KVM_EPERM;
>   		goto out;
>   	}
>   
> -	ret = -KVM_ENOSYS;
> +	*ret = -KVM_ENOSYS;
>   
>   	switch (nr) {
>   	case KVM_HC_VAPIC_POLL_IRQ:
> -		ret = 0;
> +		*ret = 0;
>   		break;
>   	case KVM_HC_KICK_CPU:
>   		if (!guest_pv_has(vcpu, KVM_FEATURE_PV_UNHALT))
> @@ -10032,36 +10030,36 @@ unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
>   
>   		kvm_pv_kick_cpu_op(vcpu->kvm, a1);
>   		kvm_sched_yield(vcpu, a1);
> -		ret = 0;
> +		*ret = 0;
>   		break;
>   #ifdef CONFIG_X86_64
>   	case KVM_HC_CLOCK_PAIRING:
> -		ret = kvm_pv_clock_pairing(vcpu, a0, a1);
> +		*ret = kvm_pv_clock_pairing(vcpu, a0, a1);
>   		break;
>   #endif
>   	case KVM_HC_SEND_IPI:
>   		if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SEND_IPI))
>   			break;
>   
> -		ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
> +		*ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
>   		break;
>   	case KVM_HC_SCHED_YIELD:
>   		if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SCHED_YIELD))
>   			break;
>   
>   		kvm_sched_yield(vcpu, a0);
> -		ret = 0;
> +		*ret = 0;
>   		break;
>   	case KVM_HC_MAP_GPA_RANGE: {
>   		u64 gpa = a0, npages = a1, attrs = a2;
>   
> -		ret = -KVM_ENOSYS;
> +		*ret = -KVM_ENOSYS;
>   		if (!user_exit_on_hypercall(vcpu->kvm, KVM_HC_MAP_GPA_RANGE))
>   			break;
>   
>   		if (!PAGE_ALIGNED(gpa) || !npages ||
>   		    gpa_to_gfn(gpa) + npages <= gpa_to_gfn(gpa)) {
> -			ret = -KVM_EINVAL;
> +			*ret = -KVM_EINVAL;
>   			break;
>   		}

Doesn't *ret need to be set to 0 in this case before returning 0 to the caller?

>   
> @@ -10080,13 +10078,13 @@ unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
>   		return 0;
>   	}
>   	default:
> -		ret = -KVM_ENOSYS;
> +		*ret = -KVM_ENOSYS;
>   		break;
>   	}
>   
>   out:
>   	++vcpu->stat.hypercalls;
> -	return ret;
> +	return 1;
>   }
>   EXPORT_SYMBOL_GPL(__kvm_emulate_hypercall);
>   
> @@ -10094,7 +10092,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
>   {
>   	unsigned long nr, a0, a1, a2, a3, ret;
>   	int op_64_bit;
> -	int cpl;
> +	int cpl, r;
>   
>   	if (kvm_xen_hypercall_enabled(vcpu->kvm))
>   		return kvm_xen_hypercall(vcpu);
> @@ -10110,10 +10108,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
>   	op_64_bit = is_64_bit_hypercall(vcpu);
>   	cpl = kvm_x86_call(get_cpl)(vcpu);
>   
> -	ret = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl);
> -	if (nr == KVM_HC_MAP_GPA_RANGE && !ret)
> -		/* MAP_GPA tosses the request to the user space. */
> -		return 0;
> +	r = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl, &ret);
> +	if (r <= r)
There is a typo here.
I guess it was meant to be "if (r <= ret)"?

So the combinations will be:
----------------------------------------------------------------------------
    |  r  |    ret    | r <= ret |
----|-----|-----------|----------|------------------------------------------
  1 |  0  |     0     |   true   |  return r, which is 0, exit to userspace
----|-----|-----------|----------|------------------------------------------
  2 |  1  |     0     |   false  |  set vcpu's RAX and return back to guest
----|-----|-----------|----------|------------------------------------------
  3 |  1  | -KVM_Exxx |   false  |  set vcpu's RAX and return back to guest
----|-----|-----------|----------|------------------------------------------
  4 |  1  |  Positive |   true   |  return r, which is 1,
    |     |     N     |          |  back to guest without setting vcpu's RAX
----------------------------------------------------------------------------

KVM_HC_SEND_IPI, which calls kvm_pv_send_ipi(), can hit case 4, which will
return to the guest without setting RAX. This differs from the current behavior.

r can be 0 only if no error was detected during the pre-checks, so
I think the caller can simply check whether r is 0.
I.e.,

@@ -10094,7 +10092,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
  {
      unsigned long nr, a0, a1, a2, a3, ret;
      int op_64_bit;
-    int cpl;
+    int cpl, r;

      if (kvm_xen_hypercall_enabled(vcpu->kvm))
          return kvm_xen_hypercall(vcpu);
@@ -10110,10 +10108,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
      op_64_bit = is_64_bit_hypercall(vcpu);
      cpl = kvm_x86_call(get_cpl)(vcpu);

-    ret = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl);
-    if (nr == KVM_HC_MAP_GPA_RANGE && !ret)
-        /* MAP_GPA tosses the request to the user space. */
-        return 0;
+    r = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl, &ret);
+    if (!r)
+        return 0;

      if (!op_64_bit)
          ret = (u32)ret;


> +		return r;
>   
>   	if (!op_64_bit)
>   		ret = (u32)ret;
>
> base-commit: 675248928970d33f7fc8ca9851a170c98f4f1c4f