Message-ID: <9395d416-cc5c-536d-641e-ffd971b682d1@gmail.com>
Date:   Wed, 15 Nov 2023 09:59:45 +0800
From:   Robert Hoo <robert.hoo.linux@...il.com>
To:     Sean Christopherson <seanjc@...gle.com>
Cc:     Paolo Bonzini <pbonzini@...hat.com>, kvm@...r.kernel.org,
        linux-kernel@...r.kernel.org, Maxim Levitsky <mlevitsk@...hat.com>
Subject: Re: [PATCH 6/9] KVM: x86: Update guest cpu_caps at runtime for
 dynamic CPUID-based features

On 11/14/2023 9:48 PM, Sean Christopherson wrote:
> On Mon, Nov 13, 2023, Robert Hoo wrote:
...
>> u32 *caps  = vcpu->arch.cpu_caps;
>> and update guest_cpu_cap_set(), guest_cpu_cap_clear(),
>> guest_cpu_cap_change() and guest_cpu_cap_restrict() to pass in
>> vcpu->arch.cpu_caps instead of vcpu, since all of them merely refer to vcpu
>> cap, rather than whole vcpu info.
> 
> No, because then every caller would need extra code to pass vcpu->cpu_caps, 

Emm, I don't understand this. I tried modifying and compiling: all that's needed is to 
substitute "vcpu" with "vcpu->arch.cpu_caps" at the call sites. (At the end is my diff 
based on this patch set.)

> and
> passing 'u32 *' provides less type safety than 'struct kvm_vcpu *'.  That tradeoff
> isn't worth making this one path slightly easier to read.

My point is also about long-term robustness: as a principle, we'd better pass a function 
only the parameters/info it actually needs, e.g. cpuid_entry2_find().
Anyway, this is a less important point and shouldn't distract your focus.
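
To make the tradeoff concrete, here is a hypothetical sketch (the helper names are 
mine, not from this series): with a raw u32 *, the compiler accepts any u32 array, 
whereas the struct pointer limits callers to an actual vCPU.

/* Hypothetical helpers, for illustration only. */
void cap_set_raw(u32 *caps, unsigned int x86_feature);
void cap_set_vcpu(struct kvm_vcpu *vcpu, unsigned int x86_feature);

void example(struct kvm_vcpu *vcpu)
{
        u32 scratch[NR_KVM_CPU_CAPS];

        cap_set_raw(vcpu->arch.cpu_caps, X86_FEATURE_XSAVE); /* intended use */
        cap_set_raw(scratch, X86_FEATURE_XSAVE);  /* also compiles, silently wrong array */
        /* cap_set_vcpu(scratch, X86_FEATURE_XSAVE);  rejected at compile time */
}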

This patch set's whole idea is good. I also felt confused when initially looking into 
the vCPU CPUID code, with its complicated internal dependencies and its ties to KVM 
caps (or, in your word, "govern") (and even kernel govern and HW caps?). With this 
guest_cap[], the layered relationship becomes much clearer, along with fast guest 
cap queries.
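
For instance, a guest cap query should reduce to a constant-time bit test on the 
cached array instead of a walk over the CPUID entries. A sketch of 
guest_cpu_cap_has(), inferred from the guest_cpu_cap_set() pattern in my diff below 
(so the exact body is my guess, not necessarily the series' code):

static __always_inline bool guest_cpu_cap_has(struct kvm_vcpu *vcpu,
                                              unsigned int x86_feature)
{
        unsigned int x86_leaf = __feature_leaf(x86_feature);

        reverse_cpuid_check(x86_leaf);
        /* O(1) bit test on the cached array, no cpuid_entry2_find() scan. */
        return vcpu->arch.cpu_caps[x86_leaf] & __feature_bit(x86_feature);
}
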
> 
>> Or, for simple change, here rename variable name "caps" --> "vcpu", to less
>> reading confusion.
> 
> @vcpu is already defined and needs to be used in this function.  See the comment
> below.
> 
> I'm definitely open to a better name, though I would like to keep the name
> relative short so that the line lengths of the callers is reasonable, e.g. would
> prefer not to do vcpu_caps.
> 
>>> +	/*
>>> +	 * Don't update vCPU capabilities if KVM is updating CPUID entries that
>>> +	 * are coming in from userspace!
>>> +	 */
>>> +	if (entries != vcpu->arch.cpuid_entries)
>>> +		caps = NULL;
>>>    	best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
>>> -	if (best) {
>>> -		/* Update OSXSAVE bit */
>>> -		if (boot_cpu_has(X86_FEATURE_XSAVE))
>>> -			cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
>>> +
>>> +	if (boot_cpu_has(X86_FEATURE_XSAVE))
>>> +		kvm_update_feature_runtime(caps, best, X86_FEATURE_OSXSAVE,
>>>    					   kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE));
>>> -		cpuid_entry_change(best, X86_FEATURE_APIC,
>>> -			   vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
>>> +	kvm_update_feature_runtime(caps, best, X86_FEATURE_APIC,
>>> +				   vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 6407e5c45f20..3e8976705342 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -262,7 +262,7 @@ static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
         return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
  }

-static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu,
+static __always_inline void kvm_update_feature_runtime(u32 *guest_caps,
                                                       struct kvm_cpuid_entry2 *entry,
                                                        unsigned int x86_feature,
                                                        bool has_feature)
@@ -270,15 +270,15 @@ static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu,
         if (entry)
                 cpuid_entry_change(entry, x86_feature, has_feature);

-       if (vcpu)
-               guest_cpu_cap_change(vcpu, x86_feature, has_feature);
+       if (guest_caps)
+               guest_cpu_cap_change(guest_caps, x86_feature, has_feature);
  }

  static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
                                        int nent)
  {
         struct kvm_cpuid_entry2 *best;
-       struct kvm_vcpu *caps = vcpu;
+       u32 *caps = vcpu->arch.cpu_caps;

         /*
          * Don't update vCPU capabilities if KVM is updating CPUID entries that
@@ -397,7 +397,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
          */
         allow_gbpages = tdp_enabled ? boot_cpu_has(X86_FEATURE_GBPAGES) :
                                       guest_cpu_cap_has(vcpu, X86_FEATURE_GBPAGES);
-       guest_cpu_cap_change(vcpu, X86_FEATURE_GBPAGES, allow_gbpages);
+       guest_cpu_cap_change(vcpu->arch.cpu_caps, X86_FEATURE_GBPAGES, allow_gbpages);

         best = kvm_find_cpuid_entry(vcpu, 1);
         if (best && apic) {
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 98694dfe062e..a3a0482fc514 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -183,39 +183,39 @@ static __always_inline bool guest_pv_has(struct kvm_vcpu *vcpu,
         return vcpu->arch.pv_cpuid.features & (1u << kvm_feature);
  }

-static __always_inline void guest_cpu_cap_set(struct kvm_vcpu *vcpu,
+static __always_inline void guest_cpu_cap_set(u32 *caps,
                                               unsigned int x86_feature)
  {
         unsigned int x86_leaf = __feature_leaf(x86_feature);

         reverse_cpuid_check(x86_leaf);
-       vcpu->arch.cpu_caps[x86_leaf] |= __feature_bit(x86_feature);
+       caps[x86_leaf] |= __feature_bit(x86_feature);
  }

-static __always_inline void guest_cpu_cap_clear(struct kvm_vcpu *vcpu,
+static __always_inline void guest_cpu_cap_clear(u32 *caps,
                                                 unsigned int x86_feature)
  {
         unsigned int x86_leaf = __feature_leaf(x86_feature);

         reverse_cpuid_check(x86_leaf);
-       vcpu->arch.cpu_caps[x86_leaf] &= ~__feature_bit(x86_feature);
+       caps[x86_leaf] &= ~__feature_bit(x86_feature);
  }

-static __always_inline void guest_cpu_cap_change(struct kvm_vcpu *vcpu,
+static __always_inline void guest_cpu_cap_change(u32 *caps,
                                                  unsigned int x86_feature,
                                                  bool guest_has_cap)
  {
         if (guest_has_cap)
-               guest_cpu_cap_set(vcpu, x86_feature);
+               guest_cpu_cap_set(caps, x86_feature);
         else
-               guest_cpu_cap_clear(vcpu, x86_feature);
+               guest_cpu_cap_clear(caps, x86_feature);
  }

-static __always_inline void guest_cpu_cap_restrict(struct kvm_vcpu *vcpu,
+static __always_inline void guest_cpu_cap_restrict(u32 *caps,
                                                    unsigned int x86_feature)
  {
         if (!kvm_cpu_cap_has(x86_feature))
-               guest_cpu_cap_clear(vcpu, x86_feature);
+               guest_cpu_cap_clear(caps, x86_feature);
  }

  static __always_inline bool guest_cpu_cap_has(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 6fe2d7bf4959..dd4ca07c3cd0 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4315,14 +4315,14 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
          * XSS on VM-Enter/VM-Exit.  Failure to do so would effectively give
          * the guest read/write access to the host's XSS.
          */
-       guest_cpu_cap_restrict(vcpu, X86_FEATURE_XSAVE);
-       guest_cpu_cap_change(vcpu, X86_FEATURE_XSAVES,
+       guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_XSAVE);
+       guest_cpu_cap_change(vcpu->arch.cpu_caps, X86_FEATURE_XSAVES,
                              boot_cpu_has(X86_FEATURE_XSAVES) &&
                              guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVE));

-       guest_cpu_cap_restrict(vcpu, X86_FEATURE_NRIPS);
-       guest_cpu_cap_restrict(vcpu, X86_FEATURE_TSCRATEMSR);
-       guest_cpu_cap_restrict(vcpu, X86_FEATURE_LBRV);
+       guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_NRIPS);
+       guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_TSCRATEMSR);
+       guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_LBRV);

         /*
          * Intercept VMLOAD if the vCPU mode is Intel in order to emulate that
@@ -4330,12 +4330,12 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
          * SVM on Intel is bonkers and extremely unlikely to work).
          */
         if (!guest_cpuid_is_intel(vcpu))
-               guest_cpu_cap_restrict(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
+               guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_V_VMSAVE_VMLOAD);

-       guest_cpu_cap_restrict(vcpu, X86_FEATURE_PAUSEFILTER);
-       guest_cpu_cap_restrict(vcpu, X86_FEATURE_PFTHRESHOLD);
-       guest_cpu_cap_restrict(vcpu, X86_FEATURE_VGIF);
-       guest_cpu_cap_restrict(vcpu, X86_FEATURE_VNMI);
+       guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_PAUSEFILTER);
+       guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_PFTHRESHOLD);
+       guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_VGIF);
+       guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_VNMI);

         svm_recalc_instruction_intercepts(vcpu, svm);

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 7645945af5c5..c23c96dc24cf 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7752,13 +7752,13 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
          * to the guest.  XSAVES depends on CR4.OSXSAVE, and CR4.OSXSAVE can be
          * set if and only if XSAVE is supported.
          */
-       guest_cpu_cap_restrict(vcpu, X86_FEATURE_XSAVE);
+       guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_XSAVE);
         if (guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVE))
-               guest_cpu_cap_restrict(vcpu, X86_FEATURE_XSAVES);
+               guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_XSAVES);
         else
-               guest_cpu_cap_clear(vcpu, X86_FEATURE_XSAVES);
+               guest_cpu_cap_clear(vcpu->arch.cpu_caps, X86_FEATURE_XSAVES);

-       guest_cpu_cap_restrict(vcpu, X86_FEATURE_VMX);
+       guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_VMX);

         vmx_setup_uret_msrs(vmx);
