[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <87pnetkuov.fsf@vitty.brq.redhat.com>
Date: Wed, 05 Feb 2020 14:52:32 +0100
From: Vitaly Kuznetsov <vkuznets@...hat.com>
To: Ben Gardon <bgardon@...gle.com>, linux-kernel@...r.kernel.org,
kvm@...r.kernel.org, linux-kselftest@...r.kernel.org
Cc: Paolo Bonzini <pbonzini@...hat.com>, Peter Xu <peterx@...hat.com>,
Sean Christopherson <sean.j.christopherson@...el.com>,
Peter Shier <pshier@...gle.com>,
Oliver Upton <oupton@...gle.com>,
Ben Gardon <bgardon@...gle.com>
Subject: Re: [PATCH 3/3] kvm: mmu: Separate pte generation from set_spte
Ben Gardon <bgardon@...gle.com> writes:
> Separate the functions for generating leaf page table entries from the
> function that inserts them into the paging structure. This refactoring
> will facilitate changes to the MMU synchronization model to use atomic
> compare / exchanges (which are not guaranteed to succeed) instead of a
> monolithic MMU lock.
>
> No functional change expected.
>
> Tested by running kvm-unit-tests on an Intel Haswell machine. This
> commit introduced no new failures.
>
> This commit can be viewed in Gerrit at:
> https://linux-review.googlesource.com/c/virt/kvm/kvm/+/2360
>
> Signed-off-by: Ben Gardon <bgardon@...gle.com>
> Reviewed-by: Peter Shier <pshier@...gle.com>
> ---
> arch/x86/kvm/mmu/mmu.c | 52 +++++++++++++++++++++++++++---------------
> 1 file changed, 34 insertions(+), 18 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index b81010d0edae1..9239ad5265dc6 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -3000,20 +3000,14 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
> #define SET_SPTE_WRITE_PROTECTED_PT BIT(0)
> #define SET_SPTE_NEED_REMOTE_TLB_FLUSH BIT(1)
>
> -static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
> - unsigned int pte_access, int level,
> - gfn_t gfn, kvm_pfn_t pfn, bool speculative,
> - bool can_unsync, bool host_writable)
> +static u64 make_spte(struct kvm_vcpu *vcpu, unsigned int pte_access, int level,
> + gfn_t gfn, kvm_pfn_t pfn, u64 old_spte, bool speculative,
> + bool can_unsync, bool host_writable, bool ad_disabled,
> + int *ret)
With such a long parameter list, we might consider passing a pointer to
a structure instead (one shared between make_spte() and set_spte()).
> {
> u64 spte = 0;
> - int ret = 0;
> - struct kvm_mmu_page *sp;
> -
> - if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access))
> - return 0;
>
> - sp = page_header(__pa(sptep));
> - if (sp_ad_disabled(sp))
> + if (ad_disabled)
> spte |= SPTE_AD_DISABLED_MASK;
> else if (kvm_vcpu_ad_need_write_protect(vcpu))
> spte |= SPTE_AD_WRPROT_ONLY_MASK;
> @@ -3066,27 +3060,49 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
> * is responsibility of mmu_get_page / kvm_sync_page.
> * Same reasoning can be applied to dirty page accounting.
> */
> - if (!can_unsync && is_writable_pte(*sptep))
> - goto set_pte;
> + if (!can_unsync && is_writable_pte(old_spte))
> + return spte;
>
> if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
> pgprintk("%s: found shadow page for %llx, marking ro\n",
> __func__, gfn);
> - ret |= SET_SPTE_WRITE_PROTECTED_PT;
> + *ret |= SET_SPTE_WRITE_PROTECTED_PT;
> pte_access &= ~ACC_WRITE_MASK;
> spte &= ~(PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE);
> }
> }
>
> - if (pte_access & ACC_WRITE_MASK) {
> - kvm_vcpu_mark_page_dirty(vcpu, gfn);
> + if (pte_access & ACC_WRITE_MASK)
> spte |= spte_shadow_dirty_mask(spte);
> - }
>
> if (speculative)
> spte = mark_spte_for_access_track(spte);
>
> -set_pte:
> + return spte;
> +}
> +
> +static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
> + unsigned int pte_access, int level,
> + gfn_t gfn, kvm_pfn_t pfn, bool speculative,
> + bool can_unsync, bool host_writable)
> +{
> + u64 spte = 0;
> + struct kvm_mmu_page *sp;
> + int ret = 0;
> +
> + if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access))
> + return 0;
> +
> + sp = page_header(__pa(sptep));
> +
> + spte = make_spte(vcpu, pte_access, level, gfn, pfn, *sptep, speculative,
> + can_unsync, host_writable, sp_ad_disabled(sp), &ret);
I'm probably missing something, but in make_spte() I see just one place
which writes to '*ret', so at the end it is either
SET_SPTE_WRITE_PROTECTED_PT or 0 (the latter only because we
initialize it to 0 in set_spte()). Unless this is preparation for some
other change, I don't see much value in the complication.
Could we actually reverse the logic: pass 'spte' by reference and return
'ret' instead?
> + if (!spte)
> + return 0;
> +
> + if (spte & PT_WRITABLE_MASK)
> + kvm_vcpu_mark_page_dirty(vcpu, gfn);
> +
> if (mmu_spte_update(sptep, spte))
> ret |= SET_SPTE_NEED_REMOTE_TLB_FLUSH;
> return ret;
--
Vitaly
Powered by blists - more mailing lists