lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20120414114422.e3fe6e2abbfdcce61e6f69c8@gmail.com>
Date:	Sat, 14 Apr 2012 11:44:22 +0900
From:	Takuya Yoshikawa <takuya.yoshikawa@...il.com>
To:	Xiao Guangrong <xiaoguangrong@...ux.vnet.ibm.com>
Cc:	Avi Kivity <avi@...hat.com>, Marcelo Tosatti <mtosatti@...hat.com>,
	LKML <linux-kernel@...r.kernel.org>, KVM <kvm@...r.kernel.org>
Subject: Re: [PATCH v2 07/16] KVM: MMU: introduce for_each_pte_list_spte

On Fri, 13 Apr 2012 18:12:41 +0800
Xiao Guangrong <xiaoguangrong@...ux.vnet.ibm.com> wrote:

> It is used to walk all the sptes of the specified pte_list, after
> this, the code of pte_list_walk can be removed
> 
> And it can restart the walking automatically if the spte is zapped

Well, I want to ask two questions:

	- why do you prefer pte_list_* naming to rmap_*?
	  (not a big issue but just curious)
	- Are you sure the whole indirection by this patch will
	  not introduce any regression?
	  (not restricted to get_dirty)

As my previous patch showed, even a slight, trivial change can introduce
a significant performance regression/improvement.

Thanks,
	Takuya

> 
> Signed-off-by: Xiao Guangrong <xiaoguangrong@...ux.vnet.ibm.com>
> ---
>  arch/x86/kvm/mmu.c       |  233 +++++++++++++++++++++++-----------------------
>  arch/x86/kvm/mmu_audit.c |    6 +-
>  2 files changed, 118 insertions(+), 121 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index a1c3628..4e91e94 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -900,26 +900,110 @@ static void pte_list_remove(u64 *spte, unsigned long *pte_list)
>  	}
>  }
> 
> -typedef void (*pte_list_walk_fn) (u64 *spte);
> -static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn)
> -{
> +/*
> + * Used by the following functions to iterate through the sptes linked by a
> + * pte_list. All fields are private and not assumed to be used outside.
> + */
> +struct spte_iterator {
> +	/* private fields */
> +	unsigned long *pte_list;
> +	/* holds the sptep if not NULL */
>  	struct pte_list_desc *desc;
> -	int i;
> +	/* index of the sptep, -1 means the walking does not start. */
> +	int pos;
> +};
> 
> -	if (!*pte_list)
> -		return;
> +static void pte_list_walk_init(struct spte_iterator *iter,
> +			       unsigned long *pte_list)
> +{
> +	iter->pte_list = pte_list;
> +	iter->pos = -1;
> +}
> +
> +static void pte_list_walk_check_restart(struct spte_iterator *iter, u64 *spte)
> +{
> +	/*
> +	 * If the spte is zapped, we should set 'iter->pos = -1' to restart
> +	 * the walk.
> +	 */
> +	if (!is_shadow_present_pte(*spte))
> +		iter->pos = -1;
> +}
> +
> +static u64 *pte_list_first(struct spte_iterator *iter)
> +{
> +	unsigned long pte_list = *iter->pte_list;
> +	u64 *sptep;
> +
> +	if (!pte_list)
> +		return NULL;
> +
> +	if (!(pte_list & 1)) {
> +		iter->desc = NULL;
> +		iter->pos = 0;
> +		sptep = (u64 *)pte_list;
> +
> +		goto exit;
> +	}
> +
> +	iter->desc = (struct pte_list_desc *)(pte_list & ~1ul);
> +	iter->pos = 0;
> +	sptep = iter->desc->sptes[iter->pos];
> +
> +exit:
> +	WARN_ON(sptep && !is_shadow_present_pte(*sptep));
> +	return sptep;
> +}
> 
> -	if (!(*pte_list & 1))
> -		return fn((u64 *)*pte_list);
> +static u64 *pte_list_next(struct spte_iterator *iter)
> +{
> +	u64 *sptep = NULL;
> 
> -	desc = (struct pte_list_desc *)(*pte_list & ~1ul);
> -	while (desc) {
> -		for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i)
> -			fn(desc->sptes[i]);
> -		desc = desc->more;
> +	if (iter->pos < 0)
> +		return pte_list_first(iter);
> +
> +	if (iter->desc) {
> +		if (iter->pos < PTE_LIST_EXT - 1) {
> +			++iter->pos;
> +			sptep = iter->desc->sptes[iter->pos];
> +			if (sptep)
> +				goto exit;
> +		}
> +
> +		iter->desc = iter->desc->more;
> +
> +		if (iter->desc) {
> +			iter->pos = 0;
> +			/* desc->sptes[0] cannot be NULL */
> +			sptep = iter->desc->sptes[iter->pos];
> +			goto exit;
> +		}
>  	}
> +
> +exit:
> +	WARN_ON(sptep && !is_shadow_present_pte(*sptep));
> +	return sptep;
>  }
> 
> +
> +#define for_each_pte_list_spte(pte_list, iter, spte)	\
> +	for (pte_list_walk_init(iter, pte_list);	\
> +	      (spte = pte_list_next(iter)) != NULL;	\
> +		pte_list_walk_check_restart(iter, spte))
> +
> +typedef void (*pte_list_walk_fn) (u64 *spte);
> +static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn)
> +{
> +	struct spte_iterator iter;
> +	u64 *spte;
> +
> +	for_each_pte_list_spte(pte_list, &iter, spte)
> +		fn(spte);
> +}
> +
> +#define for_each_rmap_spte(rmap, iter, spte)	\
> +		for_each_pte_list_spte(rmap, iter, spte)
> +
>  static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
>  				    struct kvm_memory_slot *slot)
>  {
> @@ -974,92 +1058,19 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
>  	pte_list_remove(spte, rmapp);
>  }
> 
> -/*
> - * Used by the following functions to iterate through the sptes linked by a
> - * rmap.  All fields are private and not assumed to be used outside.
> - */
> -struct rmap_iterator {
> -	/* private fields */
> -	struct pte_list_desc *desc;	/* holds the sptep if not NULL */
> -	int pos;			/* index of the sptep */
> -};
> -
> -/*
> - * Iteration must be started by this function.  This should also be used after
> - * removing/dropping sptes from the rmap link because in such cases the
> - * information in the itererator may not be valid.
> - *
> - * Returns sptep if found, NULL otherwise.
> - */
> -static u64 *rmap_get_first(unsigned long rmap, struct rmap_iterator *iter)
> -{
> -	u64 *sptep;
> -
> -	if (!rmap)
> -		return NULL;
> -
> -	if (!(rmap & 1)) {
> -		iter->desc = NULL;
> -		sptep = (u64 *)rmap;
> -
> -		goto exit;
> -	}
> -
> -	iter->desc = (struct pte_list_desc *)(rmap & ~1ul);
> -	iter->pos = 0;
> -	sptep = iter->desc->sptes[iter->pos];
> -
> -exit:
> -	WARN_ON(sptep && !is_shadow_present_pte(*sptep));
> -	return sptep;
> -}
> -
> -/*
> - * Must be used with a valid iterator: e.g. after rmap_get_first().
> - *
> - * Returns sptep if found, NULL otherwise.
> - */
> -static u64 *rmap_get_next(struct rmap_iterator *iter)
> -{
> -	u64 *sptep = NULL;
> -
> -	if (iter->desc) {
> -		if (iter->pos < PTE_LIST_EXT - 1) {
> -			++iter->pos;
> -			sptep = iter->desc->sptes[iter->pos];
> -			if (sptep)
> -				goto exit;
> -		}
> -
> -		iter->desc = iter->desc->more;
> -
> -		if (iter->desc) {
> -			iter->pos = 0;
> -			/* desc->sptes[0] cannot be NULL */
> -			sptep = iter->desc->sptes[iter->pos];
> -			goto exit;
> -		}
> -	}
> -
> -exit:
> -	WARN_ON(sptep && !is_shadow_present_pte(*sptep));
> -	return sptep;
> -}
> -
>  static void drop_spte(struct kvm *kvm, u64 *sptep)
>  {
>  	if (mmu_spte_clear_track_bits(sptep))
>  		rmap_remove(kvm, sptep);
>  }
> 
> -/* Return true if the spte is dropped. */
> -static bool spte_write_protect(struct kvm *kvm, u64 *sptep, bool large,
> +static void spte_write_protect(struct kvm *kvm, u64 *sptep, bool large,
>  			       bool *flush)
>  {
>  	u64 spte = *sptep;
> 
>  	if (!is_writable_pte(spte))
> -		return false;
> +		return;
> 
>  	*flush |= true;
> 
> @@ -1070,32 +1081,24 @@ static bool spte_write_protect(struct kvm *kvm, u64 *sptep, bool large,
> 
>  		drop_spte(kvm, sptep);
>  		--kvm->stat.lpages;
> -		return true;
> +		return;
>  	}
> 
>  	rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
>  	spte = spte & ~PT_WRITABLE_MASK;
>  	mmu_spte_update(sptep, spte);
> -
> -	return false;
>  }
> 
>  static bool
>  __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, int level)
>  {
>  	u64 *sptep;
> -	struct rmap_iterator iter;
> +	struct spte_iterator iter;
>  	bool write_protected = false;
> 
> -	for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
> -		if (spte_write_protect(kvm, sptep, level > PT_PAGE_TABLE_LEVEL,
> -			  &write_protected)) {
> -			sptep = rmap_get_first(*rmapp, &iter);
> -			continue;
> -		}
> -
> -		sptep = rmap_get_next(&iter);
> -	}
> +	for_each_rmap_spte(rmapp, &iter, sptep)
> +		spte_write_protect(kvm, sptep, level > PT_PAGE_TABLE_LEVEL,
> +			  &write_protected);
> 
>  	return write_protected;
>  }
> @@ -1147,10 +1150,10 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
>  			   unsigned long data)
>  {
>  	u64 *sptep;
> -	struct rmap_iterator iter;
> +	struct spte_iterator iter;
>  	int need_tlb_flush = 0;
> 
> -	while ((sptep = rmap_get_first(*rmapp, &iter))) {
> +	for_each_rmap_spte(rmapp, &iter, sptep) {
>  		rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", sptep, *sptep);
> 
>  		drop_spte(kvm, sptep);
> @@ -1164,7 +1167,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
>  			     unsigned long data)
>  {
>  	u64 *sptep;
> -	struct rmap_iterator iter;
> +	struct spte_iterator iter;
>  	int need_flush = 0;
>  	u64 new_spte;
>  	pte_t *ptep = (pte_t *)data;
> @@ -1173,15 +1176,14 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
>  	WARN_ON(pte_huge(*ptep));
>  	new_pfn = pte_pfn(*ptep);
> 
> -	for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
> +	for_each_rmap_spte(rmapp, &iter, sptep) {
>  		rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", sptep, *sptep);
> 
>  		need_flush = 1;
> 
> -		if (pte_write(*ptep)) {
> +		if (pte_write(*ptep))
>  			drop_spte(kvm, sptep);
> -			sptep = rmap_get_first(*rmapp, &iter);
> -		} else {
> +		else {
>  			new_spte = *sptep & ~PT64_BASE_ADDR_MASK;
>  			new_spte |= (u64)new_pfn << PAGE_SHIFT;
> 
> @@ -1191,7 +1193,6 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
> 
>  			mmu_spte_clear_track_bits(sptep);
>  			mmu_spte_set(sptep, new_spte);
> -			sptep = rmap_get_next(&iter);
>  		}
>  	}
> 
> @@ -1254,7 +1255,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
>  			 unsigned long data)
>  {
>  	u64 *sptep;
> -	struct rmap_iterator iter;
> +	struct spte_iterator iter;
>  	int young = 0;
> 
>  	/*
> @@ -1267,8 +1268,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
>  	if (!shadow_accessed_mask)
>  		return kvm_unmap_rmapp(kvm, rmapp, data);
> 
> -	for (sptep = rmap_get_first(*rmapp, &iter); sptep;
> -	     sptep = rmap_get_next(&iter))
> +	for_each_rmap_spte(rmapp, &iter, sptep)
>  		if (*sptep & PT_ACCESSED_MASK) {
>  			young = 1;
>  			clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)sptep);
> @@ -1281,7 +1281,7 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
>  			      unsigned long data)
>  {
>  	u64 *sptep;
> -	struct rmap_iterator iter;
> +	struct spte_iterator iter;
>  	int young = 0;
> 
>  	/*
> @@ -1292,8 +1292,7 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
>  	if (!shadow_accessed_mask)
>  		goto out;
> 
> -	for (sptep = rmap_get_first(*rmapp, &iter); sptep;
> -	     sptep = rmap_get_next(&iter))
> +	for_each_rmap_spte(rmapp, &iter, sptep)
>  		if (*sptep & PT_ACCESSED_MASK) {
>  			young = 1;
>  			break;
> @@ -1955,9 +1954,9 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm,
>  static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
>  {
>  	u64 *sptep;
> -	struct rmap_iterator iter;
> +	struct spte_iterator iter;
> 
> -	while ((sptep = rmap_get_first(sp->parent_ptes, &iter)))
> +	for_each_pte_list_spte(&sp->parent_ptes, &iter, sptep)
>  		drop_parent_pte(sp, sptep);
>  }
> 
> diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
> index 7d7d0b9..a42bd85 100644
> --- a/arch/x86/kvm/mmu_audit.c
> +++ b/arch/x86/kvm/mmu_audit.c
> @@ -193,7 +193,7 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
>  	struct kvm_memory_slot *slot;
>  	unsigned long *rmapp;
>  	u64 *sptep;
> -	struct rmap_iterator iter;
> +	struct spte_iterator iter;
> 
>  	if (sp->role.direct || sp->unsync || sp->role.invalid)
>  		return;
> @@ -201,13 +201,11 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
>  	slot = gfn_to_memslot(kvm, sp->gfn);
>  	rmapp = &slot->rmap[sp->gfn - slot->base_gfn];
> 
> -	for (sptep = rmap_get_first(*rmapp, &iter); sptep;
> -	     sptep = rmap_get_next(&iter)) {
> +	for_each_rmap_spte(rmapp, &iter, sptep)
>  		if (is_writable_pte(*sptep))
>  			audit_printk(kvm, "shadow page has writable "
>  				     "mappings: gfn %llx role %x\n",
>  				     sp->gfn, sp->role.word);
> -	}
>  }
> 
>  static void audit_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
> -- 
> 1.7.7.6
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


-- 
Takuya Yoshikawa <takuya.yoshikawa@...il.com>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ