lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20130828092001.GQ22899@redhat.com>
Date:	Wed, 28 Aug 2013 12:20:01 +0300
From:	Gleb Natapov <gleb@...hat.com>
To:	Xiao Guangrong <xiaoguangrong@...ux.vnet.ibm.com>
Cc:	avi.kivity@...il.com, mtosatti@...hat.com, pbonzini@...hat.com,
	linux-kernel@...r.kernel.org, kvm@...r.kernel.org
Subject: Re: [PATCH 09/12] KVM: MMU: introduce pte-list lockless walker

On Tue, Jul 30, 2013 at 09:02:07PM +0800, Xiao Guangrong wrote:
> The basic idea is from nulls list which uses a nulls to indicate
> whether the desc is moved to different pte-list
> 
> Thanks to SLAB_DESTROY_BY_RCU, the desc can be quickly reused
> 
> Signed-off-by: Xiao Guangrong <xiaoguangrong@...ux.vnet.ibm.com>
> ---
>  arch/x86/kvm/mmu.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 50 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 36caf6a..f8fc0cc 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -1010,6 +1010,14 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte,
>  		desc->sptes[0] = (u64 *)*pte_list;
>  		desc->sptes[1] = spte;
>  		desc_mark_nulls(pte_list, desc);
> +
> +		/*
> +		 * Ensure the old spte has been updated into desc, so
> +		 * that the other side cannot get the desc from pte_list
> +		 * but miss the old spte.
> +		 */
> +		smp_wmb();
> +
>  		*pte_list = (unsigned long)desc | 1;
>  		return 1;
>  	}
> @@ -1131,6 +1139,47 @@ static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn)
>  	WARN_ON(desc_get_nulls_value(desc) != pte_list);
>  }
>  
> +/* The caller should hold rcu lock. */
> +typedef void (*pte_list_walk_lockless_fn) (u64 *spte, int level);
> +static void pte_list_walk_lockless(unsigned long *pte_list,
> +				   pte_list_walk_lockless_fn fn, int level)
> +{
> +	struct pte_list_desc *desc;
> +	unsigned long pte_list_value;
> +	int i;
> +
> +restart:
> +	pte_list_value = ACCESS_ONCE(*pte_list);
> +	if (!pte_list_value)
> +		return;
> +
> +	if (!(pte_list_value & 1))
> +		return fn((u64 *)pte_list_value, level);
> +
> +	/*
> +	 * fetch pte_list before read sptes in the desc, see the comments
> +	 * in pte_list_add().
> +	 *
> +	 * There is the data dependence since the desc is got from pte_list.
> +	 */
> +	smp_read_barrier_depends();
> +
> +	desc = (struct pte_list_desc *)(pte_list_value & ~1ul);
> +	while (!desc_is_a_nulls(desc)) {
> +		for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i)
> +			fn(desc->sptes[i], level);
> +
> +		desc = ACCESS_ONCE(desc->more);
> +
> +		/* It is being initialized. */
> +		if (unlikely(!desc))
> +			goto restart;
> +	}
> +
> +	if (unlikely(desc_get_nulls_value(desc) != pte_list))
> +		goto restart;
So is it really enough to guarantee safety and correctness? What if desc
is moved to another rmap while we are walking it, so fn() is called on
incorrect sptes? Or what if desc is moved to another rmap, but then it
is moved back to the initial rmap (but to another place in the desc list),
so the check here will not catch that we need to restart walking?


> +}
> +
>  static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
>  				    struct kvm_memory_slot *slot)
>  {
> @@ -4557,7 +4606,7 @@ int kvm_mmu_module_init(void)
>  {
>  	pte_list_desc_cache = kmem_cache_create("pte_list_desc",
>  					    sizeof(struct pte_list_desc),
> -					    0, 0, NULL);
> +					    0, SLAB_DESTROY_BY_RCU, NULL);
>  	if (!pte_list_desc_cache)
>  		goto nomem;
>  
> -- 
> 1.8.1.4

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ