Message-ID: <1259003425.17871.328.camel@calx>
Date:	Mon, 23 Nov 2009 13:10:25 -0600
From:	Matt Mackall <mpm@...enic.com>
To:	Pekka Enberg <penberg@...helsinki.fi>
Cc:	Peter Zijlstra <peterz@...radead.org>, paulmck@...ux.vnet.ibm.com,
	linux-mm@...ck.org, cl@...ux-foundation.org,
	LKML <linux-kernel@...r.kernel.org>,
	Nick Piggin <npiggin@...e.de>
Subject: Re: lockdep complaints in slab allocator

On Mon, 2009-11-23 at 21:00 +0200, Pekka Enberg wrote:
> Hi Peter,
> 
> On Fri, 2009-11-20 at 16:09 +0100, Peter Zijlstra wrote:
> > > Uh, ok, so apparently I was right after all. There's a comment in
> > > free_block() above the slab_destroy() call that refers to the comment
> > > above the alloc_slabmgmt() function definition, which explains it all.
> > > 
> > > Long story short: ->slabp_cache never points to the same kmalloc cache 
> > > we're allocating or freeing from. Where do we need to put the 
> > > spin_lock_nested() annotation? Would it be enough to just use it in 
> > > cache_free_alien() for alien->lock or do we need it in 
> > > cache_flusharray() as well?
> > 
> > You'd have to somehow push the nested state down from the
> > kmem_cache_free() call in slab_destroy() to all nc->lock sites below.
> 
> That turns out to be _very_ hard. How about something like the following
> untested patch, which defers slab_destroy() until after nc->lock is dropped?
> 
> 			Pekka
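
The annotation Pekka asks about above is spin_lock_nested(). For
reference, here is a minimal illustrative sketch of what using it for
alien->lock in cache_free_alien() could look like, shown against the
unpatched code; it is not part of this thread or of any applied patch,
and whether it would be sufficient is exactly the open question.

	/*
	 * Illustrative sketch only: take alien->lock in a distinct
	 * lockdep subclass so it is not lumped together with other
	 * array_cache locks of the same class acquired further down
	 * the free path.
	 */
	alien = l3->alien[nodeid];
	spin_lock_nested(&alien->lock, SINGLE_DEPTH_NESTING);
	if (unlikely(alien->avail == alien->limit)) {
		STATS_INC_ACOVERFLOW(cachep);
		__drain_alien_cache(cachep, alien, nodeid);
	}
	alien->entry[alien->avail++] = objp;
	spin_unlock(&alien->lock);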

This seems like a lot of work to paper over a lockdep false positive in
code that should be firmly in the maintenance end of its lifecycle. I'd
rather the fix (or the papering over) happen in lockdep itself.

Introducing extra cacheline pressure by passing to_destroy around also
seems like a good way to whittle away SLAB's narrow remaining
performance advantages.
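
For contrast with the quoted patch, a hedged sketch of the lockdep-side
direction argued for here would be to re-key the locks involved with
lockdep_set_class(), so the nested acquisition is no longer reported as
recursion within one lock class and the hot paths stay untouched. The
key and helper names below are hypothetical, and how the classes would
actually be split (per cache, or grouping the kmalloc caches that back
slab management objects separately) is the real design question; this
only shows the mechanism.

	/* Hypothetical key; lockdep keys must live in static storage. */
	static struct lock_class_key kmalloc_l3_lock_key;

	/* Hypothetical helper, called once when a nodelist is set up. */
	static void annotate_kmalloc_list_lock(struct kmem_list3 *l3)
	{
		/* Move this list_lock out of its default lock class. */
		lockdep_set_class(&l3->list_lock, &kmalloc_l3_lock_key);
	}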

> 
> diff --git a/mm/slab.c b/mm/slab.c
> index 7dfa481..6f522e3 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -316,7 +316,7 @@ struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
>  static int drain_freelist(struct kmem_cache *cache,
>  			struct kmem_list3 *l3, int tofree);
>  static void free_block(struct kmem_cache *cachep, void **objpp, int len,
> -			int node);
> +			int node, struct list_head *to_destroy);
>  static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
>  static void cache_reap(struct work_struct *unused);
>  
> @@ -1002,7 +1002,8 @@ static void free_alien_cache(struct array_cache **ac_ptr)
>  }
>  
>  static void __drain_alien_cache(struct kmem_cache *cachep,
> -				struct array_cache *ac, int node)
> +				struct array_cache *ac, int node,
> +				struct list_head *to_destroy)
>  {
>  	struct kmem_list3 *rl3 = cachep->nodelists[node];
>  
> @@ -1016,12 +1017,22 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
>  		if (rl3->shared)
>  			transfer_objects(rl3->shared, ac, ac->limit);
>  
> -		free_block(cachep, ac->entry, ac->avail, node);
> +		free_block(cachep, ac->entry, ac->avail, node, to_destroy);
>  		ac->avail = 0;
>  		spin_unlock(&rl3->list_lock);
>  	}
>  }
>  
> +static void slab_destroy(struct kmem_cache *, struct slab *);
> +
> +static void destroy_slabs(struct kmem_cache *cache, struct list_head *to_destroy)
> +{
> +	struct slab *slab, *tmp;
> +
> +	list_for_each_entry_safe(slab, tmp, to_destroy, list)
> +		slab_destroy(cache, slab);
> +}
> +
>  /*
>   * Called from cache_reap() to regularly drain alien caches round robin.
>   */
> @@ -1033,8 +1044,11 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
>  		struct array_cache *ac = l3->alien[node];
>  
>  		if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
> -			__drain_alien_cache(cachep, ac, node);
> +			LIST_HEAD(to_destroy);
> +
> +			__drain_alien_cache(cachep, ac, node, &to_destroy);
>  			spin_unlock_irq(&ac->lock);
> +			destroy_slabs(cachep, &to_destroy);
>  		}
>  	}
>  }
> @@ -1049,9 +1063,12 @@ static void drain_alien_cache(struct kmem_cache *cachep,
>  	for_each_online_node(i) {
>  		ac = alien[i];
>  		if (ac) {
> +			LIST_HEAD(to_destroy);
> +
>  			spin_lock_irqsave(&ac->lock, flags);
> -			__drain_alien_cache(cachep, ac, i);
> +			__drain_alien_cache(cachep, ac, i, &to_destroy);
>  			spin_unlock_irqrestore(&ac->lock, flags);
> +			destroy_slabs(cachep, &to_destroy);
>  		}
>  	}
>  }
> @@ -1076,17 +1093,20 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
>  	l3 = cachep->nodelists[node];
>  	STATS_INC_NODEFREES(cachep);
>  	if (l3->alien && l3->alien[nodeid]) {
> +		LIST_HEAD(to_destroy);
> +
>  		alien = l3->alien[nodeid];
>  		spin_lock(&alien->lock);
>  		if (unlikely(alien->avail == alien->limit)) {
>  			STATS_INC_ACOVERFLOW(cachep);
> -			__drain_alien_cache(cachep, alien, nodeid);
> +			__drain_alien_cache(cachep, alien, nodeid, &to_destroy);
>  		}
>  		alien->entry[alien->avail++] = objp;
>  		spin_unlock(&alien->lock);
> +		destroy_slabs(cachep, &to_destroy);
>  	} else {
>  		spin_lock(&(cachep->nodelists[nodeid])->list_lock);
> -		free_block(cachep, &objp, 1, nodeid);
> +		free_block(cachep, &objp, 1, nodeid, NULL);
>  		spin_unlock(&(cachep->nodelists[nodeid])->list_lock);
>  	}
>  	return 1;
> @@ -1118,7 +1138,7 @@ static void __cpuinit cpuup_canceled(long cpu)
>  		/* Free limit for this kmem_list3 */
>  		l3->free_limit -= cachep->batchcount;
>  		if (nc)
> -			free_block(cachep, nc->entry, nc->avail, node);
> +			free_block(cachep, nc->entry, nc->avail, node, NULL);
>  
>  		if (!cpus_empty(*mask)) {
>  			spin_unlock_irq(&l3->list_lock);
> @@ -1128,7 +1148,7 @@ static void __cpuinit cpuup_canceled(long cpu)
>  		shared = l3->shared;
>  		if (shared) {
>  			free_block(cachep, shared->entry,
> -				   shared->avail, node);
> +				   shared->avail, node, NULL);
>  			l3->shared = NULL;
>  		}
>  
> @@ -2402,7 +2422,7 @@ static void do_drain(void *arg)
>  	check_irq_off();
>  	ac = cpu_cache_get(cachep);
>  	spin_lock(&cachep->nodelists[node]->list_lock);
> -	free_block(cachep, ac->entry, ac->avail, node);
> +	free_block(cachep, ac->entry, ac->avail, node, NULL);
>  	spin_unlock(&cachep->nodelists[node]->list_lock);
>  	ac->avail = 0;
>  }
> @@ -3410,7 +3430,7 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
>   * Caller needs to acquire correct kmem_list's list_lock
>   */
>  static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
> -		       int node)
> +		       int node, struct list_head *to_destroy)
>  {
>  	int i;
>  	struct kmem_list3 *l3;
> @@ -3439,7 +3459,10 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
>  				 * a different cache, refer to comments before
>  				 * alloc_slabmgmt.
>  				 */
> -				slab_destroy(cachep, slabp);
> +				if (to_destroy)
> +					list_add(&slabp->list, to_destroy);
> +				else
> +					slab_destroy(cachep, slabp);
>  			} else {
>  				list_add(&slabp->list, &l3->slabs_free);
>  			}
> @@ -3479,7 +3502,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
>  		}
>  	}
>  
> -	free_block(cachep, ac->entry, batchcount, node);
> +	free_block(cachep, ac->entry, batchcount, node, NULL);
>  free_done:
>  #if STATS
>  	{
> @@ -3822,7 +3845,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
>  
>  			if (shared)
>  				free_block(cachep, shared->entry,
> -						shared->avail, node);
> +						shared->avail, node, NULL);
>  
>  			l3->shared = new_shared;
>  			if (!l3->alien) {
> @@ -3925,7 +3948,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
>  		if (!ccold)
>  			continue;
>  		spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
> -		free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i));
> +		free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i), NULL);
>  		spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
>  		kfree(ccold);
>  	}
> @@ -4007,7 +4030,7 @@ void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
>  			tofree = force ? ac->avail : (ac->limit + 4) / 5;
>  			if (tofree > ac->avail)
>  				tofree = (ac->avail + 1) / 2;
> -			free_block(cachep, ac->entry, tofree, node);
> +			free_block(cachep, ac->entry, tofree, node, NULL);
>  			ac->avail -= tofree;
>  			memmove(ac->entry, &(ac->entry[tofree]),
>  				sizeof(void *) * ac->avail);
> 



-- 
http://selenic.com : development and support for Mercurial and Linux

