lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20220714150532.wndjlbozlnhniofo@quack3>
Date:   Thu, 14 Jul 2022 17:05:32 +0200
From:   Jan Kara <jack@...e.cz>
To:     Ritesh Harjani <ritesh.list@...il.com>
Cc:     Jan Kara <jack@...e.cz>, Ted Tso <tytso@....edu>,
        linux-ext4@...r.kernel.org
Subject: Re: [PATCH 10/10] mbcache: Automatically delete entries from cache
 on freeing

On Thu 14-07-22 18:39:51, Ritesh Harjani wrote:
> On 22/07/12 12:54PM, Jan Kara wrote:
> > Use the fact that entries with elevated refcount are not removed from
> 
> The elevated refcnt means >= 2?

Well, it means when there is real user of the mbcache entry. So 3 before
this patch, 2 after this patch...

> > the hash and just move removal of the entry from the hash to the entry
> > freeing time. When doing this we also change the generic code to hold
> > one reference to the cache entry, not two of them, which makes code
> > somewhat more obvious.
> >
> > Signed-off-by: Jan Kara <jack@...e.cz>
> > ---
> >  fs/mbcache.c            | 108 +++++++++++++++-------------------------
> >  include/linux/mbcache.h |  24 ++++++---
> >  2 files changed, 55 insertions(+), 77 deletions(-)
> >
> > diff --git a/fs/mbcache.c b/fs/mbcache.c
> > index d1ebb5df2856..96f1d49d30a5 100644
> > --- a/fs/mbcache.c
> > +++ b/fs/mbcache.c
> > @@ -90,7 +90,7 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
> >  		return -ENOMEM;
> >
> >  	INIT_LIST_HEAD(&entry->e_list);
> > -	/* One ref for hash, one ref returned */
> > +	/* Initial hash reference */
> >  	atomic_set(&entry->e_refcnt, 1);
> >  	entry->e_key = key;
> >  	entry->e_value = value;
> > @@ -106,21 +106,28 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
> >  		}
> >  	}
> >  	hlist_bl_add_head(&entry->e_hash_list, head);
> > -	hlist_bl_unlock(head);
> > -
> > +	/*
> > +	 * Add entry to LRU list before it can be found by
> > +	 * mb_cache_entry_delete() to avoid races
> > +	 */
> 
> No reference to mb_cache_entry_delete() now. It is
> mb_cache_entry_delete_or_get()

Thanks, will fix.

> >  	spin_lock(&cache->c_list_lock);
> >  	list_add_tail(&entry->e_list, &cache->c_list);
> > -	/* Grab ref for LRU list */
> > -	atomic_inc(&entry->e_refcnt);
> >  	cache->c_entry_count++;
> >  	spin_unlock(&cache->c_list_lock);
> > +	hlist_bl_unlock(head);
> >
> >  	return 0;
> >  }
> >  EXPORT_SYMBOL(mb_cache_entry_create);
> >
> > -void __mb_cache_entry_free(struct mb_cache_entry *entry)
> > +void __mb_cache_entry_free(struct mb_cache *cache, struct mb_cache_entry *entry)
> >  {
> > +	struct hlist_bl_head *head;
> > +
> > +	head = mb_cache_entry_head(cache, entry->e_key);
> > +	hlist_bl_lock(head);
> > +	hlist_bl_del(&entry->e_hash_list);
> > +	hlist_bl_unlock(head);
> >  	kmem_cache_free(mb_entry_cache, entry);
> >  }
> >  EXPORT_SYMBOL(__mb_cache_entry_free);
> > @@ -134,7 +141,7 @@ EXPORT_SYMBOL(__mb_cache_entry_free);
> >   */
> >  void mb_cache_entry_wait_unused(struct mb_cache_entry *entry)
> >  {
> > -	wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 3);
> > +	wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 2);
> >  }
> >  EXPORT_SYMBOL(mb_cache_entry_wait_unused);
> >
> > @@ -155,10 +162,9 @@ static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
> >  	while (node) {
> >  		entry = hlist_bl_entry(node, struct mb_cache_entry,
> >  				       e_hash_list);
> > -		if (entry->e_key == key && entry->e_reusable) {
> > -			atomic_inc(&entry->e_refcnt);
> > +		if (entry->e_key == key && entry->e_reusable &&
> > +		    atomic_inc_not_zero(&entry->e_refcnt))
> >  			goto out;
> > -		}
> >  		node = node->next;
> >  	}
> >  	entry = NULL;
> > @@ -218,10 +224,9 @@ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
> >  	head = mb_cache_entry_head(cache, key);
> >  	hlist_bl_lock(head);
> >  	hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
> > -		if (entry->e_key == key && entry->e_value == value) {
> > -			atomic_inc(&entry->e_refcnt);
> > +		if (entry->e_key == key && entry->e_value == value &&
> > +		    atomic_inc_not_zero(&entry->e_refcnt))
> >  			goto out;
> > -		}
> >  	}
> >  	entry = NULL;
> >  out:
> > @@ -244,37 +249,25 @@ EXPORT_SYMBOL(mb_cache_entry_get);
> >  struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache,
> >  						    u32 key, u64 value)
> >  {
> > -	struct hlist_bl_node *node;
> > -	struct hlist_bl_head *head;
> >  	struct mb_cache_entry *entry;
> >
> > -	head = mb_cache_entry_head(cache, key);
> > -	hlist_bl_lock(head);
> > -	hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
> > -		if (entry->e_key == key && entry->e_value == value) {
> > -			if (atomic_read(&entry->e_refcnt) > 2) {
> > -				atomic_inc(&entry->e_refcnt);
> > -				hlist_bl_unlock(head);
> > -				return entry;
> > -			}
> > -			/* We keep hash list reference to keep entry alive */
> > -			hlist_bl_del_init(&entry->e_hash_list);
> > -			hlist_bl_unlock(head);
> > -			spin_lock(&cache->c_list_lock);
> > -			if (!list_empty(&entry->e_list)) {
> > -				list_del_init(&entry->e_list);
> > -				if (!WARN_ONCE(cache->c_entry_count == 0,
> > -		"mbcache: attempt to decrement c_entry_count past zero"))
> > -					cache->c_entry_count--;
> > -				atomic_dec(&entry->e_refcnt);
> > -			}
> > -			spin_unlock(&cache->c_list_lock);
> > -			mb_cache_entry_put(cache, entry);
> > -			return NULL;
> > -		}
> > -	}
> > -	hlist_bl_unlock(head);
> > +	entry = mb_cache_entry_get(cache, key, value);
> > +	if (!entry)
> > +		return NULL;
> > +
> > +	/*
> > +	 * Drop the ref we got from mb_cache_entry_get() and the initial hash
> > +	 * ref if we are the last user
> > +	 */
> > +	if (atomic_cmpxchg(&entry->e_refcnt, 2, 0) != 2)
> > +		return entry;
> >
> > +	spin_lock(&cache->c_list_lock);
> > +	if (!list_empty(&entry->e_list))
> > +		list_del_init(&entry->e_list);
> > +	cache->c_entry_count--;
> > +	spin_unlock(&cache->c_list_lock);
> > +	__mb_cache_entry_free(cache, entry);
> >  	return NULL;
> >  }
> >  EXPORT_SYMBOL(mb_cache_entry_delete_or_get);
> > @@ -306,42 +299,24 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache,
> >  				     unsigned long nr_to_scan)
> >  {
> >  	struct mb_cache_entry *entry;
> > -	struct hlist_bl_head *head;
> >  	unsigned long shrunk = 0;
> >
> >  	spin_lock(&cache->c_list_lock);
> >  	while (nr_to_scan-- && !list_empty(&cache->c_list)) {
> >  		entry = list_first_entry(&cache->c_list,
> >  					 struct mb_cache_entry, e_list);
> > -		if (entry->e_referenced || atomic_read(&entry->e_refcnt) > 2) {
> > +		/* Drop initial hash reference if there is no user */
> > +		if (entry->e_referenced ||
> > +		    atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) {
> 
> So here if the refcnt of an entry is 1. That means it is still in use right.

No. One reference is held by the hashtable/LRU itself. So 1 means entry is
free.

> So the shrinker will do the atomic_cmpxchg and make it to 0. And then
> delete the entry from the cache?
> This will only happen for entry with just 1 reference count.
> 
> Is that correct understanding?

Correct. Basically the atomic 1 -> 0 transition makes sure we are not
racing with anybody else doing the 1 -> 2 transition. And once reference
gets to 0, we make sure no new references can be created.

								Honza
-- 
Jan Kara <jack@...e.com>
SUSE Labs, CR

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ