lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1450285224-1525-7-git-send-email-jack@suse.cz>
Date:	Wed, 16 Dec 2015 18:00:23 +0100
From:	Jan Kara <jack@...e.cz>
To:	Ted Tso <tytso@....edu>
Cc:	Andreas Grünbacher 
	<andreas.gruenbacher@...il.com>,
	Andreas Dilger <adilger@...ger.ca>,
	Laurent GUERBY <laurent@...rby.net>,
	linux-ext4@...r.kernel.org, Jan Kara <jack@...e.cz>
Subject: [PATCH 6/7] mbcache2: Use referenced bit instead of LRU

Currently we maintain perfect LRU list by moving entry to the tail of
the list when it gets used. However these operations on cache-global
list are relatively expensive.

In this patch we switch to lazy updates of LRU list. Whenever entry gets
used, we set a referenced bit in it. When reclaiming entries, we give
referenced entries another round in the LRU. Since the list is not a
real LRU anymore, rename it to just 'list'.

In my testing this logic gives about 30% boost to workloads with mostly
unique xattr blocks (e.g. xattr-bench with 10 files and 10000 unique
xattr values).

Signed-off-by: Jan Kara <jack@...e.cz>
---
 fs/mbcache2.c            | 87 +++++++++++++++++++++++++++++++-----------------
 include/linux/mbcache2.h | 11 +++---
 2 files changed, 63 insertions(+), 35 deletions(-)

diff --git a/fs/mbcache2.c b/fs/mbcache2.c
index 8b3ef891573c..133ecb2c42ce 100644
--- a/fs/mbcache2.c
+++ b/fs/mbcache2.c
@@ -30,9 +30,9 @@ struct mb2_cache {
 	int			c_bucket_bits;
 	/* Maximum entries in cache to avoid degrading hash too much */
 	int			c_max_entries;
-	/* Protects c_lru_list, c_entry_count */
-	spinlock_t		c_lru_list_lock;
-	struct list_head	c_lru_list;
+	/* Protects c_list, c_entry_count */
+	spinlock_t		c_list_lock;
+	struct list_head	c_list;
 	/* Number of entries in cache */
 	unsigned long		c_entry_count;
 	struct shrinker		c_shrink;
@@ -45,6 +45,29 @@ static struct kmem_cache *mb2_entry_cache;
 static unsigned long mb2_cache_shrink(struct mb2_cache *cache,
 				      unsigned int nr_to_scan);
 
+static inline bool mb2_cache_entry_referenced(struct mb2_cache_entry *entry)
+{
+	return entry->_e_hash_list_head & 1;
+}
+
+static inline void mb2_cache_entry_set_referenced(struct mb2_cache_entry *entry)
+{
+	entry->_e_hash_list_head |= 1;
+}
+
+static inline void mb2_cache_entry_clear_referenced(
+					struct mb2_cache_entry *entry)
+{
+	entry->_e_hash_list_head &= ~1;
+}
+
+static inline struct hlist_bl_head *mb2_cache_entry_head(
+					struct mb2_cache_entry *entry)
+{
+	return (struct hlist_bl_head *)
+			(entry->_e_hash_list_head & ~1);
+}
+
 /*
  * Number of entries to reclaim synchronously when there are too many entries
  * in cache
@@ -83,13 +106,13 @@ struct mb2_cache_entry *mb2_cache_entry_create(struct mb2_cache *cache,
 	if (!entry)
 		return ERR_PTR(-ENOMEM);
 
-	INIT_LIST_HEAD(&entry->e_lru_list);
+	INIT_LIST_HEAD(&entry->e_list);
 	/* One ref for hash, one ref returned */
 	atomic_set(&entry->e_refcnt, 2);
 	entry->e_key = key;
 	entry->e_block = block;
 	head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
-	entry->e_hash_list_head = head;
+	entry->_e_hash_list_head = (unsigned long)head;
 	hlist_bl_lock(head);
 	hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
 		if (dup->e_key == key && dup->e_block == block) {
@@ -101,12 +124,12 @@ struct mb2_cache_entry *mb2_cache_entry_create(struct mb2_cache *cache,
 	hlist_bl_add_head(&entry->e_hash_list, head);
 	hlist_bl_unlock(head);
 
-	spin_lock(&cache->c_lru_list_lock);
-	list_add_tail(&entry->e_lru_list, &cache->c_lru_list);
+	spin_lock(&cache->c_list_lock);
+	list_add_tail(&entry->e_list, &cache->c_list);
 	/* Grab ref for LRU list */
 	atomic_inc(&entry->e_refcnt);
 	cache->c_entry_count++;
-	spin_unlock(&cache->c_lru_list_lock);
+	spin_unlock(&cache->c_list_lock);
 
 	return entry;
 }
@@ -127,7 +150,7 @@ static struct mb2_cache_entry *__entry_find(struct mb2_cache *cache,
 	struct hlist_bl_head *head;
 
 	if (entry)
-		head = entry->e_hash_list_head;
+		head = mb2_cache_entry_head(entry);
 	else
 		head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
 	hlist_bl_lock(head);
@@ -206,13 +229,13 @@ void mb2_cache_entry_delete_block(struct mb2_cache *cache, unsigned int key,
 			/* We keep hash list reference to keep entry alive */
 			hlist_bl_del_init(&entry->e_hash_list);
 			hlist_bl_unlock(head);
-			spin_lock(&cache->c_lru_list_lock);
-			if (!list_empty(&entry->e_lru_list)) {
-				list_del_init(&entry->e_lru_list);
+			spin_lock(&cache->c_list_lock);
+			if (!list_empty(&entry->e_list)) {
+				list_del_init(&entry->e_list);
 				cache->c_entry_count--;
 				atomic_dec(&entry->e_refcnt);
 			}
-			spin_unlock(&cache->c_lru_list_lock);
+			spin_unlock(&cache->c_list_lock);
 			mb2_cache_entry_put(cache, entry);
 			return;
 		}
@@ -225,15 +248,12 @@ EXPORT_SYMBOL(mb2_cache_entry_delete_block);
  * @cache - cache the entry belongs to
  * @entry - entry that got used
  *
- * Move entry in lru list to reflect the fact that it was used.
+ * Marks entry as used to give hit higher chances of surviving in cache.
  */
 void mb2_cache_entry_touch(struct mb2_cache *cache,
 			   struct mb2_cache_entry *entry)
 {
-	spin_lock(&cache->c_lru_list_lock);
-	if (!list_empty(&entry->e_lru_list))
-		list_move_tail(&cache->c_lru_list, &entry->e_lru_list);
-	spin_unlock(&cache->c_lru_list_lock);
+	mb2_cache_entry_set_referenced(entry);
 }
 EXPORT_SYMBOL(mb2_cache_entry_touch);
 
@@ -254,18 +274,23 @@ static unsigned long mb2_cache_shrink(struct mb2_cache *cache,
 	struct hlist_bl_head *head;
 	unsigned int shrunk = 0;
 
-	spin_lock(&cache->c_lru_list_lock);
-	while (nr_to_scan-- && !list_empty(&cache->c_lru_list)) {
-		entry = list_first_entry(&cache->c_lru_list,
-					 struct mb2_cache_entry, e_lru_list);
-		list_del_init(&entry->e_lru_list);
+	spin_lock(&cache->c_list_lock);
+	while (nr_to_scan-- && !list_empty(&cache->c_list)) {
+		entry = list_first_entry(&cache->c_list,
+					 struct mb2_cache_entry, e_list);
+		if (mb2_cache_entry_referenced(entry)) {
+			mb2_cache_entry_clear_referenced(entry);
+			list_move_tail(&cache->c_list, &entry->e_list);
+			continue;
+		}
+		list_del_init(&entry->e_list);
 		cache->c_entry_count--;
 		/*
 		 * We keep LRU list reference so that entry doesn't go away
 		 * from under us.
 		 */
-		spin_unlock(&cache->c_lru_list_lock);
-		head = entry->e_hash_list_head;
+		spin_unlock(&cache->c_list_lock);
+		head = mb2_cache_entry_head(entry);
 		hlist_bl_lock(head);
 		if (!hlist_bl_unhashed(&entry->e_hash_list)) {
 			hlist_bl_del_init(&entry->e_hash_list);
@@ -275,9 +300,9 @@ static unsigned long mb2_cache_shrink(struct mb2_cache *cache,
 		if (mb2_cache_entry_put(cache, entry))
 			shrunk++;
 		cond_resched();
-		spin_lock(&cache->c_lru_list_lock);
+		spin_lock(&cache->c_list_lock);
 	}
-	spin_unlock(&cache->c_lru_list_lock);
+	spin_unlock(&cache->c_list_lock);
 
 	return shrunk;
 }
@@ -321,8 +346,8 @@ struct mb2_cache *mb2_cache_create(int bucket_bits)
 		goto err_out;
 	cache->c_bucket_bits = bucket_bits;
 	cache->c_max_entries = bucket_count << 4;
-	INIT_LIST_HEAD(&cache->c_lru_list);
-	spin_lock_init(&cache->c_lru_list_lock);
+	INIT_LIST_HEAD(&cache->c_list);
+	spin_lock_init(&cache->c_list_lock);
 	cache->c_hash = kmalloc(bucket_count * sizeof(struct hlist_bl_head),
 				GFP_KERNEL);
 	if (!cache->c_hash) {
@@ -364,13 +389,13 @@ void mb2_cache_destroy(struct mb2_cache *cache)
 	 * We don't bother with any locking. Cache must not be used at this
 	 * point.
 	 */
-	list_for_each_entry_safe(entry, next, &cache->c_lru_list, e_lru_list) {
+	list_for_each_entry_safe(entry, next, &cache->c_list, e_list) {
 		if (!hlist_bl_unhashed(&entry->e_hash_list)) {
 			hlist_bl_del_init(&entry->e_hash_list);
 			atomic_dec(&entry->e_refcnt);
 		} else
 			WARN_ON(1);
-		list_del(&entry->e_lru_list);
+		list_del(&entry->e_list);
 		WARN_ON(atomic_read(&entry->e_refcnt) != 1);
 		mb2_cache_entry_put(cache, entry);
 	}
diff --git a/include/linux/mbcache2.h b/include/linux/mbcache2.h
index 26ec4ecd2a18..e84983be8311 100644
--- a/include/linux/mbcache2.h
+++ b/include/linux/mbcache2.h
@@ -10,8 +10,8 @@
 struct mb2_cache;
 
 struct mb2_cache_entry {
-	/* LRU list - protected by cache->c_lru_list_lock */
-	struct list_head	e_lru_list;
+	/* List of entries in cache - protected by cache->c_list_lock */
+	struct list_head	e_list;
 	/* Hash table list - protected by bitlock in e_hash_list_head */
 	struct hlist_bl_node	e_hash_list;
 	atomic_t		e_refcnt;
@@ -19,8 +19,11 @@ struct mb2_cache_entry {
 	unsigned int		e_key;
 	/* Block number of hashed block - stable during lifetime of the entry */
 	sector_t		e_block;
-	/* Head of hash list (for list bit lock) - stable */
-	struct hlist_bl_head	*e_hash_list_head;
+	/*
+	 * Head of hash list (for list bit lock) - stable. Combined with
+	 * referenced bit of entry
+	 */
+	unsigned long		_e_hash_list_head;
 };
 
 struct mb2_cache *mb2_cache_create(int bucket_bits);
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ