linux-ext4 - [PATCH] ext4: attempt to shrink directory on dentry removal

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20190821182740.97127-1-harshadshirwadkar@gmail.com>
Date:   Wed, 21 Aug 2019 11:27:40 -0700
From:   Harshad Shirwadkar <harshadshirwadkar@...il.com>
To:     linux-ext4@...r.kernel.org
Cc:     Harshad Shirwadkar <harshadshirwadkar@...il.com>
Subject: [PATCH] ext4: attempt to shrink directory on dentry removal

On every dentry deletion, this patch traverses directory inode in
reverse direction and frees up empty dirent blocks until it finds a
non-empty dirent block. We leverage the fact that we never clear
dentry name when we delete dentrys by merging them with the previous
one. So, even though the dirent block has only fake dentry which spans
across the entire block, we can use the name in this dead entry to
perform dx lookup and find intermediate dx node blocks as well as
offset inside these blocks.

As of now, we only support non-indexed directories and indexed
directories with no intermediate dx nodes. This technique can also be
used to remove intermediate dx nodes. But it needs a little more
interesting logic to make that happen since we don't store directory
entry name in intermediate nodes.

Ran kvm-xfstests smoke test-suite and verified that there are no
failures. Also, verified that when all the files are deleted in a
directory, directory shrinks to either 4k for non-indexed directories
or 8k for indexed directories with 1 level.

This patch is an improvement over previous patch that I sent out
earlier this month. So, if this patch looks alright, then we can drop
the other shrinking patch.

Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@...il.com>

---
This patch supersedes the other directory shrinking patch sent in Aug
2019 ("ext4: shrink directory when last block is empty").

 fs/ext4/namei.c | 176 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 176 insertions(+)

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 129029534075..00a6602605ab 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -882,6 +882,60 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
 	return ret_err;
 }
 
+/*
+ * This function tries to remove the entry of a dirent block (which was just
+ * emptied by the caller) from the dx frame. It does so by reducing the count by
+ * 1 and left shifting all the entries after the deleted entry.
+ */
+int
+ext4_remove_dx_entry(handle_t *handle, struct inode *dir,
+		     struct dx_frame *dx_frame)
+{
+	struct dx_entry *entries;
+	unsigned int count;
+	unsigned int limit;
+	int err, i;
+
+	entries = dx_frame->entries;
+	count = dx_get_count(entries);
+	limit = dx_get_limit(entries);
+
+	if (count == 1)
+		return -EINVAL;
+
+	for (i = 0; i < count; i++)
+		if (entries[i].block == cpu_to_le64(dx_get_block(dx_frame->at)))
+			break;
+
+	if (i >= count)
+		return -EINVAL;
+
+	err = ext4_journal_get_write_access(handle, dx_frame->bh);
+	if (err) {
+		ext4_std_error(dir->i_sb, err);
+		return -EINVAL;
+	}
+
+	for (; i < count - 1; i++)
+		entries[i] = entries[i + 1];
+
+	/*
+	 * If i was 0 when we began above loop, we would have overwritten count
+	 * and limit values sin ce those values live in dx_entry->hash of the
+	 * first entry. We need to update count but we should set limit as well.
+	 */
+	dx_set_count(entries, count - 1);
+	dx_set_limit(entries, limit);
+
+	err = ext4_handle_dirty_dx_node(handle, dir, dx_frame->bh);
+	if (err) {
+		ext4_std_error(dir->i_sb, err);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static void dx_release(struct dx_frame *frames)
 {
 	struct dx_root_info *info;
@@ -1409,6 +1463,19 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
 	return 0;
 }
 
+static inline bool is_empty_dirent_block(struct inode *dir,
+					 struct buffer_head *bh)
+{
+	struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)bh->b_data;
+	int	csum_size = 0;
+
+	if (ext4_has_metadata_csum(dir->i_sb))
+		csum_size = sizeof(struct ext4_dir_entry_tail);
+
+	return ext4_rec_len_from_disk(de->rec_len, dir->i_sb->s_blocksize) ==
+			dir->i_sb->s_blocksize - csum_size && de->inode == 0;
+}
+
 static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
 			       struct ext4_dir_entry *de)
 {
@@ -2476,6 +2543,113 @@ int ext4_generic_delete_entry(handle_t *handle,
 	return -ENOENT;
 }
 
+int ext4_try_dir_shrink(handle_t *handle, struct inode *dir)
+{
+	struct dx_frame frames[EXT4_HTREE_LEVEL], *frame_ptr;
+	struct dx_root_info *info;
+	struct dx_hash_info hinfo;
+	struct ext4_dir_entry_2 *dead_de;
+	struct buffer_head *bh;
+	int err;
+	ext4_lblk_t lblk, min_lblk;
+
+	hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
+	if (hinfo.hash_version <= DX_HASH_TEA)
+		hinfo.hash_version +=
+				EXT4_SB(dir->i_sb)->s_hash_unsigned;
+	hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
+
+	if (is_dx(dir))
+		min_lblk = 2;
+	else
+		min_lblk = 1;
+	/*
+	 * Read blocks from directory in reverse orders and clean them up one by
+	 * one!
+	 */
+	for (lblk = dir->i_size >> dir->i_sb->s_blocksize_bits;
+	     lblk > min_lblk; lblk--) {
+		bh = ext4_bread(handle, dir, lblk - 1, 0);
+		if (IS_ERR(bh))
+			return -EIO;
+
+		if (!is_empty_dirent_block(dir, bh))
+			break;
+
+		if (!is_dx(dir))
+			continue;
+
+		dead_de = (struct ext4_dir_entry_2 *)bh->b_data;
+		ext4fs_dirhash(dir, dead_de->name, dead_de->name_len, &hinfo);
+
+		frame_ptr = dx_probe(NULL, dir, &hinfo, frames);
+		if (IS_ERR(frame_ptr)) {
+			dx_release(frames);
+			break;
+		}
+
+		/*
+		 * Cross-check if the dead de helped us find the block that we
+		 * are looking to delete.
+		 */
+		if (unlikely(dx_get_block(frames[0].at) != lblk - 1)) {
+			dx_release(frames);
+			break;
+		}
+
+		info = &((struct dx_root *)frames[0].bh->b_data)->info;
+		if (info->indirect_levels > 0) {
+			/*
+			 * We don't shrink in this case. That's because the
+			 * block that we just read could very well be an index
+			 * block. If it's an index block, we need to do special
+			 * handling to delete the block. Couple of options:
+			 *
+			 * (1) After deleting any dentry, if the dirent block
+			 *     becomes emtpy, we can remove the entry of the
+			 *     dirent block from its index node. If we do that
+			 *     then after enough deletions, index block could
+			 *     become empty. The problem with this approach is
+			 *     that after removing entry of a particular hash
+			 *     value from an index block, if a new dentry maps
+			 *     to same hash value, we probably will end up
+			 *     allocating one more block. Now our initial dirent
+			 *     block becomes orphan.
+			 *
+			 * (2) If the last block in the directory inode is an
+			 *     index block, then we could check all the leaf
+			 *     dirent blocks that are part of this dirent
+			 *     block. If all of them are empty, then we can
+			 *     remove the entry of this index block from its
+			 *     parent block and truncate this index block off.
+			 *
+			 *  In both the options though, we need a way to look-up
+			 *  the parent of an index block. Since we don't store
+			 *  any dirent name in an index block, it's not easy to
+			 *  lookup the parent of an index block. But if we want
+			 *  to implement this, then we can very well store a
+			 *  dead dirent which has a name and which maps to the
+			 *  index block in question. We could then use that dead
+			 *  entry to lookup parents for the index block.
+			 */
+			dx_release(frames);
+			return -EOPNOTSUPP;
+		}
+
+		err = ext4_remove_dx_entry(handle, dir, &frames[0]);
+		dx_release(frames);
+		if (err)
+			break;
+	}
+
+	if (lblk < dir->i_size >> dir->i_sb->s_blocksize_bits) {
+		dir->i_size = lblk * dir->i_sb->s_blocksize;
+		ext4_truncate(dir);
+	}
+
+	return 0;
+}
+
 static int ext4_delete_entry(handle_t *handle,
 			     struct inode *dir,
 			     struct ext4_dir_entry_2 *de_del,
@@ -2510,6 +2684,8 @@ static int ext4_delete_entry(handle_t *handle,
 	if (unlikely(err))
 		goto out;
 
+	ext4_try_dir_shrink(handle, dir);
+
 	return 0;
 out:
 	if (err != -ENOENT)
-- 
2.23.0.rc1.153.gdeed80330f-goog