lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20241029204501.47463-1-catherine.hoang@oracle.com>
Date: Tue, 29 Oct 2024 13:45:01 -0700
From: Catherine Hoang <catherine.hoang@...cle.com>
To: linux-ext4@...r.kernel.org
Cc: djwong@...nel.org
Subject: [RFC PATCH v1] ext2: remove buffer heads from quota handling

This patch removes the use of buffer heads from the quota read and
write paths. To do so, we implement a new buffer cache using an
rhashtable. Each buffer stores data from an associated block, and
can be read or written to as needed.

Ultimately, we want to completely remove buffer heads from ext2.
This patch serves as an example that can be applied to other parts
of the filesystem.

Signed-off-by: Catherine Hoang <catherine.hoang@...cle.com>
---
 fs/ext2/Makefile |   2 +-
 fs/ext2/cache.c  | 195 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/ext2/ext2.h   |  30 ++++++++
 fs/ext2/inode.c  |  20 +++++
 fs/ext2/super.c  |  62 ++++++++-------
 5 files changed, 281 insertions(+), 28 deletions(-)
 create mode 100644 fs/ext2/cache.c

diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile
index 8860948ef9ca..e8b38243058f 100644
--- a/fs/ext2/Makefile
+++ b/fs/ext2/Makefile
@@ -5,7 +5,7 @@
 
 obj-$(CONFIG_EXT2_FS) += ext2.o
 
-ext2-y := balloc.o dir.o file.o ialloc.o inode.o \
+ext2-y := balloc.o cache.o dir.o file.o ialloc.o inode.o \
 	  ioctl.o namei.o super.o symlink.o trace.o
 
 # For tracepoints to include our trace.h from tracepoint infrastructure
diff --git a/fs/ext2/cache.c b/fs/ext2/cache.c
new file mode 100644
index 000000000000..c58416392c52
--- /dev/null
+++ b/fs/ext2/cache.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2024 Oracle. All rights reserved.
+ */
+
+#include "ext2.h"
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/rhashtable.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+
+/*
+ * rhashtable layout for the buffer cache: entries are struct ext2_buffer
+ * objects, linked through b_rhash and keyed on their b_block field.
+ */
+static const struct rhashtable_params buffer_cache_params = {
+	.head_offset = offsetof(struct ext2_buffer, b_rhash),
+	.key_offset  = offsetof(struct ext2_buffer, b_block),
+	.key_len     = sizeof(sector_t),
+	.automatic_shrinking = true,
+};
+
+/*
+ * Try to add @new_buf to the buffer cache of @sb, serialised by
+ * buffer_cache_lock.
+ *
+ * Returns @new_buf when rhashtable_lookup_get_insert_fast() yields NULL;
+ * otherwise returns whatever non-NULL value that call produced.
+ */
+static struct ext2_buffer *insert_buffer_cache(struct super_block *sb, struct ext2_buffer *new_buf)
+{
+	struct ext2_sb_info *sbi = EXT2_SB(sb);
+	struct ext2_buffer *existing;
+
+	spin_lock(&sbi->buffer_cache_lock);
+	existing = rhashtable_lookup_get_insert_fast(&sbi->buffer_cache,
+				&new_buf->b_rhash, buffer_cache_params);
+	spin_unlock(&sbi->buffer_cache_lock);
+
+	return existing ? existing : new_buf;
+}
+
+/*
+ * Completion handler for the write bios built by submit_buffer_write().
+ * bio->bi_private carries the ext2_buffer that was written.
+ *
+ * The dirty bit is cleared only when the write succeeded, so a buffer
+ * whose write failed stays dirty and is picked up by the next
+ * sync_buffers() pass instead of being silently treated as clean.
+ *
+ * NOTE(review): a buffer that is re-dirtied while its write is still in
+ * flight will also lose its dirty bit here - confirm whether callers can
+ * modify a buffer during writeback.
+ */
+static void buf_write_end_io(struct bio *bio)
+{
+	struct ext2_buffer *buf = bio->bi_private;
+
+	if (!bio->bi_status)
+		clear_bit(EXT2_BUF_DIRTY_BIT, &buf->b_flags);
+	bio_put(bio);
+}
+
+/*
+ * Synchronously read the block described by @buf from sb->s_bdev into
+ * buf->b_page, using an on-stack bio with a single bio_vec.
+ *
+ * Returns the result of submit_bio_wait().
+ */
+static int submit_buffer_read(struct super_block *sb, struct ext2_buffer *buf)
+{
+	/* Number of 512-byte sectors that make up one filesystem block. */
+	const unsigned long sectors_per_block = sb->s_blocksize >> 9;
+	struct bio_vec vec;
+	struct bio rbio;
+
+	bio_init(&rbio, sb->s_bdev, &vec, 1, REQ_OP_READ);
+	rbio.bi_iter.bi_sector = buf->b_block * sectors_per_block;
+	__bio_add_page(&rbio, buf->b_page, buf->b_size, 0);
+
+	return submit_bio_wait(&rbio);
+}
+
+/*
+ * Queue an asynchronous write of @buf's page to its block on sb->s_bdev.
+ * Completion is handled by buf_write_end_io(), which receives @buf via
+ * bi_private. This function does not wait for the write.
+ */
+static void submit_buffer_write(struct super_block *sb, struct ext2_buffer *buf)
+{
+	/* Number of 512-byte sectors that make up one filesystem block. */
+	const unsigned long sectors_per_block = sb->s_blocksize >> 9;
+	struct bio *wbio = bio_alloc(sb->s_bdev, 1, REQ_OP_WRITE, GFP_KERNEL);
+
+	wbio->bi_private = buf;
+	wbio->bi_end_io = buf_write_end_io;
+	wbio->bi_iter.bi_sector = buf->b_block * sectors_per_block;
+
+	__bio_add_page(wbio, buf->b_page, buf->b_size, 0);
+
+	submit_bio(wbio);
+}
+
+/*
+ * Submit a write for every buffer in @sb's buffer cache whose dirty bit
+ * is set.
+ *
+ * The writes are only submitted, not waited for. Always returns 0.
+ */
+int sync_buffers(struct super_block *sb)
+{
+	struct ext2_sb_info *sbi = EXT2_SB(sb);
+	struct rhashtable *buffer_cache = &sbi->buffer_cache;
+	struct rhashtable_iter iter;
+	struct ext2_buffer *buf;
+	struct blk_plug plug;
+
+	blk_start_plug(&plug);
+	rhashtable_walk_enter(buffer_cache, &iter);
+	rhashtable_walk_start(&iter);
+	while ((buf = rhashtable_walk_next(&iter)) != NULL) {
+		/* An error pointer here is skipped; the walk continues. */
+		if (IS_ERR(buf))
+			continue;
+		if (!test_bit(EXT2_BUF_DIRTY_BIT, &buf->b_flags))
+			continue;
+
+		/*
+		 * The section between walk_start and walk_stop is an RCU
+		 * read-side critical section, but submit_buffer_write()
+		 * allocates with GFP_KERNEL and submits a bio, both of
+		 * which may sleep. Pause the walk around the submission.
+		 * Buffers are only removed from the cache when the cache
+		 * is destroyed, so @buf stays valid while the walk is
+		 * paused.
+		 */
+		rhashtable_walk_stop(&iter);
+		submit_buffer_write(sb, buf);
+		rhashtable_walk_start(&iter);
+	}
+	rhashtable_walk_stop(&iter);
+	rhashtable_walk_exit(&iter);
+	blk_finish_plug(&plug);
+
+	return 0;
+}
+
+/*
+ * Look up the cached buffer for @block in @sb's buffer cache.
+ *
+ * Returns the result of rhashtable_lookup_fast() keyed on @block.
+ */
+static struct ext2_buffer *lookup_buffer_cache(struct super_block *sb, sector_t block)
+{
+	struct ext2_sb_info *sbi = EXT2_SB(sb);
+
+	return rhashtable_lookup_fast(&sbi->buffer_cache, &block,
+				      buffer_cache_params);
+}
+
+/*
+ * Allocate and initialise a new, clean buffer for @block.
+ *
+ * The buffer gets one freshly allocated page (not zeroed), b_size set to
+ * the filesystem block size, and a reference count of 1. It is not
+ * inserted into the buffer cache.
+ *
+ * On success stores the buffer in *@buf_ptr and returns 0. Returns
+ * -ENOMEM if either allocation fails; *@buf_ptr is left untouched and
+ * nothing is leaked.
+ */
+static int init_buffer(struct super_block *sb, sector_t block, struct ext2_buffer **buf_ptr)
+{
+	struct ext2_buffer *buf;
+
+	buf = kmalloc(sizeof(*buf), GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	buf->b_block = block;
+	buf->b_size = sb->s_blocksize;
+	buf->b_flags = 0;
+
+	mutex_init(&buf->b_lock);
+	refcount_set(&buf->b_refcount, 1);
+
+	buf->b_page = alloc_page(GFP_KERNEL);
+	if (!buf->b_page) {
+		/* Do not leak the descriptor when the page is unavailable. */
+		kfree(buf);
+		return -ENOMEM;
+	}
+
+	buf->b_data = page_address(buf->b_page);
+
+	*buf_ptr = buf;
+
+	return 0;
+}
+
+/*
+ * Release a buffer obtained from get_buffer(): drops the reference that
+ * get_buffer() took and unlocks b_lock, which get_buffer() left held.
+ */
+void put_buffer(struct ext2_buffer *buf)
+{
+	refcount_dec(&buf->b_refcount);
+	mutex_unlock(&buf->b_lock);
+}
+
+/*
+ * Get the cached buffer for @block, creating and inserting it if it is
+ * not in the cache yet.
+ *
+ * @need_uptodate: when a new buffer has to be created, read the block
+ *                 from disk before inserting it. Pass false only when
+ *                 the caller overwrites the whole block.
+ *
+ * On success the buffer is returned with b_lock held and its reference
+ * count raised; release it with put_buffer(). On failure an ERR_PTR()
+ * is returned and nothing is left allocated.
+ */
+struct ext2_buffer *get_buffer(struct super_block *sb, sector_t block, bool need_uptodate)
+{
+	int err;
+	struct ext2_buffer *buf;
+	struct ext2_buffer *new_buf = NULL;
+
+	buf = lookup_buffer_cache(sb, block);
+
+	if (!buf) {
+		err = init_buffer(sb, block, &new_buf);
+		if (err)
+			return ERR_PTR(err);
+
+		if (need_uptodate) {
+			err = submit_buffer_read(sb, new_buf);
+			if (err)
+				goto out_free;
+		}
+
+		buf = insert_buffer_cache(sb, new_buf);
+		if (IS_ERR(buf)) {
+			/* Never fall through and lock an error pointer. */
+			err = PTR_ERR(buf);
+			goto out_free;
+		}
+		if (buf != new_buf) {
+			/*
+			 * Another task inserted a buffer for this block
+			 * first; use theirs and discard our unused copy.
+			 */
+			__free_page(new_buf->b_page);
+			kfree(new_buf);
+		}
+	}
+
+	mutex_lock(&buf->b_lock);
+	refcount_inc(&buf->b_refcount);
+
+	return buf;
+
+out_free:
+	__free_page(new_buf->b_page);
+	kfree(new_buf);
+	return ERR_PTR(err);
+}
+
+/*
+ * Mark @buf dirty by setting EXT2_BUF_DIRTY_BIT in its flags.
+ * sync_buffers() submits writes for buffers that have this bit set.
+ */
+void buffer_set_dirty(struct ext2_buffer *buf)
+{
+	set_bit(EXT2_BUF_DIRTY_BIT, &buf->b_flags);
+}
+
+/*
+ * Initialise @buffer_cache as an rhashtable using buffer_cache_params.
+ *
+ * Returns the result of rhashtable_init().
+ */
+int init_buffer_cache(struct rhashtable *buffer_cache)
+{
+	int ret;
+
+	ret = rhashtable_init(buffer_cache, &buffer_cache_params);
+
+	return ret;
+}
+
+/*
+ * Per-entry callback for rhashtable_free_and_destroy(): frees the page
+ * and descriptor of the buffer passed as @ptr. @arg is unused.
+ *
+ * Warns if the buffer is still marked dirty when it is freed.
+ */
+static void destroy_buffer(void *ptr, void *arg)
+{
+	struct ext2_buffer *victim = ptr;
+	bool still_dirty = test_bit(EXT2_BUF_DIRTY_BIT, &victim->b_flags);
+
+	WARN_ON(still_dirty);
+	__free_page(victim->b_page);
+	kfree(victim);
+}
+
+/*
+ * Tear down @buffer_cache, calling destroy_buffer() on every entry.
+ */
+void destroy_buffer_cache(struct rhashtable *buffer_cache)
+{
+	rhashtable_free_and_destroy(buffer_cache, destroy_buffer, NULL);
+}
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index f38bdd46e4f7..ce0bb03527e0 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -18,6 +18,7 @@
 #include <linux/rbtree.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
+#include <linux/rhashtable.h>
 
 /* XXX Here for now... not interested in restructing headers JUST now */
 
@@ -116,6 +117,8 @@ struct ext2_sb_info {
 	struct mb_cache *s_ea_block_cache;
 	struct dax_device *s_daxdev;
 	u64 s_dax_part_off;
+	struct rhashtable buffer_cache;
+	spinlock_t buffer_cache_lock;
 };
 
 static inline spinlock_t *
@@ -683,6 +686,24 @@ struct ext2_inode_info {
  */
 #define EXT2_STATE_NEW			0x00000001 /* inode is newly created */
 
+/*
+ * ext2 buffer: one cached filesystem block, stored in the per-superblock
+ * buffer cache (an rhashtable keyed on b_block).
+ */
+struct ext2_buffer {
+	sector_t b_block;		/* filesystem block number; hash key */
+	struct rhash_head b_rhash;	/* linkage in the buffer cache */
+	struct page *b_page;		/* page holding the block's data */
+	size_t b_size;			/* bytes of data; set to s_blocksize */
+	char *b_data;			/* page_address() of b_page */
+	unsigned long b_flags;		/* EXT2_BUF_*_BIT flag bits */
+	refcount_t b_refcount;		/* 1 at init; get_buffer() incs, put_buffer() decs */
+	struct mutex b_lock;		/* held from get_buffer() until put_buffer() */
+};
+
+/*
+ * Buffer flags
+ */
+ #define EXT2_BUF_DIRTY_BIT 0
 
 /*
  * Function prototypes
@@ -716,6 +737,14 @@ extern int ext2_should_retry_alloc(struct super_block *sb, int *retries);
 extern void ext2_init_block_alloc_info(struct inode *);
 extern void ext2_rsv_window_add(struct super_block *sb, struct ext2_reserve_window_node *rsv);
 
+/* cache.c */
+extern int init_buffer_cache(struct rhashtable *);
+extern void destroy_buffer_cache(struct rhashtable *buffer_cache);
+extern int sync_buffers(struct super_block *);
+extern struct ext2_buffer *get_buffer(struct super_block *, sector_t, bool);
+extern void put_buffer(struct ext2_buffer *);
+extern void buffer_set_dirty(struct ext2_buffer *);
+
 /* dir.c */
 int ext2_add_link(struct dentry *, struct inode *);
 int ext2_inode_by_name(struct inode *dir,
@@ -741,6 +770,7 @@ extern int ext2_write_inode (struct inode *, struct writeback_control *);
 extern void ext2_evict_inode(struct inode *);
 void ext2_write_failed(struct address_space *mapping, loff_t to);
 extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int);
+extern int ext2_get_block_bno(struct inode *, sector_t, int, u32 *, bool *);
 extern int ext2_setattr (struct mnt_idmap *, struct dentry *, struct iattr *);
 extern int ext2_getattr (struct mnt_idmap *, const struct path *,
 			 struct kstat *, u32, unsigned int);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 0caa1650cee8..7e7e6a5916c4 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -803,6 +803,26 @@ int ext2_get_block(struct inode *inode, sector_t iblock,
 
 }
 
+/*
+ * Map logical block @iblock of @inode to an on-disk block number without
+ * exposing a buffer_head to the caller.
+ *
+ * @create: passed through to ext2_get_block().
+ * @bno:    set to the mapped block number, or 0 when the block is not
+ *          mapped.
+ * @mapped: set to whether ext2_get_block() mapped the block.
+ *
+ * Returns 0 on success or the error from ext2_get_block(), in which case
+ * @bno and @mapped are left untouched.
+ */
+int ext2_get_block_bno(struct inode *inode, sector_t iblock,
+		int create, u32 *bno, bool *mapped)
+{
+	struct super_block *sb = inode->i_sb;
+	struct buffer_head tmp_bh;
+	int err;
+
+	tmp_bh.b_state = 0;
+	tmp_bh.b_size = sb->s_blocksize;
+
+	/* Honour @create; passing 0 here would break allocating callers. */
+	err = ext2_get_block(inode, iblock, &tmp_bh, create);
+	if (err)
+		return err;
+
+	*mapped = buffer_mapped(&tmp_bh);
+	/* b_blocknr is only meaningful when the block was mapped. */
+	*bno = *mapped ? tmp_bh.b_blocknr : 0;
+
+	return 0;
+}
+
 static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 		unsigned flags, struct iomap *iomap, struct iomap *srcmap)
 {
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 37f7ce56adce..11d88882ad24 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -152,6 +152,8 @@ static void ext2_put_super (struct super_block * sb)
 	ext2_xattr_destroy_cache(sbi->s_ea_block_cache);
 	sbi->s_ea_block_cache = NULL;
 
+	destroy_buffer_cache(&sbi->buffer_cache);
+
 	if (!sb_rdonly(sb)) {
 		struct ext2_super_block *es = sbi->s_es;
 
@@ -835,6 +837,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off,
 					   NULL, NULL);
 
+	spin_lock_init(&sbi->buffer_cache_lock);
+	ret = init_buffer_cache(&sbi->buffer_cache);
+	if (ret) {
+		ext2_msg(sb, KERN_ERR, "error: unable to create buffer cache");
+		goto failed_sbi;
+	}
+
 	spin_lock_init(&sbi->s_lock);
 	ret = -EINVAL;
 
@@ -1278,6 +1287,8 @@ static int ext2_sync_fs(struct super_block *sb, int wait)
 	 */
 	dquot_writeback_dquots(sb, -1);
 
+	sync_buffers(sb);
+
 	spin_lock(&sbi->s_lock);
 	if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) {
 		ext2_debug("setting valid to 0\n");
@@ -1491,9 +1502,10 @@ static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data,
 	int offset = off & (sb->s_blocksize - 1);
 	int tocopy;
 	size_t toread;
-	struct buffer_head tmp_bh;
-	struct buffer_head *bh;
 	loff_t i_size = i_size_read(inode);
+	struct ext2_buffer *buf;
+	u32 bno;
+	bool mapped;
 
 	if (off > i_size)
 		return 0;
@@ -1503,20 +1515,19 @@ static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data,
 	while (toread > 0) {
 		tocopy = min_t(size_t, sb->s_blocksize - offset, toread);
 
-		tmp_bh.b_state = 0;
-		tmp_bh.b_size = sb->s_blocksize;
-		err = ext2_get_block(inode, blk, &tmp_bh, 0);
+		err = ext2_get_block_bno(inode, blk, 0, &bno, &mapped);
 		if (err < 0)
 			return err;
-		if (!buffer_mapped(&tmp_bh))	/* A hole? */
+		if (!mapped)	/* A hole? */
 			memset(data, 0, tocopy);
 		else {
-			bh = sb_bread(sb, tmp_bh.b_blocknr);
-			if (!bh)
-				return -EIO;
-			memcpy(data, bh->b_data+offset, tocopy);
-			brelse(bh);
+			buf = get_buffer(sb, bno, 1);
+			if (IS_ERR(buf))
+				return PTR_ERR(buf);
+			memcpy(data, buf->b_data+offset, tocopy);
+			put_buffer(buf);
 		}
+
 		offset = 0;
 		toread -= tocopy;
 		data += tocopy;
@@ -1535,32 +1546,29 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
 	int offset = off & (sb->s_blocksize - 1);
 	int tocopy;
 	size_t towrite = len;
-	struct buffer_head tmp_bh;
-	struct buffer_head *bh;
+	struct ext2_buffer *buf;
+	u32 bno;
+	bool mapped;
 
 	while (towrite > 0) {
 		tocopy = min_t(size_t, sb->s_blocksize - offset, towrite);
 
-		tmp_bh.b_state = 0;
-		tmp_bh.b_size = sb->s_blocksize;
-		err = ext2_get_block(inode, blk, &tmp_bh, 1);
+		err = ext2_get_block_bno(inode, blk, 1, &bno, &mapped);
 		if (err < 0)
 			goto out;
+
 		if (offset || tocopy != EXT2_BLOCK_SIZE(sb))
-			bh = sb_bread(sb, tmp_bh.b_blocknr);
+			buf = get_buffer(sb, bno, 1);
 		else
-			bh = sb_getblk(sb, tmp_bh.b_blocknr);
-		if (unlikely(!bh)) {
-			err = -EIO;
+			buf = get_buffer(sb, bno, 0);
+		if (IS_ERR(buf)) {
+			err = PTR_ERR(buf);
 			goto out;
 		}
-		lock_buffer(bh);
-		memcpy(bh->b_data+offset, data, tocopy);
-		flush_dcache_page(bh->b_page);
-		set_buffer_uptodate(bh);
-		mark_buffer_dirty(bh);
-		unlock_buffer(bh);
-		brelse(bh);
+		memcpy(buf->b_data+offset, data, tocopy);
+		buffer_set_dirty(buf);
+		put_buffer(buf);
+
 		offset = 0;
 		towrite -= tocopy;
 		data += tocopy;
-- 
2.43.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ