[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250326014928.61507-2-catherine.hoang@oracle.com>
Date: Tue, 25 Mar 2025 18:49:25 -0700
From: Catherine Hoang <catherine.hoang@...cle.com>
To: linux-ext4@...r.kernel.org
Cc: djwong@...nel.org
Subject: [RFC PATCH v2 1/4] ext2: remove buffer heads from superblock
The superblock is stored in the buffer_head s_sbh in struct ext2_sb_info.
Replace this buffer head with the new ext2_buffer and update the buffer
functions accordingly. This patch also introduces new buffer cache code
needed for future patches.
Signed-off-by: Catherine Hoang <catherine.hoang@...cle.com>
---
fs/ext2/Makefile | 2 +-
fs/ext2/cache.c | 302 +++++++++++++++++++++++++++++++++++++++++++++++
fs/ext2/ext2.h | 43 ++++++-
fs/ext2/super.c | 52 +++++---
fs/ext2/xattr.c | 2 +-
5 files changed, 379 insertions(+), 22 deletions(-)
create mode 100644 fs/ext2/cache.c
diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile
index 8860948ef9ca..e8b38243058f 100644
--- a/fs/ext2/Makefile
+++ b/fs/ext2/Makefile
@@ -5,7 +5,7 @@
obj-$(CONFIG_EXT2_FS) += ext2.o
-ext2-y := balloc.o dir.o file.o ialloc.o inode.o \
+ext2-y := balloc.o cache.o dir.o file.o ialloc.o inode.o \
ioctl.o namei.o super.o symlink.o trace.o
# For tracepoints to include our trace.h from tracepoint infrastructure
diff --git a/fs/ext2/cache.c b/fs/ext2/cache.c
new file mode 100644
index 000000000000..464c506ba1b6
--- /dev/null
+++ b/fs/ext2/cache.c
@@ -0,0 +1,302 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2025 Oracle. All rights reserved.
+ */
+
+#include "ext2.h"
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/rhashtable.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+
+static const struct rhashtable_params buffer_cache_params = {
+ .key_len = sizeof(sector_t),
+ .key_offset = offsetof(struct ext2_buffer, b_block),
+ .head_offset = offsetof(struct ext2_buffer, b_rhash),
+ .automatic_shrinking = true,
+};
+
+void ext2_buffer_lock(struct ext2_buffer *buf)
+{
+ mutex_lock(&buf->b_lock);
+}
+
+void ext2_buffer_unlock(struct ext2_buffer *buf)
+{
+ mutex_unlock(&buf->b_lock);
+}
+
+void ext2_buffer_set_dirty(struct ext2_buffer *buf)
+{
+ set_bit(EXT2_BUF_DIRTY_BIT, &buf->b_flags);
+}
+
+static int ext2_buffer_uptodate(struct ext2_buffer *buf)
+{
+ return test_bit(EXT2_BUF_UPTODATE_BIT, &buf->b_flags);
+}
+
+void ext2_buffer_set_uptodate(struct ext2_buffer *buf)
+{
+ set_bit(EXT2_BUF_UPTODATE_BIT, &buf->b_flags);
+}
+
+void ext2_buffer_clear_uptodate(struct ext2_buffer *buf)
+{
+ clear_bit(EXT2_BUF_UPTODATE_BIT, &buf->b_flags);
+}
+
+int ext2_buffer_error(struct ext2_buffer *buf)
+{
+ return buf->b_error;
+}
+
+void ext2_buffer_clear_error(struct ext2_buffer *buf)
+{
+ buf->b_error = 0;
+}
+
+static struct ext2_buffer *ext2_insert_buffer_cache(struct super_block *sb, struct ext2_buffer *new_buf)
+{
+ struct ext2_sb_info *sbi = EXT2_SB(sb);
+ struct ext2_buffer_cache *bc = &sbi->s_buffer_cache;
+ struct rhashtable *buffer_cache = &bc->bc_hash;
+ struct ext2_buffer *old_buf;
+
+ rcu_read_lock();
+ old_buf = rhashtable_lookup_get_insert_fast(buffer_cache,
+ &new_buf->b_rhash, buffer_cache_params);
+
+ if (old_buf) {
+ refcount_inc(&old_buf->b_refcount);
+ rcu_read_unlock();
+ return old_buf;
+ }
+
+ refcount_inc(&new_buf->b_refcount);
+ rcu_read_unlock();
+ return new_buf;
+}
+
+static void ext2_buf_write_end_io(struct bio *bio)
+{
+ struct ext2_buffer *buf = bio->bi_private;
+ int err = blk_status_to_errno(bio->bi_status);
+
+ buf->b_error = err;
+ complete(&buf->b_complete);
+ mutex_unlock(&buf->b_lock);
+ bio_put(bio);
+}
+
+static int ext2_submit_buffer_read(struct super_block *sb, struct ext2_buffer *buf)
+{
+ struct bio_vec bio_vec;
+ struct bio bio;
+ sector_t sector = buf->b_block * (sb->s_blocksize >> 9);
+ int error;
+
+ bio_init(&bio, sb->s_bdev, &bio_vec, 1, REQ_OP_READ);
+ bio.bi_iter.bi_sector = sector;
+
+ buf->b_size = sb->s_blocksize;
+ __bio_add_page(&bio, buf->b_page, buf->b_size, 0);
+
+ mutex_lock(&buf->b_lock);
+ error = submit_bio_wait(&bio);
+ ext2_buffer_set_uptodate(buf);
+ mutex_unlock(&buf->b_lock);
+
+ return error;
+}
+
+static void ext2_submit_buffer_write(struct super_block *sb, struct ext2_buffer *buf)
+{
+ struct bio *bio;
+ sector_t sector = buf->b_block * (sb->s_blocksize >> 9);
+
+ bio = bio_alloc(sb->s_bdev, 1, REQ_OP_WRITE, GFP_KERNEL);
+
+ bio->bi_iter.bi_sector = sector;
+ bio->bi_end_io = ext2_buf_write_end_io;
+ bio->bi_private = buf;
+
+ __bio_add_page(bio, buf->b_page, buf->b_size, 0);
+
+ mutex_lock(&buf->b_lock);
+ submit_bio(bio);
+}
+
+static int ext2_sync_buffer_cache_wait(struct list_head *submit_list)
+{
+ struct ext2_buffer *buf, *n;
+ int error = 0, error2;
+
+ list_for_each_entry_safe(buf, n, submit_list, b_list) {
+ wait_for_completion(&buf->b_complete);
+ refcount_dec(&buf->b_refcount);
+ error2 = buf->b_error;
+ if (!error)
+ error = error2;
+ }
+
+ return error;
+}
+
+int ext2_sync_buffer_wait(struct super_block *sb, struct ext2_buffer *buf)
+{
+ if (test_and_clear_bit(EXT2_BUF_DIRTY_BIT, &buf->b_flags)) {
+ ext2_submit_buffer_write(sb, buf);
+ wait_for_completion(&buf->b_complete);
+ return buf->b_error;
+ }
+
+ return 0;
+}
+
+int ext2_sync_buffer_cache(struct super_block *sb)
+{
+ struct ext2_sb_info *sbi = EXT2_SB(sb);
+ struct ext2_buffer_cache *bc = &sbi->s_buffer_cache;
+ struct rhashtable *buffer_cache = &bc->bc_hash;
+ struct rhashtable_iter iter;
+ struct ext2_buffer *buf, *n;
+ struct blk_plug plug;
+ LIST_HEAD(submit_list);
+
+ rhashtable_walk_enter(buffer_cache, &iter);
+ rhashtable_walk_start(&iter);
+ while ((buf = rhashtable_walk_next(&iter)) != NULL) {
+ if (IS_ERR(buf))
+ continue;
+ if (test_and_clear_bit(EXT2_BUF_DIRTY_BIT, &buf->b_flags)) {
+ refcount_inc(&buf->b_refcount);
+ list_add(&buf->b_list, &submit_list);
+ }
+ }
+ rhashtable_walk_stop(&iter);
+ rhashtable_walk_exit(&iter);
+
+ blk_start_plug(&plug);
+ list_for_each_entry_safe(buf, n, &submit_list, b_list) {
+ ext2_submit_buffer_write(sb, buf);
+ }
+ blk_finish_plug(&plug);
+
+ return ext2_sync_buffer_cache_wait(&submit_list);
+}
+
+static struct ext2_buffer *ext2_lookup_buffer_cache(struct super_block *sb, sector_t block)
+{
+ struct ext2_sb_info *sbi = EXT2_SB(sb);
+ struct ext2_buffer_cache *bc = &sbi->s_buffer_cache;
+ struct rhashtable *buffer_cache = &bc->bc_hash;
+ struct ext2_buffer *found = NULL;
+
+ rcu_read_lock();
+ found = rhashtable_lookup(buffer_cache, &block, buffer_cache_params);
+ if (found && !refcount_inc_not_zero(&found->b_refcount))
+ found = NULL;
+ rcu_read_unlock();
+
+ return found;
+}
+
+static struct ext2_buffer *ext2_init_buffer(struct super_block *sb, sector_t block, bool need_uptodate)
+{
+ struct ext2_buffer *buf;
+ gfp_t gfp = GFP_KERNEL;
+
+ buf = kmalloc(sizeof(struct ext2_buffer), GFP_KERNEL);
+ if (!buf)
+ return NULL;
+
+ buf->b_block = block;
+ buf->b_size = sb->s_blocksize;
+ buf->b_flags = 0;
+ buf->b_error = 0;
+
+ mutex_init(&buf->b_lock);
+ refcount_set(&buf->b_refcount, 1);
+ init_completion(&buf->b_complete);
+
+ if (!need_uptodate)
+ gfp |= __GFP_ZERO;
+
+ buf->b_page = alloc_page(gfp);
+ if (!buf->b_page) {
+ kfree_rcu(buf, b_rcu);
+ return NULL;
+ }
+
+ buf->b_data = page_address(buf->b_page);
+
+ return buf;
+}
+
+static void ext2_destroy_buffer(void *ptr, void *arg)
+{
+ struct ext2_buffer *buf = ptr;
+
+ WARN_ON(test_bit(EXT2_BUF_DIRTY_BIT, &buf->b_flags));
+ __free_page(buf->b_page);
+ kfree(buf);
+}
+
+void ext2_put_buffer(struct super_block *sb, struct ext2_buffer *buf)
+{
+ if (!buf)
+ return;
+
+ WARN_ON(refcount_read(&buf->b_refcount) < 1);
+ refcount_dec(&buf->b_refcount);
+}
+
+
+static struct ext2_buffer *ext2_find_get_buffer(struct super_block *sb, sector_t block, bool need_uptodate)
+{
+ int err;
+ struct ext2_buffer *buf;
+ struct ext2_buffer *new_buf;
+
+ buf = ext2_lookup_buffer_cache(sb, block);
+
+ if (!buf) {
+ new_buf = ext2_init_buffer(sb, block, need_uptodate);
+ if (!new_buf)
+ return ERR_PTR(-ENOMEM);
+
+ buf = ext2_insert_buffer_cache(sb, new_buf);
+ if (IS_ERR(buf) || buf != new_buf)
+ ext2_destroy_buffer(new_buf, NULL);
+ }
+
+ if (need_uptodate && !ext2_buffer_uptodate(buf)) {
+ err = ext2_submit_buffer_read(sb, buf);
+ if (err)
+ return ERR_PTR(err);
+ }
+
+ return buf;
+}
+
+struct ext2_buffer *ext2_get_buffer(struct super_block *sb, sector_t bno)
+{
+ return ext2_find_get_buffer(sb, bno, false);
+}
+
+struct ext2_buffer *ext2_read_buffer(struct super_block *sb, sector_t bno)
+{
+ return ext2_find_get_buffer(sb, bno, true);
+}
+
+int ext2_init_buffer_cache(struct ext2_buffer_cache *bc)
+{
+ return rhashtable_init(&bc->bc_hash, &buffer_cache_params);
+}
+
+void ext2_destroy_buffer_cache(struct ext2_buffer_cache *bc)
+{
+ rhashtable_free_and_destroy(&bc->bc_hash, ext2_destroy_buffer, NULL);
+}
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index f38bdd46e4f7..bfed70fd6430 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -18,6 +18,7 @@
#include <linux/rbtree.h>
#include <linux/mm.h>
#include <linux/highmem.h>
+#include <linux/rhashtable.h>
/* XXX Here for now... not interested in restructing headers JUST now */
@@ -61,6 +62,29 @@ struct ext2_block_alloc_info {
ext2_fsblk_t last_alloc_physical_block;
};
+struct ext2_buffer {
+ sector_t b_block;
+ struct rhash_head b_rhash;
+ struct rcu_head b_rcu;
+ struct page *b_page;
+ size_t b_size;
+ char *b_data;
+ unsigned long b_flags;
+ refcount_t b_refcount;
+ struct mutex b_lock;
+ struct completion b_complete;
+ struct list_head b_list;
+ int b_error;
+};
+
+/* ext2_buffer flags */
+#define EXT2_BUF_DIRTY_BIT 0
+#define EXT2_BUF_UPTODATE_BIT 1
+
+struct ext2_buffer_cache {
+ struct rhashtable bc_hash;
+};
+
#define rsv_start rsv_window._rsv_start
#define rsv_end rsv_window._rsv_end
@@ -79,7 +103,7 @@ struct ext2_sb_info {
unsigned long s_groups_count; /* Number of groups in the fs */
unsigned long s_overhead_last; /* Last calculated overhead */
unsigned long s_blocks_last; /* Last seen block count */
- struct buffer_head * s_sbh; /* Buffer containing the super block */
+ struct ext2_buffer * s_sbuf; /* Buffer containing the super block */
struct ext2_super_block * s_es; /* Pointer to the super block in the buffer */
struct buffer_head ** s_group_desc;
unsigned long s_mount_opt;
@@ -116,6 +140,7 @@ struct ext2_sb_info {
struct mb_cache *s_ea_block_cache;
struct dax_device *s_daxdev;
u64 s_dax_part_off;
+ struct ext2_buffer_cache s_buffer_cache;
};
static inline spinlock_t *
@@ -716,6 +741,22 @@ extern int ext2_should_retry_alloc(struct super_block *sb, int *retries);
extern void ext2_init_block_alloc_info(struct inode *);
extern void ext2_rsv_window_add(struct super_block *sb, struct ext2_reserve_window_node *rsv);
+/* cache.c */
+extern void ext2_buffer_lock(struct ext2_buffer *);
+extern void ext2_buffer_unlock(struct ext2_buffer *);
+extern int ext2_init_buffer_cache(struct ext2_buffer_cache *);
+extern void ext2_destroy_buffer_cache(struct ext2_buffer_cache *);
+extern int ext2_sync_buffer_wait(struct super_block *, struct ext2_buffer *);
+extern int ext2_sync_buffer_cache(struct super_block *);
+extern struct ext2_buffer *ext2_get_buffer(struct super_block *, sector_t);
+extern struct ext2_buffer *ext2_read_buffer(struct super_block *, sector_t);
+extern void ext2_put_buffer(struct super_block *, struct ext2_buffer *);
+extern void ext2_buffer_set_dirty(struct ext2_buffer *);
+extern void ext2_buffer_set_uptodate(struct ext2_buffer *);
+extern void ext2_buffer_clear_uptodate(struct ext2_buffer *);
+extern int ext2_buffer_error(struct ext2_buffer *);
+extern void ext2_buffer_clear_error(struct ext2_buffer *);
+
/* dir.c */
int ext2_add_link(struct dentry *, struct inode *);
int ext2_inode_by_name(struct inode *dir,
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 37f7ce56adce..ac53f587d140 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -168,7 +168,8 @@ static void ext2_put_super (struct super_block * sb)
percpu_counter_destroy(&sbi->s_freeblocks_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
- brelse (sbi->s_sbh);
+ ext2_put_buffer (sb, sbi->s_sbuf);
+ ext2_destroy_buffer_cache(&sbi->s_buffer_cache);
sb->s_fs_info = NULL;
kfree(sbi->s_blockgroup_lock);
fs_put_dax(sbi->s_daxdev, NULL);
@@ -803,7 +804,7 @@ static unsigned long descriptor_loc(struct super_block *sb,
static int ext2_fill_super(struct super_block *sb, void *data, int silent)
{
- struct buffer_head * bh;
+ struct ext2_buffer * buf;
struct ext2_sb_info * sbi;
struct ext2_super_block * es;
struct inode *root;
@@ -835,6 +836,12 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off,
NULL, NULL);
+ ret = ext2_init_buffer_cache(&sbi->s_buffer_cache);
+ if (ret) {
+ ext2_msg(sb, KERN_ERR, "error: unable to create buffer cache");
+ goto failed_sbi;
+ }
+
spin_lock_init(&sbi->s_lock);
ret = -EINVAL;
@@ -862,7 +869,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
logic_sb_block = sb_block;
}
- if (!(bh = sb_bread(sb, logic_sb_block))) {
+ if (IS_ERR(buf = ext2_read_buffer(sb, logic_sb_block))) {
ext2_msg(sb, KERN_ERR, "error: unable to read superblock");
goto failed_sbi;
}
@@ -870,7 +877,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
* Note: s_es must be initialized as soon as possible because
* some ext2 macro-instructions depend on its value
*/
- es = (struct ext2_super_block *) (((char *)bh->b_data) + offset);
+ es = (struct ext2_super_block *) (((char *)buf->b_data) + offset);
sbi->s_es = es;
sb->s_magic = le16_to_cpu(es->s_magic);
@@ -966,7 +973,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
/* If the blocksize doesn't match, re-read the thing.. */
if (sb->s_blocksize != blocksize) {
- brelse(bh);
+ ext2_buffer_clear_uptodate(buf);
+ ext2_put_buffer(sb, buf);
if (!sb_set_blocksize(sb, blocksize)) {
ext2_msg(sb, KERN_ERR,
@@ -976,13 +984,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
logic_sb_block = (sb_block*BLOCK_SIZE) / blocksize;
offset = (sb_block*BLOCK_SIZE) % blocksize;
- bh = sb_bread(sb, logic_sb_block);
- if(!bh) {
+ buf = ext2_read_buffer(sb, logic_sb_block);
+ if(IS_ERR(buf)) {
ext2_msg(sb, KERN_ERR, "error: couldn't read"
"superblock on 2nd try");
goto failed_sbi;
}
- es = (struct ext2_super_block *) (((char *)bh->b_data) + offset);
+ es = (struct ext2_super_block *) (((char *)buf->b_data) + offset);
sbi->s_es = es;
if (es->s_magic != cpu_to_le16(EXT2_SUPER_MAGIC)) {
ext2_msg(sb, KERN_ERR, "error: magic mismatch");
@@ -1021,7 +1029,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_inodes_per_block;
sbi->s_desc_per_block = sb->s_blocksize /
sizeof (struct ext2_group_desc);
- sbi->s_sbh = bh;
+ sbi->s_sbuf = buf;
sbi->s_mount_state = le16_to_cpu(es->s_state);
sbi->s_addr_per_block_bits =
ilog2 (EXT2_ADDR_PER_BLOCK(sb));
@@ -1031,7 +1039,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
if (sb->s_magic != EXT2_SUPER_MAGIC)
goto cantfind_ext2;
- if (sb->s_blocksize != bh->b_size) {
+ if (sb->s_blocksize != buf->b_size) {
if (!silent)
ext2_msg(sb, KERN_ERR, "error: unsupported blocksize");
goto failed_mount;
@@ -1213,7 +1221,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
kvfree(sbi->s_group_desc);
kfree(sbi->s_debts);
failed_mount:
- brelse(bh);
+ ext2_put_buffer(sb, buf);
failed_sbi:
fs_put_dax(sbi->s_daxdev, NULL);
sb->s_fs_info = NULL;
@@ -1224,9 +1232,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
static void ext2_clear_super_error(struct super_block *sb)
{
- struct buffer_head *sbh = EXT2_SB(sb)->s_sbh;
+ struct ext2_buffer *sbuf = EXT2_SB(sb)->s_sbuf;
- if (buffer_write_io_error(sbh)) {
+ if (ext2_buffer_error(sbuf)) {
/*
* Oh, dear. A previous attempt to write the
* superblock failed. This could happen because the
@@ -1237,8 +1245,8 @@ static void ext2_clear_super_error(struct super_block *sb)
*/
ext2_msg(sb, KERN_ERR,
"previous I/O error to superblock detected");
- clear_buffer_write_io_error(sbh);
- set_buffer_uptodate(sbh);
+ ext2_buffer_clear_error(sbuf);
+ ext2_buffer_set_uptodate(sbuf);
}
}
@@ -1252,9 +1260,9 @@ void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es,
es->s_wtime = cpu_to_le32(ktime_get_real_seconds());
/* unlock before we do IO */
spin_unlock(&EXT2_SB(sb)->s_lock);
- mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
+ ext2_buffer_set_dirty(EXT2_SB(sb)->s_sbuf);
if (wait)
- sync_dirty_buffer(EXT2_SB(sb)->s_sbh);
+ ext2_sync_buffer_wait(sb, EXT2_SB(sb)->s_sbuf);
}
/*
@@ -1271,13 +1279,19 @@ static int ext2_sync_fs(struct super_block *sb, int wait)
{
struct ext2_sb_info *sbi = EXT2_SB(sb);
struct ext2_super_block *es = EXT2_SB(sb)->s_es;
+ int err = 0;
/*
* Write quota structures to quota file, sync_blockdev() will write
* them to disk later
*/
- dquot_writeback_dquots(sb, -1);
+ err = dquot_writeback_dquots(sb, -1);
+ if (err)
+ goto out;
+
+ err = ext2_sync_buffer_cache(sb);
+out:
spin_lock(&sbi->s_lock);
if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) {
ext2_debug("setting valid to 0\n");
@@ -1285,7 +1299,7 @@ static int ext2_sync_fs(struct super_block *sb, int wait)
}
spin_unlock(&sbi->s_lock);
ext2_sync_super(sb, es, wait);
- return 0;
+ return err;
}
static int ext2_freeze(struct super_block *sb)
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index c885dcc3bd0d..1eb4a8607f67 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -387,7 +387,7 @@ static void ext2_xattr_update_super_block(struct super_block *sb)
ext2_update_dynamic_rev(sb);
EXT2_SET_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR);
spin_unlock(&EXT2_SB(sb)->s_lock);
- mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
+ ext2_buffer_set_dirty(EXT2_SB(sb)->s_sbuf);
}
/*
--
2.43.0
Powered by blists - more mailing lists