[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <o7udqn5qfsay5kt4dlewtms23je65prmvqqlc6szbxtpke2v3f@bj2fnzdplnzn>
Date: Wed, 12 Nov 2025 16:56:53 +0100
From: Jan Kara <jack@...e.cz>
To: libaokun@...weicloud.com
Cc: linux-ext4@...r.kernel.org, tytso@....edu, adilger.kernel@...ger.ca,
jack@...e.cz, linux-kernel@...r.kernel.org, kernel@...kajraghav.com,
mcgrof@...nel.org, ebiggers@...nel.org, willy@...radead.org, yi.zhang@...wei.com,
yangerkun@...wei.com, chengzhihao1@...wei.com, libaokun1@...wei.com
Subject: Re: [PATCH v3 21/24] ext4: make data=journal support large block size
On Tue 11-11-25 22:26:31, libaokun@...weicloud.com wrote:
> From: Baokun Li <libaokun1@...wei.com>
>
> Currently, ext4_set_inode_mapping_order() does not set max folio order
> for files with the data journalling flag. For files that already have
> large folios enabled, ext4_inode_journal_mode() ignores the data
> journalling flag once max folio order is set.
>
> This is not because data journalling cannot work with large folios, but
> because credit estimates will go through the roof if there are too many
> blocks per folio.
>
> Since the real constraint is blocks-per-folio, to support data=journal
> under LBS, we now set max folio order to be equal to min folio order for
> files with the journalling flag. When LBS is disabled, the max folio order
> remains unset as before.
>
> Therefore, before ext4_change_inode_journal_flag() switches the journalling
> mode, we now call filemap_write_and_wait() unconditionally and then
> truncate_pagecache() to drop all page cache for that inode.
>
> After that, once the journalling mode has been switched, we can safely
> reset the inode mapping order, and the mapping_large_folio_support() check
> in ext4_inode_journal_mode() can be removed.
>
> Suggested-by: Jan Kara <jack@...e.cz>
> Signed-off-by: Baokun Li <libaokun1@...wei.com>
Looks good. Feel free to add:
Reviewed-by: Jan Kara <jack@...e.cz>
Honza
> ---
> fs/ext4/ext4_jbd2.c | 3 +--
> fs/ext4/inode.c | 32 ++++++++++++++++++--------------
> 2 files changed, 19 insertions(+), 16 deletions(-)
>
> diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
> index a0e66bc10093..05e5946ed9b3 100644
> --- a/fs/ext4/ext4_jbd2.c
> +++ b/fs/ext4/ext4_jbd2.c
> @@ -16,8 +16,7 @@ int ext4_inode_journal_mode(struct inode *inode)
> ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE) ||
> test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
> (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) &&
> - !test_opt(inode->i_sb, DELALLOC) &&
> - !mapping_large_folio_support(inode->i_mapping))) {
> + !test_opt(inode->i_sb, DELALLOC))) {
> /* We do not support data journalling for encrypted data */
> if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode))
> return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 22d215f90c64..613a989bf750 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -5152,9 +5152,6 @@ static bool ext4_should_enable_large_folio(struct inode *inode)
>
> if (!S_ISREG(inode->i_mode))
> return false;
> - if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
> - ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
> - return false;
> if (ext4_has_feature_verity(sb))
> return false;
> if (ext4_has_feature_encrypt(sb))
> @@ -5172,12 +5169,20 @@ static bool ext4_should_enable_large_folio(struct inode *inode)
> umin(MAX_PAGECACHE_ORDER, (11 + (i)->i_blkbits - PAGE_SHIFT))
> void ext4_set_inode_mapping_order(struct inode *inode)
> {
> + u32 max_order;
> +
> if (!ext4_should_enable_large_folio(inode))
> return;
>
> + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
> + ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
> + max_order = EXT4_SB(inode->i_sb)->s_min_folio_order;
> + else
> + max_order = EXT4_MAX_PAGECACHE_ORDER(inode);
> +
> mapping_set_folio_order_range(inode->i_mapping,
> EXT4_SB(inode->i_sb)->s_min_folio_order,
> - EXT4_MAX_PAGECACHE_ORDER(inode));
> + max_order);
> }
>
> struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
> @@ -6553,14 +6558,14 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
> * dirty data which can be converted only after flushing the dirty
> * data (and journalled aops don't know how to handle these cases).
> */
> - if (val) {
> - filemap_invalidate_lock(inode->i_mapping);
> - err = filemap_write_and_wait(inode->i_mapping);
> - if (err < 0) {
> - filemap_invalidate_unlock(inode->i_mapping);
> - return err;
> - }
> + filemap_invalidate_lock(inode->i_mapping);
> + err = filemap_write_and_wait(inode->i_mapping);
> + if (err < 0) {
> + filemap_invalidate_unlock(inode->i_mapping);
> + return err;
> }
> + /* Before switching the inode journalling mode, evict all the page cache. */
> + truncate_pagecache(inode, 0);
>
> alloc_ctx = ext4_writepages_down_write(inode->i_sb);
> jbd2_journal_lock_updates(journal);
> @@ -6585,12 +6590,11 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
> ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
> }
> ext4_set_aops(inode);
> + ext4_set_inode_mapping_order(inode);
>
> jbd2_journal_unlock_updates(journal);
> ext4_writepages_up_write(inode->i_sb, alloc_ctx);
> -
> - if (val)
> - filemap_invalidate_unlock(inode->i_mapping);
> + filemap_invalidate_unlock(inode->i_mapping);
>
> /* Finally we can mark the inode as dirty. */
>
> --
> 2.46.1
>
--
Jan Kara <jack@...e.com>
SUSE Labs, CR
Powered by blists - more mailing lists