[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <d84de70a91656d57b27831043b6ab064c89db828.camel@ibm.com>
Date: Tue, 2 Dec 2025 20:33:28 +0000
From: Viacheslav Dubeyko <Slava.Dubeyko@....com>
To: "linux-fsdevel@...r.kernel.org" <linux-fsdevel@...r.kernel.org>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
"jkoolstra@...all.nl" <jkoolstra@...all.nl>
CC: "glaubitz@...sik.fu-berlin.de" <glaubitz@...sik.fu-berlin.de>,
"frank.li@...o.com" <frank.li@...o.com>,
"slava@...eyko.com"
<slava@...eyko.com>,
"skhan@...uxfoundation.org" <skhan@...uxfoundation.org>,
"syzbot+17cc9bb6d8d69b4139f0@...kaller.appspotmail.com"
<syzbot+17cc9bb6d8d69b4139f0@...kaller.appspotmail.com>
Subject: RE: [PATCH v2] hfs: replace BUG_ONs with error handling
On Tue, 2025-12-02 at 17:45 +0100, Jori Koolstra wrote:
> Hi Viacheslav,
>
> I am not completely sure if I understood all your suggestions, so let me reply
> with code instead (before I submit a v3).
>
> diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
> index 86a6b317b474..9835427b728e 100644
> --- a/fs/hfs/dir.c
> +++ b/fs/hfs/dir.c
> @@ -196,8 +196,8 @@ static int hfs_create(struct mnt_idmap *idmap, struct inode *dir,
> int res;
>
> inode = hfs_new_inode(dir, &dentry->d_name, mode);
> - if (!inode)
> - return -ENOMEM;
> + if (IS_ERR(inode))
> + return PTR_ERR(inode);
>
> res = hfs_cat_create(inode->i_ino, dir, &dentry->d_name, inode);
> if (res) {
> @@ -226,8 +226,8 @@ static struct dentry *hfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
> int res;
>
> inode = hfs_new_inode(dir, &dentry->d_name, S_IFDIR | mode);
> - if (!inode)
> - return ERR_PTR(-ENOMEM);
> + if (IS_ERR(inode))
> + return ERR_CAST(inode);
>
> res = hfs_cat_create(inode->i_ino, dir, &dentry->d_name, inode);
> if (res) {
> @@ -254,11 +254,24 @@ static struct dentry *hfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
> */
> static int hfs_remove(struct inode *dir, struct dentry *dentry)
> {
> + struct hfs_sb_info *sbi = HFS_SB(dir->i_sb);
> struct inode *inode = d_inode(dentry);
> int res;
>
> if (S_ISDIR(inode->i_mode) && inode->i_size != 2)
> return -ENOTEMPTY;
> +
> + if (unlikely(S_ISDIR(inode->i_mode)
I think it doesn't make sense to check whether it's a file or a folder. If any
of these values has somehow been corrupted, then the driver is not in a good state
already. So we can simply check folder_count and file_count. I believe the
probability of such an event is really low.
> + && atomic64_read(&sbi->folder_count) > U32_MAX)) {
> + pr_err("cannot remove directory: folder count exceeds limit\n");
We can simply use a generalized message here: "cannot remove file/folder: count
exceeds limit".
> + return -EINVAL;
It's not about input values, from my point of view. We have
folder_count/file_count out of range. I think -ERANGE is a better fit here.
> + }
> + if (unlikely(!S_ISDIR(inode->i_mode)
> + && atomic64_read(&sbi->file_count) > U32_MAX)) {
> + pr_err("cannot remove file: file count exceeds limit\n");
> + return -EINVAL;
> + }
> +
> res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name);
> if (res)
> return res;
> diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
> index 81ad93e6312f..0fec6fd1cde7 100644
> --- a/fs/hfs/inode.c
> +++ b/fs/hfs/inode.c
> @@ -186,16 +186,23 @@ struct inode *hfs_new_inode(struct inode *dir, const struct qstr *name, umode_t
> s64 next_id;
> s64 file_count;
> s64 folder_count;
> + int err = -ENOMEM;
>
> if (!inode)
> - return NULL;
> + goto out_err;
> +
> + err = -EINVAL;
We didn't provide next_id/folder_count/file_count as input values here. It
sounds like these values are out of range. Maybe -ERANGE here?
>
> mutex_init(&HFS_I(inode)->extents_lock);
> INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list);
> spin_lock_init(&HFS_I(inode)->open_dir_lock);
> hfs_cat_build_key(sb, (btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name);
> next_id = atomic64_inc_return(&HFS_SB(sb)->next_id);
> - BUG_ON(next_id > U32_MAX);
> + if (next_id > U32_MAX) {
> + atomic64_dec(&HFS_SB(sb)->next_id);
> + pr_err("cannot create new inode: next CNID exceeds limit\n");
> + goto out_discard;
> + }
> inode->i_ino = (u32)next_id;
> inode->i_mode = mode;
> inode->i_uid = current_fsuid();
> @@ -209,7 +216,11 @@ struct inode *hfs_new_inode(struct inode *dir, const struct qstr *name, umode_t
> if (S_ISDIR(mode)) {
> inode->i_size = 2;
> folder_count = atomic64_inc_return(&HFS_SB(sb)->folder_count);
> - BUG_ON(folder_count > U32_MAX);
> + if (folder_count > U32_MAX) {
> + atomic64_dec(&HFS_SB(sb)->folder_count);
> + pr_err("cannot create new inode: folder count exceeds limit\n");
> + goto out_discard;
> + }
> if (dir->i_ino == HFS_ROOT_CNID)
> HFS_SB(sb)->root_dirs++;
> inode->i_op = &hfs_dir_inode_operations;
> @@ -219,7 +230,11 @@ struct inode *hfs_new_inode(struct inode *dir, const struct qstr *name, umode_t
> } else if (S_ISREG(mode)) {
> HFS_I(inode)->clump_blocks = HFS_SB(sb)->clumpablks;
> file_count = atomic64_inc_return(&HFS_SB(sb)->file_count);
> - BUG_ON(file_count > U32_MAX);
> + if (file_count > U32_MAX) {
> + atomic64_dec(&HFS_SB(sb)->file_count);
> + pr_err("cannot create new inode: file count exceeds limit\n");
> + goto out_discard;
> + }
> if (dir->i_ino == HFS_ROOT_CNID)
> HFS_SB(sb)->root_files++;
> inode->i_op = &hfs_file_inode_operations;
> @@ -243,6 +258,11 @@ struct inode *hfs_new_inode(struct inode *dir, const struct qstr *name, umode_t
> hfs_mark_mdb_dirty(sb);
>
> return inode;
> +
> + out_discard:
> + iput(inode);
> + out_err:
> + return ERR_PTR(err);
> }
>
> void hfs_delete_inode(struct inode *inode)
> @@ -251,7 +271,6 @@ void hfs_delete_inode(struct inode *inode)
>
> hfs_dbg("ino %lu\n", inode->i_ino);
> if (S_ISDIR(inode->i_mode)) {
> - BUG_ON(atomic64_read(&HFS_SB(sb)->folder_count) > U32_MAX);
> atomic64_dec(&HFS_SB(sb)->folder_count);
> if (HFS_I(inode)->cat_key.ParID == cpu_to_be32(HFS_ROOT_CNID))
> HFS_SB(sb)->root_dirs--;
> @@ -260,7 +279,6 @@ void hfs_delete_inode(struct inode *inode)
> return;
> }
>
> - BUG_ON(atomic64_read(&HFS_SB(sb)->file_count) > U32_MAX);
> atomic64_dec(&HFS_SB(sb)->file_count);
> if (HFS_I(inode)->cat_key.ParID == cpu_to_be32(HFS_ROOT_CNID))
> HFS_SB(sb)->root_files--;
> diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
> index 53f3fae60217..907776773e25 100644
> --- a/fs/hfs/mdb.c
> +++ b/fs/hfs/mdb.c
> @@ -150,11 +150,27 @@ int hfs_mdb_get(struct super_block *sb)
>
> /* These parameters are read from and written to the MDB */
> HFS_SB(sb)->free_ablocks = be16_to_cpu(mdb->drFreeBks);
> +
> atomic64_set(&HFS_SB(sb)->next_id, be32_to_cpu(mdb->drNxtCNID));
> + if (atomic64_read(&HFS_SB(sb)->next_id) > U32_MAX) {
> + pr_err("next CNID exceeds limit — filesystem possibly corrupted. It is recommended to run fsck\n");
Maybe it would be good to have two messages here instead of one:
pr_err("next CNID exceeds limit\n");
pr_err("filesystem possibly corrupted. It is recommended to run fsck\n");
Otherwise, we have a really long string.
> + goto out_bh;
> + }
> +
> HFS_SB(sb)->root_files = be16_to_cpu(mdb->drNmFls);
> HFS_SB(sb)->root_dirs = be16_to_cpu(mdb->drNmRtDirs);
> +
> atomic64_set(&HFS_SB(sb)->file_count, be32_to_cpu(mdb->drFilCnt));
> + if (atomic64_read(&HFS_SB(sb)->file_count) > U32_MAX) {
> + pr_err("file count exceeds limit — filesystem possibly corrupted. It is recommended to run fsck\n");
Ditto.
> + goto out_bh;
> + }
> +
> atomic64_set(&HFS_SB(sb)->folder_count, be32_to_cpu(mdb->drDirCnt));
> + if (atomic64_read(&HFS_SB(sb)->folder_count) > U32_MAX) {
> + pr_err("folder count exceeds limit — filesystem possibly corrupted. It is recommended to run fsck\n");
Ditto.
> + goto out_bh;
> + }
I think we can mount READ-ONLY in such a case. You can consider a pattern
like [1] instead of returning an error and breaking the mount logic:
if (!(attrib & cpu_to_be16(HFS_SB_ATTRIB_UNMNT))) {
pr_warn("filesystem was not cleanly unmounted, running fsck.hfs
is recommended. mounting read-only.\n");
sb->s_flags |= SB_RDONLY;
}
>
> /* TRY to get the alternate (backup) MDB. */
> sect = part_start + part_size - 2;
> @@ -273,15 +289,12 @@ void hfs_mdb_commit(struct super_block *sb)
> /* These parameters may have been modified, so write them back */
> mdb->drLsMod = hfs_mtime();
> mdb->drFreeBks = cpu_to_be16(HFS_SB(sb)->free_ablocks);
> - BUG_ON(atomic64_read(&HFS_SB(sb)->next_id) > U32_MAX);
> mdb->drNxtCNID =
> cpu_to_be32((u32)atomic64_read(&HFS_SB(sb)->next_id));
> mdb->drNmFls = cpu_to_be16(HFS_SB(sb)->root_files);
> mdb->drNmRtDirs = cpu_to_be16(HFS_SB(sb)->root_dirs);
> - BUG_ON(atomic64_read(&HFS_SB(sb)->file_count) > U32_MAX);
> mdb->drFilCnt =
> cpu_to_be32((u32)atomic64_read(&HFS_SB(sb)->file_count));
> - BUG_ON(atomic64_read(&HFS_SB(sb)->folder_count) > U32_MAX);
> mdb->drDirCnt =
> cpu_to_be32((u32)atomic64_read(&HFS_SB(sb)->folder_count));
>
> diff --git a/fs/hfs/super.c b/fs/hfs/super.c
> index 47f50fa555a4..23c583ffe575 100644
> --- a/fs/hfs/super.c
> +++ b/fs/hfs/super.c
> @@ -32,8 +32,29 @@ static struct kmem_cache *hfs_inode_cachep;
> MODULE_DESCRIPTION("Apple Macintosh file system support");
> MODULE_LICENSE("GPL");
>
> +static bool hfs_mdb_verify(struct super_block *sb) {
> + struct hfs_sb_info *sbi = HFS_SB(sb);
> +
> + /* failure of one of the following checks indicates programmer error */
> + if (atomic64_read(&sbi->next_id) > U32_MAX)
> + pr_err("mdb invalid: next CNID exceeds limit\n");
> + else if (atomic64_read(&sbi->file_count) > U32_MAX)
> + pr_err("mdb invalid: file count exceeds limit\n");
> + else if (atomic64_read(&sbi->folder_count) > U32_MAX)
> + pr_err("mdb invalid: folder count exceeds limit\n");
> + else
> + return true;
> +
> + return false;
> +}
> +
> static int hfs_sync_fs(struct super_block *sb, int wait)
> {
> + if (!hfs_mdb_verify(sb)) {
> + pr_err("cannot sync fs because hfs_mdb_verify() failed\n");
> + return -EINVAL;
OK. I think that skipping the commit is not a good strategy here, anyway. First
of all, we are protecting against next_id/folder_count/file_count exceeding
U32_MAX, so this is a really low-probability event. Potentially, we could still
get such corruption during file system driver operations. However, even if the
next_id/folder_count/file_count values become inconsistent, FSCK can
easily recover the file system.
So, I suggest executing the check and reporting the potential corruption with a
recommendation to run FSCK. But we need to execute the commit anyway, because of
the low probability of such an event and the ease of recovering from such
corruption with FSCK. So, don't return an error here but continue the logic.
Breaking the commit is more harmful here than executing it, from my point of view.
> + }
> +
> hfs_mdb_commit(sb);
> return 0;
> }
> @@ -65,6 +86,11 @@ static void flush_mdb(struct work_struct *work)
> sbi->work_queued = 0;
> spin_unlock(&sbi->work_lock);
>
> + if (!hfs_mdb_verify(sb)) {
> + pr_err("flushing mdb failed because hfs_mdb_verify() failed\n");
> + return;
Ditto.
Thanks,
Slava.
> + }
> +
> hfs_mdb_commit(sb);
> }
>
> There is a choice to be made with the delayed work that flushes the mdb when
> it is marked dirty, because of course if the counts or CNID exceeds limit
> it is unlikely to change later on. So, flush_mdb will just keep failing. I
> don't see any other solution then marking it RO if this happens. Curious
> what you think.
>
> >
> > I had impression that HFS already has it. But even if it is not so, then it
> > sounds like another task. Let's don't mix the different problems into one
> > solution. Otherwise, we will have a huge patch.
> >
>
> Agreed. Also, I just checked and hfs does not have this behavior (yet).
>
> Thanks,
> Jori.
[1] https://elixir.bootlin.com/linux/v6.18/source/fs/hfs/mdb.c#L211
Powered by blists - more mailing lists