[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20251211135206.14006-1-almaz.alexandrovich@paragon-software.com>
Date: Thu, 11 Dec 2025 14:52:06 +0100
From: Konstantin Komarov <almaz.alexandrovich@...agon-software.com>
To: <ntfs3@...ts.linux.dev>
CC: <linux-kernel@...r.kernel.org>, <linux-fsdevel@...r.kernel.org>,
Konstantin Komarov <almaz.alexandrovich@...agon-software.com>
Subject: [PATCH] fs/ntfs3: allow readdir() to finish after directory mutations without rewinddir()
This patch introduces a per-directory version counter that increments on
each directory modification (indx_insert_entry() / indx_delete_entry()).
ntfs_readdir() uses this version to detect whether the directory has
changed since enumeration began. If readdir() reaches end-of-directory
but the version has changed, the walk restarts from the beginning of the
index tree instead of returning prematurely. This provides rmdir-like
behavior for tools that remove entries as they enumerate them.
Prior to this change, bonnie++ directory operations could fail due to
premature termination of readdir() during concurrent index updates.
With this patch applied, bonnie++ completes successfully with no errors.
Signed-off-by: Konstantin Komarov <almaz.alexandrovich@...agon-software.com>
---
fs/ntfs3/dir.c | 102 ++++++++++++++++++++++++++++++++-------------
fs/ntfs3/index.c | 2 +
fs/ntfs3/ntfs_fs.h | 1 +
3 files changed, 76 insertions(+), 29 deletions(-)
diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c
index 1dbb661ffe0f..24cb64d5521a 100644
--- a/fs/ntfs3/dir.c
+++ b/fs/ntfs3/dir.c
@@ -392,33 +392,77 @@ static int ntfs_read_hdr(struct ntfs_sb_info *sbi, struct ntfs_inode *ni,
* ntfs_readdir - file_operations::iterate_shared
*
* Use non sorted enumeration.
- * We have an example of broken volume where sorted enumeration
- * counts each name twice.
+ * Sorted enumeration may result infinite loop if names tree contains loop.
*/
static int ntfs_readdir(struct file *file, struct dir_context *ctx)
{
const struct INDEX_ROOT *root;
- u64 vbo;
size_t bit;
- loff_t eod;
int err = 0;
struct inode *dir = file_inode(file);
struct ntfs_inode *ni = ntfs_i(dir);
struct super_block *sb = dir->i_sb;
struct ntfs_sb_info *sbi = sb->s_fs_info;
loff_t i_size = i_size_read(dir);
- u32 pos = ctx->pos;
+ u64 pos = ctx->pos;
u8 *name = NULL;
struct indx_node *node = NULL;
u8 index_bits = ni->dir.index_bits;
+ size_t max_bit = i_size >> ni->dir.index_bits;
+ loff_t eod = i_size + sbi->record_size;
/* Name is a buffer of PATH_MAX length. */
static_assert(NTFS_NAME_LEN * 4 < PATH_MAX);
- eod = i_size + sbi->record_size;
+ if (!pos) {
+ /*
+ * ni->dir.version increments each directory change.
+ * Save the initial value of ni->dir.version.
+ */
+ file->private_data = (void *)ni->dir.version;
+ }
- if (pos >= eod)
- return 0;
+ if (pos >= eod) {
+ if (file->private_data == (void *)ni->dir.version) {
+ /* No changes since first readdir. */
+ return 0;
+ }
+
+ /*
+ * Handle directories that changed after the initial readdir().
+ *
+ * Some user space code implements recursive removal like this instead
+ * of calling rmdir(2) directly:
+ *
+ * fd = opendir(path);
+ * while ((dent = readdir(fd)))
+ * unlinkat(dirfd(fd), dent->d_name, 0);
+ * closedir(fd);
+ *
+ * POSIX leaves unspecified what readdir() should return once the
+ * directory has been modified after opendir()/rewinddir(), so this
+ * pattern is not guaranteed to work on all filesystems or platforms.
+ *
+ * In ntfs3 the internal name tree may be reshaped while entries are
+ * being removed, so there is no stable anchor for continuing a
+ * single-pass walk based on the original readdir() order.
+ *
+ * In practice some widely used tools (for example certain rm(1)
+ * implementations) have used this readdir()/unlink() loop, and some
+ * filesystems behave in a way that effectively makes it work in the
+ * common case.
+ *
+ * The code below follows that practice and tries to provide
+ * "rmdir-like" behaviour for such callers on ntfs3, even though the
+ * situation is not strictly defined by the APIs.
+ *
+ * Apple documents the same readdir()/unlink() issue and a workaround
+ * for HFS file systems in:
+ * https://web.archive.org/web/20220122122948/https:/support.apple.com/kb/TA21420?locale=en_US
+ */
+ ctx->pos = pos = 3;
+ file->private_data = (void *)ni->dir.version;
+ }
if (!dir_emit_dots(file, ctx))
return 0;
@@ -454,35 +498,31 @@ static int ntfs_readdir(struct file *file, struct dir_context *ctx)
if (pos >= sbi->record_size) {
bit = (pos - sbi->record_size) >> index_bits;
} else {
+ /*
+ * Add each name from root in 'ctx'.
+ */
err = ntfs_read_hdr(sbi, ni, &root->ihdr, 0, pos, name, ctx);
if (err)
goto out;
bit = 0;
}
- if (!i_size) {
- ctx->pos = eod;
- goto out;
- }
-
- for (;;) {
- vbo = (u64)bit << index_bits;
- if (vbo >= i_size) {
- ctx->pos = eod;
- goto out;
- }
-
+ /*
+ * Enumerate indexes until the end of dir.
+ */
+ for (; bit < max_bit; bit += 1) {
+ /* Get the next used index. */
err = indx_used_bit(&ni->dir, ni, &bit);
if (err)
goto out;
if (bit == MINUS_ONE_T) {
- ctx->pos = eod;
- goto out;
+ /* no more used indexes. end of dir. */
+ break;
}
- vbo = (u64)bit << index_bits;
- if (vbo >= i_size) {
+ if (bit >= max_bit) {
+ /* Corrupted directory. */
err = -EINVAL;
goto out;
}
@@ -492,20 +532,24 @@ static int ntfs_readdir(struct file *file, struct dir_context *ctx)
if (err)
goto out;
+ /*
+ * Add each name from index in 'ctx'.
+ */
err = ntfs_read_hdr(sbi, ni, &node->index->ihdr,
- vbo + sbi->record_size, pos, name, ctx);
+ ((u64)bit << index_bits) + sbi->record_size,
+ pos, name, ctx);
if (err)
goto out;
-
- bit += 1;
}
out:
-
__putname(name);
put_indx_node(node);
- if (err == 1) {
+ if (!err) {
+ /* End of directory. */
+ ctx->pos = eod;
+ } else if (err == 1) {
/* 'ctx' is full. */
err = 0;
} else if (err == -ENOENT) {
diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c
index f0cfa000ffbb..d08bee3c20fa 100644
--- a/fs/ntfs3/index.c
+++ b/fs/ntfs3/index.c
@@ -2002,6 +2002,7 @@ int indx_insert_entry(struct ntfs_index *indx, struct ntfs_inode *ni,
fnd->level - 1, fnd);
}
+ indx->version += 1;
out:
fnd_put(fnd_a);
out1:
@@ -2649,6 +2650,7 @@ int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni,
mi->dirty = true;
}
+ indx->version += 1;
out:
fnd_put(fnd2);
out1:
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h
index 18b14f7db4ad..cee7b73b9670 100644
--- a/fs/ntfs3/ntfs_fs.h
+++ b/fs/ntfs3/ntfs_fs.h
@@ -191,6 +191,7 @@ struct ntfs_index {
struct runs_tree alloc_run;
/* read/write access to 'bitmap_run'/'alloc_run' while ntfs_readdir */
struct rw_semaphore run_lock;
+ size_t version; /* increment each change */
/*TODO: Remove 'cmp'. */
NTFS_CMP_FUNC cmp;
--
2.43.0
Powered by blists - more mailing lists