lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20251211135206.14006-1-almaz.alexandrovich@paragon-software.com>
Date: Thu, 11 Dec 2025 14:52:06 +0100
From: Konstantin Komarov <almaz.alexandrovich@...agon-software.com>
To: <ntfs3@...ts.linux.dev>
CC: <linux-kernel@...r.kernel.org>, <linux-fsdevel@...r.kernel.org>,
	Konstantin Komarov <almaz.alexandrovich@...agon-software.com>
Subject: [PATCH] fs/ntfs3: allow readdir() to finish after directory mutations without rewinddir()

This patch introduces a per-directory version counter that increments on
each directory modification (indx_insert_entry() / indx_delete_entry()).
ntfs_readdir() uses this version to detect whether the directory has
changed since enumeration began. If readdir() reaches end-of-directory
but the version has changed, the walk restarts from the beginning of the
index tree instead of returning prematurely. This provides rmdir-like
behavior for tools that remove entries as they enumerate them.

Prior to this change, bonnie++ directory operations could fail due to
premature termination of readdir() during concurrent index updates.
With this patch applied, bonnie++ completes successfully with no errors.

Signed-off-by: Konstantin Komarov <almaz.alexandrovich@...agon-software.com>
---
 fs/ntfs3/dir.c     | 102 ++++++++++++++++++++++++++++++++-------------
 fs/ntfs3/index.c   |   2 +
 fs/ntfs3/ntfs_fs.h |   1 +
 3 files changed, 76 insertions(+), 29 deletions(-)

diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c
index 1dbb661ffe0f..24cb64d5521a 100644
--- a/fs/ntfs3/dir.c
+++ b/fs/ntfs3/dir.c
@@ -392,33 +392,77 @@ static int ntfs_read_hdr(struct ntfs_sb_info *sbi, struct ntfs_inode *ni,
  * ntfs_readdir - file_operations::iterate_shared
  *
  * Use non sorted enumeration.
- * We have an example of broken volume where sorted enumeration
- * counts each name twice.
+ * Sorted enumeration may result infinite loop if names tree contains loop.
  */
 static int ntfs_readdir(struct file *file, struct dir_context *ctx)
 {
 	const struct INDEX_ROOT *root;
-	u64 vbo;
 	size_t bit;
-	loff_t eod;
 	int err = 0;
 	struct inode *dir = file_inode(file);
 	struct ntfs_inode *ni = ntfs_i(dir);
 	struct super_block *sb = dir->i_sb;
 	struct ntfs_sb_info *sbi = sb->s_fs_info;
 	loff_t i_size = i_size_read(dir);
-	u32 pos = ctx->pos;
+	u64 pos = ctx->pos;
 	u8 *name = NULL;
 	struct indx_node *node = NULL;
 	u8 index_bits = ni->dir.index_bits;
+	size_t max_bit = i_size >> ni->dir.index_bits;
+	loff_t eod = i_size + sbi->record_size;
 
 	/* Name is a buffer of PATH_MAX length. */
 	static_assert(NTFS_NAME_LEN * 4 < PATH_MAX);
 
-	eod = i_size + sbi->record_size;
+	if (!pos) {
+		/*
+		 * ni->dir.version increments each directory change.
+		 * Save the initial value of ni->dir.version.
+		 */
+		file->private_data = (void *)ni->dir.version;
+	}
 
-	if (pos >= eod)
-		return 0;
+	if (pos >= eod) {
+		if (file->private_data == (void *)ni->dir.version) {
+			/* No changes since first readdir. */
+			return 0;
+		}
+
+		/*
+		 * Handle directories that changed after the initial readdir().
+		 *
+		 * Some user space code implements recursive removal like this instead
+		 * of calling rmdir(2) directly:
+		 *
+		 *      fd = opendir(path);
+		 *      while ((dent = readdir(fd)))
+		 *              unlinkat(dirfd(fd), dent->d_name, 0);
+		 *      closedir(fd);
+		 *
+		 * POSIX leaves unspecified what readdir() should return once the
+		 * directory has been modified after opendir()/rewinddir(), so this
+		 * pattern is not guaranteed to work on all filesystems or platforms.
+		 *
+		 * In ntfs3 the internal name tree may be reshaped while entries are
+		 * being removed, so there is no stable anchor for continuing a
+		 * single-pass walk based on the original readdir() order.
+		 *
+		 * In practice some widely used tools (for example certain rm(1)
+		 * implementations) have used this readdir()/unlink() loop, and some
+		 * filesystems behave in a way that effectively makes it work in the
+		 * common case.
+		 *
+		 * The code below follows that practice and tries to provide
+		 * "rmdir-like" behaviour for such callers on ntfs3, even though the
+		 * situation is not strictly defined by the APIs.
+		 *
+		 * Apple documents the same readdir()/unlink() issue and a workaround
+		 * for HFS file systems in:
+		 * https://web.archive.org/web/20220122122948/https:/support.apple.com/kb/TA21420?locale=en_US
+		 */
+		ctx->pos = pos = 3;
+		file->private_data = (void *)ni->dir.version;
+	}
 
 	if (!dir_emit_dots(file, ctx))
 		return 0;
@@ -454,35 +498,31 @@ static int ntfs_readdir(struct file *file, struct dir_context *ctx)
 	if (pos >= sbi->record_size) {
 		bit = (pos - sbi->record_size) >> index_bits;
 	} else {
+		/*
+		 * Add each name from root in 'ctx'.
+		 */
 		err = ntfs_read_hdr(sbi, ni, &root->ihdr, 0, pos, name, ctx);
 		if (err)
 			goto out;
 		bit = 0;
 	}
 
-	if (!i_size) {
-		ctx->pos = eod;
-		goto out;
-	}
-
-	for (;;) {
-		vbo = (u64)bit << index_bits;
-		if (vbo >= i_size) {
-			ctx->pos = eod;
-			goto out;
-		}
-
+	/*
+	 * Enumerate indexes until the end of dir.
+	 */
+	for (; bit < max_bit; bit += 1) {
+		/* Get the next used index. */
 		err = indx_used_bit(&ni->dir, ni, &bit);
 		if (err)
 			goto out;
 
 		if (bit == MINUS_ONE_T) {
-			ctx->pos = eod;
-			goto out;
+			/* no more used indexes. end of dir. */
+			break;
 		}
 
-		vbo = (u64)bit << index_bits;
-		if (vbo >= i_size) {
+		if (bit >= max_bit) {
+			/* Corrupted directory. */
 			err = -EINVAL;
 			goto out;
 		}
@@ -492,20 +532,24 @@ static int ntfs_readdir(struct file *file, struct dir_context *ctx)
 		if (err)
 			goto out;
 
+		/*
+		 * Add each name from index in 'ctx'.
+		 */
 		err = ntfs_read_hdr(sbi, ni, &node->index->ihdr,
-				    vbo + sbi->record_size, pos, name, ctx);
+				    ((u64)bit << index_bits) + sbi->record_size,
+				    pos, name, ctx);
 		if (err)
 			goto out;
-
-		bit += 1;
 	}
 
 out:
-
 	__putname(name);
 	put_indx_node(node);
 
-	if (err == 1) {
+	if (!err) {
+		/* End of directory. */
+		ctx->pos = eod;
+	} else if (err == 1) {
 		/* 'ctx' is full. */
 		err = 0;
 	} else if (err == -ENOENT) {
diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c
index f0cfa000ffbb..d08bee3c20fa 100644
--- a/fs/ntfs3/index.c
+++ b/fs/ntfs3/index.c
@@ -2002,6 +2002,7 @@ int indx_insert_entry(struct ntfs_index *indx, struct ntfs_inode *ni,
 					      fnd->level - 1, fnd);
 	}
 
+	indx->version += 1;
 out:
 	fnd_put(fnd_a);
 out1:
@@ -2649,6 +2650,7 @@ int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni,
 		mi->dirty = true;
 	}
 
+	indx->version += 1;
 out:
 	fnd_put(fnd2);
 out1:
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h
index 18b14f7db4ad..cee7b73b9670 100644
--- a/fs/ntfs3/ntfs_fs.h
+++ b/fs/ntfs3/ntfs_fs.h
@@ -191,6 +191,7 @@ struct ntfs_index {
 	struct runs_tree alloc_run;
 	/* read/write access to 'bitmap_run'/'alloc_run' while ntfs_readdir */
 	struct rw_semaphore run_lock;
+	size_t version; /* increment each change */
 
 	/*TODO: Remove 'cmp'. */
 	NTFS_CMP_FUNC cmp;
-- 
2.43.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ