lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20251229105932.11360-6-linkinjeon@kernel.org>
Date: Mon, 29 Dec 2025 19:59:23 +0900
From: Namjae Jeon <linkinjeon@...nel.org>
To: viro@...iv.linux.org.uk,
	brauner@...nel.org,
	hch@...radead.org,
	hch@....de,
	tytso@....edu,
	willy@...radead.org,
	jack@...e.cz,
	djwong@...nel.org,
	josef@...icpanda.com,
	sandeen@...deen.net,
	rgoldwyn@...e.com,
	xiang@...nel.org,
	dsterba@...e.com,
	pali@...nel.org,
	ebiggers@...nel.org,
	neil@...wn.name,
	amir73il@...il.com
Cc: linux-fsdevel@...r.kernel.org,
	linux-kernel@...r.kernel.org,
	iamjoonsoo.kim@....com,
	cheol.lee@....com,
	jay.sim@....com,
	gunho.lee@....com,
	Namjae Jeon <linkinjeon@...nel.org>
Subject: [PATCH v3 05/14] ntfs: update directory operations

This updates the implementation of directory operations.

Signed-off-by: Namjae Jeon <linkinjeon@...nel.org>
---
 fs/ntfs/dir.c   | 1639 +++++++++++++-------------------
 fs/ntfs/index.c | 2397 ++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 2700 insertions(+), 1336 deletions(-)

diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 629723a8d712..26cae08a59a7 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1,26 +1,24 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * dir.c - NTFS kernel directory operations. Part of the Linux-NTFS project.
+/**
+ * NTFS kernel directory operations. Part of the Linux-NTFS project.
  *
  * Copyright (c) 2001-2007 Anton Altaparmakov
  * Copyright (c) 2002 Richard Russon
+ * Copyright (c) 2025 LG Electronics Co., Ltd.
  */
 
-#include <linux/buffer_head.h>
-#include <linux/slab.h>
 #include <linux/blkdev.h>
 
 #include "dir.h"
-#include "aops.h"
-#include "attrib.h"
 #include "mft.h"
-#include "debug.h"
 #include "ntfs.h"
+#include "index.h"
+#include "reparse.h"
 
-/*
+/**
  * The little endian Unicode string $I30 as a global constant.
  */
-ntfschar I30[5] = { cpu_to_le16('$'), cpu_to_le16('I'),
+__le16 I30[5] = { cpu_to_le16('$'), cpu_to_le16('I'),
 		cpu_to_le16('3'),	cpu_to_le16('0'), 0 };
 
 /**
@@ -61,30 +59,29 @@ ntfschar I30[5] = { cpu_to_le16('$'), cpu_to_le16('I'),
  *	       locked whilst being accessed otherwise we may find a corrupt
  *	       page due to it being under ->writepage at the moment which
  *	       applies the mst protection fixups before writing out and then
- *	       removes them again after the write is complete after which it 
+ *	       removes them again after the write is complete after which it
  *	       unlocks the page.
  */
-MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
-		const int uname_len, ntfs_name **res)
+u64 ntfs_lookup_inode_by_name(struct ntfs_inode *dir_ni, const __le16 *uname,
+		const int uname_len, struct ntfs_name **res)
 {
-	ntfs_volume *vol = dir_ni->vol;
+	struct ntfs_volume *vol = dir_ni->vol;
 	struct super_block *sb = vol->sb;
-	MFT_RECORD *m;
-	INDEX_ROOT *ir;
-	INDEX_ENTRY *ie;
-	INDEX_ALLOCATION *ia;
+	struct inode *ia_vi = NULL;
+	struct mft_record *m;
+	struct index_root *ir;
+	struct index_entry *ie;
+	struct index_block *ia;
 	u8 *index_end;
 	u64 mref;
-	ntfs_attr_search_ctx *ctx;
+	struct ntfs_attr_search_ctx *ctx;
 	int err, rc;
-	VCN vcn, old_vcn;
+	s64 vcn, old_vcn;
 	struct address_space *ia_mapping;
-	struct page *page;
-	u8 *kaddr;
-	ntfs_name *name = NULL;
+	struct folio *folio;
+	u8 *kaddr = NULL;
+	struct ntfs_name *name = NULL;
 
-	BUG_ON(!S_ISDIR(VFS_I(dir_ni)->i_mode));
-	BUG_ON(NInoAttr(dir_ni));
 	/* Get hold of the mft record for the directory. */
 	m = map_mft_record(dir_ni);
 	if (IS_ERR(m)) {
@@ -102,30 +99,30 @@ MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
 			0, ctx);
 	if (unlikely(err)) {
 		if (err == -ENOENT) {
-			ntfs_error(sb, "Index root attribute missing in "
-					"directory inode 0x%lx.",
-					dir_ni->mft_no);
+			ntfs_error(sb,
+				"Index root attribute missing in directory inode 0x%lx.",
+				dir_ni->mft_no);
 			err = -EIO;
 		}
 		goto err_out;
 	}
 	/* Get to the index root value (it's been verified in read_inode). */
-	ir = (INDEX_ROOT*)((u8*)ctx->attr +
+	ir = (struct index_root *)((u8 *)ctx->attr +
 			le16_to_cpu(ctx->attr->data.resident.value_offset));
-	index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length);
+	index_end = (u8 *)&ir->index + le32_to_cpu(ir->index.index_length);
 	/* The first index entry. */
-	ie = (INDEX_ENTRY*)((u8*)&ir->index +
+	ie = (struct index_entry *)((u8 *)&ir->index +
 			le32_to_cpu(ir->index.entries_offset));
 	/*
 	 * Loop until we exceed valid memory (corruption case) or until we
 	 * reach the last entry.
 	 */
-	for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
+	for (;; ie = (struct index_entry *)((u8 *)ie + le16_to_cpu(ie->length))) {
 		/* Bounds checks. */
-		if ((u8*)ie < (u8*)ctx->mrec || (u8*)ie +
-				sizeof(INDEX_ENTRY_HEADER) > index_end ||
-				(u8*)ie + le16_to_cpu(ie->key_length) >
-				index_end)
+		if ((u8 *)ie < (u8 *)ctx->mrec ||
+		    (u8 *)ie + sizeof(struct index_entry_header) > index_end ||
+		    (u8 *)ie + sizeof(struct index_entry_header) + le16_to_cpu(ie->key_length) >
+				index_end || (u8 *)ie + le16_to_cpu(ie->length) > index_end)
 			goto dir_err_out;
 		/*
 		 * The last entry cannot contain a name. It can however contain
@@ -133,6 +130,13 @@ MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
 		 */
 		if (ie->flags & INDEX_ENTRY_END)
 			break;
+		/* Key length should not be zero if it is not last entry. */
+		if (!ie->key_length)
+			goto dir_err_out;
+		/* Check the consistency of an index entry */
+		if (ntfs_index_entry_inconsistent(NULL, vol, ie, COLLATION_FILE_NAME,
+				dir_ni->mft_no))
+			goto dir_err_out;
 		/*
 		 * We perform a case sensitive comparison and if that matches
 		 * we are done and return the mft reference of the inode (i.e.
@@ -141,7 +145,7 @@ MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
 		 * returning.
 		 */
 		if (ntfs_are_names_equal(uname, uname_len,
-				(ntfschar*)&ie->key.file_name.file_name,
+				(__le16 *)&ie->key.file_name.file_name,
 				ie->key.file_name.file_name_length,
 				CASE_SENSITIVE, vol->upcase, vol->upcase_len)) {
 found_it:
@@ -157,7 +161,7 @@ MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
 			 */
 			if (ie->key.file_name.file_name_type == FILE_NAME_DOS) {
 				if (!name) {
-					name = kmalloc(sizeof(ntfs_name),
+					name = kmalloc(sizeof(struct ntfs_name),
 							GFP_NOFS);
 					if (!name) {
 						err = -ENOMEM;
@@ -188,30 +192,26 @@ MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
 		 * only cache the mft reference and the file name type (we set
 		 * the name length to zero for simplicity).
 		 */
-		if (!NVolCaseSensitive(vol) &&
-				ie->key.file_name.file_name_type &&
-				ntfs_are_names_equal(uname, uname_len,
-				(ntfschar*)&ie->key.file_name.file_name,
-				ie->key.file_name.file_name_length,
-				IGNORE_CASE, vol->upcase, vol->upcase_len)) {
-			int name_size = sizeof(ntfs_name);
+		if ((!NVolCaseSensitive(vol) ||
+		     ie->key.file_name.file_name_type == FILE_NAME_DOS) &&
+		    ntfs_are_names_equal(uname, uname_len,
+					 (__le16 *)&ie->key.file_name.file_name,
+					 ie->key.file_name.file_name_length,
+					 IGNORE_CASE, vol->upcase,
+					 vol->upcase_len)) {
+			int name_size = sizeof(struct ntfs_name);
 			u8 type = ie->key.file_name.file_name_type;
 			u8 len = ie->key.file_name.file_name_length;
 
 			/* Only one case insensitive matching name allowed. */
 			if (name) {
-				ntfs_error(sb, "Found already allocated name "
-						"in phase 1. Please run chkdsk "
-						"and if that doesn't find any "
-						"errors please report you saw "
-						"this message to "
-						"linux-ntfs-dev@...ts."
-						"sourceforge.net.");
+				ntfs_error(sb,
+					"Found already allocated name in phase 1. Please run chkdsk");
 				goto dir_err_out;
 			}
 
 			if (type != FILE_NAME_DOS)
-				name_size += len * sizeof(ntfschar);
+				name_size += len * sizeof(__le16);
 			name = kmalloc(name_size, GFP_NOFS);
 			if (!name) {
 				err = -ENOMEM;
@@ -222,7 +222,7 @@ MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
 			if (type != FILE_NAME_DOS) {
 				name->len = len;
 				memcpy(name->name, ie->key.file_name.file_name,
-						len * sizeof(ntfschar));
+						len * sizeof(__le16));
 			} else
 				name->len = 0;
 			*res = name;
@@ -232,7 +232,7 @@ MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
 		 * know which way in the B+tree we have to go.
 		 */
 		rc = ntfs_collate_names(uname, uname_len,
-				(ntfschar*)&ie->key.file_name.file_name,
+				(__le16 *)&ie->key.file_name.file_name,
 				ie->key.file_name.file_name_length, 1,
 				IGNORE_CASE, vol->upcase, vol->upcase_len);
 		/*
@@ -251,7 +251,7 @@ MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
 		 * collation.
 		 */
 		rc = ntfs_collate_names(uname, uname_len,
-				(ntfschar*)&ie->key.file_name.file_name,
+				(__le16 *)&ie->key.file_name.file_name,
 				ie->key.file_name.file_name_length, 1,
 				CASE_SENSITIVE, vol->upcase, vol->upcase_len);
 		if (rc == -1)
@@ -281,109 +281,117 @@ MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
 		err = -ENOENT;
 		goto err_out;
 	} /* Child node present, descend into it. */
-	/* Consistency check: Verify that an index allocation exists. */
-	if (!NInoIndexAllocPresent(dir_ni)) {
-		ntfs_error(sb, "No index allocation attribute but index entry "
-				"requires one. Directory inode 0x%lx is "
-				"corrupt or driver bug.", dir_ni->mft_no);
-		goto err_out;
-	}
+
 	/* Get the starting vcn of the index_block holding the child node. */
-	vcn = sle64_to_cpup((sle64*)((u8*)ie + le16_to_cpu(ie->length) - 8));
-	ia_mapping = VFS_I(dir_ni)->i_mapping;
+	vcn = le64_to_cpup((__le64 *)((u8 *)ie + le16_to_cpu(ie->length) - 8));
+
 	/*
 	 * We are done with the index root and the mft record. Release them,
-	 * otherwise we deadlock with ntfs_map_page().
+	 * otherwise we deadlock with read_mapping_folio().
 	 */
 	ntfs_attr_put_search_ctx(ctx);
 	unmap_mft_record(dir_ni);
 	m = NULL;
 	ctx = NULL;
+
+	ia_vi = ntfs_index_iget(VFS_I(dir_ni), I30, 4);
+	if (IS_ERR(ia_vi)) {
+		err = PTR_ERR(ia_vi);
+		goto err_out;
+	}
+
+	ia_mapping = ia_vi->i_mapping;
 descend_into_child_node:
 	/*
 	 * Convert vcn to index into the index allocation attribute in units
 	 * of PAGE_SIZE and map the page cache page, reading it from
 	 * disk if necessary.
 	 */
-	page = ntfs_map_page(ia_mapping, vcn <<
-			dir_ni->itype.index.vcn_size_bits >> PAGE_SHIFT);
-	if (IS_ERR(page)) {
+	folio = read_mapping_folio(ia_mapping, vcn <<
+			dir_ni->itype.index.vcn_size_bits >> PAGE_SHIFT, NULL);
+	if (IS_ERR(folio)) {
 		ntfs_error(sb, "Failed to map directory index page, error %ld.",
-				-PTR_ERR(page));
-		err = PTR_ERR(page);
+				-PTR_ERR(folio));
+		err = PTR_ERR(folio);
 		goto err_out;
 	}
-	lock_page(page);
-	kaddr = (u8*)page_address(page);
+
+	folio_lock(folio);
+	kaddr = kmalloc(PAGE_SIZE, GFP_NOFS);
+	if (!kaddr) {
+		err = -ENOMEM;
+		folio_unlock(folio);
+		folio_put(folio);
+		goto unm_err_out;
+	}
+
+	memcpy_from_folio(kaddr, folio, 0, PAGE_SIZE);
+	post_read_mst_fixup((struct ntfs_record *)kaddr, PAGE_SIZE);
+	folio_unlock(folio);
+	folio_put(folio);
 fast_descend_into_child_node:
 	/* Get to the index allocation block. */
-	ia = (INDEX_ALLOCATION*)(kaddr + ((vcn <<
+	ia = (struct index_block *)(kaddr + ((vcn <<
 			dir_ni->itype.index.vcn_size_bits) & ~PAGE_MASK));
 	/* Bounds checks. */
-	if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_SIZE) {
-		ntfs_error(sb, "Out of bounds check failed. Corrupt directory "
-				"inode 0x%lx or driver bug.", dir_ni->mft_no);
+	if ((u8 *)ia < kaddr || (u8 *)ia > kaddr + PAGE_SIZE) {
+		ntfs_error(sb,
+			"Out of bounds check failed. Corrupt directory inode 0x%lx or driver bug.",
+			dir_ni->mft_no);
 		goto unm_err_out;
 	}
 	/* Catch multi sector transfer fixup errors. */
 	if (unlikely(!ntfs_is_indx_record(ia->magic))) {
-		ntfs_error(sb, "Directory index record with vcn 0x%llx is "
-				"corrupt.  Corrupt inode 0x%lx.  Run chkdsk.",
-				(unsigned long long)vcn, dir_ni->mft_no);
+		ntfs_error(sb,
+			"Directory index record with vcn 0x%llx is corrupt.  Corrupt inode 0x%lx.  Run chkdsk.",
+			(unsigned long long)vcn, dir_ni->mft_no);
 		goto unm_err_out;
 	}
-	if (sle64_to_cpu(ia->index_block_vcn) != vcn) {
-		ntfs_error(sb, "Actual VCN (0x%llx) of index buffer is "
-				"different from expected VCN (0x%llx). "
-				"Directory inode 0x%lx is corrupt or driver "
-				"bug.", (unsigned long long)
-				sle64_to_cpu(ia->index_block_vcn),
-				(unsigned long long)vcn, dir_ni->mft_no);
+	if (le64_to_cpu(ia->index_block_vcn) != vcn) {
+		ntfs_error(sb,
+			"Actual VCN (0x%llx) of index buffer is different from expected VCN (0x%llx). Directory inode 0x%lx is corrupt or driver bug.",
+			(unsigned long long)le64_to_cpu(ia->index_block_vcn),
+			(unsigned long long)vcn, dir_ni->mft_no);
 		goto unm_err_out;
 	}
 	if (le32_to_cpu(ia->index.allocated_size) + 0x18 !=
 			dir_ni->itype.index.block_size) {
-		ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
-				"0x%lx has a size (%u) differing from the "
-				"directory specified size (%u). Directory "
-				"inode is corrupt or driver bug.",
-				(unsigned long long)vcn, dir_ni->mft_no,
-				le32_to_cpu(ia->index.allocated_size) + 0x18,
-				dir_ni->itype.index.block_size);
+		ntfs_error(sb,
+			"Index buffer (VCN 0x%llx) of directory inode 0x%lx has a size (%u) differing from the directory specified size (%u). Directory inode is corrupt or driver bug.",
+			(unsigned long long)vcn, dir_ni->mft_no,
+			le32_to_cpu(ia->index.allocated_size) + 0x18,
+			dir_ni->itype.index.block_size);
 		goto unm_err_out;
 	}
-	index_end = (u8*)ia + dir_ni->itype.index.block_size;
+	index_end = (u8 *)ia + dir_ni->itype.index.block_size;
 	if (index_end > kaddr + PAGE_SIZE) {
-		ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
-				"0x%lx crosses page boundary. Impossible! "
-				"Cannot access! This is probably a bug in the "
-				"driver.", (unsigned long long)vcn,
-				dir_ni->mft_no);
+		ntfs_error(sb,
+			"Index buffer (VCN 0x%llx) of directory inode 0x%lx crosses page boundary. Impossible! Cannot access! This is probably a bug in the driver.",
+			(unsigned long long)vcn, dir_ni->mft_no);
 		goto unm_err_out;
 	}
-	index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length);
-	if (index_end > (u8*)ia + dir_ni->itype.index.block_size) {
-		ntfs_error(sb, "Size of index buffer (VCN 0x%llx) of directory "
-				"inode 0x%lx exceeds maximum size.",
-				(unsigned long long)vcn, dir_ni->mft_no);
+	index_end = (u8 *)&ia->index + le32_to_cpu(ia->index.index_length);
+	if (index_end > (u8 *)ia + dir_ni->itype.index.block_size) {
+		ntfs_error(sb,
+			"Size of index buffer (VCN 0x%llx) of directory inode 0x%lx exceeds maximum size.",
+			(unsigned long long)vcn, dir_ni->mft_no);
 		goto unm_err_out;
 	}
 	/* The first index entry. */
-	ie = (INDEX_ENTRY*)((u8*)&ia->index +
+	ie = (struct index_entry *)((u8 *)&ia->index +
 			le32_to_cpu(ia->index.entries_offset));
 	/*
 	 * Iterate similar to above big loop but applied to index buffer, thus
 	 * loop until we exceed valid memory (corruption case) or until we
 	 * reach the last entry.
 	 */
-	for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
-		/* Bounds check. */
-		if ((u8*)ie < (u8*)ia || (u8*)ie +
-				sizeof(INDEX_ENTRY_HEADER) > index_end ||
-				(u8*)ie + le16_to_cpu(ie->key_length) >
-				index_end) {
-			ntfs_error(sb, "Index entry out of bounds in "
-					"directory inode 0x%lx.",
+	for (;; ie = (struct index_entry *)((u8 *)ie + le16_to_cpu(ie->length))) {
+		/* Bounds checks. */
+		if ((u8 *)ie < (u8 *)ia ||
+		    (u8 *)ie + sizeof(struct index_entry_header) > index_end ||
+		    (u8 *)ie + sizeof(struct index_entry_header) + le16_to_cpu(ie->key_length) >
+				index_end || (u8 *)ie + le16_to_cpu(ie->length) > index_end) {
+			ntfs_error(sb, "Index entry out of bounds in directory inode 0x%lx.",
 					dir_ni->mft_no);
 			goto unm_err_out;
 		}
@@ -393,6 +401,13 @@ MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
 		 */
 		if (ie->flags & INDEX_ENTRY_END)
 			break;
+		/* Key length should not be zero if it is not last entry. */
+		if (!ie->key_length)
+			goto unm_err_out;
+		/* Check the consistency of an index entry */
+		if (ntfs_index_entry_inconsistent(NULL, vol, ie, COLLATION_FILE_NAME,
+				dir_ni->mft_no))
+			goto unm_err_out;
 		/*
 		 * We perform a case sensitive comparison and if that matches
 		 * we are done and return the mft reference of the inode (i.e.
@@ -401,7 +416,7 @@ MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
 		 * returning.
 		 */
 		if (ntfs_are_names_equal(uname, uname_len,
-				(ntfschar*)&ie->key.file_name.file_name,
+				(__le16 *)&ie->key.file_name.file_name,
 				ie->key.file_name.file_name_length,
 				CASE_SENSITIVE, vol->upcase, vol->upcase_len)) {
 found_it2:
@@ -417,7 +432,7 @@ MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
 			 */
 			if (ie->key.file_name.file_name_type == FILE_NAME_DOS) {
 				if (!name) {
-					name = kmalloc(sizeof(ntfs_name),
+					name = kmalloc(sizeof(struct ntfs_name),
 							GFP_NOFS);
 					if (!name) {
 						err = -ENOMEM;
@@ -434,8 +449,8 @@ MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
 				*res = NULL;
 			}
 			mref = le64_to_cpu(ie->data.dir.indexed_file);
-			unlock_page(page);
-			ntfs_unmap_page(page);
+			kfree(kaddr);
+			iput(ia_vi);
 			return mref;
 		}
 		/*
@@ -448,32 +463,27 @@ MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
 		 * only cache the mft reference and the file name type (we set
 		 * the name length to zero for simplicity).
 		 */
-		if (!NVolCaseSensitive(vol) &&
-				ie->key.file_name.file_name_type &&
-				ntfs_are_names_equal(uname, uname_len,
-				(ntfschar*)&ie->key.file_name.file_name,
-				ie->key.file_name.file_name_length,
-				IGNORE_CASE, vol->upcase, vol->upcase_len)) {
-			int name_size = sizeof(ntfs_name);
+		if ((!NVolCaseSensitive(vol) ||
+		     ie->key.file_name.file_name_type == FILE_NAME_DOS) &&
+		    ntfs_are_names_equal(uname, uname_len,
+					 (__le16 *)&ie->key.file_name.file_name,
+					 ie->key.file_name.file_name_length,
+					 IGNORE_CASE, vol->upcase,
+					 vol->upcase_len)) {
+			int name_size = sizeof(struct ntfs_name);
 			u8 type = ie->key.file_name.file_name_type;
 			u8 len = ie->key.file_name.file_name_length;
 
 			/* Only one case insensitive matching name allowed. */
 			if (name) {
-				ntfs_error(sb, "Found already allocated name "
-						"in phase 2. Please run chkdsk "
-						"and if that doesn't find any "
-						"errors please report you saw "
-						"this message to "
-						"linux-ntfs-dev@...ts."
-						"sourceforge.net.");
-				unlock_page(page);
-				ntfs_unmap_page(page);
+				ntfs_error(sb,
+					"Found already allocated name in phase 2. Please run chkdsk");
+				kfree(kaddr);
 				goto dir_err_out;
 			}
 
 			if (type != FILE_NAME_DOS)
-				name_size += len * sizeof(ntfschar);
+				name_size += len * sizeof(__le16);
 			name = kmalloc(name_size, GFP_NOFS);
 			if (!name) {
 				err = -ENOMEM;
@@ -484,7 +494,7 @@ MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
 			if (type != FILE_NAME_DOS) {
 				name->len = len;
 				memcpy(name->name, ie->key.file_name.file_name,
-						len * sizeof(ntfschar));
+						len * sizeof(__le16));
 			} else
 				name->len = 0;
 			*res = name;
@@ -494,7 +504,7 @@ MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
 		 * know which way in the B+tree we have to go.
 		 */
 		rc = ntfs_collate_names(uname, uname_len,
-				(ntfschar*)&ie->key.file_name.file_name,
+				(__le16 *)&ie->key.file_name.file_name,
 				ie->key.file_name.file_name_length, 1,
 				IGNORE_CASE, vol->upcase, vol->upcase_len);
 		/*
@@ -513,7 +523,7 @@ MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
 		 * collation.
 		 */
 		rc = ntfs_collate_names(uname, uname_len,
-				(ntfschar*)&ie->key.file_name.file_name,
+				(__le16 *)&ie->key.file_name.file_name,
 				ie->key.file_name.file_name_length, 1,
 				CASE_SENSITIVE, vol->upcase, vol->upcase_len);
 		if (rc == -1)
@@ -533,29 +543,29 @@ MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
 	 */
 	if (ie->flags & INDEX_ENTRY_NODE) {
 		if ((ia->index.flags & NODE_MASK) == LEAF_NODE) {
-			ntfs_error(sb, "Index entry with child node found in "
-					"a leaf node in directory inode 0x%lx.",
-					dir_ni->mft_no);
+			ntfs_error(sb,
+				"Index entry with child node found in a leaf node in directory inode 0x%lx.",
+				dir_ni->mft_no);
 			goto unm_err_out;
 		}
 		/* Child node present, descend into it. */
 		old_vcn = vcn;
-		vcn = sle64_to_cpup((sle64*)((u8*)ie +
+		vcn = le64_to_cpup((__le64 *)((u8 *)ie +
 				le16_to_cpu(ie->length) - 8));
 		if (vcn >= 0) {
-			/* If vcn is in the same page cache page as old_vcn we
-			 * recycle the mapped page. */
-			if (old_vcn << vol->cluster_size_bits >>
-					PAGE_SHIFT == vcn <<
-					vol->cluster_size_bits >>
-					PAGE_SHIFT)
+			/*
+			 * If vcn is in the same page cache page as old_vcn we
+			 * recycle the mapped page.
+			 */
+			if (NTFS_CLU_TO_PIDX(vol, old_vcn) ==
+			    NTFS_CLU_TO_PIDX(vol, vcn))
 				goto fast_descend_into_child_node;
-			unlock_page(page);
-			ntfs_unmap_page(page);
+			kfree(kaddr);
+			kaddr = NULL;
 			goto descend_into_child_node;
 		}
-		ntfs_error(sb, "Negative child node vcn in directory inode "
-				"0x%lx.", dir_ni->mft_no);
+		ntfs_error(sb, "Negative child node vcn in directory inode 0x%lx.",
+				dir_ni->mft_no);
 		goto unm_err_out;
 	}
 	/*
@@ -564,15 +574,14 @@ MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
 	 * associated with it.
 	 */
 	if (name) {
-		unlock_page(page);
-		ntfs_unmap_page(page);
+		kfree(kaddr);
+		iput(ia_vi);
 		return name->mref;
 	}
 	ntfs_debug("Entry not found.");
 	err = -ENOENT;
 unm_err_out:
-	unlock_page(page);
-	ntfs_unmap_page(page);
+	kfree(kaddr);
 err_out:
 	if (!err)
 		err = -EIO;
@@ -580,858 +589,495 @@ MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
 		ntfs_attr_put_search_ctx(ctx);
 	if (m)
 		unmap_mft_record(dir_ni);
-	if (name) {
-		kfree(name);
-		*res = NULL;
-	}
+	kfree(name);
+	*res = NULL;
+	if (ia_vi && !IS_ERR(ia_vi))
+		iput(ia_vi);
 	return ERR_MREF(err);
 dir_err_out:
 	ntfs_error(sb, "Corrupt directory.  Aborting lookup.");
 	goto err_out;
 }
 
-#if 0
-
-// TODO: (AIA)
-// The algorithm embedded in this code will be required for the time when we
-// want to support adding of entries to directories, where we require correct
-// collation of file names in order not to cause corruption of the filesystem.
-
 /**
- * ntfs_lookup_inode_by_name - find an inode in a directory given its name
- * @dir_ni:	ntfs inode of the directory in which to search for the name
- * @uname:	Unicode name for which to search in the directory
- * @uname_len:	length of the name @uname in Unicode characters
+ * ntfs_filldir - ntfs specific filldir method
+ * @vol:	current ntfs volume
+ * @ndir:	ntfs inode of current directory
+ * @ia_page:	page in which the index allocation buffer @ie is in resides
+ * @ie:		current index entry
+ * @name:	buffer to use for the converted name
+ * @actor:	what to feed the entries to
  *
- * Look for an inode with name @uname in the directory with inode @dir_ni.
- * ntfs_lookup_inode_by_name() walks the contents of the directory looking for
- * the Unicode name. If the name is found in the directory, the corresponding
- * inode number (>= 0) is returned as a mft reference in cpu format, i.e. it
- * is a 64-bit number containing the sequence number.
+ * Convert the Unicode @name to the loaded NLS and pass it to the @filldir
+ * callback.
  *
- * On error, a negative value is returned corresponding to the error code. In
- * particular if the inode is not found -ENOENT is returned. Note that you
- * can't just check the return value for being negative, you have to check the
- * inode number for being negative which you can extract using MREC(return
- * value).
+ * If @ia_page is not NULL it is the locked page containing the index
+ * allocation block containing the index entry @ie.
  *
- * Note, @uname_len does not include the (optional) terminating NULL character.
+ * Note, we drop (and then reacquire) the page lock on @ia_page across the
+ * @filldir() call otherwise we would deadlock with NFSd when it calls ->lookup
+ * since ntfs_lookup() will lock the same page.  As an optimization, we do not
+ * retake the lock if we are returning a non-zero value as ntfs_readdir()
+ * would need to drop the lock immediately anyway.
  */
-u64 ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
-		const int uname_len)
+static inline int ntfs_filldir(struct ntfs_volume *vol,
+		struct ntfs_inode *ndir, struct page *ia_page, struct index_entry *ie,
+		u8 *name, struct dir_context *actor)
 {
-	ntfs_volume *vol = dir_ni->vol;
-	struct super_block *sb = vol->sb;
-	MFT_RECORD *m;
-	INDEX_ROOT *ir;
-	INDEX_ENTRY *ie;
-	INDEX_ALLOCATION *ia;
-	u8 *index_end;
-	u64 mref;
-	ntfs_attr_search_ctx *ctx;
-	int err, rc;
-	IGNORE_CASE_BOOL ic;
-	VCN vcn, old_vcn;
-	struct address_space *ia_mapping;
-	struct page *page;
-	u8 *kaddr;
+	unsigned long mref;
+	int name_len;
+	unsigned int dt_type;
+	u8 name_type;
 
-	/* Get hold of the mft record for the directory. */
-	m = map_mft_record(dir_ni);
-	if (IS_ERR(m)) {
-		ntfs_error(sb, "map_mft_record() failed with error code %ld.",
-				-PTR_ERR(m));
-		return ERR_MREF(PTR_ERR(m));
+	name_type = ie->key.file_name.file_name_type;
+	if (name_type == FILE_NAME_DOS) {
+		ntfs_debug("Skipping DOS name space entry.");
+		return 0;
 	}
-	ctx = ntfs_attr_get_search_ctx(dir_ni, m);
-	if (!ctx) {
-		err = -ENOMEM;
-		goto err_out;
+	if (MREF_LE(ie->data.dir.indexed_file) == FILE_root) {
+		ntfs_debug("Skipping root directory self reference entry.");
+		return 0;
 	}
-	/* Find the index root attribute in the mft record. */
-	err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL,
-			0, ctx);
-	if (unlikely(err)) {
-		if (err == -ENOENT) {
-			ntfs_error(sb, "Index root attribute missing in "
-					"directory inode 0x%lx.",
-					dir_ni->mft_no);
-			err = -EIO;
-		}
-		goto err_out;
+	if (MREF_LE(ie->data.dir.indexed_file) < FILE_first_user &&
+			!NVolShowSystemFiles(vol)) {
+		ntfs_debug("Skipping system file.");
+		return 0;
 	}
-	/* Get to the index root value (it's been verified in read_inode). */
-	ir = (INDEX_ROOT*)((u8*)ctx->attr +
-			le16_to_cpu(ctx->attr->data.resident.value_offset));
-	index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length);
-	/* The first index entry. */
-	ie = (INDEX_ENTRY*)((u8*)&ir->index +
-			le32_to_cpu(ir->index.entries_offset));
+	if (!NVolShowHiddenFiles(vol) &&
+	    (ie->key.file_name.file_attributes & FILE_ATTR_HIDDEN)) {
+		ntfs_debug("Skipping hidden file.");
+		return 0;
+	}
+
+	name_len = ntfs_ucstonls(vol, (__le16 *)&ie->key.file_name.file_name,
+			ie->key.file_name.file_name_length, &name,
+			NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1);
+	if (name_len <= 0) {
+		ntfs_warning(vol->sb, "Skipping unrepresentable inode 0x%llx.",
+				(long long)MREF_LE(ie->data.dir.indexed_file));
+		return 0;
+	}
+
+	mref = MREF_LE(ie->data.dir.indexed_file);
+	if (ie->key.file_name.file_attributes &
+			FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT)
+		dt_type = DT_DIR;
+	else if (ie->key.file_name.file_attributes & FILE_ATTR_REPARSE_POINT)
+		dt_type = ntfs_reparse_tag_dt_types(vol, mref);
+	else
+		dt_type = DT_REG;
+
 	/*
-	 * Loop until we exceed valid memory (corruption case) or until we
-	 * reach the last entry.
+	 * Drop the page lock otherwise we deadlock with NFS when it calls
+	 * ->lookup since ntfs_lookup() will lock the same page.
 	 */
-	for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
-		/* Bounds checks. */
-		if ((u8*)ie < (u8*)ctx->mrec || (u8*)ie +
-				sizeof(INDEX_ENTRY_HEADER) > index_end ||
-				(u8*)ie + le16_to_cpu(ie->key_length) >
-				index_end)
-			goto dir_err_out;
-		/*
-		 * The last entry cannot contain a name. It can however contain
-		 * a pointer to a child node in the B+tree so we just break out.
-		 */
-		if (ie->flags & INDEX_ENTRY_END)
-			break;
-		/*
-		 * If the current entry has a name type of POSIX, the name is
-		 * case sensitive and not otherwise. This has the effect of us
-		 * not being able to access any POSIX file names which collate
-		 * after the non-POSIX one when they only differ in case, but
-		 * anyone doing screwy stuff like that deserves to burn in
-		 * hell... Doing that kind of stuff on NT4 actually causes
-		 * corruption on the partition even when using SP6a and Linux
-		 * is not involved at all.
-		 */
-		ic = ie->key.file_name.file_name_type ? IGNORE_CASE :
-				CASE_SENSITIVE;
-		/*
-		 * If the names match perfectly, we are done and return the
-		 * mft reference of the inode (i.e. the inode number together
-		 * with the sequence number for consistency checking. We
-		 * convert it to cpu format before returning.
-		 */
-		if (ntfs_are_names_equal(uname, uname_len,
-				(ntfschar*)&ie->key.file_name.file_name,
-				ie->key.file_name.file_name_length, ic,
-				vol->upcase, vol->upcase_len)) {
-found_it:
-			mref = le64_to_cpu(ie->data.dir.indexed_file);
-			ntfs_attr_put_search_ctx(ctx);
-			unmap_mft_record(dir_ni);
-			return mref;
+	if (ia_page)
+		unlock_page(ia_page);
+	ntfs_debug("Calling filldir for %s with len %i, fpos 0x%llx, inode 0x%lx, DT_%s.",
+		name, name_len, actor->pos, mref, dt_type == DT_DIR ? "DIR" : "REG");
+	if (!dir_emit(actor, name, name_len, mref, dt_type))
+		return 1;
+	/* Relock the page but not if we are aborting ->readdir. */
+	if (ia_page)
+		lock_page(ia_page);
+	return 0;
+}
+
+struct ntfs_file_private {
+	void *key;
+	__le16 key_length;
+	bool end_in_iterate;
+	loff_t curr_pos;
+};
+
+struct ntfs_index_ra {
+	unsigned long start_index;
+	unsigned int count;
+	struct rb_node rb_node;
+};
+
+static void ntfs_insert_rb(struct ntfs_index_ra *nir, struct rb_root *root)
+{
+	struct rb_node **new = &root->rb_node, *parent = NULL;
+	struct ntfs_index_ra *cnir;
+
+	while (*new) {
+		parent = *new;
+		cnir = rb_entry(parent, struct ntfs_index_ra, rb_node);
+		if (nir->start_index < cnir->start_index)
+			new = &parent->rb_left;
+		else if (nir->start_index >= cnir->start_index + cnir->count)
+			new = &parent->rb_right;
+		else {
+			pr_err("nir start index : %ld, count : %d, cnir start_index : %ld, count : %d\n",
+				nir->start_index, nir->count, cnir->start_index, cnir->count);
+			return;
 		}
-		/*
-		 * Not a perfect match, need to do full blown collation so we
-		 * know which way in the B+tree we have to go.
-		 */
-		rc = ntfs_collate_names(uname, uname_len,
-				(ntfschar*)&ie->key.file_name.file_name,
-				ie->key.file_name.file_name_length, 1,
-				IGNORE_CASE, vol->upcase, vol->upcase_len);
-		/*
-		 * If uname collates before the name of the current entry, there
-		 * is definitely no such name in this index but we might need to
-		 * descend into the B+tree so we just break out of the loop.
-		 */
-		if (rc == -1)
-			break;
-		/* The names are not equal, continue the search. */
-		if (rc)
-			continue;
-		/*
-		 * Names match with case insensitive comparison, now try the
-		 * case sensitive comparison, which is required for proper
-		 * collation.
-		 */
-		rc = ntfs_collate_names(uname, uname_len,
-				(ntfschar*)&ie->key.file_name.file_name,
-				ie->key.file_name.file_name_length, 1,
-				CASE_SENSITIVE, vol->upcase, vol->upcase_len);
-		if (rc == -1)
-			break;
-		if (rc)
-			continue;
-		/*
-		 * Perfect match, this will never happen as the
-		 * ntfs_are_names_equal() call will have gotten a match but we
-		 * still treat it correctly.
-		 */
-		goto found_it;
 	}
-	/*
-	 * We have finished with this index without success. Check for the
-	 * presence of a child node.
-	 */
-	if (!(ie->flags & INDEX_ENTRY_NODE)) {
-		/* No child node, return -ENOENT. */
-		err = -ENOENT;
-		goto err_out;
-	} /* Child node present, descend into it. */
-	/* Consistency check: Verify that an index allocation exists. */
-	if (!NInoIndexAllocPresent(dir_ni)) {
-		ntfs_error(sb, "No index allocation attribute but index entry "
-				"requires one. Directory inode 0x%lx is "
-				"corrupt or driver bug.", dir_ni->mft_no);
-		goto err_out;
+
+	rb_link_node(&nir->rb_node, parent, new);
+	rb_insert_color(&nir->rb_node, root);
+}
+
+static int ntfs_ia_blocks_readahead(struct ntfs_inode *ia_ni, loff_t pos)
+{
+	unsigned long dir_start_index, dir_end_index;
+	struct inode *ia_vi = VFS_I(ia_ni);
+	struct file_ra_state *dir_ra;
+
+	dir_end_index = (i_size_read(ia_vi) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	dir_start_index = (pos + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+	if (dir_start_index >= dir_end_index)
+		return 0;
+
+	dir_ra = kzalloc(sizeof(*dir_ra), GFP_NOFS);
+	if (!dir_ra)
+		return -ENOMEM;
+
+	file_ra_state_init(dir_ra, ia_vi->i_mapping);
+	dir_end_index = (i_size_read(ia_vi) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	dir_start_index = (pos + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	dir_ra->ra_pages = dir_end_index - dir_start_index;
+	page_cache_sync_readahead(ia_vi->i_mapping, dir_ra, NULL,
+			dir_start_index, dir_end_index - dir_start_index);
+	kfree(dir_ra);
+
+	return 0;
+}
+
+static int ntfs_readdir(struct file *file, struct dir_context *actor)
+{
+	struct inode *vdir = file_inode(file);
+	struct super_block *sb = vdir->i_sb;
+	struct ntfs_inode *ndir = NTFS_I(vdir);
+	struct ntfs_volume *vol = NTFS_SB(sb);
+	struct ntfs_attr_search_ctx *ctx = NULL;
+	struct ntfs_index_context *ictx = NULL;
+	u8 *name;
+	struct index_root *ir;
+	struct index_entry *next = NULL;
+	struct ntfs_file_private *private = NULL;
+	int err = 0;
+	loff_t ie_pos = 2; /* initialize it with dot and dotdot size */
+	struct ntfs_index_ra *nir = NULL;
+	unsigned long index;
+	struct rb_root ra_root = RB_ROOT;
+	struct file_ra_state *ra;
+
+	ntfs_debug("Entering for inode 0x%lx, fpos 0x%llx.",
+			vdir->i_ino, actor->pos);
+
+	if (file->private_data) {
+		private = file->private_data;
+
+		if (actor->pos != private->curr_pos) {
+			/*
+			 * If actor->pos is different from the previous passed
+			 * one, Discard the private->key and fill dirent buffer
+			 * with linear lookup.
+			 */
+			kfree(private->key);
+			private->key = NULL;
+			private->end_in_iterate = false;
+		} else if (private->end_in_iterate) {
+			kfree(private->key);
+			kfree(file->private_data);
+			file->private_data = NULL;
+			return 0;
+		}
 	}
-	/* Get the starting vcn of the index_block holding the child node. */
-	vcn = sle64_to_cpup((u8*)ie + le16_to_cpu(ie->length) - 8);
-	ia_mapping = VFS_I(dir_ni)->i_mapping;
-	/*
-	 * We are done with the index root and the mft record. Release them,
-	 * otherwise we deadlock with ntfs_map_page().
-	 */
-	ntfs_attr_put_search_ctx(ctx);
-	unmap_mft_record(dir_ni);
-	m = NULL;
-	ctx = NULL;
-descend_into_child_node:
+
+	/* Emulate . and .. for all directories. */
+	if (!dir_emit_dots(file, actor))
+		return 0;
+
 	/*
-	 * Convert vcn to index into the index allocation attribute in units
-	 * of PAGE_SIZE and map the page cache page, reading it from
-	 * disk if necessary.
+	 * Allocate a buffer to store the current name being processed
+	 * converted to format determined by current NLS.
 	 */
-	page = ntfs_map_page(ia_mapping, vcn <<
-			dir_ni->itype.index.vcn_size_bits >> PAGE_SHIFT);
-	if (IS_ERR(page)) {
-		ntfs_error(sb, "Failed to map directory index page, error %ld.",
-				-PTR_ERR(page));
-		err = PTR_ERR(page);
-		goto err_out;
+	name = kmalloc(NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1, GFP_NOFS);
+	if (unlikely(!name))
+		return -ENOMEM;
+
+	mutex_lock_nested(&ndir->mrec_lock, NTFS_INODE_MUTEX_PARENT);
+	ictx = ntfs_index_ctx_get(ndir, I30, 4);
+	if (!ictx) {
+		kfree(name);
+		mutex_unlock(&ndir->mrec_lock);
+		return -ENOMEM;
 	}
-	lock_page(page);
-	kaddr = (u8*)page_address(page);
-fast_descend_into_child_node:
-	/* Get to the index allocation block. */
-	ia = (INDEX_ALLOCATION*)(kaddr + ((vcn <<
-			dir_ni->itype.index.vcn_size_bits) & ~PAGE_MASK));
-	/* Bounds checks. */
-	if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_SIZE) {
-		ntfs_error(sb, "Out of bounds check failed. Corrupt directory "
-				"inode 0x%lx or driver bug.", dir_ni->mft_no);
-		goto unm_err_out;
-	}
-	/* Catch multi sector transfer fixup errors. */
-	if (unlikely(!ntfs_is_indx_record(ia->magic))) {
-		ntfs_error(sb, "Directory index record with vcn 0x%llx is "
-				"corrupt.  Corrupt inode 0x%lx.  Run chkdsk.",
-				(unsigned long long)vcn, dir_ni->mft_no);
-		goto unm_err_out;
-	}
-	if (sle64_to_cpu(ia->index_block_vcn) != vcn) {
-		ntfs_error(sb, "Actual VCN (0x%llx) of index buffer is "
-				"different from expected VCN (0x%llx). "
-				"Directory inode 0x%lx is corrupt or driver "
-				"bug.", (unsigned long long)
-				sle64_to_cpu(ia->index_block_vcn),
-				(unsigned long long)vcn, dir_ni->mft_no);
-		goto unm_err_out;
-	}
-	if (le32_to_cpu(ia->index.allocated_size) + 0x18 !=
-			dir_ni->itype.index.block_size) {
-		ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
-				"0x%lx has a size (%u) differing from the "
-				"directory specified size (%u). Directory "
-				"inode is corrupt or driver bug.",
-				(unsigned long long)vcn, dir_ni->mft_no,
-				le32_to_cpu(ia->index.allocated_size) + 0x18,
-				dir_ni->itype.index.block_size);
-		goto unm_err_out;
-	}
-	index_end = (u8*)ia + dir_ni->itype.index.block_size;
-	if (index_end > kaddr + PAGE_SIZE) {
-		ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
-				"0x%lx crosses page boundary. Impossible! "
-				"Cannot access! This is probably a bug in the "
-				"driver.", (unsigned long long)vcn,
-				dir_ni->mft_no);
-		goto unm_err_out;
-	}
-	index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length);
-	if (index_end > (u8*)ia + dir_ni->itype.index.block_size) {
-		ntfs_error(sb, "Size of index buffer (VCN 0x%llx) of directory "
-				"inode 0x%lx exceeds maximum size.",
-				(unsigned long long)vcn, dir_ni->mft_no);
-		goto unm_err_out;
+
+	ra = kzalloc(sizeof(struct file_ra_state), GFP_NOFS);
+	if (!ra) {
+		kfree(name);
+		ntfs_index_ctx_put(ictx);
+		mutex_unlock(&ndir->mrec_lock);
+		return -ENOMEM;
 	}
-	/* The first index entry. */
-	ie = (INDEX_ENTRY*)((u8*)&ia->index +
-			le32_to_cpu(ia->index.entries_offset));
-	/*
-	 * Iterate similar to above big loop but applied to index buffer, thus
-	 * loop until we exceed valid memory (corruption case) or until we
-	 * reach the last entry.
-	 */
-	for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
-		/* Bounds check. */
-		if ((u8*)ie < (u8*)ia || (u8*)ie +
-				sizeof(INDEX_ENTRY_HEADER) > index_end ||
-				(u8*)ie + le16_to_cpu(ie->key_length) >
-				index_end) {
-			ntfs_error(sb, "Index entry out of bounds in "
-					"directory inode 0x%lx.",
-					dir_ni->mft_no);
-			goto unm_err_out;
-		}
-		/*
-		 * The last entry cannot contain a name. It can however contain
-		 * a pointer to a child node in the B+tree so we just break out.
-		 */
-		if (ie->flags & INDEX_ENTRY_END)
-			break;
-		/*
-		 * If the current entry has a name type of POSIX, the name is
-		 * case sensitive and not otherwise. This has the effect of us
-		 * not being able to access any POSIX file names which collate
-		 * after the non-POSIX one when they only differ in case, but
-		 * anyone doing screwy stuff like that deserves to burn in
-		 * hell... Doing that kind of stuff on NT4 actually causes
-		 * corruption on the partition even when using SP6a and Linux
-		 * is not involved at all.
-		 */
-		ic = ie->key.file_name.file_name_type ? IGNORE_CASE :
-				CASE_SENSITIVE;
-		/*
-		 * If the names match perfectly, we are done and return the
-		 * mft reference of the inode (i.e. the inode number together
-		 * with the sequence number for consistency checking. We
-		 * convert it to cpu format before returning.
-		 */
-		if (ntfs_are_names_equal(uname, uname_len,
-				(ntfschar*)&ie->key.file_name.file_name,
-				ie->key.file_name.file_name_length, ic,
-				vol->upcase, vol->upcase_len)) {
-found_it2:
-			mref = le64_to_cpu(ie->data.dir.indexed_file);
-			unlock_page(page);
-			ntfs_unmap_page(page);
-			return mref;
-		}
-		/*
-		 * Not a perfect match, need to do full blown collation so we
-		 * know which way in the B+tree we have to go.
-		 */
-		rc = ntfs_collate_names(uname, uname_len,
-				(ntfschar*)&ie->key.file_name.file_name,
-				ie->key.file_name.file_name_length, 1,
-				IGNORE_CASE, vol->upcase, vol->upcase_len);
-		/*
-		 * If uname collates before the name of the current entry, there
-		 * is definitely no such name in this index but we might need to
-		 * descend into the B+tree so we just break out of the loop.
-		 */
-		if (rc == -1)
-			break;
-		/* The names are not equal, continue the search. */
-		if (rc)
-			continue;
-		/*
-		 * Names match with case insensitive comparison, now try the
-		 * case sensitive comparison, which is required for proper
-		 * collation.
-		 */
-		rc = ntfs_collate_names(uname, uname_len,
-				(ntfschar*)&ie->key.file_name.file_name,
-				ie->key.file_name.file_name_length, 1,
-				CASE_SENSITIVE, vol->upcase, vol->upcase_len);
-		if (rc == -1)
-			break;
-		if (rc)
-			continue;
+	file_ra_state_init(ra, vol->mft_ino->i_mapping);
+
+	if (private && private->key) {
 		/*
-		 * Perfect match, this will never happen as the
-		 * ntfs_are_names_equal() call will have gotten a match but we
-		 * still treat it correctly.
+		 * Find index witk private->key using ntfs_index_lookup()
+		 * instead of linear index lookup.
 		 */
-		goto found_it2;
-	}
-	/*
-	 * We have finished with this index buffer without success. Check for
-	 * the presence of a child node.
-	 */
-	if (ie->flags & INDEX_ENTRY_NODE) {
-		if ((ia->index.flags & NODE_MASK) == LEAF_NODE) {
-			ntfs_error(sb, "Index entry with child node found in "
-					"a leaf node in directory inode 0x%lx.",
-					dir_ni->mft_no);
-			goto unm_err_out;
+		err = ntfs_index_lookup(private->key,
+					le16_to_cpu(private->key_length),
+					ictx);
+		if (!err) {
+			next = ictx->entry;
+			/*
+			 * Update ie_pos with private->curr_pos
+			 * to make next d_off of dirent correct.
+			 */
+			ie_pos = private->curr_pos;
+
+			if (actor->pos > vol->mft_record_size && ictx->ia_ni) {
+				err = ntfs_ia_blocks_readahead(ictx->ia_ni, actor->pos);
+				if (err)
+					goto out;
+			}
+
+			goto nextdir;
+		} else {
+			goto out;
 		}
-		/* Child node present, descend into it. */
-		old_vcn = vcn;
-		vcn = sle64_to_cpup((u8*)ie + le16_to_cpu(ie->length) - 8);
-		if (vcn >= 0) {
-			/* If vcn is in the same page cache page as old_vcn we
-			 * recycle the mapped page. */
-			if (old_vcn << vol->cluster_size_bits >>
-					PAGE_SHIFT == vcn <<
-					vol->cluster_size_bits >>
-					PAGE_SHIFT)
-				goto fast_descend_into_child_node;
-			unlock_page(page);
-			ntfs_unmap_page(page);
-			goto descend_into_child_node;
+	} else if (!private) {
+		private = kzalloc(sizeof(struct ntfs_file_private), GFP_KERNEL);
+		if (!private) {
+			err = -ENOMEM;
+			goto out;
 		}
-		ntfs_error(sb, "Negative child node vcn in directory inode "
-				"0x%lx.", dir_ni->mft_no);
-		goto unm_err_out;
+		file->private_data = private;
 	}
-	/* No child node, return -ENOENT. */
-	ntfs_debug("Entry not found.");
-	err = -ENOENT;
-unm_err_out:
-	unlock_page(page);
-	ntfs_unmap_page(page);
-err_out:
-	if (!err)
-		err = -EIO;
-	if (ctx)
+
+	ctx = ntfs_attr_get_search_ctx(ndir, NULL);
+	if (!ctx) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/* Find the index root attribute in the mft record. */
+	if (ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL, 0,
+				ctx)) {
+		ntfs_error(sb, "Index root attribute missing in directory inode %ld",
+				ndir->mft_no);
 		ntfs_attr_put_search_ctx(ctx);
-	if (m)
-		unmap_mft_record(dir_ni);
-	return ERR_MREF(err);
-dir_err_out:
-	ntfs_error(sb, "Corrupt directory. Aborting lookup.");
-	goto err_out;
-}
+		err = -ENOMEM;
+		goto out;
+	}
 
-#endif
+	/* Get to the index root value. */
+	ir = (struct index_root *)((u8 *)ctx->attr +
+			le16_to_cpu(ctx->attr->data.resident.value_offset));
 
-/**
- * ntfs_filldir - ntfs specific filldir method
- * @vol:	current ntfs volume
- * @ndir:	ntfs inode of current directory
- * @ia_page:	page in which the index allocation buffer @ie is in resides
- * @ie:		current index entry
- * @name:	buffer to use for the converted name
- * @actor:	what to feed the entries to
- *
- * Convert the Unicode @name to the loaded NLS and pass it to the @filldir
- * callback.
- *
- * If @ia_page is not NULL it is the locked page containing the index
- * allocation block containing the index entry @ie.
- *
- * Note, we drop (and then reacquire) the page lock on @ia_page across the
- * @filldir() call otherwise we would deadlock with NFSd when it calls ->lookup
- * since ntfs_lookup() will lock the same page.  As an optimization, we do not
- * retake the lock if we are returning a non-zero value as ntfs_readdir()
- * would need to drop the lock immediately anyway.
- */
-static inline int ntfs_filldir(ntfs_volume *vol,
-		ntfs_inode *ndir, struct page *ia_page, INDEX_ENTRY *ie,
-		u8 *name, struct dir_context *actor)
-{
-	unsigned long mref;
-	int name_len;
-	unsigned dt_type;
-	FILE_NAME_TYPE_FLAGS name_type;
+	ictx->ir = ir;
+	ictx->actx = ctx;
+	ictx->parent_vcn[ictx->pindex] = VCN_INDEX_ROOT_PARENT;
+	ictx->is_in_root = true;
+	ictx->parent_pos[ictx->pindex] = 0;
 
-	name_type = ie->key.file_name.file_name_type;
-	if (name_type == FILE_NAME_DOS) {
-		ntfs_debug("Skipping DOS name space entry.");
-		return 0;
-	}
-	if (MREF_LE(ie->data.dir.indexed_file) == FILE_root) {
-		ntfs_debug("Skipping root directory self reference entry.");
-		return 0;
-	}
-	if (MREF_LE(ie->data.dir.indexed_file) < FILE_first_user &&
-			!NVolShowSystemFiles(vol)) {
-		ntfs_debug("Skipping system file.");
-		return 0;
-	}
-	name_len = ntfs_ucstonls(vol, (ntfschar*)&ie->key.file_name.file_name,
-			ie->key.file_name.file_name_length, &name,
-			NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1);
-	if (name_len <= 0) {
-		ntfs_warning(vol->sb, "Skipping unrepresentable inode 0x%llx.",
-				(long long)MREF_LE(ie->data.dir.indexed_file));
-		return 0;
+	ictx->block_size = le32_to_cpu(ir->index_block_size);
+	if (ictx->block_size < NTFS_BLOCK_SIZE) {
+		ntfs_error(sb, "Index block size (%d) is smaller than the sector size (%d)",
+				ictx->block_size, NTFS_BLOCK_SIZE);
+		err = -EIO;
+		goto out;
 	}
-	if (ie->key.file_name.file_attributes &
-			FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT)
-		dt_type = DT_DIR;
-	else
-		dt_type = DT_REG;
-	mref = MREF_LE(ie->data.dir.indexed_file);
-	/*
-	 * Drop the page lock otherwise we deadlock with NFS when it calls
-	 * ->lookup since ntfs_lookup() will lock the same page.
-	 */
-	if (ia_page)
-		unlock_page(ia_page);
-	ntfs_debug("Calling filldir for %s with len %i, fpos 0x%llx, inode "
-			"0x%lx, DT_%s.", name, name_len, actor->pos, mref,
-			dt_type == DT_DIR ? "DIR" : "REG");
-	if (!dir_emit(actor, name, name_len, mref, dt_type))
-		return 1;
-	/* Relock the page but not if we are aborting ->readdir. */
-	if (ia_page)
-		lock_page(ia_page);
-	return 0;
-}
 
-/*
- * We use the same basic approach as the old NTFS driver, i.e. we parse the
- * index root entries and then the index allocation entries that are marked
- * as in use in the index bitmap.
- *
- * While this will return the names in random order this doesn't matter for
- * ->readdir but OTOH results in a faster ->readdir.
- *
- * VFS calls ->readdir without BKL but with i_mutex held. This protects the VFS
- * parts (e.g. ->f_pos and ->i_size, and it also protects against directory
- * modifications).
- *
- * Locking:  - Caller must hold i_mutex on the directory.
- *	     - Each page cache page in the index allocation mapping must be
- *	       locked whilst being accessed otherwise we may find a corrupt
- *	       page due to it being under ->writepage at the moment which
- *	       applies the mst protection fixups before writing out and then
- *	       removes them again after the write is complete after which it 
- *	       unlocks the page.
- */
-static int ntfs_readdir(struct file *file, struct dir_context *actor)
-{
-	s64 ia_pos, ia_start, prev_ia_pos, bmp_pos;
-	loff_t i_size;
-	struct inode *bmp_vi, *vdir = file_inode(file);
-	struct super_block *sb = vdir->i_sb;
-	ntfs_inode *ndir = NTFS_I(vdir);
-	ntfs_volume *vol = NTFS_SB(sb);
-	MFT_RECORD *m;
-	INDEX_ROOT *ir = NULL;
-	INDEX_ENTRY *ie;
-	INDEX_ALLOCATION *ia;
-	u8 *name = NULL;
-	int rc, err, ir_pos, cur_bmp_pos;
-	struct address_space *ia_mapping, *bmp_mapping;
-	struct page *bmp_page = NULL, *ia_page = NULL;
-	u8 *kaddr, *bmp, *index_end;
-	ntfs_attr_search_ctx *ctx;
+	if (vol->cluster_size <= ictx->block_size)
+		ictx->vcn_size_bits = vol->cluster_size_bits;
+	else
+		ictx->vcn_size_bits = NTFS_BLOCK_SIZE_BITS;
 
-	ntfs_debug("Entering for inode 0x%lx, fpos 0x%llx.",
-			vdir->i_ino, actor->pos);
-	rc = err = 0;
-	/* Are we at end of dir yet? */
-	i_size = i_size_read(vdir);
-	if (actor->pos >= i_size + vol->mft_record_size)
-		return 0;
-	/* Emulate . and .. for all directories. */
-	if (!dir_emit_dots(file, actor))
-		return 0;
-	m = NULL;
-	ctx = NULL;
-	/*
-	 * Allocate a buffer to store the current name being processed
-	 * converted to format determined by current NLS.
-	 */
-	name = kmalloc(NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1, GFP_NOFS);
-	if (unlikely(!name)) {
-		err = -ENOMEM;
-		goto err_out;
-	}
-	/* Are we jumping straight into the index allocation attribute? */
-	if (actor->pos >= vol->mft_record_size)
-		goto skip_index_root;
-	/* Get hold of the mft record for the directory. */
-	m = map_mft_record(ndir);
-	if (IS_ERR(m)) {
-		err = PTR_ERR(m);
-		m = NULL;
-		goto err_out;
-	}
-	ctx = ntfs_attr_get_search_ctx(ndir, m);
-	if (unlikely(!ctx)) {
-		err = -ENOMEM;
-		goto err_out;
-	}
-	/* Get the offset into the index root attribute. */
-	ir_pos = (s64)actor->pos;
-	/* Find the index root attribute in the mft record. */
-	err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL,
-			0, ctx);
-	if (unlikely(err)) {
-		ntfs_error(sb, "Index root attribute missing in directory "
-				"inode 0x%lx.", vdir->i_ino);
-		goto err_out;
-	}
-	/*
-	 * Copy the index root attribute value to a buffer so that we can put
-	 * the search context and unmap the mft record before calling the
-	 * filldir() callback.  We need to do this because of NFSd which calls
-	 * ->lookup() from its filldir callback() and this causes NTFS to
-	 * deadlock as ntfs_lookup() maps the mft record of the directory and
-	 * we have got it mapped here already.  The only solution is for us to
-	 * unmap the mft record here so that a call to ntfs_lookup() is able to
-	 * map the mft record without deadlocking.
-	 */
-	rc = le32_to_cpu(ctx->attr->data.resident.value_length);
-	ir = kmalloc(rc, GFP_NOFS);
-	if (unlikely(!ir)) {
-		err = -ENOMEM;
-		goto err_out;
-	}
-	/* Copy the index root value (it has been verified in read_inode). */
-	memcpy(ir, (u8*)ctx->attr +
-			le16_to_cpu(ctx->attr->data.resident.value_offset), rc);
-	ntfs_attr_put_search_ctx(ctx);
-	unmap_mft_record(ndir);
-	ctx = NULL;
-	m = NULL;
-	index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length);
 	/* The first index entry. */
-	ie = (INDEX_ENTRY*)((u8*)&ir->index +
+	next = (struct index_entry *)((u8 *)&ir->index +
 			le32_to_cpu(ir->index.entries_offset));
-	/*
-	 * Loop until we exceed valid memory (corruption case) or until we
-	 * reach the last entry or until filldir tells us it has had enough
-	 * or signals an error (both covered by the rc test).
-	 */
-	for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
-		ntfs_debug("In index root, offset 0x%zx.", (u8*)ie - (u8*)ir);
-		/* Bounds checks. */
-		if (unlikely((u8*)ie < (u8*)ir || (u8*)ie +
-				sizeof(INDEX_ENTRY_HEADER) > index_end ||
-				(u8*)ie + le16_to_cpu(ie->key_length) >
-				index_end))
-			goto err_out;
-		/* The last entry cannot contain a name. */
-		if (ie->flags & INDEX_ENTRY_END)
-			break;
-		/* Skip index root entry if continuing previous readdir. */
-		if (ir_pos > (u8*)ie - (u8*)ir)
-			continue;
-		/* Advance the position even if going to skip the entry. */
-		actor->pos = (u8*)ie - (u8*)ir;
-		/* Submit the name to the filldir callback. */
-		rc = ntfs_filldir(vol, ndir, NULL, ie, name, actor);
-		if (rc) {
-			kfree(ir);
-			goto abort;
+
+	if (next->flags & INDEX_ENTRY_NODE) {
+		ictx->ia_ni = ntfs_ia_open(ictx, ictx->idx_ni);
+		if (!ictx->ia_ni) {
+			err = -EINVAL;
+			goto out;
 		}
+
+		err = ntfs_ia_blocks_readahead(ictx->ia_ni, actor->pos);
+		if (err)
+			goto out;
 	}
-	/* We are done with the index root and can free the buffer. */
-	kfree(ir);
-	ir = NULL;
-	/* If there is no index allocation attribute we are finished. */
-	if (!NInoIndexAllocPresent(ndir))
-		goto EOD;
-	/* Advance fpos to the beginning of the index allocation. */
-	actor->pos = vol->mft_record_size;
-skip_index_root:
-	kaddr = NULL;
-	prev_ia_pos = -1LL;
-	/* Get the offset into the index allocation attribute. */
-	ia_pos = (s64)actor->pos - vol->mft_record_size;
-	ia_mapping = vdir->i_mapping;
-	ntfs_debug("Inode 0x%lx, getting index bitmap.", vdir->i_ino);
-	bmp_vi = ntfs_attr_iget(vdir, AT_BITMAP, I30, 4);
-	if (IS_ERR(bmp_vi)) {
-		ntfs_error(sb, "Failed to get bitmap attribute.");
-		err = PTR_ERR(bmp_vi);
-		goto err_out;
-	}
-	bmp_mapping = bmp_vi->i_mapping;
-	/* Get the starting bitmap bit position and sanity check it. */
-	bmp_pos = ia_pos >> ndir->itype.index.block_size_bits;
-	if (unlikely(bmp_pos >> 3 >= i_size_read(bmp_vi))) {
-		ntfs_error(sb, "Current index allocation position exceeds "
-				"index bitmap size.");
-		goto iput_err_out;
-	}
-	/* Get the starting bit position in the current bitmap page. */
-	cur_bmp_pos = bmp_pos & ((PAGE_SIZE * 8) - 1);
-	bmp_pos &= ~(u64)((PAGE_SIZE * 8) - 1);
-get_next_bmp_page:
-	ntfs_debug("Reading bitmap with page index 0x%llx, bit ofs 0x%llx",
-			(unsigned long long)bmp_pos >> (3 + PAGE_SHIFT),
-			(unsigned long long)bmp_pos &
-			(unsigned long long)((PAGE_SIZE * 8) - 1));
-	bmp_page = ntfs_map_page(bmp_mapping,
-			bmp_pos >> (3 + PAGE_SHIFT));
-	if (IS_ERR(bmp_page)) {
-		ntfs_error(sb, "Reading index bitmap failed.");
-		err = PTR_ERR(bmp_page);
-		bmp_page = NULL;
-		goto iput_err_out;
-	}
-	bmp = (u8*)page_address(bmp_page);
-	/* Find next index block in use. */
-	while (!(bmp[cur_bmp_pos >> 3] & (1 << (cur_bmp_pos & 7)))) {
-find_next_index_buffer:
-		cur_bmp_pos++;
-		/*
-		 * If we have reached the end of the bitmap page, get the next
-		 * page, and put away the old one.
-		 */
-		if (unlikely((cur_bmp_pos >> 3) >= PAGE_SIZE)) {
-			ntfs_unmap_page(bmp_page);
-			bmp_pos += PAGE_SIZE * 8;
-			cur_bmp_pos = 0;
-			goto get_next_bmp_page;
+
+	if (next->flags & INDEX_ENTRY_NODE) {
+		next = ntfs_index_walk_down(next, ictx);
+		if (!next) {
+			err = -EIO;
+			goto out;
 		}
-		/* If we have reached the end of the bitmap, we are done. */
-		if (unlikely(((bmp_pos + cur_bmp_pos) >> 3) >= i_size))
-			goto unm_EOD;
-		ia_pos = (bmp_pos + cur_bmp_pos) <<
-				ndir->itype.index.block_size_bits;
 	}
-	ntfs_debug("Handling index buffer 0x%llx.",
-			(unsigned long long)bmp_pos + cur_bmp_pos);
-	/* If the current index buffer is in the same page we reuse the page. */
-	if ((prev_ia_pos & (s64)PAGE_MASK) !=
-			(ia_pos & (s64)PAGE_MASK)) {
-		prev_ia_pos = ia_pos;
-		if (likely(ia_page != NULL)) {
-			unlock_page(ia_page);
-			ntfs_unmap_page(ia_page);
+
+	if (next && !(next->flags & INDEX_ENTRY_END))
+		goto nextdir;
+
+	while ((next = ntfs_index_next(next, ictx)) != NULL) {
+nextdir:
+		/* Check the consistency of an index entry */
+		if (ntfs_index_entry_inconsistent(ictx, vol, next, COLLATION_FILE_NAME,
+					ndir->mft_no)) {
+			err = -EIO;
+			goto out;
 		}
-		/*
-		 * Map the page cache page containing the current ia_pos,
-		 * reading it from disk if necessary.
-		 */
-		ia_page = ntfs_map_page(ia_mapping, ia_pos >> PAGE_SHIFT);
-		if (IS_ERR(ia_page)) {
-			ntfs_error(sb, "Reading index allocation data failed.");
-			err = PTR_ERR(ia_page);
-			ia_page = NULL;
-			goto err_out;
+
+		if (ie_pos < actor->pos) {
+			ie_pos += next->length;
+			continue;
 		}
-		lock_page(ia_page);
-		kaddr = (u8*)page_address(ia_page);
-	}
-	/* Get the current index buffer. */
-	ia = (INDEX_ALLOCATION*)(kaddr + (ia_pos & ~PAGE_MASK &
-					  ~(s64)(ndir->itype.index.block_size - 1)));
-	/* Bounds checks. */
-	if (unlikely((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_SIZE)) {
-		ntfs_error(sb, "Out of bounds check failed. Corrupt directory "
-				"inode 0x%lx or driver bug.", vdir->i_ino);
-		goto err_out;
-	}
-	/* Catch multi sector transfer fixup errors. */
-	if (unlikely(!ntfs_is_indx_record(ia->magic))) {
-		ntfs_error(sb, "Directory index record with vcn 0x%llx is "
-				"corrupt.  Corrupt inode 0x%lx.  Run chkdsk.",
-				(unsigned long long)ia_pos >>
-				ndir->itype.index.vcn_size_bits, vdir->i_ino);
-		goto err_out;
-	}
-	if (unlikely(sle64_to_cpu(ia->index_block_vcn) != (ia_pos &
-			~(s64)(ndir->itype.index.block_size - 1)) >>
-			ndir->itype.index.vcn_size_bits)) {
-		ntfs_error(sb, "Actual VCN (0x%llx) of index buffer is "
-				"different from expected VCN (0x%llx). "
-				"Directory inode 0x%lx is corrupt or driver "
-				"bug. ", (unsigned long long)
-				sle64_to_cpu(ia->index_block_vcn),
-				(unsigned long long)ia_pos >>
-				ndir->itype.index.vcn_size_bits, vdir->i_ino);
-		goto err_out;
-	}
-	if (unlikely(le32_to_cpu(ia->index.allocated_size) + 0x18 !=
-			ndir->itype.index.block_size)) {
-		ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
-				"0x%lx has a size (%u) differing from the "
-				"directory specified size (%u). Directory "
-				"inode is corrupt or driver bug.",
-				(unsigned long long)ia_pos >>
-				ndir->itype.index.vcn_size_bits, vdir->i_ino,
-				le32_to_cpu(ia->index.allocated_size) + 0x18,
-				ndir->itype.index.block_size);
-		goto err_out;
-	}
-	index_end = (u8*)ia + ndir->itype.index.block_size;
-	if (unlikely(index_end > kaddr + PAGE_SIZE)) {
-		ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
-				"0x%lx crosses page boundary. Impossible! "
-				"Cannot access! This is probably a bug in the "
-				"driver.", (unsigned long long)ia_pos >>
-				ndir->itype.index.vcn_size_bits, vdir->i_ino);
-		goto err_out;
-	}
-	ia_start = ia_pos & ~(s64)(ndir->itype.index.block_size - 1);
-	index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length);
-	if (unlikely(index_end > (u8*)ia + ndir->itype.index.block_size)) {
-		ntfs_error(sb, "Size of index buffer (VCN 0x%llx) of directory "
-				"inode 0x%lx exceeds maximum size.",
-				(unsigned long long)ia_pos >>
-				ndir->itype.index.vcn_size_bits, vdir->i_ino);
-		goto err_out;
-	}
-	/* The first index entry in this index buffer. */
-	ie = (INDEX_ENTRY*)((u8*)&ia->index +
-			le32_to_cpu(ia->index.entries_offset));
-	/*
-	 * Loop until we exceed valid memory (corruption case) or until we
-	 * reach the last entry or until filldir tells us it has had enough
-	 * or signals an error (both covered by the rc test).
-	 */
-	for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
-		ntfs_debug("In index allocation, offset 0x%llx.",
-				(unsigned long long)ia_start +
-				(unsigned long long)((u8*)ie - (u8*)ia));
-		/* Bounds checks. */
-		if (unlikely((u8*)ie < (u8*)ia || (u8*)ie +
-				sizeof(INDEX_ENTRY_HEADER) > index_end ||
-				(u8*)ie + le16_to_cpu(ie->key_length) >
-				index_end))
-			goto err_out;
-		/* The last entry cannot contain a name. */
-		if (ie->flags & INDEX_ENTRY_END)
+
+		actor->pos = ie_pos;
+
+		index = NTFS_MFT_NR_TO_PIDX(vol, MREF_LE(next->data.dir.indexed_file));
+		if (nir) {
+			struct ntfs_index_ra *cnir;
+			struct rb_node *node = ra_root.rb_node;
+
+			if (nir->start_index <= index &&
+			    index < nir->start_index + nir->count) {
+				/* No behavior */
+				goto filldir;
+			}
+
+			while (node) {
+				cnir = rb_entry(node, struct ntfs_index_ra, rb_node);
+				if (cnir->start_index <= index &&
+				    index < cnir->start_index + cnir->count) {
+					goto filldir;
+				} else if (cnir->start_index + cnir->count == index) {
+					cnir->count++;
+					goto filldir;
+				} else if (!cnir->start_index && cnir->start_index - 1 == index) {
+					cnir->start_index = index;
+					goto filldir;
+				}
+
+				if (index < cnir->start_index)
+					node = node->rb_left;
+				else if (index >= cnir->start_index + cnir->count)
+					node = node->rb_right;
+			}
+
+			if (nir->start_index + nir->count == index) {
+				nir->count++;
+			} else if (!nir->start_index && nir->start_index - 1 == index) {
+				nir->start_index = index;
+			} else if (nir->count > 2) {
+				ntfs_insert_rb(nir, &ra_root);
+				nir = NULL;
+			} else {
+				nir->start_index = index;
+				nir->count = 1;
+			}
+		}
+
+		if (!nir) {
+			nir = kzalloc(sizeof(struct ntfs_index_ra), GFP_KERNEL);
+			if (nir) {
+				nir->start_index = index;
+				nir->count = 1;
+			}
+		}
+
+filldir:
+		/* Submit the name to the filldir callback. */
+		err = ntfs_filldir(vol, ndir, NULL, next, name, actor);
+		if (err) {
+			/*
+			 * Store index key value to file private_data to start
+			 * from current index offset on next round.
+			 */
+			private = file->private_data;
+			kfree(private->key);
+			private->key = kmalloc(le16_to_cpu(next->key_length), GFP_KERNEL);
+			if (!private->key) {
+				err = -ENOMEM;
+				goto out;
+			}
+
+			memcpy(private->key, &next->key.file_name, le16_to_cpu(next->key_length));
+			private->key_length = next->key_length;
 			break;
-		/* Skip index block entry if continuing previous readdir. */
-		if (ia_pos - ia_start > (u8*)ie - (u8*)ia)
-			continue;
-		/* Advance the position even if going to skip the entry. */
-		actor->pos = (u8*)ie - (u8*)ia +
-				(sle64_to_cpu(ia->index_block_vcn) <<
-				ndir->itype.index.vcn_size_bits) +
-				vol->mft_record_size;
-		/*
-		 * Submit the name to the @filldir callback.  Note,
-		 * ntfs_filldir() drops the lock on @ia_page but it retakes it
-		 * before returning, unless a non-zero value is returned in
-		 * which case the page is left unlocked.
-		 */
-		rc = ntfs_filldir(vol, ndir, ia_page, ie, name, actor);
-		if (rc) {
-			/* @ia_page is already unlocked in this case. */
-			ntfs_unmap_page(ia_page);
-			ntfs_unmap_page(bmp_page);
-			iput(bmp_vi);
-			goto abort;
 		}
+		ie_pos += next->length;
 	}
-	goto find_next_index_buffer;
-unm_EOD:
-	if (ia_page) {
-		unlock_page(ia_page);
-		ntfs_unmap_page(ia_page);
-	}
-	ntfs_unmap_page(bmp_page);
-	iput(bmp_vi);
-EOD:
-	/* We are finished, set fpos to EOD. */
-	actor->pos = i_size + vol->mft_record_size;
-abort:
-	kfree(name);
-	return 0;
-err_out:
-	if (bmp_page) {
-		ntfs_unmap_page(bmp_page);
-iput_err_out:
-		iput(bmp_vi);
+
+	if (!err)
+		private->end_in_iterate = true;
+	else
+		err = 0;
+
+	private->curr_pos = actor->pos = ie_pos;
+out:
+	while (!RB_EMPTY_ROOT(&ra_root)) {
+		struct ntfs_index_ra *cnir;
+		struct rb_node *node;
+
+		node = rb_first(&ra_root);
+		cnir = rb_entry(node, struct ntfs_index_ra, rb_node);
+		ra->ra_pages = cnir->count;
+		page_cache_sync_readahead(vol->mft_ino->i_mapping, ra, NULL,
+				cnir->start_index, cnir->count);
+		rb_erase(node, &ra_root);
+		kfree(cnir);
 	}
-	if (ia_page) {
-		unlock_page(ia_page);
-		ntfs_unmap_page(ia_page);
+
+	if (err) {
+		private->curr_pos = actor->pos;
+		private->end_in_iterate = true;
+		err = 0;
 	}
-	kfree(ir);
+	ntfs_index_ctx_put(ictx);
 	kfree(name);
-	if (ctx)
-		ntfs_attr_put_search_ctx(ctx);
-	if (m)
-		unmap_mft_record(ndir);
-	if (!err)
-		err = -EIO;
-	ntfs_debug("Failed. Returning error code %i.", -err);
+	kfree(nir);
+	kfree(ra);
+	mutex_unlock(&ndir->mrec_lock);
 	return err;
 }
 
+int ntfs_check_empty_dir(struct ntfs_inode *ni, struct mft_record *ni_mrec)
+{
+	struct ntfs_attr_search_ctx *ctx;
+	int ret = 0;
+
+	if (!(ni_mrec->flags & MFT_RECORD_IS_DIRECTORY))
+		return 0;
+
+	ctx = ntfs_attr_get_search_ctx(ni, NULL);
+	if (!ctx) {
+		ntfs_error(ni->vol->sb, "Failed to get search context");
+		return -ENOMEM;
+	}
+
+	/* Find the index root attribute in the mft record. */
+	ret = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL,
+				0, ctx);
+	if (ret) {
+		ntfs_error(ni->vol->sb, "Index root attribute missing in directory inode %lld",
+				(unsigned long long)ni->mft_no);
+		ntfs_attr_put_search_ctx(ctx);
+		return ret;
+	}
+
+	/* Non-empty directory? */
+	if (ctx->attr->data.resident.value_length !=
+	    sizeof(struct index_root) + sizeof(struct index_entry_header)) {
+		/* Both ENOTEMPTY and EEXIST are ok. We use the more common. */
+		ret = -ENOTEMPTY;
+		ntfs_debug("Directory is not empty\n");
+	}
+
+	ntfs_attr_put_search_ctx(ctx);
+
+	return ret;
+}
+
 /**
  * ntfs_dir_open - called when an inode is about to be opened
  * @vi:		inode to be opened
@@ -1457,13 +1103,21 @@ static int ntfs_dir_open(struct inode *vi, struct file *filp)
 	return 0;
 }
 
-#ifdef NTFS_RW
+static int ntfs_dir_release(struct inode *vi, struct file *filp)
+{
+	if (filp->private_data) {
+		kfree(((struct ntfs_file_private *)filp->private_data)->key);
+		kfree(filp->private_data);
+		filp->private_data = NULL;
+	}
+	return 0;
+}
 
 /**
  * ntfs_dir_fsync - sync a directory to disk
- * @filp:	directory to be synced
- * @start:	offset in bytes of the beginning of data range to sync
- * @end:	offset in bytes of the end of data range (inclusive)
+ * @filp:	file describing the directory to be synced
+ * @start:	start offset to be synced
+ * @end:	end offset to be synced
  * @datasync:	if non-zero only flush user data and not metadata
  *
  * Data integrity sync of a directory to disk.  Used for fsync, fdatasync, and
@@ -1479,27 +1133,55 @@ static int ntfs_dir_open(struct inode *vi, struct file *filp)
  * anyway.
  *
  * Locking: Caller must hold i_mutex on the inode.
- *
- * TODO: We should probably also write all attribute/index inodes associated
- * with this inode but since we have no simple way of getting to them we ignore
- * this problem for now.  We do write the $BITMAP attribute if it is present
- * which is the important one for a directory so things are not too bad.
  */
 static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
 			  int datasync)
 {
 	struct inode *bmp_vi, *vi = filp->f_mapping->host;
+	struct ntfs_volume *vol = NTFS_I(vi)->vol;
+	struct ntfs_inode *ni = NTFS_I(vi);
+	struct ntfs_attr_search_ctx *ctx;
+	struct inode *parent_vi, *ia_vi;
 	int err, ret;
-	ntfs_attr na;
+	struct ntfs_attr na;
 
 	ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
 
+	if (NVolShutdown(vol))
+		return -EIO;
+
+	ctx = ntfs_attr_get_search_ctx(ni, NULL);
+	if (!ctx)
+		return -ENOMEM;
+
+	mutex_lock_nested(&ni->mrec_lock, NTFS_INODE_MUTEX_NORMAL_2);
+	while (!(err = ntfs_attr_lookup(AT_FILE_NAME, NULL, 0, 0, 0, NULL, 0, ctx))) {
+		struct file_name_attr *fn = (struct file_name_attr *)((u8 *)ctx->attr +
+				le16_to_cpu(ctx->attr->data.resident.value_offset));
+
+		parent_vi = ntfs_iget(vi->i_sb, MREF_LE(fn->parent_directory));
+		if (IS_ERR(parent_vi))
+			continue;
+		mutex_lock_nested(&NTFS_I(parent_vi)->mrec_lock, NTFS_INODE_MUTEX_PARENT_2);
+		ia_vi = ntfs_index_iget(parent_vi, I30, 4);
+		mutex_unlock(&NTFS_I(parent_vi)->mrec_lock);
+		if (IS_ERR(ia_vi)) {
+			iput(parent_vi);
+			continue;
+		}
+		write_inode_now(ia_vi, 1);
+		iput(ia_vi);
+		write_inode_now(parent_vi, 1);
+		iput(parent_vi);
+	}
+	mutex_unlock(&ni->mrec_lock);
+	ntfs_attr_put_search_ctx(ctx);
+
 	err = file_write_and_wait_range(filp, start, end);
 	if (err)
 		return err;
 	inode_lock(vi);
 
-	BUG_ON(!S_ISDIR(vi->i_mode));
 	/* If the bitmap attribute inode is in memory sync it, too. */
 	na.mft_no = vi->i_ino;
 	na.type = AT_BITMAP;
@@ -1507,34 +1189,41 @@ static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
 	na.name_len = 4;
 	bmp_vi = ilookup5(vi->i_sb, vi->i_ino, ntfs_test_inode, &na);
 	if (bmp_vi) {
- 		write_inode_now(bmp_vi, !datasync);
+		write_inode_now(bmp_vi, !datasync);
 		iput(bmp_vi);
 	}
 	ret = __ntfs_write_inode(vi, 1);
+
 	write_inode_now(vi, !datasync);
+
+	write_inode_now(vol->mftbmp_ino, 1);
+	down_write(&vol->lcnbmp_lock);
+	write_inode_now(vol->lcnbmp_ino, 1);
+	up_write(&vol->lcnbmp_lock);
+	write_inode_now(vol->mft_ino, 1);
+
 	err = sync_blockdev(vi->i_sb->s_bdev);
 	if (unlikely(err && !ret))
 		ret = err;
 	if (likely(!ret))
 		ntfs_debug("Done.");
 	else
-		ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx.  Error "
-				"%u.", datasync ? "data" : "", vi->i_ino, -ret);
+		ntfs_warning(vi->i_sb,
+			"Failed to f%ssync inode 0x%lx.  Error %u.",
+			datasync ? "data" : "", vi->i_ino, -ret);
 	inode_unlock(vi);
 	return ret;
 }
 
-#endif /* NTFS_RW */
-
-WRAP_DIR_ITER(ntfs_readdir) // FIXME!
 const struct file_operations ntfs_dir_ops = {
 	.llseek		= generic_file_llseek,	/* Seek inside directory. */
 	.read		= generic_read_dir,	/* Return -EISDIR. */
-	.iterate_shared	= shared_ntfs_readdir,	/* Read directory contents. */
-#ifdef NTFS_RW
+	.iterate_shared	= ntfs_readdir,		/* Read directory contents. */
 	.fsync		= ntfs_dir_fsync,	/* Sync a directory to disk. */
-#endif /* NTFS_RW */
-	/*.ioctl	= ,*/			/* Perform function on the
-						   mounted filesystem. */
 	.open		= ntfs_dir_open,	/* Open directory. */
+	.release	= ntfs_dir_release,
+	.unlocked_ioctl	= ntfsp_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= ntfsp_compat_ioctl,
+#endif
 };
diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c
index d46c2c03a032..d22a44dd4b33 100644
--- a/fs/ntfs/index.c
+++ b/fs/ntfs/index.c
@@ -1,217 +1,607 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * index.c - NTFS kernel index handling.  Part of the Linux-NTFS project.
+ * NTFS kernel index handling. Part of the Linux-NTFS project.
  *
  * Copyright (c) 2004-2005 Anton Altaparmakov
+ * Copyright (c) 2025 LG Electronics Co., Ltd.
+ *
+ * Part of this file is based on code from the NTFS-3G project.
+ * and is copyrighted by the respective authors below:
+ * Copyright (c) 2004-2005 Anton Altaparmakov
+ * Copyright (c) 2004-2005 Richard Russon
+ * Copyright (c) 2005-2006 Yura Pakhuchiy
+ * Copyright (c) 2005-2008 Szabolcs Szakacsits
+ * Copyright (c) 2007-2021 Jean-Pierre Andre
  */
 
-#include <linux/slab.h>
-
-#include "aops.h"
 #include "collate.h"
-#include "debug.h"
 #include "index.h"
 #include "ntfs.h"
+#include "malloc.h"
+#include "attrlist.h"
+
+/*
+ * ntfs_index_entry_inconsistent - Check the consistency of an index entry
+ *
+ * Make sure data and key do not overflow from entry.
+ * As a side effect, an entry with zero length is rejected.
+ * This entry must be a full one (no INDEX_ENTRY_END flag), and its
+ * length must have been checked beforehand to not overflow from the
+ * index record.
+ */
+int ntfs_index_entry_inconsistent(struct ntfs_index_context *icx,
+		struct ntfs_volume *vol, const struct index_entry *ie,
+		__le32 collation_rule, u64 inum)
+{
+	if (icx) {
+		struct index_header *ih;
+		u8 *ie_start, *ie_end;
+
+		if (icx->is_in_root)
+			ih = &icx->ir->index;
+		else
+			ih = &icx->ib->index;
+
+		if ((le32_to_cpu(ih->index_length) > le32_to_cpu(ih->allocated_size)) ||
+				(le32_to_cpu(ih->index_length) > icx->block_size)) {
+			ntfs_error(vol->sb, "%s Index entry(0x%p)'s length is too big.",
+					icx->is_in_root ? "Index root" : "Index block",
+					(u8 *)icx->entry);
+			return -EINVAL;
+		}
+
+		ie_start = (u8 *)ih + le32_to_cpu(ih->entries_offset);
+		ie_end = (u8 *)ih + le32_to_cpu(ih->index_length);
+
+		if (ie_start > (u8 *)ie ||
+		    ie_end <= ((u8 *)ie + ie->length) ||
+		    ie->length > le32_to_cpu(ih->allocated_size) ||
+		    ie->length > icx->block_size) {
+			ntfs_error(vol->sb, "Index entry(0x%p) is out of range from %s",
+					(u8 *)icx->entry,
+					icx->is_in_root ? "index root" : "index block");
+			return -EIO;
+		}
+	}
+
+	if (ie->key_length &&
+	    ((le16_to_cpu(ie->key_length) + offsetof(struct index_entry, key)) >
+	     le16_to_cpu(ie->length))) {
+		ntfs_error(vol->sb, "Overflow from index entry in inode %lld\n",
+				(long long)inum);
+		return -EIO;
+
+	} else {
+		if (collation_rule == COLLATION_FILE_NAME) {
+			if ((offsetof(struct index_entry, key.file_name.file_name) +
+			     ie->key.file_name.file_name_length	* sizeof(__le16)) >
+					le16_to_cpu(ie->length)) {
+				ntfs_error(vol->sb,
+					"File name overflow from index entry in inode %lld\n",
+					(long long)inum);
+				return -EIO;
+			}
+		} else {
+			if (ie->data.vi.data_length &&
+			    ((le16_to_cpu(ie->data.vi.data_offset) +
+			      le16_to_cpu(ie->data.vi.data_length)) >
+			     le16_to_cpu(ie->length))) {
+				ntfs_error(vol->sb,
+					"Data overflow from index entry in inode %lld\n",
+					(long long)inum);
+				return -EIO;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ntfs_index_entry_mark_dirty - mark an index entry dirty
+ * @ictx:	ntfs index context describing the index entry
+ *
+ * Mark the index entry described by the index entry context @ictx dirty.
+ *
+ * If the index entry is in the index root attribute, simply mark the inode
+ * containing the index root attribute dirty.  This ensures the mftrecord, and
+ * hence the index root attribute, will be written out to disk later.
+ *
+ * If the index entry is in an index block belonging to the index allocation
+ * attribute, set ib_dirty to true, thus index block will be updated during
+ * ntfs_index_ctx_put.
+ */
+void ntfs_index_entry_mark_dirty(struct ntfs_index_context *ictx)
+{
+	if (ictx->is_in_root)
+		mark_mft_record_dirty(ictx->actx->ntfs_ino);
+	else if (ictx->ib)
+		ictx->ib_dirty = true;
+}
+
+static s64 ntfs_ib_vcn_to_pos(struct ntfs_index_context *icx, s64 vcn)
+{
+	return vcn << icx->vcn_size_bits;
+}
+
+static s64 ntfs_ib_pos_to_vcn(struct ntfs_index_context *icx, s64 pos)
+{
+	return pos >> icx->vcn_size_bits;
+}
+
+static int ntfs_ib_write(struct ntfs_index_context *icx, struct index_block *ib)
+{
+	s64 ret, vcn = le64_to_cpu(ib->index_block_vcn);
+
+	ntfs_debug("vcn: %lld\n", vcn);
+
+	ret = pre_write_mst_fixup((struct ntfs_record *)ib, icx->block_size);
+	if (ret)
+		return -EIO;
+
+	ret = ntfs_inode_attr_pwrite(VFS_I(icx->ia_ni),
+			ntfs_ib_vcn_to_pos(icx, vcn), icx->block_size,
+			(u8 *)ib, icx->sync_write);
+	if (ret != icx->block_size) {
+		ntfs_debug("Failed to write index block %lld, inode %llu",
+				vcn, (unsigned long long)icx->idx_ni->mft_no);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int ntfs_icx_ib_write(struct ntfs_index_context *icx)
+{
+	int err;
+
+	err = ntfs_ib_write(icx, icx->ib);
+	if (err)
+		return err;
+
+	icx->ib_dirty = false;
+
+	return 0;
+}
+
+int ntfs_icx_ib_sync_write(struct ntfs_index_context *icx)
+{
+	int ret;
+
+	if (icx->ib_dirty == false)
+		return 0;
+
+	icx->sync_write = true;
+
+	ret = ntfs_ib_write(icx, icx->ib);
+	if (!ret) {
+		ntfs_free(icx->ib);
+		icx->ib = NULL;
+		icx->ib_dirty = false;
+	} else {
+		post_write_mst_fixup((struct ntfs_record *)icx->ib);
+		icx->sync_write = false;
+	}
+
+	return ret;
+}
 
 /**
  * ntfs_index_ctx_get - allocate and initialize a new index context
- * @idx_ni:	ntfs index inode with which to initialize the context
+ * @ni:		ntfs inode with which to initialize the context
+ * @name:	name of the which context describes
+ * @name_len:	length of the index name
  *
- * Allocate a new index context, initialize it with @idx_ni and return it.
+ * Allocate a new index context, initialize it with @ni and return it.
  * Return NULL if allocation failed.
- *
- * Locking:  Caller must hold i_mutex on the index inode.
  */
-ntfs_index_context *ntfs_index_ctx_get(ntfs_inode *idx_ni)
+struct ntfs_index_context *ntfs_index_ctx_get(struct ntfs_inode *ni,
+		__le16 *name, u32 name_len)
+{
+	struct ntfs_index_context *icx;
+
+	ntfs_debug("Entering\n");
+
+	if (!ni)
+		return NULL;
+
+	if (ni->nr_extents == -1)
+		ni = ni->ext.base_ntfs_ino;
+
+	icx = kmem_cache_alloc(ntfs_index_ctx_cache, GFP_NOFS);
+	if (icx)
+		*icx = (struct ntfs_index_context) {
+			.idx_ni = ni,
+			.name = name,
+			.name_len = name_len,
+		};
+	return icx;
+}
+
+static void ntfs_index_ctx_free(struct ntfs_index_context *icx)
 {
-	ntfs_index_context *ictx;
+	ntfs_debug("Entering\n");
+
+	if (icx->actx) {
+		ntfs_attr_put_search_ctx(icx->actx);
+		icx->actx = NULL;
+	}
+
+	if (!icx->is_in_root) {
+		if (icx->ib_dirty)
+			ntfs_ib_write(icx, icx->ib);
+		ntfs_free(icx->ib);
+		icx->ib = NULL;
+	}
 
-	ictx = kmem_cache_alloc(ntfs_index_ctx_cache, GFP_NOFS);
-	if (ictx)
-		*ictx = (ntfs_index_context){ .idx_ni = idx_ni };
-	return ictx;
+	if (icx->ia_ni) {
+		iput(VFS_I(icx->ia_ni));
+		icx->ia_ni = NULL;
+	}
 }
 
 /**
  * ntfs_index_ctx_put - release an index context
- * @ictx:	index context to free
+ * @icx:	index context to free
  *
- * Release the index context @ictx, releasing all associated resources.
+ * Release the index context @icx, releasing all associated resources.
+ */
+void ntfs_index_ctx_put(struct ntfs_index_context *icx)
+{
+	ntfs_index_ctx_free(icx);
+	kmem_cache_free(ntfs_index_ctx_cache, icx);
+}
+
+/**
+ * ntfs_index_ctx_reinit - reinitialize an index context
+ * @icx:	index context to reinitialize
  *
- * Locking:  Caller must hold i_mutex on the index inode.
+ * Reinitialize the index context @icx so it can be used for ntfs_index_lookup.
  */
-void ntfs_index_ctx_put(ntfs_index_context *ictx)
+void ntfs_index_ctx_reinit(struct ntfs_index_context *icx)
 {
-	if (ictx->entry) {
-		if (ictx->is_in_root) {
-			if (ictx->actx)
-				ntfs_attr_put_search_ctx(ictx->actx);
-			if (ictx->base_ni)
-				unmap_mft_record(ictx->base_ni);
-		} else {
-			struct page *page = ictx->page;
-			if (page) {
-				BUG_ON(!PageLocked(page));
-				unlock_page(page);
-				ntfs_unmap_page(page);
-			}
-		}
+	ntfs_debug("Entering\n");
+
+	ntfs_index_ctx_free(icx);
+
+	*icx = (struct ntfs_index_context) {
+		.idx_ni = icx->idx_ni,
+		.name = icx->name,
+		.name_len = icx->name_len,
+	};
+}
+
+static __le64 *ntfs_ie_get_vcn_addr(struct index_entry *ie)
+{
+	return (__le64 *)((u8 *)ie + le16_to_cpu(ie->length) - sizeof(s64));
+}
+
+/**
+ *  Get the subnode vcn to which the index entry refers.
+ */
+static s64 ntfs_ie_get_vcn(struct index_entry *ie)
+{
+	return le64_to_cpup(ntfs_ie_get_vcn_addr(ie));
+}
+
+static struct index_entry *ntfs_ie_get_first(struct index_header *ih)
+{
+	return (struct index_entry *)((u8 *)ih + le32_to_cpu(ih->entries_offset));
+}
+
+static struct index_entry *ntfs_ie_get_next(struct index_entry *ie)
+{
+	return (struct index_entry *)((char *)ie + le16_to_cpu(ie->length));
+}
+
+static u8 *ntfs_ie_get_end(struct index_header *ih)
+{
+	return (u8 *)ih + le32_to_cpu(ih->index_length);
+}
+
+static int ntfs_ie_end(struct index_entry *ie)
+{
+	return ie->flags & INDEX_ENTRY_END || !ie->length;
+}
+
+/**
+ *  Find the last entry in the index block
+ */
+static struct index_entry *ntfs_ie_get_last(struct index_entry *ie, char *ies_end)
+{
+	ntfs_debug("Entering\n");
+
+	while ((char *)ie < ies_end && !ntfs_ie_end(ie))
+		ie = ntfs_ie_get_next(ie);
+
+	return ie;
+}
+
+static struct index_entry *ntfs_ie_get_by_pos(struct index_header *ih, int pos)
+{
+	struct index_entry *ie;
+
+	ntfs_debug("pos: %d\n", pos);
+
+	ie = ntfs_ie_get_first(ih);
+
+	while (pos-- > 0)
+		ie = ntfs_ie_get_next(ie);
+
+	return ie;
+}
+
+static struct index_entry *ntfs_ie_prev(struct index_header *ih, struct index_entry *ie)
+{
+	struct index_entry *ie_prev = NULL;
+	struct index_entry *tmp;
+
+	ntfs_debug("Entering\n");
+
+	tmp = ntfs_ie_get_first(ih);
+
+	while (tmp != ie) {
+		ie_prev = tmp;
+		tmp = ntfs_ie_get_next(tmp);
 	}
-	kmem_cache_free(ntfs_index_ctx_cache, ictx);
-	return;
+
+	return ie_prev;
+}
+
+static int ntfs_ih_numof_entries(struct index_header *ih)
+{
+	int n;
+	struct index_entry *ie;
+	u8 *end;
+
+	ntfs_debug("Entering\n");
+
+	end = ntfs_ie_get_end(ih);
+	ie = ntfs_ie_get_first(ih);
+	for (n = 0; !ntfs_ie_end(ie) && (u8 *)ie < end; n++)
+		ie = ntfs_ie_get_next(ie);
+	return n;
+}
+
+static int ntfs_ih_one_entry(struct index_header *ih)
+{
+	return (ntfs_ih_numof_entries(ih) == 1);
+}
+
+static int ntfs_ih_zero_entry(struct index_header *ih)
+{
+	return (ntfs_ih_numof_entries(ih) == 0);
+}
+
+static void ntfs_ie_delete(struct index_header *ih, struct index_entry *ie)
+{
+	u32 new_size;
+
+	ntfs_debug("Entering\n");
+
+	new_size = le32_to_cpu(ih->index_length) - le16_to_cpu(ie->length);
+	ih->index_length = cpu_to_le32(new_size);
+	memmove(ie, (u8 *)ie + le16_to_cpu(ie->length),
+			new_size - ((u8 *)ie - (u8 *)ih));
+}
+
+static void ntfs_ie_set_vcn(struct index_entry *ie, s64 vcn)
+{
+	*ntfs_ie_get_vcn_addr(ie) = cpu_to_le64(vcn);
 }
 
 /**
- * ntfs_index_lookup - find a key in an index and return its index entry
- * @key:	[IN] key for which to search in the index
- * @key_len:	[IN] length of @key in bytes
- * @ictx:	[IN/OUT] context describing the index and the returned entry
- *
- * Before calling ntfs_index_lookup(), @ictx must have been obtained from a
- * call to ntfs_index_ctx_get().
- *
- * Look for the @key in the index specified by the index lookup context @ictx.
- * ntfs_index_lookup() walks the contents of the index looking for the @key.
- *
- * If the @key is found in the index, 0 is returned and @ictx is setup to
- * describe the index entry containing the matching @key.  @ictx->entry is the
- * index entry and @ictx->data and @ictx->data_len are the index entry data and
- * its length in bytes, respectively.
- *
- * If the @key is not found in the index, -ENOENT is returned and @ictx is
- * setup to describe the index entry whose key collates immediately after the
- * search @key, i.e. this is the position in the index at which an index entry
- * with a key of @key would need to be inserted.
+ *  Insert @ie index entry at @pos entry. Used @ih values should be ok already.
+ */
+static void ntfs_ie_insert(struct index_header *ih, struct index_entry *ie,
+		struct index_entry *pos)
+{
+	int ie_size = le16_to_cpu(ie->length);
+
+	ntfs_debug("Entering\n");
+
+	ih->index_length = cpu_to_le32(le32_to_cpu(ih->index_length) + ie_size);
+	memmove((u8 *)pos + ie_size, pos,
+			le32_to_cpu(ih->index_length) - ((u8 *)pos - (u8 *)ih) - ie_size);
+	memcpy(pos, ie, ie_size);
+}
+
+static struct index_entry *ntfs_ie_dup(struct index_entry *ie)
+{
+	struct index_entry *dup;
+
+	ntfs_debug("Entering\n");
+
+	dup = ntfs_malloc_nofs(le16_to_cpu(ie->length));
+	if (dup)
+		memcpy(dup, ie, le16_to_cpu(ie->length));
+
+	return dup;
+}
+
+static struct index_entry *ntfs_ie_dup_novcn(struct index_entry *ie)
+{
+	struct index_entry *dup;
+	int size = le16_to_cpu(ie->length);
+
+	ntfs_debug("Entering\n");
+
+	if (ie->flags & INDEX_ENTRY_NODE)
+		size -= sizeof(s64);
+
+	dup = ntfs_malloc_nofs(size);
+	if (dup) {
+		memcpy(dup, ie, size);
+		dup->flags &= ~INDEX_ENTRY_NODE;
+		dup->length = cpu_to_le16(size);
+	}
+	return dup;
+}
+
+/*
+ * Check the consistency of an index block
  *
- * If an error occurs return the negative error code and @ictx is left
- * untouched.
+ * Make sure the index block does not overflow from the index record.
+ * The size of block is assumed to have been checked to be what is
+ * defined in the index root.
  *
- * When finished with the entry and its data, call ntfs_index_ctx_put() to free
- * the context and other associated resources.
+ * Returns 0 if no error was found -1 otherwise (with errno unchanged)
  *
- * If the index entry was modified, call flush_dcache_index_entry_page()
- * immediately after the modification and either ntfs_index_entry_mark_dirty()
- * or ntfs_index_entry_write() before the call to ntfs_index_ctx_put() to
- * ensure that the changes are written to disk.
+ * |<--->|  offsetof(struct index_block, index)
+ * |     |<--->|  sizeof(struct index_header)
+ * |     |     |
+ * |     |     | seq          index entries         unused
+ * |=====|=====|=====|===========================|==============|
+ * |     |           |                           |              |
+ * |     |<--------->| entries_offset            |              |
+ * |     |<---------------- index_length ------->|              |
+ * |     |<--------------------- allocated_size --------------->|
+ * |<--------------------------- block_size ------------------->|
  *
- * Locking:  - Caller must hold i_mutex on the index inode.
- *	     - Each page cache page in the index allocation mapping must be
- *	       locked whilst being accessed otherwise we may find a corrupt
- *	       page due to it being under ->writepage at the moment which
- *	       applies the mst protection fixups before writing out and then
- *	       removes them again after the write is complete after which it 
- *	       unlocks the page.
+ * size(struct index_header) <= ent_offset < ind_length <= alloc_size < bk_size
  */
-int ntfs_index_lookup(const void *key, const int key_len,
-		ntfs_index_context *ictx)
-{
-	VCN vcn, old_vcn;
-	ntfs_inode *idx_ni = ictx->idx_ni;
-	ntfs_volume *vol = idx_ni->vol;
-	struct super_block *sb = vol->sb;
-	ntfs_inode *base_ni = idx_ni->ext.base_ntfs_ino;
-	MFT_RECORD *m;
-	INDEX_ROOT *ir;
-	INDEX_ENTRY *ie;
-	INDEX_ALLOCATION *ia;
-	u8 *index_end, *kaddr;
-	ntfs_attr_search_ctx *actx;
-	struct address_space *ia_mapping;
-	struct page *page;
-	int rc, err = 0;
-
-	ntfs_debug("Entering.");
-	BUG_ON(!NInoAttr(idx_ni));
-	BUG_ON(idx_ni->type != AT_INDEX_ALLOCATION);
-	BUG_ON(idx_ni->nr_extents != -1);
-	BUG_ON(!base_ni);
-	BUG_ON(!key);
-	BUG_ON(key_len <= 0);
-	if (!ntfs_is_collation_rule_supported(
-			idx_ni->itype.index.collation_rule)) {
-		ntfs_error(sb, "Index uses unsupported collation rule 0x%x.  "
-				"Aborting lookup.", le32_to_cpu(
-				idx_ni->itype.index.collation_rule));
-		return -EOPNOTSUPP;
+static int ntfs_index_block_inconsistent(struct ntfs_index_context *icx,
+		struct index_block *ib, s64 vcn)
+{
+	u32 ib_size = (unsigned int)le32_to_cpu(ib->index.allocated_size) +
+		offsetof(struct index_block, index);
+	struct super_block *sb = icx->idx_ni->vol->sb;
+	unsigned long long inum = icx->idx_ni->mft_no;
+
+	ntfs_debug("Entering\n");
+
+	if (!ntfs_is_indx_record(ib->magic)) {
+
+		ntfs_error(sb, "Corrupt index block signature: vcn %lld inode %llu\n",
+				vcn, (unsigned long long)icx->idx_ni->mft_no);
+		return -1;
+	}
+
+	if (le64_to_cpu(ib->index_block_vcn) != vcn) {
+		ntfs_error(sb,
+			"Corrupt index block: s64 (%lld) is different from expected s64 (%lld) in inode %llu\n",
+			(long long)le64_to_cpu(ib->index_block_vcn),
+			vcn, inum);
+		return -1;
 	}
-	/* Get hold of the mft record for the index inode. */
-	m = map_mft_record(base_ni);
-	if (IS_ERR(m)) {
-		ntfs_error(sb, "map_mft_record() failed with error code %ld.",
-				-PTR_ERR(m));
-		return PTR_ERR(m);
+
+	if (ib_size != icx->block_size) {
+		ntfs_error(sb,
+			"Corrupt index block : s64 (%lld) of inode %llu has a size (%u) differing from the index specified size (%u)\n",
+			vcn, inum, ib_size, icx->block_size);
+		return -1;
 	}
-	actx = ntfs_attr_get_search_ctx(base_ni, m);
-	if (unlikely(!actx)) {
-		err = -ENOMEM;
+
+	if (le32_to_cpu(ib->index.entries_offset) < sizeof(struct index_header)) {
+		ntfs_error(sb, "Invalid index entry offset in inode %lld\n", inum);
+		return -1;
+	}
+	if (le32_to_cpu(ib->index.index_length) <=
+	    le32_to_cpu(ib->index.entries_offset)) {
+		ntfs_error(sb, "No space for index entries in inode %lld\n", inum);
+		return -1;
+	}
+	if (le32_to_cpu(ib->index.allocated_size) <
+	    le32_to_cpu(ib->index.index_length)) {
+		ntfs_error(sb, "Index entries overflow in inode %lld\n", inum);
+		return -1;
+	}
+
+	return 0;
+}
+
+static struct index_root *ntfs_ir_lookup(struct ntfs_inode *ni, __le16 *name,
+		u32 name_len, struct ntfs_attr_search_ctx **ctx)
+{
+	struct attr_record *a;
+	struct index_root *ir = NULL;
+
+	ntfs_debug("Entering\n");
+	*ctx = ntfs_attr_get_search_ctx(ni, NULL);
+	if (!*ctx) {
+		ntfs_error(ni->vol->sb, "%s, Failed to get search context", __func__);
+		return NULL;
+	}
+
+	if (ntfs_attr_lookup(AT_INDEX_ROOT, name, name_len, CASE_SENSITIVE,
+				0, NULL, 0, *ctx)) {
+		ntfs_error(ni->vol->sb, "Failed to lookup $INDEX_ROOT");
 		goto err_out;
 	}
-	/* Find the index root attribute in the mft record. */
-	err = ntfs_attr_lookup(AT_INDEX_ROOT, idx_ni->name, idx_ni->name_len,
-			CASE_SENSITIVE, 0, NULL, 0, actx);
-	if (unlikely(err)) {
-		if (err == -ENOENT) {
-			ntfs_error(sb, "Index root attribute missing in inode "
-					"0x%lx.", idx_ni->mft_no);
-			err = -EIO;
-		}
+
+	a = (*ctx)->attr;
+	if (a->non_resident) {
+		ntfs_error(ni->vol->sb, "Non-resident $INDEX_ROOT detected");
 		goto err_out;
 	}
-	/* Get to the index root value (it has been verified in read_inode). */
-	ir = (INDEX_ROOT*)((u8*)actx->attr +
-			le16_to_cpu(actx->attr->data.resident.value_offset));
-	index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length);
-	/* The first index entry. */
-	ie = (INDEX_ENTRY*)((u8*)&ir->index +
-			le32_to_cpu(ir->index.entries_offset));
+
+	ir = (struct index_root *)((char *)a + le16_to_cpu(a->data.resident.value_offset));
+err_out:
+	if (!ir) {
+		ntfs_attr_put_search_ctx(*ctx);
+		*ctx = NULL;
+	}
+	return ir;
+}
+
+static struct index_root *ntfs_ir_lookup2(struct ntfs_inode *ni, __le16 *name, u32 len)
+{
+	struct ntfs_attr_search_ctx *ctx;
+	struct index_root *ir;
+
+	ir = ntfs_ir_lookup(ni, name, len, &ctx);
+	if (ir)
+		ntfs_attr_put_search_ctx(ctx);
+	return ir;
+}
+
+/**
+ * Find a key in the index block.
+ */
+static int ntfs_ie_lookup(const void *key, const int key_len,
+		struct ntfs_index_context *icx, struct index_header *ih,
+		s64 *vcn, struct index_entry **ie_out)
+{
+	struct index_entry *ie;
+	u8 *index_end;
+	int rc, item = 0;
+
+	ntfs_debug("Entering\n");
+
+	index_end = ntfs_ie_get_end(ih);
+
 	/*
 	 * Loop until we exceed valid memory (corruption case) or until we
 	 * reach the last entry.
 	 */
-	for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
+	for (ie = ntfs_ie_get_first(ih); ; ie = ntfs_ie_get_next(ie)) {
 		/* Bounds checks. */
-		if ((u8*)ie < (u8*)actx->mrec || (u8*)ie +
-				sizeof(INDEX_ENTRY_HEADER) > index_end ||
-				(u8*)ie + le16_to_cpu(ie->length) > index_end)
-			goto idx_err_out;
+		if ((u8 *)ie + sizeof(struct index_entry_header) > index_end ||
+				(u8 *)ie + le16_to_cpu(ie->length) > index_end) {
+			ntfs_error(icx->idx_ni->vol->sb,
+					"Index entry out of bounds in inode %llu.\n",
+					(unsigned long long)icx->idx_ni->mft_no);
+			return -ERANGE;
+		}
+
 		/*
 		 * The last entry cannot contain a key.  It can however contain
 		 * a pointer to a child node in the B+tree so we just break out.
 		 */
-		if (ie->flags & INDEX_ENTRY_END)
+		if (ntfs_ie_end(ie))
 			break;
-		/* Further bounds checks. */
-		if ((u32)sizeof(INDEX_ENTRY_HEADER) +
-				le16_to_cpu(ie->key_length) >
-				le16_to_cpu(ie->data.vi.data_offset) ||
-				(u32)le16_to_cpu(ie->data.vi.data_offset) +
-				le16_to_cpu(ie->data.vi.data_length) >
-				le16_to_cpu(ie->length))
-			goto idx_err_out;
-		/* If the keys match perfectly, we setup @ictx and return 0. */
-		if ((key_len == le16_to_cpu(ie->key_length)) && !memcmp(key,
-				&ie->key, key_len)) {
-ir_done:
-			ictx->is_in_root = true;
-			ictx->ir = ir;
-			ictx->actx = actx;
-			ictx->base_ni = base_ni;
-			ictx->ia = NULL;
-			ictx->page = NULL;
-done:
-			ictx->entry = ie;
-			ictx->data = (u8*)ie +
-					le16_to_cpu(ie->data.vi.data_offset);
-			ictx->data_len = le16_to_cpu(ie->data.vi.data_length);
-			ntfs_debug("Done.");
-			return err;
-		}
+
 		/*
 		 * Not a perfect match, need to do full blown collation so we
 		 * know which way in the B+tree we have to go.
 		 */
-		rc = ntfs_collate(vol, idx_ni->itype.index.collation_rule, key,
-				key_len, &ie->key, le16_to_cpu(ie->key_length));
+		rc = ntfs_collate(icx->idx_ni->vol, icx->cr, key, key_len, &ie->key,
+				le16_to_cpu(ie->key_length));
+		if (rc == -2) {
+			ntfs_error(icx->idx_ni->vol->sb,
+				"Collation error. Perhaps a filename contains invalid characters?\n");
+			return -ERANGE;
+		}
 		/*
 		 * If @key collates before the key of the current entry, there
 		 * is definitely no such key in this index but we might need to
@@ -219,222 +609,1507 @@ int ntfs_index_lookup(const void *key, const int key_len,
 		 */
 		if (rc == -1)
 			break;
-		/*
-		 * A match should never happen as the memcmp() call should have
-		 * cought it, but we still treat it correctly.
-		 */
-		if (!rc)
-			goto ir_done;
-		/* The keys are not equal, continue the search. */
+
+		if (!rc) {
+			*ie_out = ie;
+			icx->parent_pos[icx->pindex] = item;
+			return 0;
+		}
+
+		item++;
 	}
 	/*
-	 * We have finished with this index without success.  Check for the
-	 * presence of a child node and if not present setup @ictx and return
-	 * -ENOENT.
+	 * We have finished with this index block without success. Check for the
+	 * presence of a child node and if not present return with errno ENOENT,
+	 * otherwise we will keep searching in another index block.
 	 */
 	if (!(ie->flags & INDEX_ENTRY_NODE)) {
-		ntfs_debug("Entry not found.");
-		err = -ENOENT;
-		goto ir_done;
-	} /* Child node present, descend into it. */
-	/* Consistency check: Verify that an index allocation exists. */
-	if (!NInoIndexAllocPresent(idx_ni)) {
-		ntfs_error(sb, "No index allocation attribute but index entry "
-				"requires one.  Inode 0x%lx is corrupt or "
-				"driver bug.", idx_ni->mft_no);
-		goto err_out;
+		ntfs_debug("Index entry wasn't found.\n");
+		*ie_out = ie;
+		return -ENOENT;
 	}
+
 	/* Get the starting vcn of the index_block holding the child node. */
-	vcn = sle64_to_cpup((sle64*)((u8*)ie + le16_to_cpu(ie->length) - 8));
-	ia_mapping = VFS_I(idx_ni)->i_mapping;
-	/*
-	 * We are done with the index root and the mft record.  Release them,
-	 * otherwise we deadlock with ntfs_map_page().
-	 */
-	ntfs_attr_put_search_ctx(actx);
-	unmap_mft_record(base_ni);
-	m = NULL;
-	actx = NULL;
-descend_into_child_node:
-	/*
-	 * Convert vcn to index into the index allocation attribute in units
-	 * of PAGE_SIZE and map the page cache page, reading it from
-	 * disk if necessary.
-	 */
-	page = ntfs_map_page(ia_mapping, vcn <<
-			idx_ni->itype.index.vcn_size_bits >> PAGE_SHIFT);
-	if (IS_ERR(page)) {
-		ntfs_error(sb, "Failed to map index page, error %ld.",
-				-PTR_ERR(page));
-		err = PTR_ERR(page);
-		goto err_out;
+	*vcn = ntfs_ie_get_vcn(ie);
+	if (*vcn < 0) {
+		ntfs_error(icx->idx_ni->vol->sb, "Negative vcn in inode %llu\n",
+				(unsigned long long)icx->idx_ni->mft_no);
+		return -EINVAL;
 	}
-	lock_page(page);
-	kaddr = (u8*)page_address(page);
-fast_descend_into_child_node:
-	/* Get to the index allocation block. */
-	ia = (INDEX_ALLOCATION*)(kaddr + ((vcn <<
-			idx_ni->itype.index.vcn_size_bits) & ~PAGE_MASK));
-	/* Bounds checks. */
-	if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_SIZE) {
-		ntfs_error(sb, "Out of bounds check failed.  Corrupt inode "
-				"0x%lx or driver bug.", idx_ni->mft_no);
-		goto unm_err_out;
-	}
-	/* Catch multi sector transfer fixup errors. */
-	if (unlikely(!ntfs_is_indx_record(ia->magic))) {
-		ntfs_error(sb, "Index record with vcn 0x%llx is corrupt.  "
-				"Corrupt inode 0x%lx.  Run chkdsk.",
-				(long long)vcn, idx_ni->mft_no);
-		goto unm_err_out;
-	}
-	if (sle64_to_cpu(ia->index_block_vcn) != vcn) {
-		ntfs_error(sb, "Actual VCN (0x%llx) of index buffer is "
-				"different from expected VCN (0x%llx).  Inode "
-				"0x%lx is corrupt or driver bug.",
-				(unsigned long long)
-				sle64_to_cpu(ia->index_block_vcn),
-				(unsigned long long)vcn, idx_ni->mft_no);
-		goto unm_err_out;
-	}
-	if (le32_to_cpu(ia->index.allocated_size) + 0x18 !=
-			idx_ni->itype.index.block_size) {
-		ntfs_error(sb, "Index buffer (VCN 0x%llx) of inode 0x%lx has "
-				"a size (%u) differing from the index "
-				"specified size (%u).  Inode is corrupt or "
-				"driver bug.", (unsigned long long)vcn,
-				idx_ni->mft_no,
-				le32_to_cpu(ia->index.allocated_size) + 0x18,
-				idx_ni->itype.index.block_size);
-		goto unm_err_out;
-	}
-	index_end = (u8*)ia + idx_ni->itype.index.block_size;
-	if (index_end > kaddr + PAGE_SIZE) {
-		ntfs_error(sb, "Index buffer (VCN 0x%llx) of inode 0x%lx "
-				"crosses page boundary.  Impossible!  Cannot "
-				"access!  This is probably a bug in the "
-				"driver.", (unsigned long long)vcn,
-				idx_ni->mft_no);
-		goto unm_err_out;
-	}
-	index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length);
-	if (index_end > (u8*)ia + idx_ni->itype.index.block_size) {
-		ntfs_error(sb, "Size of index buffer (VCN 0x%llx) of inode "
-				"0x%lx exceeds maximum size.",
-				(unsigned long long)vcn, idx_ni->mft_no);
-		goto unm_err_out;
-	}
-	/* The first index entry. */
-	ie = (INDEX_ENTRY*)((u8*)&ia->index +
-			le32_to_cpu(ia->index.entries_offset));
-	/*
-	 * Iterate similar to above big loop but applied to index buffer, thus
-	 * loop until we exceed valid memory (corruption case) or until we
-	 * reach the last entry.
-	 */
-	for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
-		/* Bounds checks. */
-		if ((u8*)ie < (u8*)ia || (u8*)ie +
-				sizeof(INDEX_ENTRY_HEADER) > index_end ||
-				(u8*)ie + le16_to_cpu(ie->length) > index_end) {
-			ntfs_error(sb, "Index entry out of bounds in inode "
-					"0x%lx.", idx_ni->mft_no);
-			goto unm_err_out;
-		}
-		/*
-		 * The last entry cannot contain a key.  It can however contain
-		 * a pointer to a child node in the B+tree so we just break out.
-		 */
-		if (ie->flags & INDEX_ENTRY_END)
-			break;
-		/* Further bounds checks. */
-		if ((u32)sizeof(INDEX_ENTRY_HEADER) +
-				le16_to_cpu(ie->key_length) >
-				le16_to_cpu(ie->data.vi.data_offset) ||
-				(u32)le16_to_cpu(ie->data.vi.data_offset) +
-				le16_to_cpu(ie->data.vi.data_length) >
-				le16_to_cpu(ie->length)) {
-			ntfs_error(sb, "Index entry out of bounds in inode "
-					"0x%lx.", idx_ni->mft_no);
-			goto unm_err_out;
-		}
-		/* If the keys match perfectly, we setup @ictx and return 0. */
-		if ((key_len == le16_to_cpu(ie->key_length)) && !memcmp(key,
-				&ie->key, key_len)) {
-ia_done:
-			ictx->is_in_root = false;
-			ictx->actx = NULL;
-			ictx->base_ni = NULL;
-			ictx->ia = ia;
-			ictx->page = page;
-			goto done;
-		}
-		/*
-		 * Not a perfect match, need to do full blown collation so we
-		 * know which way in the B+tree we have to go.
-		 */
-		rc = ntfs_collate(vol, idx_ni->itype.index.collation_rule, key,
-				key_len, &ie->key, le16_to_cpu(ie->key_length));
-		/*
-		 * If @key collates before the key of the current entry, there
-		 * is definitely no such key in this index but we might need to
-		 * descend into the B+tree so we just break out of the loop.
-		 */
-		if (rc == -1)
-			break;
-		/*
-		 * A match should never happen as the memcmp() call should have
-		 * cought it, but we still treat it correctly.
-		 */
-		if (!rc)
-			goto ia_done;
-		/* The keys are not equal, continue the search. */
+
+	ntfs_debug("Parent entry number %d\n", item);
+	icx->parent_pos[icx->pindex] = item;
+
+	return -EAGAIN;
+}
+
+struct ntfs_inode *ntfs_ia_open(struct ntfs_index_context *icx, struct ntfs_inode *ni)
+{
+	struct inode *ia_vi;
+
+	ia_vi = ntfs_index_iget(VFS_I(ni), icx->name, icx->name_len);
+	if (IS_ERR(ia_vi)) {
+		ntfs_error(icx->idx_ni->vol->sb,
+				"Failed to open index allocation of inode %llu",
+				(unsigned long long)ni->mft_no);
+		return NULL;
 	}
-	/*
-	 * We have finished with this index buffer without success.  Check for
-	 * the presence of a child node and if not present return -ENOENT.
-	 */
-	if (!(ie->flags & INDEX_ENTRY_NODE)) {
-		ntfs_debug("Entry not found.");
-		err = -ENOENT;
-		goto ia_done;
+
+	return NTFS_I(ia_vi);
+}
+
+static int ntfs_ib_read(struct ntfs_index_context *icx, s64 vcn, struct index_block *dst)
+{
+	s64 pos, ret;
+
+	ntfs_debug("vcn: %lld\n", vcn);
+
+	pos = ntfs_ib_vcn_to_pos(icx, vcn);
+
+	ret = ntfs_inode_attr_pread(VFS_I(icx->ia_ni), pos, icx->block_size, (u8 *)dst);
+	if (ret != icx->block_size) {
+		if (ret == -1)
+			ntfs_error(icx->idx_ni->vol->sb, "Failed to read index block");
+		else
+			ntfs_error(icx->idx_ni->vol->sb,
+				"Failed to read full index block at %lld\n", pos);
+		return -1;
+	}
+
+	post_read_mst_fixup((struct ntfs_record *)((u8 *)dst), icx->block_size);
+	if (ntfs_index_block_inconsistent(icx, dst, vcn))
+		return -1;
+
+	return 0;
+}
+
+static int ntfs_icx_parent_inc(struct ntfs_index_context *icx)
+{
+	icx->pindex++;
+	if (icx->pindex >= MAX_PARENT_VCN) {
+		ntfs_error(icx->idx_ni->vol->sb, "Index is over %d level deep", MAX_PARENT_VCN);
+		return -EOPNOTSUPP;
 	}
-	if ((ia->index.flags & NODE_MASK) == LEAF_NODE) {
-		ntfs_error(sb, "Index entry with child node found in a leaf "
-				"node in inode 0x%lx.", idx_ni->mft_no);
-		goto unm_err_out;
+	return 0;
+}
+
+static int ntfs_icx_parent_dec(struct ntfs_index_context *icx)
+{
+	icx->pindex--;
+	if (icx->pindex < 0) {
+		ntfs_error(icx->idx_ni->vol->sb, "Corrupt index pointer (%d)", icx->pindex);
+		return -EINVAL;
 	}
+	return 0;
+}
+
+/**
+ * ntfs_index_lookup - find a key in an index and return its index entry
+ * @key:	key for which to search in the index
+ * @key_len:	length of @key in bytes
+ * @icx:	context describing the index and the returned entry
+ *
+ * Before calling ntfs_index_lookup(), @icx must have been obtained from a
+ * call to ntfs_index_ctx_get().
+ *
+ * Look for the @key in the index specified by the index lookup context @icx.
+ * ntfs_index_lookup() walks the contents of the index looking for the @key.
+ *
+ * If the @key is found in the index, 0 is returned and @icx is setup to
+ * describe the index entry containing the matching @key.  @icx->entry is the
+ * index entry and @icx->data and @icx->data_len are the index entry data and
+ * its length in bytes, respectively.
+ *
+ * If the @key is not found in the index, -ENOENT is returned and
+ * @icx is setup to describe the index entry whose key collates immediately
+ * after the search @key, i.e. this is the position in the index at which
+ * an index entry with a key of @key would need to be inserted.
+ *
+ * When finished with the entry and its data, call ntfs_index_ctx_put() to free
+ * the context and other associated resources.
+ *
+ * If the index entry was modified, call ntfs_index_entry_mark_dirty() before
+ * the call to ntfs_index_ctx_put() to ensure that the changes are written
+ * to disk.
+ */
+int ntfs_index_lookup(const void *key, const int key_len, struct ntfs_index_context *icx)
+{
+	s64 old_vcn, vcn;
+	struct ntfs_inode *ni = icx->idx_ni;
+	struct super_block *sb = ni->vol->sb;
+	struct index_root *ir;
+	struct index_entry *ie;
+	struct index_block *ib = NULL;
+	int err = 0;
+
+	ntfs_debug("Entering\n");
+
+	if (!key || key_len <= 0) {
+		ntfs_error(sb, "key: %p  key_len: %d", key, key_len);
+		return -EINVAL;
+	}
+
+	ir = ntfs_ir_lookup(ni, icx->name, icx->name_len, &icx->actx);
+	if (!ir)
+		return -EIO;
+
+	icx->block_size = le32_to_cpu(ir->index_block_size);
+	if (icx->block_size < NTFS_BLOCK_SIZE) {
+		err = -EINVAL;
+		ntfs_error(sb,
+			"Index block size (%d) is smaller than the sector size (%d)",
+			icx->block_size, NTFS_BLOCK_SIZE);
+		goto err_out;
+	}
+
+	if (ni->vol->cluster_size <= icx->block_size)
+		icx->vcn_size_bits = ni->vol->cluster_size_bits;
+	else
+		icx->vcn_size_bits = ni->vol->sector_size_bits;
+
+	icx->cr = ir->collation_rule;
+	if (!ntfs_is_collation_rule_supported(icx->cr)) {
+		err = -EOPNOTSUPP;
+		ntfs_error(sb, "Unknown collation rule 0x%x",
+				(unsigned int)le32_to_cpu(icx->cr));
+		goto err_out;
+	}
+
+	old_vcn = VCN_INDEX_ROOT_PARENT;
+	err = ntfs_ie_lookup(key, key_len, icx, &ir->index, &vcn, &ie);
+	if (err == -ERANGE || err == -EINVAL)
+		goto err_out;
+
+	icx->ir = ir;
+	if (err != -EAGAIN) {
+		icx->is_in_root = true;
+		icx->parent_vcn[icx->pindex] = old_vcn;
+		goto done;
+	}
+
 	/* Child node present, descend into it. */
+	icx->ia_ni = ntfs_ia_open(icx, ni);
+	if (!icx->ia_ni) {
+		err = -ENOENT;
+		goto err_out;
+	}
+
+	ib = ntfs_malloc_nofs(icx->block_size);
+	if (!ib) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+descend_into_child_node:
+	icx->parent_vcn[icx->pindex] = old_vcn;
+	if (ntfs_icx_parent_inc(icx)) {
+		err = -EIO;
+		goto err_out;
+	}
 	old_vcn = vcn;
-	vcn = sle64_to_cpup((sle64*)((u8*)ie + le16_to_cpu(ie->length) - 8));
-	if (vcn >= 0) {
-		/*
-		 * If vcn is in the same page cache page as old_vcn we recycle
-		 * the mapped page.
-		 */
-		if (old_vcn << vol->cluster_size_bits >>
-				PAGE_SHIFT == vcn <<
-				vol->cluster_size_bits >>
-				PAGE_SHIFT)
-			goto fast_descend_into_child_node;
-		unlock_page(page);
-		ntfs_unmap_page(page);
-		goto descend_into_child_node;
-	}
-	ntfs_error(sb, "Negative child node vcn in inode 0x%lx.",
-			idx_ni->mft_no);
-unm_err_out:
-	unlock_page(page);
-	ntfs_unmap_page(page);
+
+	ntfs_debug("Descend into node with s64 %lld.\n", vcn);
+
+	if (ntfs_ib_read(icx, vcn, ib)) {
+		err = -EIO;
+		goto err_out;
+	}
+	err = ntfs_ie_lookup(key, key_len, icx, &ib->index, &vcn, &ie);
+	if (err != -EAGAIN) {
+		if (err == -EINVAL || err == -ERANGE)
+			goto err_out;
+
+		icx->is_in_root = false;
+		icx->ib = ib;
+		icx->parent_vcn[icx->pindex] = vcn;
+		goto done;
+	}
+
+	if ((ib->index.flags & NODE_MASK) == LEAF_NODE) {
+		ntfs_error(icx->idx_ni->vol->sb,
+			"Index entry with child node found in a leaf node in inode 0x%llx.\n",
+			(unsigned long long)ni->mft_no);
+		goto err_out;
+	}
+
+	goto descend_into_child_node;
 err_out:
+	if (icx->actx) {
+		ntfs_attr_put_search_ctx(icx->actx);
+		icx->actx = NULL;
+	}
+	ntfs_free(ib);
 	if (!err)
 		err = -EIO;
-	if (actx)
-		ntfs_attr_put_search_ctx(actx);
-	if (m)
-		unmap_mft_record(base_ni);
 	return err;
-idx_err_out:
-	ntfs_error(sb, "Corrupt index.  Aborting lookup.");
+done:
+	icx->entry = ie;
+	icx->data = (u8 *)ie + offsetof(struct index_entry, key);
+	icx->data_len = le16_to_cpu(ie->key_length);
+	ntfs_debug("Done.\n");
+	return err;
+
+}
+
+static struct index_block *ntfs_ib_alloc(s64 ib_vcn, u32 ib_size,
+		u8 node_type)
+{
+	struct index_block *ib;
+	int ih_size = sizeof(struct index_header);
+
+	ntfs_debug("Entering ib_vcn = %lld ib_size = %u\n", ib_vcn, ib_size);
+
+	ib = ntfs_malloc_nofs(ib_size);
+	if (!ib)
+		return NULL;
+
+	ib->magic = magic_INDX;
+	ib->usa_ofs = cpu_to_le16(sizeof(struct index_block));
+	ib->usa_count = cpu_to_le16(ib_size / NTFS_BLOCK_SIZE + 1);
+	/* Set USN to 1 */
+	*(__le16 *)((char *)ib + le16_to_cpu(ib->usa_ofs)) = cpu_to_le16(1);
+	ib->lsn = 0;
+	ib->index_block_vcn = cpu_to_le64(ib_vcn);
+	ib->index.entries_offset = cpu_to_le32((ih_size +
+				le16_to_cpu(ib->usa_count) * 2 + 7) & ~7);
+	ib->index.index_length = 0;
+	ib->index.allocated_size = cpu_to_le32(ib_size -
+			(sizeof(struct index_block) - ih_size));
+	ib->index.flags = node_type;
+
+	return ib;
+}
+
+/**
+ *  Find the median by going through all the entries
+ */
+static struct index_entry *ntfs_ie_get_median(struct index_header *ih)
+{
+	struct index_entry *ie, *ie_start;
+	u8 *ie_end;
+	int i = 0, median;
+
+	ntfs_debug("Entering\n");
+
+	ie = ie_start = ntfs_ie_get_first(ih);
+	ie_end = (u8 *)ntfs_ie_get_end(ih);
+
+	while ((u8 *)ie < ie_end && !ntfs_ie_end(ie)) {
+		ie = ntfs_ie_get_next(ie);
+		i++;
+	}
+	/*
+	 * NOTE: this could be also the entry at the half of the index block.
+	 */
+	median = i / 2 - 1;
+
+	ntfs_debug("Entries: %d  median: %d\n", i, median);
+
+	for (i = 0, ie = ie_start; i <= median; i++)
+		ie = ntfs_ie_get_next(ie);
+
+	return ie;
+}
+
+static u64 ntfs_ibm_vcn_to_pos(struct ntfs_index_context *icx, s64 vcn)
+{
+	u64 pos = ntfs_ib_vcn_to_pos(icx, vcn);
+
+	do_div(pos, icx->block_size);
+	return pos;
+}
+
+static s64 ntfs_ibm_pos_to_vcn(struct ntfs_index_context *icx, s64 pos)
+{
+	return ntfs_ib_pos_to_vcn(icx, pos * icx->block_size);
+}
+
+static int ntfs_ibm_add(struct ntfs_index_context *icx)
+{
+	u8 bmp[8];
+
+	ntfs_debug("Entering\n");
+
+	if (ntfs_attr_exist(icx->idx_ni, AT_BITMAP, icx->name, icx->name_len))
+		return 0;
+	/*
+	 * AT_BITMAP must be at least 8 bytes.
+	 */
+	memset(bmp, 0, sizeof(bmp));
+	if (ntfs_attr_add(icx->idx_ni, AT_BITMAP, icx->name, icx->name_len,
+				bmp, sizeof(bmp))) {
+		ntfs_error(icx->idx_ni->vol->sb, "Failed to add AT_BITMAP");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ntfs_ibm_modify(struct ntfs_index_context *icx, s64 vcn, int set)
+{
+	u8 byte;
+	u64 pos = ntfs_ibm_vcn_to_pos(icx, vcn);
+	u32 bpos = pos / 8;
+	u32 bit = 1 << (pos % 8);
+	struct ntfs_inode *bmp_ni;
+	struct inode *bmp_vi;
+	int ret = 0;
+
+	ntfs_debug("%s vcn: %lld\n", set ? "set" : "clear", vcn);
+
+	bmp_vi = ntfs_attr_iget(VFS_I(icx->idx_ni), AT_BITMAP, icx->name, icx->name_len);
+	if (IS_ERR(bmp_vi)) {
+		ntfs_error(icx->idx_ni->vol->sb, "Failed to open $BITMAP attribute");
+		return PTR_ERR(bmp_vi);
+	}
+
+	bmp_ni = NTFS_I(bmp_vi);
+
+	if (set) {
+		if (bmp_ni->data_size < bpos + 1) {
+			ret = ntfs_attr_truncate(bmp_ni, (bmp_ni->data_size + 8) & ~7);
+			if (ret) {
+				ntfs_error(icx->idx_ni->vol->sb, "Failed to truncate AT_BITMAP");
+				goto err;
+			}
+			i_size_write(bmp_vi, (loff_t)bmp_ni->data_size);
+		}
+	}
+
+	if (ntfs_inode_attr_pread(bmp_vi, bpos, 1, &byte) != 1) {
+		ret = -EIO;
+		ntfs_error(icx->idx_ni->vol->sb, "Failed to read $BITMAP");
+		goto err;
+	}
+
+	if (set)
+		byte |= bit;
+	else
+		byte &= ~bit;
+
+	if (ntfs_inode_attr_pwrite(bmp_vi, bpos, 1, &byte, false) != 1) {
+		ret = -EIO;
+		ntfs_error(icx->idx_ni->vol->sb, "Failed to write $Bitmap");
+		goto err;
+	}
+
+err:
+	iput(bmp_vi);
+	return ret;
+}
+
+static int ntfs_ibm_set(struct ntfs_index_context *icx, s64 vcn)
+{
+	return ntfs_ibm_modify(icx, vcn, 1);
+}
+
+static int ntfs_ibm_clear(struct ntfs_index_context *icx, s64 vcn)
+{
+	return ntfs_ibm_modify(icx, vcn, 0);
+}
+
+static s64 ntfs_ibm_get_free(struct ntfs_index_context *icx)
+{
+	u8 *bm;
+	int bit;
+	s64 vcn, byte, size;
+
+	ntfs_debug("Entering\n");
+
+	bm = ntfs_attr_readall(icx->idx_ni, AT_BITMAP,  icx->name, icx->name_len,
+			&size);
+	if (!bm)
+		return (s64)-1;
+
+	for (byte = 0; byte < size; byte++) {
+		if (bm[byte] == 255)
+			continue;
+
+		for (bit = 0; bit < 8; bit++) {
+			if (!(bm[byte] & (1 << bit))) {
+				vcn = ntfs_ibm_pos_to_vcn(icx, byte * 8 + bit);
+				goto out;
+			}
+		}
+	}
+
+	vcn = ntfs_ibm_pos_to_vcn(icx, size * 8);
+out:
+	ntfs_debug("allocated vcn: %lld\n", vcn);
+
+	if (ntfs_ibm_set(icx, vcn))
+		vcn = (s64)-1;
+
+	ntfs_free(bm);
+	return vcn;
+}
+
+static struct index_block *ntfs_ir_to_ib(struct index_root *ir, s64 ib_vcn)
+{
+	struct index_block *ib;
+	struct index_entry *ie_last;
+	char *ies_start, *ies_end;
+	int i;
+
+	ntfs_debug("Entering\n");
+
+	ib = ntfs_ib_alloc(ib_vcn, le32_to_cpu(ir->index_block_size), LEAF_NODE);
+	if (!ib)
+		return NULL;
+
+	ies_start = (char *)ntfs_ie_get_first(&ir->index);
+	ies_end   = (char *)ntfs_ie_get_end(&ir->index);
+	ie_last   = ntfs_ie_get_last((struct index_entry *)ies_start, ies_end);
+	/*
+	 * Copy all entries, including the termination entry
+	 * as well, which can never have any data.
+	 */
+	i = (char *)ie_last - ies_start + le16_to_cpu(ie_last->length);
+	memcpy(ntfs_ie_get_first(&ib->index), ies_start, i);
+
+	ib->index.flags = ir->index.flags;
+	ib->index.index_length = cpu_to_le32(i +
+			le32_to_cpu(ib->index.entries_offset));
+	return ib;
+}
+
+static void ntfs_ir_nill(struct index_root *ir)
+{
+	struct index_entry *ie_last;
+	char *ies_start, *ies_end;
+
+	ntfs_debug("Entering\n");
+
+	ies_start = (char *)ntfs_ie_get_first(&ir->index);
+	ies_end   = (char *)ntfs_ie_get_end(&ir->index);
+	ie_last   = ntfs_ie_get_last((struct index_entry *)ies_start, ies_end);
+	/*
+	 * Move the index root termination entry forward
+	 */
+	if ((char *)ie_last > ies_start) {
+		memmove((char *)ntfs_ie_get_first(&ir->index),
+			(char *)ie_last, le16_to_cpu(ie_last->length));
+		ie_last = (struct index_entry *)ies_start;
+	}
+}
+
+static int ntfs_ib_copy_tail(struct ntfs_index_context *icx, struct index_block *src,
+		struct index_entry *median, s64 new_vcn)
+{
+	u8 *ies_end;
+	struct index_entry *ie_head;		/* first entry after the median */
+	int tail_size, ret;
+	struct index_block *dst;
+
+	ntfs_debug("Entering\n");
+
+	dst = ntfs_ib_alloc(new_vcn, icx->block_size,
+			src->index.flags & NODE_MASK);
+	if (!dst)
+		return -ENOMEM;
+
+	ie_head = ntfs_ie_get_next(median);
+
+	ies_end = (u8 *)ntfs_ie_get_end(&src->index);
+	tail_size = ies_end - (u8 *)ie_head;
+	memcpy(ntfs_ie_get_first(&dst->index), ie_head, tail_size);
+
+	dst->index.index_length = cpu_to_le32(tail_size +
+			le32_to_cpu(dst->index.entries_offset));
+	ret = ntfs_ib_write(icx, dst);
+
+	ntfs_free(dst);
+	return ret;
+}
+
+static int ntfs_ib_cut_tail(struct ntfs_index_context *icx, struct index_block *ib,
+		struct index_entry *ie)
+{
+	char *ies_start, *ies_end;
+	struct index_entry *ie_last;
+	int ret;
+
+	ntfs_debug("Entering\n");
+
+	ies_start = (char *)ntfs_ie_get_first(&ib->index);
+	ies_end   = (char *)ntfs_ie_get_end(&ib->index);
+
+	ie_last   = ntfs_ie_get_last((struct index_entry *)ies_start, ies_end);
+	if (ie_last->flags & INDEX_ENTRY_NODE)
+		ntfs_ie_set_vcn(ie_last, ntfs_ie_get_vcn(ie));
+
+	unsafe_memcpy(ie, ie_last, le16_to_cpu(ie_last->length),
+			/* alloc is larger than ie_last->length, see ntfs_ie_get_last() */);
+
+	ib->index.index_length = cpu_to_le32(((char *)ie - ies_start) +
+			le16_to_cpu(ie->length) + le32_to_cpu(ib->index.entries_offset));
+
+	ret = ntfs_ib_write(icx, ib);
+	return ret;
+}
+
+static int ntfs_ia_add(struct ntfs_index_context *icx)
+{
+	int ret;
+
+	ntfs_debug("Entering\n");
+
+	ret = ntfs_ibm_add(icx);
+	if (ret)
+		return ret;
+
+	if (!ntfs_attr_exist(icx->idx_ni, AT_INDEX_ALLOCATION, icx->name, icx->name_len)) {
+		ret = ntfs_attr_add(icx->idx_ni, AT_INDEX_ALLOCATION, icx->name,
+					icx->name_len, NULL, 0);
+		if (ret) {
+			ntfs_error(icx->idx_ni->vol->sb, "Failed to add AT_INDEX_ALLOCATION");
+			return ret;
+		}
+	}
+
+	icx->ia_ni = ntfs_ia_open(icx, icx->idx_ni);
+	if (!icx->ia_ni)
+		return -ENOENT;
+
+	return 0;
+}
+
+static int ntfs_ir_reparent(struct ntfs_index_context *icx)
+{
+	struct ntfs_attr_search_ctx *ctx = NULL;
+	struct index_root *ir;
+	struct index_entry *ie;
+	struct index_block *ib = NULL;
+	s64 new_ib_vcn;
+	int ix_root_size;
+	int ret = 0;
+
+	ntfs_debug("Entering\n");
+
+	ir = ntfs_ir_lookup2(icx->idx_ni, icx->name, icx->name_len);
+	if (!ir) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	if ((ir->index.flags & NODE_MASK) == SMALL_INDEX) {
+		ret = ntfs_ia_add(icx);
+		if (ret)
+			goto out;
+	}
+
+	new_ib_vcn = ntfs_ibm_get_free(icx);
+	if (new_ib_vcn < 0) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ir = ntfs_ir_lookup2(icx->idx_ni, icx->name, icx->name_len);
+	if (!ir) {
+		ret = -ENOENT;
+		goto clear_bmp;
+	}
+
+	ib = ntfs_ir_to_ib(ir, new_ib_vcn);
+	if (ib == NULL) {
+		ret = -EIO;
+		ntfs_error(icx->idx_ni->vol->sb, "Failed to move index root to index block");
+		goto clear_bmp;
+	}
+
+	ret = ntfs_ib_write(icx, ib);
+	if (ret)
+		goto clear_bmp;
+
+retry:
+	ir = ntfs_ir_lookup(icx->idx_ni, icx->name, icx->name_len, &ctx);
+	if (!ir) {
+		ret = -ENOENT;
+		goto clear_bmp;
+	}
+
+	ntfs_ir_nill(ir);
+
+	ie = ntfs_ie_get_first(&ir->index);
+	ie->flags |= INDEX_ENTRY_NODE;
+	ie->length = cpu_to_le16(sizeof(struct index_entry_header) + sizeof(s64));
+
+	ir->index.flags = LARGE_INDEX;
+	NInoSetIndexAllocPresent(icx->idx_ni);
+	ir->index.index_length = cpu_to_le32(le32_to_cpu(ir->index.entries_offset) +
+			le16_to_cpu(ie->length));
+	ir->index.allocated_size = ir->index.index_length;
+
+	ix_root_size = sizeof(struct index_root) - sizeof(struct index_header) +
+		le32_to_cpu(ir->index.allocated_size);
+	ret  = ntfs_resident_attr_value_resize(ctx->mrec, ctx->attr, ix_root_size);
+	if (ret) {
+		/*
+		 * When there is no space to build a non-resident
+		 * index, we may have to move the root to an extent
+		 */
+		if ((ret == -ENOSPC) && (ctx->al_entry || !ntfs_inode_add_attrlist(icx->idx_ni))) {
+			ntfs_attr_put_search_ctx(ctx);
+			ctx = NULL;
+			ir = ntfs_ir_lookup(icx->idx_ni, icx->name, icx->name_len, &ctx);
+			if (ir && !ntfs_attr_record_move_away(ctx, ix_root_size -
+					le32_to_cpu(ctx->attr->data.resident.value_length))) {
+				if (ntfs_attrlist_update(ctx->base_ntfs_ino ?
+							 ctx->base_ntfs_ino : ctx->ntfs_ino))
+					goto clear_bmp;
+				ntfs_attr_put_search_ctx(ctx);
+				ctx = NULL;
+				goto retry;
+			}
+		}
+		goto clear_bmp;
+	} else {
+		icx->idx_ni->data_size = icx->idx_ni->initialized_size = ix_root_size;
+		icx->idx_ni->allocated_size = (ix_root_size  + 7) & ~7;
+	}
+	ntfs_ie_set_vcn(ie, new_ib_vcn);
+
+err_out:
+	ntfs_free(ib);
+	if (ctx)
+		ntfs_attr_put_search_ctx(ctx);
+out:
+	return ret;
+clear_bmp:
+	ntfs_ibm_clear(icx, new_ib_vcn);
 	goto err_out;
 }
+
+/**
+ * ntfs_ir_truncate - Truncate index root attribute
+ */
+static int ntfs_ir_truncate(struct ntfs_index_context *icx, int data_size)
+{
+	int ret;
+
+	ntfs_debug("Entering\n");
+
+	/*
+	 *  INDEX_ROOT must be resident and its entries can be moved to
+	 *  struct index_block, so ENOSPC isn't a real error.
+	 */
+	ret = ntfs_attr_truncate(icx->idx_ni, data_size + offsetof(struct index_root, index));
+	if (!ret) {
+		i_size_write(VFS_I(icx->idx_ni), icx->idx_ni->initialized_size);
+		icx->ir = ntfs_ir_lookup2(icx->idx_ni, icx->name, icx->name_len);
+		if (!icx->ir)
+			return -ENOENT;
+
+		icx->ir->index.allocated_size = cpu_to_le32(data_size);
+	} else if (ret != -ENOSPC)
+		ntfs_error(icx->idx_ni->vol->sb, "Failed to truncate INDEX_ROOT");
+
+	return ret;
+}
+
+/**
+ * ntfs_ir_make_space - Make more space for the index root attribute
+ */
+static int ntfs_ir_make_space(struct ntfs_index_context *icx, int data_size)
+{
+	int ret;
+
+	ntfs_debug("Entering\n");
+
+	ret = ntfs_ir_truncate(icx, data_size);
+	if (ret == -ENOSPC) {
+		ret = ntfs_ir_reparent(icx);
+		if (!ret)
+			ret = -EAGAIN;
+		else
+			ntfs_error(icx->idx_ni->vol->sb, "Failed to modify INDEX_ROOT");
+	}
+
+	return ret;
+}
+
+/*
+ * NOTE: 'ie' must be a copy of a real index entry.
+ */
+static int ntfs_ie_add_vcn(struct index_entry **ie)
+{
+	struct index_entry *p, *old = *ie;
+
+	old->length = cpu_to_le16(le16_to_cpu(old->length) + sizeof(s64));
+	p = ntfs_realloc_nofs(old, le16_to_cpu(old->length),
+			le16_to_cpu(old->length) - sizeof(s64));
+	if (!p)
+		return -ENOMEM;
+
+	p->flags |= INDEX_ENTRY_NODE;
+	*ie = p;
+	return 0;
+}
+
+static int ntfs_ih_insert(struct index_header *ih, struct index_entry *orig_ie, s64 new_vcn,
+		int pos)
+{
+	struct index_entry *ie_node, *ie;
+	int ret = 0;
+	s64 old_vcn;
+
+	ntfs_debug("Entering\n");
+	ie = ntfs_ie_dup(orig_ie);
+	if (!ie)
+		return -ENOMEM;
+
+	if (!(ie->flags & INDEX_ENTRY_NODE)) {
+		ret = ntfs_ie_add_vcn(&ie);
+		if (ret)
+			goto out;
+	}
+
+	ie_node = ntfs_ie_get_by_pos(ih, pos);
+	old_vcn = ntfs_ie_get_vcn(ie_node);
+	ntfs_ie_set_vcn(ie_node, new_vcn);
+
+	ntfs_ie_insert(ih, ie, ie_node);
+	ntfs_ie_set_vcn(ie_node, old_vcn);
+out:
+	ntfs_free(ie);
+	return ret;
+}
+
+static s64 ntfs_icx_parent_vcn(struct ntfs_index_context *icx)
+{
+	return icx->parent_vcn[icx->pindex];
+}
+
+static s64 ntfs_icx_parent_pos(struct ntfs_index_context *icx)
+{
+	return icx->parent_pos[icx->pindex];
+}
+
+static int ntfs_ir_insert_median(struct ntfs_index_context *icx, struct index_entry *median,
+		s64 new_vcn)
+{
+	u32 new_size;
+	int ret;
+
+	ntfs_debug("Entering\n");
+
+	icx->ir = ntfs_ir_lookup2(icx->idx_ni, icx->name, icx->name_len);
+	if (!icx->ir)
+		return -ENOENT;
+
+	new_size = le32_to_cpu(icx->ir->index.index_length) +
+		le16_to_cpu(median->length);
+	if (!(median->flags & INDEX_ENTRY_NODE))
+		new_size += sizeof(s64);
+
+	ret = ntfs_ir_make_space(icx, new_size);
+	if (ret)
+		return ret;
+
+	icx->ir = ntfs_ir_lookup2(icx->idx_ni, icx->name, icx->name_len);
+	if (!icx->ir)
+		return -ENOENT;
+
+	return ntfs_ih_insert(&icx->ir->index, median, new_vcn,
+			ntfs_icx_parent_pos(icx));
+}
+
+static int ntfs_ib_split(struct ntfs_index_context *icx, struct index_block *ib);
+
+struct split_info {
+	struct list_head entry;
+	s64 new_vcn;
+	struct index_block *ib;
+};
+
+static int ntfs_ib_insert(struct ntfs_index_context *icx, struct index_entry *ie, s64 new_vcn,
+		struct split_info *si)
+{
+	struct index_block *ib;
+	u32 idx_size, allocated_size;
+	int err;
+	s64 old_vcn;
+
+	ntfs_debug("Entering\n");
+
+	ib = ntfs_malloc_nofs(icx->block_size);
+	if (!ib)
+		return -ENOMEM;
+
+	old_vcn = ntfs_icx_parent_vcn(icx);
+
+	err = ntfs_ib_read(icx, old_vcn, ib);
+	if (err)
+		goto err_out;
+
+	idx_size = le32_to_cpu(ib->index.index_length);
+	allocated_size = le32_to_cpu(ib->index.allocated_size);
+	if (idx_size + le16_to_cpu(ie->length) + sizeof(s64) > allocated_size) {
+		si->ib = ib;
+		si->new_vcn = new_vcn;
+		return -EAGAIN;
+	}
+
+	err = ntfs_ih_insert(&ib->index, ie, new_vcn, ntfs_icx_parent_pos(icx));
+	if (err)
+		goto err_out;
+
+	err = ntfs_ib_write(icx, ib);
+
+err_out:
+	ntfs_free(ib);
+	return err;
+}
+
+/**
+ * ntfs_ib_split - Split an index block
+ */
+static int ntfs_ib_split(struct ntfs_index_context *icx, struct index_block *ib)
+{
+	struct index_entry *median;
+	s64 new_vcn;
+	int ret;
+	struct split_info *si;
+	LIST_HEAD(ntfs_cut_tail_list);
+
+	ntfs_debug("Entering\n");
+
+resplit:
+	ret = ntfs_icx_parent_dec(icx);
+	if (ret)
+		goto out;
+
+	median  = ntfs_ie_get_median(&ib->index);
+	new_vcn = ntfs_ibm_get_free(icx);
+	if (new_vcn < 0) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = ntfs_ib_copy_tail(icx, ib, median, new_vcn);
+	if (ret) {
+		ntfs_ibm_clear(icx, new_vcn);
+		goto out;
+	}
+
+	if (ntfs_icx_parent_vcn(icx) == VCN_INDEX_ROOT_PARENT) {
+		ret = ntfs_ir_insert_median(icx, median, new_vcn);
+		if (ret) {
+			ntfs_ibm_clear(icx, new_vcn);
+			goto out;
+		}
+	} else {
+		si = kzalloc(sizeof(struct split_info), GFP_NOFS);
+		if (!si) {
+			ntfs_ibm_clear(icx, new_vcn);
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		ret = ntfs_ib_insert(icx, median, new_vcn, si);
+		if (ret == -EAGAIN) {
+			list_add_tail(&si->entry, &ntfs_cut_tail_list);
+			ib = si->ib;
+			goto resplit;
+		} else if (ret) {
+			ntfs_free(si->ib);
+			kfree(si);
+			ntfs_ibm_clear(icx, new_vcn);
+			goto out;
+		}
+		kfree(si);
+	}
+
+	ret = ntfs_ib_cut_tail(icx, ib, median);
+
+out:
+	while (!list_empty(&ntfs_cut_tail_list)) {
+		si = list_last_entry(&ntfs_cut_tail_list, struct split_info, entry);
+		ntfs_ibm_clear(icx, si->new_vcn);
+		ntfs_free(si->ib);
+		list_del(&si->entry);
+		kfree(si);
+		if (!ret)
+			ret = -EAGAIN;
+	}
+
+	return ret;
+}
+
+int ntfs_ie_add(struct ntfs_index_context *icx, struct index_entry *ie)
+{
+	struct index_header *ih;
+	int allocated_size, new_size;
+	int ret;
+
+	while (1) {
+		ret = ntfs_index_lookup(&ie->key, le16_to_cpu(ie->key_length), icx);
+		if (!ret) {
+			ret = -EEXIST;
+			ntfs_error(icx->idx_ni->vol->sb, "Index already have such entry");
+			goto err_out;
+		}
+		if (ret != -ENOENT) {
+			ntfs_error(icx->idx_ni->vol->sb, "Failed to find place for new entry");
+			goto err_out;
+		}
+		ret = 0;
+
+		if (icx->is_in_root)
+			ih = &icx->ir->index;
+		else
+			ih = &icx->ib->index;
+
+		allocated_size = le32_to_cpu(ih->allocated_size);
+		new_size = le32_to_cpu(ih->index_length) + le16_to_cpu(ie->length);
+
+		if (new_size <= allocated_size)
+			break;
+
+		ntfs_debug("index block sizes: allocated: %d  needed: %d\n",
+				allocated_size, new_size);
+
+		if (icx->is_in_root)
+			ret = ntfs_ir_make_space(icx, new_size);
+		else
+			ret = ntfs_ib_split(icx, icx->ib);
+		if (ret && ret != -EAGAIN)
+			goto err_out;
+
+		mark_mft_record_dirty(icx->actx->ntfs_ino);
+		ntfs_index_ctx_reinit(icx);
+	}
+
+	ntfs_ie_insert(ih, ie, icx->entry);
+	ntfs_index_entry_mark_dirty(icx);
+
+err_out:
+	ntfs_debug("%s\n", ret ? "Failed" : "Done");
+	return ret;
+}
+
+/**
+ * ntfs_index_add_filename - add filename to directory index
+ * @ni:		ntfs inode describing directory to which index add filename
+ * @fn:		FILE_NAME attribute to add
+ * @mref:	reference of the inode which @fn describes
+ */
+int ntfs_index_add_filename(struct ntfs_inode *ni, struct file_name_attr *fn, u64 mref)
+{
+	struct index_entry *ie;
+	struct ntfs_index_context *icx;
+	int fn_size, ie_size, err;
+
+	ntfs_debug("Entering\n");
+
+	if (!ni || !fn)
+		return -EINVAL;
+
+	fn_size = (fn->file_name_length * sizeof(__le16)) +
+		sizeof(struct file_name_attr);
+	ie_size = (sizeof(struct index_entry_header) + fn_size + 7) & ~7;
+
+	ie = ntfs_malloc_nofs(ie_size);
+	if (!ie)
+		return -ENOMEM;
+
+	ie->data.dir.indexed_file = cpu_to_le64(mref);
+	ie->length	 = cpu_to_le16(ie_size);
+	ie->key_length	 = cpu_to_le16(fn_size);
+
+	unsafe_memcpy(&ie->key, fn, fn_size,
+		      /* "fn_size" was correctly calculated above */);
+
+	icx = ntfs_index_ctx_get(ni, I30, 4);
+	if (!icx) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	err = ntfs_ie_add(icx, ie);
+	ntfs_index_ctx_put(icx);
+out:
+	ntfs_free(ie);
+	return err;
+}
+
+static int ntfs_ih_takeout(struct ntfs_index_context *icx, struct index_header *ih,
+		struct index_entry *ie, struct index_block *ib)
+{
+	struct index_entry *ie_roam;
+	int freed_space;
+	bool full;
+	int ret = 0;
+
+	ntfs_debug("Entering\n");
+
+	full = ih->index_length == ih->allocated_size;
+	ie_roam = ntfs_ie_dup_novcn(ie);
+	if (!ie_roam)
+		return -ENOMEM;
+
+	ntfs_ie_delete(ih, ie);
+
+	if (ntfs_icx_parent_vcn(icx) == VCN_INDEX_ROOT_PARENT) {
+		/*
+		 * Recover the space which may have been freed
+		 * while deleting an entry from root index
+		 */
+		freed_space = le32_to_cpu(ih->allocated_size) -
+			le32_to_cpu(ih->index_length);
+		if (full && (freed_space > 0) && !(freed_space & 7)) {
+			ntfs_ir_truncate(icx, le32_to_cpu(ih->index_length));
+			/* do nothing if truncation fails */
+		}
+
+		mark_mft_record_dirty(icx->actx->ntfs_ino);
+	} else {
+		ret = ntfs_ib_write(icx, ib);
+		if (ret)
+			goto out;
+	}
+
+	ntfs_index_ctx_reinit(icx);
+
+	ret = ntfs_ie_add(icx, ie_roam);
+out:
+	ntfs_free(ie_roam);
+	return ret;
+}
+
+/**
+ *  Used if an empty index block to be deleted has END entry as the parent
+ *  in the INDEX_ROOT which is the only one there.
+ */
+static void ntfs_ir_leafify(struct ntfs_index_context *icx, struct index_header *ih)
+{
+	struct index_entry *ie;
+
+	ntfs_debug("Entering\n");
+
+	ie = ntfs_ie_get_first(ih);
+	ie->flags &= ~INDEX_ENTRY_NODE;
+	ie->length = cpu_to_le16(le16_to_cpu(ie->length) - sizeof(s64));
+
+	ih->index_length = cpu_to_le32(le32_to_cpu(ih->index_length) - sizeof(s64));
+	ih->flags &= ~LARGE_INDEX;
+	NInoClearIndexAllocPresent(icx->idx_ni);
+
+	/* Not fatal error */
+	ntfs_ir_truncate(icx, le32_to_cpu(ih->index_length));
+}
+
+/**
+ *  Used if an empty index block to be deleted has END entry as the parent
+ *  in the INDEX_ROOT which is not the only one there.
+ */
+static int ntfs_ih_reparent_end(struct ntfs_index_context *icx, struct index_header *ih,
+		struct index_block *ib)
+{
+	struct index_entry *ie, *ie_prev;
+
+	ntfs_debug("Entering\n");
+
+	ie = ntfs_ie_get_by_pos(ih, ntfs_icx_parent_pos(icx));
+	ie_prev = ntfs_ie_prev(ih, ie);
+	if (!ie_prev)
+		return -EIO;
+	ntfs_ie_set_vcn(ie, ntfs_ie_get_vcn(ie_prev));
+
+	return ntfs_ih_takeout(icx, ih, ie_prev, ib);
+}
+
+static int ntfs_index_rm_leaf(struct ntfs_index_context *icx)
+{
+	struct index_block *ib = NULL;
+	struct index_header *parent_ih;
+	struct index_entry *ie;
+	int ret;
+
+	ntfs_debug("pindex: %d\n", icx->pindex);
+
+	ret = ntfs_icx_parent_dec(icx);
+	if (ret)
+		return ret;
+
+	ret = ntfs_ibm_clear(icx, icx->parent_vcn[icx->pindex + 1]);
+	if (ret)
+		return ret;
+
+	if (ntfs_icx_parent_vcn(icx) == VCN_INDEX_ROOT_PARENT)
+		parent_ih = &icx->ir->index;
+	else {
+		ib = ntfs_malloc_nofs(icx->block_size);
+		if (!ib)
+			return -ENOMEM;
+
+		ret = ntfs_ib_read(icx, ntfs_icx_parent_vcn(icx), ib);
+		if (ret)
+			goto out;
+
+		parent_ih = &ib->index;
+	}
+
+	ie = ntfs_ie_get_by_pos(parent_ih, ntfs_icx_parent_pos(icx));
+	if (!ntfs_ie_end(ie)) {
+		ret = ntfs_ih_takeout(icx, parent_ih, ie, ib);
+		goto out;
+	}
+
+	if (ntfs_ih_zero_entry(parent_ih)) {
+		if (ntfs_icx_parent_vcn(icx) == VCN_INDEX_ROOT_PARENT) {
+			ntfs_ir_leafify(icx, parent_ih);
+			goto out;
+		}
+
+		ret = ntfs_index_rm_leaf(icx);
+		goto out;
+	}
+
+	ret = ntfs_ih_reparent_end(icx, parent_ih, ib);
+out:
+	ntfs_free(ib);
+	return ret;
+}
+
+static int ntfs_index_rm_node(struct ntfs_index_context *icx)
+{
+	int entry_pos, pindex;
+	s64 vcn;
+	struct index_block *ib = NULL;
+	struct index_entry *ie_succ, *ie, *entry = icx->entry;
+	struct index_header *ih;
+	u32 new_size;
+	int delta, ret;
+
+	ntfs_debug("Entering\n");
+
+	if (!icx->ia_ni) {
+		icx->ia_ni = ntfs_ia_open(icx, icx->idx_ni);
+		if (!icx->ia_ni)
+			return -EINVAL;
+	}
+
+	ib = ntfs_malloc_nofs(icx->block_size);
+	if (!ib)
+		return -ENOMEM;
+
+	ie_succ = ntfs_ie_get_next(icx->entry);
+	entry_pos = icx->parent_pos[icx->pindex]++;
+	pindex = icx->pindex;
+descend:
+	vcn = ntfs_ie_get_vcn(ie_succ);
+	ret = ntfs_ib_read(icx, vcn, ib);
+	if (ret)
+		goto out;
+
+	ie_succ = ntfs_ie_get_first(&ib->index);
+
+	ret = ntfs_icx_parent_inc(icx);
+	if (ret)
+		goto out;
+
+	icx->parent_vcn[icx->pindex] = vcn;
+	icx->parent_pos[icx->pindex] = 0;
+
+	if ((ib->index.flags & NODE_MASK) == INDEX_NODE)
+		goto descend;
+
+	if (ntfs_ih_zero_entry(&ib->index)) {
+		ret = -EIO;
+		ntfs_error(icx->idx_ni->vol->sb, "Empty index block");
+		goto out;
+	}
+
+	ie = ntfs_ie_dup(ie_succ);
+	if (!ie) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = ntfs_ie_add_vcn(&ie);
+	if (ret)
+		goto out2;
+
+	ntfs_ie_set_vcn(ie, ntfs_ie_get_vcn(icx->entry));
+
+	if (icx->is_in_root)
+		ih = &icx->ir->index;
+	else
+		ih = &icx->ib->index;
+
+	delta = le16_to_cpu(ie->length) - le16_to_cpu(icx->entry->length);
+	new_size = le32_to_cpu(ih->index_length) + delta;
+	if (delta > 0) {
+		if (icx->is_in_root) {
+			ret = ntfs_ir_make_space(icx, new_size);
+			if (ret != 0)
+				goto out2;
+
+			ih = &icx->ir->index;
+			entry = ntfs_ie_get_by_pos(ih, entry_pos);
+
+		} else if (new_size > le32_to_cpu(ih->allocated_size)) {
+			icx->pindex = pindex;
+			ret = ntfs_ib_split(icx, icx->ib);
+			if (!ret)
+				ret = -EAGAIN;
+			goto out2;
+		}
+	}
+
+	ntfs_ie_delete(ih, entry);
+	ntfs_ie_insert(ih, ie, entry);
+
+	if (icx->is_in_root)
+		ret = ntfs_ir_truncate(icx, new_size);
+	else
+		ret = ntfs_icx_ib_write(icx);
+	if (ret)
+		goto out2;
+
+	ntfs_ie_delete(&ib->index, ie_succ);
+
+	if (ntfs_ih_zero_entry(&ib->index))
+		ret = ntfs_index_rm_leaf(icx);
+	else
+		ret = ntfs_ib_write(icx, ib);
+
+out2:
+	ntfs_free(ie);
+out:
+	ntfs_free(ib);
+	return ret;
+}
+
+/**
+ * ntfs_index_rm - remove entry from the index
+ * @icx:	index context describing entry to delete
+ *
+ * Delete entry described by @icx from the index. Index context is always
+ * reinitialized after use of this function, so it can be used for index
+ * lookup once again.
+ */
+int ntfs_index_rm(struct ntfs_index_context *icx)
+{
+	struct index_header *ih;
+	int ret = 0;
+
+	ntfs_debug("Entering\n");
+
+	if (!icx || (!icx->ib && !icx->ir) || ntfs_ie_end(icx->entry)) {
+		ret = -EINVAL;
+		goto err_out;
+	}
+	if (icx->is_in_root)
+		ih = &icx->ir->index;
+	else
+		ih = &icx->ib->index;
+
+	if (icx->entry->flags & INDEX_ENTRY_NODE) {
+		ret = ntfs_index_rm_node(icx);
+		if (ret)
+			goto err_out;
+	} else if (icx->is_in_root || !ntfs_ih_one_entry(ih)) {
+		ntfs_ie_delete(ih, icx->entry);
+
+		if (icx->is_in_root)
+			ret = ntfs_ir_truncate(icx, le32_to_cpu(ih->index_length));
+		else
+			ret = ntfs_icx_ib_write(icx);
+		if (ret)
+			goto err_out;
+	} else {
+		ret = ntfs_index_rm_leaf(icx);
+		if (ret)
+			goto err_out;
+	}
+
+	return 0;
+err_out:
+	return ret;
+}
+
+int ntfs_index_remove(struct ntfs_inode *dir_ni, const void *key, const int keylen)
+{
+	int ret = 0;
+	struct ntfs_index_context *icx;
+
+	icx = ntfs_index_ctx_get(dir_ni, I30, 4);
+	if (!icx)
+		return -EINVAL;
+
+	while (1) {
+		ret = ntfs_index_lookup(key, keylen, icx);
+		if (ret)
+			goto err_out;
+
+		ret = ntfs_index_rm(icx);
+		if (ret && ret != -EAGAIN)
+			goto err_out;
+		else if (!ret)
+			break;
+
+		mark_mft_record_dirty(icx->actx->ntfs_ino);
+		ntfs_index_ctx_reinit(icx);
+	}
+
+	mark_mft_record_dirty(icx->actx->ntfs_ino);
+
+	ntfs_index_ctx_put(icx);
+	return 0;
+err_out:
+	ntfs_index_ctx_put(icx);
+	ntfs_error(dir_ni->vol->sb, "Delete failed");
+	return ret;
+}
+
+/*
+ * ntfs_index_walk_down - walk down the index tree (leaf bound)
+ * until there are no subnode in the first index entry returns
+ * the entry at the bottom left in subnode
+ */
+struct index_entry *ntfs_index_walk_down(struct index_entry *ie, struct ntfs_index_context *ictx)
+{
+	struct index_entry *entry;
+	s64 vcn;
+
+	entry = ie;
+	do {
+		vcn = ntfs_ie_get_vcn(entry);
+		if (ictx->is_in_root) {
+			/* down from level zero */
+			ictx->ir = NULL;
+			ictx->ib = (struct index_block *)ntfs_malloc_nofs(ictx->block_size);
+			ictx->pindex = 1;
+			ictx->is_in_root = false;
+		} else {
+			/* down from non-zero level */
+			ictx->pindex++;
+		}
+
+		ictx->parent_pos[ictx->pindex] = 0;
+		ictx->parent_vcn[ictx->pindex] = vcn;
+		if (!ntfs_ib_read(ictx, vcn, ictx->ib)) {
+			ictx->entry = ntfs_ie_get_first(&ictx->ib->index);
+			entry = ictx->entry;
+		} else
+			entry = NULL;
+	} while (entry && (entry->flags & INDEX_ENTRY_NODE));
+
+	return entry;
+}
+
+/**
+ * ntfs_index_walk_up - walk up the index tree (root bound) until
+ * there is a valid data entry in parent returns the parent entry
+ * or NULL if no more parent.
+ */
+static struct index_entry *ntfs_index_walk_up(struct index_entry *ie,
+		struct ntfs_index_context *ictx)
+{
+	struct index_entry *entry;
+	s64 vcn;
+
+	entry = ie;
+	if (ictx->pindex > 0) {
+		do {
+			ictx->pindex--;
+			if (!ictx->pindex) {
+				/* we have reached the root */
+				kfree(ictx->ib);
+				ictx->ib = NULL;
+				ictx->is_in_root = true;
+				/* a new search context is to be allocated */
+				if (ictx->actx)
+					ntfs_attr_put_search_ctx(ictx->actx);
+				ictx->ir = ntfs_ir_lookup(ictx->idx_ni, ictx->name,
+						ictx->name_len, &ictx->actx);
+				if (ictx->ir)
+					entry = ntfs_ie_get_by_pos(&ictx->ir->index,
+							ictx->parent_pos[ictx->pindex]);
+				else
+					entry = NULL;
+			} else {
+					/* up into non-root node */
+				vcn = ictx->parent_vcn[ictx->pindex];
+				if (!ntfs_ib_read(ictx, vcn, ictx->ib)) {
+					entry = ntfs_ie_get_by_pos(&ictx->ib->index,
+							ictx->parent_pos[ictx->pindex]);
+				} else
+					entry = NULL;
+			}
+		ictx->entry = entry;
+		} while (entry && (ictx->pindex > 0) &&
+				(entry->flags & INDEX_ENTRY_END));
+	} else
+		entry = NULL;
+
+	return entry;
+}
+
+/**
+ * ntfs_index_next - get next entry in an index according to collating sequence.
+ * Returns next entry or NULL if none.
+ *
+ * Sample layout :
+ *
+ *                 +---+---+---+---+---+---+---+---+    n ptrs to subnodes
+ *                 |   |   | 10| 25| 33|   |   |   |    n-1 keys in between
+ *                 +---+---+---+---+---+---+---+---+    no key in last entry
+ *                              | A | A
+ *                              | | | +-------------------------------+
+ *   +--------------------------+ | +-----+                           |
+ *   |                            +--+    |                           |
+ *   V                               |    V                           |
+ * +---+---+---+---+---+---+---+---+ |  +---+---+---+---+---+---+---+---+
+ * | 11| 12| 13| 14| 15| 16| 17|   | |  | 26| 27| 28| 29| 30| 31| 32|   |
+ * +---+---+---+---+---+---+---+---+ |  +---+---+---+---+---+---+---+---+
+ *                               |   |
+ *       +-----------------------+   |
+ *       |                           |
+ *     +---+---+---+---+---+---+---+---+
+ *     | 18| 19| 20| 21| 22| 23| 24|   |
+ *     +---+---+---+---+---+---+---+---+
+ */
+struct index_entry *ntfs_index_next(struct index_entry *ie, struct ntfs_index_context *ictx)
+{
+	struct index_entry *next;
+	__le16 flags;
+
+	/*
+	 * lookup() may have returned an invalid node
+	 * when searching for a partial key
+	 * if this happens, walk up
+	 */
+	if (ie->flags & INDEX_ENTRY_END)
+		next = ntfs_index_walk_up(ie, ictx);
+	else {
+		/*
+		 * get next entry in same node
+		 * there is always one after any entry with data
+		 */
+		next = (struct index_entry *)((char *)ie + le16_to_cpu(ie->length));
+		++ictx->parent_pos[ictx->pindex];
+		flags = next->flags;
+
+		/* walk down if it has a subnode */
+		if (flags & INDEX_ENTRY_NODE) {
+			if (!ictx->ia_ni)
+				ictx->ia_ni = ntfs_ia_open(ictx, ictx->idx_ni);
+
+			next = ntfs_index_walk_down(next, ictx);
+		} else {
+
+			/* walk up it has no subnode, nor data */
+			if (flags & INDEX_ENTRY_END)
+				next = ntfs_index_walk_up(next, ictx);
+		}
+	}
+
+	/* return NULL if stuck at end of a block */
+	if (next && (next->flags & INDEX_ENTRY_END))
+		next = NULL;
+
+	return next;
+}
-- 
2.25.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ