[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <e493188d4668b45b87e8bf284fcb59a65e45b6e3.1738709036.git-series.apopple@nvidia.com>
Date: Wed, 5 Feb 2025 09:48:03 +1100
From: Alistair Popple <apopple@...dia.com>
To: akpm@...ux-foundation.org,
dan.j.williams@...el.com,
linux-mm@...ck.org
Cc: Alistair Popple <apopple@...dia.com>,
Alison Schofield <alison.schofield@...el.com>,
lina@...hilina.net,
zhang.lyra@...il.com,
gerald.schaefer@...ux.ibm.com,
vishal.l.verma@...el.com,
dave.jiang@...el.com,
logang@...tatee.com,
bhelgaas@...gle.com,
jack@...e.cz,
jgg@...pe.ca,
catalin.marinas@....com,
will@...nel.org,
mpe@...erman.id.au,
npiggin@...il.com,
dave.hansen@...ux.intel.com,
ira.weiny@...el.com,
willy@...radead.org,
djwong@...nel.org,
tytso@....edu,
linmiaohe@...wei.com,
david@...hat.com,
peterx@...hat.com,
linux-doc@...r.kernel.org,
linux-kernel@...r.kernel.org,
linux-arm-kernel@...ts.infradead.org,
linuxppc-dev@...ts.ozlabs.org,
nvdimm@...ts.linux.dev,
linux-cxl@...r.kernel.org,
linux-fsdevel@...r.kernel.org,
linux-ext4@...r.kernel.org,
linux-xfs@...r.kernel.org,
jhubbard@...dia.com,
hch@....de,
david@...morbit.com,
chenhuacai@...nel.org,
kernel@...0n.name,
loongarch@...ts.linux.dev
Subject: [PATCH v7 06/20] fs/dax: Always remove DAX page-cache entries when breaking layouts
Prior to any truncation operation, file systems call
dax_break_layout() to ensure pages in the range are not undergoing
DMA. Later, DAX page-cache entries will be removed by
truncate_folio_batch_exceptionals() in the generic page-cache code.
However, this makes it possible for folios to be removed from the
page-cache even though they are still DMA busy if the file-system
hasn't called dax_break_layout(). It also means they can never be
waited on in the future because FS DAX will lose track of them once the
page-cache entry has been deleted.
Instead, it is better to delete the FS DAX entry when the file-system
calls dax_break_layout() as part of its truncate operation. This
ensures only idle pages can be removed from the FS DAX page-cache and
makes it easy to detect if a file-system hasn't called
dax_break_layout() prior to a truncate operation.
Signed-off-by: Alistair Popple <apopple@...dia.com>
Reviewed-by: Dan Williams <dan.j.williams@...el.com>
---
Changes for v7:
- s/dax_break_mapping/dax_break_layout/ suggested by Dan.
- Rework dax_break_mapping() to take a NULL callback for NOWAIT
behaviour as suggested by Dan.
---
fs/dax.c | 40 ++++++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_inode.c | 5 ++---
include/linux/dax.h | 2 ++
mm/truncate.c | 16 +++++++++++++++-
4 files changed, 59 insertions(+), 4 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
index 710b280..39f1dc0 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -845,6 +845,36 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
return ret;
}
+void dax_delete_mapping_range(struct address_space *mapping,
+ loff_t start, loff_t end)
+{
+ void *entry;
+ pgoff_t start_idx = start >> PAGE_SHIFT;
+ pgoff_t end_idx;
+ XA_STATE(xas, &mapping->i_pages, start_idx);
+
+ /* If end == LLONG_MAX, cover all pages from start to the end of the file */
+ if (end == LLONG_MAX)
+ end_idx = ULONG_MAX;
+ else
+ end_idx = end >> PAGE_SHIFT;
+
+ xas_lock_irq(&xas);
+ xas_for_each(&xas, entry, end_idx) {
+ if (!xa_is_value(entry))
+ continue;
+ entry = wait_entry_unlocked_exclusive(&xas, entry);
+ if (!entry)
+ continue;
+ dax_disassociate_entry(entry, mapping, true);
+ xas_store(&xas, NULL);
+ mapping->nrpages -= 1UL << dax_entry_order(entry);
+ put_unlocked_entry(&xas, entry, WAKE_ALL);
+ }
+ xas_unlock_irq(&xas);
+}
+EXPORT_SYMBOL_GPL(dax_delete_mapping_range);
+
static int wait_page_idle(struct page *page,
void (cb)(struct inode *),
struct inode *inode)
@@ -856,6 +886,9 @@ static int wait_page_idle(struct page *page,
/*
* Unmaps the inode and waits for any DMA to complete prior to deleting the
* DAX mapping entries for the range.
+ *
+ * For NOWAIT behavior, pass @cb as NULL to early-exit on first found
+ * busy page
*/
int dax_break_layout(struct inode *inode, loff_t start, loff_t end,
void (cb)(struct inode *))
@@ -870,10 +903,17 @@ int dax_break_layout(struct inode *inode, loff_t start, loff_t end,
page = dax_layout_busy_page_range(inode->i_mapping, start, end);
if (!page)
break;
+ if (!cb) {
+ error = -ERESTARTSYS;
+ break;
+ }
error = wait_page_idle(page, cb, inode);
} while (error == 0);
+ if (!page)
+ dax_delete_mapping_range(inode->i_mapping, start, end);
+
return error;
}
EXPORT_SYMBOL_GPL(dax_break_layout);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 62c2ae3..c9ffabe 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2732,7 +2732,6 @@ xfs_mmaplock_two_inodes_and_break_dax_layout(
struct xfs_inode *ip2)
{
int error;
- struct page *page;
if (ip1->i_ino > ip2->i_ino)
swap(ip1, ip2);
@@ -2756,8 +2755,8 @@ xfs_mmaplock_two_inodes_and_break_dax_layout(
* need to unlock & lock the XFS_MMAPLOCK_EXCL which is not suitable
* for this nested lock case.
*/
- page = dax_layout_busy_page(VFS_I(ip2)->i_mapping);
- if (!dax_page_is_idle(page)) {
+ error = dax_break_layout(VFS_I(ip2), 0, -1, NULL);
+ if (error) {
xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL);
xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
goto again;
diff --git a/include/linux/dax.h b/include/linux/dax.h
index a6b277f..2fbb262 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -255,6 +255,8 @@ vm_fault_t dax_iomap_fault(struct vm_fault *vmf, unsigned int order,
vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
unsigned int order, pfn_t pfn);
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
+void dax_delete_mapping_range(struct address_space *mapping,
+ loff_t start, loff_t end);
int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
pgoff_t index);
int __must_check dax_break_layout(struct inode *inode, loff_t start,
diff --git a/mm/truncate.c b/mm/truncate.c
index e2e115a..0395e57 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -78,8 +78,22 @@ static void truncate_folio_batch_exceptionals(struct address_space *mapping,
if (dax_mapping(mapping)) {
for (i = j; i < nr; i++) {
- if (xa_is_value(fbatch->folios[i]))
+ if (xa_is_value(fbatch->folios[i])) {
+ /*
+ * File systems should already have called
+ * dax_break_layout() to remove all DAX
+ * entries while holding a lock to prevent
+ * establishing new entries. Therefore we
+ * shouldn't find any here.
+ */
+ WARN_ON_ONCE(1);
+
+ /*
+ * Delete the mapping so truncate_pagecache()
+ * doesn't loop forever.
+ */
dax_delete_mapping_entry(mapping, indices[i]);
+ }
}
goto out;
}
--
git-series 0.9.1
Powered by blists - more mailing lists