[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <6f23832debd919787c57fc5ef19561a45c034bce.1738709036.git-series.apopple@nvidia.com>
Date: Wed, 5 Feb 2025 09:48:04 +1100
From: Alistair Popple <apopple@...dia.com>
To: akpm@...ux-foundation.org,
dan.j.williams@...el.com,
linux-mm@...ck.org
Cc: Alistair Popple <apopple@...dia.com>,
Alison Schofield <alison.schofield@...el.com>,
lina@...hilina.net,
zhang.lyra@...il.com,
gerald.schaefer@...ux.ibm.com,
vishal.l.verma@...el.com,
dave.jiang@...el.com,
logang@...tatee.com,
bhelgaas@...gle.com,
jack@...e.cz,
jgg@...pe.ca,
catalin.marinas@....com,
will@...nel.org,
mpe@...erman.id.au,
npiggin@...il.com,
dave.hansen@...ux.intel.com,
ira.weiny@...el.com,
willy@...radead.org,
djwong@...nel.org,
tytso@....edu,
linmiaohe@...wei.com,
david@...hat.com,
peterx@...hat.com,
linux-doc@...r.kernel.org,
linux-kernel@...r.kernel.org,
linux-arm-kernel@...ts.infradead.org,
linuxppc-dev@...ts.ozlabs.org,
nvdimm@...ts.linux.dev,
linux-cxl@...r.kernel.org,
linux-fsdevel@...r.kernel.org,
linux-ext4@...r.kernel.org,
linux-xfs@...r.kernel.org,
jhubbard@...dia.com,
hch@....de,
david@...morbit.com,
chenhuacai@...nel.org,
kernel@...0n.name,
loongarch@...ts.linux.dev
Subject: [PATCH v7 07/20] fs/dax: Ensure all pages are idle prior to filesystem unmount
File systems call dax_break_layout() prior to reallocating file system
blocks to ensure the page is not undergoing any DMA or other
accesses. Generally this is needed when a file is truncated to ensure that
if a block is reallocated nothing is writing to it. However, file systems
currently don't call this when an FS DAX inode is evicted.
This can cause problems when the file system is unmounted, as a page can
still be undergoing DMA or other remote access after unmount. This
means if the file system is remounted any truncate or other operation which
requires the underlying file system block to be freed will not wait for the
remote access to complete. Therefore a busy block may be reallocated to a
new file leading to corruption.
Signed-off-by: Alistair Popple <apopple@...dia.com>
---
Changes for v7:
- Don't take locks during inode eviction as suggested by Darrick and
therefore remove the callback for dax_break_mapping_uninterruptible().
- Use common definition of dax_page_is_idle().
- Fixed smatch suggestion in dax_break_mapping_uninterruptible().
- Rename dax_break_mapping_uninterruptible() to dax_break_layout_final()
as suggested by Dan.
Changes for v5:
- Don't wait for pages to be idle in non-DAX mappings
---
fs/dax.c | 27 +++++++++++++++++++++++++++
fs/ext4/inode.c | 2 ++
fs/xfs/xfs_super.c | 12 ++++++++++++
include/linux/dax.h | 5 +++++
4 files changed, 46 insertions(+)
diff --git a/fs/dax.c b/fs/dax.c
index 39f1dc0..9c28eb3 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -883,6 +883,13 @@ static int wait_page_idle(struct page *page,
TASK_INTERRUPTIBLE, 0, 0, cb(inode));
}
+static void wait_page_idle_uninterruptible(struct page *page,
+ struct inode *inode)
+{
+ ___wait_var_event(page, dax_page_is_idle(page),
+ TASK_UNINTERRUPTIBLE, 0, 0, schedule());
+}
+
/*
* Unmaps the inode and waits for any DMA to complete prior to deleting the
* DAX mapping entries for the range.
@@ -918,6 +925,26 @@ int dax_break_layout(struct inode *inode, loff_t start, loff_t end,
}
EXPORT_SYMBOL_GPL(dax_break_layout);
+void dax_break_layout_final(struct inode *inode)
+{
+ struct page *page;
+
+ if (!dax_mapping(inode->i_mapping))
+ return;
+
+ do {
+ page = dax_layout_busy_page_range(inode->i_mapping, 0,
+ LLONG_MAX);
+ if (!page)
+ break;
+
+ wait_page_idle_uninterruptible(page, inode);
+ } while (true);
+
+ dax_delete_mapping_range(inode->i_mapping, 0, LLONG_MAX);
+}
+EXPORT_SYMBOL_GPL(dax_break_layout_final);
+
/*
* Invalidate DAX entry if it is clean.
*/
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2342bac..3cc8da6 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -181,6 +181,8 @@ void ext4_evict_inode(struct inode *inode)
trace_ext4_evict_inode(inode);
+ dax_break_layout_final(inode);
+
if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)
ext4_evict_ea_inode(inode);
if (inode->i_nlink) {
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index d92d7a0..22abe0e 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -751,6 +751,17 @@ xfs_fs_drop_inode(
return generic_drop_inode(inode);
}
+STATIC void
+xfs_fs_evict_inode(
+ struct inode *inode)
+{
+ if (IS_DAX(inode))
+ dax_break_layout_final(inode);
+
+ truncate_inode_pages_final(&inode->i_data);
+ clear_inode(inode);
+}
+
static void
xfs_mount_free(
struct xfs_mount *mp)
@@ -1215,6 +1226,7 @@ static const struct super_operations xfs_super_operations = {
.destroy_inode = xfs_fs_destroy_inode,
.dirty_inode = xfs_fs_dirty_inode,
.drop_inode = xfs_fs_drop_inode,
+ .evict_inode = xfs_fs_evict_inode,
.put_super = xfs_fs_put_super,
.sync_fs = xfs_fs_sync_fs,
.freeze_fs = xfs_fs_freeze,
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 2fbb262..2333c30 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -232,6 +232,10 @@ static inline int __must_check dax_break_layout(struct inode *inode,
{
return 0;
}
+
+static inline void dax_break_layout_final(struct inode *inode)
+{
+}
#endif
bool dax_alive(struct dax_device *dax_dev);
@@ -266,6 +270,7 @@ static inline int __must_check dax_break_layout_inode(struct inode *inode,
{
return dax_break_layout(inode, 0, LLONG_MAX, cb);
}
+void dax_break_layout_final(struct inode *inode);
int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
struct inode *dest, loff_t destoff,
loff_t len, bool *is_same,
--
git-series 0.9.1
Powered by blists - more mailing lists