[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20260107153332.64727-15-john@groves.net>
Date: Wed, 7 Jan 2026 09:33:23 -0600
From: John Groves <John@...ves.net>
To: John Groves <John@...ves.net>,
Miklos Szeredi <miklos@...redi.hu>,
Dan Williams <dan.j.williams@...el.com>,
Bernd Schubert <bschubert@....com>,
Alison Schofield <alison.schofield@...el.com>
Cc: John Groves <jgroves@...ron.com>,
Jonathan Corbet <corbet@....net>,
Vishal Verma <vishal.l.verma@...el.com>,
Dave Jiang <dave.jiang@...el.com>,
Matthew Wilcox <willy@...radead.org>,
Jan Kara <jack@...e.cz>,
Alexander Viro <viro@...iv.linux.org.uk>,
David Hildenbrand <david@...nel.org>,
Christian Brauner <brauner@...nel.org>,
"Darrick J . Wong" <djwong@...nel.org>,
Randy Dunlap <rdunlap@...radead.org>,
Jeff Layton <jlayton@...nel.org>,
Amir Goldstein <amir73il@...il.com>,
Jonathan Cameron <Jonathan.Cameron@...wei.com>,
Stefan Hajnoczi <shajnocz@...hat.com>,
Joanne Koong <joannelkoong@...il.com>,
Josef Bacik <josef@...icpanda.com>,
Bagas Sanjaya <bagasdotme@...il.com>,
Chen Linxuan <chenlinxuan@...ontech.com>,
James Morse <james.morse@....com>,
Fuad Tabba <tabba@...gle.com>,
Sean Christopherson <seanjc@...gle.com>,
Shivank Garg <shivankg@....com>,
Ackerley Tng <ackerleytng@...gle.com>,
Gregory Price <gourry@...rry.net>,
Aravind Ramesh <arramesh@...ron.com>,
Ajay Joshi <ajayjoshi@...ron.com>,
venkataravis@...ron.com,
linux-doc@...r.kernel.org,
linux-kernel@...r.kernel.org,
nvdimm@...ts.linux.dev,
linux-cxl@...r.kernel.org,
linux-fsdevel@...r.kernel.org,
John Groves <john@...ves.net>
Subject: [PATCH V3 14/21] famfs_fuse: Plumb the GET_FMAP message/response
Upon completion of an OPEN, if we're in famfs-mode we do a GET_FMAP to
retrieve and cache the file-to-dax map in the kernel. If this
succeeds, read/write/mmap are resolved direct-to-dax with no upcalls.
Signed-off-by: John Groves <john@...ves.net>
---
MAINTAINERS | 8 +++++
fs/fuse/Makefile | 1 +
fs/fuse/famfs.c | 74 +++++++++++++++++++++++++++++++++++++++
fs/fuse/file.c | 14 +++++++-
fs/fuse/fuse_i.h | 47 ++++++++++++++++++++++++-
fs/fuse/inode.c | 8 ++++-
fs/fuse/iomode.c | 2 +-
include/uapi/linux/fuse.h | 7 ++++
8 files changed, 157 insertions(+), 4 deletions(-)
create mode 100644 fs/fuse/famfs.c
diff --git a/MAINTAINERS b/MAINTAINERS
index 90429cb06090..526309943026 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10374,6 +10374,14 @@ F: fs/fuse/
F: include/uapi/linux/fuse.h
F: tools/testing/selftests/filesystems/fuse/
+FUSE [FAMFS Fabric-Attached Memory File System]
+M: John Groves <jgroves@...ron.com>
+M: John Groves <John@...ves.net>
+L: linux-cxl@...r.kernel.org
+L: linux-fsdevel@...r.kernel.org
+S: Supported
+F: fs/fuse/famfs.c
+
FUTEX SUBSYSTEM
M: Thomas Gleixner <tglx@...utronix.de>
M: Ingo Molnar <mingo@...hat.com>
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index 22ad9538dfc4..3f8dcc8cbbd0 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -17,5 +17,6 @@ fuse-$(CONFIG_FUSE_DAX) += dax.o
fuse-$(CONFIG_FUSE_PASSTHROUGH) += passthrough.o backing.o
fuse-$(CONFIG_SYSCTL) += sysctl.o
fuse-$(CONFIG_FUSE_IO_URING) += dev_uring.o
+fuse-$(CONFIG_FUSE_FAMFS_DAX) += famfs.o
virtiofs-y := virtio_fs.o
diff --git a/fs/fuse/famfs.c b/fs/fuse/famfs.c
new file mode 100644
index 000000000000..0f7e3f00e1e7
--- /dev/null
+++ b/fs/fuse/famfs.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * famfs - dax file system for shared fabric-attached memory
+ *
+ * Copyright 2023-2025 Micron Technology, Inc.
+ *
+ * This file system, originally based on ramfs and the dax support from xfs,
+ * is intended to allow multiple host systems to mount a common file system
+ * view of dax files that map to shared memory.
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/dax.h>
+#include <linux/iomap.h>
+#include <linux/path.h>
+#include <linux/namei.h>
+#include <linux/string.h>
+
+#include "fuse_i.h"
+
+
+#define FMAP_BUFSIZE PAGE_SIZE
+
+/*
+ * fuse_get_fmap() - send FUSE_GET_FMAP for @inode and receive its fmap.
+ * Returns 0 on success or if fi->famfs_meta is already set, else -errno.
+ */
+int
+fuse_get_fmap(struct fuse_mount *fm, struct inode *inode)
+{
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	ssize_t fmap_size;
+	void *fmap_buf;
+	int rc;
+	FUSE_ARGS(args);
+
+	/* Don't retrieve if we already have the famfs metadata */
+	if (fi->famfs_meta)
+		return 0;
+
+	fmap_buf = kzalloc(FMAP_BUFSIZE, GFP_KERNEL);
+	if (!fmap_buf)
+		return -ENOMEM;
+
+	args.opcode = FUSE_GET_FMAP;
+	args.nodeid = get_node_id(inode);
+
+	/* Variable-sized output buffer: this causes fuse_simple_request()
+	 * to return the size of the output payload
+	 */
+	args.out_argvar = true;
+	args.out_numargs = 1;
+	args.out_args[0].size = FMAP_BUFSIZE;
+	args.out_args[0].value = fmap_buf;
+
+	rc = fuse_simple_request(fm, &args);
+	if (rc < 0) {
+		pr_err("%s: err=%d from fuse_simple_request()\n",
+		       __func__, rc);
+		goto out;	/* the reply buffer must be freed on failure too */
+	}
+	fmap_size = rc;	/* payload length (not yet used) */
+	rc = 0;
+
+	/* We retrieved the "fmap" (the file's map to memory), but
+	 * we haven't used it yet. A call to famfs_file_init_dax() will be added
+	 * here in a subsequent patch, when we add the ability to attach
+	 * fmaps to files.
+	 */
+out:
+	kfree(fmap_buf);
+	return rc;
+}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 093569033ed1..1f64bf68b5ee 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -277,6 +277,16 @@ static int fuse_open(struct inode *inode, struct file *file)
err = fuse_do_open(fm, get_node_id(inode), file, false);
if (!err) {
ff = file->private_data;
+
+ if ((fm->fc->famfs_iomap) && (S_ISREG(inode->i_mode))) {
+ /* Get the famfs fmap - failure is fatal */
+ err = fuse_get_fmap(fm, inode);
+ if (err) {
+ fuse_sync_release(fi, ff, file->f_flags);
+ goto out_nowrite;
+ }
+ }
+
err = fuse_finish_open(inode, file);
if (err)
fuse_sync_release(fi, ff, file->f_flags);
@@ -284,12 +294,14 @@ static int fuse_open(struct inode *inode, struct file *file)
fuse_truncate_update_attr(inode, file);
}
+out_nowrite:
if (is_wb_truncate || dax_truncate)
fuse_release_nowrite(inode);
if (!err) {
if (is_truncate)
truncate_pagecache(inode, 0);
- else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
+ else if (!(ff->open_flags & FOPEN_KEEP_CACHE) &&
+ !fuse_file_famfs(fi))
invalidate_inode_pages2(inode->i_mapping);
}
if (dax_truncate)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 84d0ee2a501d..691c7850cf4e 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -223,6 +223,14 @@ struct fuse_inode {
* so preserve the blocksize specified by the server.
*/
u8 cached_i_blkbits;
+
+#if IS_ENABLED(CONFIG_FUSE_FAMFS_DAX)
+ /* Pointer to the file's famfs metadata. Primary content is the
+ * in-memory version of the fmap - the map from file's offset range
+ * to DAX memory
+ */
+ void *famfs_meta;
+#endif
};
/** FUSE inode state bits */
@@ -1525,11 +1533,14 @@ void fuse_free_conn(struct fuse_conn *fc);
/* dax.c */
+static inline int fuse_file_famfs(struct fuse_inode *fi); /* forward */
+
/* This macro is used by virtio_fs, but now it also needs to filter for
* "not famfs"
*/
#define FUSE_IS_VIRTIO_DAX(fuse_inode) (IS_ENABLED(CONFIG_FUSE_DAX) \
- && IS_DAX(&fuse_inode->inode))
+ && IS_DAX(&fuse_inode->inode) \
+ && !fuse_file_famfs(fuse_inode))
ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to);
ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from);
@@ -1654,4 +1665,38 @@ static inline void famfs_teardown(struct fuse_conn *fc)
#endif
}
+static inline void *famfs_meta_set(struct fuse_inode *fi, void *meta)
+{
+	/* xchg() @meta into fi->famfs_meta, returning the previous pointer */
+#if IS_ENABLED(CONFIG_FUSE_FAMFS_DAX)
+	return xchg(&fi->famfs_meta, meta);
+#else
+	return NULL;
+#endif
+}
+
+static inline void famfs_meta_free(struct fuse_inode *fi)
+{
+	/* Stub: intentionally empty; will be connected in a subsequent commit */
+}
+
+static inline int fuse_file_famfs(struct fuse_inode *fi)
+{
+#if !IS_ENABLED(CONFIG_FUSE_FAMFS_DAX)
+	return 0;	/* no famfs_meta field exists in this configuration */
+#else
+	return !!READ_ONCE(fi->famfs_meta);	/* 1 iff famfs_meta is set */
+#endif
+}
+
+#if !IS_ENABLED(CONFIG_FUSE_FAMFS_DAX)
+/* famfs compiled out: there is no fmap to fetch, so report success */
+static inline int fuse_get_fmap(struct fuse_mount *fm, struct inode *inode)
+{
+	return 0;
+}
+#else
+int fuse_get_fmap(struct fuse_mount *fm, struct inode *inode);
+#endif /* CONFIG_FUSE_FAMFS_DAX */
+
#endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 2e0844aabbae..9e121a1d63b7 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -120,6 +120,9 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
fuse_inode_backing_set(fi, NULL);
+ if (IS_ENABLED(CONFIG_FUSE_FAMFS_DAX))
+ famfs_meta_set(fi, NULL);
+
return &fi->inode;
out_free_forget:
@@ -141,6 +144,9 @@ static void fuse_free_inode(struct inode *inode)
if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
fuse_backing_put(fuse_inode_backing(fi));
+ if (S_ISREG(inode->i_mode) && fuse_file_famfs(fi))
+ famfs_meta_free(fi);
+
kmem_cache_free(fuse_inode_cachep, fi);
}
@@ -162,7 +168,7 @@ static void fuse_evict_inode(struct inode *inode)
/* Will write inode on close/munmap and in all other dirtiers */
WARN_ON(inode_state_read_once(inode) & I_DIRTY_INODE);
- if (FUSE_IS_VIRTIO_DAX(fi))
+ if (FUSE_IS_VIRTIO_DAX(fi) || fuse_file_famfs(fi))
dax_break_layout_final(inode);
truncate_inode_pages_final(&inode->i_data);
diff --git a/fs/fuse/iomode.c b/fs/fuse/iomode.c
index 31ee7f3304c6..948148316ef0 100644
--- a/fs/fuse/iomode.c
+++ b/fs/fuse/iomode.c
@@ -203,7 +203,7 @@ int fuse_file_io_open(struct file *file, struct inode *inode)
* io modes are not relevant with DAX and with server that does not
* implement open.
*/
- if (FUSE_IS_VIRTIO_DAX(fi) || !ff->args)
+ if (FUSE_IS_VIRTIO_DAX(fi) || fuse_file_famfs(fi) || !ff->args)
return 0;
/*
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 5e2c93433823..bfb92a4aa8a9 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -669,6 +669,9 @@ enum fuse_opcode {
FUSE_STATX = 52,
FUSE_COPY_FILE_RANGE_64 = 53,
+ /* Famfs / devdax opcodes */
+ FUSE_GET_FMAP = 54,
+
/* CUSE specific operations */
CUSE_INIT = 4096,
@@ -1313,4 +1316,8 @@ struct fuse_uring_cmd_req {
uint8_t padding[6];
};
+/* Famfs fmap message components */
+
+#define FAMFS_FMAP_MAX 32768 /* Largest supported fmap message */
+
#endif /* _LINUX_FUSE_H */
--
2.49.0
Powered by blists - more mailing lists