[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <5rw4mc7kqbxic3iunmwz5s3zv4sl2xlw3qdwndtmnxceqsrdyo@uxu252gg5t2a>
Date: Sun, 6 Jul 2025 12:07:01 -0500
From: John Groves <John@...ves.net>
To: Jonathan Cameron <Jonathan.Cameron@...wei.com>
Cc: Dan Williams <dan.j.williams@...el.com>,
Miklos Szeredi <miklos@...redi.hu>, Bernd Schubert <bschubert@....com>,
John Groves <jgroves@...ron.com>, Jonathan Corbet <corbet@....net>,
Vishal Verma <vishal.l.verma@...el.com>, Dave Jiang <dave.jiang@...el.com>,
Matthew Wilcox <willy@...radead.org>, Jan Kara <jack@...e.cz>,
Alexander Viro <viro@...iv.linux.org.uk>, Christian Brauner <brauner@...nel.org>,
"Darrick J . Wong" <djwong@...nel.org>, Randy Dunlap <rdunlap@...radead.org>,
Jeff Layton <jlayton@...nel.org>, Kent Overstreet <kent.overstreet@...ux.dev>,
linux-doc@...r.kernel.org, linux-kernel@...r.kernel.org, nvdimm@...ts.linux.dev,
linux-cxl@...r.kernel.org, linux-fsdevel@...r.kernel.org,
Amir Goldstein <amir73il@...il.com>, Stefan Hajnoczi <shajnocz@...hat.com>,
Joanne Koong <joannelkoong@...il.com>, Josef Bacik <josef@...icpanda.com>,
Aravind Ramesh <arramesh@...ron.com>, Ajay Joshi <ajayjoshi@...ron.com>
Subject: Re: [RFC V2 14/18] famfs_fuse: GET_DAXDEV message and daxdev_table
On 25/07/04 02:20PM, Jonathan Cameron wrote:
> On Thu, 3 Jul 2025 13:50:28 -0500
> John Groves <John@...ves.net> wrote:
>
> > * The new GET_DAXDEV message/response is enabled
> > * The command it triggered by the update_daxdev_table() call, if there
> > are any daxdevs in the subject fmap that are not represented in the
> > daxdev_dable yet.
> >
> > Signed-off-by: John Groves <john@...ves.net>
>
> More drive by stuff you can ignore for now if you like.
Always appreciated...
>
> > ---
> > fs/fuse/famfs.c | 227 ++++++++++++++++++++++++++++++++++++++
> > fs/fuse/famfs_kfmap.h | 26 +++++
> > fs/fuse/fuse_i.h | 1 +
> > fs/fuse/inode.c | 4 +-
> > fs/namei.c | 1 +
> > include/uapi/linux/fuse.h | 18 +++
> > 6 files changed, 276 insertions(+), 1 deletion(-)
> >
> > diff --git a/fs/fuse/famfs.c b/fs/fuse/famfs.c
> > index 41c4d92f1451..f5e01032b825 100644
> > --- a/fs/fuse/famfs.c
> > +++ b/fs/fuse/famfs.c
>
> > +/**
> > + * famfs_fuse_get_daxdev() - Retrieve info for a DAX device from fuse server
> > + *
> > + * Send a GET_DAXDEV message to the fuse server to retrieve info on a
> > + * dax device.
> > + *
> > + * @fm: fuse_mount
> > + * @index: the index of the dax device; daxdevs are referred to by index
> > + * in fmaps, and the server resolves the index to a particular daxdev
> > + *
> > + * Returns: 0=success
> > + * -errno=failure
> > + */
> > +static int
> > +famfs_fuse_get_daxdev(struct fuse_mount *fm, const u64 index)
> > +{
> > + struct fuse_daxdev_out daxdev_out = { 0 };
> > + struct fuse_conn *fc = fm->fc;
> > + struct famfs_daxdev *daxdev;
> > + int err = 0;
> > +
> > + FUSE_ARGS(args);
> > +
> > + /* Store the daxdev in our table */
> > + if (index >= fc->dax_devlist->nslots) {
> > + pr_err("%s: index(%lld) > nslots(%d)\n",
> > + __func__, index, fc->dax_devlist->nslots);
> > + err = -EINVAL;
> > + goto out;
> > + }
> > +
> > + args.opcode = FUSE_GET_DAXDEV;
> > + args.nodeid = index;
> > +
> > + args.in_numargs = 0;
> > +
> > + args.out_numargs = 1;
> > + args.out_args[0].size = sizeof(daxdev_out);
> > + args.out_args[0].value = &daxdev_out;
> > +
> > + /* Send GET_DAXDEV command */
> > + err = fuse_simple_request(fm, &args);
> > + if (err) {
> > + pr_err("%s: err=%d from fuse_simple_request()\n",
> > + __func__, err);
> > + /*
> > + * Error will be that the payload is smaller than FMAP_BUFSIZE,
> > + * which is the max we can handle. Empty payload handled below.
> > + */
> > + goto out;
> > + }
> > +
> > + down_write(&fc->famfs_devlist_sem);
>
> Worth thinking about guard() in this code in general.
> Simplify some of the error paths at least.
Thinking about it. Not sure I'll go there yet; I find the guard macros
a bit confusing...
>
> > +
> > + daxdev = &fc->dax_devlist->devlist[index];
> > +
> > + /* Abort if daxdev is now valid */
> > + if (daxdev->valid) {
> > + up_write(&fc->famfs_devlist_sem);
> > + /* We already have a valid entry at this index */
> > + err = -EALREADY;
> > + goto out;
> > + }
> > +
> > + /* Verify that the dev is valid and can be opened and gets the devno */
> > + err = famfs_verify_daxdev(daxdev_out.name, &daxdev->devno);
> > + if (err) {
> > + up_write(&fc->famfs_devlist_sem);
> > + pr_err("%s: err=%d from famfs_verify_daxdev()\n", __func__, err);
> > + goto out;
> > + }
> > +
> > + /* This will fail if it's not a dax device */
> > + daxdev->devp = dax_dev_get(daxdev->devno);
> > + if (!daxdev->devp) {
> > + up_write(&fc->famfs_devlist_sem);
> > + pr_warn("%s: device %s not found or not dax\n",
> > + __func__, daxdev_out.name);
> > + err = -ENODEV;
> > + goto out;
> > + }
> > +
> > + daxdev->name = kstrdup(daxdev_out.name, GFP_KERNEL);
> > + wmb(); /* all daxdev fields must be visible before marking it valid */
> > + daxdev->valid = 1;
> > +
> > + up_write(&fc->famfs_devlist_sem);
> > +
> > +out:
> > + return err;
> > +}
> > +
> > +/**
> > + * famfs_update_daxdev_table() - Update the daxdev table
> > + * @fm - fuse_mount
> > + * @meta - famfs_file_meta, in-memory format, built from a GET_FMAP response
> > + *
> > + * This function is called for each new file fmap, to verify whether all
> > + * referenced daxdevs are already known (i.e. in the table). Any daxdev
> > + * indices that referenced in @meta but not in the table will be retrieved via
> > + * famfs_fuse_get_daxdev() and added to the table
> > + *
> > + * Return: 0=success
> > + * -errno=failure
> > + */
> > +static int
> > +famfs_update_daxdev_table(
> > + struct fuse_mount *fm,
> > + const struct famfs_file_meta *meta)
> > +{
> > + struct famfs_dax_devlist *local_devlist;
> > + struct fuse_conn *fc = fm->fc;
> > + int err;
> > + int i;
> > +
> > + /* First time through we will need to allocate the dax_devlist */
> > + if (!fc->dax_devlist) {
> > + local_devlist = kcalloc(1, sizeof(*fc->dax_devlist), GFP_KERNEL);
> > + if (!local_devlist)
> > + return -ENOMEM;
> > +
> > + local_devlist->nslots = MAX_DAXDEVS;
> > +
> > + local_devlist->devlist = kcalloc(MAX_DAXDEVS,
> > + sizeof(struct famfs_daxdev),
> > + GFP_KERNEL);
> > + if (!local_devlist->devlist) {
> > + kfree(local_devlist);
> > + return -ENOMEM;
> > + }
> > +
> > + /* We don't need the famfs_devlist_sem here because we use cmpxchg... */
> > + if (cmpxchg(&fc->dax_devlist, NULL, local_devlist) != NULL) {
> > + kfree(local_devlist->devlist);
> > + kfree(local_devlist); /* another thread beat us to it */
> > + }
> > + }
> > +
> > + down_read(&fc->famfs_devlist_sem);
> > + for (i = 0; i < fc->dax_devlist->nslots; i++) {
> > + if (meta->dev_bitmap & (1ULL << i)) {
> Flip for readability.
> if (!(meta->dev_bitmap & (1ULL << i))
> continue;
I like it - done..
>
> Or can we use bitmap_from_arr64() and
> for_each_set_bit() to optimize this a little.
Could do, but I feel like that's a bit harder [for me] to read.
>
> > + /* This file meta struct references devindex i
> > + * if devindex i isn't in the table; get it...
> > + */
> > + if (!(fc->dax_devlist->devlist[i].valid)) {
> > + up_read(&fc->famfs_devlist_sem);
> > +
> > + err = famfs_fuse_get_daxdev(fm, i);
> > + if (err)
> > + pr_err("%s: failed to get daxdev=%d\n",
> > + __func__, i);
> Don't want to surface that error?
I'm thinking on that. Failure to retrieve a dax device is currently
game over for the whole mount (because there is just one of them currently,
and it's retrieved to get access to the superblock and metadata log).
Once additional daxdevs are enabled there will be more nuance, but any
file that references a 'missing' dax device will be non-operative, so
putting something in the log makes sense to me.
I may surface it a bit differently, but I think it needs to surface.
> > +
> > + down_read(&fc->famfs_devlist_sem);
> > + }
> > + }
> > + }
> > + up_read(&fc->famfs_devlist_sem);
> > +
> > + return 0;
> > +}
> > +
> > +/***************************************************************************/
>
> ?
One of my tics is divider comments. Will probably drop it though ;)
>
> > diff --git a/fs/fuse/famfs_kfmap.h b/fs/fuse/famfs_kfmap.h
> > index ce785d76719c..f79707b9f761 100644
> > --- a/fs/fuse/famfs_kfmap.h
> > +++ b/fs/fuse/famfs_kfmap.h
> > @@ -60,4 +60,30 @@ struct famfs_file_meta {
> > };
> > };
> >
> > +/**
> > + * famfs_daxdev - tracking struct for a daxdev within a famfs file system
> > + *
> > + * This is the in-memory daxdev metadata that is populated by
> > + * the responses to GET_FMAP messages
> > + */
> > +struct famfs_daxdev {
> > + /* Include dev uuid? */
> > + bool valid;
> > + bool error;
> > + dev_t devno;
> > + struct dax_device *devp;
> > + char *name;
> > +};
> > +
> > +#define MAX_DAXDEVS 24
> > +
> > +/**
> > + * famfs_dax_devlist - list of famfs_daxdev's
>
> Run kernel-doc script over these. It gets grumpy about partial
> documentation.
Thank you... I just did, and fixed a couple of issues it complained about.
>
> > + */
> > +struct famfs_dax_devlist {
> > + int nslots;
> > + int ndevs;
> > + struct famfs_daxdev *devlist; /* XXX: make this an xarray! */
> > +};
> > +
> > #endif /* FAMFS_KFMAP_H */
>
> > diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
> > index ecaaa62910f0..8a81b6c334fe 100644
> > --- a/include/uapi/linux/fuse.h
> > +++ b/include/uapi/linux/fuse.h
> > @@ -235,6 +235,9 @@
> > * - struct fuse_famfs_simple_ext
> > * - struct fuse_famfs_iext
> > * - struct fuse_famfs_fmap_header
> > + * - Add the following structs for the GET_DAXDEV message and reply
> > + * - struct fuse_get_daxdev_in
> > + * - struct fuse_get_daxdev_out
> > * - Add the following enumerated types
> > * - enum fuse_famfs_file_type
> > * - enum famfs_ext_type
> > @@ -1351,6 +1354,20 @@ struct fuse_famfs_fmap_header {
> > uint64_t reserved1;
> > };
> >
> > +struct fuse_get_daxdev_in {
> > + uint32_t daxdev_num;
> > +};
> > +
> > +#define DAXDEV_NAME_MAX 256
> > +struct fuse_daxdev_out {
> > + uint16_t index;
> > + uint16_t reserved;
> > + uint32_t reserved2;
> > + uint64_t reserved3; /* enough space for a uuid if we need it */
>
> Odd place for the comment. If it just refers to reserved3 then nope
> not enough space. If you mean that and reserved4 then fiar enough
> but that's not obvious as it stands.
Good point. Moved it above in -next
>
> > + uint64_t reserved4;
> > + char name[DAXDEV_NAME_MAX];
> > +};
> > +
> > static inline int32_t fmap_msg_min_size(void)
> > {
> > /* Smallest fmap message is a header plus one simple extent */
> > @@ -1358,4 +1375,5 @@ static inline int32_t fmap_msg_min_size(void)
> > + sizeof(struct fuse_famfs_simple_ext));
> > }
> >
> > +
> Stray change. Worth a quick scrub to clean these out (even in an RFC) as they just add
> noise to the bits you want people to look at!
Yup, will fix.
BTW, public service announcement: I've discovered the awesomeness of jj
(aka ju jutsu, aka jj-vcs) as a wrapper for git that is great at the kind
of rebase problems that come with factoring and re-factoring patch set
branches. Without jj, more stuff like this would have slipped through ;)
<snip>
Thanks!
John
Powered by blists - more mailing lists