[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAOQ4uxgN5du9ukfYLBPh88+NMLt6AzSSgx4F+UJmugZ86CvB1g@mail.gmail.com>
Date: Fri, 21 Nov 2025 08:49:47 +0100
From: Amir Goldstein <amir73il@...il.com>
To: Luis Henriques <luis@...lia.com>
Cc: Miklos Szeredi <miklos@...redi.hu>, "Darrick J. Wong" <djwong@...nel.org>,
Bernd Schubert <bschubert@....com>, Kevin Chen <kchen@....com>,
Horst Birthelmer <hbirthelmer@....com>, linux-fsdevel@...r.kernel.org,
linux-kernel@...r.kernel.org, Matt Harvey <mharvey@...ptrading.com>,
kernel-dev@...lia.com
Subject: Re: [RFC PATCH v1 3/3] fuse: implementation of the FUSE_LOOKUP_HANDLE operation
On Thu, Nov 20, 2025 at 11:55 AM Luis Henriques <luis@...lia.com> wrote:
>
> The implementation of LOOKUP_HANDLE simply modifies the LOOKUP operation to
> include an extra inarg: the file handle for the parent directory (if it is
> available). Also, because fuse_entry_out now has a extra variable size
> struct (the actual handle), it also sets the out_argvar flag to true.
>
> Most of the other modifications in this patch are a fallout from these
> changes: because fuse_entry_out has been modified to include a variable size
> struct, every operation that receives such a parameter have to take this
> into account:
>
> CREATE, LINK, LOOKUP, MKDIR, MKNOD, READDIRPLUS, SYMLINK, TMPFILE
>
Overall, this is exactly what I had in mind.
Maybe it's utter garbage but that's what I was aiming for ;)
I'd like to get feedback from Miklos and Bernd on the details of the
protocol extension, especially w.r.t backward compat aspects.
> The export_operations were also modified to use this new file handle instead
> if the lookup_handle operation is implemented for the file system.
This part is really broken. I will comment on it, but would not recommend
that you include it in the same patch, because it is an independent change
that may even require an independent opt-in flag.
>
> Signed-off-by: Luis Henriques <luis@...lia.com>
> ---
> fs/fuse/dev.c | 1 +
> fs/fuse/dir.c | 75 +++++++++++++++-----
> fs/fuse/fuse_i.h | 34 ++++++++--
> fs/fuse/inode.c | 139 +++++++++++++++++++++++++++++++++-----
> fs/fuse/readdir.c | 10 +--
> include/uapi/linux/fuse.h | 8 +++
> 6 files changed, 224 insertions(+), 43 deletions(-)
>
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index 132f38619d70..2f659e70a088 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -632,6 +632,7 @@ static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args)
> break;
> }
> }
> + /* XXX handle FUSE_COMPAT_45_ENTRY_OUT_SIZE */
> }
>
> static void fuse_force_creds(struct fuse_req *req)
> diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
> index 77d50ea30b61..a40f7aa700b0 100644
> --- a/fs/fuse/dir.c
> +++ b/fs/fuse/dir.c
> @@ -169,7 +169,8 @@ static void fuse_invalidate_entry(struct dentry *entry)
> }
>
> static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
> - u64 nodeid, const struct qstr *name,
> + u64 nodeid, struct inode *dir,
> + const struct qstr *name,
> struct fuse_entry_out *outarg)
> {
> args->opcode = FUSE_LOOKUP;
> @@ -181,8 +182,20 @@ static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
> args->in_args[2].size = 1;
> args->in_args[2].value = "";
> args->out_numargs = 1;
> - args->out_args[0].size = sizeof(struct fuse_entry_out);
> + args->out_args[0].size = sizeof(*outarg) + outarg->fh.size;
> args->out_args[0].value = outarg;
> +
> + if (fc->lookup_handle && dir) {
> + struct fuse_inode *fi = get_fuse_inode(dir);
> +
> + args->opcode = FUSE_LOOKUP_HANDLE;
> + if (fi && fi->fh) {
> + args->in_numargs = 4;
> + args->in_args[3].size = sizeof(*fi->fh) + fi->fh->size;
> + args->in_args[3].value = fi->fh;
> + }
> + args->out_argvar = true;
> + }
> }
>
> /*
> @@ -240,7 +253,7 @@ static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name,
>
> attr_version = fuse_get_attr_version(fm->fc);
>
> - fuse_lookup_init(fm->fc, &args, get_node_id(dir),
> + fuse_lookup_init(fm->fc, &args, get_node_id(dir), dir,
> name, outarg);
> ret = fuse_simple_request(fm, &args);
> /* Zero nodeid is same as -ENOENT */
> @@ -248,7 +261,8 @@ static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name,
> ret = -ENOENT;
> if (!ret) {
> fi = get_fuse_inode(inode);
> - if (outarg->nodeid != get_node_id(inode) ||
> + if (!fuse_file_handle_is_equal(fm->fc, fi->fh, &outarg->fh) ||
> + outarg->nodeid != get_node_id(inode) ||
> (bool) IS_AUTOMOUNT(inode) != (bool) (outarg->attr.flags & FUSE_ATTR_SUBMOUNT)) {
> fuse_queue_forget(fm->fc, forget,
> outarg->nodeid, 1);
> @@ -365,8 +379,9 @@ bool fuse_invalid_attr(struct fuse_attr *attr)
> return !fuse_valid_type(attr->mode) || !fuse_valid_size(attr->size);
> }
>
> -int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
> - struct fuse_entry_out *outarg, struct inode **inode)
> +int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct inode *dir,
> + const struct qstr *name, struct fuse_entry_out *outarg,
> + struct inode **inode)
> {
> struct fuse_mount *fm = get_fuse_mount_super(sb);
> FUSE_ARGS(args);
> @@ -388,14 +403,15 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
> attr_version = fuse_get_attr_version(fm->fc);
> evict_ctr = fuse_get_evict_ctr(fm->fc);
>
> - fuse_lookup_init(fm->fc, &args, nodeid, name, outarg);
> + fuse_lookup_init(fm->fc, &args, nodeid, dir, name, outarg);
> err = fuse_simple_request(fm, &args);
> /* Zero nodeid is same as -ENOENT, but with valid timeout */
> - if (err || !outarg->nodeid)
> + if (err < 0 || !outarg->nodeid) // XXX err = size if args->out_argvar = true
> goto out_put_forget;
>
> err = -EIO;
> - if (fuse_invalid_attr(&outarg->attr))
> + if (fuse_invalid_attr(&outarg->attr) ||
> + fuse_invalid_file_handle(fm->fc, &outarg->fh))
> goto out_put_forget;
> if (outarg->nodeid == FUSE_ROOT_ID && outarg->generation != 0) {
> pr_warn_once("root generation should be zero\n");
> @@ -404,7 +420,8 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
>
> *inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
> &outarg->attr, ATTR_TIMEOUT(outarg),
> - attr_version, evict_ctr);
> + attr_version, evict_ctr,
> + &outarg->fh);
> err = -ENOMEM;
> if (!*inode) {
> fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
> @@ -440,14 +457,14 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
> return ERR_PTR(-ENOMEM);
>
> locked = fuse_lock_inode(dir);
> - err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
> + err = fuse_lookup_name(dir->i_sb, get_node_id(dir), dir, &entry->d_name,
> outarg, &inode);
> fuse_unlock_inode(dir, locked);
> if (err == -ENOENT) {
> outarg_valid = false;
> err = 0;
> }
> - if (err)
> + if (err < 0) // XXX err = size if args->out_argvar = true
> goto out_err;
>
> err = -EIO;
> @@ -688,8 +705,16 @@ static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir,
> args.in_args[0].value = &inarg;
> args.in_args[1].size = entry->d_name.len + 1;
> args.in_args[1].value = entry->d_name.name;
> + if (fm->fc->lookup_handle) {
> + fi = get_fuse_inode(dir);
> + if (fi->fh) {
> + args.in_numargs = 3;
> + args.in_args[2].size = sizeof(*fi->fh) + fi->fh->size;
> + args.in_args[3].value = fi->fh;
> + }
> + }
> args.out_numargs = 2;
> - args.out_args[0].size = sizeof(*outentry);
> + args.out_args[0].size = sizeof(*outentry) + outentry->fh.size;
> args.out_args[0].value = outentry;
> /* Store outarg for fuse_finish_open() */
> outopenp = &ff->args->open_outarg;
> @@ -707,6 +732,7 @@ static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir,
>
> err = -EIO;
> if (!S_ISREG(outentry->attr.mode) || invalid_nodeid(outentry->nodeid) ||
> + fuse_invalid_file_handle(fm->fc, &outentry->fh) ||
> fuse_invalid_attr(&outentry->attr))
> goto out_free_outentry;
>
> @@ -714,7 +740,8 @@ static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir,
> ff->nodeid = outentry->nodeid;
> ff->open_flags = outopenp->open_flags;
> inode = fuse_iget(dir->i_sb, outentry->nodeid, outentry->generation,
> - &outentry->attr, ATTR_TIMEOUT(outentry), 0, 0);
> + &outentry->attr, ATTR_TIMEOUT(outentry), 0, 0,
> + &outentry->fh);
> if (!inode) {
> flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
> fuse_sync_release(NULL, ff, flags);
> @@ -828,9 +855,21 @@ static struct dentry *create_new_entry(struct mnt_idmap *idmap, struct fuse_moun
> goto out_put_forget_req;
> }
>
> + if (fm->fc->lookup_handle) {
> + struct fuse_inode *fi = get_fuse_inode(dir);
> + int idx = args->in_numargs;
> +
> + WARN_ON_ONCE(idx >= 4);
> +
> + if (fi->fh) {
This assertion can be handled gracefully
if (fi->fh && !WARN_ON_ONCE(idx >= 4)) {
> + args->in_args[idx].size = sizeof(*fi->fh) + fi->fh->size;
> + args->in_args[idx].value = fi->fh;
> + args->in_numargs++;
> + }
> + }
> args->nodeid = get_node_id(dir);
> args->out_numargs = 1;
> - args->out_args[0].size = sizeof(*outarg);
> + args->out_args[0].size = sizeof(*outarg) + outarg->fh.size;
> args->out_args[0].value = outarg;
>
> if (args->opcode != FUSE_LINK) {
> @@ -845,14 +884,16 @@ static struct dentry *create_new_entry(struct mnt_idmap *idmap, struct fuse_moun
> goto out_free_outarg;
>
> err = -EIO;
> - if (invalid_nodeid(outarg->nodeid) || fuse_invalid_attr(&outarg->attr))
> + if (invalid_nodeid(outarg->nodeid) || fuse_invalid_attr(&outarg->attr) ||
> + fuse_invalid_file_handle(fm->fc, &outarg->fh))
> goto out_free_outarg;
>
> if ((outarg->attr.mode ^ mode) & S_IFMT)
> goto out_free_outarg;
>
> inode = fuse_iget(dir->i_sb, outarg->nodeid, outarg->generation,
> - &outarg->attr, ATTR_TIMEOUT(outarg), 0, 0);
> + &outarg->attr, ATTR_TIMEOUT(outarg), 0, 0,
> + &outarg->fh);
> if (!inode) {
> fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
> kfree(outarg);
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index d997fdcede9b..ad9477394e4b 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -216,6 +216,8 @@ struct fuse_inode {
> * so preserve the blocksize specified by the server.
> */
> u8 cached_i_blkbits;
> +
> + struct fuse_file_handle *fh;
> };
>
> /** FUSE inode state bits */
> @@ -1065,6 +1067,26 @@ static inline int invalid_nodeid(u64 nodeid)
> return !nodeid || nodeid == FUSE_ROOT_ID;
> }
>
> +static inline bool fuse_invalid_file_handle(struct fuse_conn *fc,
> + struct fuse_file_handle *handle)
> +{
> + if (!fc->lookup_handle)
> + return false;
> +
> + return !handle->size || (handle->size >= FUSE_MAX_HANDLE_SZ);
> +}
> +
> +static inline bool fuse_file_handle_is_equal(struct fuse_conn *fc,
> + struct fuse_file_handle *fh1,
> + struct fuse_file_handle *fh2)
> +{
> + if (!fc->lookup_handle || !fh2->size || // XXX more OPs without handle
> + ((fh1->size == fh2->size) &&
> + (!memcmp(fh1->handle, fh2->handle, fh1->size))))
> + return true;
> + return false;
> +}
> +
> static inline u64 fuse_get_attr_version(struct fuse_conn *fc)
> {
> return atomic64_read(&fc->attr_version);
> @@ -1096,7 +1118,10 @@ static inline struct fuse_entry_out *fuse_entry_out_alloc(struct fuse_conn *fc)
> {
> struct fuse_entry_out *entryout;
>
> - entryout = kzalloc(sizeof(*entryout), GFP_KERNEL_ACCOUNT);
> + entryout = kzalloc(sizeof(*entryout) + fc->max_handle_sz,
> + GFP_KERNEL_ACCOUNT);
> + if (entryout)
> + entryout->fh.size = fc->max_handle_sz;
>
> return entryout;
> }
> @@ -1143,10 +1168,11 @@ extern const struct dentry_operations fuse_dentry_operations;
> struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
> int generation, struct fuse_attr *attr,
> u64 attr_valid, u64 attr_version,
> - u64 evict_ctr);
> + u64 evict_ctr, struct fuse_file_handle *fh);
>
> -int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
> - struct fuse_entry_out *outarg, struct inode **inode);
> +int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct inode *dir,
> + const struct qstr *name, struct fuse_entry_out *outarg,
> + struct inode **inode);
>
> /**
> * Send FORGET command
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index 30ee37c29057..23b8e4932da8 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -95,6 +95,25 @@ static struct fuse_submount_lookup *fuse_alloc_submount_lookup(void)
> return NULL;
> }
>
> +/*
> + * XXX postpone this allocation and later use the real size instead of max
> + */
> +static bool fuse_inode_handle_alloc(struct super_block *sb,
> + struct fuse_inode *fi)
> +{
> + struct fuse_conn *fc = get_fuse_conn_super(sb);
> +
> + fi->fh = NULL;
> + if (fc->lookup_handle) {
> + fi->fh = kzalloc(sizeof(*fi->fh) + fc->max_handle_sz,
> + GFP_KERNEL_ACCOUNT);
> + if (!fi->fh)
> + return false;
> + }
> +
> + return true;
> +}
> +
> static struct inode *fuse_alloc_inode(struct super_block *sb)
> {
> struct fuse_inode *fi;
> @@ -120,8 +139,15 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
> if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
> fuse_inode_backing_set(fi, NULL);
>
> + if (!fuse_inode_handle_alloc(sb, fi))
> + goto out_free_dax;
> +
> return &fi->inode;
>
> +out_free_dax:
> +#ifdef CONFIG_FUSE_DAX
> + kfree(fi->dax);
> +#endif
> out_free_forget:
> kfree(fi->forget);
> out_free:
> @@ -132,6 +158,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
> static void fuse_free_inode(struct inode *inode)
> {
> struct fuse_inode *fi = get_fuse_inode(inode);
> + struct fuse_conn *fc = get_fuse_conn(inode);
>
> mutex_destroy(&fi->mutex);
> kfree(fi->forget);
> @@ -141,6 +168,9 @@ static void fuse_free_inode(struct inode *inode)
> if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
> fuse_backing_put(fuse_inode_backing(fi));
>
> + if (fc->lookup_handle)
> + kfree(fi->fh);
> +
> kmem_cache_free(fuse_inode_cachep, fi);
> }
>
> @@ -465,7 +495,7 @@ static int fuse_inode_set(struct inode *inode, void *_nodeidp)
> struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
> int generation, struct fuse_attr *attr,
> u64 attr_valid, u64 attr_version,
> - u64 evict_ctr)
> + u64 evict_ctr, struct fuse_file_handle *fh)
> {
> struct inode *inode;
> struct fuse_inode *fi;
> @@ -505,14 +535,29 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
> if (!inode)
> return NULL;
>
> + fi = get_fuse_inode(inode);
> + if (fc->lookup_handle && fh->size) {
> + if (fi->fh->size == 0) {
> + if (fh->size >= fc->max_handle_sz)
> + pr_warn("Truncating file handle size (%u)\n",
> + fh->size);
> + fi->fh->size = fh->size < fc->max_handle_sz ?
> + fh->size : fc->max_handle_sz - 1;
I can't think of anything good that can come out of copying a truncated
handle. If server provides a handle that does not meet the agreed
max_handle_sz then it is an invalid handle.
> + memcpy(fi->fh->handle, fh->handle, fi->fh->size);
> + } else
> + pr_warn("handle was already set (size: %u)\n",
> + fi->fh->size);
> + }
> if ((inode->i_state & I_NEW)) {
> inode->i_flags |= S_NOATIME;
> if (!fc->writeback_cache || !S_ISREG(attr->mode))
> inode->i_flags |= S_NOCMTIME;
> inode->i_generation = generation;
> +
> fuse_init_inode(inode, attr, fc);
> unlock_new_inode(inode);
> - } else if (fuse_stale_inode(inode, generation, attr)) {
> + } else if (fuse_stale_inode(inode, generation, attr) ||
> + !fuse_file_handle_is_equal(fc, fi->fh, fh)) {
> /* nodeid was reused, any I/O on the old inode should fail */
> fuse_make_bad(inode);
> if (inode != d_inode(sb->s_root)) {
> @@ -521,7 +566,6 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
> goto retry;
> }
> }
> - fi = get_fuse_inode(inode);
> spin_lock(&fi->lock);
> fi->nlookup++;
> spin_unlock(&fi->lock);
> @@ -1059,12 +1103,23 @@ static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned int mo
> attr.mode = mode;
> attr.ino = FUSE_ROOT_ID;
> attr.nlink = 1;
> - return fuse_iget(sb, FUSE_ROOT_ID, 0, &attr, 0, 0, 0);
> + return fuse_iget(sb, FUSE_ROOT_ID, 0, &attr, 0, 0, 0, NULL); // XXX
> }
>
> +enum {
> + HANDLE_TYPE_NODEID = 0,
> + HANDLE_TYPE_HANDLE = 1,
> +};
> +
> struct fuse_inode_handle {
> - u64 nodeid;
> - u32 generation;
> + u32 type;
> + union {
> + struct {
> + u64 nodeid;
> + u32 generation;
> + };
> + struct fuse_file_handle fh;
> + };
> };
>
These changes belong in a different patch, so I will reply in a separate email.
Thanks,
Amir.
Powered by blists - more mailing lists