[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <76f21528-9b14-4277-8f4c-f30036884e75@ddn.com>
Date: Mon, 15 Dec 2025 17:39:22 +0000
From: Bernd Schubert <bschubert@....com>
To: Luis Henriques <luis@...lia.com>, Miklos Szeredi <miklos@...redi.hu>
CC: Amir Goldstein <amir73il@...il.com>, "Darrick J. Wong"
<djwong@...nel.org>, Kevin Chen <kchen@....com>, Horst Birthelmer
<hbirthelmer@....com>, "linux-fsdevel@...r.kernel.org"
<linux-fsdevel@...r.kernel.org>, "linux-kernel@...r.kernel.org"
<linux-kernel@...r.kernel.org>, Matt Harvey <mharvey@...ptrading.com>,
"kernel-dev@...lia.com" <kernel-dev@...lia.com>
Subject: Re: [RFC PATCH v2 4/6] fuse: implementation of the FUSE_LOOKUP_HANDLE
operation
On 12/12/25 19:12, Luis Henriques wrote:
> The implementation of LOOKUP_HANDLE modifies the LOOKUP operation to include
> an extra inarg: the file handle for the parent directory (if it is
> available). Also, because fuse_entry_out now has an extra variable size
> struct (the actual handle), it also sets the out_argvar flag to true.
>
> Most of the other modifications in this patch are a fallout from these
> changes: because fuse_entry_out has been modified to include a variable size
> struct, every operation that receives such a parameter has to take this
> into account:
>
> CREATE, LINK, LOOKUP, MKDIR, MKNOD, READDIRPLUS, SYMLINK, TMPFILE
>
> Signed-off-by: Luis Henriques <luis@...lia.com>
> ---
> fs/fuse/dev.c | 16 +++++++
> fs/fuse/dir.c | 87 ++++++++++++++++++++++++++++++---------
> fs/fuse/fuse_i.h | 34 +++++++++++++--
> fs/fuse/inode.c | 69 +++++++++++++++++++++++++++----
> fs/fuse/readdir.c | 10 ++---
> include/uapi/linux/fuse.h | 8 ++++
> 6 files changed, 189 insertions(+), 35 deletions(-)
>
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index 629e8a043079..fc6acf45ae27 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -606,6 +606,22 @@ static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args)
> if (fc->minor < 4 && args->opcode == FUSE_STATFS)
> args->out_args[0].size = FUSE_COMPAT_STATFS_SIZE;
>
> + if (fc->minor < 45) {
Could we use fc->lookup_handle here? Protocol minor version numbers are hard to rely on with backports.
> + switch (args->opcode) {
> + case FUSE_CREATE:
> + case FUSE_LINK:
> + case FUSE_LOOKUP:
> + case FUSE_MKDIR:
> + case FUSE_MKNOD:
> + /* XXX case FUSE_READDIRPLUS: */
> + case FUSE_SYMLINK:
> + case FUSE_TMPFILE:
> + if (!WARN_ON_ONCE(args->in_numargs == 0))
> + args->in_numargs--;
> + args->out_args[0].size = FUSE_COMPAT_45_ENTRY_OUT_SIZE;
> + break;
> + }
> + }
> if (fc->minor < 9) {
> switch (args->opcode) {
> case FUSE_LOOKUP:
> diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
> index e3fd5d148741..a6edb444180f 100644
> --- a/fs/fuse/dir.c
> +++ b/fs/fuse/dir.c
> @@ -169,7 +169,8 @@ static void fuse_invalidate_entry(struct dentry *entry)
> }
>
> static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
> - u64 nodeid, const struct qstr *name,
> + u64 nodeid, struct inode *dir,
> + const struct qstr *name,
> struct fuse_entry_out *outarg)
> {
> args->opcode = FUSE_LOOKUP;
> @@ -181,8 +182,24 @@ static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
> args->in_args[2].size = 1;
> args->in_args[2].value = "";
> args->out_numargs = 1;
> - args->out_args[0].size = sizeof(struct fuse_entry_out);
> + args->out_args[0].size = sizeof(*outarg) + outarg->fh.size;
> args->out_args[0].value = outarg;
> +
> + if (fc->lookup_handle) {
> + struct fuse_inode *fi = NULL;
> +
> + args->opcode = FUSE_LOOKUP_HANDLE;
> + args->out_argvar = true;
> +
> + if (dir)
> + fi = get_fuse_inode(dir);
> +
> + if (fi && fi->fh) {
> + args->in_numargs = 4;
> + args->in_args[3].size = sizeof(*fi->fh) + fi->fh->size;
> + args->in_args[3].value = fi->fh;
> + }
> + }
> }
>
> /*
> @@ -240,7 +257,7 @@ static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name,
>
> attr_version = fuse_get_attr_version(fm->fc);
>
> - fuse_lookup_init(fm->fc, &args, get_node_id(dir),
> + fuse_lookup_init(fm->fc, &args, get_node_id(dir), dir,
> name, outarg);
> ret = fuse_simple_request(fm, &args);
> /* Zero nodeid is same as -ENOENT */
> @@ -248,7 +265,8 @@ static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name,
> ret = -ENOENT;
> if (!ret) {
> fi = get_fuse_inode(inode);
> - if (outarg->nodeid != get_node_id(inode) ||
> + if (!fuse_file_handle_is_equal(fm->fc, fi->fh, &outarg->fh) ||
> + outarg->nodeid != get_node_id(inode) ||
> (bool) IS_AUTOMOUNT(inode) != (bool) (outarg->attr.flags & FUSE_ATTR_SUBMOUNT)) {
> fuse_queue_forget(fm->fc, forget,
> outarg->nodeid, 1);
> @@ -365,8 +383,9 @@ bool fuse_invalid_attr(struct fuse_attr *attr)
> return !fuse_valid_type(attr->mode) || !fuse_valid_size(attr->size);
> }
>
> -int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
> - struct fuse_entry_out *outarg, struct inode **inode)
> +int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct inode *dir,
> + const struct qstr *name, struct fuse_entry_out *outarg,
> + struct inode **inode)
> {
> struct fuse_mount *fm = get_fuse_mount_super(sb);
> FUSE_ARGS(args);
> @@ -388,14 +407,15 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
> attr_version = fuse_get_attr_version(fm->fc);
> evict_ctr = fuse_get_evict_ctr(fm->fc);
>
> - fuse_lookup_init(fm->fc, &args, nodeid, name, outarg);
> + fuse_lookup_init(fm->fc, &args, nodeid, dir, name, outarg);
> err = fuse_simple_request(fm, &args);
> /* Zero nodeid is same as -ENOENT, but with valid timeout */
> - if (err || !outarg->nodeid)
> + if (err < 0 || !outarg->nodeid) // XXX err = size if args->out_argvar = true
> goto out_put_forget;
>
> err = -EIO;
> - if (fuse_invalid_attr(&outarg->attr))
> + if (fuse_invalid_attr(&outarg->attr) ||
> + fuse_invalid_file_handle(fm->fc, &outarg->fh))
> goto out_put_forget;
> if (outarg->nodeid == FUSE_ROOT_ID && outarg->generation != 0) {
> pr_warn_once("root generation should be zero\n");
> @@ -404,7 +424,8 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
>
> *inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
> &outarg->attr, ATTR_TIMEOUT(outarg),
> - attr_version, evict_ctr);
> + attr_version, evict_ctr,
> + &outarg->fh);
> err = -ENOMEM;
> if (!*inode) {
> fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
> @@ -440,14 +461,14 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
> return ERR_PTR(-ENOMEM);
>
> locked = fuse_lock_inode(dir);
> - err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
> + err = fuse_lookup_name(dir->i_sb, get_node_id(dir), dir, &entry->d_name,
> outarg, &inode);
> fuse_unlock_inode(dir, locked);
> if (err == -ENOENT) {
> outarg_valid = false;
> err = 0;
> }
> - if (err)
> + if (err < 0) // XXX err = size if args->out_argvar = true
> goto out_err;
>
> err = -EIO;
> @@ -689,24 +710,36 @@ static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir,
> args.in_args[1].size = entry->d_name.len + 1;
> args.in_args[1].value = entry->d_name.name;
> args.out_numargs = 2;
> - args.out_args[0].size = sizeof(*outentry);
> + args.out_args[0].size = sizeof(*outentry) + outentry->fh.size;
> args.out_args[0].value = outentry;
> /* Store outarg for fuse_finish_open() */
> outopenp = &ff->args->open_outarg;
> args.out_args[1].size = sizeof(*outopenp);
> args.out_args[1].value = outopenp;
>
> + if (fm->fc->lookup_handle) {
> + fi = get_fuse_inode(dir);
> + args.out_argvar = true;
> + args.out_argvar_idx = 0;
> + if (fi->fh) {
> + args.in_numargs = 3;
> + args.in_args[2].size = sizeof(*fi->fh) + fi->fh->size;
> + args.in_args[2].value = fi->fh;
> + }
> + }
> +
> err = get_create_ext(idmap, &args, dir, entry, mode);
> if (err)
> goto out_free_outentry;
>
> err = fuse_simple_idmap_request(idmap, fm, &args);
> free_ext_value(&args);
> - if (err)
> + if (err < 0) // XXX err = size if args->out_argvar = true
> goto out_free_outentry;
>
> err = -EIO;
> if (!S_ISREG(outentry->attr.mode) || invalid_nodeid(outentry->nodeid) ||
> + fuse_invalid_file_handle(fm->fc, &outentry->fh) ||
> fuse_invalid_attr(&outentry->attr))
> goto out_free_outentry;
>
> @@ -714,7 +747,8 @@ static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir,
> ff->nodeid = outentry->nodeid;
> ff->open_flags = outopenp->open_flags;
> inode = fuse_iget(dir->i_sb, outentry->nodeid, outentry->generation,
> - &outentry->attr, ATTR_TIMEOUT(outentry), 0, 0);
> + &outentry->attr, ATTR_TIMEOUT(outentry), 0, 0,
> + &outentry->fh);
> if (!inode) {
> flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
> fuse_sync_release(NULL, ff, flags);
> @@ -830,9 +864,22 @@ static struct dentry *create_new_entry(struct mnt_idmap *idmap, struct fuse_moun
>
> args->nodeid = get_node_id(dir);
> args->out_numargs = 1;
> - args->out_args[0].size = sizeof(*outarg);
> + args->out_args[0].size = sizeof(*outarg) + outarg->fh.size;
> args->out_args[0].value = outarg;
>
> + if (fm->fc->lookup_handle) {
> + struct fuse_inode *fi = get_fuse_inode(dir);
> + int idx = args->in_numargs;
> +
> + args->out_argvar = true;
> + args->out_argvar_idx = 0;
> + if (fi->fh && !WARN_ON_ONCE(idx >= 4)) {
> + args->in_args[idx].size = sizeof(*fi->fh) + fi->fh->size;
> + args->in_args[idx].value = fi->fh;
> + args->in_numargs++;
> + }
> + }
> +
> if (args->opcode != FUSE_LINK) {
> err = get_create_ext(idmap, args, dir, entry, mode);
> if (err)
> @@ -841,18 +888,20 @@ static struct dentry *create_new_entry(struct mnt_idmap *idmap, struct fuse_moun
>
> err = fuse_simple_idmap_request(idmap, fm, args);
> free_ext_value(args);
> - if (err)
> + if (err < 0) // XXX err = size if args->out_argvar = true
> goto out_free_outarg;
>
> err = -EIO;
> - if (invalid_nodeid(outarg->nodeid) || fuse_invalid_attr(&outarg->attr))
> + if (invalid_nodeid(outarg->nodeid) || fuse_invalid_attr(&outarg->attr) ||
> + fuse_invalid_file_handle(fm->fc, &outarg->fh))
> goto out_free_outarg;
>
> if ((outarg->attr.mode ^ mode) & S_IFMT)
> goto out_free_outarg;
>
> inode = fuse_iget(dir->i_sb, outarg->nodeid, outarg->generation,
> - &outarg->attr, ATTR_TIMEOUT(outarg), 0, 0);
> + &outarg->attr, ATTR_TIMEOUT(outarg), 0, 0,
> + &outarg->fh);
> if (!inode) {
> fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
> kfree(outarg);
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index fad05fae7e54..d0f3c81b5612 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -216,6 +216,8 @@ struct fuse_inode {
> * so preserve the blocksize specified by the server.
> */
> u8 cached_i_blkbits;
> +
> + struct fuse_file_handle *fh;
> };
>
> /** FUSE inode state bits */
> @@ -1067,6 +1069,26 @@ static inline int invalid_nodeid(u64 nodeid)
> return !nodeid || nodeid == FUSE_ROOT_ID;
> }
>
> +static inline bool fuse_invalid_file_handle(struct fuse_conn *fc,
> + struct fuse_file_handle *handle)
> +{
> + if (!fc->lookup_handle)
> + return false;
> +
> + return !handle->size || (handle->size >= FUSE_MAX_HANDLE_SZ);
> +}
> +
> +static inline bool fuse_file_handle_is_equal(struct fuse_conn *fc,
> + struct fuse_file_handle *fh1,
> + struct fuse_file_handle *fh2)
> +{
> + if (!fc->lookup_handle || !fh2->size || // XXX more OPs without handle
> + ((fh1->size == fh2->size) &&
> + (!memcmp(fh1->handle, fh2->handle, fh1->size))))
> + return true;
> + return false;
> +}
> +
> static inline u64 fuse_get_attr_version(struct fuse_conn *fc)
> {
> return atomic64_read(&fc->attr_version);
> @@ -1098,7 +1120,10 @@ static inline struct fuse_entry_out *fuse_entry_out_alloc(struct fuse_conn *fc)
> {
> struct fuse_entry_out *entryout;
>
> - entryout = kzalloc(sizeof(*entryout), GFP_KERNEL_ACCOUNT);
> + entryout = kzalloc(sizeof(*entryout) + fc->max_handle_sz,
> + GFP_KERNEL_ACCOUNT);
> + if (entryout)
> + entryout->fh.size = fc->max_handle_sz;
>
> return entryout;
> }
> @@ -1145,10 +1170,11 @@ extern const struct dentry_operations fuse_dentry_operations;
> struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
> int generation, struct fuse_attr *attr,
> u64 attr_valid, u64 attr_version,
> - u64 evict_ctr);
> + u64 evict_ctr, struct fuse_file_handle *fh);
>
> -int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
> - struct fuse_entry_out *outarg, struct inode **inode);
> +int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct inode *dir,
> + const struct qstr *name, struct fuse_entry_out *outarg,
> + struct inode **inode);
>
> /**
> * Send FORGET command
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index bc84e7ed1e3d..f565f7e8118d 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -95,6 +95,25 @@ static struct fuse_submount_lookup *fuse_alloc_submount_lookup(void)
> return NULL;
> }
>
> +/*
> + * XXX postpone this allocation and later use the real size instead of max
> + */
> +static bool fuse_inode_handle_alloc(struct super_block *sb,
> + struct fuse_inode *fi)
> +{
> + struct fuse_conn *fc = get_fuse_conn_super(sb);
> +
> + fi->fh = NULL;
> + if (fc->lookup_handle) {
> + fi->fh = kzalloc(sizeof(*fi->fh) + fc->max_handle_sz,
> + GFP_KERNEL_ACCOUNT);
> + if (!fi->fh)
> + return false;
> + }
> +
> + return true;
> +}
> +
> static struct inode *fuse_alloc_inode(struct super_block *sb)
> {
> struct fuse_inode *fi;
> @@ -120,8 +139,15 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
> if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
> fuse_inode_backing_set(fi, NULL);
>
> + if (!fuse_inode_handle_alloc(sb, fi))
> + goto out_free_dax;
> +
> return &fi->inode;
>
> +out_free_dax:
> +#ifdef CONFIG_FUSE_DAX
> + kfree(fi->dax);
> +#endif
> out_free_forget:
> kfree(fi->forget);
> out_free:
> @@ -132,6 +158,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
> static void fuse_free_inode(struct inode *inode)
> {
> struct fuse_inode *fi = get_fuse_inode(inode);
> + struct fuse_conn *fc = get_fuse_conn(inode);
>
> mutex_destroy(&fi->mutex);
> kfree(fi->forget);
> @@ -141,6 +168,9 @@ static void fuse_free_inode(struct inode *inode)
> if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
> fuse_backing_put(fuse_inode_backing(fi));
>
> + if (fc->lookup_handle)
> + kfree(fi->fh);
> +
> kmem_cache_free(fuse_inode_cachep, fi);
> }
>
> @@ -465,7 +495,7 @@ static int fuse_inode_set(struct inode *inode, void *_nodeidp)
> struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
> int generation, struct fuse_attr *attr,
> u64 attr_valid, u64 attr_version,
> - u64 evict_ctr)
> + u64 evict_ctr, struct fuse_file_handle *fh)
> {
> struct inode *inode;
> struct fuse_inode *fi;
> @@ -505,6 +535,30 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
> if (!inode)
> return NULL;
>
> + fi = get_fuse_inode(inode);
> + if (fc->lookup_handle) {
> + if ((fh == NULL) && (nodeid != FUSE_ROOT_ID)) {
> + pr_err("NULL file handle for nodeid %llu\n", nodeid);
> + iput(inode);
> + return NULL;
Hmm, so there are conditions like "if (fi && fi->fh) {" in lookup and I
was thinking "nice, fuse-server can decide to skip the fh for some
inodes like FUSE_ROOT_ID". But now it gets forbidden here. In combination
with the other comment in fuse_inode_handle_alloc(), could we allocate
here to the needed size and allow fuse-server to not send the handle
for some files?
Thanks,
Bernd
Powered by blists - more mailing lists