[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAJfpegsTmiO-sKaBLgoVT4WxDXBkRES=HF1YmQN1ES7gfJEJ+w@mail.gmail.com>
Date: Tue, 2 Aug 2022 14:50:32 +0200
From: Miklos Szeredi <miklos@...redi.hu>
To: "Eric W. Biederman" <ebiederm@...ssion.com>
Cc: Al Viro <viro@...iv.linux.org.uk>, Oleg Nesterov <oleg@...hat.com>,
Tycho Andersen <tycho@...ho.pizza>,
"Serge E. Hallyn" <serge@...lyn.com>, linux-kernel@...r.kernel.org,
linux-fsdevel@...r.kernel.org
Subject: Re: [RFC][PATCH v2] fuse: In fuse_flush only wait if someone wants
the return code
On Sat, 30 Jul 2022 at 07:11, Eric W. Biederman <ebiederm@...ssion.com> wrote:
>
>
> In my very light testing this resolves a hang where a thread of the
> fuse server was accessing the fuse filesystem (the fuse server is
> serving up), when the fuse server is killed.
>
> The practical problem is that the fuse server file descriptor was
> being closed after the file descriptor into the fuse filesystem so
> that the fuse filesystem operations were being blocked for instead of
> being aborted. Simply skipping the unnecessary wait resolves this
> issue.
>
> This is just a proof of concept and someone should look to see if the
> fuse max_background limit could cause a problem with this approach.
max_background just throttles the number of background requests that
the userspace filesystem can *unqueue*. It doesn't affect queuing in
any way.
> Additionally testing PF_EXITING is a very crude way to tell if someone
> wants the return code from the vfs flush operation. As such in the
> long run it probably makes sense to get some direct vfs support for
> knowing if flush needs to block until all of the flushing is complete
> and a status/return code can be returned.
>
> Unless I have missed something this is a generic optimization that can
> apply to many network filesystems.
>
> Al, vfs folks? (igrab/iput sorted so as not to be distractions).
>
> Perhaps a .flush_async method without a return code and a
> filp_close_async function without a return code to take advantage of
> this in the general sense.
>
> Waiting potentially indefinitely for user space in do_exit seems like a
> bad idea. Especially when all that the wait is for is to get a return
> code that will never be examined.
The wait is for posix locks to get unlocked. But "remote" posix locks
are almost never used due to problems like this, so I think it's safe
to do this.
>
> Signed-off-by: "Eric W. Biederman" <ebiederm@...ssion.com>
> ---
> fs/fuse/file.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 59 insertions(+)
>
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index 05caa2b9272e..2bd94acd761f 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -464,6 +464,62 @@ static void fuse_sync_writes(struct inode *inode)
> fuse_release_nowrite(inode);
> }
>
> +struct fuse_flush_args {
> + struct fuse_args args;
> + struct fuse_flush_in inarg;
> + struct inode *inode;
> +};
> +
> +static void fuse_flush_end(struct fuse_mount *fm, struct fuse_args *args, int err)
> +{
> + struct fuse_flush_args *fa = container_of(args, typeof(*fa), args);
> +
> + if (err == -ENOSYS) {
> + fm->fc->no_flush = 1;
> + err = 0;
> + }
> +
> + /*
> + * In memory i_blocks is not maintained by fuse, if writeback cache is
> + * enabled, i_blocks from cached attr may not be accurate.
> + */
> + if (!err && fm->fc->writeback_cache)
> + fuse_invalidate_attr_mask(fa->inode, STATX_BLOCKS);
> +
> + iput(fa->inode);
Filesystems might expect not just he inode to not be destroyed but
also the file, so do what other file operations do, keep a ref on ff:
fuse_file_put(fa->ff, false, false);
> + kfree(fa);
> +}
> +
> +static int fuse_flush_async(struct file *file, fl_owner_t id)
> +{
> + struct inode *inode = file_inode(file);
> + struct fuse_mount *fm = get_fuse_mount(inode);
> + struct fuse_file *ff = file->private_data;
> + struct fuse_flush_args *fa;
> + int err;
> +
> + fa = kzalloc(sizeof(*fa), GFP_KERNEL);
> + if (!fa)
> + return -ENOMEM;
> +
> + fa->inarg.fh = ff->fh;
> + fa->inarg.lock_owner = fuse_lock_owner_id(fm->fc, id);
> + fa->args.opcode = FUSE_FLUSH;
> + fa->args.nodeid = get_node_id(inode);
> + fa->args.in_numargs = 1;
> + fa->args.in_args[0].size = sizeof(fa->inarg);
> + fa->args.in_args[0].value = &fa->inarg;
> + fa->args.force = true;
> + fa->args.end = fuse_flush_end;
> + fa->inode = igrab(inode);
fa->ff = fuse_file_get(ff);
> +
> + err = fuse_simple_background(fm, &fa->args, GFP_KERNEL);
> + if (err)
> + fuse_flush_end(fm, &fa->args, err);
> +
> + return err;
> +}
> +
> static int fuse_flush(struct file *file, fl_owner_t id)
> {
> struct inode *inode = file_inode(file);
> @@ -495,6 +551,9 @@ static int fuse_flush(struct file *file, fl_owner_t id)
> if (fm->fc->no_flush)
> goto inval_attr_out;
>
> + if (current->flags & PF_EXITING)
> + return fuse_flush_async(file, id);
> +
> memset(&inarg, 0, sizeof(inarg));
> inarg.fh = ff->fh;
> inarg.lock_owner = fuse_lock_owner_id(fm->fc, id);
> --
> 2.35.3
>
Powered by blists - more mailing lists