linux-kernel - Re: [PATCH v4] netfs: do not unlock and put the folio twice

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4597bdc0-b7bf-c30f-ea4a-41599adeed86@redhat.com>
Date:   Sat, 9 Jul 2022 08:27:40 +0800
From:   Xiubo Li <xiubli@...hat.com>
To:     David Howells <dhowells@...hat.com>
Cc:     idryomov@...il.com, jlayton@...nel.org, marc.dionne@...istor.com,
        willy@...radead.org, keescook@...omium.org,
        kirill.shutemov@...ux.intel.com, william.kucharski@...cle.com,
        linux-afs@...ts.infradead.org, linux-kernel@...r.kernel.org,
        ceph-devel@...r.kernel.org, linux-cachefs@...hat.com,
        vshankar@...hat.com
Subject: Re: [PATCH v4] netfs: do not unlock and put the folio twice


On 7/7/22 9:21 PM, David Howells wrote:
> Here's my take on this.  I've made the error: path handle folio == NULL, so
> you don't need to split that error case.  I've also changed
> ->check_write_begin() so that it returns 0, not -EAGAIN, if we drop the folio;
> the process is retried then if the folio pointer got cleared.
>
> As a result, you don't have to discard the page if you want to return an error
> and thus don't need the additional afs patch
>
> David
> ---
> commit 8489c89f6a186272593ab5e3fffbd47ea21185b7
> Author: Xiubo Li <xiubli@...hat.com>
> Date:   Thu Jul 7 12:51:11 2022 +0800
>
>      netfs: do not unlock and put the folio twice
>      
>      check_write_begin() will unlock and put the folio when return
>      non-zero.  So we should avoid unlocking and putting it twice in
>      netfs layer.
>      
>      Change the way ->check_write_begin() works in the following two ways:
>      
>       (1) Pass it a pointer to the folio pointer, allowing it to unlock and put
>           the folio prior to doing the stuff it wants to do, provided it clears
>           the folio pointer.
>      
>       (2) Change the return values such that 0 with folio pointer set means
>           continue, 0 with folio pointer cleared means re-get and all error
>           codes indicating an error (no special treatment for -EAGAIN).
>      
>      Link: https://tracker.ceph.com/issues/56423
>      Link: https://lore.kernel.org/r/20220707045112.10177-2-xiubli@redhat.com/
>      Signed-off-by: Xiubo Li <xiubli@...hat.com>
>      Co-developed-by: David Howells <dhowells@...hat.com>
>      Signed-off-by: David Howells <dhowells@...hat.com>
>
> diff --git a/Documentation/filesystems/netfs_library.rst b/Documentation/filesystems/netfs_library.rst
> index 4d19b19bcc08..89085e1c22db 100644
> --- a/Documentation/filesystems/netfs_library.rst
> +++ b/Documentation/filesystems/netfs_library.rst
> @@ -301,7 +301,7 @@ through which it can issue requests and negotiate::
>   		void (*issue_read)(struct netfs_io_subrequest *subreq);
>   		bool (*is_still_valid)(struct netfs_io_request *rreq);
>   		int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
> -					 struct folio *folio, void **_fsdata);
> +					 struct folio **_folio, void **_fsdata);
>   		void (*done)(struct netfs_io_request *rreq);
>   	};
>   
> @@ -381,8 +381,10 @@ The operations are as follows:
>      allocated/grabbed the folio to be modified to allow the filesystem to flush
>      conflicting state before allowing it to be modified.
>   
> -   It should return 0 if everything is now fine, -EAGAIN if the folio should be
> -   regrabbed and any other error code to abort the operation.
> +   It may unlock and discard the folio it was given and set the caller's folio
> +   pointer to NULL.  It should return 0 if everything is now fine (*_folio
> +   left set) or the op should be retried (*_folio cleared) and any other error
> +   code to abort the operation.
>   
>    * ``done``
>   
> diff --git a/fs/afs/file.c b/fs/afs/file.c
> index 42118a4f3383..afacce797fb9 100644
> --- a/fs/afs/file.c
> +++ b/fs/afs/file.c
> @@ -375,7 +375,7 @@ static int afs_begin_cache_operation(struct netfs_io_request *rreq)
>   }
>   
>   static int afs_check_write_begin(struct file *file, loff_t pos, unsigned len,
> -				 struct folio *folio, void **_fsdata)
> +				 struct folio **folio, void **_fsdata)
>   {
>   	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
>   
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index 6dee88815491..ab070a24ca23 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -63,7 +63,7 @@
>   	 (CONGESTION_ON_THRESH(congestion_kb) >> 2))
>   
>   static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len,
> -					struct folio *folio, void **_fsdata);
> +					struct folio **folio, void **_fsdata);
>   
>   static inline struct ceph_snap_context *page_snap_context(struct page *page)
>   {
> @@ -1288,18 +1288,19 @@ ceph_find_incompatible(struct page *page)
>   }
>   
>   static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len,
> -					struct folio *folio, void **_fsdata)
> +					struct folio **folio, void **_fsdata)
>   {
>   	struct inode *inode = file_inode(file);
>   	struct ceph_inode_info *ci = ceph_inode(inode);
>   	struct ceph_snap_context *snapc;
>   
> -	snapc = ceph_find_incompatible(folio_page(folio, 0));
> +	snapc = ceph_find_incompatible(folio_page(*folio, 0));
>   	if (snapc) {
>   		int r;
>   
> -		folio_unlock(folio);
> -		folio_put(folio);
> +		folio_unlock(*folio);
> +		folio_put(*folio);
> +		*folio = NULL;
>   		if (IS_ERR(snapc))
>   			return PTR_ERR(snapc);
>   
> diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
> index 42f892c5712e..69bbf1c25cf4 100644
> --- a/fs/netfs/buffered_read.c
> +++ b/fs/netfs/buffered_read.c
> @@ -319,8 +319,9 @@ static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
>    * conflicting writes once the folio is grabbed and locked.  It is passed a
>    * pointer to the fsdata cookie that gets returned to the VM to be passed to
>    * write_end.  It is permitted to sleep.  It should return 0 if the request
> - * should go ahead; unlock the folio and return -EAGAIN to cause the folio to
> - * be regot; or return an error.
> + * should go ahead or it may return an error.  It may also unlock and put the
> + * folio, provided it sets *_folio to NULL, in which case a return of 0 will
> + * cause the folio to be re-got and the process to be retried.
>    *
>    * The calling netfs must initialise a netfs context contiguous to the vfs
>    * inode before calling this.
> @@ -348,13 +349,13 @@ int netfs_write_begin(struct netfs_inode *ctx,
>   
>   	if (ctx->ops->check_write_begin) {
>   		/* Allow the netfs (eg. ceph) to flush conflicts. */
> -		ret = ctx->ops->check_write_begin(file, pos, len, folio, _fsdata);
> +		ret = ctx->ops->check_write_begin(file, pos, len, &folio, _fsdata);
>   		if (ret < 0) {
>   			trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
> -			if (ret == -EAGAIN)
> -				goto retry;
>   			goto error;
>   		}
> +		if (!folio)
> +			goto retry;
>   	}
>   
>   	if (folio_test_uptodate(folio))
> @@ -416,8 +417,10 @@ int netfs_write_begin(struct netfs_inode *ctx,
>   error_put:
>   	netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
>   error:
> -	folio_unlock(folio);
> -	folio_put(folio);
> +	if (folio) {
> +		folio_unlock(folio);
> +		folio_put(folio);
> +	}
>   	_leave(" = %d", ret);
>   	return ret;
>   }
> diff --git a/include/linux/netfs.h b/include/linux/netfs.h
> index 1773e5df8e65..6ab5d56dac74 100644
> --- a/include/linux/netfs.h
> +++ b/include/linux/netfs.h
> @@ -214,7 +214,7 @@ struct netfs_request_ops {
>   	void (*issue_read)(struct netfs_io_subrequest *subreq);
>   	bool (*is_still_valid)(struct netfs_io_request *rreq);
>   	int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
> -				 struct folio *folio, void **_fsdata);
> +				 struct folio **_folio, void **_fsdata);
>   	void (*done)(struct netfs_io_request *rreq);
>   };
>   
>
This version looks good to me.

Thanks David!