lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Thu, 30 Mar 2023 09:45:32 +0800
From:   Xiubo Li <xiubli@...hat.com>
To:     David Howells <dhowells@...hat.com>,
        Matthew Wilcox <willy@...radead.org>,
        "David S. Miller" <davem@...emloft.net>,
        Eric Dumazet <edumazet@...gle.com>,
        Jakub Kicinski <kuba@...nel.org>,
        Paolo Abeni <pabeni@...hat.com>
Cc:     Al Viro <viro@...iv.linux.org.uk>,
        Christoph Hellwig <hch@...radead.org>,
        Jens Axboe <axboe@...nel.dk>, Jeff Layton <jlayton@...nel.org>,
        Christian Brauner <brauner@...nel.org>,
        Chuck Lever III <chuck.lever@...cle.com>,
        Linus Torvalds <torvalds@...ux-foundation.org>,
        netdev@...r.kernel.org, linux-fsdevel@...r.kernel.org,
        linux-kernel@...r.kernel.org, linux-mm@...ck.org,
        Ilya Dryomov <idryomov@...il.com>, ceph-devel@...r.kernel.org
Subject: Re: [RFC PATCH v2 37/48] ceph: Use sendmsg(MSG_SPLICE_PAGES) rather
 than sendpage()

David,

BTW, will these two patches depend on the others in this patch series?

I am planning to run a test with these two later.

Thanks

- Xiubo

On 29/03/2023 22:13, David Howells wrote:
> Use sendmsg() and MSG_SPLICE_PAGES rather than sendpage in ceph when
> transmitting data.  For the moment, this can only transmit one page at a
> time because of the architecture of net/ceph/, but if
> write_partial_message_data() can be given a bvec[] at a time by the
> iteration code, this would allow pages to be sent in a batch.
>
> Signed-off-by: David Howells <dhowells@...hat.com>
> cc: Ilya Dryomov <idryomov@...il.com>
> cc: Xiubo Li <xiubli@...hat.com>
> cc: Jeff Layton <jlayton@...nel.org>
> cc: "David S. Miller" <davem@...emloft.net>
> cc: Eric Dumazet <edumazet@...gle.com>
> cc: Jakub Kicinski <kuba@...nel.org>
> cc: Paolo Abeni <pabeni@...hat.com>
> cc: Jens Axboe <axboe@...nel.dk>
> cc: Matthew Wilcox <willy@...radead.org>
> cc: ceph-devel@...r.kernel.org
> cc: netdev@...r.kernel.org
> ---
>   net/ceph/messenger_v2.c | 89 +++++++++--------------------------------
>   1 file changed, 18 insertions(+), 71 deletions(-)
>
> diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
> index 301a991dc6a6..1637a0c21126 100644
> --- a/net/ceph/messenger_v2.c
> +++ b/net/ceph/messenger_v2.c
> @@ -117,91 +117,38 @@ static int ceph_tcp_recv(struct ceph_connection *con)
>   	return ret;
>   }
>   
> -static int do_sendmsg(struct socket *sock, struct iov_iter *it)
> -{
> -	struct msghdr msg = { .msg_flags = CEPH_MSG_FLAGS };
> -	int ret;
> -
> -	msg.msg_iter = *it;
> -	while (iov_iter_count(it)) {
> -		ret = sock_sendmsg(sock, &msg);
> -		if (ret <= 0) {
> -			if (ret == -EAGAIN)
> -				ret = 0;
> -			return ret;
> -		}
> -
> -		iov_iter_advance(it, ret);
> -	}
> -
> -	WARN_ON(msg_data_left(&msg));
> -	return 1;
> -}
> -
> -static int do_try_sendpage(struct socket *sock, struct iov_iter *it)
> -{
> -	struct msghdr msg = { .msg_flags = CEPH_MSG_FLAGS };
> -	struct bio_vec bv;
> -	int ret;
> -
> -	if (WARN_ON(!iov_iter_is_bvec(it)))
> -		return -EINVAL;
> -
> -	while (iov_iter_count(it)) {
> -		/* iov_iter_iovec() for ITER_BVEC */
> -		bvec_set_page(&bv, it->bvec->bv_page,
> -			      min(iov_iter_count(it),
> -				  it->bvec->bv_len - it->iov_offset),
> -			      it->bvec->bv_offset + it->iov_offset);
> -
> -		/*
> -		 * sendpage cannot properly handle pages with
> -		 * page_count == 0, we need to fall back to sendmsg if
> -		 * that's the case.
> -		 *
> -		 * Same goes for slab pages: skb_can_coalesce() allows
> -		 * coalescing neighboring slab objects into a single frag
> -		 * which triggers one of hardened usercopy checks.
> -		 */
> -		if (sendpage_ok(bv.bv_page)) {
> -			ret = sock->ops->sendpage(sock, bv.bv_page,
> -						  bv.bv_offset, bv.bv_len,
> -						  CEPH_MSG_FLAGS);
> -		} else {
> -			iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bv, 1, bv.bv_len);
> -			ret = sock_sendmsg(sock, &msg);
> -		}
> -		if (ret <= 0) {
> -			if (ret == -EAGAIN)
> -				ret = 0;
> -			return ret;
> -		}
> -
> -		iov_iter_advance(it, ret);
> -	}
> -
> -	return 1;
> -}
> -
>   /*
>    * Write as much as possible.  The socket is expected to be corked,
>    * so we don't bother with MSG_MORE/MSG_SENDPAGE_NOTLAST here.
>    *
>    * Return:
> - *   1 - done, nothing (else) to write
> + *  >0 - done, nothing (else) to write
>    *   0 - socket is full, need to wait
>    *  <0 - error
>    */
>   static int ceph_tcp_send(struct ceph_connection *con)
>   {
> +	struct msghdr msg = {
> +		.msg_iter	= con->v2.out_iter,
> +		.msg_flags	= CEPH_MSG_FLAGS,
> +	};
>   	int ret;
>   
> +	if (WARN_ON(!iov_iter_is_bvec(&con->v2.out_iter)))
> +		return -EINVAL;
> +
> +	if (con->v2.out_iter_sendpage)
> +		msg.msg_flags |= MSG_SPLICE_PAGES;
> +
>   	dout("%s con %p have %zu try_sendpage %d\n", __func__, con,
>   	     iov_iter_count(&con->v2.out_iter), con->v2.out_iter_sendpage);
> -	if (con->v2.out_iter_sendpage)
> -		ret = do_try_sendpage(con->sock, &con->v2.out_iter);
> -	else
> -		ret = do_sendmsg(con->sock, &con->v2.out_iter);
> +
> +	ret = sock_sendmsg(con->sock, &msg);
> +	if (ret > 0)
> +		iov_iter_advance(&con->v2.out_iter, ret);
> +	else if (ret == -EAGAIN)
> +		ret = 0;
> +
>   	dout("%s con %p ret %d left %zu\n", __func__, con, ret,
>   	     iov_iter_count(&con->v2.out_iter));
>   	return ret;
>
-- 
Best Regards,

Xiubo Li (李秀波)

Email: xiubli@...hat.com/xiubli@....com
Slack: @Xiubo Li

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ