lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-id: <176039854564.1793333.2137462594901890659@noble.neil.brown.name>
Date: Tue, 14 Oct 2025 10:35:45 +1100
From: NeilBrown <neilb@...mail.net>
To: "Jeff Layton" <jlayton@...nel.org>
Cc: "Chuck Lever" <chuck.lever@...cle.com>,
 "Olga Kornievskaia" <okorniev@...hat.com>, "Dai Ngo" <Dai.Ngo@...cle.com>,
 "Tom Talpey" <tom@...pey.com>, "Trond Myklebust" <trondmy@...nel.org>,
 "Anna Schumaker" <anna@...nel.org>, "David S. Miller" <davem@...emloft.net>,
 "Eric Dumazet" <edumazet@...gle.com>, "Jakub Kicinski" <kuba@...nel.org>,
 "Paolo Abeni" <pabeni@...hat.com>, "Simon Horman" <horms@...nel.org>,
 "David Howells" <dhowells@...hat.com>, "Brandon Adams" <brandona@...a.com>,
 linux-nfs@...r.kernel.org, netdev@...r.kernel.org,
 linux-kernel@...r.kernel.org, "Jeff Layton" <jlayton@...nel.org>
Subject:
 Re: [PATCH v7] sunrpc: allocate a separate bvec array for socket sends

On Tue, 14 Oct 2025, Jeff Layton wrote:
> svc_tcp_sendmsg() calls xdr_buf_to_bvec() with the second slot of
> rq_bvec as the start, but doesn't reduce the array length by one, which
> could lead to an array overrun. Also, rq_bvec is always rq_maxpages in
> length, which can be too short in some cases, since the TCP record
> marker consumes a slot.
> 
> Fix both problems by adding a separate bvec array to the svc_sock that
> is specifically for sending. For TCP, make this array one slot longer
> than rq_maxpages, to account for the record marker. For UDP, only
> allocate as large an array as we need since it's limited to 64k of
> payload.
> 
> Signed-off-by: Jeff Layton <jlayton@...nel.org>
> ---
> Another update based on Neil's feedback. This version allocates the
> array when the socket is allocated, and fixes the UDP length
> calculation.

Looks good - thanks.

Reviewed-by: NeilBrown <neil@...wn.name>

NeilBrown


> ---
> Changes in v7:
> - use RPCSVC_MAXPAYLOAD_UDP instead of assuming max of 64kb
> - Link to v6: https://lore.kernel.org/r/20251013-rq_bvec-v6-1-17982fc64ad2@kernel.org
> 
> Changes in v6:
> - allocate sk_bvec in ->xpo_create
> - fix the array-length calculation for UDP
> - Link to v5: https://lore.kernel.org/r/20251010-rq_bvec-v5-1-44976250199d@kernel.org
> 
> Changes in v5:
> - reduce the size of sk_bvec on UDP sockets
> - Link to v4: https://lore.kernel.org/r/20251010-rq_bvec-v4-1-627567f1ce91@kernel.org
> 
> Changes in v4:
> - switch to allocating a separate bvec for sends in the svc_sock
> - Link to v3: https://lore.kernel.org/r/20251009-rq_bvec-v3-0-57181360b9cb@kernel.org
> 
> Changes in v3:
> - Add rq_bvec_len field and use it in appropriate places
> - Link to v2: https://lore.kernel.org/r/20251008-rq_bvec-v2-0-823c0a85a27c@kernel.org
> 
> Changes in v2:
> - Better changelog message for patch #2
> - Link to v1: https://lore.kernel.org/r/20251008-rq_bvec-v1-0-7f23d32d75e5@kernel.org
> ---
>  include/linux/sunrpc/svcsock.h |  3 +++
>  net/sunrpc/svcsock.c           | 55 ++++++++++++++++++++++++++++++++++++------
>  2 files changed, 51 insertions(+), 7 deletions(-)
> 
> diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
> index 963bbe251e52109a902f6b9097b6e9c3c23b1fd8..de37069aba90899be19b1090e6e90e509a3cf530 100644
> --- a/include/linux/sunrpc/svcsock.h
> +++ b/include/linux/sunrpc/svcsock.h
> @@ -26,6 +26,9 @@ struct svc_sock {
>  	void			(*sk_odata)(struct sock *);
>  	void			(*sk_owspace)(struct sock *);
>  
> +	/* For sends (protected by xpt_mutex) */
> +	struct bio_vec		*sk_bvec;
> +
>  	/* private TCP part */
>  	/* On-the-wire fragment header: */
>  	__be32			sk_marker;
> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> index 0cb9c4d457453b26db29f08985b056c3f8d59447..93de79020a2d859a9c0b9464d794c167531d701d 100644
> --- a/net/sunrpc/svcsock.c
> +++ b/net/sunrpc/svcsock.c
> @@ -68,6 +68,17 @@
>  
>  #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
>  
> +/*
> + * For UDP:
> + * 1 for header page
> + * enough pages for RPCSVC_MAXPAYLOAD_UDP
> + * 1 in case payload is not aligned
> + * 1 for tail page
> + */
> +enum {
> +	SUNRPC_MAX_UDP_SENDPAGES = 1 + RPCSVC_MAXPAYLOAD_UDP / PAGE_SIZE + 1 + 1
> +};
> +
>  /* To-do: to avoid tying up an nfsd thread while waiting for a
>   * handshake request, the request could instead be deferred.
>   */
> @@ -740,14 +751,14 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
>  	if (svc_xprt_is_dead(xprt))
>  		goto out_notconn;
>  
> -	count = xdr_buf_to_bvec(rqstp->rq_bvec, rqstp->rq_maxpages, xdr);
> +	count = xdr_buf_to_bvec(svsk->sk_bvec, SUNRPC_MAX_UDP_SENDPAGES, xdr);
>  
> -	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
> +	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, svsk->sk_bvec,
>  		      count, rqstp->rq_res.len);
>  	err = sock_sendmsg(svsk->sk_sock, &msg);
>  	if (err == -ECONNREFUSED) {
>  		/* ICMP error on earlier request. */
> -		iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
> +		iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, svsk->sk_bvec,
>  			      count, rqstp->rq_res.len);
>  		err = sock_sendmsg(svsk->sk_sock, &msg);
>  	}
> @@ -1236,19 +1247,19 @@ static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
>  	int ret;
>  
>  	/* The stream record marker is copied into a temporary page
> -	 * fragment buffer so that it can be included in rq_bvec.
> +	 * fragment buffer so that it can be included in sk_bvec.
>  	 */
>  	buf = page_frag_alloc(&svsk->sk_frag_cache, sizeof(marker),
>  			      GFP_KERNEL);
>  	if (!buf)
>  		return -ENOMEM;
>  	memcpy(buf, &marker, sizeof(marker));
> -	bvec_set_virt(rqstp->rq_bvec, buf, sizeof(marker));
> +	bvec_set_virt(svsk->sk_bvec, buf, sizeof(marker));
>  
> -	count = xdr_buf_to_bvec(rqstp->rq_bvec + 1, rqstp->rq_maxpages,
> +	count = xdr_buf_to_bvec(svsk->sk_bvec + 1, rqstp->rq_maxpages,
>  				&rqstp->rq_res);
>  
> -	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
> +	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, svsk->sk_bvec,
>  		      1 + count, sizeof(marker) + rqstp->rq_res.len);
>  	ret = sock_sendmsg(svsk->sk_sock, &msg);
>  	page_frag_free(buf);
> @@ -1393,6 +1404,20 @@ void svc_sock_update_bufs(struct svc_serv *serv)
>  	spin_unlock_bh(&serv->sv_lock);
>  }
>  
> +static int svc_sock_sendpages(struct svc_serv *serv, struct socket *sock, int flags)
> +{
> +	switch (sock->type) {
> +	case SOCK_STREAM:
> +		/* +1 for TCP record marker */
> +		if (flags & SVC_SOCK_TEMPORARY)
> +			return svc_serv_maxpages(serv) + 1;
> +		return 0;
> +	case SOCK_DGRAM:
> +		return SUNRPC_MAX_UDP_SENDPAGES;
> +	}
> +	return -EINVAL;
> +}
> +
>  /*
>   * Initialize socket for RPC use and create svc_sock struct
>   */
> @@ -1403,12 +1428,26 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
>  	struct svc_sock	*svsk;
>  	struct sock	*inet;
>  	int		pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
> +	int		sendpages;
>  	unsigned long	pages;
>  
> +	sendpages = svc_sock_sendpages(serv, sock, flags);
> +	if (sendpages < 0)
> +		return ERR_PTR(sendpages);
> +
>  	pages = svc_serv_maxpages(serv);
>  	svsk = kzalloc(struct_size(svsk, sk_pages, pages), GFP_KERNEL);
>  	if (!svsk)
>  		return ERR_PTR(-ENOMEM);
> +
> +	if (sendpages) {
> +		svsk->sk_bvec = kcalloc(sendpages, sizeof(*svsk->sk_bvec), GFP_KERNEL);
> +		if (!svsk->sk_bvec) {
> +			kfree(svsk);
> +			return ERR_PTR(-ENOMEM);
> +		}
> +	}
> +
>  	svsk->sk_maxpages = pages;
>  
>  	inet = sock->sk;
> @@ -1420,6 +1459,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
>  				     inet->sk_protocol,
>  				     ntohs(inet_sk(inet)->inet_sport));
>  		if (err < 0) {
> +			kfree(svsk->sk_bvec);
>  			kfree(svsk);
>  			return ERR_PTR(err);
>  		}
> @@ -1637,5 +1677,6 @@ static void svc_sock_free(struct svc_xprt *xprt)
>  		sock_release(sock);
>  
>  	page_frag_cache_drain(&svsk->sk_frag_cache);
> +	kfree(svsk->sk_bvec);
>  	kfree(svsk);
>  }
> 
> ---
> base-commit: 05d2192090744b16ce05bd221c459a9357c17525
> change-id: 20251008-rq_bvec-b66afd0fdbbb
> 
> Best regards,
> -- 
> Jeff Layton <jlayton@...nel.org>
> 
> 


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ