lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <35efc079-0992-3ac7-ba8d-fcf021f7567f@oracle.com>
Date:   Wed, 14 Feb 2018 11:10:50 -0800
From:   Santosh Shilimkar <santosh.shilimkar@...cle.com>
To:     Sowmini Varadhan <sowmini.varadhan@...cle.com>,
        netdev@...r.kernel.org, willemdebruijn.kernel@...il.com
Cc:     davem@...emloft.net, rds-devel@....oracle.com
Subject: Re: [PATCH V2 net-next 5/7] rds: zerocopy Tx support.

On 2/14/2018 2:28 AM, Sowmini Varadhan wrote:
> If the MSG_ZEROCOPY flag is specified with rds_sendmsg(), and,
> if the SO_ZEROCOPY socket option has been set on the PF_RDS socket,
> application pages sent down with rds_sendmsg() are pinned.
> 
> The pinning uses the accounting infrastructure added by
> Commit a91dbff551a6 ("sock: ulimit on MSG_ZEROCOPY pages")
> 
> The payload bytes in the message may not be modified for the
> duration that the message has been pinned. A multi-threaded
> application using this infrastructure may thus need to be notified
> about send-completion so that it can free/reuse the buffers
> passed to rds_sendmsg(). Notification of send-completion will
> identify each message-buffer by a cookie that the application
> must specify as ancillary data to rds_sendmsg().
> The ancillary data in this case has cmsg_level == SOL_RDS
> and cmsg_type == RDS_CMSG_ZCOPY_COOKIE.
> 
> Signed-off-by: Sowmini Varadhan <sowmini.varadhan@...cle.com>
> ---
> v2:
>    - remove unused data_len argument to rds_rm_size;
>    - unmap as necessary if we fail in the middle of zerocopy setup
> 
>   include/uapi/linux/rds.h |    1 +
>   net/rds/message.c        |   51 +++++++++++++++++++++++++++++++++++++++++++++-
>   net/rds/rds.h            |    3 +-
>   net/rds/send.c           |   44 ++++++++++++++++++++++++++++++++++-----
>   4 files changed, 91 insertions(+), 8 deletions(-)
> 
> diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h
> index e71d449..12e3bca 100644
> --- a/include/uapi/linux/rds.h
> +++ b/include/uapi/linux/rds.h
> @@ -103,6 +103,7 @@
>   #define RDS_CMSG_MASKED_ATOMIC_FADD	8
>   #define RDS_CMSG_MASKED_ATOMIC_CSWP	9
>   #define RDS_CMSG_RXPATH_LATENCY		11
> +#define	RDS_CMSG_ZCOPY_COOKIE		12
>
s/RDS_CMSG_ZCOPY_COOKIE/RDS_CMSG_ZMSGCOPY_COOKIE	

>   #define RDS_INFO_FIRST			10000
>   #define RDS_INFO_COUNTERS		10000
> diff --git a/net/rds/message.c b/net/rds/message.c
> index d874b74..e499566 100644
> --- a/net/rds/message.c
> +++ b/net/rds/message.c
> @@ -341,12 +341,14 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
>   	return rm;
>   }
>   
> -int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from)
> +int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
> +			       bool zcopy)
>   {
>   	unsigned long to_copy, nbytes;
>   	unsigned long sg_off;
>   	struct scatterlist *sg;
>   	int ret = 0;
> +	int length = iov_iter_count(from);
>   
>   	rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from));
>   
> @@ -356,6 +358,53 @@ int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from)
>   	sg = rm->data.op_sg;
>   	sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
>   
> +	if (zcopy) {
> +		int total_copied = 0;
> +		struct sk_buff *skb;
> +
> +		skb = alloc_skb(SO_EE_ORIGIN_MAX_ZCOOKIES * sizeof(u32),
> +				GFP_KERNEL);
This can sleep so you might want to check if you want to use ATOMIC 
version here.

> +		if (!skb)
> +			return -ENOMEM;
> +		rm->data.op_mmp_znotifier = RDS_ZCOPY_SKB(skb);
> +		memset(rm->data.op_mmp_znotifier, 0,
> +		       sizeof(*rm->data.op_mmp_znotifier));
> +		if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp,
> +					    length)) {
> +			consume_skb(skb);
> +			rm->data.op_mmp_znotifier = NULL;
> +			return -ENOMEM;
> +		}
NOMEM new application visible change but probably the right one for this
particular case. Just need to make sure application can handle this
error.


> diff --git a/net/rds/rds.h b/net/rds/rds.h
> index 6e8fc4c..dfdc9b3 100644
> --- a/net/rds/rds.h
> +++ b/net/rds/rds.h
> @@ -784,7 +784,8 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
>   /* message.c */
>   struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp);
>   struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents);
> -int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from);
> +int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
> +			       bool zcopy);
>   struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len);
>   void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
>   				 __be16 dport, u64 seq);
> diff --git a/net/rds/send.c b/net/rds/send.c
> index 5ac0925..80171cf 100644
> --- a/net/rds/send.c
> +++ b/net/rds/send.c

[...]

> @@ -1087,8 +1112,15 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
>   		goto out;
>   	}
>   
> +	if (zcopy) {
> +		if (rs->rs_transport->t_type != RDS_TRANS_TCP) {
> +			ret = -EOPNOTSUPP;
> +			goto out;
> +		}
> +		num_sgs = iov_iter_npages(&msg->msg_iter, INT_MAX);
> +	}

Instead of this transport check, lets move this under transport function
which then can be populated by TCP transport.

Rest of the changes looks good.

Regards,
Santosh

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ