lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 20 Aug 2015 10:36:46 -0400
From:	Willem de Bruijn <willemb@...gle.com>
To:	netdev@...r.kernel.org
Cc:	mst@...hat.com, jasowang@...hat.com,
	Willem de Bruijn <willemb@...gle.com>
Subject: [PATCH net-next RFC 07/10] raw: enable sendmsg zerocopy with hdrincl

From: Willem de Bruijn <willemb@...gle.com>

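Add MSG_ZEROCOPY support to inet/raw sockets when IP_HDRINCL is set.
When zerocopy is possible, at most MAX_HEADER bytes are copied into the
skb linear area and the rest of the payload is attached as zerocopy frags.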

Tested:
  raw loopback test //net/socket:snd_zerocopy_lo -r -z passes:

  without zerocopy (-r):
    rx=69348 (4327 MB) tx=69348 txc=0
    rx=145590 (9085 MB) tx=145590 txc=0
    rx=219210 (13679 MB) tx=219210 txc=0
    rx=293688 (18327 MB) tx=293688 txc=0

  with zerocopy (-r -z):
    rx=258132 (16108 MB) tx=258132 txc=258122
    rx=541266 (33777 MB) tx=541266 txc=541256
    rx=822606 (51334 MB) tx=822606 txc=822596
    rx=1105776 (69005 MB) tx=1105776 txc=1105766

  raw hdrincl loopback test //net/socket:snd_zerocopy_lo -R -z passes:

  without zerocopy (-R):
    rx=101904 (6359 MB) tx=101904 txc=0
    rx=215256 (13432 MB) tx=215256 txc=0
    rx=328584 (20505 MB) tx=328584 txc=0
    rx=442008 (27583 MB) tx=442008 txc=0

  with zerocopy (-R -z):
    rx=265398 (16562 MB) tx=265398 txc=265392
    rx=558744 (34868 MB) tx=558744 txc=558738
    rx=853308 (53250 MB) tx=853308 txc=853302
    rx=1148142 (71649 MB) tx=1148142 txc=1148136

Signed-off-by: Willem de Bruijn <willemb@...gle.com>
---
 net/ipv4/raw.c | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 561cd4b..c4fa57d 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -347,7 +347,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
 	unsigned int iphlen;
 	int err;
 	struct rtable *rt = *rtp;
-	int hlen, tlen;
+	int hlen, tlen, linear;
 
 	if (length > rt->dst.dev->mtu) {
 		ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
@@ -359,8 +359,14 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
 
 	hlen = LL_RESERVED_SPACE(rt->dst.dev);
 	tlen = rt->dst.dev->needed_tailroom;
+	linear = length;
+
+	if (flags & MSG_ZEROCOPY && length &&
+	    sock_can_zerocopy(sk, rt, CHECKSUM_UNNECESSARY))
+		linear = min_t(int, length, MAX_HEADER);
+
 	skb = sock_alloc_send_skb(sk,
-				  length + hlen + tlen + 15,
+				  linear + hlen + tlen + 15,
 				  flags & MSG_DONTWAIT, &err);
 	if (!skb)
 		goto error;
@@ -373,15 +379,14 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
 
 	skb_reset_network_header(skb);
 	iph = ip_hdr(skb);
-	skb_put(skb, length);
+	skb_put(skb, linear);
 
 	skb->ip_summed = CHECKSUM_NONE;
 
 	sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
-
 	skb->transport_header = skb->network_header;
 	err = -EFAULT;
-	if (memcpy_from_msg(iph, msg, length))
+	if (memcpy_from_msg(iph, msg, linear))
 		goto error_free;
 
 	iphlen = iph->ihl * 4;
@@ -397,6 +402,17 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
 	if (iphlen > length)
 		goto error_free;
 
+	if (length != linear) {
+		size_t datalen = length - linear;
+
+		if (!skb_zerocopy_alloc(skb, datalen))
+			goto error_zcopy;
+		err = skb_zerocopy_add_frags_iter(sk, skb, &msg->msg_iter,
+						  datalen, skb_uarg(skb));
+		if (err != datalen)
+			goto error_zcopy;
+	}
+
 	if (iphlen >= sizeof(*iph)) {
 		if (!iph->saddr)
 			iph->saddr = fl4->saddr;
@@ -420,6 +436,8 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
 out:
 	return 0;
 
+error_zcopy:
+	sock_zerocopy_put_abort(skb_zcopy(skb));
 error_free:
 	kfree_skb(skb);
 error:
-- 
2.5.0.276.gf5e568e

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ