lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:	Sun, 21 Feb 2016 20:40:59 +0200
From:	"Michael S. Tsirkin" <mst@...hat.com>
To:	linux-kernel@...r.kernel.org
Cc:	virtualization@...ts.linux-foundation.org, netdev@...r.kernel.org
Subject: [PATCH] virtio_net: switch to build_skb for mrg_rxbuf

For small packets data copy was observed to
take up about 15% CPU time. Switch to build_skb
and avoid the copy when using mergeable rx buffers.

As a bonus, medium-size skbs that fit in a page will be
completely linear.

Of course, we now need to lower the lower bound on packet size,
to make sure a sane number of skbs fits in rx socket buffer.
By how much? I don't know yet.

It might also be useful to prefetch the packet buffer since
net stack will likely use it soon.

Lightly tested, in particular, I didn't yet test what this
actually does to performance - sending this out for early
feedback/flames.

TODO: it appears that Linux won't handle correctly the case of first
buffer being very small (or consisting exclusively of virtio header).
This is already the case for current code, so don't bother
testing yet.

Signed-off-by: Michael S. Tsirkin <mst@...hat.com>
---
 drivers/net/virtio_net.c | 44 +++++++++++++++++++++++++++++++++++---------
 1 file changed, 35 insertions(+), 9 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 767ab11..20f8dda 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -347,6 +347,26 @@ err:
 	return NULL;
 }
 
+#define VNET_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
+#define VNET_SKB_BUG (VNET_SKB_PAD < sizeof(struct virtio_net_hdr_mrg_rxbuf))
+#define VNET_SKB_LEN(len) ((len) - sizeof(struct virtio_net_hdr_mrg_rxbuf))
+#define VNET_SKB_OFF VNET_SKB_LEN(VNET_SKB_PAD)
+
+static struct sk_buff *vnet_build_skb(struct virtnet_info *vi,
+				      void *buf,
+				      unsigned int len, unsigned int truesize)
+{
+	struct sk_buff *skb = build_skb(buf, truesize);
+
+	if (!skb)
+		return NULL;
+
+	skb_reserve(skb, VNET_SKB_PAD);
+	skb_put(skb, VNET_SKB_LEN(len));
+
+	return skb;
+}
+
 static struct sk_buff *receive_mergeable(struct net_device *dev,
 					 struct virtnet_info *vi,
 					 struct receive_queue *rq,
@@ -354,14 +374,13 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 					 unsigned int len)
 {
 	void *buf = mergeable_ctx_to_buf_address(ctx);
-	struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
+	struct virtio_net_hdr_mrg_rxbuf *hdr = buf + VNET_SKB_OFF;
 	u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
 	struct page *page = virt_to_head_page(buf);
-	int offset = buf - page_address(page);
 	unsigned int truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
+	int offset;
 
-	struct sk_buff *head_skb = page_to_skb(vi, rq, page, offset, len,
-					       truesize);
+	struct sk_buff *head_skb = vnet_build_skb(vi, buf, len, truesize);
 	struct sk_buff *curr_skb = head_skb;
 
 	if (unlikely(!curr_skb))
@@ -606,11 +625,14 @@ static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
 
 static unsigned int get_mergeable_buf_len(struct ewma_pkt_len *avg_pkt_len)
 {
-	const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+	unsigned int hdr;
 	unsigned int len;
 
-	len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
-			GOOD_PACKET_LEN, PAGE_SIZE - hdr_len);
+	hdr = ALIGN(VNET_SKB_PAD + sizeof(struct skb_shared_info),
+		    MERGEABLE_BUFFER_ALIGN);
+
+	len = hdr + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
+			       500 /* TODO */, PAGE_SIZE - hdr);
 	return ALIGN(len, MERGEABLE_BUFFER_ALIGN);
 }
 
@@ -626,7 +648,10 @@ static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
 	if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp)))
 		return -ENOMEM;
 
-	buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
+	BUILD_BUG_ON(VNET_SKB_BUG);
+
+	buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset +
+		VNET_SKB_OFF;
 	ctx = mergeable_buf_to_ctx(buf, len);
 	get_page(alloc_frag->page);
 	alloc_frag->offset += len;
@@ -641,7 +666,8 @@ static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
 		alloc_frag->offset += hole;
 	}
 
-	sg_init_one(rq->sg, buf, len);
+	sg_init_one(rq->sg, buf,
+		    len - VNET_SKB_OFF - sizeof(struct skb_shared_info));
 	err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, (void *)ctx, gfp);
 	if (err < 0)
 		put_page(virt_to_head_page(buf));
-- 
MST

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ