lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20090803171807.17268.49836.stgit@dev.haskins.net>
Date:	Mon, 03 Aug 2009 13:18:07 -0400
From:	Gregory Haskins <ghaskins@...ell.com>
To:	linux-kernel@...r.kernel.org
Cc:	alacrityvm-devel@...ts.sourceforge.net, netdev@...r.kernel.org
Subject: [PATCH 7/7] venet: add scatter-gather/GSO support

SG/GSO significantly enhance the performance of network traffic under
certain circumstances.  We implement this feature as a separate patch
to avoid intially complicating the baseline venet driver.  This will
presumably make the review process slightly easier, since we can
focus on the basic interface first.

Signed-off-by: Gregory Haskins <ghaskins@...ell.com>
---

 drivers/net/vbus-enet.c |  249 +++++++++++++++++++++++++++++++++++++++++++++--
 include/linux/venet.h   |   39 +++++++
 2 files changed, 275 insertions(+), 13 deletions(-)

diff --git a/drivers/net/vbus-enet.c b/drivers/net/vbus-enet.c
index 8fcc2d6..5aa56ff 100644
--- a/drivers/net/vbus-enet.c
+++ b/drivers/net/vbus-enet.c
@@ -42,6 +42,8 @@ static int rx_ringlen = 256;
 module_param(rx_ringlen, int, 0444);
 static int tx_ringlen = 256;
 module_param(tx_ringlen, int, 0444);
+static int sg_enabled = 1;
+module_param(sg_enabled, int, 0444);
 
 #define PDEBUG(_dev, fmt, args...) dev_dbg(&(_dev)->dev, fmt, ## args)
 
@@ -58,8 +60,17 @@ struct vbus_enet_priv {
 	struct vbus_enet_queue     rxq;
 	struct vbus_enet_queue     txq;
 	struct tasklet_struct      txtask;
+	struct {
+		int                sg:1;
+		int                tso:1;
+		int                ufo:1;
+		int                tso6:1;
+		int                ecn:1;
+	} flags;
 };
 
+static void vbus_enet_tx_reap(struct vbus_enet_priv *priv, int force);
+
 static struct vbus_enet_priv *
 napi_to_priv(struct napi_struct *napi)
 {
@@ -193,6 +204,93 @@ rx_teardown(struct vbus_enet_priv *priv)
 	}
 }
 
+static int
+tx_setup(struct vbus_enet_priv *priv)
+{
+	struct ioq *ioq = priv->txq.queue;
+	struct ioq_iterator iter;
+	int i;
+	int ret;
+
+	if (!priv->flags.sg)
+		/*
+		 * There is nothing to do for a ring that is not using
+		 * scatter-gather
+		 */
+		return 0;
+
+	ret = ioq_iter_init(ioq, &iter, ioq_idxtype_valid, 0);
+	BUG_ON(ret < 0);
+
+	ret = ioq_iter_seek(&iter, ioq_seek_set, 0, 0);
+	BUG_ON(ret < 0);
+
+	/*
+	 * Now populate each descriptor with an empty SG descriptor
+	 */
+	for (i = 0; i < tx_ringlen; i++) {
+		struct venet_sg *vsg;
+		size_t iovlen = sizeof(struct venet_iov) * (MAX_SKB_FRAGS-1);
+		size_t len = sizeof(*vsg) + iovlen;
+
+		vsg = kzalloc(len, GFP_KERNEL);
+		if (!vsg)
+			return -ENOMEM;
+
+		iter.desc->cookie = (u64)vsg;
+		iter.desc->len    = len;
+		iter.desc->ptr    = (u64)__pa(vsg);
+
+		ret = ioq_iter_seek(&iter, ioq_seek_next, 0, 0);
+		BUG_ON(ret < 0);
+	}
+
+	return 0;
+}
+
+static void
+tx_teardown(struct vbus_enet_priv *priv)
+{
+	struct ioq *ioq = priv->txq.queue;
+	struct ioq_iterator iter;
+	int ret;
+
+	/* forcefully free all outstanding transmissions */
+	vbus_enet_tx_reap(priv, 1);
+
+	if (!priv->flags.sg)
+		/*
+		 * There is nothing else to do for a ring that is not using
+		 * scatter-gather
+		 */
+		return;
+
+	ret = ioq_iter_init(ioq, &iter, ioq_idxtype_valid, 0);
+	BUG_ON(ret < 0);
+
+	/* seek to position 0 */
+	ret = ioq_iter_seek(&iter, ioq_seek_set, 0, 0);
+	BUG_ON(ret < 0);
+
+	/*
+	 * free each valid descriptor
+	 */
+	while (iter.desc->cookie) {
+		struct venet_sg *vsg = (struct venet_sg *)iter.desc->cookie;
+
+		iter.desc->valid = 0;
+		wmb();
+
+		iter.desc->ptr = 0;
+		iter.desc->cookie = 0;
+
+		ret = ioq_iter_seek(&iter, ioq_seek_next, 0, 0);
+		BUG_ON(ret < 0);
+
+		kfree(vsg);
+	}
+}
+
 /*
  * Open and close
  */
@@ -396,14 +494,67 @@ vbus_enet_tx_start(struct sk_buff *skb, struct net_device *dev)
 	BUG_ON(ret < 0);
 	BUG_ON(iter.desc->sown);
 
-	/*
-	 * We simply put the skb right onto the ring.  We will get an interrupt
-	 * later when the data has been consumed and we can reap the pointers
-	 * at that time
-	 */
-	iter.desc->cookie = (u64)skb;
-	iter.desc->len = (u64)skb->len;
-	iter.desc->ptr = (u64)__pa(skb->data);
+	if (priv->flags.sg) {
+		struct venet_sg *vsg = (struct venet_sg *)iter.desc->cookie;
+		struct scatterlist sgl[MAX_SKB_FRAGS+1];
+		struct scatterlist *sg;
+		int count, maxcount = ARRAY_SIZE(sgl);
+
+		sg_init_table(sgl, maxcount);
+
+		memset(vsg, 0, sizeof(*vsg));
+
+		vsg->cookie = (u64)skb;
+		vsg->len    = skb->len;
+
+		if (skb->ip_summed == CHECKSUM_PARTIAL) {
+			vsg->flags      |= VENET_SG_FLAG_NEEDS_CSUM;
+			vsg->csum.start  = skb->csum_start - skb_headroom(skb);
+			vsg->csum.offset = skb->csum_offset;
+		}
+
+		if (skb_is_gso(skb)) {
+			struct skb_shared_info *sinfo = skb_shinfo(skb);
+
+			vsg->flags |= VENET_SG_FLAG_GSO;
+
+			vsg->gso.hdrlen = skb_transport_header(skb) - skb->data;
+			vsg->gso.size = sinfo->gso_size;
+			if (sinfo->gso_type & SKB_GSO_TCPV4)
+				vsg->gso.type = VENET_GSO_TYPE_TCPV4;
+			else if (sinfo->gso_type & SKB_GSO_TCPV6)
+				vsg->gso.type = VENET_GSO_TYPE_TCPV6;
+			else if (sinfo->gso_type & SKB_GSO_UDP)
+				vsg->gso.type = VENET_GSO_TYPE_UDP;
+			else
+				panic("Virtual-Ethernet: unknown GSO type " \
+				      "0x%x\n", sinfo->gso_type);
+
+			if (sinfo->gso_type & SKB_GSO_TCP_ECN)
+				vsg->flags |= VENET_SG_FLAG_ECN;
+		}
+
+		count = skb_to_sgvec(skb, sgl, 0, skb->len);
+
+		BUG_ON(count > maxcount);
+
+		for (sg = &sgl[0]; sg; sg = sg_next(sg)) {
+			struct venet_iov *iov = &vsg->iov[vsg->count++];
+
+			iov->len = sg->length;
+			iov->ptr = (u64)sg_phys(sg);
+		}
+
+	} else {
+		/*
+		 * non scatter-gather mode: simply put the skb right onto the
+		 * ring.
+		 */
+		iter.desc->cookie = (u64)skb;
+		iter.desc->len = (u64)skb->len;
+		iter.desc->ptr = (u64)__pa(skb->data);
+	}
+
 	iter.desc->valid  = 1;
 
 	priv->dev->stats.tx_packets++;
@@ -459,7 +610,17 @@ vbus_enet_tx_reap(struct vbus_enet_priv *priv, int force)
 	 * owned by the south-side
 	 */
 	while (iter.desc->valid && (!iter.desc->sown || force)) {
-		struct sk_buff *skb = (struct sk_buff *)iter.desc->cookie;
+		struct sk_buff *skb;
+
+		if (priv->flags.sg) {
+			struct venet_sg *vsg;
+
+			vsg = (struct venet_sg *)iter.desc->cookie;
+			skb = (struct sk_buff *)vsg->cookie;
+
+		} else {
+			skb = (struct sk_buff *)iter.desc->cookie;
+		}
 
 		PDEBUG(priv->dev, "completed sending %d bytes\n", skb->len);
 
@@ -538,6 +699,47 @@ tx_isr(struct ioq_notifier *notifier)
        tasklet_schedule(&priv->txtask);
 }
 
+static int
+vbus_enet_negcap(struct vbus_enet_priv *priv)
+{
+	int ret;
+	struct venet_capabilities caps;
+
+	memset(&caps, 0, sizeof(caps));
+
+	if (sg_enabled) {
+		caps.gid = VENET_CAP_GROUP_SG;
+		caps.bits |= (VENET_CAP_SG|VENET_CAP_TSO4|VENET_CAP_TSO6
+			      |VENET_CAP_ECN);
+		/* note: exclude UFO for now due to stack bug */
+	}
+
+	ret = devcall(priv, VENET_FUNC_NEGCAP, &caps, sizeof(caps));
+	if (ret < 0)
+		return ret;
+
+	if (caps.bits & VENET_CAP_SG) {
+		priv->flags.sg = true;
+
+		if (caps.bits & VENET_CAP_TSO4)
+			priv->flags.tso = true;
+		if (caps.bits & VENET_CAP_TSO6)
+			priv->flags.tso6 = true;
+		if (caps.bits & VENET_CAP_UFO)
+			priv->flags.ufo = true;
+		if (caps.bits & VENET_CAP_ECN)
+			priv->flags.ecn = true;
+
+		dev_info(&priv->dev->dev, "Detected GSO features %s%s%s%s\n",
+			 priv->flags.tso  ? "t" : "-",
+			 priv->flags.tso6 ? "T" : "-",
+			 priv->flags.ufo  ? "u" : "-",
+			 priv->flags.ecn  ? "e" : "-");
+	}
+
+	return 0;
+}
+
 static const struct net_device_ops vbus_enet_netdev_ops = {
 	.ndo_open          = vbus_enet_open,
 	.ndo_stop          = vbus_enet_stop,
@@ -574,12 +776,21 @@ vbus_enet_probe(struct vbus_device_proxy *vdev)
 	priv->dev  = dev;
 	priv->vdev = vdev;
 
+	ret = vbus_enet_negcap(priv);
+	if (ret < 0) {
+		printk(KERN_INFO "VENET: Error negotiating capabilities for " \
+		       "%lld\n",
+		       priv->vdev->id);
+		goto out_free;
+	}
+
 	tasklet_init(&priv->txtask, deferred_tx_isr, (unsigned long)priv);
 
 	queue_init(priv, &priv->rxq, VENET_QUEUE_RX, rx_ringlen, rx_isr);
 	queue_init(priv, &priv->txq, VENET_QUEUE_TX, tx_ringlen, tx_isr);
 
 	rx_setup(priv);
+	tx_setup(priv);
 
 	ioq_notify_enable(priv->rxq.queue, 0);  /* enable interrupts */
 	ioq_notify_enable(priv->txq.queue, 0);
@@ -599,6 +810,22 @@ vbus_enet_probe(struct vbus_device_proxy *vdev)
 
 	dev->features |= NETIF_F_HIGHDMA;
 
+	if (priv->flags.sg) {
+		dev->features |= NETIF_F_SG|NETIF_F_HW_CSUM|NETIF_F_FRAGLIST;
+
+		if (priv->flags.tso)
+			dev->features |= NETIF_F_TSO;
+
+		if (priv->flags.ufo)
+			dev->features |= NETIF_F_UFO;
+
+		if (priv->flags.tso6)
+			dev->features |= NETIF_F_TSO6;
+
+		if (priv->flags.ecn)
+			dev->features |= NETIF_F_TSO_ECN;
+	}
+
 	ret = register_netdev(dev);
 	if (ret < 0) {
 		printk(KERN_INFO "VENET: error %i registering device \"%s\"\n",
@@ -626,9 +853,9 @@ vbus_enet_remove(struct vbus_device_proxy *vdev)
 	napi_disable(&priv->napi);
 
 	rx_teardown(priv);
-	vbus_enet_tx_reap(priv, 1);
-
 	ioq_put(priv->rxq.queue);
+
+	tx_teardown(priv);
 	ioq_put(priv->txq.queue);
 
 	dev->ops->close(dev, 0);
diff --git a/include/linux/venet.h b/include/linux/venet.h
index 586be40..47ed37d 100644
--- a/include/linux/venet.h
+++ b/include/linux/venet.h
@@ -37,8 +37,43 @@ struct venet_capabilities {
 	__u32 bits;
 };
 
-/* CAPABILITIES-GROUP 0 */
-/* #define VENET_CAP_FOO    0   (No capabilities defined yet, for now) */
+#define VENET_CAP_GROUP_SG 0
+
+/* CAPABILITIES-GROUP SG */
+#define VENET_CAP_SG     (1 << 0)
+#define VENET_CAP_TSO4   (1 << 1)
+#define VENET_CAP_TSO6   (1 << 2)
+#define VENET_CAP_ECN    (1 << 3)
+#define VENET_CAP_UFO    (1 << 4)
+
+struct venet_iov {
+	__u32 len;
+	__u64 ptr;
+};
+
+#define VENET_SG_FLAG_NEEDS_CSUM (1 << 0)
+#define VENET_SG_FLAG_GSO        (1 << 1)
+#define VENET_SG_FLAG_ECN        (1 << 2)
+
+struct venet_sg {
+	__u64            cookie;
+	__u32            flags;
+	__u32            len;     /* total length of all iovs */
+	struct {
+		__u16    start;	  /* csum starting position */
+		__u16    offset;  /* offset to place csum */
+	} csum;
+	struct {
+#define VENET_GSO_TYPE_TCPV4	0	/* IPv4 TCP (TSO) */
+#define VENET_GSO_TYPE_UDP	1	/* IPv4 UDP (UFO) */
+#define VENET_GSO_TYPE_TCPV6	2	/* IPv6 TCP */
+		__u8     type;
+		__u16    hdrlen;
+		__u16    size;
+	} gso;
+	__u32            count;   /* nr of iovs */
+	struct venet_iov iov[1];
+};
 
 #define VENET_FUNC_LINKUP   0
 #define VENET_FUNC_LINKDOWN 1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ