lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Fri, 14 Sep 2007 14:31:56 +0530
From:	Krishna Kumar <krkumar2@...ibm.com>
To:	johnpol@....mipt.ru, herbert@...dor.apana.org.au, hadi@...erus.ca,
	kaber@...sh.net, shemminger@...ux-foundation.org,
	davem@...emloft.net
Cc:	jagana@...ibm.com, Robert.Olsson@...a.slu.se,
	peter.p.waskiewicz.jr@...el.com, xma@...ibm.com, gaagaan@...il.com,
	kumarkr@...ux.ibm.com, rdreier@...co.com, rick.jones2@...com,
	mcarlson@...adcom.com, jeff@...zik.org, mchan@...adcom.com,
	general@...ts.openfabrics.org, netdev@...r.kernel.org,
	tgraf@...g.ch, randy.dunlap@...cle.com,
	Krishna Kumar <krkumar2@...ibm.com>, sri@...ibm.com
Subject: [PATCH 3/10 REV5] [sched] Modify qdisc_run to support batching

Modify qdisc_run() to support batching. Update the callers of qdisc_run()
for the new batch-list argument, and modify qdisc_restart() to implement batching.

Signed-off-by: Krishna Kumar <krkumar2@...ibm.com>
---
 include/linux/netdevice.h |    2 
 include/net/pkt_sched.h   |   17 +++++--
 net/core/dev.c            |   45 ++++++++++++++++++
 net/sched/sch_generic.c   |  109 ++++++++++++++++++++++++++++++++++++----------
 4 files changed, 145 insertions(+), 28 deletions(-)

diff -ruNp org/include/net/pkt_sched.h new/include/net/pkt_sched.h
--- org/include/net/pkt_sched.h	2007-09-13 09:11:09.000000000 +0530
+++ new/include/net/pkt_sched.h	2007-09-14 10:25:36.000000000 +0530
@@ -80,13 +80,24 @@ extern struct qdisc_rate_table *qdisc_ge
 		struct rtattr *tab);
 extern void qdisc_put_rtab(struct qdisc_rate_table *tab);
 
-extern void __qdisc_run(struct net_device *dev);
+static inline void qdisc_block(struct net_device *dev)
+{
+	while (test_and_set_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
+		yield();
+}
+
+static inline void qdisc_unblock(struct net_device *dev)
+{
+	clear_bit(__LINK_STATE_QDISC_RUNNING, &dev->state);
+}
+
+extern void __qdisc_run(struct net_device *dev, struct sk_buff_head *blist);
 
-static inline void qdisc_run(struct net_device *dev)
+static inline void qdisc_run(struct net_device *dev, struct sk_buff_head *blist)
 {
 	if (!netif_queue_stopped(dev) &&
 	    !test_and_set_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
-		__qdisc_run(dev);
+		__qdisc_run(dev, blist);
 }
 
 extern int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
diff -ruNp org/include/linux/netdevice.h new/include/linux/netdevice.h
--- org/include/linux/netdevice.h	2007-09-13 09:11:09.000000000 +0530
+++ new/include/linux/netdevice.h	2007-09-14 10:26:21.000000000 +0530
@@ -1013,6 +1013,8 @@ extern int		dev_set_mac_address(struct n
 					    struct sockaddr *);
 extern int		dev_hard_start_xmit(struct sk_buff *skb,
 					    struct net_device *dev);
+extern int		dev_add_skb_to_blist(struct sk_buff *skb,
+					     struct net_device *dev);
 
 extern int		netdev_budget;
 
diff -ruNp org/net/sched/sch_generic.c new/net/sched/sch_generic.c
--- org/net/sched/sch_generic.c	2007-09-13 09:11:10.000000000 +0530
+++ new/net/sched/sch_generic.c	2007-09-14 10:25:36.000000000 +0530
@@ -59,26 +59,30 @@ static inline int qdisc_qlen(struct Qdis
 static inline int dev_requeue_skb(struct sk_buff *skb, struct net_device *dev,
 				  struct Qdisc *q)
 {
-	if (unlikely(skb->next))
-		dev->gso_skb = skb;
-	else
-		q->ops->requeue(skb, q);
+	if (skb) {
+		if (unlikely(skb->next))
+			dev->gso_skb = skb;
+		else
+			q->ops->requeue(skb, q);
+	}
 
 	netif_schedule(dev);
 	return 0;
 }
 
-static inline struct sk_buff *dev_dequeue_skb(struct net_device *dev,
-					      struct Qdisc *q)
+static inline int dev_requeue_skb_wrapper(struct sk_buff *skb,
+					  struct net_device *dev,
+					  struct Qdisc *q)
 {
-	struct sk_buff *skb;
-
-	if ((skb = dev->gso_skb))
-		dev->gso_skb = NULL;
-	else
-		skb = q->dequeue(q);
+	if (dev->skb_blist) {
+		/*
+		 * In case of tx full, batching drivers would have put all
+		 * skbs into skb_blist so there is no skb to requeue.
+		 */
+		skb = NULL;
+	}
 
-	return skb;
+	return dev_requeue_skb(skb, dev, q);
 }
 
 static inline int handle_dev_cpu_collision(struct sk_buff *skb,
@@ -91,10 +95,15 @@ static inline int handle_dev_cpu_collisi
 		/*
 		 * Same CPU holding the lock. It may be a transient
 		 * configuration error, when hard_start_xmit() recurses. We
-		 * detect it by checking xmit owner and drop the packet when
-		 * deadloop is detected. Return OK to try the next skb.
+		 * detect it by checking xmit owner and drop the packet (or
+		 * all packets in batching case) when deadloop is detected.
+		 * Return OK to try the next skb.
 		 */
-		kfree_skb(skb);
+		if (likely(skb))
+			kfree_skb(skb);
+		else if (!skb_queue_empty(dev->skb_blist))
+			skb_queue_purge(dev->skb_blist);
+
 		if (net_ratelimit())
 			printk(KERN_WARNING "Dead loop on netdevice %s, "
 			       "fix it urgently!\n", dev->name);
@@ -111,6 +120,53 @@ static inline int handle_dev_cpu_collisi
 	return ret;
 }
 
+#define DEQUEUE_SKB(q)		(q->dequeue(q))
+
+static inline struct sk_buff *get_gso_skb(struct net_device *dev)
+{
+	struct sk_buff *skb;
+
+	if ((skb = dev->gso_skb))
+		dev->gso_skb = NULL;
+
+	return skb;
+}
+
+/*
+ * Algorithm to get skb(s) is:
+ *	- If gso skb present, return it.
+ *	- Non batching drivers, or if the batch list is empty and there is
+ *	  1 skb in the queue - dequeue skb and put it in *skbp to tell the
+ *	  caller to use the single xmit API.
+ *	- Batching drivers where the batch list already contains at least one
+ *	  skb, or if there are multiple skbs in the queue: keep dequeueing
+ *	  skbs up to a limit and set *skbp to NULL to tell the caller to use
+ *	  the multiple xmit API.
+ *
+ * Returns:
+ *	1 - at least one skb is to be sent out, *skbp contains skb or NULL
+ *	    (in case >1 skbs present in blist for batching)
+ *	0 - no skbs to be sent.
+ */
+static inline int get_skb(struct net_device *dev, struct Qdisc *q,
+			  struct sk_buff_head *blist, struct sk_buff **skbp)
+{
+	if ((*skbp = get_gso_skb(dev)) != NULL)
+		return 1;
+
+	if (!blist || (!skb_queue_len(blist) && qdisc_qlen(q) <= 1)) {
+		return likely((*skbp = DEQUEUE_SKB(q)) != NULL);
+	} else {
+		struct sk_buff *skb;
+		int max = dev->tx_queue_len - skb_queue_len(blist);
+
+		while (max > 0 && (skb = DEQUEUE_SKB(q)) != NULL)
+			max -= dev_add_skb_to_blist(skb, dev);
+
+		return 1;	/* there is atleast one skb in skb_blist */
+	}
+}
+
 /*
  * NOTE: Called under dev->queue_lock with locally disabled BH.
  *
@@ -130,7 +186,8 @@ static inline int handle_dev_cpu_collisi
  *				>0 - queue is not empty.
  *
  */
-static inline int qdisc_restart(struct net_device *dev)
+static inline int qdisc_restart(struct net_device *dev,
+				struct sk_buff_head *blist)
 {
 	struct Qdisc *q = dev->qdisc;
 	struct sk_buff *skb;
@@ -138,7 +195,7 @@ static inline int qdisc_restart(struct n
 	int ret;
 
 	/* Dequeue packet */
-	if (unlikely((skb = dev_dequeue_skb(dev, q)) == NULL))
+	if (unlikely(get_skb(dev, q, blist, &skb) == 0))
 		return 0;
 
 	/*
@@ -168,7 +225,7 @@ static inline int qdisc_restart(struct n
 
 	switch (ret) {
 	case NETDEV_TX_OK:
-		/* Driver sent out skb successfully */
+		/* Driver sent out skb (or entire skb_blist) successfully */
 		ret = qdisc_qlen(q);
 		break;
 
@@ -183,21 +240,21 @@ static inline int qdisc_restart(struct n
 			printk(KERN_WARNING "BUG %s code %d qlen %d\n",
 			       dev->name, ret, q->q.qlen);
 
-		ret = dev_requeue_skb(skb, dev, q);
+		ret = dev_requeue_skb_wrapper(skb, dev, q);
 		break;
 	}
 
 	return ret;
 }
 
-void __qdisc_run(struct net_device *dev)
+void __qdisc_run(struct net_device *dev, struct sk_buff_head *blist)
 {
 	do {
-		if (!qdisc_restart(dev))
+		if (!qdisc_restart(dev, blist))
 			break;
 	} while (!netif_queue_stopped(dev));
 
-	clear_bit(__LINK_STATE_QDISC_RUNNING, &dev->state);
+	qdisc_unblock(dev);
 }
 
 static void dev_watchdog(unsigned long arg)
@@ -575,6 +632,12 @@ void dev_deactivate(struct net_device *d
 	qdisc = dev->qdisc;
 	dev->qdisc = &noop_qdisc;
 
+	if (dev->skb_blist) {
+		/* Release skbs on batch list */
+		if (!skb_queue_empty(dev->skb_blist))
+			skb_queue_purge(dev->skb_blist);
+	}
+
 	qdisc_reset(qdisc);
 
 	skb = dev->gso_skb;
diff -ruNp org/net/core/dev.c new/net/core/dev.c
--- org/net/core/dev.c	2007-09-14 10:24:27.000000000 +0530
+++ new/net/core/dev.c	2007-09-14 10:25:36.000000000 +0530
@@ -1542,6 +1542,46 @@ static int dev_gso_segment(struct sk_buf
 	return 0;
 }
 
+/*
+ * Add skb (skbs in case segmentation is required) to dev->skb_blist. No one
+ * can add to this list simultaneously since we are holding the QDISC RUNNING
+ * bit. The list is also safe from simultaneous deletes since skbs are
+ * dequeued only when the driver is invoked.
+ *
+ * Returns count of successful skb(s) added to skb_blist.
+ */
+int dev_add_skb_to_blist(struct sk_buff *skb, struct net_device *dev)
+{
+	if (!list_empty(&ptype_all))
+		dev_queue_xmit_nit(skb, dev);
+
+	if (netif_needs_gso(dev, skb)) {
+		if (unlikely(dev_gso_segment(skb))) {
+			kfree_skb(skb);
+			return 0;
+		}
+
+		if (skb->next) {
+			int count = 0;
+
+			do {
+				struct sk_buff *nskb = skb->next;
+
+				skb->next = nskb->next;
+				__skb_queue_tail(dev->skb_blist, nskb);
+				count++;
+			} while (skb->next);
+
+			/* Reset destructor for kfree_skb to work */
+			skb->destructor = DEV_GSO_CB(skb)->destructor;
+			kfree_skb(skb);
+			return count;
+		}
+	}
+	__skb_queue_tail(dev->skb_blist, skb);
+	return 1;
+}
+
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	if (likely(skb)) {
@@ -1697,7 +1737,7 @@ gso:
 			/* reset queue_mapping to zero */
 			skb->queue_mapping = 0;
 			rc = q->enqueue(skb, q);
-			qdisc_run(dev);
+			qdisc_run(dev, NULL);
 			spin_unlock(&dev->queue_lock);
 
 			rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
@@ -1895,7 +1935,8 @@ static void net_tx_action(struct softirq
 			clear_bit(__LINK_STATE_SCHED, &dev->state);
 
 			if (spin_trylock(&dev->queue_lock)) {
-				qdisc_run(dev);
+				/* Send all skbs if driver supports batching */
+				qdisc_run(dev, dev->skb_blist);
 				spin_unlock(&dev->queue_lock);
 			} else {
 				netif_schedule(dev);
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ