lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:	Wed, 25 Aug 2010 12:00:50 -0700
From:	Ben Greear <greearb@...delatech.com>
To:	netdev@...r.kernel.org
Cc:	Ben Greear <greearb@...delatech.com>
Subject: [net-next 1/2] qdisc:  Allow qdiscs to provide backpressure up the stack.

Some qdiscs, in some instances, can reliably detect when they
are about to drop a packet in the dev_queue_xmit path.  In
such cases, it is useful to provide backpressure up the
stack, and NOT free the skb in the qdisc logic.

Signed-off-by: Ben Greear <greearb@...delatech.com>
---
:100644 100644 59962db... 20be932... M	include/linux/netdevice.h
:100644 100644 3c8728a... 146a97a... M	include/net/sch_generic.h
:100644 100644 859e30f... f360a9b... M	net/core/dev.c
:100644 100644 2aeb3a4... 0692717... M	net/sched/sch_generic.c
 include/linux/netdevice.h |    7 +++++++
 include/net/sch_generic.h |   19 +++++++++++++++++++
 net/core/dev.c            |   19 ++++++++++++++-----
 net/sched/sch_generic.c   |   20 ++++++++++++++++++++
 4 files changed, 60 insertions(+), 5 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 59962db..20be932 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -97,6 +97,7 @@ struct wireless_dev;
 #define NET_XMIT_DROP		0x01	/* skb dropped			*/
 #define NET_XMIT_CN		0x02	/* congestion notification	*/
 #define NET_XMIT_POLICED	0x03	/* skb is shot by police	*/
+#define NET_XMIT_BUSY	  	0x04	/* congestion, but skb was NOT freed */
 #define NET_XMIT_MASK		0x0f	/* qdisc flags in net/sch_generic.h */
 
 /* NET_XMIT_CN is special. It does not guarantee that this packet is lost. It
@@ -1296,6 +1297,12 @@ extern int		dev_open(struct net_device *dev);
 extern int		dev_close(struct net_device *dev);
 extern void		dev_disable_lro(struct net_device *dev);
 extern int		dev_queue_xmit(struct sk_buff *skb);
+
+/* Similar to dev_queue_xmit, but if try_no_consume != 0,
+ * it may return NET_XMIT_BUSY and NOT free the skb if it detects congestion
+ */
+extern int		try_dev_queue_xmit(struct sk_buff *skb, int try_no_consume);
+
 extern int		register_netdevice(struct net_device *dev);
 extern void		unregister_netdevice_queue(struct net_device *dev,
 						   struct list_head *head);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 3c8728a..146a97a 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -43,6 +43,7 @@ struct qdisc_size_table {
 
 struct Qdisc {
 	int 			(*enqueue)(struct sk_buff *skb, struct Qdisc *dev);
+	int 			(*try_enqueue)(struct sk_buff *, struct Qdisc *dev); /* May return NET_XMIT_BUSY and NOT free skb. */
 	struct sk_buff *	(*dequeue)(struct Qdisc *dev);
 	unsigned		flags;
 #define TCQ_F_BUILTIN		1
@@ -135,6 +136,7 @@ struct Qdisc_ops {
 	int			priv_size;
 
 	int 			(*enqueue)(struct sk_buff *, struct Qdisc *);
+	int 			(*try_enqueue)(struct sk_buff *, struct Qdisc *); /* May return NET_XMIT_BUSY and NOT free skb. */
 	struct sk_buff *	(*dequeue)(struct Qdisc *);
 	struct sk_buff *	(*peek)(struct Qdisc *);
 	unsigned int		(*drop)(struct Qdisc *);
@@ -426,6 +428,23 @@ static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch)
 	return qdisc_enqueue(skb, sch) & NET_XMIT_MASK;
 }
 
+static inline int try_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+#ifdef CONFIG_NET_SCHED
+	if (sch->stab)
+		qdisc_calculate_pkt_len(skb, sch->stab);
+#endif
+	if (sch->try_enqueue)
+		return sch->try_enqueue(skb, sch);
+	return sch->enqueue(skb, sch);
+}
+
+static inline int try_qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch)
+{
+	qdisc_skb_cb(skb)->pkt_len = skb->len;
+	return try_qdisc_enqueue(skb, sch) & NET_XMIT_MASK;
+}
+
 static inline void __qdisc_update_bstats(struct Qdisc *sch, unsigned int len)
 {
 	sch->bstats.bytes += len;
diff --git a/net/core/dev.c b/net/core/dev.c
index 859e30f..f360a9b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2087,7 +2087,8 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 
 static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 				 struct net_device *dev,
-				 struct netdev_queue *txq)
+				 struct netdev_queue *txq,
+				 bool try_no_consume)
 {
 	spinlock_t *root_lock = qdisc_lock(q);
 	bool contended = qdisc_is_running(q);
@@ -2128,7 +2129,10 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		rc = NET_XMIT_SUCCESS;
 	} else {
 		skb_dst_force(skb);
-		rc = qdisc_enqueue_root(skb, q);
+		if (try_no_consume) 
+			rc = try_qdisc_enqueue_root(skb, q);
+		else
+			rc = qdisc_enqueue_root(skb, q);
 		if (qdisc_run_begin(q)) {
 			if (unlikely(contended)) {
 				spin_unlock(&q->busylock);
@@ -2168,7 +2172,12 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
  *      the BH enable code must have IRQs enabled so that it will not deadlock.
  *          --BLG
  */
-int dev_queue_xmit(struct sk_buff *skb)
+int dev_queue_xmit(struct sk_buff *skb) {
+	return try_dev_queue_xmit(skb, 0);
+}
+EXPORT_SYMBOL(dev_queue_xmit);
+
+int try_dev_queue_xmit(struct sk_buff *skb, int try_no_consume)
 {
 	struct net_device *dev = skb->dev;
 	struct netdev_queue *txq;
@@ -2187,7 +2196,7 @@ int dev_queue_xmit(struct sk_buff *skb)
 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
 #endif
 	if (q->enqueue) {
-		rc = __dev_xmit_skb(skb, q, dev, txq);
+		rc = __dev_xmit_skb(skb, q, dev, txq, try_no_consume);
 		goto out;
 	}
 
@@ -2239,7 +2248,7 @@ out:
 	rcu_read_unlock_bh();
 	return rc;
 }
-EXPORT_SYMBOL(dev_queue_xmit);
+EXPORT_SYMBOL(try_dev_queue_xmit);
 
 
 /*=======================================================================
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 2aeb3a4..0692717 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -460,6 +460,24 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
 	return qdisc_drop(skb, qdisc);
 }
 
+static int pfifo_fast_try_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
+{
+	if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) {
+		int band = prio2band[skb->priority & TC_PRIO_MAX];
+		struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
+		struct sk_buff_head *list = band2list(priv, band);
+
+		priv->bitmap |= (1 << band);
+		qdisc->q.qlen++;
+		return __qdisc_enqueue_tail(skb, qdisc, list);
+	}
+
+	/* no room to enqueue, tell calling code to back off.  Do NOT free skb, that is
+	 * calling code's to deal with.
+	 */
+	return NET_XMIT_BUSY;
+}
+
 static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
 {
 	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
@@ -533,6 +551,7 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
 	.id		=	"pfifo_fast",
 	.priv_size	=	sizeof(struct pfifo_fast_priv),
 	.enqueue	=	pfifo_fast_enqueue,
+	.try_enqueue	=	pfifo_fast_try_enqueue,
 	.dequeue	=	pfifo_fast_dequeue,
 	.peek		=	pfifo_fast_peek,
 	.init		=	pfifo_fast_init,
@@ -564,6 +583,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 	spin_lock_init(&sch->busylock);
 	sch->ops = ops;
 	sch->enqueue = ops->enqueue;
+	sch->try_enqueue = ops->try_enqueue;
 	sch->dequeue = ops->dequeue;
 	sch->dev_queue = dev_queue;
 	dev_hold(qdisc_dev(sch));
-- 
1.6.2.5

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ