[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1294975793.3403.117.camel@edumazet-laptop>
Date:	Fri, 14 Jan 2011 04:29:53 +0100
From:	Eric Dumazet <eric.dumazet@...il.com>
To:	Stephen Hemminger <shemminger@...tta.com>
Cc:	David Miller <davem@...emloft.net>, netdev@...r.kernel.org
Subject: Re: [PATCH] CHOKe flow scheduler (0.8)
Le jeudi 13 janvier 2011 à 15:34 -0800, Stephen Hemminger a écrit :
> CHOKe ("CHOose and Kill" or "CHOose and Keep") is an alternative
> packet scheduler based on the Random Exponential Drop (RED) algorithm.
> 
> The core idea is:
>   For every packet arrival:
>   	Calculate Qave
> 	if (Qave < minth) 
> 	     Queue the new packet
> 	else 
> 	     Select randomly a packet from the queue 
> 	     if (both packets from same flow)
> 	     then Drop both the packets
> 	     else if (Qave > maxth)
> 	          Drop packet
> 	     else
> 	       	  Admit packet with probability P (same as RED)
> 
> See also:
>   Rong Pan, Balaji Prabhakar, Konstantinos Psounis, "CHOKe: a stateless active
>    queue management scheme for approximating fair bandwidth allocation", 
>   Proceeding of INFOCOM'2000, March 2000.
> 
> Signed-off-by: Stephen Hemminger <shemminger@...tta.com>
> 
> ---
> 0.8 change queue length and holes account.
>     keep sch->q.qlen updated, and holes counter not needed.
> 
> ---
>  net/sched/Kconfig     |   11 +
>  net/sched/Makefile    |    1 
>  net/sched/sch_choke.c |  536 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 548 insertions(+)
> 
Hi Stephen
Your diffstat was an old one, here the right one.
 include/linux/pkt_sched.h |   29 +
 net/sched/Kconfig         |   11 
 net/sched/Makefile        |    1 
 net/sched/sch_choke.c     |  552 ++++++++++++++++++++++++++++++++++++
I tested v8 and found several serious problems, please find a diff of my
latest changes :
- wrong oskb/skb used in choke_enqueue()
- choke_zap_head_holes() is called from choke_dequeue() and crash if we
dequeued last packet. (!!!)
- out of bound access in choke_zap_tail_holes()
- choke_dequeue() can be shorter
- choke_change() must dequeue/drop in excess packets or risk new array
overfill (if we reduce queue limit by tc qdisc change ...)
- inline is not needed, space errors in include file
Thanks !
 include/linux/pkt_sched.h |    8 +++---
 net/sched/sch_choke.c     |   43 ++++++++++++++++++++++--------------
 2 files changed, 31 insertions(+), 20 deletions(-)
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 83bac92..498c798 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -269,10 +269,10 @@ struct tc_choke_qopt {
 };
 
 struct tc_choke_xstats {
-	__u32           early;          /* Early drops */
-	__u32           pdrop;          /* Drops due to queue limits */
-	__u32           other;          /* Drops due to drop() calls */
-	__u32           marked;         /* Marked packets */
+	__u32		early;          /* Early drops */
+	__u32		pdrop;          /* Drops due to queue limits */
+	__u32		other;          /* Drops due to drop() calls */
+	__u32		marked;         /* Marked packets */
 	__u32		matched;	/* Drops due to flow match */
 };
 
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 136d4e5..c710c57 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -74,7 +74,7 @@ struct choke_sched_data {
 };
 
 /* deliver a random number between 0 and N - 1 */
-static inline u32 random_N(unsigned int N)
+static u32 random_N(unsigned int N)
 {
 	return reciprocal_divide(random32(), N);
 }
@@ -99,13 +99,13 @@ static struct sk_buff *choke_peek_random(struct Qdisc *sch,
 }
 
 /* Is ECN parameter configured */
-static inline int use_ecn(const struct choke_sched_data *q)
+static int use_ecn(const struct choke_sched_data *q)
 {
 	return q->flags & TC_RED_ECN;
 }
 
 /* Should packets over max just be dropped (versus marked) */
-static inline int use_harddrop(const struct choke_sched_data *q)
+static int use_harddrop(const struct choke_sched_data *q)
 {
 	return q->flags & TC_RED_HARDDROP;
 }
@@ -113,20 +113,21 @@ static inline int use_harddrop(const struct choke_sched_data *q)
 /* Move head pointer forward to skip over holes */
 static void choke_zap_head_holes(struct choke_sched_data *q)
 {
-	while (q->tab[q->head] == NULL) {
+	do {
 		q->head = (q->head + 1) & q->tab_mask;
-
-		BUG_ON(q->head == q->tail);
-	}
+		if (q->head == q->tail)
+			break;
+	} while (q->tab[q->head] == NULL);
 }
 
 /* Move tail pointer backwards to reuse holes */
 static void choke_zap_tail_holes(struct choke_sched_data *q)
 {
-	while (q->tab[q->tail - 1] == NULL) {
+	do {
 		q->tail = (q->tail - 1) & q->tab_mask;
-		BUG_ON(q->head == q->tail);
-	}
+		if (q->head == q->tail)
+			break;
+	} while (q->tab[q->tail] == NULL);
 }
 
 /* Drop packet from queue array by creating a "hole" */
@@ -145,7 +146,7 @@ static void choke_drop_by_idx(struct choke_sched_data *q, unsigned int idx)
    2. fast internal classification
    3. use TC filter based classification
 */
-static inline unsigned int choke_classify(struct sk_buff *skb,
+static unsigned int choke_classify(struct sk_buff *skb,
 					  struct Qdisc *sch, int *qerr)
 
 {
@@ -218,7 +219,7 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 			/* Drop both packets */
 			q->stats.matched++;
 			choke_drop_by_idx(q, idx);
-			sch->qstats.backlog -= qdisc_pkt_len(skb);
+			sch->qstats.backlog -= qdisc_pkt_len(oskb);
 			--sch->q.qlen;
 			qdisc_drop(oskb, sch);
 			goto congestion_drop;
@@ -285,8 +286,7 @@ static struct sk_buff *choke_dequeue(struct Qdisc *sch)
 	}
 
 	skb = q->tab[q->head];
-	q->tab[q->head] = NULL; /* not really needed */
-	q->head = (q->head + 1) & q->tab_mask;
+	q->tab[q->head] = NULL;
 	choke_zap_head_holes(q);
 	--sch->q.qlen;
 	sch->qstats.backlog -= qdisc_pkt_len(skb);
@@ -371,12 +371,23 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
 		sch_tree_lock(sch);
 		old = q->tab;
 		if (old) {
-			unsigned int tail = 0;
+			unsigned int oqlen = sch->q.qlen, tail = 0;
 
 			while (q->head != q->tail) {
-				ntab[tail++] = q->tab[q->head];
+				struct sk_buff *skb = q->tab[q->head];
+
 				q->head = (q->head + 1) & q->tab_mask;
+				if (!skb)
+					continue;
+				if (tail < mask) {
+					ntab[tail++] = skb;
+					continue;
+				}
+				sch->qstats.backlog -= qdisc_pkt_len(skb);
+				--sch->q.qlen;
+				qdisc_drop(skb, sch);
 			}
+			qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen);
 			q->head = 0;
 			q->tail = tail;
 		}
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists
 
