Message-Id: <1422992932-9893-1-git-send-email-kennetkl@ifi.uio.no>
Date: Tue, 3 Feb 2015 20:48:52 +0100
From: Kenneth Klette Jonassen <kennetkl@....uio.no>
To: netdev@...r.kernel.org
Cc: Kenneth Klette Jonassen <kennetkl@....uio.no>
Subject: [PATCH net-next v2 1/2] pkt_sched: fq: avoid artificial bursts for clocked flows
Current pacing behavior always throttles a flow for a time equal to one full
quantum, starting when the flow exhausts its credit. This is only optimal for
bursty traffic that consumes its entire credit in one sitting.
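To put a number on this (stand-alone illustration only, not code from this
patch): the throttle time is one quantum's worth of bytes at the pacing rate,
no matter how small the individual packets are.

  /*
   * Hypothetical illustration: delay imposed when a flow exhausts its
   * credit, assuming the charged length is rounded up to one full
   * quantum before dividing by the pacing rate.
   */
  #include <stdio.h>
  #include <stdint.h>

  int main(void)
  {
      uint64_t quantum = 3028;        /* bytes, roughly 2 * MTU */
      uint64_t rate = 1000 * 1000;    /* pacing rate, bytes per second */
      uint64_t delay_ns = quantum * 1000000000ULL / rate;

      /* ~3 ms here: plenty of time for small packets to pile up. */
      printf("throttle delay ~= %llu us\n",
             (unsigned long long)(delay_ns / 1000));
      return 0;
  }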
For flows with small packets, this throttling can cause packets to queue up
artificially and go out in bursts, even when the flow's sending rate is well
below its pacing rate. A refill mechanism in fq_enqueue() counteracts this in
some cases, but it only helps when a flow spaces its packets further apart
than the flow refill delay.
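For reference, that heuristic (shown simplified here; the diff below adjusts
it, and patch 2/2 removes it) only tops up the credit when the flow has been
idle longer than flow_refill_delay, 40 ms by default:

  /* Simplified form of the current refill check in fq_enqueue(). */
  if (time_after(jiffies, f->age + q->flow_refill_delay))
      f->credit = max_t(u32, f->credit, q->quantum);

A clocked flow spacing small packets a few milliseconds apart never takes
this branch and keeps getting throttled by full quanta.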
Keep track of the time a flow's credit was last filled, and use this to
approximate a full credit refill once one quantum of time has passed. This is
a more fine-grained approach than the refill heuristic in fq_enqueue(), which
the next patch in this series removes.
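In effect, the dequeue path changes to the following (extracted from the
hunks below, not a complete listing):

  /* When one quantum of credit is refilled: */
  f->time_credit_filled = max(now, f->time_next_packet);

  /* When pacing the next packet, space it from the last credit refill
   * instead of from now, so a flow sending below its pacing rate does
   * not pick up an extra quantum of delay:
   */
  f->time_next_packet = max(now, f->time_credit_filled + len);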
Since calls to ktime_get_ns() are expensive, time_credit_filled is only set
accurately on dequeue. For new flows, set time_credit_filled to zero and
anticipate dequeue to refill one quantum without throttling. This approach
requires that initial_quantum >= quantum.
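As a worked example, assume the defaults set in fq_init() below, i.e.
quantum = 2 * MTU and a configured initial quantum of 10 * MTU:

  configured initial_quantum   = 10 * MTU
  stored q->initial_quantum    = 10 * MTU - quantum = 8 * MTU
  new flow                     : f->credit = 8 * MTU, time_credit_filled = 0
  first refill in fq_dequeue() : f->credit += quantum

Since time_credit_filled is still zero when that first refill happens, the
extra quantum is handed out without throttling, and the flow still receives
the full 10 * MTU of initial credit that was configured. This is also why
initial_quantum must be at least quantum; fq_change() clamps the stored
value to zero otherwise.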
This increases the per-flow memory footprint from 104 to 112 bytes.
V2: avoids ktime_get_ns() on enqueue, as suggested by Eric Dumazet.
Signed-off-by: Kenneth Klette Jonassen <kennetkl@....uio.no>
---
net/sched/sch_fq.c | 27 ++++++++++++++++++++-------
1 file changed, 20 insertions(+), 7 deletions(-)
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 313794b..43d5b74 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -71,6 +71,7 @@ struct fq_flow {
struct rb_node rate_node; /* anchor in q->delayed tree */
u64 time_next_packet;
+ u64 time_credit_filled;
};
struct fq_flow_head {
@@ -250,6 +251,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
if (unlikely(skb->sk &&
f->socket_hash != sk->sk_hash)) {
f->credit = q->initial_quantum;
+ f->time_credit_filled = 0ULL;
f->socket_hash = sk->sk_hash;
f->time_next_packet = 0ULL;
}
@@ -271,6 +273,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
if (skb->sk)
f->socket_hash = sk->sk_hash;
f->credit = q->initial_quantum;
+ f->time_credit_filled = 0ULL;
rb_link_node(&f->fq_node, parent, p);
rb_insert_color(&f->fq_node, root);
@@ -374,8 +377,10 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
qdisc_qstats_backlog_inc(sch, skb);
if (fq_flow_is_detached(f)) {
fq_flow_add_tail(&q->new_flows, f);
- if (time_after(jiffies, f->age + q->flow_refill_delay))
- f->credit = max_t(u32, f->credit, q->quantum);
+ if (time_after(jiffies, f->age + q->flow_refill_delay)) {
+ f->credit = max_t(int, f->credit - q->quantum, 0);
+ f->time_credit_filled = 0ULL;
+ }
q->inactive_flows--;
}
@@ -440,6 +445,7 @@ begin:
if (f->credit <= 0) {
f->credit += q->quantum;
+ f->time_credit_filled = max(now, f->time_next_packet);
head->first = f->next;
fq_flow_add_tail(&q->old_flows, f);
goto begin;
@@ -489,7 +495,7 @@ begin:
q->stat_pkts_too_long++;
}
- f->time_next_packet = now + len;
+ f->time_next_packet = max(now, f->time_credit_filled + len);
}
out:
qdisc_bstats_update(sch, skb);
@@ -679,8 +685,14 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
err = -EINVAL;
}
- if (tb[TCA_FQ_INITIAL_QUANTUM])
- q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]);
+ if (tb[TCA_FQ_INITIAL_QUANTUM]) {
+ u32 initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]);
+
+ if (initial_quantum > q->quantum)
+ q->initial_quantum = initial_quantum - q->quantum;
+ else
+ q->initial_quantum = 0;
+ }
if (tb[TCA_FQ_FLOW_DEFAULT_RATE])
pr_warn_ratelimited("sch_fq: defrate %u ignored.\n",
@@ -740,7 +752,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt)
sch->limit = 10000;
q->flow_plimit = 100;
q->quantum = 2 * psched_mtu(qdisc_dev(sch));
- q->initial_quantum = 10 * psched_mtu(qdisc_dev(sch));
+ q->initial_quantum = 10 * psched_mtu(qdisc_dev(sch)) - q->quantum;
q->flow_refill_delay = msecs_to_jiffies(40);
q->flow_max_rate = ~0U;
q->rate_enable = 1;
@@ -773,7 +785,8 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) ||
nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) ||
nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) ||
- nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) ||
+ nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM,
+ q->initial_quantum + q->quantum) ||
nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) ||
nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) ||
nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY,
--
1.9.1