Message-Id: <20190225102051.12268-6-lesliemonis@gmail.com>
Date:   Mon, 25 Feb 2019 15:50:49 +0530
From:   Leslie Monis <lesliemonis@...il.com>
To:     jhs@...atatu.com
Cc:     netdev@...r.kernel.org, dave@...t.net,
        "Mohit P. Tahiliani" <tahiliani@...k.edu.in>,
        Dhaval Khandla <dhavaljkhandla26@...il.com>,
        Hrishikesh Hiraskar <hrishihiraskar@...il.com>,
        Manish Kumar B <bmanish15597@...il.com>,
        "Sachin D . Patil" <sdp.sachin@...il.com>,
        Leslie Monis <lesliemonis@...il.com>
Subject: [PATCH net-next v2 5/7] net: sched: pie: add more cases to auto-tune alpha and beta

From: "Mohit P. Tahiliani" <tahiliani@...k.edu.in>

The current implementation scales the local alpha and beta
variables in the calculate_probability function by the same
amount for all values of drop probability below 1%.

RFC 8033 suggests using additional cases for auto-tuning
alpha and beta when the drop probability is less than 1%.
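
To make the new tuning concrete, here is a minimal userspace sketch
(not part of the patch) of the scaling applied to alpha in the
reworked calculate_probability: the baseline >> 4 scaling is kept,
alpha is halved once the drop probability falls below 10%, and it is
quartered again for each further decade (1%, 0.1%, ..., 0.0001%) the
probability lies below. The helper name is invented for illustration,
PSCHED_TICKS_PER_SEC is assumed to be NSEC_PER_SEC >> 6 (15625000),
and beta is scaled the same way:

#include <stdint.h>
#include <stdio.h>

#define MAX_PROB		UINT64_MAX
#define PSCHED_TICKS_PER_SEC	15625000ULL	/* assumed: NSEC_PER_SEC >> 6 */

/* Return the scaled alpha for a drop probability "prob" (scaled by the
 * u64 limit) and a user-supplied alpha in the 0-32 range.
 */
static uint64_t scale_alpha(uint64_t prob, uint32_t param_alpha)
{
	/* baseline: scale to per-psched-tick units and divide by 16
	 * to bring the 0-32 user value into the 0-2 range
	 */
	uint64_t alpha = ((uint64_t)param_alpha *
			  (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
	uint32_t power;

	if (prob < MAX_PROB / 10) {
		alpha >>= 1;			/* below 10%: halve */

		power = 100;
		while (prob < MAX_PROB / power && power <= 1000000) {
			alpha >>= 2;		/* each further decade: quarter */
			power *= 10;
		}
	}
	return alpha;
}

int main(void)
{
	uint64_t div;

	/* effective alpha for probabilities just below 10%, 1%, ...,
	 * 0.00001%, using param_alpha = 2 (i.e. 0.125)
	 */
	for (div = 10; div <= 10000000; div *= 10)
		printf("prob < 1/%-8llu -> alpha %llu\n",
		       (unsigned long long)div,
		       (unsigned long long)scale_alpha(MAX_PROB / div - 1, 2));
	return 0;
}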

In order to add more auto-tuning cases, MAX_PROB must be
scaled by the u64 limit instead of the u32 limit, so that the
local alpha and beta variables in the calculate_probability
function do not underflow to zero when they are scaled down.
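
As a worked example (assuming PSCHED_TICKS_PER_SEC is
NSEC_PER_SEC >> 6, i.e. 15625000): with the u32 limit,
MAX_PROB / PSCHED_TICKS_PER_SEC is only 274, so the deepest new
case (a total right shift of 15) collapses even the maximum
alpha of 32 to (32 * 274) >> 15 = 0. With the u64 limit the same
expression is (32 * 1180591620717) >> 15 = 1152921504, which
keeps enough precision for the update.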

Signed-off-by: Mohit P. Tahiliani <tahiliani@...k.edu.in>
Signed-off-by: Dhaval Khandla <dhavaljkhandla26@...il.com>
Signed-off-by: Hrishikesh Hiraskar <hrishihiraskar@...il.com>
Signed-off-by: Manish Kumar B <bmanish15597@...il.com>
Signed-off-by: Sachin D. Patil <sdp.sachin@...il.com>
Signed-off-by: Leslie Monis <lesliemonis@...il.com>
---
 include/uapi/linux/pkt_sched.h |  2 +-
 net/sched/sch_pie.c            | 65 +++++++++++++++++-----------------
 2 files changed, 33 insertions(+), 34 deletions(-)

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 0d18b1d1fbbc..1eb572ef3f27 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -954,7 +954,7 @@ enum {
 #define TCA_PIE_MAX   (__TCA_PIE_MAX - 1)
 
 struct tc_pie_xstats {
-	__u32 prob;             /* current probability */
+	__u64 prob;             /* current probability */
 	__u32 delay;            /* current delay in ms */
 	__u32 avg_dq_rate;      /* current average dq_rate in bits/pie_time */
 	__u32 packets_in;       /* total number of packets enqueued */
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index d88ab53593b3..b9f586cfb51c 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -33,7 +33,7 @@
 
 #define QUEUE_THRESHOLD 16384
 #define DQCOUNT_INVALID -1
-#define MAX_PROB  0xffffffff
+#define MAX_PROB 0xffffffffffffffff
 #define PIE_SCALE 8
 
 /* parameters used */
@@ -49,7 +49,7 @@ struct pie_params {
 
 /* variables used */
 struct pie_vars {
-	u32 prob;		/* probability but scaled by u32 limit. */
+	u64 prob;		/* probability but scaled by u64 limit. */
 	psched_time_t burst_time;
 	psched_time_t qdelay;
 	psched_time_t qdelay_old;
@@ -99,8 +99,8 @@ static void pie_vars_init(struct pie_vars *vars)
 static bool drop_early(struct Qdisc *sch, u32 packet_size)
 {
 	struct pie_sched_data *q = qdisc_priv(sch);
-	u32 rnd;
-	u32 local_prob = q->vars.prob;
+	u64 rnd;
+	u64 local_prob = q->vars.prob;
 	u32 mtu = psched_mtu(qdisc_dev(sch));
 
 	/* If there is still burst allowance left skip random early drop */
@@ -124,11 +124,11 @@ static bool drop_early(struct Qdisc *sch, u32 packet_size)
 	 * probablity. Smaller packets will have lower drop prob in this case
 	 */
 	if (q->params.bytemode && packet_size <= mtu)
-		local_prob = (local_prob / mtu) * packet_size;
+		local_prob = (local_prob / (u64)mtu) * (u64)packet_size;
 	else
 		local_prob = q->vars.prob;
 
-	rnd = prandom_u32();
+	prandom_bytes(&rnd, 8);
 	if (rnd < local_prob)
 		return true;
 
@@ -317,9 +317,10 @@ static void calculate_probability(struct Qdisc *sch)
 	u32 qlen = sch->qstats.backlog;	/* queue size in bytes */
 	psched_time_t qdelay = 0;	/* in pschedtime */
 	psched_time_t qdelay_old = q->vars.qdelay;	/* in pschedtime */
-	s32 delta = 0;		/* determines the change in probability */
-	u32 oldprob;
-	u32 alpha, beta;
+	s64 delta = 0;		/* determines the change in probability */
+	u64 oldprob;
+	u64 alpha, beta;
+	u32 power;
 	bool update_prob = true;
 
 	q->vars.qdelay_old = q->vars.qdelay;
@@ -339,38 +340,36 @@ static void calculate_probability(struct Qdisc *sch)
 	 * value for alpha as 0.125. In this implementation, we use values 0-32
 	 * passed from user space to represent this. Also, alpha and beta have
 	 * unit of HZ and need to be scaled before they can used to update
-	 * probability. alpha/beta are updated locally below by 1) scaling them
-	 * appropriately 2) scaling down by 16 to come to 0-2 range.
-	 * Please see paper for details.
-	 *
-	 * We scale alpha and beta differently depending on whether we are in
-	 * light, medium or high dropping mode.
+	 * probability. alpha/beta are updated locally below by scaling down
+	 * by 16 to come to 0-2 range.
 	 */
-	if (q->vars.prob < MAX_PROB / 100) {
-		alpha =
-		    (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7;
-		beta =
-		    (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7;
-	} else if (q->vars.prob < MAX_PROB / 10) {
-		alpha =
-		    (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5;
-		beta =
-		    (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5;
-	} else {
-		alpha =
-		    (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
-		beta =
-		    (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+	alpha = ((u64)q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+	beta = ((u64)q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+
+	/* We scale alpha and beta differently depending on how heavy the
+	 * congestion is. Please see RFC 8033 for details.
+	 */
+	if (q->vars.prob < MAX_PROB / 10) {
+		alpha >>= 1;
+		beta >>= 1;
+
+		power = 100;
+		while (q->vars.prob < MAX_PROB / (u64)power &&
+		       power <= 1000000) {
+			alpha >>= 2;
+			beta >>= 2;
+			power *= 10;
+		}
 	}
 
 	/* alpha and beta should be between 0 and 32, in multiples of 1/16 */
-	delta += alpha * ((qdelay - q->params.target));
-	delta += beta * ((qdelay - qdelay_old));
+	delta += alpha * (u64)(qdelay - q->params.target);
+	delta += beta * (u64)(qdelay - qdelay_old);
 
 	oldprob = q->vars.prob;
 
 	/* to ensure we increase probability in steps of no more than 2% */
-	if (delta > (s32)(MAX_PROB / (100 / 2)) &&
+	if (delta > (s64)(MAX_PROB / (100 / 2)) &&
 	    q->vars.prob >= MAX_PROB / 10)
 		delta = (MAX_PROB / 100) * 2;
 
-- 
2.17.1
