Date:   Tue, 16 Mar 2021 11:40:35 +0100
From:   Peter Zijlstra <peterz@...radead.org>
To:     Huaixin Chang <changhuaixin@...ux.alibaba.com>
Cc:     bsegall@...gle.com, dietmar.eggemann@....com,
        juri.lelli@...hat.com, khlebnikov@...dex-team.ru,
        linux-kernel@...r.kernel.org, mgorman@...e.de, mingo@...hat.com,
        odin@...d.al, odin@...dal.com, pauld@...head.com, pjt@...gle.com,
        rostedt@...dmis.org, shanpeic@...ux.alibaba.com, tj@...nel.org,
        vincent.guittot@...aro.org, xiyou.wangcong@...il.com
Subject: Re: [PATCH v4 1/4] sched/fair: Introduce primitives for CFS
 bandwidth burst

On Tue, Mar 16, 2021 at 12:49:28PM +0800, Huaixin Chang wrote:
> And the maximum amount of CPU a group can consume in
> a given period is "buffer", which is equivalent to "quota" + "burst"
> in case this group has done enough accumulation.

I'm confused as heck about cfs_b->buffer. Why do you need that? What's
wrong with cfs_b->runtime?

Currently, by being strict, we ignore any remaining runtime and the
period timer resets runtime to quota and life goes on. What you want to
do is instead of resetting runtime, add quota and limit the total.

That is, currently it does:

	runtime = min(runtime + quota, quota);

which, by virtue of runtime not being allowed to go negative, is the
exact same as:

	runtime = quota;

Which is what we have in refill.

Fix that to be something like:

	runtime = min(runtime + quota, quota + burst);

and you're basically done. And that seems *much* simpler.
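
To make the arithmetic concrete, here's a minimal userspace sketch of
that refill rule (the quota/burst values are hypothetical, not the
kernel code):

	#include <stdio.h>

	static unsigned long long min_ull(unsigned long long a,
					  unsigned long long b)
	{
		return a < b ? a : b;
	}

	int main(void)
	{
		/* e.g. 100ms quota and 50ms burst per period */
		unsigned long long quota = 100, burst = 50;
		unsigned long long runtime = 0;	/* budget fully drained */

		for (int p = 1; p <= 3; p++) {
			runtime = min_ull(runtime + quota, quota + burst);
			printf("after refill %d: runtime = %llu\n", p, runtime);
		}
		/*
		 * Prints 100, 150, 150: an idle group accumulates up to
		 * quota + burst, while the old strict refill would pin
		 * runtime at quota every period.
		 */
		return 0;
	}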

What am I missing, why have you made it so complicated?


/me looks again..

Oooh, I think I see, all this is because you don't do your constraints
right. Removing static from max_cfs_runtime is a big clue you did that
wrong.
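
The constraints to enforce are burst <= quota and quota + burst <=
max_cfs_runtime (see the tg_set_cfs_bandwidth() hunk below). For scale,
a quick check of that bound (assuming MAX_BW is
(1ULL << (64 - BW_SHIFT)) - 1, as in kernel/sched/sched.h):

	#include <stdio.h>

	#define BW_SHIFT	20
	#define MAX_BW		((1ULL << (64 - BW_SHIFT)) - 1)
	#define NSEC_PER_USEC	1000ULL

	int main(void)
	{
		/* the limit quota + burst must stay under */
		unsigned long long max_cfs_runtime = MAX_BW * NSEC_PER_USEC;

		/* ~1.76e16 ns, i.e. a bit more than 203 days */
		printf("%llu ns = %.1f days\n", max_cfs_runtime,
		       max_cfs_runtime / 1e9 / 86400);
		return 0;
	}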

Something like this on top of the first two patches. Much simpler!

Now I just need to figure out wth you mucked about with where we call
refill.

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8954,7 +8954,7 @@ static DEFINE_MUTEX(cfs_constraints_mute
 const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */
 static const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */
 /* More than 203 days if BW_SHIFT equals 20. */
-const u64 max_cfs_runtime = MAX_BW * NSEC_PER_USEC;
+static const u64 max_cfs_runtime = MAX_BW * NSEC_PER_USEC;
 
 static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime);
 
@@ -8989,10 +8989,10 @@ static int tg_set_cfs_bandwidth(struct t
 	if (quota != RUNTIME_INF && quota > max_cfs_runtime)
 		return -EINVAL;
 
-	/*
-	 * Bound burst to defend burst against overflow during bandwidth shift.
-	 */
-	if (burst > max_cfs_runtime)
+	if (burst > quota)
+		return -EINVAL;
+
+	if (quota + burst > max_cfs_runtime)
 		return -EINVAL;
 
 	/*
@@ -9019,8 +9019,6 @@ static int tg_set_cfs_bandwidth(struct t
 	cfs_b->burst = burst;
 
 	if (runtime_enabled) {
-		cfs_b->buffer = min(max_cfs_runtime, quota + burst);
-		cfs_b->max_overrun = DIV_ROUND_UP_ULL(max_cfs_runtime, quota);
 		cfs_b->runtime = cfs_b->quota;
 
 		/* Restart the period timer (if active) to handle new period expiry: */
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4621,23 +4621,22 @@ static inline u64 sched_cfs_bandwidth_sl
  *
  * requires cfs_b->lock
  */
-static void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b,
-					   u64 overrun)
+static void
+__refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b, u64 overrun)
 {
-	u64 refill;
-
-	if (cfs_b->quota != RUNTIME_INF) {
-
-		if (!sysctl_sched_cfs_bw_burst_enabled) {
-			cfs_b->runtime = cfs_b->quota;
-			return;
-		}
+	if (unlikely(cfs_b->quota == RUNTIME_INF))
+		return;
 
-		overrun = min(overrun, cfs_b->max_overrun);
-		refill = cfs_b->quota * overrun;
-		cfs_b->runtime += refill;
-		cfs_b->runtime = min(cfs_b->runtime, cfs_b->buffer);
+	if (!sysctl_sched_cfs_bw_burst_enabled) {
+		cfs_b->runtime = cfs_b->quota;
+		return;
 	}
+
+	/*
+	 * Ignore @overrun since burst <= quota.
+	 */
+	cfs_b->runtime += cfs_b->quota;
+	cfs_b->runtime = min(cfs_b->runtime, cfs_b->quota + cfs_b->burst);
 }
 
 static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
@@ -5226,7 +5225,6 @@ static enum hrtimer_restart sched_cfs_sl
 }
 
 extern const u64 max_cfs_quota_period;
-extern const u64 max_cfs_runtime;
 
 static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
 {
@@ -5256,18 +5254,7 @@ static enum hrtimer_restart sched_cfs_pe
 			new = old * 2;
 			if (new < max_cfs_quota_period) {
 				cfs_b->period = ns_to_ktime(new);
-				cfs_b->quota = min(cfs_b->quota * 2,
-						   max_cfs_runtime);
-
-				cfs_b->buffer = min(cfs_b->quota + cfs_b->burst,
-						    max_cfs_runtime);
-				/*
-				 * Add 1 in case max_overrun becomes 0.
-				 * 0 max_overrun will cause no runtime being
-				 * refilled in __refill_cfs_bandwidth_runtime().
-				 */
-				cfs_b->max_overrun >>= 1;
-				cfs_b->max_overrun++;
+				cfs_b->quota *= 2;
 
 				pr_warn_ratelimited(
 	"cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us = %lld, cfs_quota_us = %lld)\n",
@@ -5300,7 +5287,6 @@ void init_cfs_bandwidth(struct cfs_bandw
 	cfs_b->quota = RUNTIME_INF;
 	cfs_b->period = ns_to_ktime(default_cfs_period());
 	cfs_b->burst = 0;
-	cfs_b->buffer = RUNTIME_INF;
 
 	INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
 	hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -366,8 +366,6 @@ struct cfs_bandwidth {
 	u64			quota;
 	u64			runtime;
 	u64			burst;
-	u64			buffer;
-	u64			max_overrun;
 	s64			hierarchical_quota;
 
 	u8			idle;
