lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 30 Jan 2019 15:01:04 +0100
From:   Peter Zijlstra <peterz@...radead.org>
To:     Vincent Guittot <vincent.guittot@...aro.org>
Cc:     linux-kernel@...r.kernel.org, mingo@...hat.com, tj@...nel.org,
        sargun@...gun.me
Subject: Re: [PATCH v2] sched/fair: Fix insertion in rq->leaf_cfs_rq_list

On Wed, Jan 30, 2019 at 06:22:47AM +0100, Vincent Guittot wrote:
> Sargun reported a crash:
>   "I picked up c40f7d74c741a907cfaeb73a7697081881c497d0 sched/fair: Fix
>    infinite loop in update_blocked_averages() by reverting a9e7f6544b9c
>    and put it on top of 4.19.13. In addition to this, I uninlined
>    list_add_leaf_cfs_rq for debugging.
> 
>    This revealed a new bug that we didn't get to because we kept getting
>    crashes from the previous issue. When we are running with cgroups that
>    are rapidly changing, with CFS bandwidth control, and in addition
>    using the cpusets cgroup, we see this crash. Specifically, it seems to
>    occur with cgroups that are throttled and we change the allowed
>    cpuset."
> 
> The algorithm used to order cfs_rq in rq->leaf_cfs_rq_list assumes that
> it will walk down to root the 1st time a cfs_rq is used and we will finish
> to add either a cfs_rq without parent or a cfs_rq with a parent that is
> already on the list. But this is not always true in presence of throttling.
> Because a cfs_rq can be throttled even if it has never been used but other CPUs
> of the cgroup have already used all the bandwdith, we are not sure to go down to
> the root and add all cfs_rq in the list.
> 
> Ensure that all cfs_rq will be added in the list even if they are throttled.
> 
> Reported-by: Sargun Dhillon <sargun@...gun.me>
> Fixes: 9c2791f936ef ("Fix hierarchical order in rq->leaf_cfs_rq_list")
> Signed-off-by: Vincent Guittot <vincent.guittot@...aro.org>

Given my previous patch; how about something like so instead?

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -282,13 +282,15 @@ static inline struct cfs_rq *group_cfs_r
 	return grp->my_q;
 }
 
-static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
+static inline bool list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 {
 	struct rq *rq = rq_of(cfs_rq);
 	int cpu = cpu_of(rq);
 
 	if (cfs_rq->on_list)
-		return;
+		return rq->tmp_alone_branch == &rq->leaf_cfs_rq_list;
+
+	cfs_rq->on_list = 1;
 
 	/*
 	 * Ensure we either appear before our parent (if already
@@ -315,7 +317,10 @@ static inline void list_add_leaf_cfs_rq(
 		 * list.
 		 */
 		rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
-	} else if (!cfs_rq->tg->parent) {
+		return true;
+	}
+
+	if (!cfs_rq->tg->parent) {
 		/*
 		 * cfs rq without parent should be put
 		 * at the tail of the list.
@@ -327,23 +332,22 @@ static inline void list_add_leaf_cfs_rq(
 		 * tmp_alone_branch to the beginning of the list.
 		 */
 		rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
-	} else {
-		/*
-		 * The parent has not already been added so we want to
-		 * make sure that it will be put after us.
-		 * tmp_alone_branch points to the begin of the branch
-		 * where we will add parent.
-		 */
-		list_add_rcu(&cfs_rq->leaf_cfs_rq_list,
-			rq->tmp_alone_branch);
-		/*
-		 * update tmp_alone_branch to points to the new begin
-		 * of the branch
-		 */
-		rq->tmp_alone_branch = &cfs_rq->leaf_cfs_rq_list;
+		return true;
 	}
 
-	cfs_rq->on_list = 1;
+	/*
+	 * The parent has not already been added so we want to
+	 * make sure that it will be put after us.
+	 * tmp_alone_branch points to the begin of the branch
+	 * where we will add parent.
+	 */
+	list_add_rcu(&cfs_rq->leaf_cfs_rq_list, rq->tmp_alone_branch);
+	/*
+	 * update tmp_alone_branch to points to the new begin
+	 * of the branch
+	 */
+	rq->tmp_alone_branch = &cfs_rq->leaf_cfs_rq_list;
+	return false;
 }
 
 static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
@@ -4999,6 +5003,12 @@ static void __maybe_unused unthrottle_of
 }
 
 #else /* CONFIG_CFS_BANDWIDTH */
+
+static inline bool cfs_bandwidth_used(void)
+{
+	return false;
+}
+
 static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
 {
 	return rq_clock_task(rq_of(cfs_rq));
@@ -5190,6 +5200,21 @@ enqueue_task_fair(struct rq *rq, struct
 
 	}
 
+	if (cfs_bandwidth_used()) {
+		/*
+		 * When bandwidth control is enabled; the cfs_rq_throttled()
+		 * breaks in the above iteration can result in incomplete
+		 * leaf list maintenance, resulting in triggering the assertion
+		 * below.
+		 */
+		for_each_sched_entity(se) {
+			cfs_rq = cfs_rq_of(se);
+
+			if (list_add_leaf_cfs_rq(cfs_rq))
+				break;
+		}
+	}
+
 	assert_list_leaf_cfs_rq(rq);
 
 	hrtick_update(rq);

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ