lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20180907214047.26914-49-jschoenh@amazon.de>
Date:   Fri,  7 Sep 2018 23:40:35 +0200
From:   Jan H. Schönherr <jschoenh@...zon.de>
To:     Ingo Molnar <mingo@...hat.com>,
        Peter Zijlstra <peterz@...radead.org>
Cc:     Jan H. Schönherr <jschoenh@...zon.de>,
        linux-kernel@...r.kernel.org
Subject: [RFC 48/60] cosched: Adjust SE traversal and locking for yielding and buddies

Buddies are not very well defined with coscheduling. Usually, they
bubble up the hierarchy on a single CPU to steer task picking either
away from a certain task (yield a task: skip buddy) or towards a certain
task (yield to a task, execute a woken task: next buddy; execute a
recently preempted task: last buddy).

If we still allow buddies to bubble up the full hierarchy with
coscheduling, then, for example, yielding a task would always yield the
coscheduled set of tasks it is part of. If we instead keep the effects
constrained to a coscheduled set, then one set could never preempt
another set.

For now, we limit buddy activities to the scope of the leader that
does the activity, with an exception for preemption, which may operate
in the scope of a different leader. That makes yielding behavior
potentially weird and asymmetric for the time being, but it seems to
work well for preemption.

Signed-off-by: Jan H. Schönherr <jschoenh@...zon.de>
---
 kernel/sched/fair.c | 51 ++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 42 insertions(+), 9 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2227e4840355..6d64f4478fda 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3962,7 +3962,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
 static void __clear_buddies_last(struct sched_entity *se)
 {
-	for_each_sched_entity(se) {
+	for_each_owned_sched_entity(se) {
 		struct cfs_rq *cfs_rq = cfs_rq_of(se);
 		if (cfs_rq->last != se)
 			break;
@@ -3973,7 +3973,7 @@ static void __clear_buddies_last(struct sched_entity *se)
 
 static void __clear_buddies_next(struct sched_entity *se)
 {
-	for_each_sched_entity(se) {
+	for_each_owned_sched_entity(se) {
 		struct cfs_rq *cfs_rq = cfs_rq_of(se);
 		if (cfs_rq->next != se)
 			break;
@@ -3984,7 +3984,7 @@ static void __clear_buddies_next(struct sched_entity *se)
 
 static void __clear_buddies_skip(struct sched_entity *se)
 {
-	for_each_sched_entity(se) {
+	for_each_owned_sched_entity(se) {
 		struct cfs_rq *cfs_rq = cfs_rq_of(se);
 		if (cfs_rq->skip != se)
 			break;
@@ -4005,6 +4005,18 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		__clear_buddies_skip(se);
 }
 
+static void clear_buddies_lock(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	struct rq_owner_flags orf;
+
+	if (cfs_rq->last != se && cfs_rq->next != se && cfs_rq->skip != se)
+		return;
+
+	rq_lock_owned(hrq_of(cfs_rq), &orf);
+	clear_buddies(cfs_rq, se);
+	rq_unlock_owned(hrq_of(cfs_rq), &orf);
+}
+
 static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
 
 static void
@@ -4028,7 +4040,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
 	update_stats_dequeue(cfs_rq, se, flags);
 
-	clear_buddies(cfs_rq, se);
+	clear_buddies_lock(cfs_rq, se);
 
 	if (se != cfs_rq->curr)
 		__dequeue_entity(cfs_rq, se);
@@ -6547,31 +6559,45 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
 
 static void set_last_buddy(struct sched_entity *se)
 {
+	struct rq_owner_flags orf;
+	struct rq *rq;
+
 	if (entity_is_task(se) && unlikely(task_of(se)->policy == SCHED_IDLE))
 		return;
 
-	for_each_sched_entity(se) {
+	rq = hrq_of(cfs_rq_of(se));
+
+	rq_lock_owned(rq, &orf);
+	for_each_owned_sched_entity(se) {
 		if (SCHED_WARN_ON(!se->on_rq))
-			return;
+			break;
 		cfs_rq_of(se)->last = se;
 	}
+	rq_unlock_owned(rq, &orf);
 }
 
 static void set_next_buddy(struct sched_entity *se)
 {
+	struct rq_owner_flags orf;
+	struct rq *rq;
+
 	if (entity_is_task(se) && unlikely(task_of(se)->policy == SCHED_IDLE))
 		return;
 
-	for_each_sched_entity(se) {
+	rq = hrq_of(cfs_rq_of(se));
+
+	rq_lock_owned(rq, &orf);
+	for_each_owned_sched_entity(se) {
 		if (SCHED_WARN_ON(!se->on_rq))
-			return;
+			break;
 		cfs_rq_of(se)->next = se;
 	}
+	rq_unlock_owned(rq, &orf);
 }
 
 static void set_skip_buddy(struct sched_entity *se)
 {
-	for_each_sched_entity(se)
+	for_each_owned_sched_entity(se)
 		cfs_rq_of(se)->skip = se;
 }
 
@@ -6831,6 +6857,7 @@ static void yield_task_fair(struct rq *rq)
 	struct task_struct *curr = rq->curr;
 	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
 	struct sched_entity *se = &curr->se;
+	struct rq_owner_flags orf;
 
 	/*
 	 * Are we the only task in the tree?
@@ -6838,6 +6865,7 @@ static void yield_task_fair(struct rq *rq)
 	if (unlikely(rq->nr_running == 1))
 		return;
 
+	rq_lock_owned(rq, &orf);
 	clear_buddies(cfs_rq, se);
 
 	if (curr->policy != SCHED_BATCH) {
@@ -6855,21 +6883,26 @@ static void yield_task_fair(struct rq *rq)
 	}
 
 	set_skip_buddy(se);
+	rq_unlock_owned(rq, &orf);
 }
 
 static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preempt)
 {
 	struct sched_entity *se = &p->se;
+	struct rq_owner_flags orf;
 
 	/* throttled hierarchies are not runnable */
 	if (!se->on_rq || throttled_hierarchy(cfs_rq_of(se)))
 		return false;
 
+	rq_lock_owned(rq, &orf);
+
 	/* Tell the scheduler that we'd really like pse to run next. */
 	set_next_buddy(se);
 
 	yield_task_fair(rq);
 
+	rq_unlock_owned(rq, &orf);
 	return true;
 }
 
-- 
2.9.3.1.gcba166c.dirty

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ