lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Wed, 19 Jan 2011 10:44:36 +0100
From:	Peter Zijlstra <peterz@...radead.org>
To:	Yong Zhang <yong.zhang0@...il.com>
Cc:	samu.p.onkalo@...ia.com, mingo@...e.hu,
	"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
	tglx <tglx@...utronix.de>
Subject: Re: Bug in scheduler when using rt_mutex

On Wed, 2011-01-19 at 10:38 +0800, Yong Zhang wrote:
> > Index: linux-2.6/kernel/sched_fair.c
> > ===================================================================
> > --- linux-2.6.orig/kernel/sched_fair.c
> > +++ linux-2.6/kernel/sched_fair.c
> > @@ -4075,6 +4075,22 @@ static void prio_changed_fair(struct rq
> >  static void switched_to_fair(struct rq *rq, struct task_struct *p,
> >                             int running)
> >  {
> > +       struct sched_entity *se = &p->se;
> > +       struct cfs_rq *cfs_rq = cfs_rq_of(se);
> > +
> > +       if (se->on_rq && cfs_rq->curr != se)
> 
> (cfs_rq->curr != se) equals to (!running), no?

more or less, the idea is that we only call __{dequeue,enqueue}_entity()
when the task is actually in the tree and current is not.

> > +               __dequeue_entity(cfs_rq, se);
> > +
> > +       /*
> > +        * se->vruntime can be completely out there, there is no telling
> > +        * how long this task was !fair and on what CPU if any it became
> > +        * !fair. Therefore, reset it to a known, reasonable value.
> > +        */
> > +       se->vruntime = cfs_rq->min_vruntime;
> 
> But this is not fair for !SLEEP task.
> You know se->vruntime -= cfs_rq->min_vruntime for !SLEEP task,
> then after it goes through sched_fair-->sched_rt-->sched_fair by some
> means, current cfs_rq->min_vruntime is added back.
> 
> But here se is put before where it should be. Is this what we want?

well, it's more or less screwy anyway, since we don't know for how long
the task was !fair and what cpu it came from etc..

But I guess you're right, we should at least pretend the whole
min_vruntime thing is the 0-lag point (it's not) and preserve 'lag' like
we do for migrations... Something like the below.. except I've got a
massive headache and I'm not at all sure I got the switched_from_fair()
bit right.

---
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -8108,6 +8108,8 @@ EXPORT_SYMBOL(__might_sleep);
 #ifdef CONFIG_MAGIC_SYSRQ
 static void normalize_task(struct rq *rq, struct task_struct *p)
 {
+	struct sched_class *prev_class = p->sched_class;
+	int old_prio = p->prio;
 	int on_rq;
 
 	on_rq = p->se.on_rq;
@@ -8118,6 +8120,8 @@ static void normalize_task(struct rq *rq
 		activate_task(rq, p, 0);
 		resched_task(rq->curr);
 	}
+
+	check_class_changed(rq, p, prev_class, old_prio, task_current(rq, p));
 }
 
 void normalize_rt_tasks(void)
Index: linux-2.6/kernel/sched_fair.c
===================================================================
--- linux-2.6.orig/kernel/sched_fair.c
+++ linux-2.6/kernel/sched_fair.c
@@ -4066,12 +4066,33 @@ static void prio_changed_fair(struct rq
 		check_preempt_curr(rq, p, 0);
 }
 
+static void
+switched_from_fair(struct rq *rq, struct task_struct *p, int running)
+{
+	struct sched_entity *se = &p->se;
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+	if (!se->on_rq && p->state != TASK_RUNNING)
+		se->vruntime -= cfs_rq->min_vruntime;
+}
+
 /*
  * We switched to the sched_fair class.
  */
-static void switched_to_fair(struct rq *rq, struct task_struct *p,
-			     int running)
+static void
+switched_to_fair(struct rq *rq, struct task_struct *p, int running)
 {
+	struct sched_entity *se = &p->se;
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+	if (se->on_rq && cfs_rq->curr != se)
+		__dequeue_entity(cfs_rq, se);
+
+	se->vruntime += cfs_rq->min_vruntime;
+
+	if (se->on_rq && cfs_rq->curr != se)
+		__enqueue_entity(cfs_rq, se);
+
 	/*
 	 * We were most likely switched from sched_rt, so
 	 * kick off the schedule if running, otherwise just see
@@ -4163,6 +4184,7 @@ static const struct sched_class fair_sch
 	.task_fork		= task_fork_fair,
 
 	.prio_changed		= prio_changed_fair,
+	.switched_from		= switched_from_fair,
 	.switched_to		= switched_to_fair,
 
 	.get_rr_interval	= get_rr_interval_fair,

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ