Date:	Thu, 12 May 2011 01:30:02 +0800
From:	Cheng Xu <chengxu@...ux.vnet.ibm.com>
To:	Peter Zijlstra <peterz@...radead.org>
CC:	Ingo Molnar <mingo@...e.hu>,
	Paul Mckenney <paulmck@...ux.vnet.ibm.com>,
	LKML <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH] sched: rt_rq runtime leakage bug fix

Hi Peter,

I tried it but hit a boot-time error, "Unable to handle kernel paging request for data at address 0x100000008", and would therefore like to propose an alternative:

#define for_each_rt_rq(rt_rq, iter, rq) \
        for (iter = list_entry_rcu(task_groups.next, typeof(*iter), list); \
             (&iter->list != &task_groups) && (rt_rq = iter->rt_rq[cpu_of(rq)]); \
             iter = list_entry_rcu(iter->list.next, typeof(*iter), list))

This works and seems to pass the tests. Is this correct from a scheduler perspective?
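To explain the "&&" in the condition: the macro quoted below loads rt_rq = iter->rt_rq[cpu_of(rq)] in the increment expression, which runs before the termination test, so on the final wrap-around it treats the bare task_groups list head as a task_group and chases whatever pointer it finds there; I suspect that is where the paging fault above comes from. Keeping the load behind "&&" means it is skipped once the iterator reaches the head, and a NULL rt_rq also terminates the walk. A throw-away user-space sketch of the guarded form, with simplified stand-in types (not part of the patch):

#include <stdio.h>
#include <stddef.h>

struct list_head { struct list_head *next; };

struct group {
	struct list_head list;
	int rt_rq;			/* stand-in for iter->rt_rq[cpu_of(rq)] */
};

#define list_entry(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

int main(void)
{
	struct list_head head;		/* bare list head, like task_groups */
	struct group a = { .rt_rq = 1 }, b = { .rt_rq = 2 };
	struct group *iter;
	int rt_rq;

	/* circular list: head -> a -> b -> head */
	head.next = &a.list;
	a.list.next = &b.list;
	b.list.next = &head;

	/*
	 * Guarded form: the dereference of iter sits after "&&", so it is
	 * never evaluated once iter has wrapped around to the bare head.
	 */
	for (iter = list_entry(head.next, struct group, list);
	     &iter->list != &head && (rt_rq = iter->rt_rq);
	     iter = list_entry(iter->list.next, struct group, list))
		printf("rt_rq = %d\n", rt_rq);	/* prints 1, then 2 */

	return 0;
}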

For the !CONFIG_RT_GROUP_SCHED case, I used

typedef struct rt_rq *rt_rq_iter_t;

#define for_each_rt_rq(rt_rq, iter, rq) \
	(void) iter; \
	for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL)

An alternative is:
#define for_each_rt_rq(rt_rq, iter, rq) \
	for (rt_rq = iter = &rq->rt; iter; rt_rq = iter = NULL)
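The difference between the two (neither use in the patch below is affected, this is only about robustness): the first variant expands to two statements, so an unbraced "if" around the iterator would only guard the "(void) iter;" and the loop would still run, while the second stays a single for statement. Folding the cast into the initializer, e.g. "for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)", would be another single-statement option. A throw-away user-space sketch with dummy types (not part of the patch):

#include <stdio.h>

struct rt_rq { int dummy; };
struct rq { struct rt_rq rt; };

/* variant 1: expands to two statements */
#define for_each_rt_rq_v1(rt_rq, iter, rq) \
	(void) iter; \
	for (rt_rq = &(rq)->rt; rt_rq; rt_rq = NULL)

/* variant 2: a single for statement */
#define for_each_rt_rq_v2(rt_rq, iter, rq) \
	for (rt_rq = iter = &(rq)->rt; iter; rt_rq = iter = NULL)

int main(void)
{
	struct rq rq = { { 0 } };
	struct rt_rq *rt_rq = NULL, *iter = NULL;
	int enabled = 0;

	if (enabled)
		for_each_rt_rq_v1(rt_rq, iter, &rq)	/* only "(void) iter;" is under the if */
			puts("v1: loop ran although enabled == 0");	/* printed */

	if (enabled)
		for_each_rt_rq_v2(rt_rq, iter, &rq)	/* whole loop is under the if */
			puts("v2: loop ran although enabled == 0");	/* not printed */

	return 0;
}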

The patch is attached below. Could you check whether it is workable? Thank you. 

---
 kernel/sched_rt.c |   22 +++++++++++++++++++---
 1 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index e7cebdc..f9e621a 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -183,6 +183,13 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
 	return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
 }
 
+typedef struct task_group *rt_rq_iter_t;
+
+#define for_each_rt_rq(rt_rq, iter, rq) \
+	for (iter = list_entry_rcu(task_groups.next, typeof(*iter), list); \
+	     (&iter->list != &task_groups) && (rt_rq = iter->rt_rq[cpu_of(rq)]); \
+	     iter = list_entry_rcu(iter->list.next, typeof(*iter), list))
+
 static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
 {
 	list_add_rcu(&rt_rq->leaf_rt_rq_list,
@@ -288,6 +295,12 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
 	return ktime_to_ns(def_rt_bandwidth.rt_period);
 }
 
+typedef struct rt_rq *rt_rq_iter_t;
+
+#define for_each_rt_rq(rt_rq, iter, rq) \
+	(void) iter; \
+	for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
+
 static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
 {
 }
@@ -402,12 +415,13 @@ next:
 static void __disable_runtime(struct rq *rq)
 {
 	struct root_domain *rd = rq->rd;
+	rt_rq_iter_t iter;
 	struct rt_rq *rt_rq;
 
 	if (unlikely(!scheduler_running))
 		return;
 
-	for_each_leaf_rt_rq(rt_rq, rq) {
+	for_each_rt_rq(rt_rq, iter, rq) {
 		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
 		s64 want;
 		int i;
@@ -487,6 +501,7 @@ static void disable_runtime(struct rq *rq)
 
 static void __enable_runtime(struct rq *rq)
 {
+	rt_rq_iter_t iter;
 	struct rt_rq *rt_rq;
 
 	if (unlikely(!scheduler_running))
@@ -495,7 +510,7 @@ static void __enable_runtime(struct rq *rq)
 	/*
 	 * Reset each runqueue's bandwidth settings
 	 */
-	for_each_leaf_rt_rq(rt_rq, rq) {
+	for_each_rt_rq(rt_rq, iter, rq) {
 		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
 
 		raw_spin_lock(&rt_b->rt_runtime_lock);
@@ -1796,10 +1811,11 @@ extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
 
 static void print_rt_stats(struct seq_file *m, int cpu)
 {
+	rt_rq_iter_t iter;
 	struct rt_rq *rt_rq;
 
 	rcu_read_lock();
-	for_each_leaf_rt_rq(rt_rq, cpu_rq(cpu))
+	for_each_rt_rq(rt_rq, iter, cpu_rq(cpu))
 		print_rt_rq(m, cpu, rt_rq);
 	rcu_read_unlock();
 }
-- 
1.7.1




On 2011-5-11 17:21, Peter Zijlstra wrote:
> On Wed, 2011-05-11 at 15:34 +0800, Cheng Xu wrote:
>> This patch is to fix bug report https://lkml.org/lkml/2011/4/26/13
> 
> This really doesn't tell me anything, please restate the relevant
> information.
> 
>> Function __disable_runtime() reports leakage of rt_rq runtime. The
>> root cause is __disable_runtime() assumes it iterates through all the
>> existing rt_rq's while walking rq->leaf_rt_rq_list, which actually
>> contains only runnable rt_rq's. This problem also applies to
>> __enable_runtime() and print_rt_stats().
> 
> Teach your mailer to wrap at 78 characters for changelogs.
> 
>> The patch is based on above analysis, appears to fix the problem, but is only lightly tested.
>>
>>
>> Signed-off-by: Cheng Xu <chengxu@...ux.vnet.ibm.com>
>> Tested-by: Paul E. McKenney <paulmck@...ux.vnet.ibm.com>
>>
> 
> Don't leave whitespace between the tags and the triple-dash. Also, I'm
> suspecting you're missing a Reported-by: paulmck tag.
> 
>> ---
>>  kernel/sched_rt.c |   31 ++++++++++++++++++++++++-------
>>  1 files changed, 24 insertions(+), 7 deletions(-)
>>
>> diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
>> index e7cebdc..7f478ff 100644
>> --- a/kernel/sched_rt.c
>> +++ b/kernel/sched_rt.c
>> @@ -183,6 +183,13 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
>>  	return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
>>  }
>>  
>> +#define rt_rq_of_rq_decls(name) struct task_group *name
>> +
>> +#define list_for_rt_rq_of_rq(iterator, rq) \
>> +	list_for_each_entry_rcu(iterator, &task_groups, list)
>> +
>> +#define rt_rq_of_rq_deref(iterator, rq) (iterator->rt_rq[cpu_of(rq)])
>> +
>>  static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
>>  {
>>  	list_add_rcu(&rt_rq->leaf_rt_rq_list,
>> @@ -288,6 +295,13 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
>>  	return ktime_to_ns(def_rt_bandwidth.rt_period);
>>  }
>>  
>> +#define rt_rq_of_rq_decls(name) struct rt_rq *name
>> +
>> +#define list_for_rt_rq_of_rq(iterator, rq) \
>> +	for (iterator = &rq->rt; iterator; iterator = NULL)
>> +
>> +#define rt_rq_of_rq_deref(iterator, rq) (iterator)
>> +
>>  static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
>>  {
>>  }
> 
> So I see why you did that, I just don't much like it.. esp the decls
> macros, C has typedef to deal with that problem, also you can get rid of
> the deref macros (now if we were allowed C99 we could avoid the whole
> iter thing and declare a for-scope variable).
> 
> How about something like:
> 
> typedef struct task_group *rt_rq_iter_t;
> 
> #define for_each_rt_rq(rt_rq, iter, rq)                                     \
> 	for (iter = list_entry_rcu(task_groups.next, typeof(*iter), list),  \
> 	     rt_rq = iter->rt_rq[cpu_of(rq)]; &iter->list != &task_groups;  \
> 	     iter = list_entry_rcu(iter->list.next, typeof(*iter), list),   \
> 	     rt_rq = iter->rt_rq[cpu_of(rq)])
> 
> 	    
> which is then used like:
> 
> 	rt_rq_iter_t iter;
> 	struct rt_rq *rt_rq;
> 
> 	for_each_rt_rq(rt_rq, iter, rq) {
> 		/* do something with rt_rq */
> 	}
> 
> 

