lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 20 Apr 2022 16:29:24 +0800
From:   Hao Jia <jiahao.os@...edance.com>
To:     Peter Zijlstra <peterz@...radead.org>
Cc:     mingo@...hat.com, juri.lelli@...hat.com,
        vincent.guittot@...aro.org, dietmar.eggemann@....com,
        rostedt@...dmis.org, bsegall@...gle.com, mgorman@...e.de,
        bristot@...hat.com, linux-kernel@...r.kernel.org
Subject: Re: [External] Re: [PATCH] sched/core: Avoid obvious double
 update_rq_clock warning



On 4/19/22 6:48 PM, Peter Zijlstra wrote:
> On Mon, Apr 18, 2022 at 05:09:29PM +0800, Hao Jia wrote:
>> When we use raw_spin_rq_lock to acquire the rq lock and have to
>> update the rq clock while holding the lock, the kernel may issue
>> a WARN_DOUBLE_CLOCK warning.
>>
>> Since we directly use raw_spin_rq_lock to acquire the rq lock instead
>> of rq_lock, there is no corresponding change to rq->clock_update_flags.
>> In particular, when we have obtained the rq lock of another core, that
>> core's rq->clock_update_flags may be RQCF_UPDATED at this time, and
>> then calling update_rq_clock will trigger the WARN_DOUBLE_CLOCK warning.
> 
>> Signed-off-by: Hao Jia <jiahao.os@...edance.com>
>> ---
>>   kernel/sched/deadline.c | 18 +++++++++++-------
>>   kernel/sched/rt.c       | 20 ++++++++++++++++++--
> 
> Very good for keeping them in sync.
> 
>>   2 files changed, 29 insertions(+), 9 deletions(-)
>>
>> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
>> index fb4255ae0b2c..9207b978cc43 100644
>> --- a/kernel/sched/deadline.c
>> +++ b/kernel/sched/deadline.c
> 
>> @@ -2317,16 +2318,14 @@ static int push_dl_task(struct rq *rq)
>>   		goto retry;
>>   	}
>>   
>> +	rq_pin_lock(rq, &srf);
>> +	rq_pin_lock(later_rq, &drf);
>>   	deactivate_task(rq, next_task, 0);
>>   	set_task_cpu(next_task, later_rq->cpu);
>> -
>> -	/*
>> -	 * Update the later_rq clock here, because the clock is used
>> -	 * by the cpufreq_update_util() inside __add_running_bw().
>> -	 */
>> -	update_rq_clock(later_rq);
>> -	activate_task(later_rq, next_task, ENQUEUE_NOCLOCK);
>> +	activate_task(later_rq, next_task, 0);
>>   	ret = 1;
>> +	rq_unpin_lock(rq, &srf);
>> +	rq_unpin_lock(later_rq, &drf);
>>   
>>   	resched_curr(later_rq);
>>   
> 
>> @@ -2413,11 +2413,15 @@ static void pull_dl_task(struct rq *this_rq)
>>   			if (is_migration_disabled(p)) {
>>   				push_task = get_push_task(src_rq);
>>   			} else {
>> +				rq_pin_lock(this_rq, &this_rf);
>> +				rq_pin_lock(src_rq, &src_rf);
>>   				deactivate_task(src_rq, p, 0);
>>   				set_task_cpu(p, this_cpu);
>>   				activate_task(this_rq, p, 0);
>>   				dmin = p->dl.deadline;
>>   				resched = true;
>> +				rq_unpin_lock(this_rq, &this_rf);
>> +				rq_unpin_lock(src_rq, &src_rf);
>>   			}
>>   
>>   			/* Is there any other task even earlier? */
> 
> I'm really not sure about this part though. This is a bit of a mess. The
> balancer doesn't really need the pinning stuff. I realize you did that
> because we got the clock annotation mixed up with that, but urgh.
> 
> Basically we want double_rq_lock() / double_lock_balance() to clear
> RQCF_UPDATED, right? Perhaps do that directly?
> 
> (maybe with an inline helper and a wee comment?)
> 
> The only immediate problem with this would appear to be that
> _double_rq_lock() behaves differently when it returns 0. Not sure that
> matters.
> 
> Hmm?

Thanks for your review comments.
As you pointed out, the WARN_DOUBLE_CLOCK warning is still triggered
when _double_rq_lock() returns 0.
Please review the solution below; based on your feedback, I will
submit the v2 patch as soon as possible.
Thanks.


diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 51efaabac3e4..b73fe46cd6c7 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -610,10 +610,13 @@ void double_rq_lock(struct rq *rq1, struct rq *rq2)
         swap(rq1, rq2);

     raw_spin_rq_lock(rq1);
-   if (__rq_lockp(rq1) == __rq_lockp(rq2))
-       return;
+   if (__rq_lockp(rq1) != __rq_lockp(rq2))
+       raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);

-   raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
+#ifdef CONFIG_SCHED_DEBUG
+   rq1->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
+   rq2->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
+#endif
  }
  #endif

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 8dccb34eb190..9fe506a6b7b4 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2544,20 +2544,25 @@ static inline int _double_lock_balance(struct rq 
*this_rq, struct rq *busiest)
     __acquires(this_rq->lock)
  {
     if (__rq_lockp(this_rq) == __rq_lockp(busiest))
-       return 0;
+       goto out;

     if (likely(raw_spin_rq_trylock(busiest)))
-       return 0;
+       goto out;

     if (rq_order_less(this_rq, busiest)) {
         raw_spin_rq_lock_nested(busiest, SINGLE_DEPTH_NESTING);
-       return 0;
+       goto out;
+   } else {
+       raw_spin_rq_unlock(this_rq);
+       double_rq_lock(this_rq, busiest);
+       return 1;
     }
-
-   raw_spin_rq_unlock(this_rq);
-   double_rq_lock(this_rq, busiest);
-
-   return 1;
+out:
+#ifdef CONFIG_SCHED_DEBUG
+   this_rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
+   busiest->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
+#endif
+   return 0;
  }

  #endif /* CONFIG_PREEMPTION */
@@ -2644,6 +2649,9 @@ static inline void double_rq_lock(struct rq *rq1, 
struct rq *rq2)
     BUG_ON(rq1 != rq2);
     raw_spin_rq_lock(rq1);
     __acquire(rq2->lock);   /* Fake it out ;) */
+#ifdef CONFIG_SCHED_DEBUG
+   rq1->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
+#endif
  }
> 
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index f259621f4c93..be4baec84430 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -610,10 +610,13 @@ void double_rq_lock(struct rq *rq1, struct rq *rq2)
>   		swap(rq1, rq2);
>   
>   	raw_spin_rq_lock(rq1);
> -	if (__rq_lockp(rq1) == __rq_lockp(rq2))
> -		return;
> +	if (__rq_lockp(rq1) != __rq_lockp(rq2))
> +		raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
>   
> -	raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
> +#ifdef CONFIG_SCHED_DEBUG
> +	rq1->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
> +	rq2->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
> +#endif
>   }
>   #endif
>   
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 8dccb34eb190..3ca8dd5ca17c 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -2644,6 +2644,10 @@ static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
>   	BUG_ON(rq1 != rq2);
>   	raw_spin_rq_lock(rq1);
>   	__acquire(rq2->lock);	/* Fake it out ;) */
> +#ifdef CONFIG_SCHED_DEBUG
> +	rq1->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
> +	rq2->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
> +#endif
>   }
>   
>   /*

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ