Message-ID: <162e6281-8828-e0bc-2b91-183b7f3a1f65@bytedance.com>
Date: Thu, 13 Apr 2023 22:49:41 +0800
From: Qi Zheng <zhengqi.arch@...edance.com>
To: Peter Zijlstra <peterz@...radead.org>
Cc: Vlastimil Babka <vbabka@...e.cz>,
"Zhang, Qiang1" <qiang1.zhang@...el.com>,
Boqun Feng <boqun.feng@...il.com>,
"42.hyeyoo@...il.com" <42.hyeyoo@...il.com>,
"akpm@...ux-foundation.org" <akpm@...ux-foundation.org>,
"roman.gushchin@...ux.dev" <roman.gushchin@...ux.dev>,
"iamjoonsoo.kim@....com" <iamjoonsoo.kim@....com>,
"rientjes@...gle.com" <rientjes@...gle.com>,
"penberg@...nel.org" <penberg@...nel.org>,
"cl@...ux.com" <cl@...ux.com>,
"linux-mm@...ck.org" <linux-mm@...ck.org>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
Zhao Gongyi <zhaogongyi@...edance.com>,
Sebastian Andrzej Siewior <bigeasy@...utronix.de>,
Thomas Gleixner <tglx@...utronix.de>,
RCU <rcu@...r.kernel.org>,
"Paul E . McKenney" <paulmck@...nel.org>
Subject: Re: [PATCH] mm: slub: annotate kmem_cache_node->list_lock as
raw_spinlock
On 2023/4/13 00:44, Qi Zheng wrote:
>
>
> On 2023/4/12 20:47, Peter Zijlstra wrote:
>> On Wed, Apr 12, 2023 at 08:50:29AM +0200, Vlastimil Babka wrote:
>>
>>>> --- a/lib/debugobjects.c
>>>> +++ b/lib/debugobjects.c
>>>> @@ -562,10 +562,10 @@ __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack
>>>> unsigned long flags;
>>>>
>>>> /*
>>>> - * On RT enabled kernels the pool refill must happen in preemptible
>>>> + * The pool refill must happen in preemptible
>>>> * context:
>>>> */
>>>> - if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible())
>>>> + if (preemptible())
>>>> fill_pool();
>>>
>>> +CC Peterz
>>>
>>> Aha so this is in fact another case where the code is written with
>>> actual differences between PREEMPT_RT and !PREEMPT_RT in mind, but
>>> CONFIG_PROVE_RAW_LOCK_NESTING always assumes PREEMPT_RT?
>>
>> Ooh, tricky, yes. PROVE_RAW_LOCK_NESTING always follows the PREEMPT_RT
>> rules and does not expect trickery like the above.
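
For reference, the wait types being compared here are ordered roughly as
below (paraphrased from include/linux/lockdep_types.h, just as a sketch):

enum lockdep_wait_type {
	LD_WAIT_INV = 0,	/* not checked, catch all */
	LD_WAIT_FREE,		/* wait free, rcu etc.. */
	LD_WAIT_SPIN,		/* spin loops, raw_spinlock_t etc.. */
#ifdef CONFIG_PROVE_RAW_LOCK_NESTING
	LD_WAIT_CONFIG,		/* preemptible in PREEMPT_RT, spinlock_t etc.. */
#else
	LD_WAIT_CONFIG = LD_WAIT_SPIN,
#endif
	LD_WAIT_SLEEP,		/* sleeping locks, mutex_t etc.. */
	LD_WAIT_MAX,		/* must be last */
};

With PROVE_RAW_LOCK_NESTING=y, LD_WAIT_CONFIG stays distinct from
LD_WAIT_SPIN, i.e. spinlock_t is checked as the sleeping lock it becomes
on PREEMPT_RT, so the runtime !IS_ENABLED(CONFIG_PREEMPT_RT) branch above
is invisible to the checker.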
>>
>> Something like the completely untested below might be of help..
>>
>> ---
>> diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h
>> index d22430840b53..f3120d6a7d9e 100644
>> --- a/include/linux/lockdep_types.h
>> +++ b/include/linux/lockdep_types.h
>> @@ -33,6 +33,7 @@ enum lockdep_wait_type {
>> enum lockdep_lock_type {
>> LD_LOCK_NORMAL = 0, /* normal, catch all */
>> LD_LOCK_PERCPU, /* percpu */
>> + LD_LOCK_WAIT, /* annotation */
>> LD_LOCK_MAX,
>> };
>> diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
>> index 50d4863974e7..a4077f5bb75b 100644
>> --- a/kernel/locking/lockdep.c
>> +++ b/kernel/locking/lockdep.c
>> @@ -2279,8 +2279,9 @@ static inline bool usage_skip(struct lock_list *entry, void *mask)
>> * As a result, we will skip local_lock(), when we search for irq
>> * inversion bugs.
>> */
>> - if (entry->class->lock_type == LD_LOCK_PERCPU) {
>> - if (DEBUG_LOCKS_WARN_ON(entry->class->wait_type_inner < LD_WAIT_CONFIG))
>> + if (entry->class->lock_type != LD_LOCK_NORMAL) {
>> + if (entry->class->lock_type == LD_LOCK_PERCPU &&
>> + DEBUG_LOCKS_WARN_ON(entry->class->wait_type_inner < LD_WAIT_CONFIG))
>> return false;
>> return true;
>> @@ -4752,7 +4753,8 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next)
>> for (; depth < curr->lockdep_depth; depth++) {
>> struct held_lock *prev = curr->held_locks + depth;
>> - u8 prev_inner = hlock_class(prev)->wait_type_inner;
>> + struct lock_class *class = hlock_class(prev);
>> + u8 prev_inner = class->wait_type_inner;
>> if (prev_inner) {
>> /*
>> @@ -4762,6 +4764,12 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next)
>> * Also due to trylocks.
>> */
>> curr_inner = min(curr_inner, prev_inner);
>> +
>> + /*
>> + * Allow override for annotations.
>> + */
>> + if (unlikely(class->lock_type == LD_LOCK_WAIT))
>> + curr_inner = prev_inner;
>> }
>> }
>> diff --git a/lib/debugobjects.c b/lib/debugobjects.c
>> index df86e649d8be..fae71ef72a16 100644
>> --- a/lib/debugobjects.c
>> +++ b/lib/debugobjects.c
>> @@ -565,8 +565,16 @@ __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack
>> * On RT enabled kernels the pool refill must happen in preemptible
>> * context:
>> */
>> - if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible())
>> + if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible()) {
>> + static lockdep_map dep_map = {
>
> static struct lockdep_map dep_map = {
>
>> + .name = "wait-type-override",
>> + .wait_type_inner = LD_WAIT_SLEEP,
>> + .lock_type = LD_LOCK_WAIT,
>> + };
>> + lock_map_acquire(&dep_map);
>> fill_pool();
>> + lock_map_release(&dep_map);
>> + }
>> db = get_bucket((unsigned long) addr);
>
> I just tested the above code, and then got the following
> warning:
>
>
> It seems that the LD_WAIT_SLEEP we set is already greater than the
> LD_WAIT_SPIN of the current context.
>
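
For context, the comparison that fires appears to be the final check in
check_wait_context() (a simplified sketch of the existing code in
kernel/locking/lockdep.c, not something added by the patch below):

	/*
	 * A held raw_spinlock_t has clamped curr_inner to LD_WAIT_SPIN,
	 * while the dep_map annotation declares LD_WAIT_SLEEP as its
	 * inner wait type:
	 */
	if (next_outer > curr_inner)	/* LD_WAIT_SLEEP > LD_WAIT_SPIN */
		return print_lock_invalid_wait_context(curr, next);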
Can we do something like the following? It resolves the warning I encountered.
diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h
index d22430840b53..f3120d6a7d9e 100644
--- a/include/linux/lockdep_types.h
+++ b/include/linux/lockdep_types.h
@@ -33,6 +33,7 @@ enum lockdep_wait_type {
enum lockdep_lock_type {
LD_LOCK_NORMAL = 0, /* normal, catch all */
LD_LOCK_PERCPU, /* percpu */
+ LD_LOCK_WAIT, /* annotation */
LD_LOCK_MAX,
};
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index dcd1d5bfc1e0..6859dba8a3aa 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2289,8 +2289,9 @@ static inline bool usage_skip(struct lock_list *entry, void *mask)
* As a result, we will skip local_lock(), when we search for irq
* inversion bugs.
*/
- if (entry->class->lock_type == LD_LOCK_PERCPU) {
- if (DEBUG_LOCKS_WARN_ON(entry->class->wait_type_inner < LD_WAIT_CONFIG))
+ if (entry->class->lock_type != LD_LOCK_NORMAL) {
+ if (entry->class->lock_type == LD_LOCK_PERCPU &&
+ DEBUG_LOCKS_WARN_ON(entry->class->wait_type_inner < LD_WAIT_CONFIG))
return false;
return true;
@@ -3981,6 +3982,9 @@ static inline int
valid_state(struct task_struct *curr, struct held_lock *this,
enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit)
{
+ if (unlikely(hlock_class(this)->lock_type == LD_LOCK_WAIT))
+ return 1;
+
if (unlikely(hlock_class(this)->usage_mask & (1 << bad_bit))) {
graph_unlock();
print_usage_bug(curr, this, bad_bit, new_bit);
@@ -4768,7 +4772,8 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next)
for (; depth < curr->lockdep_depth; depth++) {
struct held_lock *prev = curr->held_locks + depth;
- u8 prev_inner = hlock_class(prev)->wait_type_inner;
+ struct lock_class *class = hlock_class(prev);
+ u8 prev_inner = class->wait_type_inner;
if (prev_inner) {
/*
@@ -4778,9 +4783,19 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next)
* Also due to trylocks.
*/
curr_inner = min(curr_inner, prev_inner);
+
+ /*
+ * Allow override for annotations.
+ */
+ if (unlikely(class->lock_type == LD_LOCK_WAIT))
+ curr_inner = prev_inner;
}
}
+ if (unlikely(hlock_class(next)->lock_type == LD_LOCK_WAIT &&
+ curr_inner == LD_WAIT_SPIN))
+ curr_inner = LD_WAIT_CONFIG;
+
if (next_outer > curr_inner)
return print_lock_invalid_wait_context(curr, next);
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index df86e649d8be..a8a69991b0d0 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -565,8 +565,16 @@ __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack
* On RT enabled kernels the pool refill must happen in preemptible
* context:
*/
- if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible())
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible()) {
+ static struct lockdep_map dep_map = {
+ .name = "wait-type-override",
+ .wait_type_inner = LD_WAIT_CONFIG,
+ .lock_type = LD_LOCK_WAIT,
+ };
+ lock_map_acquire(&dep_map);
fill_pool();
+ lock_map_release(&dep_map);
+ }
db = get_bucket((unsigned long) addr);
--
Thanks,
Qi