[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250513100730.12664-33-byungchul@sk.com>
Date: Tue, 13 May 2025 19:07:19 +0900
From: Byungchul Park <byungchul@...com>
To: linux-kernel@...r.kernel.org
Cc: kernel_team@...ynix.com,
torvalds@...ux-foundation.org,
damien.lemoal@...nsource.wdc.com,
linux-ide@...r.kernel.org,
adilger.kernel@...ger.ca,
linux-ext4@...r.kernel.org,
mingo@...hat.com,
peterz@...radead.org,
will@...nel.org,
tglx@...utronix.de,
rostedt@...dmis.org,
joel@...lfernandes.org,
sashal@...nel.org,
daniel.vetter@...ll.ch,
duyuyang@...il.com,
johannes.berg@...el.com,
tj@...nel.org,
tytso@....edu,
willy@...radead.org,
david@...morbit.com,
amir73il@...il.com,
gregkh@...uxfoundation.org,
kernel-team@....com,
linux-mm@...ck.org,
akpm@...ux-foundation.org,
mhocko@...nel.org,
minchan@...nel.org,
hannes@...xchg.org,
vdavydov.dev@...il.com,
sj@...nel.org,
jglisse@...hat.com,
dennis@...nel.org,
cl@...ux.com,
penberg@...nel.org,
rientjes@...gle.com,
vbabka@...e.cz,
ngupta@...are.org,
linux-block@...r.kernel.org,
josef@...icpanda.com,
linux-fsdevel@...r.kernel.org,
jack@...e.cz,
jlayton@...nel.org,
dan.j.williams@...el.com,
hch@...radead.org,
djwong@...nel.org,
dri-devel@...ts.freedesktop.org,
rodrigosiqueiramelo@...il.com,
melissa.srw@...il.com,
hamohammed.sa@...il.com,
harry.yoo@...cle.com,
chris.p.wilson@...el.com,
gwan-gyeong.mun@...el.com,
max.byungchul.park@...il.com,
boqun.feng@...il.com,
longman@...hat.com,
yskelg@...il.com,
yunseong.kim@...csson.com,
yeoreum.yun@....com,
netdev@...r.kernel.org,
matthew.brost@...el.com,
her0gyugyu@...il.com
Subject: [PATCH v15 32/43] dept: assign dept map to mmu notifier invalidation synchronization
Resolved the following false positive by introducing an explicit dept map
and annotations for dealing with this case:
*** DEADLOCK ***
context A
[S] (unknown)(<sched>:0)
[W] lock(&mm->mmap_lock:0)
[E] try_to_wake_up(<sched>:0)
context B
[S] lock(&mm->mmap_lock:0)
[W] mmu_interval_read_begin(<sched>:0)
[E] unlock(&mm->mmap_lock:0)
[S]: start of the event context
[W]: the wait blocked
[E]: the event not reachable
dept already tracks dependencies between scheduler sleep and ttwu based
on an internal timestamp called wgen. However, in case more than one
event context is overlapped, dept has a chance to wrongly guess the
start of the event context like the following:
<before this patch>
context A: lock L
context A: mmu_notifier_invalidate_range_start()
context B: lock L'
context B: mmu_interval_read_begin() : wait
<- here is the start of the event context of C.
context B: unlock L'
context C: lock L''
context C: mmu_notifier_invalidate_range_start()
context A: mmu_notifier_invalidate_range_end()
context A: unlock L
context C: mmu_notifier_invalidate_range_end() : ttwu
<- here is the end of the event context of C. dept observes a wait,
lock L'', within the event context of C, which causes a false
positive dept report.
context C: unlock L''
By explicitly annotating the interesting event context range, make dept
work with more precise information like:
<after this patch>
context A: lock L
context A: mmu_notifier_invalidate_range_start()
context B: lock L'
context B: mmu_interval_read_begin() : wait
context B: unlock L'
context C: lock L''
context C: mmu_notifier_invalidate_range_start()
<- here is the start of the event context of C.
context A: mmu_notifier_invalidate_range_end()
context A: unlock L
context C: mmu_notifier_invalidate_range_end() : ttwu
<- here is the end of the event context of C. dept doesn't observe
the wait, lock L'' within the event context of C. context C is
responsible only for the range delimited by
mmu_notifier_invalidate_range_{start,end}().
context C: unlock L''
Signed-off-by: Byungchul Park <byungchul@...com>
---
include/linux/mmu_notifier.h | 26 ++++++++++++++++++++++++++
mm/mmu_notifier.c | 31 +++++++++++++++++++++++++++++--
2 files changed, 55 insertions(+), 2 deletions(-)
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index bc2402a45741..1e256f5305b7 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -428,6 +428,14 @@ static inline int mmu_notifier_test_young(struct mm_struct *mm,
return 0;
}
+#ifdef CONFIG_DEPT
+void mmu_notifier_invalidate_dept_ecxt_start(struct mmu_notifier_range *range);
+void mmu_notifier_invalidate_dept_ecxt_end(struct mmu_notifier_range *range);
+#else
+static inline void mmu_notifier_invalidate_dept_ecxt_start(struct mmu_notifier_range *range) {}
+static inline void mmu_notifier_invalidate_dept_ecxt_end(struct mmu_notifier_range *range) {}
+#endif
+
static inline void
mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
{
@@ -439,6 +447,12 @@ mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
__mmu_notifier_invalidate_range_start(range);
}
lock_map_release(&__mmu_notifier_invalidate_range_start_map);
+
+ /*
+ * From now on, waiters could be there by this start until
+ * mmu_notifier_invalidate_range_end().
+ */
+ mmu_notifier_invalidate_dept_ecxt_start(range);
}
/*
@@ -459,6 +473,12 @@ mmu_notifier_invalidate_range_start_nonblock(struct mmu_notifier_range *range)
ret = __mmu_notifier_invalidate_range_start(range);
}
lock_map_release(&__mmu_notifier_invalidate_range_start_map);
+
+ /*
+ * From now on, waiters could be there by this start until
+ * mmu_notifier_invalidate_range_end().
+ */
+ mmu_notifier_invalidate_dept_ecxt_start(range);
return ret;
}
@@ -470,6 +490,12 @@ mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range)
if (mm_has_notifiers(range->mm))
__mmu_notifier_invalidate_range_end(range);
+
+ /*
+ * The event context that has been started by
+ * mmu_notifier_invalidate_range_start() ends.
+ */
+ mmu_notifier_invalidate_dept_ecxt_end(range);
}
static inline void mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm,
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index fc18fe274505..850d75952f98 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -46,6 +46,7 @@ struct mmu_notifier_subscriptions {
unsigned long active_invalidate_ranges;
struct rb_root_cached itree;
wait_queue_head_t wq;
+ struct dept_map dmap;
struct hlist_head deferred_list;
};
@@ -165,6 +166,25 @@ static void mn_itree_inv_end(struct mmu_notifier_subscriptions *subscriptions)
wake_up_all(&subscriptions->wq);
}
+#ifdef CONFIG_DEPT
+void mmu_notifier_invalidate_dept_ecxt_start(struct mmu_notifier_range *range)
+{
+ struct mmu_notifier_subscriptions *subscriptions =
+ range->mm->notifier_subscriptions;
+
+ if (subscriptions)
+ sdt_ecxt_enter(&subscriptions->dmap);
+}
+void mmu_notifier_invalidate_dept_ecxt_end(struct mmu_notifier_range *range)
+{
+ struct mmu_notifier_subscriptions *subscriptions =
+ range->mm->notifier_subscriptions;
+
+ if (subscriptions)
+ sdt_ecxt_exit(&subscriptions->dmap);
+}
+#endif
+
/**
* mmu_interval_read_begin - Begin a read side critical section against a VA
* range
@@ -246,9 +266,12 @@ mmu_interval_read_begin(struct mmu_interval_notifier *interval_sub)
*/
lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
lock_map_release(&__mmu_notifier_invalidate_range_start_map);
- if (is_invalidating)
+ if (is_invalidating) {
+ sdt_might_sleep_start(&subscriptions->dmap);
wait_event(subscriptions->wq,
READ_ONCE(subscriptions->invalidate_seq) != seq);
+ sdt_might_sleep_end();
+ }
/*
* Notice that mmu_interval_read_retry() can already be true at this
@@ -625,6 +648,7 @@ int __mmu_notifier_register(struct mmu_notifier *subscription,
INIT_HLIST_HEAD(&subscriptions->list);
spin_lock_init(&subscriptions->lock);
+ sdt_map_init(&subscriptions->dmap);
subscriptions->invalidate_seq = 2;
subscriptions->itree = RB_ROOT_CACHED;
init_waitqueue_head(&subscriptions->wq);
@@ -1070,9 +1094,12 @@ void mmu_interval_notifier_remove(struct mmu_interval_notifier *interval_sub)
*/
lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
lock_map_release(&__mmu_notifier_invalidate_range_start_map);
- if (seq)
+ if (seq) {
+ sdt_might_sleep_start(&subscriptions->dmap);
wait_event(subscriptions->wq,
mmu_interval_seq_released(subscriptions, seq));
+ sdt_might_sleep_end();
+ }
/* pairs with mmgrab in mmu_interval_notifier_insert() */
mmdrop(mm);
--
2.17.1
Powered by blists - more mailing lists