[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240626152924.GA17933@redhat.com>
Date: Wed, 26 Jun 2024 17:29:24 +0200
From: Oleg Nesterov <oleg@...hat.com>
To: Andrew Morton <akpm@...ux-foundation.org>,
Michal Hocko <mhocko@...e.com>
Cc: Christian Brauner <brauner@...nel.org>,
"Eric W. Biederman" <ebiederm@...ssion.com>,
Jens Axboe <axboe@...nel.dk>,
Jinliang Zheng <alexjlzheng@...cent.com>,
Mateusz Guzik <mjguzik@...il.com>,
Matthew Wilcox <willy@...radead.org>,
Tycho Andersen <tandersen@...flix.com>,
linux-kernel@...r.kernel.org
Subject: [PATCH 1/2] memcg: mm_update_next_owner: kill the "retry" logic
Add the new helper, try_to_set_owner(), which tries to update mm->owner
once we see c->mm == mm. This way mm_update_next_owner() doesn't need to
restart the list_for_each_entry/for_each_process loops from the very
beginning if it races with exit/exec, it can just continue.
Unlike the current code, try_to_set_owner() re-checks tsk->mm == mm
before it drops tasklist_lock, so it doesn't need get/put_task_struct().
Signed-off-by: Oleg Nesterov <oleg@...hat.com>
---
kernel/exit.c | 57 ++++++++++++++++++++++++---------------------------
1 file changed, 27 insertions(+), 30 deletions(-)
diff --git a/kernel/exit.c b/kernel/exit.c
index 45210443e68d..a1ef5f23d5be 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -439,6 +439,23 @@ static void coredump_task_exit(struct task_struct *tsk)
}
#ifdef CONFIG_MEMCG
+/* drops tasklist_lock if it succeeds */
+static bool try_to_set_owner(struct task_struct *tsk, struct mm_struct *mm)
+{
+ bool ret = false;
+
+ task_lock(tsk);
+ if (likely(tsk->mm == mm)) {
+ /* tsk can't pass exit_mm/exec_mmap and exit */
+ read_unlock(&tasklist_lock);
+ WRITE_ONCE(mm->owner, tsk);
+ lru_gen_migrate_mm(mm);
+ ret = true;
+ }
+ task_unlock(tsk);
+ return ret;
+}
+
/*
* A task is exiting. If it owned this mm, find a new owner for the mm.
*/
@@ -446,7 +463,6 @@ void mm_update_next_owner(struct mm_struct *mm)
{
struct task_struct *c, *g, *p = current;
-retry:
/*
* If the exiting or execing task is not the owner, it's
* someone else's problem.
@@ -468,16 +484,16 @@ void mm_update_next_owner(struct mm_struct *mm)
* Search in the children
*/
list_for_each_entry(c, &p->children, sibling) {
- if (c->mm == mm)
- goto assign_new_owner;
+ if (c->mm == mm && try_to_set_owner(c, mm))
+ goto ret;
}
/*
* Search in the siblings
*/
list_for_each_entry(c, &p->real_parent->children, sibling) {
- if (c->mm == mm)
- goto assign_new_owner;
+ if (c->mm == mm && try_to_set_owner(c, mm))
+ goto ret;
}
/*
@@ -489,9 +505,11 @@ void mm_update_next_owner(struct mm_struct *mm)
if (g->flags & PF_KTHREAD)
continue;
for_each_thread(g, c) {
- if (c->mm == mm)
- goto assign_new_owner;
- if (c->mm)
+ struct mm_struct *c_mm = READ_ONCE(c->mm);
+ if (c_mm == mm) {
+ if (try_to_set_owner(c, mm))
+ goto ret;
+ } else if (c_mm)
break;
}
}
@@ -502,30 +520,9 @@ void mm_update_next_owner(struct mm_struct *mm)
* ptrace or page migration (get_task_mm()). Mark owner as NULL.
*/
WRITE_ONCE(mm->owner, NULL);
+ ret:
return;
-assign_new_owner:
- BUG_ON(c == p);
- get_task_struct(c);
- /*
- * The task_lock protects c->mm from changing.
- * We always want mm->owner->mm == mm
- */
- task_lock(c);
- /*
- * Delay read_unlock() till we have the task_lock()
- * to ensure that c does not slip away underneath us
- */
- read_unlock(&tasklist_lock);
- if (c->mm != mm) {
- task_unlock(c);
- put_task_struct(c);
- goto retry;
- }
- WRITE_ONCE(mm->owner, c);
- lru_gen_migrate_mm(mm);
- task_unlock(c);
- put_task_struct(c);
}
#endif /* CONFIG_MEMCG */
--
2.25.1.362.g51ebf55
Powered by blists - more mailing lists