Message-ID: <20160114195630.GA3520@mtj.duckdns.org>
Date: Thu, 14 Jan 2016 14:56:30 -0500
From: Tejun Heo <tj@...nel.org>
To: Christian Borntraeger <borntraeger@...ibm.com>
Cc: "linux-kernel@...r.kernel.org >> Linux Kernel Mailing List"
<linux-kernel@...r.kernel.org>,
linux-s390 <linux-s390@...r.kernel.org>,
KVM list <kvm@...r.kernel.org>,
Oleg Nesterov <oleg@...hat.com>,
Peter Zijlstra <peterz@...radead.org>,
"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
Subject: Re: regression 4.4: deadlock in with cgroup percpu_rwsem

Hello,

Thanks a lot for the report and detailed analysis. Can you please
test whether the following patch fixes the issue?

Thanks.

---
 include/linux/cpuset.h |    6 ++++++
 kernel/cgroup.c        |    2 ++
 kernel/cpuset.c        |   48 +++++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 51 insertions(+), 5 deletions(-)
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -137,6 +137,8 @@ static inline void set_mems_allowed(node
 	task_unlock(current);
 }
 
+extern void cpuset_post_attach_flush(void);
+
 #else /* !CONFIG_CPUSETS */
 
 static inline bool cpusets_enabled(void) { return false; }
@@ -243,6 +245,10 @@ static inline bool read_mems_allowed_ret
 	return false;
 }
 
+static inline void cpuset_post_attach_flush(void)
+{
+}
+
 #endif /* !CONFIG_CPUSETS */
 
 #endif /* _LINUX_CPUSET_H */
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -57,6 +57,7 @@
 #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
 #include <linux/kthread.h>
 #include <linux/delay.h>
+#include <linux/cpuset.h>
 
 #include <linux/atomic.h>
 
@@ -2739,6 +2740,7 @@ out_unlock_rcu:
 out_unlock_threadgroup:
 	percpu_up_write(&cgroup_threadgroup_rwsem);
 	cgroup_kn_unlock(of->kn);
+	cpuset_post_attach_flush();
 	return ret ?: nbytes;
 }
 
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -287,6 +287,8 @@ static struct cpuset top_cpuset = {
 static DEFINE_MUTEX(cpuset_mutex);
 static DEFINE_SPINLOCK(callback_lock);
 
+static struct workqueue_struct *cpuset_migrate_mm_wq;
+
 /*
  * CPU / memory hotplug is handled asynchronously.
  */
@@ -971,6 +973,23 @@ static int update_cpumask(struct cpuset
 	return 0;
 }
 
+struct cpuset_migrate_mm_work {
+	struct work_struct	work;
+	struct mm_struct	*mm;
+	nodemask_t		from;
+	nodemask_t		to;
+};
+
+static void cpuset_migrate_mm_workfn(struct work_struct *work)
+{
+	struct cpuset_migrate_mm_work *mwork =
+		container_of(work, struct cpuset_migrate_mm_work, work);
+
+	do_migrate_pages(mwork->mm, &mwork->from, &mwork->to, MPOL_MF_MOVE_ALL);
+	mmput(mwork->mm);
+	kfree(mwork);
+}
+
 /*
  * cpuset_migrate_mm
  *
@@ -989,16 +1008,31 @@ static void cpuset_migrate_mm(struct mm_
 							const nodemask_t *to)
 {
 	struct task_struct *tsk = current;
+	struct cpuset_migrate_mm_work *mwork;
 
 	tsk->mems_allowed = *to;
 
-	do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL);
+	mwork = kzalloc(sizeof(*mwork), GFP_KERNEL);
+	if (mwork) {
+		mwork->mm = mm;
+		mwork->from = *from;
+		mwork->to = *to;
+		INIT_WORK(&mwork->work, cpuset_migrate_mm_workfn);
+		queue_work(cpuset_migrate_mm_wq, &mwork->work);
+	} else {
+		mmput(mm);
+	}
 
 	rcu_read_lock();
 	guarantee_online_mems(task_cs(tsk), &tsk->mems_allowed);
 	rcu_read_unlock();
 }
 
+void cpuset_post_attach_flush(void)
+{
+	flush_workqueue(cpuset_migrate_mm_wq);
+}
+
 /*
  * cpuset_change_task_nodemask - change task's mems_allowed and mempolicy
  * @tsk: the task to change
@@ -1097,7 +1131,8 @@ static void update_tasks_nodemask(struct
 		mpol_rebind_mm(mm, &cs->mems_allowed);
 		if (migrate)
 			cpuset_migrate_mm(mm, &cs->old_mems_allowed, &newmems);
-		mmput(mm);
+		else
+			mmput(mm);
 	}
 	css_task_iter_end(&it);
 
@@ -1545,11 +1580,11 @@ static void cpuset_attach(struct cgroup_
 			 * @old_mems_allowed is the right nodesets that we
 			 * migrate mm from.
 			 */
-			if (is_memory_migrate(cs)) {
+			if (is_memory_migrate(cs))
 				cpuset_migrate_mm(mm, &oldcs->old_mems_allowed,
 						  &cpuset_attach_nodemask_to);
-			}
-			mmput(mm);
+			else
+				mmput(mm);
 		}
 	}
 
@@ -2359,6 +2394,9 @@ void __init cpuset_init_smp(void)
 	top_cpuset.effective_mems = node_states[N_MEMORY];
 
 	register_hotmemory_notifier(&cpuset_track_online_nodes_nb);
+
+	cpuset_migrate_mm_wq = alloc_ordered_workqueue("cpuset_migrate_mm", 0);
+	BUG_ON(!cpuset_migrate_mm_wq);
 }
 
 /**
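
For reference, the defer-and-flush pattern the patch uses can be boiled
down to the stand-alone, module-style sketch below.  This is an
illustration only, not the patch itself: the demo_* names and the module
are invented and exist nowhere in the kernel.  The slow operation is
queued on a dedicated ordered workqueue while the lock is held, and the
workqueue is flushed only after the lock is dropped, just as
cpuset_post_attach_flush() is called after
percpu_up_write(&cgroup_threadgroup_rwsem).

/*
 * Illustrative sketch only -- not part of the patch.  demo_* names are
 * made up; the structure mirrors cpuset_migrate_mm() queueing work and
 * cpuset_post_attach_flush() flushing it after the lock is released.
 */
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/mutex.h>

static struct workqueue_struct *demo_wq;
static DEFINE_MUTEX(demo_lock);

struct demo_work {
	struct work_struct work;
	int payload;
};

static void demo_workfn(struct work_struct *work)
{
	struct demo_work *dwork = container_of(work, struct demo_work, work);

	/*
	 * The slow part (do_migrate_pages() in the patch) runs here in a
	 * workqueue worker, so it never runs with demo_lock held.
	 */
	pr_info("demo: processed payload %d\n", dwork->payload);
	kfree(dwork);
}

static void demo_update(int payload)
{
	struct demo_work *dwork;

	mutex_lock(&demo_lock);

	/* Under the lock, only queue the work instead of doing it. */
	dwork = kzalloc(sizeof(*dwork), GFP_KERNEL);
	if (dwork) {
		dwork->payload = payload;
		INIT_WORK(&dwork->work, demo_workfn);
		queue_work(demo_wq, &dwork->work);
	}

	mutex_unlock(&demo_lock);

	/*
	 * Flush only after the lock is dropped, mirroring
	 * cpuset_post_attach_flush() being called after
	 * percpu_up_write(&cgroup_threadgroup_rwsem).
	 */
	flush_workqueue(demo_wq);
}

static int __init demo_init(void)
{
	demo_wq = alloc_ordered_workqueue("demo_wq", 0);
	if (!demo_wq)
		return -ENOMEM;

	demo_update(42);
	return 0;
}

static void __exit demo_exit(void)
{
	destroy_workqueue(demo_wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

An ordered workqueue executes queued items one at a time in queueing
order, which matches the patch allocating cpuset_migrate_mm_wq with
alloc_ordered_workqueue().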