Message-Id: <20220525151517.8430-3-mkoutny@suse.com>
Date: Wed, 25 May 2022 17:15:17 +0200
From: Michal Koutný <mkoutny@...e.com>
To: cgroups@...r.kernel.org, linux-kernel@...r.kernel.org
Cc: Tejun Heo <tj@...nel.org>, Zefan Li <lizefan.x@...edance.com>,
Johannes Weiner <hannes@...xchg.org>,
Bui Quang Minh <minhquangbui99@...il.com>,
Tadeusz Struk <tadeusz.struk@...aro.org>
Subject: [PATCH 2/2] cgroup: Use separate work structs on css release path

The cgroup_subsys_state objects of cgroup subsystems (not cgroup->self)
use both the kill and the release callbacks on their release path (see
the comment above css_free_rwork_fn()).

When the last reference is also the base reference, we run into issues
because the still-active work_struct (1) is re-initialized from
css_release() (2):

  // ref=1: only base reference
  kill_css()
    css_get() // fuse, ref+=1 == 2
    percpu_ref_kill_and_confirm
      // ref -= 1 == 1: kill base references
      [via rcu]
      css_killed_ref_fn == refcnt.confirm_switch
        queue_work(css->destroy_work) (1)

  [via css->destroy_work]
  css_killed_work_fn == wq.func
    offline_css() // needs fuse
    css_put // ref -= 1 == 0: de-fuse, was last
      ...
      percpu_ref_put_many
        css_release
          queue_work(css->destroy_work) (2)

  [via css->destroy_work]
  css_release_work_fn == wq.func

Although we take a fuse reference to pin the css for
css_killed_work_fn(), that reference is dropped right after
offline_css(), so the css_release() path can re-initialize the work
item while css_killed_work_fn() is still running on it.
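
For reference, the two paths that currently share css->destroy_work look
roughly like this (condensed from the pre-patch code touched by the diff
below; comments mark steps (1) and (2) of the trace above):

	/* (1) refcnt.confirm_switch callback: queues the kill work */
	static void css_killed_ref_fn(struct percpu_ref *ref)
	{
		struct cgroup_subsys_state *css =
			container_of(ref, struct cgroup_subsys_state, refcnt);

		if (atomic_dec_and_test(&css->online_cnt)) {
			INIT_WORK(&css->destroy_work, css_killed_work_fn);
			queue_work(cgroup_destroy_wq, &css->destroy_work);
		}
	}

	/*
	 * (2) refcnt release: invoked from css_killed_work_fn()'s css_put()
	 * in the scenario above, re-initializing the work item that is
	 * still executing.
	 */
	static void css_release(struct percpu_ref *ref)
	{
		struct cgroup_subsys_state *css =
			container_of(ref, struct cgroup_subsys_state, refcnt);

		INIT_WORK(&css->destroy_work, css_release_work_fn);
		queue_work(cgroup_destroy_wq, &css->destroy_work);
	}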

We could check inside css_release() whether destroy_work is still
active (WORK_STRUCT_PENDING_BIT) and daisy-chain css_release_work_fn()
from css_release() in that case. To avoid clashes with the various
stages of work item processing, though, we simply spend some extra
space in each css (my config's css grows to 232B + 32B) and create a
separate work_struct for each user.
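
Purely for illustration, a rough sketch of that rejected daisy-chain
variant could look as follows. This is not the applied change:
CSS_RELEASE_PENDING is a made-up flag, and the comment spells out why
the PENDING check alone is not sufficient.

	/* Sketch of the rejected alternative -- NOT the applied patch. */
	static void css_release(struct percpu_ref *ref)
	{
		struct cgroup_subsys_state *css =
			container_of(ref, struct cgroup_subsys_state, refcnt);

		if (work_pending(&css->destroy_work)) {
			/*
			 * Ask the kill path to chain css_release_work_fn()
			 * once it is done (CSS_RELEASE_PENDING is purely
			 * hypothetical).  WORK_STRUCT_PENDING_BIT is cleared
			 * before the work function runs, so this check alone
			 * misses css_release() racing with a still-executing
			 * css_killed_work_fn() -- the "clashes with various
			 * stages of the work item processing" mentioned
			 * above.
			 */
			css->flags |= CSS_RELEASE_PENDING;
			return;
		}

		INIT_WORK(&css->destroy_work, css_release_work_fn);
		queue_work(cgroup_destroy_wq, &css->destroy_work);
	}
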
Reported-by: syzbot+e42ae441c3b10acf9e9d@...kaller.appspotmail.com
Reported-by: Tadeusz Struk <tadeusz.struk@...aro.org>
Link: https://lore.kernel.org/r/20220412192459.227740-1-tadeusz.struk@linaro.org/
Signed-off-by: Tadeusz Struk <tadeusz.struk@...aro.org>
Signed-off-by: Michal Koutný <mkoutny@...e.com>
---
include/linux/cgroup-defs.h | 5 +++--
kernel/cgroup/cgroup.c | 14 +++++++-------
2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 1bfcfb1af352..16b99aa04305 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -178,8 +178,9 @@ struct cgroup_subsys_state {
 	 */
 	atomic_t online_cnt;
 
-	/* percpu_ref killing and RCU release */
-	struct work_struct destroy_work;
+	/* percpu_ref killing, css release, and RCU release work structs */
+	struct work_struct killed_ref_work;
+	struct work_struct release_work;
 	struct rcu_work destroy_rwork;
 
 	/*
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index a5b0d5d54fbc..33b3a44391d7 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -5102,7 +5102,7 @@ static struct cftype cgroup_base_files[] = {
  * css_free_work_fn().
  *
  * It is actually hairier because both step 2 and 4 require process context
- * and thus involve punting to css->destroy_work adding two additional
+ * and thus involve punting to css->release_work adding two additional
  * steps to the already complex sequence.
  */
 static void css_free_rwork_fn(struct work_struct *work)
@@ -5157,7 +5157,7 @@ static void css_free_rwork_fn(struct work_struct *work)
 static void css_release_work_fn(struct work_struct *work)
 {
 	struct cgroup_subsys_state *css =
-		container_of(work, struct cgroup_subsys_state, destroy_work);
+		container_of(work, struct cgroup_subsys_state, release_work);
 	struct cgroup_subsys *ss = css->ss;
 	struct cgroup *cgrp = css->cgroup;
 
@@ -5213,8 +5213,8 @@ static void css_release(struct percpu_ref *ref)
 	struct cgroup_subsys_state *css =
 		container_of(ref, struct cgroup_subsys_state, refcnt);
 
-	INIT_WORK(&css->destroy_work, css_release_work_fn);
-	queue_work(cgroup_destroy_wq, &css->destroy_work);
+	INIT_WORK(&css->release_work, css_release_work_fn);
+	queue_work(cgroup_destroy_wq, &css->release_work);
 }
 
 static void init_and_link_css(struct cgroup_subsys_state *css,
@@ -5549,7 +5549,7 @@ int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
 static void css_killed_work_fn(struct work_struct *work)
 {
 	struct cgroup_subsys_state *css =
-		container_of(work, struct cgroup_subsys_state, destroy_work);
+		container_of(work, struct cgroup_subsys_state, killed_ref_work);
 
 	mutex_lock(&cgroup_mutex);
 
@@ -5570,8 +5570,8 @@ static void css_killed_ref_fn(struct percpu_ref *ref)
 		container_of(ref, struct cgroup_subsys_state, refcnt);
 
 	if (atomic_dec_and_test(&css->online_cnt)) {
-		INIT_WORK(&css->destroy_work, css_killed_work_fn);
-		queue_work(cgroup_destroy_wq, &css->destroy_work);
+		INIT_WORK(&css->killed_ref_work, css_killed_work_fn);
+		queue_work(cgroup_destroy_wq, &css->killed_ref_work);
 	}
 }
 
--
2.35.3