[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20210526222557.3118114-2-guro@fb.com>
Date: Wed, 26 May 2021 15:25:56 -0700
From: Roman Gushchin <guro@...com>
To: Jan Kara <jack@...e.cz>, Tejun Heo <tj@...nel.org>
CC: <linux-fsdevel@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
<linux-mm@...ck.org>, Alexander Viro <viro@...iv.linux.org.uk>,
Dennis Zhou <dennis@...nel.org>,
Dave Chinner <dchinner@...hat.com>, <cgroups@...r.kernel.org>,
Roman Gushchin <guro@...com>
Subject: [PATCH v5 1/2] writeback, cgroup: keep list of inodes attached to bdi_writeback
Currently there is no way to iterate over inodes attached to a
specific cgwb structure. It limits the ability to efficiently
reclaim the writeback structure itself and associated memory and
block cgroup structures without scanning all inodes belonging to a sb,
which can be prohibitively expensive.
While dirty/in-active-writeback an inode belongs to one of the
bdi_writeback's io lists: b_dirty, b_io, b_more_io and b_dirty_time.
Once cleaned up, it's removed from all io lists. So the
inode->i_io_list can be reused to maintain the list of inodes,
attached to a bdi_writeback structure.
This patch introduces a new wb->b_attached list, which contains all
inodes which were dirty at least once and are attached to the given
cgwb. Inodes attached to the root bdi_writeback structures are never
placed on such list. The following patch will use this list to try to
release cgwbs structures more efficiently.
Suggested-by: Jan Kara <jack@...e.cz>
Signed-off-by: Roman Gushchin <guro@...com>
---
fs/fs-writeback.c | 66 ++++++++++++++++++++++++--------
include/linux/backing-dev-defs.h | 1 +
include/linux/backing-dev.h | 7 ++++
include/linux/writeback.h | 1 +
mm/backing-dev.c | 2 +
5 files changed, 60 insertions(+), 17 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e91980f49388..631ef6366293 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -135,18 +135,23 @@ static bool inode_io_list_move_locked(struct inode *inode,
* inode_io_list_del_locked - remove an inode from its bdi_writeback IO list
* @inode: inode to be removed
* @wb: bdi_writeback @inode is being removed from
+ * @final: inode is going to be freed and can't reappear on any IO list
*
* Remove @inode which may be on one of @wb->b_{dirty|io|more_io} lists and
* clear %WB_has_dirty_io if all are empty afterwards.
*/
static void inode_io_list_del_locked(struct inode *inode,
- struct bdi_writeback *wb)
+ struct bdi_writeback *wb,
+ bool final)
{
assert_spin_locked(&wb->list_lock);
assert_spin_locked(&inode->i_lock);
inode->i_state &= ~I_SYNC_QUEUED;
- list_del_init(&inode->i_io_list);
+ if (final)
+ list_del_init(&inode->i_io_list);
+ else
+ inode_cgwb_move_to_attached(inode, wb);
wb_io_lists_depopulated(wb);
}
@@ -278,6 +283,25 @@ void __inode_attach_wb(struct inode *inode, struct page *page)
}
EXPORT_SYMBOL_GPL(__inode_attach_wb);
+/**
+ * inode_cgwb_move_to_attached - put the inode onto wb->b_attached list
+ * @inode: inode of interest with i_lock held
+ * @wb: target bdi_writeback
+ *
+ * Remove the inode from wb's io lists and if necessarily put onto b_attached
+ * list. Only inodes attached to cgwb's are kept on this list.
+ */
+void inode_cgwb_move_to_attached(struct inode *inode, struct bdi_writeback *wb)
+{
+ assert_spin_locked(&wb->list_lock);
+ assert_spin_locked(&inode->i_lock);
+
+ if (wb != &wb->bdi->wb)
+ list_move(&inode->i_io_list, &wb->b_attached);
+ else
+ list_del_init(&inode->i_io_list);
+}
+
/**
* locked_inode_to_wb_and_lock_list - determine a locked inode's wb and lock it
* @inode: inode of interest with i_lock held
@@ -419,21 +443,29 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
wb_get(new_wb);
/*
- * Transfer to @new_wb's IO list if necessary. The specific list
- * @inode was on is ignored and the inode is put on ->b_dirty which
- * is always correct including from ->b_dirty_time. The transfer
- * preserves @inode->dirtied_when ordering.
+ * Transfer to @new_wb's IO list if necessary. If the @inode is dirty,
+ * the specific list @inode was on is ignored and the @inode is put on
+ * ->b_dirty which is always correct including from ->b_dirty_time.
+ * The transfer preserves @inode->dirtied_when ordering. If the @inode
+ * was clean, it means it was on the b_attached list, so move it onto
+ * the b_attached list of @new_wb.
*/
if (!list_empty(&inode->i_io_list)) {
- struct inode *pos;
-
- inode_io_list_del_locked(inode, old_wb);
+ inode_io_list_del_locked(inode, old_wb, true);
inode->i_wb = new_wb;
- list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
- if (time_after_eq(inode->dirtied_when,
- pos->dirtied_when))
- break;
- inode_io_list_move_locked(inode, new_wb, pos->i_io_list.prev);
+
+ if (inode->i_state & I_DIRTY_ALL) {
+ struct inode *pos;
+
+ list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
+ if (time_after_eq(inode->dirtied_when,
+ pos->dirtied_when))
+ break;
+ inode_io_list_move_locked(inode, new_wb,
+ pos->i_io_list.prev);
+ } else {
+ inode_cgwb_move_to_attached(inode, new_wb);
+ }
} else {
inode->i_wb = new_wb;
}
@@ -1124,7 +1156,7 @@ void inode_io_list_del(struct inode *inode)
wb = inode_to_wb_and_lock_list(inode);
spin_lock(&inode->i_lock);
- inode_io_list_del_locked(inode, wb);
+ inode_io_list_del_locked(inode, wb, true);
spin_unlock(&inode->i_lock);
spin_unlock(&wb->list_lock);
}
@@ -1437,7 +1469,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
inode->i_state &= ~I_SYNC_QUEUED;
} else {
/* The inode is clean. Remove from writeback lists. */
- inode_io_list_del_locked(inode, wb);
+ inode_io_list_del_locked(inode, wb, false);
}
}
@@ -1589,7 +1621,7 @@ static int writeback_single_inode(struct inode *inode,
* responsible for the writeback lists.
*/
if (!(inode->i_state & I_DIRTY_ALL))
- inode_io_list_del_locked(inode, wb);
+ inode_io_list_del_locked(inode, wb, false);
spin_unlock(&wb->list_lock);
inode_sync_complete(inode);
out:
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index fff9367a6348..e5dc238ebe4f 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -154,6 +154,7 @@ struct bdi_writeback {
struct cgroup_subsys_state *blkcg_css; /* and blkcg */
struct list_head memcg_node; /* anchored at memcg->cgwb_list */
struct list_head blkcg_node; /* anchored at blkcg->cgwb_list */
+ struct list_head b_attached; /* attached inodes, protected by list_lock */
union {
struct work_struct release_work;
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 44df4fcef65c..4256e66802e6 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -177,6 +177,7 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
void wb_memcg_offline(struct mem_cgroup *memcg);
void wb_blkcg_offline(struct blkcg *blkcg);
int inode_congested(struct inode *inode, int cong_bits);
+void inode_cgwb_move_to_attached(struct inode *inode, struct bdi_writeback *wb);
/**
* inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode
@@ -345,6 +346,12 @@ static inline bool inode_cgwb_enabled(struct inode *inode)
return false;
}
+static inline void inode_cgwb_move_to_attached(struct inode *inode,
+ struct bdi_writeback *wb)
+{
+ list_del_init(&inode->i_io_list);
+}
+
static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi)
{
return &bdi->wb;
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 8e5c5bb16e2d..572a13c40c90 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -212,6 +212,7 @@ static inline void wait_on_inode(struct inode *inode)
#include <linux/bio.h>
void __inode_attach_wb(struct inode *inode, struct page *page);
+void inode_cgwb_move_to_attached(struct inode *inode, struct bdi_writeback *wb);
void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
struct inode *inode)
__releases(&inode->i_lock);
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 576220acd686..54c5dc4b8c24 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -396,6 +396,7 @@ static void cgwb_release_workfn(struct work_struct *work)
fprop_local_destroy_percpu(&wb->memcg_completions);
percpu_ref_exit(&wb->refcnt);
wb_exit(wb);
+ WARN_ON_ONCE(!list_empty(&wb->b_attached));
kfree_rcu(wb, rcu);
}
@@ -472,6 +473,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
wb->memcg_css = memcg_css;
wb->blkcg_css = blkcg_css;
+ INIT_LIST_HEAD(&wb->b_attached);
INIT_WORK(&wb->release_work, cgwb_release_workfn);
set_bit(WB_registered, &wb->state);
--
2.31.1
Powered by blists - more mailing lists