lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Tue,  6 Jan 2015 16:26:18 -0500
From:	Tejun Heo <tj@...nel.org>
To:	axboe@...nel.dk
Cc:	linux-kernel@...r.kernel.org, jack@...e.cz, hch@...radead.org,
	hannes@...xchg.org, linux-fsdevel@...r.kernel.org,
	vgoyal@...hat.com, lizefan@...wei.com, cgroups@...r.kernel.org,
	linux-mm@...ck.org, mhocko@...e.cz, clm@...com,
	fengguang.wu@...el.com, david@...morbit.com,
	Tejun Heo <tj@...nel.org>
Subject: [PATCH 41/45] writeback: make __writeback_single_inode() cgroup writeback aware

Metadata is always dirtied against the root cgroup and should thus be
written out only by the root cgroup writeback.  This patch updates
__writeback_single_inode() so that it skips writing out metadata if
the writeback is for a non-root cgroup.  wbc_skip_metadata() is added
to decide whether to skip metadata writeback.

Signed-off-by: Tejun Heo <tj@...nel.org>
Cc: Jens Axboe <axboe@...nel.dk>
Cc: Jan Kara <jack@...e.cz>
---
 fs/fs-writeback.c | 29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index dbfd0b0..2bb14d5 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -684,6 +684,19 @@ static inline bool iwbl_still_has_dirty_pages(struct inode_wb_link *iwbl,
 	return test_bit(IWBL_DIRTY_PAGES, &iwbl->data);
 }
 
+/**
+ * wbc_skip_metadata - determine whether to skip writing out metadata
+ * @wbc: writeback_control in effect
+ *
+ * Called by __writeback_single_inode() to decide whether to skip writing
+ * out metadata.  Metadata is always dirtied against the root cgroup and
+ * should only be written out by the root cgroup's writeback.
+ */
+static inline bool wbc_skip_metadata(struct writeback_control *wbc)
+{
+	return wbc->iwbl && !iwbl_is_root(wbc->iwbl);
+}
+
 #else	/* CONFIG_CGROUP_WRITEBACK */
 
 static void init_cgwb_dirty_page_context(struct dirty_context *dctx)
@@ -791,6 +804,11 @@ static inline bool iwbl_still_has_dirty_pages(struct inode_wb_link *iwbl,
 	return mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY);
 }
 
+static inline bool wbc_skip_metadata(struct writeback_control *wbc)
+{
+	return false;
+}
+
 #endif	/* CONFIG_CGROUP_WRITEBACK */
 
 /**
@@ -1128,6 +1146,7 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	struct address_space *mapping = inode->i_mapping;
 	struct inode_wb_link *iwbl = inode_writeback_iwbl(inode, wbc);
 	long nr_to_write = wbc->nr_to_write;
+	bool skip_metadata = wbc_skip_metadata(wbc);
 	unsigned dirty;
 	int ret;
 
@@ -1144,7 +1163,7 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	 * separate, external IO completion path and ->sync_fs for guaranteeing
 	 * inode metadata is written back correctly.
 	 */
-	if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) {
+	if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync && !skip_metadata) {
 		int err = filemap_fdatawait(mapping);
 		if (ret == 0)
 			ret = err;
@@ -1157,8 +1176,12 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	 */
 	spin_lock(&inode->i_lock);
 
-	dirty = inode->i_state & I_DIRTY;
-	inode->i_state &= ~I_DIRTY;
+	if (skip_metadata)
+		dirty = inode->i_state & I_DIRTY_PAGES;
+	else
+		dirty = inode->i_state & I_DIRTY;
+
+	inode->i_state &= ~dirty;
 
 	/*
 	 * Paired with smp_mb() in __mark_inode_dirty_dctx().  This allows
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ