lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Thu, 26 Jan 2017 14:58:17 +0300
From:   "Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>
To:     "Theodore Ts'o" <tytso@....edu>,
        Andreas Dilger <adilger.kernel@...ger.ca>,
        Jan Kara <jack@...e.com>,
        Andrew Morton <akpm@...ux-foundation.org>
Cc:     Alexander Viro <viro@...iv.linux.org.uk>,
        Hugh Dickins <hughd@...gle.com>,
        Andrea Arcangeli <aarcange@...hat.com>,
        Dave Hansen <dave.hansen@...el.com>,
        Vlastimil Babka <vbabka@...e.cz>,
        Matthew Wilcox <willy@...radead.org>,
        Ross Zwisler <ross.zwisler@...ux.intel.com>,
        linux-ext4@...r.kernel.org, linux-fsdevel@...r.kernel.org,
        linux-kernel@...r.kernel.org, linux-mm@...ck.org,
        linux-block@...r.kernel.org,
        "Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>
Subject: [PATCHv6 35/37] ext4: reserve larger journal transaction for huge pages

If huge pages are enabled, then in the worst case -- with 2048 blocks
underlying a page, each possibly in a different block group -- we have much
more metadata to commit.

Let's update the estimates accordingly.

I was not able to trigger a bad situation without the patch, as it's hard to
construct a very fragmented filesystem, but hopefully this change will be
enough to address the concern.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@...ux.intel.com>
---
 fs/ext4/ext4_jbd2.h | 16 +++++++++++++---
 fs/ext4/inode.c     | 34 +++++++++++++++++++++++++++-------
 2 files changed, 40 insertions(+), 10 deletions(-)

diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index f97611171023..6e4e534d6e98 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -353,11 +353,21 @@ static inline int ext4_journal_restart(handle_t *handle, int nblocks)
 	return 0;
 }
 
+static inline int __ext4_journal_blocks_per_page(struct inode *inode, bool thp)
+{
+	int bpp = 0;
+	if (EXT4_JOURNAL(inode) != NULL) {
+		bpp = jbd2_journal_blocks_per_page(inode);
+		if (thp)
+			bpp <<= HPAGE_PMD_ORDER;
+	}
+	return bpp;
+}
+
 static inline int ext4_journal_blocks_per_page(struct inode *inode)
 {
-	if (EXT4_JOURNAL(inode) != NULL)
-		return jbd2_journal_blocks_per_page(inode);
-	return 0;
+	return __ext4_journal_blocks_per_page(inode,
+			(inode->i_flags & S_HUGE_MODE) != S_HUGE_NEVER);
 }
 
 static inline int ext4_journal_force_commit(journal_t *journal)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5bf68bbe65ec..c30562b6e685 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -141,6 +141,7 @@ static int __ext4_journalled_writepage(struct page *page, unsigned int len);
 static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
 static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
 				  int pextents);
+static int __ext4_writepage_trans_blocks(struct inode *inode, int bpp);
 
 /*
  * Test whether an inode is a fast symlink.
@@ -4496,6 +4497,21 @@ void ext4_set_inode_flags(struct inode *inode)
 	    !ext4_should_journal_data(inode) && !ext4_has_inline_data(inode) &&
 	    !ext4_encrypted_inode(inode))
 		new_fl |= S_DAX;
+
+	if ((new_fl & S_HUGE_MODE) != S_HUGE_NEVER &&
+			EXT4_JOURNAL(inode) != NULL) {
+		int bpp = __ext4_journal_blocks_per_page(inode, true);
+		int credits = __ext4_writepage_trans_blocks(inode, bpp);
+
+		if (EXT4_JOURNAL(inode)->j_max_transaction_buffers < credits) {
+			pr_warn_once("EXT4-fs (%s): "
+					"journal is too small for huge pages. "
+					"Disable huge pages support.\n",
+					inode->i_sb->s_id);
+			new_fl &= ~S_HUGE_MODE;
+		}
+	}
+
 	inode_set_flags(inode, new_fl,
 			S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
 }
@@ -5471,6 +5487,16 @@ static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
 	return ret;
 }
 
+static int __ext4_writepage_trans_blocks(struct inode *inode, int bpp)
+{
+	int ret = ext4_meta_trans_blocks(inode, bpp, bpp);
+
+	/* Account for data blocks for journalled mode */
+	if (ext4_should_journal_data(inode))
+		ret += bpp;
+	return ret;
+}
+
 /*
  * Calculate the total number of credits to reserve to fit
  * the modification of a single pages into a single transaction,
@@ -5484,14 +5510,8 @@ static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
 int ext4_writepage_trans_blocks(struct inode *inode)
 {
 	int bpp = ext4_journal_blocks_per_page(inode);
-	int ret;
-
-	ret = ext4_meta_trans_blocks(inode, bpp, bpp);
 
-	/* Account for data blocks for journalled mode */
-	if (ext4_should_journal_data(inode))
-		ret += bpp;
-	return ret;
+	return __ext4_writepage_trans_blocks(inode, bpp);
 }
 
 /*
-- 
2.11.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ