lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20190624222951.37076-7-songliubraving@fb.com>
Date:   Mon, 24 Jun 2019 15:29:51 -0700
From:   Song Liu <songliubraving@...com>
To:     <linux-mm@...ck.org>, <linux-fsdevel@...r.kernel.org>,
        <linux-kernel@...r.kernel.org>
CC:     <matthew.wilcox@...cle.com>, <kirill.shutemov@...ux.intel.com>,
        <kernel-team@...com>, <william.kucharski@...cle.com>,
        <akpm@...ux-foundation.org>, <hdanton@...a.com>,
        Song Liu <songliubraving@...com>
Subject: [PATCH v8 6/6] mm,thp: avoid writes to file with THP in pagecache

In the previous patch, an application could put part of its text section
in THPs via madvise(). These THPs are protected from writes while the
application is still running (ETXTBSY). However, after the application
exits, the file becomes available for writes again.

This patch avoids writes to file THPs by dropping the page cache for the
file when the file is opened for write. A new counter, nr_thps, is added
to struct address_space. In do_last(), if the file is open for write and
nr_thps is non-zero, we drop the page cache for the whole file.

Reported-by: kbuild test robot <lkp@...el.com>
Signed-off-by: Song Liu <songliubraving@...com>
---
 fs/inode.c         |  2 ++
 fs/namei.c         | 23 ++++++++++++++++++++++-
 include/linux/fs.h | 28 ++++++++++++++++++++++++++++
 mm/filemap.c       |  1 +
 mm/khugepaged.c    |  4 +++-
 5 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index df6542ec3b88..7f27a5fd147b 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -181,6 +181,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 	mapping->flags = 0;
 	mapping->wb_err = 0;
 	atomic_set(&mapping->i_mmap_writable, 0);
+	if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS))
+		atomic_set(&mapping->nr_thps, 0);
 	mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
 	mapping->private_data = NULL;
 	mapping->writeback_index = 0;
diff --git a/fs/namei.c b/fs/namei.c
index 20831c2fbb34..3d95e94029cc 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3249,6 +3249,23 @@ static int lookup_open(struct nameidata *nd, struct path *path,
 	return error;
 }
 
+/*
+ * The file is open for write, so it is not mmapped with VM_DENYWRITE. If
+ * it still has THP in page cache, drop the whole file from pagecache
+ * before processing writes. This helps us avoid handling write back of
+ * THP for now.
+ */
+static inline void release_file_thp(struct file *file)
+{
+	if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS)) {
+		struct inode *inode = file_inode(file);
+
+		if (inode_is_open_for_write(inode) &&
+		    filemap_nr_thps(inode->i_mapping))
+			truncate_pagecache(inode, 0);
+	}
+}
+
 /*
  * Handle the last step of open()
  */
@@ -3418,7 +3435,11 @@ static int do_last(struct nameidata *nd,
 		goto out;
 opened:
 	error = ima_file_check(file, op->acc_mode);
-	if (!error && will_truncate)
+	if (error)
+		goto out;
+
+	release_file_thp(file);
+	if (will_truncate)
 		error = handle_truncate(file);
 out:
 	if (unlikely(error > 0)) {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f7fdfe93e25d..20443d63692e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -427,6 +427,7 @@ int pagecache_write_end(struct file *, struct address_space *mapping,
  * @i_pages: Cached pages.
  * @gfp_mask: Memory allocation flags to use for allocating pages.
  * @i_mmap_writable: Number of VM_SHARED mappings.
+ * @nr_thps: Number of THPs in the pagecache (non-shmem only).
  * @i_mmap: Tree of private and shared mappings.
  * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable.
  * @nrpages: Number of page entries, protected by the i_pages lock.
@@ -444,6 +445,10 @@ struct address_space {
 	struct xarray		i_pages;
 	gfp_t			gfp_mask;
 	atomic_t		i_mmap_writable;
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+	/* number of thp, only for non-shmem files */
+	atomic_t		nr_thps;
+#endif
 	struct rb_root_cached	i_mmap;
 	struct rw_semaphore	i_mmap_rwsem;
 	unsigned long		nrpages;
@@ -2790,6 +2795,29 @@ static inline errseq_t filemap_sample_wb_err(struct address_space *mapping)
 	return errseq_sample(&mapping->wb_err);
 }
 
+static inline int filemap_nr_thps(struct address_space *mapping)
+{
+	if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS))
+		return atomic_read(&mapping->nr_thps);
+	return 0;
+}
+
+static inline void filemap_nr_thps_inc(struct address_space *mapping)
+{
+	if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS))
+		atomic_inc(&mapping->nr_thps);
+	else
+		WARN_ON_ONCE(1);
+}
+
+static inline void filemap_nr_thps_dec(struct address_space *mapping)
+{
+	if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS))
+		atomic_dec(&mapping->nr_thps);
+	else
+		WARN_ON_ONCE(1);
+}
+
 extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
 			   int datasync);
 extern int vfs_fsync(struct file *file, int datasync);
diff --git a/mm/filemap.c b/mm/filemap.c
index e79ceccdc6df..a8e86c136381 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -205,6 +205,7 @@ static void unaccount_page_cache_page(struct address_space *mapping,
 			__dec_node_page_state(page, NR_SHMEM_THPS);
 	} else if (PageTransHuge(page)) {
 		__dec_node_page_state(page, NR_FILE_THPS);
+		filemap_nr_thps_dec(mapping);
 	}
 
 	/*
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index acbbbeaa083c..0bbc6be51197 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1503,8 +1503,10 @@ static void collapse_file(struct mm_struct *mm,
 
 	if (is_shmem)
 		__inc_node_page_state(new_page, NR_SHMEM_THPS);
-	else
+	else {
 		__inc_node_page_state(new_page, NR_FILE_THPS);
+		filemap_nr_thps_inc(mapping);
+	}
 
 	if (nr_none) {
 		struct zone *zone = page_zone(new_page);
-- 
2.17.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ