Date: Thu, 1 Feb 2024 18:08:35 +0800
From: Liu Shixin <liushixin2@...wei.com>
To: Alexander Viro <viro@...iv.linux.org.uk>, Christian Brauner
	<brauner@...nel.org>, Jan Kara <jack@...e.cz>, Matthew Wilcox
	<willy@...radead.org>, Andrew Morton <akpm@...ux-foundation.org>
CC: <linux-fsdevel@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
	<linux-mm@...ck.org>, Liu Shixin <liushixin2@...wei.com>
Subject: [PATCH 2/2] mm/readahead: limit sync readahead when there are too many active refaults

When a page fault is not for write and the refault distance is small,
the page is activated directly. If a file has many such pages, the
pages brought in by read-ahead are likely to be reclaimed again almost
immediately. In that situation read-ahead has no benefit and only
wastes IO, so count such pages and, once the count grows too large,
stop issuing read-ahead for a while until the count decays on its own.

'Too large' is defined as 10000, chosen experimentally; this value
solves the problem and is not disturbed by occasional active refaults.
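
For illustration only, the heuristic can be modelled in plain userspace
C as below. This is a simplified sketch of the idea, not kernel code;
the helper names note_page() and should_skip_readahead() are invented
here and are not part of this patch:

#include <stdbool.h>
#include <stdio.h>

#define ACTIVE_REFAULT_LIMIT	10000

struct ra_state {
	unsigned int active_refault;
};

/* Called for each page brought in by read-ahead: bump the counter on an
 * active refault, otherwise let it decay by one. */
static void note_page(struct ra_state *ra, bool active_refault)
{
	if (active_refault)
		ra->active_refault++;
	else if (ra->active_refault)
		ra->active_refault--;
}

/* Called at fault time: decay the counter by one, then skip read-ahead
 * while it is still above the limit. */
static bool should_skip_readahead(struct ra_state *ra)
{
	if (ra->active_refault)
		ra->active_refault--;
	return ra->active_refault >= ACTIVE_REFAULT_LIMIT;
}

int main(void)
{
	struct ra_state ra = { 0 };
	int i;

	for (i = 0; i < 20000; i++)	/* burst of active refaults */
		note_page(&ra, true);
	printf("after burst: skip read-ahead = %d\n",
	       should_skip_readahead(&ra));

	for (i = 0; i < 15000; i++)	/* counter decays over later faults */
		should_skip_readahead(&ra);
	printf("after decay: skip read-ahead = %d\n",
	       should_skip_readahead(&ra));
	return 0;
}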

Signed-off-by: Liu Shixin <liushixin2@...wei.com>
---
 include/linux/fs.h      |  2 ++
 include/linux/pagemap.h |  1 +
 mm/filemap.c            | 16 ++++++++++++++++
 mm/readahead.c          |  4 ++++
 4 files changed, 23 insertions(+)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index ed5966a704951..f2a1825442f5a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -960,6 +960,7 @@ struct fown_struct {
  *      the first of these pages is accessed.
  * @ra_pages: Maximum size of a readahead request, copied from the bdi.
  * @mmap_miss: How many mmap accesses missed in the page cache.
+ * @active_refault: Number of active page refaults.
  * @prev_pos: The last byte in the most recent read request.
  *
  * When this structure is passed to ->readahead(), the "most recent"
@@ -971,6 +972,7 @@ struct file_ra_state {
 	unsigned int async_size;
 	unsigned int ra_pages;
 	unsigned int mmap_miss;
+	unsigned int active_refault;
 	loff_t prev_pos;
 };
 
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 2df35e65557d2..da9eaf985dec4 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -1256,6 +1256,7 @@ struct readahead_control {
 	pgoff_t _index;
 	unsigned int _nr_pages;
 	unsigned int _batch_count;
+	unsigned int _active_refault;
 	bool _workingset;
 	unsigned long _pflags;
 };
diff --git a/mm/filemap.c b/mm/filemap.c
index 750e779c23db7..4de80592ab270 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3037,6 +3037,7 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start,
 
 #ifdef CONFIG_MMU
 #define MMAP_LOTSAMISS  (100)
+#define ACTIVE_REFAULT_LIMIT	(10000)
 /*
  * lock_folio_maybe_drop_mmap - lock the page, possibly dropping the mmap_lock
  * @vmf - the vm_fault for this fault.
@@ -3142,6 +3143,18 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
 	if (mmap_miss > MMAP_LOTSAMISS)
 		return fpin;
 
+	ractl._active_refault = READ_ONCE(ra->active_refault);
+	if (ractl._active_refault)
+		WRITE_ONCE(ra->active_refault, --ractl._active_refault);
+
+	/*
+	 * If there are a lot of active page refaults in this file, memory
+	 * reclaim is likely ongoing. Skip read-ahead since it would only
+	 * waste IO.
+	 */
+	if (ractl._active_refault >= ACTIVE_REFAULT_LIMIT)
+		return fpin;
+
 	/*
 	 * mmap read-around
 	 */
@@ -3151,6 +3164,9 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
 	ra->async_size = ra->ra_pages / 4;
 	ractl._index = ra->start;
 	page_cache_ra_order(&ractl, ra, 0);
+
+	WRITE_ONCE(ra->active_refault, ractl._active_refault);
+
 	return fpin;
 }
 
diff --git a/mm/readahead.c b/mm/readahead.c
index cc4abb67eb223..d79bb70a232c4 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -263,6 +263,10 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
 			folio_set_readahead(folio);
 		ractl->_workingset |= folio_test_workingset(folio);
 		ractl->_nr_pages++;
+		if (unlikely(folio_test_workingset(folio)))
+			ractl->_active_refault++;
+		else if (unlikely(ractl->_active_refault))
+			ractl->_active_refault--;
 	}
 
 	/*
-- 
2.25.1

