lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240326185032.72159-2-ryncsn@gmail.com>
Date: Wed, 27 Mar 2024 02:50:23 +0800
From: Kairui Song <ryncsn@...il.com>
To: linux-mm@...ck.org
Cc: "Huang, Ying" <ying.huang@...el.com>,
	Chris Li <chrisl@...nel.org>,
	Minchan Kim <minchan@...nel.org>,
	Barry Song <v-songbaohua@...o.com>,
	Ryan Roberts <ryan.roberts@....com>,
	Yu Zhao <yuzhao@...gle.com>,
	SeongJae Park <sj@...nel.org>,
	David Hildenbrand <david@...hat.com>,
	Yosry Ahmed <yosryahmed@...gle.com>,
	Johannes Weiner <hannes@...xchg.org>,
	Matthew Wilcox <willy@...radead.org>,
	Nhat Pham <nphamcs@...il.com>,
	Chengming Zhou <zhouchengming@...edance.com>,
	Andrew Morton <akpm@...ux-foundation.org>,
	linux-kernel@...r.kernel.org,
	Kairui Song <kasong@...cent.com>
Subject: [RFC PATCH 01/10] mm/filemap: split filemap storing logic into a standalone helper

From: Kairui Song <kasong@...cent.com>

The swap cache can reuse this part for multi-index support. There is no
performance change on the page cache side beyond noise:

Tested in an 8G memory cgroup with a 16G brd ramdisk.

  echo 3 > /proc/sys/vm/drop_caches

  fio -name=cached --numjobs=16 --filename=/mnt/test.img \
    --buffered=1 --ioengine=mmap --rw=randread --time_based \
    --ramp_time=30s --runtime=5m --group_reporting

Before:
bw (  MiB/s): min=  493, max= 3947, per=100.00%, avg=2625.56, stdev=25.74, samples=8651
iops        : min=126454, max=1010681, avg=672142.61, stdev=6590.48, samples=8651

After:
bw (  MiB/s): min=  298, max= 3840, per=100.00%, avg=2614.34, stdev=23.77, samples=8689
iops        : min=76464, max=983045, avg=669270.35, stdev=6084.31, samples=8689

Test results with THP (do a THP randread, then switch to 4K pages in the
hope that this triggers a lot of splitting):

  echo 3 > /proc/sys/vm/drop_caches

  fio -name=cached --numjobs=16 --filename=/mnt/test.img \
      --buffered=1 --ioengine=mmap -thp=1 --readonly \
      --rw=randread --time_based --ramp_time=30s --runtime=10m \
      --group_reporting

  fio -name=cached --numjobs=16 --filename=/mnt/test.img \
      --buffered=1 --ioengine=mmap \
      --rw=randread --time_based --runtime=5s --group_reporting

Before:
bw (  KiB/s): min= 4611, max=15370, per=100.00%, avg=8928.74, stdev=105.17, samples=19146
iops        : min= 1151, max= 3842, avg=2231.27, stdev=26.29, samples=19146

READ: bw=4635B/s (4635B/s), 4635B/s-4635B/s (4635B/s-4635B/s), io=64.0KiB (65.5kB), run=14137-14137msec

After:
bw (  KiB/s): min= 4691, max=15666, per=100.00%, avg=8890.30, stdev=104.53, samples=19056
iops        : min= 1167, max= 3913, avg=2218.68, stdev=26.15, samples=19056

READ: bw=4590B/s (4590B/s), 4590B/s-4590B/s (4590B/s-4590B/s), io=64.0KiB (65.5kB), run=14275-14275msec

Signed-off-by: Kairui Song <kasong@...cent.com>
---
 mm/filemap.c | 124 +++++++++++++++++++++++++++------------------------
 1 file changed, 65 insertions(+), 59 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 90b86f22a9df..0ccdc9e92764 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -848,38 +848,23 @@ void replace_page_cache_folio(struct folio *old, struct folio *new)
 }
 EXPORT_SYMBOL_GPL(replace_page_cache_folio);
 
-noinline int __filemap_add_folio(struct address_space *mapping,
-		struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp)
+static int __filemap_lock_store(struct xa_state *xas, struct folio *folio,
+				  pgoff_t index, gfp_t gfp, void **shadowp)
 {
-	XA_STATE(xas, &mapping->i_pages, index);
-	void *alloced_shadow = NULL;
-	int alloced_order = 0;
-	bool huge;
-	long nr;
-
-	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
-	VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
-	mapping_set_update(&xas, mapping);
-
-	VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
-	xas_set_order(&xas, index, folio_order(folio));
-	huge = folio_test_hugetlb(folio);
-	nr = folio_nr_pages(folio);
-
+	void *entry, *old, *alloced_shadow = NULL;
+	int order, split_order, alloced_order = 0;
 	gfp &= GFP_RECLAIM_MASK;
-	folio_ref_add(folio, nr);
-	folio->mapping = mapping;
-	folio->index = xas.xa_index;
 
 	for (;;) {
-		int order = -1, split_order = 0;
-		void *entry, *old = NULL;
+		order = -1;
+		split_order = 0;
+		old = NULL;
 
-		xas_lock_irq(&xas);
-		xas_for_each_conflict(&xas, entry) {
+		xas_lock_irq(xas);
+		xas_for_each_conflict(xas, entry) {
 			old = entry;
 			if (!xa_is_value(entry)) {
-				xas_set_err(&xas, -EEXIST);
+				xas_set_err(xas, -EEXIST);
 				goto unlock;
 			}
 			/*
@@ -887,72 +872,93 @@ noinline int __filemap_add_folio(struct address_space *mapping,
 			 * it will be the first and only entry iterated.
 			 */
 			if (order == -1)
-				order = xas_get_order(&xas);
+				order = xas_get_order(xas);
 		}
 
 		/* entry may have changed before we re-acquire the lock */
 		if (alloced_order && (old != alloced_shadow || order != alloced_order)) {
-			xas_destroy(&xas);
+			xas_destroy(xas);
 			alloced_order = 0;
 		}
 
 		if (old) {
 			if (order > 0 && order > folio_order(folio)) {
-				/* How to handle large swap entries? */
-				BUG_ON(shmem_mapping(mapping));
 				if (!alloced_order) {
 					split_order = order;
 					goto unlock;
 				}
-				xas_split(&xas, old, order);
-				xas_reset(&xas);
+				xas_split(xas, old, order);
+				xas_reset(xas);
 			}
 			if (shadowp)
 				*shadowp = old;
 		}
 
-		xas_store(&xas, folio);
-		if (xas_error(&xas))
-			goto unlock;
-
-		mapping->nrpages += nr;
-
-		/* hugetlb pages do not participate in page cache accounting */
-		if (!huge) {
-			__lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr);
-			if (folio_test_pmd_mappable(folio))
-				__lruvec_stat_mod_folio(folio,
-						NR_FILE_THPS, nr);
-		}
-
+		xas_store(xas, folio);
+		if (!xas_error(xas))
+			return 0;
 unlock:
-		xas_unlock_irq(&xas);
+		xas_unlock_irq(xas);
 
 		/* split needed, alloc here and retry. */
 		if (split_order) {
-			xas_split_alloc(&xas, old, split_order, gfp);
-			if (xas_error(&xas))
+			xas_split_alloc(xas, old, split_order, gfp);
+			if (xas_error(xas))
 				goto error;
 			alloced_shadow = old;
 			alloced_order = split_order;
-			xas_reset(&xas);
+			xas_reset(xas);
 			continue;
 		}
 
-		if (!xas_nomem(&xas, gfp))
+		if (!xas_nomem(xas, gfp))
 			break;
 	}
 
-	if (xas_error(&xas))
-		goto error;
-
-	trace_mm_filemap_add_to_page_cache(folio);
-	return 0;
 error:
-	folio->mapping = NULL;
-	/* Leave page->index set: truncation relies upon it */
-	folio_put_refs(folio, nr);
-	return xas_error(&xas);
+	return xas_error(xas);
+}
+
+noinline int __filemap_add_folio(struct address_space *mapping,
+		struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp)
+{
+	XA_STATE(xas, &mapping->i_pages, index);
+	bool huge;
+	long nr;
+	int ret;
+
+	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+	VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
+	mapping_set_update(&xas, mapping);
+
+	VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
+	xas_set_order(&xas, index, folio_order(folio));
+	huge = folio_test_hugetlb(folio);
+	nr = folio_nr_pages(folio);
+
+	folio_ref_add(folio, nr);
+	folio->mapping = mapping;
+	folio->index = xas.xa_index;
+
+	ret = __filemap_lock_store(&xas, folio, index, gfp, shadowp);
+	if (!ret) {
+		mapping->nrpages += nr;
+		/* hugetlb pages do not participate in page cache accounting */
+		if (!huge) {
+			__lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr);
+			if (folio_test_pmd_mappable(folio))
+				__lruvec_stat_mod_folio(folio,
+						NR_FILE_THPS, nr);
+		}
+		xas_unlock_irq(&xas);
+		trace_mm_filemap_add_to_page_cache(folio);
+	} else {
+		folio->mapping = NULL;
+		/* Leave page->index set: truncation relies upon it */
+		folio_put_refs(folio, nr);
+	}
+
+	return ret;
 }
 ALLOW_ERROR_INJECTION(__filemap_add_folio, ERRNO);
 
-- 
2.43.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ