[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240326185032.72159-2-ryncsn@gmail.com>
Date: Wed, 27 Mar 2024 02:50:23 +0800
From: Kairui Song <ryncsn@...il.com>
To: linux-mm@...ck.org
Cc: "Huang, Ying" <ying.huang@...el.com>,
Chris Li <chrisl@...nel.org>,
Minchan Kim <minchan@...nel.org>,
Barry Song <v-songbaohua@...o.com>,
Ryan Roberts <ryan.roberts@....com>,
Yu Zhao <yuzhao@...gle.com>,
SeongJae Park <sj@...nel.org>,
David Hildenbrand <david@...hat.com>,
Yosry Ahmed <yosryahmed@...gle.com>,
Johannes Weiner <hannes@...xchg.org>,
Matthew Wilcox <willy@...radead.org>,
Nhat Pham <nphamcs@...il.com>,
Chengming Zhou <zhouchengming@...edance.com>,
Andrew Morton <akpm@...ux-foundation.org>,
linux-kernel@...r.kernel.org,
Kairui Song <kasong@...cent.com>
Subject: [RFC PATCH 01/10] mm/filemap: split filemap storing logic into a standalone helper
From: Kairui Song <kasong@...cent.com>
Swapcache can reuse this part for multi-index support. There is no
performance change on the page cache side beyond noise:
Tested in an 8G memory cgroup with a 16G brd ramdisk:
echo 3 > /proc/sys/vm/drop_caches
fio -name=cached --numjobs=16 --filename=/mnt/test.img \
--buffered=1 --ioengine=mmap --rw=randread --time_based \
--ramp_time=30s --runtime=5m --group_reporting
Before:
bw ( MiB/s): min= 493, max= 3947, per=100.00%, avg=2625.56, stdev=25.74, samples=8651
iops : min=126454, max=1010681, avg=672142.61, stdev=6590.48, samples=8651
After:
bw ( MiB/s): min= 298, max= 3840, per=100.00%, avg=2614.34, stdev=23.77, samples=8689
iops : min=76464, max=983045, avg=669270.35, stdev=6084.31, samples=8689
Test results with THP (do a THP randread, then switch to 4K pages in the
hope that it triggers a lot of splitting):
echo 3 > /proc/sys/vm/drop_caches
fio -name=cached --numjobs=16 --filename=/mnt/test.img \
--buffered=1 --ioengine=mmap -thp=1 --readonly \
--rw=randread --time_based --ramp_time=30s --runtime=10m \
--group_reporting
fio -name=cached --numjobs=16 --filename=/mnt/test.img \
--buffered=1 --ioengine=mmap \
--rw=randread --time_based --runtime=5s --group_reporting
Before:
bw ( KiB/s): min= 4611, max=15370, per=100.00%, avg=8928.74, stdev=105.17, samples=19146
iops : min= 1151, max= 3842, avg=2231.27, stdev=26.29, samples=19146
READ: bw=4635B/s (4635B/s), 4635B/s-4635B/s (4635B/s-4635B/s), io=64.0KiB (65.5kB), run=14137-14137msec
After:
bw ( KiB/s): min= 4691, max=15666, per=100.00%, avg=8890.30, stdev=104.53, samples=19056
iops : min= 1167, max= 3913, avg=2218.68, stdev=26.15, samples=19056
READ: bw=4590B/s (4590B/s), 4590B/s-4590B/s (4590B/s-4590B/s), io=64.0KiB (65.5kB), run=14275-14275msec
Signed-off-by: Kairui Song <kasong@...cent.com>
---
mm/filemap.c | 124 +++++++++++++++++++++++++++------------------------
1 file changed, 65 insertions(+), 59 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 90b86f22a9df..0ccdc9e92764 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -848,38 +848,23 @@ void replace_page_cache_folio(struct folio *old, struct folio *new)
}
EXPORT_SYMBOL_GPL(replace_page_cache_folio);
-noinline int __filemap_add_folio(struct address_space *mapping,
- struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp)
+static int __filemap_lock_store(struct xa_state *xas, struct folio *folio,
+ pgoff_t index, gfp_t gfp, void **shadowp)
{
- XA_STATE(xas, &mapping->i_pages, index);
- void *alloced_shadow = NULL;
- int alloced_order = 0;
- bool huge;
- long nr;
-
- VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
- VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
- mapping_set_update(&xas, mapping);
-
- VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
- xas_set_order(&xas, index, folio_order(folio));
- huge = folio_test_hugetlb(folio);
- nr = folio_nr_pages(folio);
-
+ void *entry, *old, *alloced_shadow = NULL;
+ int order, split_order, alloced_order = 0;
gfp &= GFP_RECLAIM_MASK;
- folio_ref_add(folio, nr);
- folio->mapping = mapping;
- folio->index = xas.xa_index;
for (;;) {
- int order = -1, split_order = 0;
- void *entry, *old = NULL;
+ order = -1;
+ split_order = 0;
+ old = NULL;
- xas_lock_irq(&xas);
- xas_for_each_conflict(&xas, entry) {
+ xas_lock_irq(xas);
+ xas_for_each_conflict(xas, entry) {
old = entry;
if (!xa_is_value(entry)) {
- xas_set_err(&xas, -EEXIST);
+ xas_set_err(xas, -EEXIST);
goto unlock;
}
/*
@@ -887,72 +872,93 @@ noinline int __filemap_add_folio(struct address_space *mapping,
* it will be the first and only entry iterated.
*/
if (order == -1)
- order = xas_get_order(&xas);
+ order = xas_get_order(xas);
}
/* entry may have changed before we re-acquire the lock */
if (alloced_order && (old != alloced_shadow || order != alloced_order)) {
- xas_destroy(&xas);
+ xas_destroy(xas);
alloced_order = 0;
}
if (old) {
if (order > 0 && order > folio_order(folio)) {
- /* How to handle large swap entries? */
- BUG_ON(shmem_mapping(mapping));
if (!alloced_order) {
split_order = order;
goto unlock;
}
- xas_split(&xas, old, order);
- xas_reset(&xas);
+ xas_split(xas, old, order);
+ xas_reset(xas);
}
if (shadowp)
*shadowp = old;
}
- xas_store(&xas, folio);
- if (xas_error(&xas))
- goto unlock;
-
- mapping->nrpages += nr;
-
- /* hugetlb pages do not participate in page cache accounting */
- if (!huge) {
- __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr);
- if (folio_test_pmd_mappable(folio))
- __lruvec_stat_mod_folio(folio,
- NR_FILE_THPS, nr);
- }
-
+ xas_store(xas, folio);
+ if (!xas_error(xas))
+ return 0;
unlock:
- xas_unlock_irq(&xas);
+ xas_unlock_irq(xas);
/* split needed, alloc here and retry. */
if (split_order) {
- xas_split_alloc(&xas, old, split_order, gfp);
- if (xas_error(&xas))
+ xas_split_alloc(xas, old, split_order, gfp);
+ if (xas_error(xas))
goto error;
alloced_shadow = old;
alloced_order = split_order;
- xas_reset(&xas);
+ xas_reset(xas);
continue;
}
- if (!xas_nomem(&xas, gfp))
+ if (!xas_nomem(xas, gfp))
break;
}
- if (xas_error(&xas))
- goto error;
-
- trace_mm_filemap_add_to_page_cache(folio);
- return 0;
error:
- folio->mapping = NULL;
- /* Leave page->index set: truncation relies upon it */
- folio_put_refs(folio, nr);
- return xas_error(&xas);
+ return xas_error(xas);
+}
+
+noinline int __filemap_add_folio(struct address_space *mapping,
+ struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp)
+{
+ XA_STATE(xas, &mapping->i_pages, index);
+ bool huge;
+ long nr;
+ int ret;
+
+ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+ VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
+ mapping_set_update(&xas, mapping);
+
+ VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
+ xas_set_order(&xas, index, folio_order(folio));
+ huge = folio_test_hugetlb(folio);
+ nr = folio_nr_pages(folio);
+
+ folio_ref_add(folio, nr);
+ folio->mapping = mapping;
+ folio->index = xas.xa_index;
+
+ ret = __filemap_lock_store(&xas, folio, index, gfp, shadowp);
+ if (!ret) {
+ mapping->nrpages += nr;
+ /* hugetlb pages do not participate in page cache accounting */
+ if (!huge) {
+ __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr);
+ if (folio_test_pmd_mappable(folio))
+ __lruvec_stat_mod_folio(folio,
+ NR_FILE_THPS, nr);
+ }
+ xas_unlock_irq(&xas);
+ trace_mm_filemap_add_to_page_cache(folio);
+ } else {
+ folio->mapping = NULL;
+ /* Leave page->index set: truncation relies upon it */
+ folio_put_refs(folio, nr);
+ }
+
+ return ret;
}
ALLOW_ERROR_INJECTION(__filemap_add_folio, ERRNO);
--
2.43.0
Powered by blists - more mailing lists