[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240627201737.3506959-1-nphamcs@gmail.com>
Date: Thu, 27 Jun 2024 13:17:37 -0700
From: Nhat Pham <nphamcs@...il.com>
To: akpm@...ux-foundation.org
Cc: hannes@...xchg.org,
kernel-team@...a.com,
linux-mm@...ck.org,
linux-kernel@...r.kernel.org,
stable@...r.kernel.org,
willy@...radead.org,
david@...hat.com,
ryan.roberts@....com,
ying.huang@...el.com,
viro@...iv.linux.org.uk,
kasong@...cent.com,
yosryahmed@...gle.com,
shakeel.butt@...ux.dev,
linux-fsdevel@...r.kernel.org
Subject: [PATCH] cachestat: do not flush stats in recency check
syzbot detects that cachestat() is flushing stats, which can sleep, in
its RCU read section (see [1]). This is done in the
workingset_test_recent() step (which checks if the folio's eviction is
recent).
Move the stat flushing step to before the RCU read section of cachestat,
and skip stat flushing during the recency check.
[1]: https://lore.kernel.org/cgroups/000000000000f71227061bdf97e0@google.com/
Reported-by: syzbot+b7f13b2d0cc156edf61a@...kaller.appspotmail.com
Closes: https://lore.kernel.org/cgroups/000000000000f71227061bdf97e0@google.com/
Debugged-by: Johannes Weiner <hannes@...xchg.org>
Suggested-by: Johannes Weiner <hannes@...xchg.org>
Signed-off-by: Nhat Pham <nphamcs@...il.com>
Fixes: b00684722262 ("mm: workingset: move the stats flush into workingset_test_recent()")
Cc: stable@...r.kernel.org # v6.8+
---
include/linux/swap.h | 3 ++-
mm/filemap.c | 5 ++++-
mm/workingset.c | 14 +++++++++++---
3 files changed, 17 insertions(+), 5 deletions(-)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index bd450023b9a4..e685e93ba354 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -354,7 +354,8 @@ static inline swp_entry_t page_swap_entry(struct page *page)
}
/* linux/mm/workingset.c */
-bool workingset_test_recent(void *shadow, bool file, bool *workingset);
+bool workingset_test_recent(void *shadow, bool file, bool *workingset,
+ bool flush);
void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg);
void workingset_refault(struct folio *folio, void *shadow);
diff --git a/mm/filemap.c b/mm/filemap.c
index fedefb10d947..298485d4b992 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -4248,6 +4248,9 @@ static void filemap_cachestat(struct address_space *mapping,
XA_STATE(xas, &mapping->i_pages, first_index);
struct folio *folio;
+ /* Flush stats (and potentially sleep) outside the RCU read section. */
+ mem_cgroup_flush_stats_ratelimited(NULL);
+
rcu_read_lock();
xas_for_each(&xas, folio, last_index) {
int order;
@@ -4311,7 +4314,7 @@ static void filemap_cachestat(struct address_space *mapping,
goto resched;
}
#endif
- if (workingset_test_recent(shadow, true, &workingset))
+ if (workingset_test_recent(shadow, true, &workingset, false))
cs->nr_recently_evicted += nr_pages;
goto resched;
diff --git a/mm/workingset.c b/mm/workingset.c
index c22adb93622a..a2b28e356e68 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -412,10 +412,12 @@ void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg)
* @file: whether the corresponding folio is from the file lru.
* @workingset: where the workingset value unpacked from shadow should
* be stored.
+ * @flush: whether to flush cgroup rstat.
*
* Return: true if the shadow is for a recently evicted folio; false otherwise.
*/
-bool workingset_test_recent(void *shadow, bool file, bool *workingset)
+bool workingset_test_recent(void *shadow, bool file, bool *workingset,
+ bool flush)
{
struct mem_cgroup *eviction_memcg;
struct lruvec *eviction_lruvec;
@@ -467,10 +469,16 @@ bool workingset_test_recent(void *shadow, bool file, bool *workingset)
/*
* Flush stats (and potentially sleep) outside the RCU read section.
+ *
+ * Note that workingset_test_recent() itself might be called in RCU read
+ * section (for e.g, in cachestat) - these callers need to skip flushing
+ * stats (via the flush argument).
+ *
* XXX: With per-memcg flushing and thresholding, is ratelimiting
* still needed here?
*/
- mem_cgroup_flush_stats_ratelimited(eviction_memcg);
+ if (flush)
+ mem_cgroup_flush_stats_ratelimited(eviction_memcg);
eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat);
refault = atomic_long_read(&eviction_lruvec->nonresident_age);
@@ -558,7 +566,7 @@ void workingset_refault(struct folio *folio, void *shadow)
mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr);
- if (!workingset_test_recent(shadow, file, &workingset))
+ if (!workingset_test_recent(shadow, file, &workingset, true))
return;
folio_set_active(folio);
base-commit: a5c6fededf806aba1ff9b0f01278f7d089da5725
--
2.43.0
Powered by blists - more mailing lists