[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230330191801.1967435-7-yosryahmed@google.com>
Date: Thu, 30 Mar 2023 19:17:59 +0000
From: Yosry Ahmed <yosryahmed@...gle.com>
To: Tejun Heo <tj@...nel.org>, Josef Bacik <josef@...icpanda.com>,
Jens Axboe <axboe@...nel.dk>,
Zefan Li <lizefan.x@...edance.com>,
Johannes Weiner <hannes@...xchg.org>,
Michal Hocko <mhocko@...nel.org>,
Roman Gushchin <roman.gushchin@...ux.dev>,
Shakeel Butt <shakeelb@...gle.com>,
Muchun Song <muchun.song@...ux.dev>,
Andrew Morton <akpm@...ux-foundation.org>,
"Michal Koutný" <mkoutny@...e.com>
Cc: Vasily Averin <vasily.averin@...ux.dev>, cgroups@...r.kernel.org,
linux-block@...r.kernel.org, linux-kernel@...r.kernel.org,
linux-mm@...ck.org, bpf@...r.kernel.org,
Yosry Ahmed <yosryahmed@...gle.com>,
Michal Hocko <mhocko@...e.com>
Subject: [PATCH v3 6/8] workingset: memcg: sleep when flushing stats in workingset_refault()
In workingset_refault(), we call
mem_cgroup_flush_stats_atomic_ratelimited() to read accurate stats
within an RCU read section and with sleeping disallowed. Move the
call above the RCU read section to make it non-atomic.
Flushing is an expensive operation that scales with the number of cpus
and the number of cgroups in the system, so avoid doing it atomically
where possible.
Since workingset_refault() is the only caller of
mem_cgroup_flush_stats_atomic_ratelimited(), just make it non-atomic,
and rename it to mem_cgroup_flush_stats_ratelimited().
Signed-off-by: Yosry Ahmed <yosryahmed@...gle.com>
Acked-by: Shakeel Butt <shakeelb@...gle.com>
Acked-by: Johannes Weiner <hannes@...xchg.org>
Acked-by: Michal Hocko <mhocko@...e.com>
---
include/linux/memcontrol.h | 4 ++--
mm/memcontrol.c | 4 ++--
mm/workingset.c | 5 +++--
3 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b424ba3ebd09..a4bc3910a2eb 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1038,7 +1038,7 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
void mem_cgroup_flush_stats(void);
void mem_cgroup_flush_stats_atomic(void);
-void mem_cgroup_flush_stats_atomic_ratelimited(void);
+void mem_cgroup_flush_stats_ratelimited(void);
void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
int val);
@@ -1540,7 +1540,7 @@ static inline void mem_cgroup_flush_stats_atomic(void)
{
}
-static inline void mem_cgroup_flush_stats_atomic_ratelimited(void)
+static inline void mem_cgroup_flush_stats_ratelimited(void)
{
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a2ce3aa10d94..361c0bbf7283 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -673,10 +673,10 @@ void mem_cgroup_flush_stats_atomic(void)
do_flush_stats(true);
}
-void mem_cgroup_flush_stats_atomic_ratelimited(void)
+void mem_cgroup_flush_stats_ratelimited(void)
{
if (time_after64(jiffies_64, READ_ONCE(flush_next_time)))
- mem_cgroup_flush_stats_atomic();
+ mem_cgroup_flush_stats();
}
static void flush_memcg_stats_dwork(struct work_struct *w)
diff --git a/mm/workingset.c b/mm/workingset.c
index dab0c362b9e3..3025beee9b34 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -406,6 +406,9 @@ void workingset_refault(struct folio *folio, void *shadow)
unpack_shadow(shadow, &memcgid, &pgdat, &eviction, &workingset);
eviction <<= bucket_order;
+ /* Flush stats (and potentially sleep) before holding RCU read lock */
+ mem_cgroup_flush_stats_ratelimited();
+
rcu_read_lock();
/*
* Look up the memcg associated with the stored ID. It might
@@ -461,8 +464,6 @@ void workingset_refault(struct folio *folio, void *shadow)
lruvec = mem_cgroup_lruvec(memcg, pgdat);
mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr);
-
- mem_cgroup_flush_stats_atomic_ratelimited();
/*
* Compare the distance to the existing workingset size. We
* don't activate pages that couldn't stay resident even if
--
2.40.0.348.gf938b09366-goog
Powered by blists - more mailing lists