[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240430060612.2171650-3-shakeel.butt@linux.dev>
Date: Mon, 29 Apr 2024 23:06:06 -0700
From: Shakeel Butt <shakeel.butt@...ux.dev>
To: Andrew Morton <akpm@...ux-foundation.org>,
Johannes Weiner <hannes@...xchg.org>,
Michal Hocko <mhocko@...nel.org>,
Roman Gushchin <roman.gushchin@...ux.dev>,
Muchun Song <muchun.song@...ux.dev>,
Yosry Ahmed <yosryahmed@...gle.com>,
"T . J . Mercier" <tjmercier@...gle.com>
Cc: kernel-team@...a.com,
linux-mm@...ck.org,
linux-kernel@...r.kernel.org
Subject: [PATCH v3 2/8] memcg: dynamically allocate lruvec_stats
To decouple the dependency of lruvec_stats on NR_VM_NODE_STAT_ITEMS, we
need to dynamically allocate lruvec_stats in the mem_cgroup_per_node
structure. Also move the definition of lruvec_stats_percpu and
lruvec_stats and related functions to the memcontrol.c to facilitate
later patches. No functional changes in the patch.
Signed-off-by: Shakeel Butt <shakeel.butt@...ux.dev>
Reviewed-by: Yosry Ahmed <yosryahmed@...gle.com>
---
Changes since v2:
- N/A
include/linux/memcontrol.h | 62 +++------------------------
mm/memcontrol.c | 87 ++++++++++++++++++++++++++++++++------
2 files changed, 81 insertions(+), 68 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 9aba0d0462ca..ab8a6e884375 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -83,6 +83,8 @@ enum mem_cgroup_events_target {
struct memcg_vmstats_percpu;
struct memcg_vmstats;
+struct lruvec_stats_percpu;
+struct lruvec_stats;
struct mem_cgroup_reclaim_iter {
struct mem_cgroup *position;
@@ -90,25 +92,6 @@ struct mem_cgroup_reclaim_iter {
unsigned int generation;
};
-struct lruvec_stats_percpu {
- /* Local (CPU and cgroup) state */
- long state[NR_VM_NODE_STAT_ITEMS];
-
- /* Delta calculation for lockless upward propagation */
- long state_prev[NR_VM_NODE_STAT_ITEMS];
-};
-
-struct lruvec_stats {
- /* Aggregated (CPU and subtree) state */
- long state[NR_VM_NODE_STAT_ITEMS];
-
- /* Non-hierarchical (CPU aggregated) state */
- long state_local[NR_VM_NODE_STAT_ITEMS];
-
- /* Pending child counts during tree propagation */
- long state_pending[NR_VM_NODE_STAT_ITEMS];
-};
-
/*
* per-node information in memory controller.
*/
@@ -116,7 +99,7 @@ struct mem_cgroup_per_node {
struct lruvec lruvec;
struct lruvec_stats_percpu __percpu *lruvec_stats_percpu;
- struct lruvec_stats lruvec_stats;
+ struct lruvec_stats *lruvec_stats;
unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
@@ -1037,42 +1020,9 @@ static inline void mod_memcg_page_state(struct page *page,
}
unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx);
-
-static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
- enum node_stat_item idx)
-{
- struct mem_cgroup_per_node *pn;
- long x;
-
- if (mem_cgroup_disabled())
- return node_page_state(lruvec_pgdat(lruvec), idx);
-
- pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
- x = READ_ONCE(pn->lruvec_stats.state[idx]);
-#ifdef CONFIG_SMP
- if (x < 0)
- x = 0;
-#endif
- return x;
-}
-
-static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
- enum node_stat_item idx)
-{
- struct mem_cgroup_per_node *pn;
- long x = 0;
-
- if (mem_cgroup_disabled())
- return node_page_state(lruvec_pgdat(lruvec), idx);
-
- pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
- x = READ_ONCE(pn->lruvec_stats.state_local[idx]);
-#ifdef CONFIG_SMP
- if (x < 0)
- x = 0;
-#endif
- return x;
-}
+unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx);
+unsigned long lruvec_page_state_local(struct lruvec *lruvec,
+ enum node_stat_item idx);
void mem_cgroup_flush_stats(struct mem_cgroup *memcg);
void mem_cgroup_flush_stats_ratelimited(struct mem_cgroup *memcg);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c146187cda9c..7126459ec56a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -576,6 +576,60 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
return mz;
}
+struct lruvec_stats_percpu {
+ /* Local (CPU and cgroup) state */
+ long state[NR_VM_NODE_STAT_ITEMS];
+
+ /* Delta calculation for lockless upward propagation */
+ long state_prev[NR_VM_NODE_STAT_ITEMS];
+};
+
+struct lruvec_stats {
+ /* Aggregated (CPU and subtree) state */
+ long state[NR_VM_NODE_STAT_ITEMS];
+
+ /* Non-hierarchical (CPU aggregated) state */
+ long state_local[NR_VM_NODE_STAT_ITEMS];
+
+ /* Pending child counts during tree propagation */
+ long state_pending[NR_VM_NODE_STAT_ITEMS];
+};
+
+unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx)
+{
+ struct mem_cgroup_per_node *pn;
+ long x;
+
+ if (mem_cgroup_disabled())
+ return node_page_state(lruvec_pgdat(lruvec), idx);
+
+ pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+ x = READ_ONCE(pn->lruvec_stats->state[idx]);
+#ifdef CONFIG_SMP
+ if (x < 0)
+ x = 0;
+#endif
+ return x;
+}
+
+unsigned long lruvec_page_state_local(struct lruvec *lruvec,
+ enum node_stat_item idx)
+{
+ struct mem_cgroup_per_node *pn;
+ long x = 0;
+
+ if (mem_cgroup_disabled())
+ return node_page_state(lruvec_pgdat(lruvec), idx);
+
+ pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+ x = READ_ONCE(pn->lruvec_stats->state_local[idx]);
+#ifdef CONFIG_SMP
+ if (x < 0)
+ x = 0;
+#endif
+ return x;
+}
+
/* Subset of vm_event_item to report for memcg event stats */
static const unsigned int memcg_vm_event_stat[] = {
PGPGIN,
@@ -5491,18 +5545,25 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
if (!pn)
return 1;
+ pn->lruvec_stats = kzalloc_node(sizeof(struct lruvec_stats), GFP_KERNEL,
+ node);
+ if (!pn->lruvec_stats)
+ goto fail;
+
pn->lruvec_stats_percpu = alloc_percpu_gfp(struct lruvec_stats_percpu,
GFP_KERNEL_ACCOUNT);
- if (!pn->lruvec_stats_percpu) {
- kfree(pn);
- return 1;
- }
+ if (!pn->lruvec_stats_percpu)
+ goto fail;
lruvec_init(&pn->lruvec);
pn->memcg = memcg;
memcg->nodeinfo[node] = pn;
return 0;
+fail:
+ kfree(pn->lruvec_stats);
+ kfree(pn);
+ return 1;
}
static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
@@ -5513,6 +5574,7 @@ static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
return;
free_percpu(pn->lruvec_stats_percpu);
+ kfree(pn->lruvec_stats);
kfree(pn);
}
@@ -5865,18 +5927,19 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
for_each_node_state(nid, N_MEMORY) {
struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid];
- struct mem_cgroup_per_node *ppn = NULL;
+ struct lruvec_stats *lstats = pn->lruvec_stats;
+ struct lruvec_stats *plstats = NULL;
struct lruvec_stats_percpu *lstatc;
if (parent)
- ppn = parent->nodeinfo[nid];
+ plstats = parent->nodeinfo[nid]->lruvec_stats;
lstatc = per_cpu_ptr(pn->lruvec_stats_percpu, cpu);
for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
- delta = pn->lruvec_stats.state_pending[i];
+ delta = lstats->state_pending[i];
if (delta)
- pn->lruvec_stats.state_pending[i] = 0;
+ lstats->state_pending[i] = 0;
delta_cpu = 0;
v = READ_ONCE(lstatc->state[i]);
@@ -5887,12 +5950,12 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
}
if (delta_cpu)
- pn->lruvec_stats.state_local[i] += delta_cpu;
+ lstats->state_local[i] += delta_cpu;
if (delta) {
- pn->lruvec_stats.state[i] += delta;
- if (ppn)
- ppn->lruvec_stats.state_pending[i] += delta;
+ lstats->state[i] += delta;
+ if (plstats)
+ plstats->state_pending[i] += delta;
}
}
}
--
2.43.0
Powered by blists - more mailing lists