[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250408181705.GE816@cmpxchg.org>
Date: Tue, 8 Apr 2025 14:17:05 -0400
From: Johannes Weiner <hannes@...xchg.org>
To: Gregory Price <gourry@...rry.net>
Cc: linux-mm@...ck.org, cgroups@...r.kernel.org,
linux-kernel@...r.kernel.org, kernel-team@...a.com,
longman@...hat.com, tj@...nel.org, mkoutny@...e.com,
akpm@...ux-foundation.org
Subject: Re: [RFC PATCH] vmscan,cgroup: apply mems_effective to reclaim
On Thu, Mar 20, 2025 at 05:09:19PM -0400, Gregory Price wrote:
> @@ -4296,3 +4296,13 @@ void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
> seq_printf(m, "Mems_allowed_list:\t%*pbl\n",
> nodemask_pr_args(&task->mems_allowed));
> }
> +
> +bool memcg_mems_allowed(struct mem_cgroup *memcg, int nid)
This should probably be
cgroup_mems_allowed(struct cgroup *, int)
and then have a
mem_cgroup_mems_allowed(struct mem_cgroup *, int)
that does the e_css translation, with the necessary dummy functions to
work with all CONFIG combinations.
> +{
> + struct cgroup_subsys_state *css;
> + struct cpuset *cs;
> +
> + css = cgroup_get_e_css(memcg->css.cgroup, &cpuset_cgrp_subsys);
> + cs = css ? container_of(css, struct cpuset, css) : NULL;
> + return cs ? node_isset(nid, cs->effective_mems) : true;
You need a css_put() to drop the ref from cgroup_get_e_css(), but
otherwise accessing css should be safe this way.
As far as I can see, you also need to hold callback_lock to query cs->effective_mems.
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 2b2ab386cab5..04152ea1c03d 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -342,16 +342,22 @@ static void flush_reclaim_state(struct scan_control *sc)
> }
> }
>
> -static bool can_demote(int nid, struct scan_control *sc)
> +static bool can_demote(int nid, struct scan_control *sc,
> + struct mem_cgroup *memcg)
> {
> + int demotion_nid;
> +
> if (!numa_demotion_enabled)
> return false;
> if (sc && sc->no_demotion)
> return false;
> - if (next_demotion_node(nid) == NUMA_NO_NODE)
> +
> + demotion_nid = next_demotion_node(nid);
> + if (demotion_nid == NUMA_NO_NODE)
> return false;
>
> - return true;
> + /* If demotion node isn't in mems_allowed, fall back */
> + return memcg ? memcg_mems_allowed(memcg, demotion_nid) : true;
> }
>
> static inline bool can_reclaim_anon_pages(struct mem_cgroup *memcg,
> @@ -376,7 +382,7 @@ static inline bool can_reclaim_anon_pages(struct mem_cgroup *memcg,
> *
> * Can it be reclaimed from this node via demotion?
> */
> - return can_demote(nid, sc);
> + return can_demote(nid, sc, NULL);
This has appropriate memcg context from get_scan_count(); use that.
> @@ -2654,7 +2662,7 @@ static bool can_age_anon_pages(struct pglist_data *pgdat,
> return true;
>
> /* Also valuable if anon pages can be demoted: */
> - return can_demote(pgdat->node_id, sc);
> + return can_demote(pgdat->node_id, sc, NULL);
Make this take an lruvec, then pass lruvec_memcg() to can_demote().
shrink_lruvec() already has the lruvec.
kswapd_age_node() has to do the test from inside the memcg loop, since
demotion, and thus aging, now very much depends on each cgroup's policy.
> }
>
> #ifdef CONFIG_LRU_GEN
> @@ -2732,7 +2740,7 @@ static int get_swappiness(struct lruvec *lruvec, struct scan_control *sc)
> if (!sc->may_swap)
> return 0;
>
> - if (!can_demote(pgdat->node_id, sc) &&
> + if (!can_demote(pgdat->node_id, sc, NULL) &&
> mem_cgroup_get_nr_swap_pages(memcg) < MIN_LRU_BATCH)
> return 0;
This is MGLRU, so take it with a grain of salt, but that memcg looks appropriate to pass here.
> @@ -4695,7 +4703,7 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
> if (list_empty(&list))
> return scanned;
> retry:
> - reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false);
> + reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false, NULL);
This also seems to have appropriate lruvec/memcg context.
Powered by blists - more mailing lists