Message-ID: <Z5eBWRRINH5RaeRk@tiehlicka>
Date: Mon, 27 Jan 2025 13:51:37 +0100
From: Michal Hocko <mhocko@...e.com>
To: Johannes Weiner <hannes@...xchg.org>
Cc: Andrew Morton <akpm@...ux-foundation.org>,
Roman Gushchin <roman.gushchin@...ux.dev>,
Shakeel Butt <shakeel.butt@...ux.dev>,
Muchun Song <muchun.song@...ux.dev>, linux-mm@...ck.org,
cgroups@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH] mm: memcontrol: move memsw charge callbacks to v1

On Fri 24-01-25 00:41:32, Johannes Weiner wrote:
> The interweaving of two entirely different swap accounting strategies
> has been one of the more confusing parts of the memcg code. Split out
> the v1 code to clarify the implementation and a handful of callsites,
> and to avoid building the v1 bits when !CONFIG_MEMCG_V1.
>
> text data bss dec hex filename
> 39253 6446 4160 49859 c2c3 mm/memcontrol.o.old
> 38877 6382 4160 49419 c10b mm/memcontrol.o
>
> Signed-off-by: Johannes Weiner <hannes@...xchg.org>

Acked-by: Michal Hocko <mhocko@...e.com>

Thanks!
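
For anyone reading along who is less familiar with the two schemes this
patch disentangles: v1 has a single "memsw" counter covering memory and
swap combined, while v2 accounts memory and swap independently. A quick
userspace toy model of the difference on swapout (made-up names, not
kernel code, just the bookkeeping idea):

	#include <stdio.h>

	struct v1_group { long memory; long memsw; }; /* memsw = mem+swap */
	struct v2_group { long memory; long swap;  }; /* independent      */

	/* v1 swapout: the memsw charge follows the page to the slot. */
	static void v1_swapout(struct v1_group *g, long nr)
	{
		g->memory -= nr;	/* page leaves memory...             */
					/* ...memsw untouched: slot keeps it */
	}

	/* v2 swapout: memory uncharged, swap charged separately. */
	static void v2_swapout(struct v2_group *g, long nr)
	{
		g->memory -= nr;
		g->swap += nr;
	}

	int main(void)
	{
		struct v1_group v1 = { .memory = 4, .memsw = 4 };
		struct v2_group v2 = { .memory = 4, .swap = 0 };

		v1_swapout(&v1, 1);
		v2_swapout(&v2, 1);
		printf("v1: memory=%ld memsw=%ld\n", v1.memory, v1.memsw);
		printf("v2: memory=%ld swap=%ld\n", v2.memory, v2.swap);
		return 0;
	}

Keeping these two models on separate code paths makes each one much
easier to follow.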
> ---
> include/linux/memcontrol.h | 17 +++--
> include/linux/swap.h | 5 --
> mm/huge_memory.c | 2 +-
> mm/memcontrol-v1.c | 89 ++++++++++++++++++++++++-
> mm/memcontrol-v1.h | 6 +-
> mm/memcontrol.c | 129 ++++++-------------------------------
> mm/memory.c | 2 +-
> mm/shmem.c | 2 +-
> mm/swap_state.c | 2 +-
> mm/vmscan.c | 2 +-
> 10 files changed, 126 insertions(+), 130 deletions(-)
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 6e74b8254d9b..57664e2a8fb7 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -649,8 +649,6 @@ int mem_cgroup_charge_hugetlb(struct folio *folio, gfp_t gfp);
> int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm,
> gfp_t gfp, swp_entry_t entry);
>
> -void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry, unsigned int nr_pages);
> -
> void __mem_cgroup_uncharge(struct folio *folio);
>
> /**
> @@ -1165,10 +1163,6 @@ static inline int mem_cgroup_swapin_charge_folio(struct folio *folio,
> return 0;
> }
>
> -static inline void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry, unsigned int nr)
> -{
> -}
> -
> static inline void mem_cgroup_uncharge(struct folio *folio)
> {
> }
> @@ -1848,6 +1842,9 @@ static inline void mem_cgroup_exit_user_fault(void)
> current->in_user_fault = 0;
> }
>
> +void memcg1_swapout(struct folio *folio, swp_entry_t entry);
> +void memcg1_swapin(swp_entry_t entry, unsigned int nr_pages);
> +
> #else /* CONFIG_MEMCG_V1 */
> static inline
> unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order,
> @@ -1875,6 +1872,14 @@ static inline void mem_cgroup_exit_user_fault(void)
> {
> }
>
> +static inline void memcg1_swapout(struct folio *folio, swp_entry_t entry)
> +{
> +}
> +
> +static inline void memcg1_swapin(swp_entry_t entry, unsigned int nr_pages)
> +{
> +}
> +
> #endif /* CONFIG_MEMCG_V1 */
>
> #endif /* _LINUX_MEMCONTROL_H */
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index b13b72645db3..91b30701274e 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -659,7 +659,6 @@ static inline void folio_throttle_swaprate(struct folio *folio, gfp_t gfp)
> #endif
>
> #if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP)
> -void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry);
> int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry);
> static inline int mem_cgroup_try_charge_swap(struct folio *folio,
> swp_entry_t entry)
> @@ -680,10 +679,6 @@ static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_p
> extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg);
> extern bool mem_cgroup_swap_full(struct folio *folio);
> #else
> -static inline void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
> -{
> -}
> -
> static inline int mem_cgroup_try_charge_swap(struct folio *folio,
> swp_entry_t entry)
> {
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 3d3ebdc002d5..c40b42a1015a 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -3740,7 +3740,7 @@ void deferred_split_folio(struct folio *folio, bool partially_mapped)
>
> /*
> * Exclude swapcache: originally to avoid a corrupt deferred split
> - * queue. Nowadays that is fully prevented by mem_cgroup_swapout();
> + * queue. Nowadays that is fully prevented by memcg1_swapout();
> * but if page reclaim is already handling the same folio, it is
> * unnecessary to handle it again in the shrinker, so excluding
> * swapcache here may still be a useful optimization.
> diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c
> index 6d184fae0ad1..1d16a99fb964 100644
> --- a/mm/memcontrol-v1.c
> +++ b/mm/memcontrol-v1.c
> @@ -581,8 +581,59 @@ void memcg1_commit_charge(struct folio *folio, struct mem_cgroup *memcg)
> local_irq_restore(flags);
> }
>
> -void memcg1_swapout(struct folio *folio, struct mem_cgroup *memcg)
> +/**
> + * memcg1_swapout - transfer a memsw charge to swap
> + * @folio: folio whose memsw charge to transfer
> + * @entry: swap entry to move the charge to
> + *
> + * Transfer the memsw charge of @folio to @entry.
> + */
> +void memcg1_swapout(struct folio *folio, swp_entry_t entry)
> {
> + struct mem_cgroup *memcg, *swap_memcg;
> + unsigned int nr_entries;
> +
> + VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
> + VM_BUG_ON_FOLIO(folio_ref_count(folio), folio);
> +
> + if (mem_cgroup_disabled())
> + return;
> +
> + if (!do_memsw_account())
> + return;
> +
> + memcg = folio_memcg(folio);
> +
> + VM_WARN_ON_ONCE_FOLIO(!memcg, folio);
> + if (!memcg)
> + return;
> +
> + /*
> + * In case the memcg owning these pages has been offlined and doesn't
> + * have an ID allocated to it anymore, charge the closest online
> + * ancestor for the swap instead and transfer the memory+swap charge.
> + */
> + swap_memcg = mem_cgroup_id_get_online(memcg);
> + nr_entries = folio_nr_pages(folio);
> + /* Get references for the tail pages, too */
> + if (nr_entries > 1)
> + mem_cgroup_id_get_many(swap_memcg, nr_entries - 1);
> + mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries);
> +
> + swap_cgroup_record(folio, entry);
> +
> + folio_unqueue_deferred_split(folio);
> + folio->memcg_data = 0;
> +
> + if (!mem_cgroup_is_root(memcg))
> + page_counter_uncharge(&memcg->memory, nr_entries);
> +
> + if (memcg != swap_memcg) {
> + if (!mem_cgroup_is_root(swap_memcg))
> + page_counter_charge(&swap_memcg->memsw, nr_entries);
> + page_counter_uncharge(&memcg->memsw, nr_entries);
> + }
> +
> /*
> * Interrupts should be disabled here because the caller holds the
> * i_pages lock which is taken with interrupts-off. It is
> @@ -594,6 +645,42 @@ void memcg1_swapout(struct folio *folio, struct mem_cgroup *memcg)
> memcg1_charge_statistics(memcg, -folio_nr_pages(folio));
> preempt_enable_nested();
> memcg1_check_events(memcg, folio_nid(folio));
> +
> + css_put(&memcg->css);
> +}
> +
> +/*
> + * memcg1_swapin - uncharge swap slot
> + * @entry: the first swap entry for which the pages are charged
> + * @nr_pages: number of pages which will be uncharged
> + *
> + * Call this function after successfully adding the charged page to swapcache.
> + *
> + * Note: This function assumes the page for which the swap slot is being
> + * uncharged is an order-0 page.
> + */
> +void memcg1_swapin(swp_entry_t entry, unsigned int nr_pages)
> +{
> + /*
> + * Cgroup1's unified memory+swap counter has been charged with the
> + * new swapcache page, finish the transfer by uncharging the swap
> + * slot. The swap slot would also get uncharged when it dies, but
> + * it can stick around indefinitely and we'd count the page twice
> + * the entire time.
> + *
> + * Cgroup2 has separate resource counters for memory and swap,
> + * so this is a non-issue here. Memory and swap charge lifetimes
> + * correspond 1:1 to page and swap slot lifetimes: we charge the
> + * page to memory here, and uncharge swap when the slot is freed.
> + */
> + if (do_memsw_account()) {
> + /*
> + * The swap entry might not get freed for a long time,
> + * let's not wait for it. The page already received a
> + * memory+swap charge, drop the swap entry duplicate.
> + */
> + mem_cgroup_uncharge_swap(entry, nr_pages);
> + }
> }
>
> void memcg1_uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
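
As a side note for future readers, the double-counting scenario the
comment above describes can be sketched in a few lines (toy counters
again, hypothetical names, not kernel code):

	#include <assert.h>

	struct v1_group { long memory; long memsw; };

	int main(void)
	{
		/* One charged page: */
		struct v1_group g = { .memory = 1, .memsw = 1 };

		/* Swapout: the page leaves memory, but the memsw charge
		 * stays behind with the swap slot (memcg1_swapout()): */
		g.memory -= 1;

		/* Swapin: the new swapcache page is charged to both
		 * counters again, so memsw now counts the data twice: */
		g.memory += 1;
		g.memsw += 1;
		assert(g.memsw == 2);

		/* memcg1_swapin() drops the slot's duplicate right away
		 * rather than waiting for the slot to die: */
		g.memsw -= 1;
		assert(g.memory == 1 && g.memsw == 1);
		return 0;
	}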
> diff --git a/mm/memcontrol-v1.h b/mm/memcontrol-v1.h
> index 4c8f36430fe9..1dc759e65471 100644
> --- a/mm/memcontrol-v1.h
> +++ b/mm/memcontrol-v1.h
> @@ -39,6 +39,9 @@ unsigned long memcg_page_state_output(struct mem_cgroup *memcg, int item);
> unsigned long memcg_page_state_local_output(struct mem_cgroup *memcg, int item);
> int memory_stat_show(struct seq_file *m, void *v);
>
> +void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n);
> +struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg);
> +
> /* Cgroup v1-specific declarations */
> #ifdef CONFIG_MEMCG_V1
>
> @@ -69,7 +72,6 @@ void memcg1_oom_finish(struct mem_cgroup *memcg, bool locked);
> void memcg1_oom_recover(struct mem_cgroup *memcg);
>
> void memcg1_commit_charge(struct folio *folio, struct mem_cgroup *memcg);
> -void memcg1_swapout(struct folio *folio, struct mem_cgroup *memcg);
> void memcg1_uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
> unsigned long nr_memory, int nid);
>
> @@ -107,8 +109,6 @@ static inline void memcg1_oom_recover(struct mem_cgroup *memcg) {}
> static inline void memcg1_commit_charge(struct folio *folio,
> struct mem_cgroup *memcg) {}
>
> -static inline void memcg1_swapout(struct folio *folio, struct mem_cgroup *memcg) {}
> -
> static inline void memcg1_uncharge_batch(struct mem_cgroup *memcg,
> unsigned long pgpgout,
> unsigned long nr_memory, int nid) {}
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 818143b81760..a95cb3fbb2c8 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -3377,7 +3377,7 @@ static void mem_cgroup_id_remove(struct mem_cgroup *memcg)
> }
> }
>
> -static void __maybe_unused mem_cgroup_id_get_many(struct mem_cgroup *memcg,
> +void mem_cgroup_id_get_many(struct mem_cgroup *memcg,
> unsigned int n)
> {
> refcount_add(n, &memcg->id.ref);
> @@ -3398,6 +3398,24 @@ static inline void mem_cgroup_id_put(struct mem_cgroup *memcg)
> mem_cgroup_id_put_many(memcg, 1);
> }
>
> +struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
> +{
> + while (!refcount_inc_not_zero(&memcg->id.ref)) {
> + /*
> + * The root cgroup cannot be destroyed, so its refcount must
> + * always be >= 1.
> + */
> + if (WARN_ON_ONCE(mem_cgroup_is_root(memcg))) {
> + VM_BUG_ON(1);
> + break;
> + }
> + memcg = parent_mem_cgroup(memcg);
> + if (!memcg)
> + memcg = root_mem_cgroup;
> + }
> + return memcg;
> +}
> +
> /**
> * mem_cgroup_from_id - look up a memcg from a memcg id
> * @id: the memcg id to look up
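
The ancestor walk in mem_cgroup_id_get_online() above is subtle enough
to deserve a sketch: "try to pin this group's ID, else retry on the
parent". A simplified standalone model (made-up names, not kernel code):

	#include <stdbool.h>
	#include <stddef.h>

	struct node {
		struct node *parent;
		long id_refs;	/* 0 once the group's ID is released */
	};

	/* Stand-in for refcount_inc_not_zero(&memcg->id.ref). */
	static bool node_tryget(struct node *n)
	{
		if (n->id_refs == 0)
			return false;
		n->id_refs++;
		return true;
	}

	/* Walk towards the root until a group whose ID is still live is
	 * found and pinned; root "cannot fail", hence the WARN + break
	 * in the real code. */
	static struct node *get_online(struct node *n, struct node *root)
	{
		while (!node_tryget(n)) {
			if (n == root)
				break;
			n = n->parent ? n->parent : root;
		}
		return n;
	}

	int main(void)
	{
		struct node root = { .parent = NULL, .id_refs = 1 };
		struct node dead = { .parent = &root, .id_refs = 0 };

		/* Pinning an offlined child lands on its online ancestor: */
		return get_online(&dead, &root) == &root ? 0 : 1;
	}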
> @@ -4585,40 +4603,6 @@ int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm,
> return ret;
> }
>
> -/*
> - * mem_cgroup_swapin_uncharge_swap - uncharge swap slot
> - * @entry: the first swap entry for which the pages are charged
> - * @nr_pages: number of pages which will be uncharged
> - *
> - * Call this function after successfully adding the charged page to swapcache.
> - *
> - * Note: This function assumes the page for which swap slot is being uncharged
> - * is order 0 page.
> - */
> -void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry, unsigned int nr_pages)
> -{
> - /*
> - * Cgroup1's unified memory+swap counter has been charged with the
> - * new swapcache page, finish the transfer by uncharging the swap
> - * slot. The swap slot would also get uncharged when it dies, but
> - * it can stick around indefinitely and we'd count the page twice
> - * the entire time.
> - *
> - * Cgroup2 has separate resource counters for memory and swap,
> - * so this is a non-issue here. Memory and swap charge lifetimes
> - * correspond 1:1 to page and swap slot lifetimes: we charge the
> - * page to memory here, and uncharge swap when the slot is freed.
> - */
> - if (do_memsw_account()) {
> - /*
> - * The swap entry might not get freed for a long time,
> - * let's not wait for it. The page already received a
> - * memory+swap charge, drop the swap entry duplicate.
> - */
> - mem_cgroup_uncharge_swap(entry, nr_pages);
> - }
> -}
> -
> struct uncharge_gather {
> struct mem_cgroup *memcg;
> unsigned long nr_memory;
> @@ -4944,81 +4928,6 @@ static int __init mem_cgroup_init(void)
> subsys_initcall(mem_cgroup_init);
>
> #ifdef CONFIG_SWAP
> -static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
> -{
> - while (!refcount_inc_not_zero(&memcg->id.ref)) {
> - /*
> - * The root cgroup cannot be destroyed, so it's refcount must
> - * always be >= 1.
> - */
> - if (WARN_ON_ONCE(mem_cgroup_is_root(memcg))) {
> - VM_BUG_ON(1);
> - break;
> - }
> - memcg = parent_mem_cgroup(memcg);
> - if (!memcg)
> - memcg = root_mem_cgroup;
> - }
> - return memcg;
> -}
> -
> -/**
> - * mem_cgroup_swapout - transfer a memsw charge to swap
> - * @folio: folio whose memsw charge to transfer
> - * @entry: swap entry to move the charge to
> - *
> - * Transfer the memsw charge of @folio to @entry.
> - */
> -void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
> -{
> - struct mem_cgroup *memcg, *swap_memcg;
> - unsigned int nr_entries;
> -
> - VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
> - VM_BUG_ON_FOLIO(folio_ref_count(folio), folio);
> -
> - if (mem_cgroup_disabled())
> - return;
> -
> - if (!do_memsw_account())
> - return;
> -
> - memcg = folio_memcg(folio);
> -
> - VM_WARN_ON_ONCE_FOLIO(!memcg, folio);
> - if (!memcg)
> - return;
> -
> - /*
> - * In case the memcg owning these pages has been offlined and doesn't
> - * have an ID allocated to it anymore, charge the closest online
> - * ancestor for the swap instead and transfer the memory+swap charge.
> - */
> - swap_memcg = mem_cgroup_id_get_online(memcg);
> - nr_entries = folio_nr_pages(folio);
> - /* Get references for the tail pages, too */
> - if (nr_entries > 1)
> - mem_cgroup_id_get_many(swap_memcg, nr_entries - 1);
> - mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries);
> -
> - swap_cgroup_record(folio, entry);
> -
> - folio_unqueue_deferred_split(folio);
> - folio->memcg_data = 0;
> -
> - if (!mem_cgroup_is_root(memcg))
> - page_counter_uncharge(&memcg->memory, nr_entries);
> -
> - if (memcg != swap_memcg) {
> - if (!mem_cgroup_is_root(swap_memcg))
> - page_counter_charge(&swap_memcg->memsw, nr_entries);
> - page_counter_uncharge(&memcg->memsw, nr_entries);
> - }
> -
> - memcg1_swapout(folio, memcg);
> - css_put(&memcg->css);
> -}
> -
> /**
> * __mem_cgroup_try_charge_swap - try charging swap space for a folio
> * @folio: folio being added to swap
> diff --git a/mm/memory.c b/mm/memory.c
> index 2a20e3810534..708ae27673b1 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -4393,7 +4393,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
> }
> need_clear_cache = true;
>
> - mem_cgroup_swapin_uncharge_swap(entry, nr_pages);
> + memcg1_swapin(entry, nr_pages);
>
> shadow = get_shadow_from_swap_cache(entry);
> if (shadow)
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 44379bee5b96..d885ecb6fe1e 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -2017,7 +2017,7 @@ static struct folio *shmem_swap_alloc_folio(struct inode *inode,
> __folio_set_swapbacked(new);
> new->swap = entry;
>
> - mem_cgroup_swapin_uncharge_swap(entry, nr_pages);
> + memcg1_swapin(entry, nr_pages);
> shadow = get_shadow_from_swap_cache(entry);
> if (shadow)
> workingset_refault(new, shadow);
> diff --git a/mm/swap_state.c b/mm/swap_state.c
> index ca42b2be64d9..2e1acb210e57 100644
> --- a/mm/swap_state.c
> +++ b/mm/swap_state.c
> @@ -521,7 +521,7 @@ struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
> if (add_to_swap_cache(new_folio, entry, gfp_mask & GFP_RECLAIM_MASK, &shadow))
> goto fail_unlock;
>
> - mem_cgroup_swapin_uncharge_swap(entry, 1);
> + memcg1_swapin(entry, 1);
>
> if (shadow)
> workingset_refault(new_folio, shadow);
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 3bbe917b6a34..b2b2f27b10a0 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -769,7 +769,7 @@ static int __remove_mapping(struct address_space *mapping, struct folio *folio,
> if (reclaimed && !mapping_exiting(mapping))
> shadow = workingset_eviction(folio, target_memcg);
> __delete_from_swap_cache(folio, swap, shadow);
> - mem_cgroup_swapout(folio, swap);
> + memcg1_swapout(folio, swap);
> xa_unlock_irq(&mapping->i_pages);
> put_swap_folio(folio, swap);
> } else {
> --
> 2.48.1
--
Michal Hocko
SUSE Labs