[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <o7dpwewfztqpkidrhvpdm57ikid4yswygag5gkjplfwdfkl54l@bs6oh2t4jp7z>
Date: Wed, 6 Nov 2024 15:50:05 -0800
From: Shakeel Butt <shakeel.butt@...ux.dev>
To: Joshua Hahn <joshua.hahnjy@...il.com>
Cc: hannes@...xchg.org, mhocko@...nel.org, roman.gushchin@...ux.dev,
muchun.song@...ux.dev, akpm@...ux-foundation.org, cgroups@...r.kernel.org,
linux-mm@...ck.org, linux-kernel@...r.kernel.org, kernel-team@...a.com
Subject: Re: [PATCH 2/2] memcg/hugetlb: Deprecate hugetlb memcg
try-commit-cancel charging
On Wed, Nov 06, 2024 at 02:14:34PM -0800, Joshua Hahn wrote:
> This patch deprecates the memcg try-{commit,cancel} logic used in hugetlb.
> Instead of having three points of error for memcg accounting, the error
> path is reduced to just one point at the end, and shares the same path
> with the hugeTLB controller as well.
>
> Please note that the hugeTLB controller still uses the try_charge to
> {commit/cancel} protocol.
>
> Signed-off-by: Joshua Hahn <joshua.hahnjy@...il.com>
>
> ---
> include/linux/memcontrol.h | 3 +--
> mm/hugetlb.c | 35 ++++++++++++-----------------------
> mm/memcontrol.c | 37 +++++++++----------------------------
> 3 files changed, 22 insertions(+), 53 deletions(-)
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 25761d55799e..0024634d161f 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -696,8 +696,7 @@ static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm,
>
> bool memcg_accounts_hugetlb(void);
>
> -int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, gfp_t gfp,
> - long nr_pages);
> +int mem_cgroup_charge_hugetlb(struct folio *folio, gfp_t gfp);
Please also clean up mem_cgroup_cancel_charge() and mem_cgroup_commit_charge(),
since they will have no users after this patch.
>
> int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm,
> gfp_t gfp, swp_entry_t entry);
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index fbb10e52d7ea..db9801b16d13 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -2967,21 +2967,13 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
> struct hugepage_subpool *spool = subpool_vma(vma);
> struct hstate *h = hstate_vma(vma);
> struct folio *folio;
> - long map_chg, map_commit, nr_pages = pages_per_huge_page(h);
> + long map_chg, map_commit;
> long gbl_chg;
> - int memcg_charge_ret, ret, idx;
> + int ret, idx;
> struct hugetlb_cgroup *h_cg = NULL;
> - struct mem_cgroup *memcg;
> bool deferred_reserve;
> gfp_t gfp = htlb_alloc_mask(h) | __GFP_RETRY_MAYFAIL;
>
> - memcg = get_mem_cgroup_from_current();
> - memcg_charge_ret = mem_cgroup_hugetlb_try_charge(memcg, gfp, nr_pages);
> - if (memcg_charge_ret == -ENOMEM) {
> - mem_cgroup_put(memcg);
> - return ERR_PTR(-ENOMEM);
> - }
> -
> idx = hstate_index(h);
> /*
> * Examine the region/reserve map to determine if the process
> @@ -2989,12 +2981,8 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
> * code of zero indicates a reservation exists (no change).
> */
> map_chg = gbl_chg = vma_needs_reservation(h, vma, addr);
> - if (map_chg < 0) {
> - if (!memcg_charge_ret)
> - mem_cgroup_cancel_charge(memcg, nr_pages);
> - mem_cgroup_put(memcg);
> + if (map_chg < 0)
> return ERR_PTR(-ENOMEM);
> - }
>
> /*
> * Processes that did not create the mapping will have no
> @@ -3056,6 +3044,12 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
> /* Fall through */
> }
>
> + ret = mem_cgroup_charge_hugetlb(folio, gfp);
You cannot call this with hugetlb_lock held: the charge can enter reclaim and
sleep, and hugetlb_lock is a spinlock taken with IRQs disabled.
> + if (ret == -ENOMEM)
> + goto free_folio;
> + else if (!ret)
> + lruvec_stat_mod_folio(folio, NR_HUGETLB, pages_per_huge_page(h));
> +
> hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, folio);
> /* If allocation is not consuming a reservation, also store the
> * hugetlb_cgroup pointer on the page.
> @@ -3092,13 +3086,11 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
> }
> }
>
> - if (!memcg_charge_ret)
> - mem_cgroup_commit_charge(folio, memcg);
> - lruvec_stat_mod_folio(folio, NR_HUGETLB, pages_per_huge_page(h));
> - mem_cgroup_put(memcg);
> -
> return folio;
>
> +free_folio:
> + spin_unlock_irq(&hugetlb_lock);
> + free_huge_folio(folio);
> out_uncharge_cgroup:
> hugetlb_cgroup_uncharge_cgroup(idx, pages_per_huge_page(h), h_cg);
> out_uncharge_cgroup_reservation:
> @@ -3110,9 +3102,6 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
> hugepage_subpool_put_pages(spool, 1);
> out_end_reservation:
> vma_end_reservation(h, vma, addr);
> - if (!memcg_charge_ret)
> - mem_cgroup_cancel_charge(memcg, nr_pages);
> - mem_cgroup_put(memcg);
> return ERR_PTR(-ENOSPC);
> }
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 59dea0122579..3b728635d6aa 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1448,8 +1448,7 @@ static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
> u64 size;
>
> #ifdef CONFIG_HUGETLB_PAGE
> - if (unlikely(memory_stats[i].idx == NR_HUGETLB) &&
> - !(cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING))
> + if (unlikely(memory_stats[i].idx == NR_HUGETLB) && !memcg_accounts_hugetlb())
> continue;
> #endif
> size = memcg_page_state_output(memcg, memory_stats[i].idx);
> @@ -4506,37 +4505,19 @@ bool memcg_accounts_hugetlb(void)
> #endif
> }
>
> -/**
> - * mem_cgroup_hugetlb_try_charge - try to charge the memcg for a hugetlb folio
> - * @memcg: memcg to charge.
> - * @gfp: reclaim mode.
> - * @nr_pages: number of pages to charge.
> - *
> - * This function is called when allocating a huge page folio to determine if
> - * the memcg has the capacity for it. It does not commit the charge yet,
> - * as the hugetlb folio itself has not been obtained from the hugetlb pool.
> - *
> - * Once we have obtained the hugetlb folio, we can call
> - * mem_cgroup_commit_charge() to commit the charge. If we fail to obtain the
> - * folio, we should instead call mem_cgroup_cancel_charge() to undo the effect
> - * of try_charge().
> - *
> - * Returns 0 on success. Otherwise, an error code is returned.
> - */
> -int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, gfp_t gfp,
> - long nr_pages)
> +int mem_cgroup_charge_hugetlb(struct folio *folio, gfp_t gfp)
> {
> - /*
> - * If hugetlb memcg charging is not enabled, do not fail hugetlb allocation,
> - * but do not attempt to commit charge later (or cancel on error) either.
> - */
> - if (mem_cgroup_disabled() || !memcg ||
> - !cgroup_subsys_on_dfl(memory_cgrp_subsys) || !memcg_accounts_hugetlb())
> + struct mem_cgroup *memcg = get_mem_cgroup_from_current();
This leaks the memcg reference taken above on the error paths: both the
-EOPNOTSUPP and the -ENOMEM returns below skip mem_cgroup_put().
> +
> + if (mem_cgroup_disabled() || !memcg_accounts_hugetlb() ||
> + !memcg || !cgroup_subsys_on_dfl(memory_cgrp_subsys))
> return -EOPNOTSUPP;
>
> - if (try_charge(memcg, gfp, nr_pages))
> + if (charge_memcg(folio, memcg, gfp))
> return -ENOMEM;
>
> + mem_cgroup_put(memcg);
> +
> return 0;
> }
>
> --
> 2.43.5
>
Powered by blists - more mailing lists