lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20231003171329.GB314430@monkey>
Date:   Tue, 3 Oct 2023 10:13:29 -0700
From:   Mike Kravetz <mike.kravetz@...cle.com>
To:     Nhat Pham <nphamcs@...il.com>
Cc:     akpm@...ux-foundation.org, riel@...riel.com, hannes@...xchg.org,
        mhocko@...nel.org, roman.gushchin@...ux.dev, shakeelb@...gle.com,
        muchun.song@...ux.dev, tj@...nel.org, lizefan.x@...edance.com,
        shuah@...nel.org, yosryahmed@...gle.com, fvdl@...gle.com,
        linux-mm@...ck.org, kernel-team@...a.com,
        linux-kernel@...r.kernel.org, cgroups@...r.kernel.org
Subject: Re: [PATCH v3 2/3] hugetlb: memcg: account hugetlb-backed memory in
 memory controller

On 10/02/23 17:18, Nhat Pham wrote:
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index de220e3ff8be..74472e911b0a 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1902,6 +1902,7 @@ void free_huge_folio(struct folio *folio)
>  				     pages_per_huge_page(h), folio);
>  	hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h),
>  					  pages_per_huge_page(h), folio);
> +	mem_cgroup_uncharge(folio);
>  	if (restore_reserve)
>  		h->resv_huge_pages++;
>  
> @@ -3009,11 +3010,20 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
>  	struct hugepage_subpool *spool = subpool_vma(vma);
>  	struct hstate *h = hstate_vma(vma);
>  	struct folio *folio;
> -	long map_chg, map_commit;
> +	long map_chg, map_commit, nr_pages = pages_per_huge_page(h);
>  	long gbl_chg;
> -	int ret, idx;
> +	int memcg_charge_ret, ret, idx;
>  	struct hugetlb_cgroup *h_cg = NULL;
> +	struct mem_cgroup *memcg;
>  	bool deferred_reserve;
> +	gfp_t gfp = htlb_alloc_mask(h) | __GFP_RETRY_MAYFAIL;
> +
> +	memcg = get_mem_cgroup_from_current();
> +	memcg_charge_ret = mem_cgroup_hugetlb_try_charge(memcg, gfp, nr_pages);
> +	if (memcg_charge_ret == -ENOMEM) {
> +		mem_cgroup_put(memcg);
> +		return ERR_PTR(-ENOMEM);
> +	}
>  
>  	idx = hstate_index(h);
>  	/*
> @@ -3022,8 +3032,12 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
>  	 * code of zero indicates a reservation exists (no change).
>  	 */
>  	map_chg = gbl_chg = vma_needs_reservation(h, vma, addr);
> -	if (map_chg < 0)
> +	if (map_chg < 0) {
> +		if (!memcg_charge_ret)
> +			mem_cgroup_cancel_charge(memcg, nr_pages);
> +		mem_cgroup_put(memcg);
>  		return ERR_PTR(-ENOMEM);
> +	}
>  
>  	/*
>  	 * Processes that did not create the mapping will have no
> @@ -3034,10 +3048,8 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
>  	 */
>  	if (map_chg || avoid_reserve) {
>  		gbl_chg = hugepage_subpool_get_pages(spool, 1);
> -		if (gbl_chg < 0) {
> -			vma_end_reservation(h, vma, addr);
> -			return ERR_PTR(-ENOSPC);
> -		}
> +		if (gbl_chg < 0)
> +			goto out_end_reservation;
>  
>  		/*
>  		 * Even though there was no reservation in the region/reserve
> @@ -3119,6 +3131,11 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
>  			hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h),
>  					pages_per_huge_page(h), folio);
>  	}
> +
> +	if (!memcg_charge_ret)
> +		mem_cgroup_commit_charge(folio, memcg);
> +	mem_cgroup_put(memcg);
> +
>  	return folio;
>  
>  out_uncharge_cgroup:
> @@ -3130,7 +3147,11 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
>  out_subpool_put:
>  	if (map_chg || avoid_reserve)
>  		hugepage_subpool_put_pages(spool, 1);
> +out_end_reservation:
>  	vma_end_reservation(h, vma, addr);
> +	if (!memcg_charge_ret)
> +		mem_cgroup_cancel_charge(memcg, nr_pages);
> +	mem_cgroup_put(memcg);
>  	return ERR_PTR(-ENOSPC);
>  }
>  

IIUC, huge page usage is charged in alloc_hugetlb_folio and uncharged in
free_huge_folio.  During migration, huge pages are allocated via
alloc_migrate_hugetlb_folio, not alloc_hugetlb_folio.  So, there is no
charging for the migration target page and we uncharge the source page.
It looks like there will be no charge for the huge page after migration?

If my analysis above is correct, then we may need to be careful about
this accounting.  We may not want both source and target pages to be
charged at the same time.
-- 
Mike Kravetz

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ