linux-kernel - Re: [PATCH v9 2/8] hugetlb_cgroup: add interface for charge/uncharge hugetlb reservations

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <alpine.DEB.2.21.2001131642270.164268@chino.kir.corp.google.com>
Date:   Mon, 13 Jan 2020 16:45:37 -0800 (PST)
From:   David Rientjes <rientjes@...gle.com>
To:     Mike Kravetz <mike.kravetz@...cle.com>
cc:     Mina Almasry <almasrymina@...gle.com>, shuah@...nel.org,
        shakeelb@...gle.com, gthelen@...gle.com, akpm@...ux-foundation.org,
        linux-kernel@...r.kernel.org, linux-mm@...ck.org,
        linux-kselftest@...r.kernel.org, cgroups@...r.kernel.org,
        aneesh.kumar@...ux.vnet.ibm.com, mkoutny@...e.com
Subject: Re: [PATCH v9 2/8] hugetlb_cgroup: add interface for charge/uncharge
 hugetlb reservations

On Mon, 13 Jan 2020, Mike Kravetz wrote:

> > diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
> > index 35415af9ed26f..b03270b0d5833 100644
> > --- a/mm/hugetlb_cgroup.c
> > +++ b/mm/hugetlb_cgroup.c
> > @@ -96,8 +96,12 @@ static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
> >  	int idx;
> > 
> >  	for (idx = 0; idx < hugetlb_max_hstate; idx++) {
> > -		if (page_counter_read(&h_cg->hugepage[idx]))
> > +		if (page_counter_read(
> > +			    hugetlb_cgroup_get_counter(h_cg, idx, true)) ||
> > +		    page_counter_read(
> > +			    hugetlb_cgroup_get_counter(h_cg, idx, false))) {
> >  			return true;
> > +		}
> >  	}
> >  	return false;
> >  }
> > @@ -108,18 +112,32 @@ static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
> >  	int idx;
> > 
> >  	for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) {
> > -		struct page_counter *counter = &h_cgroup->hugepage[idx];
> > -		struct page_counter *parent = NULL;
> > +		struct page_counter *fault_parent = NULL;
> > +		struct page_counter *reserved_parent = NULL;
> >  		unsigned long limit;
> >  		int ret;
> > 
> > -		if (parent_h_cgroup)
> > -			parent = &parent_h_cgroup->hugepage[idx];
> > -		page_counter_init(counter, parent);
> > +		if (parent_h_cgroup) {
> > +			fault_parent = hugetlb_cgroup_get_counter(
> > +				parent_h_cgroup, idx, false);
> > +			reserved_parent = hugetlb_cgroup_get_counter(
> > +				parent_h_cgroup, idx, true);
> > +		}
> > +		page_counter_init(hugetlb_cgroup_get_counter(h_cgroup, idx,
> > +							     false),
> > +				  fault_parent);
> > +		page_counter_init(hugetlb_cgroup_get_counter(h_cgroup, idx,
> > +							     true),
> > +				  reserved_parent);
> > 
> >  		limit = round_down(PAGE_COUNTER_MAX,
> >  				   1 << huge_page_order(&hstates[idx]));
> > -		ret = page_counter_set_max(counter, limit);
> > +
> > +		ret = page_counter_set_max(
> > +			hugetlb_cgroup_get_counter(h_cgroup, idx, false),
> > +			limit);
> > +		ret = page_counter_set_max(
> > +			hugetlb_cgroup_get_counter(h_cgroup, idx, true), limit);
> >  		VM_BUG_ON(ret);
> 
> The second page_counter_set_max() call overwrites ret before the check in
> VM_BUG_ON().
> 
> >  	}
> >  }
> > @@ -149,7 +167,6 @@ static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
> >  	kfree(h_cgroup);
> >  }
> > 
> > -
> >  /*
> >   * Should be called with hugetlb_lock held.
> >   * Since we are holding hugetlb_lock, pages cannot get moved from
> > @@ -165,7 +182,7 @@ static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
> >  	struct hugetlb_cgroup *page_hcg;
> >  	struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);
> > 
> > -	page_hcg = hugetlb_cgroup_from_page(page);
> > +	page_hcg = hugetlb_cgroup_from_page(page, false);
> >  	/*
> >  	 * We can have pages in active list without any cgroup
> >  	 * ie, hugepage with less than 3 pages. We can safely
> > @@ -184,7 +201,7 @@ static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
> >  	/* Take the pages off the local counter */
> >  	page_counter_cancel(counter, nr_pages);
> > 
> > -	set_hugetlb_cgroup(page, parent);
> > +	set_hugetlb_cgroup(page, parent, false);
> >  out:
> >  	return;
> >  }
> > @@ -227,7 +244,7 @@ static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx,
> >  }
> > 
> >  int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
> > -				 struct hugetlb_cgroup **ptr)
> > +				 struct hugetlb_cgroup **ptr, bool reserved)
> >  {
> >  	int ret = 0;
> >  	struct page_counter *counter;
> > @@ -250,13 +267,20 @@ int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
> >  	}
> >  	rcu_read_unlock();
> > 
> > -	if (!page_counter_try_charge(&h_cg->hugepage[idx], nr_pages,
> > -				     &counter)) {
> > +	if (!page_counter_try_charge(hugetlb_cgroup_get_counter(h_cg, idx,
> > +								reserved),
> > +				     nr_pages, &counter)) {
> >  		ret = -ENOMEM;
> >  		hugetlb_event(hugetlb_cgroup_from_counter(counter, idx), idx,
> >  			      HUGETLB_MAX);
> > +		css_put(&h_cg->css);
> > +		goto done;
> >  	}
> > -	css_put(&h_cg->css);
> > +	/* Reservations take a reference to the css because they do not get
> > +	 * reparented.
> 
> I'm hoping someone with more cgroup knowledge can comment on this and any
> consequences of not reparenting reservations.  We previously talked about
> why reparenting would be very difficult/expensive.  I understand why you are
> nopt doing it.  Just do not fully understand what needs to be done from the
> cgroup side.
> 

I don't see any description of how hugetlb_cgroup currently acts wrt 
reparenting in the last patch in the series and how this is the same or 
different for reservations.  I think the discussion that is referenced 
here is probably lost in some previous posting of the series.  I think 
it's particularly useful information that the end user will need to know 
about for its handling so it would benefit from some documentation in the 
last patch.