Message-ID: <alpine.DEB.2.21.2001131642270.164268@chino.kir.corp.google.com>
Date:   Mon, 13 Jan 2020 16:45:37 -0800 (PST)
From:   David Rientjes <rientjes@...gle.com>
To:     Mike Kravetz <mike.kravetz@...cle.com>
cc:     Mina Almasry <almasrymina@...gle.com>, shuah@...nel.org,
        shakeelb@...gle.com, gthelen@...gle.com, akpm@...ux-foundation.org,
        linux-kernel@...r.kernel.org, linux-mm@...ck.org,
        linux-kselftest@...r.kernel.org, cgroups@...r.kernel.org,
        aneesh.kumar@...ux.vnet.ibm.com, mkoutny@...e.com
Subject: Re: [PATCH v9 2/8] hugetlb_cgroup: add interface for charge/uncharge
 hugetlb reservations

On Mon, 13 Jan 2020, Mike Kravetz wrote:

> > diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
> > index 35415af9ed26f..b03270b0d5833 100644
> > --- a/mm/hugetlb_cgroup.c
> > +++ b/mm/hugetlb_cgroup.c
> > @@ -96,8 +96,12 @@ static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
> >  	int idx;
> > 
> >  	for (idx = 0; idx < hugetlb_max_hstate; idx++) {
> > -		if (page_counter_read(&h_cg->hugepage[idx]))
> > +		if (page_counter_read(
> > +			    hugetlb_cgroup_get_counter(h_cg, idx, true)) ||
> > +		    page_counter_read(
> > +			    hugetlb_cgroup_get_counter(h_cg, idx, false))) {
> >  			return true;
> > +		}
> >  	}
> >  	return false;
> >  }
> > @@ -108,18 +112,32 @@ static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
> >  	int idx;
> > 
> >  	for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) {
> > -		struct page_counter *counter = &h_cgroup->hugepage[idx];
> > -		struct page_counter *parent = NULL;
> > +		struct page_counter *fault_parent = NULL;
> > +		struct page_counter *reserved_parent = NULL;
> >  		unsigned long limit;
> >  		int ret;
> > 
> > -		if (parent_h_cgroup)
> > -			parent = &parent_h_cgroup->hugepage[idx];
> > -		page_counter_init(counter, parent);
> > +		if (parent_h_cgroup) {
> > +			fault_parent = hugetlb_cgroup_get_counter(
> > +				parent_h_cgroup, idx, false);
> > +			reserved_parent = hugetlb_cgroup_get_counter(
> > +				parent_h_cgroup, idx, true);
> > +		}
> > +		page_counter_init(hugetlb_cgroup_get_counter(h_cgroup, idx,
> > +							     false),
> > +				  fault_parent);
> > +		page_counter_init(hugetlb_cgroup_get_counter(h_cgroup, idx,
> > +							     true),
> > +				  reserved_parent);
> > 
> >  		limit = round_down(PAGE_COUNTER_MAX,
> >  				   1 << huge_page_order(&hstates[idx]));
> > -		ret = page_counter_set_max(counter, limit);
> > +
> > +		ret = page_counter_set_max(
> > +			hugetlb_cgroup_get_counter(h_cgroup, idx, false),
> > +			limit);
> > +		ret = page_counter_set_max(
> > +			hugetlb_cgroup_get_counter(h_cgroup, idx, true), limit);
> >  		VM_BUG_ON(ret);
> 
> The second page_counter_set_max() call overwrites ret before the check in
> VM_BUG_ON().
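
Checking ret after each call would avoid losing the first return value;
an untested sketch along those lines, using the patch's
hugetlb_cgroup_get_counter():

	ret = page_counter_set_max(
		hugetlb_cgroup_get_counter(h_cgroup, idx, false), limit);
	VM_BUG_ON(ret);
	ret = page_counter_set_max(
		hugetlb_cgroup_get_counter(h_cgroup, idx, true), limit);
	VM_BUG_ON(ret);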
> 
> >  	}
> >  }
> > @@ -149,7 +167,6 @@ static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
> >  	kfree(h_cgroup);
> >  }
> > 
> > -
> >  /*
> >   * Should be called with hugetlb_lock held.
> >   * Since we are holding hugetlb_lock, pages cannot get moved from
> > @@ -165,7 +182,7 @@ static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
> >  	struct hugetlb_cgroup *page_hcg;
> >  	struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);
> > 
> > -	page_hcg = hugetlb_cgroup_from_page(page);
> > +	page_hcg = hugetlb_cgroup_from_page(page, false);
> >  	/*
> >  	 * We can have pages in active list without any cgroup
> >  	 * ie, hugepage with less than 3 pages. We can safely
> > @@ -184,7 +201,7 @@ static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
> >  	/* Take the pages off the local counter */
> >  	page_counter_cancel(counter, nr_pages);
> > 
> > -	set_hugetlb_cgroup(page, parent);
> > +	set_hugetlb_cgroup(page, parent, false);
> >  out:
> >  	return;
> >  }
> > @@ -227,7 +244,7 @@ static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx,
> >  }
> > 
> >  int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
> > -				 struct hugetlb_cgroup **ptr)
> > +				 struct hugetlb_cgroup **ptr, bool reserved)
> >  {
> >  	int ret = 0;
> >  	struct page_counter *counter;
> > @@ -250,13 +267,20 @@ int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
> >  	}
> >  	rcu_read_unlock();
> > 
> > -	if (!page_counter_try_charge(&h_cg->hugepage[idx], nr_pages,
> > -				     &counter)) {
> > +	if (!page_counter_try_charge(hugetlb_cgroup_get_counter(h_cg, idx,
> > +								reserved),
> > +				     nr_pages, &counter)) {
> >  		ret = -ENOMEM;
> >  		hugetlb_event(hugetlb_cgroup_from_counter(counter, idx), idx,
> >  			      HUGETLB_MAX);
> > +		css_put(&h_cg->css);
> > +		goto done;
> >  	}
> > -	css_put(&h_cg->css);
> > +	/* Reservations take a reference to the css because they do not get
> > +	 * reparented.
> 
> I'm hoping someone with more cgroup knowledge can comment on this and any
> consequences of not reparenting reservations.  We previously talked about
> why reparenting would be very difficult/expensive.  I understand why you are
> not doing it.  I just do not fully understand what needs to be done from the
> cgroup side.
> 

I don't see any description in the last patch in the series of how 
hugetlb_cgroup currently behaves wrt reparenting, or of how reservations 
are the same or different.  The discussion referenced here is probably 
lost in some previous posting of the series.  Since this is information 
the end user will need to know about to handle reservations correctly, 
it would benefit from some documentation in the last patch.
