[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <alpine.DEB.2.21.2001131642270.164268@chino.kir.corp.google.com>
Date: Mon, 13 Jan 2020 16:45:37 -0800 (PST)
From: David Rientjes <rientjes@...gle.com>
To: Mike Kravetz <mike.kravetz@...cle.com>
cc: Mina Almasry <almasrymina@...gle.com>, shuah@...nel.org,
shakeelb@...gle.com, gthelen@...gle.com, akpm@...ux-foundation.org,
linux-kernel@...r.kernel.org, linux-mm@...ck.org,
linux-kselftest@...r.kernel.org, cgroups@...r.kernel.org,
aneesh.kumar@...ux.vnet.ibm.com, mkoutny@...e.com
Subject: Re: [PATCH v9 2/8] hugetlb_cgroup: add interface for charge/uncharge
hugetlb reservations
On Mon, 13 Jan 2020, Mike Kravetz wrote:
> > diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
> > index 35415af9ed26f..b03270b0d5833 100644
> > --- a/mm/hugetlb_cgroup.c
> > +++ b/mm/hugetlb_cgroup.c
> > @@ -96,8 +96,12 @@ static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
> > int idx;
> >
> > for (idx = 0; idx < hugetlb_max_hstate; idx++) {
> > - if (page_counter_read(&h_cg->hugepage[idx]))
> > + if (page_counter_read(
> > + hugetlb_cgroup_get_counter(h_cg, idx, true)) ||
> > + page_counter_read(
> > + hugetlb_cgroup_get_counter(h_cg, idx, false))) {
> > return true;
> > + }
> > }
> > return false;
> > }
> > @@ -108,18 +112,32 @@ static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
> > int idx;
> >
> > for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) {
> > - struct page_counter *counter = &h_cgroup->hugepage[idx];
> > - struct page_counter *parent = NULL;
> > + struct page_counter *fault_parent = NULL;
> > + struct page_counter *reserved_parent = NULL;
> > unsigned long limit;
> > int ret;
> >
> > - if (parent_h_cgroup)
> > - parent = &parent_h_cgroup->hugepage[idx];
> > - page_counter_init(counter, parent);
> > + if (parent_h_cgroup) {
> > + fault_parent = hugetlb_cgroup_get_counter(
> > + parent_h_cgroup, idx, false);
> > + reserved_parent = hugetlb_cgroup_get_counter(
> > + parent_h_cgroup, idx, true);
> > + }
> > + page_counter_init(hugetlb_cgroup_get_counter(h_cgroup, idx,
> > + false),
> > + fault_parent);
> > + page_counter_init(hugetlb_cgroup_get_counter(h_cgroup, idx,
> > + true),
> > + reserved_parent);
> >
> > limit = round_down(PAGE_COUNTER_MAX,
> > 1 << huge_page_order(&hstates[idx]));
> > - ret = page_counter_set_max(counter, limit);
> > +
> > + ret = page_counter_set_max(
> > + hugetlb_cgroup_get_counter(h_cgroup, idx, false),
> > + limit);
> > + ret = page_counter_set_max(
> > + hugetlb_cgroup_get_counter(h_cgroup, idx, true), limit);
> > VM_BUG_ON(ret);
>
> The second page_counter_set_max() call overwrites ret before the check in
> VM_BUG_ON().
>
> > }
> > }
> > @@ -149,7 +167,6 @@ static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
> > kfree(h_cgroup);
> > }
> >
> > -
> > /*
> > * Should be called with hugetlb_lock held.
> > * Since we are holding hugetlb_lock, pages cannot get moved from
> > @@ -165,7 +182,7 @@ static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
> > struct hugetlb_cgroup *page_hcg;
> > struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);
> >
> > - page_hcg = hugetlb_cgroup_from_page(page);
> > + page_hcg = hugetlb_cgroup_from_page(page, false);
> > /*
> > * We can have pages in active list without any cgroup
> > * ie, hugepage with less than 3 pages. We can safely
> > @@ -184,7 +201,7 @@ static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
> > /* Take the pages off the local counter */
> > page_counter_cancel(counter, nr_pages);
> >
> > - set_hugetlb_cgroup(page, parent);
> > + set_hugetlb_cgroup(page, parent, false);
> > out:
> > return;
> > }
> > @@ -227,7 +244,7 @@ static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx,
> > }
> >
> > int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
> > - struct hugetlb_cgroup **ptr)
> > + struct hugetlb_cgroup **ptr, bool reserved)
> > {
> > int ret = 0;
> > struct page_counter *counter;
> > @@ -250,13 +267,20 @@ int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
> > }
> > rcu_read_unlock();
> >
> > - if (!page_counter_try_charge(&h_cg->hugepage[idx], nr_pages,
> > - &counter)) {
> > + if (!page_counter_try_charge(hugetlb_cgroup_get_counter(h_cg, idx,
> > + reserved),
> > + nr_pages, &counter)) {
> > ret = -ENOMEM;
> > hugetlb_event(hugetlb_cgroup_from_counter(counter, idx), idx,
> > HUGETLB_MAX);
> > + css_put(&h_cg->css);
> > + goto done;
> > }
> > - css_put(&h_cg->css);
> > + /* Reservations take a reference to the css because they do not get
> > + * reparented.
>
> I'm hoping someone with more cgroup knowledge can comment on this and any
> consequences of not reparenting reservations. We previously talked about
> why reparenting would be very difficult/expensive. I understand why you are
> nopt doing it. Just do not fully understand what needs to be done from the
> cgroup side.
>
I don't see any description of how hugetlb_cgroup currently acts wrt
reparenting in the last patch in the series and how this is the same or
different for reservations. I think the discussion that is referenced
here is probably lost in some previous posting of the series. I think
it's particularly useful information that the end user will need to know
about for its handling so it would benefit from some documentation in the
last patch.
Powered by blists - more mailing lists