Message-ID: <20181113151127.GA7681@darkstar>
Date:   Tue, 13 Nov 2018 07:11:27 -0800
From:   Patrick Bellasi <patrick.bellasi@....com>
To:     Peter Zijlstra <peterz@...radead.org>
Cc:     linux-kernel@...r.kernel.org, linux-pm@...r.kernel.org,
        Ingo Molnar <mingo@...hat.com>, Tejun Heo <tj@...nel.org>,
        "Rafael J . Wysocki" <rafael.j.wysocki@...el.com>,
        Vincent Guittot <vincent.guittot@...aro.org>,
        Viresh Kumar <viresh.kumar@...aro.org>,
        Paul Turner <pjt@...gle.com>,
        Quentin Perret <quentin.perret@....com>,
        Dietmar Eggemann <dietmar.eggemann@....com>,
        Morten Rasmussen <morten.rasmussen@....com>,
        Juri Lelli <juri.lelli@...hat.com>,
        Todd Kjos <tkjos@...gle.com>,
        Joel Fernandes <joelaf@...gle.com>,
        Steve Muckle <smuckle@...gle.com>,
        Suren Baghdasaryan <surenb@...gle.com>
Subject: Re: [PATCH v5 04/15] sched/core: uclamp: add CPU's clamp groups
 refcounting

On 11-Nov 17:47, Peter Zijlstra wrote:
> On Mon, Oct 29, 2018 at 06:32:59PM +0000, Patrick Bellasi wrote:
> > +static inline void uclamp_cpu_update(struct rq *rq, unsigned int clamp_id)
> > +{
> > +	unsigned int group_id;
> > +	int max_value = 0;
> > +
> > +	for (group_id = 0; group_id < UCLAMP_GROUPS; ++group_id) {
> > +		if (!rq->uclamp.group[clamp_id][group_id].tasks)
> > +			continue;
> > +		/* Both min and max clamps are MAX aggregated */
> > +		if (max_value < rq->uclamp.group[clamp_id][group_id].value)
> > +			max_value = rq->uclamp.group[clamp_id][group_id].value;
> 
> 		max_value = max(max_value, rq->uclamp.group[clamp_id][group_id].value);

Right, I've got used to this pattern as a way to avoid write
instructions. I guess that here, since it's just a function-local
variable, we don't really care much...
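
FWIW, a minimal standalone sketch of the two forms being compared
(hypothetical code, just for illustration; 'value' stands in for the
per-group value read in the loop):

	/* Form A: conditional write, stores only when the value grows */
	if (max_value < value)
		max_value = value;

	/* Form B: same result via the max() macro, always stores */
	max_value = max(max_value, value);

For a function-local variable I'd expect the compiler to generate
pretty much the same code either way.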

> > +		if (max_value >= SCHED_CAPACITY_SCALE)
> > +			break;
> > +	}
> > +	rq->uclamp.value[clamp_id] = max_value;
> > +}
> > +
> > +/**
> > + * uclamp_cpu_get_id(): increase reference count for a clamp group on a CPU
> > + * @p: the task being enqueued on a CPU
> > + * @rq: the CPU's rq where the clamp group has to be reference counted
> > + * @clamp_id: the clamp index to update
> > + *
> > + * Once a task is enqueued on a CPU's rq, the clamp group currently defined by
> > + * the task's uclamp::group_id is reference counted on that CPU.
> > + */
> > +static inline void uclamp_cpu_get_id(struct task_struct *p, struct rq *rq,
> > +				     unsigned int clamp_id)
> > +{
> > +	unsigned int group_id;
> > +
> > +	if (unlikely(!p->uclamp[clamp_id].mapped))
> > +		return;
> > +
> > +	group_id = p->uclamp[clamp_id].group_id;
> > +	p->uclamp[clamp_id].active = true;
> > +
> > +	rq->uclamp.group[clamp_id][group_id].tasks += 1;
> 
> 	++
> > +
> > +	if (rq->uclamp.value[clamp_id] < p->uclamp[clamp_id].value)
> > +		rq->uclamp.value[clamp_id] = p->uclamp[clamp_id].value;
> 
> 	rq->uclamp.value[clamp_id] = max(rq->uclamp.value[clamp_id],
> 					 p->uclamp[clamp_id].value);

In this case instead, since we are updating a variable visible from
other CPUs, shouldn't we prefer to avoid the assignment when it's not
required?

Is the compiler smart enough to optimize the code above?
... I'll check more carefully.
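
For reference, the two alternatives in question (the names match the
patch, but this is just an illustrative sketch):

	/*
	 * Unconditional store: writes even when the value is unchanged,
	 * potentially dirtying a cache line read by other CPUs.
	 */
	rq->uclamp.value[clamp_id] = max(rq->uclamp.value[clamp_id],
					 p->uclamp[clamp_id].value);

	/*
	 * Conditional store: writes only when the max actually changes.
	 */
	if (rq->uclamp.value[clamp_id] < p->uclamp[clamp_id].value)
		rq->uclamp.value[clamp_id] = p->uclamp[clamp_id].value;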

> > +}
> > +
> > +/**
> > + * uclamp_cpu_put_id(): decrease reference count for a clamp group on a CPU
> > + * @p: the task being dequeued from a CPU
> > + * @rq: the CPU's rq from where the clamp group has to be released
> > + * @clamp_id: the clamp index to update
> > + *
> > + * When a task is dequeued from a CPU's rq, the CPU's clamp group reference
> > + * counted by the task is released.
> > + * If this was the last task reference counting the current max clamp group,
> > + * then the CPU clamping is updated to find the new max for the specified
> > + * clamp index.
> > + */
> > +static inline void uclamp_cpu_put_id(struct task_struct *p, struct rq *rq,
> > +				     unsigned int clamp_id)
> > +{
> > +	unsigned int clamp_value;
> > +	unsigned int group_id;
> > +
> > +	if (unlikely(!p->uclamp[clamp_id].mapped))
> > +		return;
> > +
> > +	group_id = p->uclamp[clamp_id].group_id;
> > +	p->uclamp[clamp_id].active = false;
> > +
> 	SCHED_WARN_ON(!rq->uclamp.group[clamp_id][group_id].tasks);
> 
> > +	if (likely(rq->uclamp.group[clamp_id][group_id].tasks))
> > +		rq->uclamp.group[clamp_id][group_id].tasks -= 1;
> 
> 	--
> 
> > +#ifdef CONFIG_SCHED_DEBUG
> > +	else {
> > +		WARN(1, "invalid CPU[%d] clamp group [%u:%u] refcount\n",
> > +		     cpu_of(rq), clamp_id, group_id);
> > +	}
> > +#endif
> 
> > +
> > +	if (likely(rq->uclamp.group[clamp_id][group_id].tasks))
> > +		return;
> > +
> > +	clamp_value = rq->uclamp.group[clamp_id][group_id].value;
> > +#ifdef CONFIG_SCHED_DEBUG
> > +	if (unlikely(clamp_value > rq->uclamp.value[clamp_id])) {
> > +		WARN(1, "invalid CPU[%d] clamp group [%u:%u] value\n",
> > +		     cpu_of(rq), clamp_id, group_id);
> > +	}
> > +#endif
> 
> 	SCHED_WARN_ON(clamp_value > rq->uclamp.value[clamp_id]);
> 
> > +	if (clamp_value >= rq->uclamp.value[clamp_id])
> > +		uclamp_cpu_update(rq, clamp_id);
> > +}
> 
> > @@ -866,6 +1020,28 @@ static void uclamp_group_get(struct uclamp_se *uc_se, unsigned int clamp_id,
> >  	if (res != uc_map_old.data)
> >  		goto retry;
> >  
> > +	/* Ensure each CPU tracks the correct value for this clamp group */
> > +	if (likely(uc_map_new.se_count > 1))
> > +		goto done;
> > +	for_each_possible_cpu(cpu) {
> 
> yuck yuck yuck.. why!?

When a clamp group is released, i.e. when no more SEs refcount it,
that group can be mapped in the future to a different clamp value.

Thus, when this happens, a different clamp value can be assigned to
that clamp group, and we need to update the value tracked in the
CPU-side data structures: that's the value actually used to figure out
the min/max clamps at enqueue/dequeue time.

However, since we are in the slow path here, this should not be an
issue, should it?
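
To make the scenario concrete, a hypothetical timeline for the reuse
of a clamp group (the values are made up):

	/*
	 *  t0: group_id=2 maps clamp_value=512; each CPU caches 512 in
	 *      rq->uclamp.group[clamp_id][2].value
	 *  t1: the last SE releases group_id=2, its refcount drops to 0
	 *  t2: group_id=2 is re-allocated for clamp_value=768 and gets
	 *      its first reference (se_count == 1)
	 *  t3: each CPU's cached value must be refreshed from 512 to
	 *      768, hence the for_each_possible_cpu() update
	 */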

> 
> > +		struct uclamp_cpu *uc_cpu = &cpu_rq(cpu)->uclamp;
> > +
> > +		/* Refcounting is expected to be always 0 for free groups */
> > +		if (unlikely(uc_cpu->group[clamp_id][group_id].tasks)) {
> > +			uc_cpu->group[clamp_id][group_id].tasks = 0;
> > +#ifdef CONFIG_SCHED_DEBUG
> > +			WARN(1, "invalid CPU[%d] clamp group [%u:%u] refcount\n",
> > +			     cpu, clamp_id, group_id);
> > +#endif
> 
> 		SCHED_WARN_ON();
> 
> > +		}
> > +
> > +		if (uc_cpu->group[clamp_id][group_id].value == clamp_value)
> > +			continue;
> > +		uc_cpu->group[clamp_id][group_id].value = clamp_value;
> > +	}
> > +
> > +done:
> > +
> >  	/* Update SE's clamp values and attach it to new clamp group */
> >  	uc_se->value = clamp_value;
> >  	uc_se->group_id = group_id;

-- 
#include <best/regards.h>

Patrick Bellasi
