Message-ID: <87bll6ngrr.derkling@matbug.net>
Date:   Fri, 26 Jun 2020 14:38:48 +0200
From:   Patrick Bellasi <patrick.bellasi@...bug.net>
To:     Qais Yousef <qais.yousef@....com>
Cc:     Ingo Molnar <mingo@...hat.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Valentin Schneider <valentin.schneider@....com>,
        Juri Lelli <juri.lelli@...hat.com>,
        Vincent Guittot <vincent.guittot@...aro.org>,
        Dietmar Eggemann <dietmar.eggemann@....com>,
        Steven Rostedt <rostedt@...dmis.org>,
        Ben Segall <bsegall@...gle.com>, Mel Gorman <mgorman@...e.de>,
        Chris Redpath <chris.redpath@....com>,
        Lukasz Luba <lukasz.luba@....com>, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v4 2/2] sched/uclamp: Protect uclamp fast path code with static key


On Thu, Jun 25, 2020 at 17:43:52 +0200, Qais Yousef <qais.yousef@....com> wrote...

[...]

> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 235b2cae00a0..e2f1fffa013c 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -794,6 +794,25 @@ unsigned int sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE;
>  /* All clamps are required to be less or equal than these values */
>  static struct uclamp_se uclamp_default[UCLAMP_CNT];
>  
> +/*
> + * This static key is used to reduce the uclamp overhead in the fast path. It
> + * only disables the call to uclamp_rq_{inc, dec}() in enqueue/dequeue_task().
> + *
> + * This allows users to continue to enable uclamp in their kernel config with
> + * minimum uclamp overhead in the fast path.
> + *
> + * As soon as userspace modifies any of the uclamp knobs, the static key is
> + * enabled, since we have an actual users that make use of uclamp
> + * functionality.
> + *
> + * The knobs that would enable this static key are:
> + *
> + *   * A task modifying its uclamp value with sched_setattr().
> + *   * An admin modifying the sysctl_sched_uclamp_{min, max} via procfs.
> + *   * An admin modifying the cgroup cpu.uclamp.{min, max}
> + */
> +static DEFINE_STATIC_KEY_FALSE(sched_uclamp_used);

This is me being choosy, but given that:

---8<---
% grep -e '_used[ )]' fair.c core.c
fair.c:	return static_key_false(&__cfs_bandwidth_used);
fair.c:	static_key_slow_inc_cpuslocked(&__cfs_bandwidth_used);
fair.c:	static_key_slow_dec_cpuslocked(&__cfs_bandwidth_used);

% grep -e '_enabled[ )]' fair.c core.c
fair.c:	if (!cfs_bandwidth_used() || !cfs_rq->runtime_enabled)
fair.c:	if (!cfs_rq->runtime_enabled || cfs_rq->nr_running)
fair.c:	if (!cfs_rq->runtime_enabled || cfs_rq->curr)
fair.c:	if (likely(!cfs_rq->runtime_enabled || cfs_rq->runtime_remaining > 0))
fair.c:	cfs_rq->runtime_enabled = 0;
fair.c:		cfs_rq->runtime_enabled = cfs_b->quota != RUNTIME_INF;
fair.c:		if (!cfs_rq->runtime_enabled)
fair.c:		cfs_rq->runtime_enabled = 0;
core.c:	if (static_key_false((&paravirt_steal_rq_enabled))) {
core.c:	if (unlikely(!p->sched_class->uclamp_enabled))
core.c:	if (unlikely(!p->sched_class->uclamp_enabled))
core.c:	 * Prevent race between setting of cfs_rq->runtime_enabled and
core.c:	runtime_enabled = quota != RUNTIME_INF;
core.c:	runtime_was_enabled = cfs_b->quota != RUNTIME_INF;
core.c:	if (runtime_enabled && !runtime_was_enabled)
core.c:	if (runtime_enabled)
core.c:		cfs_rq->runtime_enabled = runtime_enabled;
core.c:	if (runtime_was_enabled && !runtime_enabled)
---8<---

even just for consistency's sake, I would still prefer sched_uclamp_enabled
for the static key name.
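
Something along these lines, just to illustrate the naming; this reuses the
uclamp_is_enabled() helper you add later in this patch and is only a sketch:

---8<---
static DEFINE_STATIC_KEY_FALSE(sched_uclamp_enabled);

bool uclamp_is_enabled(void)
{
	return static_branch_likely(&sched_uclamp_enabled);
}
---8<---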

> +
>  /* Integer rounded range for each bucket */
>  #define UCLAMP_BUCKET_DELTA DIV_ROUND_CLOSEST(SCHED_CAPACITY_SCALE, UCLAMP_BUCKETS)
>  
> @@ -994,9 +1013,16 @@ static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p,
>  	lockdep_assert_held(&rq->lock);
>  
>  	bucket = &uc_rq->bucket[uc_se->bucket_id];
> -	SCHED_WARN_ON(!bucket->tasks);
> -	if (likely(bucket->tasks))
> -		bucket->tasks--;
> +
> +	/*
> +	 * This could happen if sched_uclamp_used was enabled while the
> +	 * current task was running, hence we could end up with unbalanced call
> +	 * to uclamp_rq_dec_id().
> +	 */
> +	if (unlikely(!bucket->tasks))
> +		return;
> +
> +	bucket->tasks--;
>  	uc_se->active = false;

In this chunk you are indeed changing the code.

Are we sure there are no issues with patterns like:

  enqueue(taskA)
  // uclamp gets enabled
  enqueue(taskB)
  dequeue(taskA)
  // bucket->tasks is now 0
  dequeue(taskB)

TaskB has been enqueued with uclamp enabled, thus it has
uc_se->active=True and has enforced its clamp value at RQ level.

But with your change above we never reset uc_se->active for taskB anymore.

As per my previous proposal: why not just remove the SCHED_WARN_ON()?
That's the only real problem in the code above, since we are no longer
guaranteed to have balanced inc/dec calls.
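
IOW, keep the original hunk and just drop the warning, something like this
sketch:

---8<---
	bucket = &uc_rq->bucket[uc_se->bucket_id];

	/*
	 * No SCHED_WARN_ON() here: once the static key can be flipped while
	 * tasks are already running, inc/dec are not guaranteed to be
	 * balanced.
	 */
	if (likely(bucket->tasks))
		bucket->tasks--;

	/* Always clear active, even if the counter was already zero */
	uc_se->active = false;
---8<---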

[...]

> +bool uclamp_is_enabled(void)
> +{
> +	return static_branch_likely(&sched_uclamp_used);
> +}
> +

The above and the following changes are not necessary if we just
add the guard at the beginning of uclamp_rq_util_with(), instead of what
you do in PATCH 1.

I think this will remove more overhead, by avoiding unnecessary min/max
clamping for both RT and CFS.
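
I.e., something like the following sketch (using the sched_uclamp_used key
from this patch; the rest of the function is elided):

---8<---
unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
				  struct task_struct *p)
{
	/* Bail out early when no task has ever requested a clamp */
	if (!static_branch_likely(&sched_uclamp_used))
		return util;

	/* ... existing min/max aggregation and clamp(util, ...) ... */
}
---8<---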
  
> diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
> index 7fbaee24c824..3f4e296ccb67 100644
> --- a/kernel/sched/cpufreq_schedutil.c
> +++ b/kernel/sched/cpufreq_schedutil.c
> @@ -210,7 +210,7 @@ unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs,
>  	unsigned long dl_util, util, irq;
>  	struct rq *rq = cpu_rq(cpu);
>  
> -	if (!IS_BUILTIN(CONFIG_UCLAMP_TASK) &&
> +	if (!uclamp_is_enabled() &&
>  	    type == FREQUENCY_UTIL && rt_rq_is_runnable(&rq->rt)) {
>  		return max;
>  	}
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 1d4e94c1e5fe..aff5430331e4 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -2371,6 +2371,8 @@ unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
>  
>  	return clamp(util, min_util, max_util);
>  }
> +
> +bool uclamp_is_enabled(void);
>  #else /* CONFIG_UCLAMP_TASK */
>  static inline
>  unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
> @@ -2378,6 +2380,11 @@ unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
>  {
>  	return util;
>  }
> +
> +static inline bool uclamp_is_enabled(void)
> +{
> +	return false;
> +}
>  #endif /* CONFIG_UCLAMP_TASK */
>  
>  #ifdef arch_scale_freq_capacity
