Date:	Thu, 24 Mar 2011 12:06:43 +0530
From:	Bharata B Rao <bharata@...ux.vnet.ibm.com>
To:	Paul Turner <pjt@...gle.com>
Cc:	linux-kernel@...r.kernel.org,
	Peter Zijlstra <a.p.zijlstra@...llo.nl>,
	Dhaval Giani <dhaval.giani@...il.com>,
	Balbir Singh <balbir@...ux.vnet.ibm.com>,
	Vaidyanathan Srinivasan <svaidy@...ux.vnet.ibm.com>,
	Srivatsa Vaddagiri <vatsa@...ibm.com>,
	Kamalesh Babulal <kamalesh@...ux.vnet.ibm.com>,
	Ingo Molnar <mingo@...e.hu>,
	Pavel Emelyanov <xemul@...nvz.org>,
	Nikhil Rao <ncrao@...gle.com>
Subject: Re: [patch 04/15] sched: throttle cfs_rq entities which exceed their
 local quota

On Tue, Mar 22, 2011 at 08:03:30PM -0700, Paul Turner wrote:
> In account_cfs_rq_quota() (via update_curr()) we track consumption versus a
> cfs_rq's locally assigned quota and whether there is global quota available
> to provide a refill when it runs out.
> 
> In the case that there is no quota remaining it's necessary to throttle so
> that execution ceases until the subsequent period.  While it is at this
> boundary that we detect (and signal for, via resched_task()) that a throttle
> is required, the actual operation is deferred until put_prev_entity().
> 
> At this point the cfs_rq is marked as throttled and not re-enqueued; this
> avoids potential interactions with throttled runqueues in the event that we
> are not immediately able to evict the running task.
> 
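For readers following along: the accounting referred to above looks roughly
like this (a sketch only -- the real bodies of account_cfs_rq_quota() and
request_cfs_rq_quota() are introduced elsewhere in this series, so details
may differ):

        static void account_cfs_rq_quota(struct cfs_rq *cfs_rq,
                                         unsigned long delta_exec)
        {
                if (!cfs_rq->quota_enabled)
                        return;

                /* charge the runtime just consumed against the local slice */
                cfs_rq->quota_remaining -= delta_exec;

                if (cfs_rq->quota_remaining > 0)
                        return;

                /*
                 * Local slice exhausted: try to refill from the task group's
                 * global pool.  If that fails, quota_remaining stays <= 0 and
                 * within_bandwidth() reports the cfs_rq as over quota.
                 */
                request_cfs_rq_quota(cfs_rq);
        }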
> Signed-off-by: Paul Turner <pjt@...gle.com>
> Signed-off-by: Nikhil Rao <ncrao@...gle.com>
> Signed-off-by: Bharata B Rao <bharata@...ux.vnet.ibm.com>
> ---
>  kernel/sched.c      |    2 
>  kernel/sched_fair.c |  117 +++++++++++++++++++++++++++++++++++++++++++++++++---
>  2 files changed, 113 insertions(+), 6 deletions(-)
> 
> Index: tip/kernel/sched.c
> ===================================================================
> --- tip.orig/kernel/sched.c
> +++ tip/kernel/sched.c
> @@ -386,7 +386,7 @@ struct cfs_rq {
>  	unsigned long load_contribution;
>  #endif
>  #ifdef CONFIG_CFS_BANDWIDTH
> -	int quota_enabled;
> +	int quota_enabled, throttled;
>  	s64 quota_remaining;
>  #endif
>  #endif
> Index: tip/kernel/sched_fair.c
> ===================================================================
> --- tip.orig/kernel/sched_fair.c
> +++ tip/kernel/sched_fair.c
> @@ -321,9 +321,6 @@ find_matching_se(struct sched_entity **s
> 
>  #endif	/* CONFIG_FAIR_GROUP_SCHED */
> 
> -static void account_cfs_rq_quota(struct cfs_rq *cfs_rq,
> -		                 unsigned long delta_exec);
> -
>  /**************************************************************
>   * Scheduling class tree data structure manipulation methods:
>   */
> @@ -588,6 +585,9 @@ __update_curr(struct cfs_rq *cfs_rq, str
>  #endif
>  }
> 
> +static void account_cfs_rq_quota(struct cfs_rq *cfs_rq,
> +		unsigned long delta_exec);
> +
>  static void update_curr(struct cfs_rq *cfs_rq)
>  {
>  	struct sched_entity *curr = cfs_rq->curr;
> @@ -1221,6 +1221,9 @@ static struct sched_entity *pick_next_en
>  	return se;
>  }
> 
> +static void throttle_cfs_rq(struct cfs_rq *cfs_rq);
> +static inline int within_bandwidth(struct cfs_rq *cfs_rq);
> +
>  static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
>  {
>  	/*
> @@ -1230,6 +1233,9 @@ static void put_prev_entity(struct cfs_r
>  	if (prev->on_rq)
>  		update_curr(cfs_rq);
> 
> +	if (!within_bandwidth(cfs_rq))
> +		throttle_cfs_rq(cfs_rq);
> +
>  	check_spread(cfs_rq, prev);
>  	if (prev->on_rq) {
>  		update_stats_wait_start(cfs_rq, prev);
> @@ -1241,6 +1247,8 @@ static void put_prev_entity(struct cfs_r
>  	cfs_rq->curr = NULL;
>  }
> 
> +static void check_cfs_rq_quota(struct cfs_rq *cfs_rq);
> +
>  static void
>  entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
>  {
> @@ -1249,6 +1257,9 @@ entity_tick(struct cfs_rq *cfs_rq, struc
>  	 */
>  	update_curr(cfs_rq);
> 
> +	/* check that entity's usage is still within quota (if enabled) */
> +	check_cfs_rq_quota(cfs_rq);
> +
>  	/*
>  	 * Update share accounting for long-running entities.
>  	 */
> @@ -1294,6 +1305,46 @@ static inline u64 sched_cfs_bandwidth_sl
>         return (u64)sysctl_sched_cfs_bandwidth_slice * NSEC_PER_USEC;
>  }
> 
> +static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
> +{
> +	return cfs_rq->throttled;
> +}
> +
> +static inline int throttled_hierarchy(struct cfs_rq *cfs_rq)
> +{
> +	struct task_group *tg;
> +	struct sched_entity *se;
> +
> +	if (cfs_rq_throttled(cfs_rq))
> +		return 1;
> +
> +	tg = cfs_rq->tg;
> +	se = tg->se[cpu_of(rq_of(cfs_rq))];
> +	if (!se)
> +		return 0;
> +
> +	for_each_sched_entity(se) {
> +		if (cfs_rq_throttled(cfs_rq_of(se)))
> +			return 1;
> +	}
> +
> +	return 0;
> +}
> +
> +static inline int within_bandwidth(struct cfs_rq *cfs_rq)
> +{
> +	return !cfs_rq->quota_enabled || cfs_rq->quota_remaining > 0;
> +}
> +
> +static void check_cfs_rq_quota(struct cfs_rq *cfs_rq)
> +{
> +	if (within_bandwidth(cfs_rq))
> +		return;
> +
> +	resched_task(rq_of(cfs_rq)->curr);
> +}
> +
>  static void request_cfs_rq_quota(struct cfs_rq *cfs_rq)
>  {
>  	struct task_group *tg = cfs_rq->tg;
> @@ -1330,6 +1381,29 @@ static void account_cfs_rq_quota(struct 
>  	request_cfs_rq_quota(cfs_rq);
>  }
> 
> +static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
> +{
> +	struct sched_entity *se;
> +
> +	se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
> +
> +	/* account load preceding throttle */
> +	update_cfs_load(cfs_rq, 0);
> +
> +	for_each_sched_entity(se) {
> +		struct cfs_rq *qcfs_rq = cfs_rq_of(se);
> +		/* throttled entity or throttle-on-deactivate */
> +		if (!se->on_rq)
> +			break;
> +
> +		dequeue_entity(qcfs_rq, se, DEQUEUE_SLEEP);
> +		if (qcfs_rq->load.weight)
> +			break;
> +	}
> +
> +	cfs_rq->throttled = 1;
> +}

Since throttling is done from put_prev_entity(), if I understand correctly,
you will be doing a 'put' for current entities which are not on the tree. Can
you avoid the dequeue_entity() call here, which I think will anyway bail out
of the actual dequeueing (the se != cfs_rq->curr check in dequeue_entity())?
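For reference, the bail-out I mean is this existing check in dequeue_entity()
(paraphrased; the surrounding accounting code is omitted):

        /*
         * From dequeue_entity(): the currently running entity is not
         * kept in the rbtree, so its removal from the tree is skipped;
         * only the bookkeeping side effects happen for curr.
         */
        if (se != cfs_rq->curr)
                __dequeue_entity(cfs_rq, se);
        se->on_rq = 0;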

Regards,
Bharata.
