lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <bee60747-dbad-cfc8-b0a3-a8ac4b1b0d0e@bytedance.com>
Date:   Wed, 10 Feb 2021 10:46:07 +0800
From:   Chengming Zhou <zhouchengming@...edance.com>
To:     Johannes Weiner <hannes@...xchg.org>
Cc:     mingo@...hat.com, peterz@...radead.org, juri.lelli@...hat.com,
        vincent.guittot@...aro.org, rostedt@...dmis.org,
        linux-kernel@...r.kernel.org, songmuchun@...edance.com
Subject: Re: [External] Re: [PATCH v2] psi: Remove the redundant psi_task_tick

Hello Johannes,

在 2021/2/9 下午11:48, Johannes Weiner 写道:
> Hello Chengming,
>
> On Tue, Feb 09, 2021 at 03:10:33PM +0800, Chengming Zhou wrote:
>> When the current task in a cgroup is in_memstall, the corresponding groupc
>> on that cpu is in PSI_MEM_FULL state, so we can exploit that to remove the
>> redundant psi_task_tick from scheduler_tick to save this periodic cost.
> Can you please update the patch name and the changelog to the new
> version of the patch? It's not removing the redundant tick, it's
> moving the reclaim detection from the timer tick to the task state
> tracking machinery using the recently added ONCPU state.

Yes, I will change the name and changelog, it will be clearer for this patch : )

>> Signed-off-by: Muchun Song <songmuchun@...edance.com>
>> Signed-off-by: Chengming Zhou <zhouchengming@...edance.com>
>> ---
>>  include/linux/psi.h  |  1 -
>>  kernel/sched/core.c  |  1 -
>>  kernel/sched/psi.c   | 49 ++++++++++++++-----------------------------------
>>  kernel/sched/stats.h |  9 ---------
>>  4 files changed, 14 insertions(+), 46 deletions(-)
>>
>> diff --git a/include/linux/psi.h b/include/linux/psi.h
>> index 7361023f3fdd..65eb1476ac70 100644
>> --- a/include/linux/psi.h
>> +++ b/include/linux/psi.h
>> @@ -20,7 +20,6 @@ void psi_task_change(struct task_struct *task, int clear, int set);
>>  void psi_task_switch(struct task_struct *prev, struct task_struct *next,
>>  		     bool sleep);
>>  
>> -void psi_memstall_tick(struct task_struct *task, int cpu);
>>  void psi_memstall_enter(unsigned long *flags);
>>  void psi_memstall_leave(unsigned long *flags);
>>  
>> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
>> index 15d2562118d1..31788a9b335b 100644
>> --- a/kernel/sched/core.c
>> +++ b/kernel/sched/core.c
>> @@ -4533,7 +4533,6 @@ void scheduler_tick(void)
>>  	update_thermal_load_avg(rq_clock_thermal(rq), rq, thermal_pressure);
>>  	curr->sched_class->task_tick(rq, curr, 0);
>>  	calc_global_load_tick(rq);
>> -	psi_task_tick(rq);
>>  
>>  	rq_unlock(rq, &rf);
>>  
>> diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
>> index 2293c45d289d..6e46d9eb279b 100644
>> --- a/kernel/sched/psi.c
>> +++ b/kernel/sched/psi.c
>> @@ -644,8 +644,7 @@ static void poll_timer_fn(struct timer_list *t)
>>  	wake_up_interruptible(&group->poll_wait);
>>  }
>>  
>> -static void record_times(struct psi_group_cpu *groupc, int cpu,
>> -			 bool memstall_tick)
>> +static void record_times(struct psi_group_cpu *groupc, int cpu)
>>  {
>>  	u32 delta;
>>  	u64 now;
>> @@ -664,23 +663,6 @@ static void record_times(struct psi_group_cpu *groupc, int cpu,
>>  		groupc->times[PSI_MEM_SOME] += delta;
>>  		if (groupc->state_mask & (1 << PSI_MEM_FULL))
>>  			groupc->times[PSI_MEM_FULL] += delta;
>> -		else if (memstall_tick) {
>> -			u32 sample;
>> -			/*
>> -			 * Since we care about lost potential, a
>> -			 * memstall is FULL when there are no other
>> -			 * working tasks, but also when the CPU is
>> -			 * actively reclaiming and nothing productive
>> -			 * could run even if it were runnable.
>> -			 *
>> -			 * When the timer tick sees a reclaiming CPU,
>> -			 * regardless of runnable tasks, sample a FULL
>> -			 * tick (or less if it hasn't been a full tick
>> -			 * since the last state change).
>> -			 */
>> -			sample = min(delta, (u32)jiffies_to_nsecs(1));
>> -			groupc->times[PSI_MEM_FULL] += sample;
>> -		}
>>  	}
>>  
>>  	if (groupc->state_mask & (1 << PSI_CPU_SOME)) {
>> @@ -714,7 +696,7 @@ static void psi_group_change(struct psi_group *group, int cpu,
>>  	 */
>>  	write_seqcount_begin(&groupc->seq);
>>  
>> -	record_times(groupc, cpu, false);
>> +	record_times(groupc, cpu);
>>  
>>  	for (t = 0, m = clear; m; m &= ~(1 << t), t++) {
>>  		if (!(m & (1 << t)))
>> @@ -738,6 +720,18 @@ static void psi_group_change(struct psi_group *group, int cpu,
>>  		if (test_state(groupc->tasks, s))
>>  			state_mask |= (1 << s);
>>  	}
>> +
>> +	/*
>> +	 * Since we care about lost potential, a memstall is FULL
>> +	 * when there are no other working tasks, but also when
>> +	 * the CPU is actively reclaiming and nothing productive
>> +	 * could run even if it were runnable. So when the current
>> +	 * task in a cgroup is in_memstall, the corresponding groupc
>> +	 * on that cpu is in PSI_MEM_FULL state.
>> +	 */
>> +	if (groupc->tasks[NR_ONCPU] && cpu_curr(cpu)->in_memstall)
>> +		state_mask |= (1 << PSI_MEM_FULL);
> This doesn't really work with the psi_task_switch() optimization. If
> we switch between two tasks inside a leaf group, where one is memstall
> and the other is not, we don't update the parents properly. So you
> need another branch in there as well for checking memstall. At which
> point the timer tick implementation is likely cheaper and simpler...

Thank you for pointing out this. I will add another branch there to check memstall to update

the parents properly and send a patch-v2.

Thanks.

>> @@ -144,14 +144,6 @@ static inline void psi_sched_switch(struct task_struct *prev,
>>  	psi_task_switch(prev, next, sleep);
>>  }
>>  
>> -static inline void psi_task_tick(struct rq *rq)
>> -{
>> -	if (static_branch_likely(&psi_disabled))
>> -		return;
>> -
>> -	if (unlikely(rq->curr->in_memstall))
>> -		psi_memstall_tick(rq->curr, cpu_of(rq));
>> -}
>>  #else /* CONFIG_PSI */
>>  static inline void psi_enqueue(struct task_struct *p, bool wakeup) {}
>>  static inline void psi_dequeue(struct task_struct *p, bool sleep) {}

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ