Date:	Wed, 03 Aug 2016 00:04:52 +0200
From:	Giovanni Gherdovich <ggherdovich@...e.cz>
To:	Peter Zijlstra <peterz@...radead.org>
Cc:	Ingo Molnar <mingo@...hat.com>,
	Mike Galbraith <mgalbraith@...e.de>,
	Stanislaw Gruszka <sgruszka@...hat.com>,
	linux-kernel@...r.kernel.org, Mel Gorman <mgorman@...e.com>,
	mgorman@...hsingularity.net
Subject: Re: [PATCH] sched/cputime: Mitigate performance regression in
 times()/clock_gettime()

Hello Peter,

thank you for your reply.

On Tue, 2016-08-02 at 12:37 +0200, Peter Zijlstra wrote:
> On Tue, Jul 26, 2016 at 04:07:14PM +0200, Giovanni Gherdovich wrote:
> 
> > Signed-off-by: Mike Galbraith <mgalbraith@...e.de>
> > Signed-off-by: Giovanni Gherdovich <ggherdovich@...e.cz>
> 
> SoB chain is broken. Either Mike wrote the patch, in which case you're
> missing a From: Mike header someplace, or you wrote it and Mike needs
> to be an Ack/Reviewed or somesuch.

Right. As Mike already explained, this patch is the result of him
correcting a much more involved and complicated solution I had prepared
to solve the problem. I will add the "From: Mike" header in v2.
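
For reference, with the attribution fixed the v2 changelog would then
start roughly like this (addresses as they appear in this archive,
"[changelog text]" is a placeholder):

    From: Mike Galbraith <mgalbraith@...e.de>

    [changelog text]

    Signed-off-by: Mike Galbraith <mgalbraith@...e.de>
    Signed-off-by: Giovanni Gherdovich <ggherdovich@...e.cz>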

> 
> > ---
> >  kernel/sched/core.c | 4 ++++
> >  1 file changed, 4 insertions(+)
> > 
> > diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> > index 51d7105..0ef1e69 100644
> > --- a/kernel/sched/core.c
> > +++ b/kernel/sched/core.c
> > @@ -2998,6 +2998,10 @@ unsigned long long task_sched_runtime(struct task_struct *p)
> >  	 * thread, breaking clock_gettime().
> >  	 */
> >  	if (task_current(rq, p) && task_on_rq_queued(p)) {
> > +#if defined(CONFIG_FAIR_GROUP_SCHED)
> 
> This here wants a comment on why we're doing this. Because I'm sure
> that if someone were to read this code in a few weeks they'd go
> WTF!?

I had that config option enabled on the machine I was testing on, and
assumed it was somehow related to my observations. I will repeat the
experiment without it; if I obtain the same results I will drop the
conditional, otherwise I will add a comment motivating its necessity.
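
If the conditional does turn out to be necessary, v2 would carry a
comment along these lines (wording is mine, just a sketch):

#if defined(CONFIG_FAIR_GROUP_SCHED)
		/*
		 * update_curr() will chase cfs_rq->curr and read its
		 * exec_start; start pulling those cache lines in now,
		 * while we still hold the rq lock.
		 */
		prefetch((&p->se)->cfs_rq->curr);
		prefetch(&(&p->se)->cfs_rq->curr->exec_start);
#endif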

I will submit a v2 early next week, rebasing the patch on the
forthcoming 4.8-rc1 tag and updating the experimental data.

> 
> Also, is there a possibility of manual CSE we should do?
> 
> > +		prefetch((&p->se)->cfs_rq->curr);
> > +		prefetch(&(&p->se)->cfs_rq->curr->exec_start);
> > +#endif
> >  		update_rq_clock(rq);
> >  		p->sched_class->update_curr(rq);
> >  	}

Good point. I verified that GCC 4.8.5 already does the CSE without any
hints. This is the alternative code, with the explicit CSE, that I
compiled:

-- -- >8 -- -- >8 -- -- >8 -- -- >8 -- -- >8 -- -- >8 -- -- >8 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 51d7105..5d676db 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2998,6 +2998,11 @@ unsigned long long task_sched_runtime(struct task_struct *p)
         * thread, breaking clock_gettime().
         */
        if (task_current(rq, p) && task_on_rq_queued(p)) {
+#if defined(CONFIG_FAIR_GROUP_SCHED)
+               struct sched_entity *curr = (&p->se)->cfs_rq->curr;
+               prefetch(curr);
+               prefetch(&curr->exec_start);
+#endif
                update_rq_clock(rq);
                p->sched_class->update_curr(rq);
        }
-- -- >8 -- -- >8 -- -- >8 -- -- >8 -- -- >8 -- -- >8 -- -- >8 

Below are the snippets of generated code with and without the explicit
CSE, obtained by running 'disassemble /m task_sched_runtime' in gdb;
you'll see they're identical. If you prefer the explicit hint I'll
include it in v2, but it's probably safe to say it isn't needed.
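
As an aside, on configs without an arch-specific implementation,
prefetch(x) in include/linux/prefetch.h is just GCC's
__builtin_prefetch(x), so this is the textbook CSE case. A standalone
sketch (made-up types, not the kernel's) showing the same effect:

/* Both prefetch arguments hang off the same cfs_rq->curr pointer;
 * even written without a temporary, GCC loads the pointer once and
 * reuses it for both addresses.
 */
struct se_like {
	unsigned long long vruntime;	/* placeholder field */
	unsigned long long exec_start;	/* constant offset from base */
};
struct cfs_rq_like {
	struct se_like *curr;
};

static inline void prefetch_curr(struct cfs_rq_like *cfs_rq)
{
	__builtin_prefetch(cfs_rq->curr);
	__builtin_prefetch(&cfs_rq->curr->exec_start);
}

Compiled with gcc -O2 this emits a single load of cfs_rq->curr, same
as in the disassembly below.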

Regards,
Giovanni



with CSE: -- -- >8 -- -- >8 -- -- >8 -- -- >8 -- -- >8 -- -- >8 

3001    #if defined(CONFIG_FAIR_GROUP_SCHED)
3002                struct sched_entity *curr = (&p->se)->cfs_rq->curr;

   <+117>:   mov    0x1d0(%rbx),%rdx
   <+124>:   mov    0x38(%rdx),%rdx

3003                prefetch(curr);
3004                prefetch(&curr->exec_start);
3005    #endif
3006                update_rq_clock(rq);
3007                p->sched_class->update_curr(rq);

   <+144>:   mov    0x58(%rbx),%rdx
   <+148>:   mov    %rax,%rdi
   <+151>:   mov    %rax,-0x20(%rbp)
   <+155>:   callq  *0xb0(%rdx)
   <+161>:   mov    -0x20(%rbp),%rax
   <+165>:   jmp    <task_sched_runtime+66>
   <+167>:   mov    %rax,%rdi
   <+170>:   mov    %rax,-0x20(%rbp)
   <+174>:   callq  <update_rq_clock>
   <+179>:   mov    -0x20(%rbp),%rax
   <+183>:   jmp    <task_sched_runtime+144>
         :  nopl   0x0(%rax)

3008        }
3009        ns = p->se.sum_exec_runtime;

   <+66>:    mov    0xc8(%rbx),%r12

3010        task_rq_unlock(rq, p, &rf);
3011
3012        return ns;

   <+103>:   mov    %r12,%rax


w/o CSE: -- -- >8 -- -- >8 -- -- >8 -- -- >8 -- -- >8 -- -- >8

3001    #if defined(CONFIG_FAIR_GROUP_SCHED)
3002                prefetch((&p->se)->cfs_rq->curr);

   <+117>:   mov    0x1d0(%rbx),%rdx
   <+124>:   mov    0x38(%rdx),%rdx

3003                prefetch(&(&p->se)->cfs_rq->curr->exec_start);
3004    #endif
3005                update_rq_clock(rq);
3006                p->sched_class->update_curr(rq);

   <+144>:   mov    0x58(%rbx),%rdx
   <+148>:   mov    %rax,%rdi
   <+151>:   mov    %rax,-0x20(%rbp)
   <+155>:   callq  *0xb0(%rdx)
   <+161>:   mov    -0x20(%rbp),%rax
   <+165>:   jmp    <task_sched_runtime+66>
   <+167>:   mov    %rax,%rdi
   <+170>:   mov    %rax,-0x20(%rbp)
   <+174>:   callq  <update_rq_clock>
   <+179>:   mov    -0x20(%rbp),%rax
   <+183>:   jmp    <task_sched_runtime+144>
         :   nopl   0x0(%rax)

3007        }
3008        ns = p->se.sum_exec_runtime;

   <+66>:    mov    0xc8(%rbx),%r12

3009        task_rq_unlock(rq, p, &rf);
3010
3011        return ns;

   <+103>:   mov    %r12,%rax
