lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 22 Aug 2017 17:23:47 +0000
From:   "Liang, Kan" <kan.liang@...el.com>
To:     'Mel Gorman' <mgorman@...hsingularity.net>,
        'Linus Torvalds' <torvalds@...ux-foundation.org>
CC:     'Mel Gorman' <mgorman@...e.de>,
        "'Kirill A. Shutemov'" <kirill.shutemov@...ux.intel.com>,
        'Tim Chen' <tim.c.chen@...ux.intel.com>,
        'Peter Zijlstra' <peterz@...radead.org>,
        'Ingo Molnar' <mingo@...e.hu>,
        "'Andi Kleen'" <ak@...ux.intel.com>,
        'Andrew Morton' <akpm@...ux-foundation.org>,
        'Johannes Weiner' <hannes@...xchg.org>,
        'Jan Kara' <jack@...e.cz>, 'linux-mm' <linux-mm@...ck.org>,
        'Linux Kernel Mailing List' <linux-kernel@...r.kernel.org>
Subject: RE: [PATCH 1/2] sched/wait: Break up long wake list walk


> > Covering both paths would be something like the patch below which
> > spins until the page is unlocked or it should reschedule. It's not
> > even boot tested as I spent what time I had on the test case that I
> > hoped would be able to prove it really works.
> 
> I will give it a try.

Although the patch doesn't trigger the watchdog, the spinlock wait time
with interrupts disabled is still not small (0.45s).
It may get worse again on larger systems.


The irqsoff ftrace result:
# tracer: irqsoff
#
# irqsoff latency trace v1.1.5 on 4.13.0-rc4+
# --------------------------------------------------------------------
# latency: 451753 us, #4/4, CPU#159 | (M:desktop VP:0, KP:0, SP:0 HP:0 #P:224)
#    -----------------
#    | task: fjsctest-233851 (uid:0 nice:0 policy:0 rt_prio:0)
#    -----------------
#  => started at: wake_up_page_bit
#  => ended at:   wake_up_page_bit
#
#
#                  _------=> CPU#            
#                 / _-----=> irqs-off        
#                | / _----=> need-resched    
#                || / _---=> hardirq/softirq 
#                ||| / _--=> preempt-depth   
#                |||| /     delay            
#  cmd     pid   ||||| time  |   caller      
#     \   /      |||||  \    |   /         
   <...>-233851 159d...    0us@: _raw_spin_lock_irqsave <-wake_up_page_bit
   <...>-233851 159dN.. 451726us+: _raw_spin_unlock_irqrestore <-wake_up_page_bit
   <...>-233851 159dN.. 451754us!: trace_hardirqs_on <-wake_up_page_bit
   <...>-233851 159dN.. 451873us : <stack trace>
 => unlock_page
 => migrate_pages
 => migrate_misplaced_page
 => __handle_mm_fault
 => handle_mm_fault
 => __do_page_fault
 => do_page_fault
 => page_fault


The call stack of wait_on_page_bit_common

   100.00%  (ffffffff971b252b)
            |
            ---__spinwait_on_page_locked
               |          
               |--96.81%--__migration_entry_wait
               |          migration_entry_wait
               |          do_swap_page
               |          __handle_mm_fault
               |          handle_mm_fault
               |          __do_page_fault
               |          do_page_fault
               |          page_fault
               |          |          
               |          |--22.49%--0x123a2
               |          |          |          
               |          |           --22.34%--start_thread
               |          |          
               |          |--15.69%--0x127bc
               |          |          |          
               |          |           --13.20%--start_thread
               |          |          
               |          |--13.48%--0x12352
               |          |          |          
               |          |           --11.74%--start_thread
               |          |          
               |          |--13.43%--0x127f2
               |          |          |          
               |          |           --11.25%--start_thread
               |          |          
               |          |--10.03%--0x1285e
               |          |          |          
               |          |           --8.59%--start_thread
               |          |          
               |          |--5.90%--0x12894
               |          |          |          
               |          |           --5.03%--start_thread
               |          |          
               |          |--5.66%--0x12828
               |          |          |          
               |          |           --4.81%--start_thread
               |          |          
               |          |--5.17%--0x1233c
               |          |          |          
               |          |           --4.46%--start_thread
               |          |          
               |           --4.72%--0x2b788
               |                     |          
               |                      --4.72%--0x127a2
               |                                start_thread
               |          
                --3.19%--do_huge_pmd_numa_page
                          __handle_mm_fault
                          handle_mm_fault
                          __do_page_fault
                          do_page_fault
                          page_fault
                          0x2b788
                          0x127a2
                          start_thread


> 
> >
> > diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index
> > 79b36f57c3ba..31cda1288176 100644
> > --- a/include/linux/pagemap.h
> > +++ b/include/linux/pagemap.h
> > @@ -517,6 +517,13 @@ static inline void wait_on_page_locked(struct page *page)
> >  		wait_on_page_bit(compound_head(page), PG_locked);
> >  }
> >
> > +void __spinwait_on_page_locked(struct page *page);
> > +static inline void spinwait_on_page_locked(struct page *page)
> > +{
> > +	if (PageLocked(page))
> > +		__spinwait_on_page_locked(page);
> > +}
> > +
> >  static inline int wait_on_page_locked_killable(struct page *page)
> >  {
> >  	if (!PageLocked(page))
> > diff --git a/mm/filemap.c b/mm/filemap.c index
> > a49702445ce0..c9d6f49614bc 100644
> > --- a/mm/filemap.c
> > +++ b/mm/filemap.c
> > @@ -1210,6 +1210,15 @@ int __lock_page_or_retry(struct page *page,
> > struct mm_struct *mm,
> >  	}
> >  }
> >
> > +void __spinwait_on_page_locked(struct page *page)
> > +{
> > +	do {
> > +		cpu_relax();
> > +	} while (PageLocked(page) && !cond_resched());
> > +
> > +	wait_on_page_locked(page);
> > +}
> > +
> >  /**
> >   * page_cache_next_hole - find the next hole (not-present entry)
> >   * @mapping: mapping
> > diff --git a/mm/huge_memory.c b/mm/huge_memory.c index
> > 90731e3b7e58..c7025c806420 100644
> > --- a/mm/huge_memory.c
> > +++ b/mm/huge_memory.c
> > @@ -1443,7 +1443,7 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
> >  		if (!get_page_unless_zero(page))
> >  			goto out_unlock;
> >  		spin_unlock(vmf->ptl);
> > -		wait_on_page_locked(page);
> > +		spinwait_on_page_locked(page);
> >  		put_page(page);
> >  		goto out;
> >  	}
> > @@ -1480,7 +1480,7 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
> >  		if (!get_page_unless_zero(page))
> >  			goto out_unlock;
> >  		spin_unlock(vmf->ptl);
> > -		wait_on_page_locked(page);
> > +		spinwait_on_page_locked(page);
> >  		put_page(page);
> >  		goto out;
> >  	}
> > diff --git a/mm/migrate.c b/mm/migrate.c index
> > e84eeb4e4356..9b6c3fc5beac 100644
> > --- a/mm/migrate.c
> > +++ b/mm/migrate.c
> > @@ -308,7 +308,7 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
> >  	if (!get_page_unless_zero(page))
> >  		goto out;
> >  	pte_unmap_unlock(ptep, ptl);
> > -	wait_on_page_locked(page);
> > +	spinwait_on_page_locked(page);
> >  	put_page(page);
> >  	return;
> >  out:
> >

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ