Message-ID: <37D7C6CF3E00A74B8858931C1DB2F0775378A24A@SHSMSX103.ccr.corp.intel.com>
Date: Tue, 22 Aug 2017 17:23:47 +0000
From: "Liang, Kan" <kan.liang@...el.com>
To: 'Mel Gorman' <mgorman@...hsingularity.net>,
'Linus Torvalds' <torvalds@...ux-foundation.org>
CC: 'Mel Gorman' <mgorman@...e.de>,
"'Kirill A. Shutemov'" <kirill.shutemov@...ux.intel.com>,
'Tim Chen' <tim.c.chen@...ux.intel.com>,
'Peter Zijlstra' <peterz@...radead.org>,
'Ingo Molnar' <mingo@...e.hu>,
"'Andi Kleen'" <ak@...ux.intel.com>,
'Andrew Morton' <akpm@...ux-foundation.org>,
'Johannes Weiner' <hannes@...xchg.org>,
'Jan Kara' <jack@...e.cz>, 'linux-mm' <linux-mm@...ck.org>,
'Linux Kernel Mailing List' <linux-kernel@...r.kernel.org>
Subject: RE: [PATCH 1/2] sched/wait: Break up long wake list walk
> > Covering both paths would be something like the patch below which
> > spins until the page is unlocked or it should reschedule. It's not
> > even boot tested as I spent what time I had on the test case that I
> > hoped would be able to prove it really works.
>
> I will give it a try.
Although the patch doesn't trigger the watchdog, the spin-lock wait time
is still significant (0.45s), and it may get worse again on larger
systems.
Here is the irqsoff ftrace result:
# tracer: irqsoff
#
# irqsoff latency trace v1.1.5 on 4.13.0-rc4+
# --------------------------------------------------------------------
# latency: 451753 us, #4/4, CPU#159 | (M:desktop VP:0, KP:0, SP:0 HP:0 #P:224)
# -----------------
# | task: fjsctest-233851 (uid:0 nice:0 policy:0 rt_prio:0)
# -----------------
# => started at: wake_up_page_bit
# => ended at: wake_up_page_bit
#
#
# _------=> CPU#
# / _-----=> irqs-off
# | / _----=> need-resched
# || / _---=> hardirq/softirq
# ||| / _--=> preempt-depth
# |||| / delay
# cmd pid ||||| time | caller
# \ / ||||| \ | /
<...>-233851 159d... 0us@: _raw_spin_lock_irqsave <-wake_up_page_bit
<...>-233851 159dN.. 451726us+: _raw_spin_unlock_irqrestore <-wake_up_page_bit
<...>-233851 159dN.. 451754us!: trace_hardirqs_on <-wake_up_page_bit
<...>-233851 159dN.. 451873us : <stack trace>
=> unlock_page
=> migrate_pages
=> migrate_misplaced_page
=> __handle_mm_fault
=> handle_mm_fault
=> __do_page_fault
=> do_page_fault
=> page_fault
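
For reference, the trace above comes from the irqsoff tracer, which
records the longest section executed with interrupts disabled. Below is
a minimal C sketch of driving it through tracefs; the paths and
privileges are assumptions (tracefs mounted at
/sys/kernel/debug/tracing, run as root), and plain echo/cat from a
shell does the same job:

/*
 * Sketch only: arm the irqsoff tracer through tracefs and dump the
 * recorded worst-case trace afterwards.
 */
#include <stdio.h>
#include <stdlib.h>

static void write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		exit(EXIT_FAILURE);
	}
	fputs(val, f);
	fclose(f);
}

int main(void)
{
	static const char tracing[] = "/sys/kernel/debug/tracing";
	char path[256], line[1024];
	FILE *f;

	snprintf(path, sizeof(path), "%s/tracing_max_latency", tracing);
	write_str(path, "0");		/* reset the recorded maximum */
	snprintf(path, sizeof(path), "%s/current_tracer", tracing);
	write_str(path, "irqsoff");	/* start measuring irqs-off sections */

	puts("irqsoff tracer armed; run the workload, then press Enter");
	getchar();

	snprintf(path, sizeof(path), "%s/trace", tracing);
	f = fopen(path, "r");		/* worst-case trace, as shown above */
	if (!f) {
		perror(path);
		return EXIT_FAILURE;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}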
The call graph of __spinwait_on_page_locked (the path that previously
went through wait_on_page_bit_common):
100.00% (ffffffff971b252b)
|
---__spinwait_on_page_locked
|
|--96.81%--__migration_entry_wait
| migration_entry_wait
| do_swap_page
| __handle_mm_fault
| handle_mm_fault
| __do_page_fault
| do_page_fault
| page_fault
| |
| |--22.49%--0x123a2
| | |
| | --22.34%--start_thread
| |
| |--15.69%--0x127bc
| | |
| | --13.20%--start_thread
| |
| |--13.48%--0x12352
| | |
| | --11.74%--start_thread
| |
| |--13.43%--0x127f2
| | |
| | --11.25%--start_thread
| |
| |--10.03%--0x1285e
| | |
| | --8.59%--start_thread
| |
| |--5.90%--0x12894
| | |
| | --5.03%--start_thread
| |
| |--5.66%--0x12828
| | |
| | --4.81%--start_thread
| |
| |--5.17%--0x1233c
| | |
| | --4.46%--start_thread
| |
| --4.72%--0x2b788
| |
| --4.72%--0x127a2
| start_thread
|
--3.19%--do_huge_pmd_numa_page
__handle_mm_fault
handle_mm_fault
__do_page_fault
do_page_fault
page_fault
0x2b788
0x127a2
start_thread
>
> >
> > diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> > index 79b36f57c3ba..31cda1288176 100644
> > --- a/include/linux/pagemap.h
> > +++ b/include/linux/pagemap.h
> > @@ -517,6 +517,13 @@ static inline void wait_on_page_locked(struct page *page)
> >  	wait_on_page_bit(compound_head(page), PG_locked);
> >  }
> >
> > +void __spinwait_on_page_locked(struct page *page);
> > +static inline void spinwait_on_page_locked(struct page *page)
> > +{
> > +	if (PageLocked(page))
> > +		__spinwait_on_page_locked(page);
> > +}
> > +
> >  static inline int wait_on_page_locked_killable(struct page *page)
> >  {
> >  	if (!PageLocked(page))
> > diff --git a/mm/filemap.c b/mm/filemap.c
> > index a49702445ce0..c9d6f49614bc 100644
> > --- a/mm/filemap.c
> > +++ b/mm/filemap.c
> > @@ -1210,6 +1210,15 @@ int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
> >  	}
> >  }
> >
> > +void __spinwait_on_page_locked(struct page *page)
> > +{
> > +	do {
> > +		cpu_relax();
> > +	} while (PageLocked(page) && !cond_resched());
> > +
> > +	wait_on_page_locked(page);
> > +}
> > +
> >  /**
> >   * page_cache_next_hole - find the next hole (not-present entry)
> >   * @mapping:	mapping
> > diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> > index 90731e3b7e58..c7025c806420 100644
> > --- a/mm/huge_memory.c
> > +++ b/mm/huge_memory.c
> > @@ -1443,7 +1443,7 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
> >  		if (!get_page_unless_zero(page))
> >  			goto out_unlock;
> >  		spin_unlock(vmf->ptl);
> > -		wait_on_page_locked(page);
> > +		spinwait_on_page_locked(page);
> >  		put_page(page);
> >  		goto out;
> >  	}
> > @@ -1480,7 +1480,7 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
> >  		if (!get_page_unless_zero(page))
> >  			goto out_unlock;
> >  		spin_unlock(vmf->ptl);
> > -		wait_on_page_locked(page);
> > +		spinwait_on_page_locked(page);
> >  		put_page(page);
> >  		goto out;
> >  	}
> > diff --git a/mm/migrate.c b/mm/migrate.c
> > index e84eeb4e4356..9b6c3fc5beac 100644
> > --- a/mm/migrate.c
> > +++ b/mm/migrate.c
> > @@ -308,7 +308,7 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
> >  	if (!get_page_unless_zero(page))
> >  		goto out;
> >  	pte_unmap_unlock(ptep, ptl);
> > -	wait_on_page_locked(page);
> > +	spinwait_on_page_locked(page);
> >  	put_page(page);
> >  	return;
> >  out:
> >
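
For clarity, the pattern in __spinwait_on_page_locked() above is: spin
briefly with cpu_relax(), leave the spin as soon as cond_resched()
reports that the CPU is wanted elsewhere, then fall back to the
existing sleeping wait. A rough userspace analogue, with a pthread
condvar standing in for the page wait queue (all names below are made
up for illustration, not kernel API):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

struct spinwait_flag {
	atomic_bool	locked;		/* analogue of PG_locked */
	pthread_mutex_t	lock;
	pthread_cond_t	cond;
};

#define SPINWAIT_FLAG_INIT \
	{ true, PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER }

#define SPIN_LIMIT	1000	/* arbitrary bound before blocking */

static inline void cpu_relax_hint(void)
{
#if defined(__x86_64__) || defined(__i386__)
	__builtin_ia32_pause();	/* same hint cpu_relax() issues on x86 */
#else
	__asm__ __volatile__("" ::: "memory");
#endif
}

static void spinwait_on_flag(struct spinwait_flag *f)
{
	int spins = 0;

	/* Fast path: the holder usually clears the flag quickly. */
	while (atomic_load_explicit(&f->locked, memory_order_acquire)) {
		cpu_relax_hint();
		if (++spins > SPIN_LIMIT)
			break;	/* bounded spin, like cond_resched() firing */
	}

	/* Slow path: block, like the wait_on_page_locked() fallback. */
	pthread_mutex_lock(&f->lock);
	while (atomic_load_explicit(&f->locked, memory_order_acquire))
		pthread_cond_wait(&f->cond, &f->lock);
	pthread_mutex_unlock(&f->lock);
}

static void release_flag(struct spinwait_flag *f)	/* unlock_page() side */
{
	pthread_mutex_lock(&f->lock);
	atomic_store_explicit(&f->locked, false, memory_order_release);
	pthread_cond_broadcast(&f->cond);
	pthread_mutex_unlock(&f->lock);
}

The bounded spin covers the common case where migration clears the bit
well within a timeslice, while the blocking fallback keeps long waits
from burning CPU time.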