Message-ID: <CAKgT0UdFDcz=CQ+6mzcjh-apwy3UyPqAuOozvYr+2PSCNQrENA@mail.gmail.com>
Date: Wed, 29 Jul 2020 10:52:01 -0700
From: Alexander Duyck <alexander.duyck@...il.com>
To: Alex Shi <alex.shi@...ux.alibaba.com>
Cc: Andrew Morton <akpm@...ux-foundation.org>,
Mel Gorman <mgorman@...hsingularity.net>,
Tejun Heo <tj@...nel.org>, Hugh Dickins <hughd@...gle.com>,
Konstantin Khlebnikov <khlebnikov@...dex-team.ru>,
Daniel Jordan <daniel.m.jordan@...cle.com>,
Yang Shi <yang.shi@...ux.alibaba.com>,
Matthew Wilcox <willy@...radead.org>,
Johannes Weiner <hannes@...xchg.org>,
kbuild test robot <lkp@...el.com>,
linux-mm <linux-mm@...ck.org>,
LKML <linux-kernel@...r.kernel.org>, cgroups@...r.kernel.org,
Shakeel Butt <shakeelb@...gle.com>,
Joonsoo Kim <iamjoonsoo.kim@....com>,
Wei Yang <richard.weiyang@...il.com>,
"Kirill A. Shutemov" <kirill@...temov.name>,
Rong Chen <rong.a.chen@...el.com>,
Thomas Gleixner <tglx@...utronix.de>,
Andrey Ryabinin <aryabinin@...tuozzo.com>
Subject: Re: [PATCH v17 18/21] mm/lru: introduce the relock_page_lruvec function
On Sat, Jul 25, 2020 at 6:00 AM Alex Shi <alex.shi@...ux.alibaba.com> wrote:
>
> Use this new function to replace the repeated code; no functional change.
>
> Signed-off-by: Alex Shi <alex.shi@...ux.alibaba.com>
> Cc: Johannes Weiner <hannes@...xchg.org>
> Cc: Andrew Morton <akpm@...ux-foundation.org>
> Cc: Thomas Gleixner <tglx@...utronix.de>
> Cc: Andrey Ryabinin <aryabinin@...tuozzo.com>
> Cc: Matthew Wilcox <willy@...radead.org>
> Cc: Mel Gorman <mgorman@...hsingularity.net>
> Cc: Konstantin Khlebnikov <khlebnikov@...dex-team.ru>
> Cc: Hugh Dickins <hughd@...gle.com>
> Cc: Tejun Heo <tj@...nel.org>
> Cc: linux-kernel@...r.kernel.org
> Cc: cgroups@...r.kernel.org
> Cc: linux-mm@...ck.org
> ---
>  include/linux/memcontrol.h | 40 ++++++++++++++++++++++++++++++++++++++++
>  mm/mlock.c                 |  9 +--------
>  mm/swap.c                  | 33 +++++++--------------------------
>  mm/vmscan.c                |  8 +-------
>  4 files changed, 49 insertions(+), 41 deletions(-)
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 258901021c6c..6e670f991b42 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -1313,6 +1313,46 @@ static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec,
>          spin_unlock_irqrestore(&lruvec->lru_lock, flags);
>  }
>
> +/* Don't lock again if the page's lruvec is already locked */
> +static inline struct lruvec *relock_page_lruvec_irq(struct page *page,
> +                struct lruvec *locked_lruvec)
> +{
> +        struct pglist_data *pgdat = page_pgdat(page);
> +        bool locked;
> +
> +        rcu_read_lock();
> +        locked = mem_cgroup_page_lruvec(page, pgdat) == locked_lruvec;
> +        rcu_read_unlock();
> +
> +        if (locked)
> +                return locked_lruvec;
> +
> +        if (locked_lruvec)
> +                unlock_page_lruvec_irq(locked_lruvec);
> +
> +        return lock_page_lruvec_irq(page);
> +}
> +
> +/* Don't lock again if the page's lruvec is already locked */
> +static inline struct lruvec *relock_page_lruvec_irqsave(struct page *page,
> +                struct lruvec *locked_lruvec, unsigned long *flags)
> +{
> +        struct pglist_data *pgdat = page_pgdat(page);
> +        bool locked;
> +
> +        rcu_read_lock();
> +        locked = mem_cgroup_page_lruvec(page, pgdat) == locked_lruvec;
> +        rcu_read_unlock();
> +
> +        if (locked)
> +                return locked_lruvec;
> +
> +        if (locked_lruvec)
> +                unlock_page_lruvec_irqrestore(locked_lruvec, *flags);
> +
> +        return lock_page_lruvec_irqsave(page, flags);
> +}
> +
So looking these over they seem to be pretty inefficient for what they
do. Basically in the worst case (locked_lruvec == NULL) you end up
calling mem_cgroup_page_lruvec and doing the rcu_read_lock/unlock a
couple of times for a single page. It might make more sense to
structure this like:
        if (locked_lruvec) {
                if (lruvec_holds_page_lru_lock(page, locked_lruvec))
                        return locked_lruvec;

                unlock_page_lruvec_irqrestore(locked_lruvec, *flags);
        }

        return lock_page_lruvec_irqsave(page, flags);
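The _irq variant would be restructured the same way, just without the
flags. A rough sketch, assuming we add the lruvec_holds_page_lru_lock()
helper I am suggesting below:

static inline struct lruvec *relock_page_lruvec_irq(struct page *page,
                struct lruvec *locked_lruvec)
{
        /* only cycle the lock when moving to a different lruvec */
        if (locked_lruvec) {
                if (lruvec_holds_page_lru_lock(page, locked_lruvec))
                        return locked_lruvec;

                unlock_page_lruvec_irq(locked_lruvec);
        }

        return lock_page_lruvec_irq(page);
}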
The other piece that has me scratching my head is that I wonder if we
couldn't do this without needing the rcu_read_lock. For example, what
if we were to compare the page's mem_cgroup pointer to the memcg back
pointer stored in the mem_cgroup_per_node that the lruvec is embedded
in? It seems like ordering things this way would significantly reduce
the overhead due to the pointer chasing needed to see if the page is
in the locked lruvec or not.
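Just to sketch out what I am thinking (this assumes the lruvec is
embedded in a mem_cgroup_per_node, that a NULL page->mem_cgroup can be
treated as root_mem_cgroup, and that page->mem_cgroup is stable here,
all of which would need double checking):

static inline bool lruvec_holds_page_lru_lock(struct page *page,
                struct lruvec *lruvec)
{
        pg_data_t *pgdat = page_pgdat(page);
        struct mem_cgroup_per_node *mz;
        struct mem_cgroup *memcg;

        if (mem_cgroup_disabled())
                return lruvec == &pgdat->__lruvec;

        /* walk back from the lruvec to the memcg that owns it */
        mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
        memcg = page->mem_cgroup ? page->mem_cgroup : root_mem_cgroup;

        /* two pointer compares instead of rcu_read_lock() + lookup */
        return lruvec_pgdat(lruvec) == pgdat && mz->memcg == memcg;
}

That way the common case where we stay within the same lruvec becomes
a couple of pointer compares with no rcu_read_lock at all.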
>  #ifdef CONFIG_CGROUP_WRITEBACK
>
>  struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb);
> diff --git a/mm/mlock.c b/mm/mlock.c
> index 5d40d259a931..bc2fb3bfbe7a 100644
> --- a/mm/mlock.c
> +++ b/mm/mlock.c
> @@ -303,17 +303,10 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
>          /* Phase 1: page isolation */
>          for (i = 0; i < nr; i++) {
>                  struct page *page = pvec->pages[i];
> -                struct lruvec *new_lruvec;
>
>                  /* block memcg change in mem_cgroup_move_account */
>                  lock_page_memcg(page);
> -                new_lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
> -                if (new_lruvec != lruvec) {
> -                        if (lruvec)
> -                                unlock_page_lruvec_irq(lruvec);
> -                        lruvec = lock_page_lruvec_irq(page);
> -                }
> -
> +                lruvec = relock_page_lruvec_irq(page, lruvec);
>                  if (TestClearPageMlocked(page)) {
>                          /*
>                           * We already have pin from follow_page_mask()
> diff --git a/mm/swap.c b/mm/swap.c
> index 09edac441eb6..6d9c7288f7de 100644
> --- a/mm/swap.c
> +++ b/mm/swap.c
> @@ -209,19 +209,12 @@ static void pagevec_lru_move_fn(struct pagevec *pvec,
>
>          for (i = 0; i < pagevec_count(pvec); i++) {
>                  struct page *page = pvec->pages[i];
> -                struct lruvec *new_lruvec;
>
>                  /* block memcg migration during page moving between lru */
>                  if (!TestClearPageLRU(page))
>                          continue;
>
> -                new_lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
> -                if (lruvec != new_lruvec) {
> -                        if (lruvec)
> -                                unlock_page_lruvec_irqrestore(lruvec, flags);
> -                        lruvec = lock_page_lruvec_irqsave(page, &flags);
> -                }
> -
> +                lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags);
>                  (*move_fn)(page, lruvec);
>
>                  SetPageLRU(page);
> @@ -864,17 +857,12 @@ void release_pages(struct page **pages, int nr)
>                  }
>
>                  if (PageLRU(page)) {
> -                        struct lruvec *new_lruvec;
> -
> -                        new_lruvec = mem_cgroup_page_lruvec(page,
> -                                                        page_pgdat(page));
> -                        if (new_lruvec != lruvec) {
> -                                if (lruvec)
> -                                        unlock_page_lruvec_irqrestore(lruvec,
> -                                                                        flags);
> +                        struct lruvec *prev_lruvec = lruvec;
> +
> +                        lruvec = relock_page_lruvec_irqsave(page, lruvec,
> +                                                                        &flags);
> +                        if (prev_lruvec != lruvec)
>                                  lock_batch = 0;
> -                                lruvec = lock_page_lruvec_irqsave(page, &flags);
> -                        }
>
>                          __ClearPageLRU(page);
>                          del_page_from_lru_list(page, lruvec, page_off_lru(page));
> @@ -980,15 +968,8 @@ void __pagevec_lru_add(struct pagevec *pvec)
>
>          for (i = 0; i < pagevec_count(pvec); i++) {
>                  struct page *page = pvec->pages[i];
> -                struct lruvec *new_lruvec;
> -
> -                new_lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
> -                if (lruvec != new_lruvec) {
> -                        if (lruvec)
> -                                unlock_page_lruvec_irqrestore(lruvec, flags);
> -                        lruvec = lock_page_lruvec_irqsave(page, &flags);
> -                }
>
> +                lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags);
>                  __pagevec_lru_add_fn(page, lruvec);
>          }
>          if (lruvec)
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 168c1659e430..bdb53a678e7e 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -4292,15 +4292,9 @@ void check_move_unevictable_pages(struct pagevec *pvec)
>
>          for (i = 0; i < pvec->nr; i++) {
>                  struct page *page = pvec->pages[i];
> -                struct lruvec *new_lruvec;
>
>                  pgscanned++;
> -                new_lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
> -                if (lruvec != new_lruvec) {
> -                        if (lruvec)
> -                                unlock_page_lruvec_irq(lruvec);
> -                        lruvec = lock_page_lruvec_irq(page);
> -                }
> +                lruvec = relock_page_lruvec_irq(page, lruvec);
>
>                  if (!PageLRU(page) || !PageUnevictable(page))
>                          continue;
> --
> 1.8.3.1
>