linux-kernel - Re: [PATCH v18 06/32] mm/thp: narrow lru locking

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20200910134923.GR6583@casper.infradead.org>
Date:   Thu, 10 Sep 2020 14:49:23 +0100
From:   Matthew Wilcox <willy@...radead.org>
To:     Alex Shi <alex.shi@...ux.alibaba.com>
Cc:     akpm@...ux-foundation.org, mgorman@...hsingularity.net,
        tj@...nel.org, hughd@...gle.com, khlebnikov@...dex-team.ru,
        daniel.m.jordan@...cle.com, hannes@...xchg.org, lkp@...el.com,
        linux-mm@...ck.org, linux-kernel@...r.kernel.org,
        cgroups@...r.kernel.org, shakeelb@...gle.com,
        iamjoonsoo.kim@....com, richard.weiyang@...il.com,
        kirill@...temov.name, alexander.duyck@...il.com,
        rong.a.chen@...el.com, mhocko@...e.com, vdavydov.dev@...il.com,
        shy828301@...il.com, Andrea Arcangeli <aarcange@...hat.com>
Subject: Re: [PATCH v18 06/32] mm/thp: narrow lru locking

On Mon, Aug 24, 2020 at 08:54:39PM +0800, Alex Shi wrote:
> lru_lock and page cache xa_lock have no reason with current sequence,
> put them together isn't necessary. let's narrow the lru locking, but
> left the local_irq_disable to block interrupt re-entry and statistic update.

What stats are you talking about here?

> +++ b/mm/huge_memory.c
> @@ -2397,7 +2397,7 @@ static void __split_huge_page_tail(struct page *head, int tail,
>  }
>  
>  static void __split_huge_page(struct page *page, struct list_head *list,
> -		pgoff_t end, unsigned long flags)
> +			      pgoff_t end)

Please don't change this whitespace.  It's really annoying having to
adjust the whitespace when renaming a function.  Just two tabs indentation
to give a clear separation of arguments from code is fine.


How about this patch instead?  It occurred to me we already have
perfectly good infrastructure to track whether or not interrupts are
already disabled, and so we should use that instead of ensuring that
interrupts are disabled, or tracking that ourselves.

But I may have missed something else that's relying on having
interrupts disabled.  Please check carefully.

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2ccff8472cd4..74cae6c032f9 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2376,17 +2376,16 @@ static void __split_huge_page_tail(struct page *head, int tail,
 }
 
 static void __split_huge_page(struct page *page, struct list_head *list,
-		pgoff_t end, unsigned long flags)
+		pgoff_t end)
 {
 	struct page *head = compound_head(page);
 	pg_data_t *pgdat = page_pgdat(head);
 	struct lruvec *lruvec;
 	struct address_space *swap_cache = NULL;
 	unsigned long offset = 0;
+	unsigned long flags;
 	int i;
 
-	lruvec = mem_cgroup_page_lruvec(head, pgdat);
-
 	/* complete memcg works before add pages to LRU */
 	mem_cgroup_split_huge_fixup(head);
 
@@ -2395,9 +2394,13 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 
 		offset = swp_offset(entry);
 		swap_cache = swap_address_space(entry);
-		xa_lock(&swap_cache->i_pages);
+		xa_lock_irq(&swap_cache->i_pages);
 	}
 
+	/* prevent PageLRU to go away from under us, and freeze lru stats */
+	spin_lock_irqsave(&pgdat->lru_lock, flags);
+	lruvec = mem_cgroup_page_lruvec(head, pgdat);
+
 	for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
 		__split_huge_page_tail(head, i, lruvec, list);
 		/* Some pages can be beyond i_size: drop them from page cache */
@@ -2417,6 +2420,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 	}
 
 	ClearPageCompound(head);
+	spin_unlock_irqrestore(&pgdat->lru_lock, flags);
 
 	split_page_owner(head, HPAGE_PMD_ORDER);
 
@@ -2425,18 +2429,16 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 		/* Additional pin to swap cache */
 		if (PageSwapCache(head)) {
 			page_ref_add(head, 2);
-			xa_unlock(&swap_cache->i_pages);
+			xa_unlock_irq(&swap_cache->i_pages);
 		} else {
 			page_ref_inc(head);
 		}
 	} else {
 		/* Additional pin to page cache */
 		page_ref_add(head, 2);
-		xa_unlock(&head->mapping->i_pages);
+		xa_unlock_irq(&head->mapping->i_pages);
 	}
 
-	spin_unlock_irqrestore(&pgdat->lru_lock, flags);
-
 	remap_page(head);
 
 	for (i = 0; i < HPAGE_PMD_NR; i++) {
@@ -2574,7 +2576,6 @@ bool can_split_huge_page(struct page *page, int *pextra_pins)
 int split_huge_page_to_list(struct page *page, struct list_head *list)
 {
 	struct page *head = compound_head(page);
-	struct pglist_data *pgdata = NODE_DATA(page_to_nid(head));
 	struct deferred_split *ds_queue = get_deferred_split_queue(head);
 	struct anon_vma *anon_vma = NULL;
 	struct address_space *mapping = NULL;
@@ -2640,9 +2641,6 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 	unmap_page(head);
 	VM_BUG_ON_PAGE(compound_mapcount(head), head);
 
-	/* prevent PageLRU to go away from under us, and freeze lru stats */
-	spin_lock_irqsave(&pgdata->lru_lock, flags);
-
 	if (mapping) {
 		XA_STATE(xas, &mapping->i_pages, page_index(head));
 
@@ -2650,13 +2648,13 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 		 * Check if the head page is present in page cache.
 		 * We assume all tail are present too, if head is there.
 		 */
-		xa_lock(&mapping->i_pages);
+		xa_lock_irq(&mapping->i_pages);
 		if (xas_load(&xas) != head)
 			goto fail;
 	}
 
 	/* Prevent deferred_split_scan() touching ->_refcount */
-	spin_lock(&ds_queue->split_queue_lock);
+	spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
 	count = page_count(head);
 	mapcount = total_mapcount(head);
 	if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) {
@@ -2664,7 +2662,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 			ds_queue->split_queue_len--;
 			list_del(page_deferred_list(head));
 		}
-		spin_unlock(&ds_queue->split_queue_lock);
+		spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
 		if (mapping) {
 			if (PageSwapBacked(head))
 				__dec_node_page_state(head, NR_SHMEM_THPS);
@@ -2672,7 +2670,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 				__dec_node_page_state(head, NR_FILE_THPS);
 		}
 
-		__split_huge_page(page, list, end, flags);
+		__split_huge_page(page, list, end);
 		if (PageSwapCache(head)) {
 			swp_entry_t entry = { .val = page_private(head) };
 
@@ -2688,10 +2686,9 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 			dump_page(page, "total_mapcount(head) > 0");
 			BUG();
 		}
-		spin_unlock(&ds_queue->split_queue_lock);
+		spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
 fail:		if (mapping)
-			xa_unlock(&mapping->i_pages);
-		spin_unlock_irqrestore(&pgdata->lru_lock, flags);
+			xa_unlock_irq(&mapping->i_pages);
 		remap_page(head);
 		ret = -EBUSY;
 	}