lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20220705062953.914256-1-luofei@unicloud.com>
Date:   Tue, 5 Jul 2022 02:29:53 -0400
From:   luofei <luofei@...cloud.com>
To:     <mike.kravetz@...cle.com>, <songmuchun@...edance.com>,
        <akpm@...ux-foundation.org>
CC:     <linux-mm@...ck.org>, <linux-kernel@...r.kernel.org>,
        luofei <luofei@...cloud.com>
Subject: [PATCH] mm,hwpoison,hugetlb: defer dissolve hwpoison hugepage when allocating vmemmap failed

When dissolving a hwpoisoned hugepage, if the allocation of vmemmap
pages fails, the faulty page should not be put back on the hugepage
free list, because that would allow the faulty page to be reused. It is
better to defer the dissolve operation and retry it later.

Meanwhile, when the page fault handling code calls
dissolve_free_huge_page() to dissolve the faulty page, the basic page
fault processing operations (such as migrating and unmapping pages)
have already completed. There is no need to return an -ENOMEM error
code to the upper layer for a temporary vmemmap page allocation
failure, as that would cause the caller to make a wrong judgment. So
just defer the dissolve and return success.

Signed-off-by: luofei <luofei@...cloud.com>
---
 mm/hugetlb.c | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ca081078e814..db25458eb0a5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -90,6 +90,9 @@ struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp;
 
 /* Forward declaration */
 static int hugetlb_acct_memory(struct hstate *h, long delta);
+static LLIST_HEAD(hpage_freelist);
+static void free_hpage_workfn(struct work_struct *work);
+static DECLARE_DELAYED_WORK(free_hpage_work, free_hpage_workfn);
 
 static inline bool subpool_is_free(struct hugepage_subpool *spool)
 {
@@ -1535,15 +1538,21 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
 	if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
 		return;
 
-	if (hugetlb_vmemmap_restore(h, page))
+	if (hugetlb_vmemmap_restore(h, page)) {
+		if (unlikely(PageHWPoison(page))) {
+			llist_add((struct llist_node *)&page->mapping, &hpage_freelist);
+			schedule_delayed_work(&free_hpage_work, HZ);
+			goto out;
+		}
 		goto fail;
+	}
 
 	/*
 	 * Move PageHWPoison flag from head page to the raw error pages,
 	 * which makes any healthy subpages reusable.
 	 */
 	if (unlikely(PageHWPoison(page) && hugetlb_clear_page_hwpoison(page)))
-		goto fail;
+		goto out;
 
 	for (i = 0; i < pages_per_huge_page(h);
 	     i++, subpage = mem_map_next(subpage, page, i)) {
@@ -1574,6 +1583,8 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
 	 */
 	add_hugetlb_page(h, page, true);
 	spin_unlock_irq(&hugetlb_lock);
+out:
+	return;
 }
 
 /*
@@ -1587,8 +1598,6 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
  * to be cleared in free_hpage_workfn() anyway, it is reused as the llist_node
  * structure of a lockless linked list of huge pages to be freed.
  */
-static LLIST_HEAD(hpage_freelist);
-
 static void free_hpage_workfn(struct work_struct *work)
 {
 	struct llist_node *node;
@@ -1616,12 +1625,11 @@ static void free_hpage_workfn(struct work_struct *work)
 		cond_resched();
 	}
 }
-static DECLARE_WORK(free_hpage_work, free_hpage_workfn);
 
 static inline void flush_free_hpage_work(struct hstate *h)
 {
 	if (hugetlb_vmemmap_optimizable(h))
-		flush_work(&free_hpage_work);
+		flush_delayed_work(&free_hpage_work);
 }
 
 static void update_and_free_page(struct hstate *h, struct page *page,
@@ -1634,13 +1642,9 @@ static void update_and_free_page(struct hstate *h, struct page *page,
 
 	/*
 	 * Defer freeing to avoid using GFP_ATOMIC to allocate vmemmap pages.
-	 *
-	 * Only call schedule_work() if hpage_freelist is previously
-	 * empty. Otherwise, schedule_work() had been called but the workfn
-	 * hasn't retrieved the list yet.
 	 */
-	if (llist_add((struct llist_node *)&page->mapping, &hpage_freelist))
-		schedule_work(&free_hpage_work);
+	llist_add((struct llist_node *)&page->mapping, &hpage_freelist);
+	schedule_delayed_work(&free_hpage_work, 0);
 }
 
 static void update_and_free_pages_bulk(struct hstate *h, struct list_head *list)
@@ -2118,11 +2122,15 @@ int dissolve_free_huge_page(struct page *page)
 		rc = hugetlb_vmemmap_restore(h, head);
 		if (!rc) {
 			update_and_free_page(h, head, false);
-		} else {
+		} else if (!PageHWPoison(head)) {
 			spin_lock_irq(&hugetlb_lock);
 			add_hugetlb_page(h, head, false);
 			h->max_huge_pages++;
 			spin_unlock_irq(&hugetlb_lock);
+		} else {
+			llist_add((struct llist_node *)&head->mapping, &hpage_freelist);
+			schedule_delayed_work(&free_hpage_work, HZ);
+			rc = 0;
 		}
 
 		return rc;
-- 
2.27.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ