lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 31 Mar 2015 08:50:46 +0000
From:	Naoya Horiguchi <n-horiguchi@...jp.nec.com>
To:	Andrew Morton <akpm@...ux-foundation.org>
CC:	Hugh Dickins <hughd@...gle.com>, Michal Hocko <mhocko@...e.cz>,
	Mel Gorman <mgorman@...e.de>,
	Johannes Weiner <hannes@...xchg.org>,
	David Rientjes <rientjes@...gle.com>,
	"linux-mm@...ck.org" <linux-mm@...ck.org>,
	"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
	"Naoya Horiguchi" <nao.horiguchi@...il.com>
Subject: [PATCH v3 2/3] mm: hugetlb: introduce PageHugeActive flag

We are not safe from calling isolate_huge_page() on a hugepage concurrently,
which can make the victim hugepage in invalid state and results in BUG_ON().

The root problem of this is that we don't have any information on struct page
(so easily accessible) about hugepages' activeness. Note that hugepages'
activeness means just being linked to hstate->hugepage_activelist, which is
not the same as normal pages' activeness represented by PageActive flag.

Normal pages are isolated by isolate_lru_page() which prechecks PageLRU before
isolation, so let's do similarly for hugetlb with a new PageHugeActive flag.

Set/ClearPageHugeActive should be called within hugetlb_lock. But hugetlb_cow()
and hugetlb_no_page() don't do this, being justified because in these function
SetPageHugeActive is called right after the hugepage is allocated and no other
thread tries to isolate it.

Signed-off-by: Naoya Horiguchi <n-horiguchi@...jp.nec.com>
---
ChangeLog v2->v3:
- Use PagePrivate of the first tail page to show hugepage's activeness instead
  of PageLRU
- drop ClearPageLRU in dequeue_hwpoisoned_huge_page() (which was wrong)
- fix return value of isolate_huge_page() (using ret)
- move __put_compound_page() part to a separate patch
- drop "Cc: stable" tag because this is not a simple fix

ChangeLog v1->v2:
- call isolate_huge_page() in soft_offline_huge_page() instead of list_move()
---
 mm/hugetlb.c        | 41 ++++++++++++++++++++++++++++++++++++++---
 mm/memory-failure.c | 14 ++++++++++++--
 2 files changed, 50 insertions(+), 5 deletions(-)

diff --git v4.0-rc6.orig/mm/hugetlb.c v4.0-rc6/mm/hugetlb.c
index c41b2a0ee273..05e0233d30d7 100644
--- v4.0-rc6.orig/mm/hugetlb.c
+++ v4.0-rc6/mm/hugetlb.c
@@ -855,6 +855,31 @@ struct hstate *size_to_hstate(unsigned long size)
 	return NULL;
 }
 
+/*
+ * Page flag to show that the hugepage is "active/in-use" (i.e. being linked to
+ * hstate->hugepage_activelist.)
+ *
+ * This function can be called for tail pages, but never returns true for them.
+ */
+int PageHugeActive(struct page *page)
+{
+	VM_BUG_ON_PAGE(!PageHuge(page), page);
+	return PageHead(page) && PagePrivate(&page[1]);
+}
+
+/* never called for tail page */
+void SetPageHugeActive(struct page *page)
+{
+	VM_BUG_ON_PAGE(!PageHeadHuge(page), page);
+	SetPagePrivate(&page[1]);
+}
+
+void ClearPageHugeActive(struct page *page)
+{
+	VM_BUG_ON_PAGE(!PageHeadHuge(page), page);
+	ClearPagePrivate(&page[1]);
+}
+
 void free_huge_page(struct page *page)
 {
 	/*
@@ -875,6 +900,7 @@ void free_huge_page(struct page *page)
 	ClearPagePrivate(page);
 
 	spin_lock(&hugetlb_lock);
+	ClearPageHugeActive(page);
 	hugetlb_cgroup_uncharge_page(hstate_index(h),
 				     pages_per_huge_page(h), page);
 	if (restore_reserve)
@@ -2891,6 +2917,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 	copy_user_huge_page(new_page, old_page, address, vma,
 			    pages_per_huge_page(h));
 	__SetPageUptodate(new_page);
+	SetPageHugeActive(new_page);
 
 	mmun_start = address & huge_page_mask(h);
 	mmun_end = mmun_start + huge_page_size(h);
@@ -3003,6 +3030,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		}
 		clear_huge_page(page, address, pages_per_huge_page(h));
 		__SetPageUptodate(page);
+		SetPageHugeActive(page);
 
 		if (vma->vm_flags & VM_MAYSHARE) {
 			int err;
@@ -3812,19 +3840,26 @@ int dequeue_hwpoisoned_huge_page(struct page *hpage)
 
 bool isolate_huge_page(struct page *page, struct list_head *list)
 {
+	bool ret = true;
+
 	VM_BUG_ON_PAGE(!PageHead(page), page);
-	if (!get_page_unless_zero(page))
-		return false;
 	spin_lock(&hugetlb_lock);
+	if (!PageHugeActive(page) || !get_page_unless_zero(page)) {
+		ret = false;
+		goto unlock;
+	}
+	ClearPageHugeActive(page);
 	list_move_tail(&page->lru, list);
+unlock:
 	spin_unlock(&hugetlb_lock);
-	return true;
+	return ret;
 }
 
 void putback_active_hugepage(struct page *page)
 {
 	VM_BUG_ON_PAGE(!PageHead(page), page);
 	spin_lock(&hugetlb_lock);
+	SetPageHugeActive(page);
 	list_move_tail(&page->lru, &(page_hstate(page))->hugepage_activelist);
 	spin_unlock(&hugetlb_lock);
 	put_page(page);
diff --git v4.0-rc6.orig/mm/memory-failure.c v4.0-rc6/mm/memory-failure.c
index d487f8dc6d39..1d86cca8de26 100644
--- v4.0-rc6.orig/mm/memory-failure.c
+++ v4.0-rc6/mm/memory-failure.c
@@ -1540,8 +1540,18 @@ static int soft_offline_huge_page(struct page *page, int flags)
 	}
 	unlock_page(hpage);
 
-	/* Keep page count to indicate a given hugepage is isolated. */
-	list_move(&hpage->lru, &pagelist);
+	ret = isolate_huge_page(hpage, &pagelist);
+	if (ret) {
+		/*
+		 * get_any_page() and isolate_huge_page() takes a refcount each,
+		 * so need to drop one here.
+		 */
+		put_page(hpage);
+	} else {
+		pr_info("soft offline: %#lx hugepage failed to isolate\n", pfn);
+		return -EBUSY;
+	}
+
 	ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
 				MIGRATE_SYNC, MR_MEMORY_FAILURE);
 	if (ret) {
-- 
1.9.3
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ