lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1457469802-11850-10-git-send-email-jglisse@redhat.com>
Date:	Tue,  8 Mar 2016 15:43:02 -0500
From:	Jérôme Glisse <jglisse@...hat.com>
To:	akpm@...ux-foundation.org, <linux-kernel@...r.kernel.org>,
	linux-mm@...ck.org
Cc:	Linus Torvalds <torvalds@...ux-foundation.org>, <joro@...tes.org>,
	Mel Gorman <mgorman@...e.de>, "H. Peter Anvin" <hpa@...or.com>,
	Peter Zijlstra <peterz@...radead.org>,
	Andrea Arcangeli <aarcange@...hat.com>,
	Johannes Weiner <jweiner@...hat.com>,
	Larry Woodman <lwoodman@...hat.com>,
	Rik van Riel <riel@...hat.com>,
	Dave Airlie <airlied@...hat.com>,
	Brendan Conoboy <blc@...hat.com>,
	Joe Donohue <jdonohue@...hat.com>,
	Christophe Harle <charle@...dia.com>,
	Duncan Poole <dpoole@...dia.com>,
	Sherry Cheung <SCheung@...dia.com>,
	Subhash Gutti <sgutti@...dia.com>,
	John Hubbard <jhubbard@...dia.com>,
	Mark Hairgrove <mhairgrove@...dia.com>,
	Lucien Dunning <ldunning@...dia.com>,
	Cameron Buschardt <cabuschardt@...dia.com>,
	Arvind Gopalakrishnan <arvindg@...dia.com>,
	Haggai Eran <haggaie@...lanox.com>,
	Shachar Raindel <raindel@...lanox.com>,
	Liran Liss <liranl@...lanox.com>,
	Roland Dreier <roland@...estorage.com>,
	Ben Sander <ben.sander@....com>,
	Greg Stoner <Greg.Stoner@....com>,
	John Bridgman <John.Bridgman@....com>,
	Michael Mantor <Michael.Mantor@....com>,
	Paul Blinzer <Paul.Blinzer@....com>,
	Leonid Shamis <Leonid.Shamis@....com>,
	Laurent Morichetti <Laurent.Morichetti@....com>,
	Alexander Deucher <Alexander.Deucher@....com>,
	Jérôme Glisse <jglisse@...hat.com>
Subject: [PATCH v12 09/29] HMM: add mm page table iterator helpers.

Because inside the mmu_notifier callback we do not have access to the
vma nor do we know which lock we are holding (the mmap semaphore or
the i_mmap_lock) we can not rely on the regular page table walk (nor
do we want to, as we have to be careful not to split huge pages).

So this patch introduces a helper to iterate over the cpu page table
content in an efficient way for the situation we are in, which is that
we know that none of the page table entries can vanish from below us
and thus it is safe to walk the page table.

The only added value of the iterator is that it keeps the page table
entry level map across calls, which fits well with the HMM mirror page
table update code.

Signed-off-by: Jérôme Glisse <jglisse@...hat.com>
---
 mm/hmm.c | 101 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 101 insertions(+)

diff --git a/mm/hmm.c b/mm/hmm.c
index a9bdab5..74e429a 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -406,6 +406,107 @@ static struct mmu_notifier_ops hmm_notifier_ops = {
 };
 
 
+struct mm_pt_iter {
+	struct mm_struct	*mm;	/* mm whose page table is walked */
+	pte_t			*ptep;	/* mapped pte table, NULL if none */
+	unsigned long		addr;	/* pmd-aligned base of ptep, or -1UL */
+};
+
+static void mm_pt_iter_init(struct mm_pt_iter *pt_iter, struct mm_struct *mm)
+{
+	/* Bind to mm; no pte table is mapped yet (NULL ptep, -1UL addr). */
+	*pt_iter = (struct mm_pt_iter){ .mm = mm, .ptep = NULL,
+					.addr = -1UL };
+}
+
+static void mm_pt_iter_fini(struct mm_pt_iter *pt_iter)
+{
+	if (pt_iter->ptep)	/* only unmap a pte table that was mapped */
+		pte_unmap(pt_iter->ptep);
+	/* Back to the unbound state: mm and ptep NULL, addr -1UL. */
+	*pt_iter = (struct mm_pt_iter){ .addr = -1UL };
+}
+
+static inline bool mm_pt_iter_in_range(struct mm_pt_iter *pt_iter,
+				       unsigned long addr)
+{
+	return (addr & PMD_MASK) == pt_iter->addr; /* no wrap at the top pmd */
+}
+
+/*
+ * mm_pt_iter_page() - look up the struct page that backs an address.
+ *
+ * Returns NULL when a page table level is missing or bad, or when the pte is
+ * not present, special or maps the zero pfn. This is only valid if the caller
+ * holds either the mmap semaphore or the i_mmap_lock of the vma->address_space
+ * the address belongs to. We can not easily get the vma struct here, so that
+ * can not be sanity tested: we rely on callers knowing what they do.
+ */
+static struct page *mm_pt_iter_page(struct mm_pt_iter *pt_iter,
+				    unsigned long addr)
+{
+	pgd_t *pgdp;
+	pud_t *pudp;
+	pmd_t *pmdp;
+
+again:
+	if (mm_pt_iter_in_range(pt_iter, addr) && likely(pt_iter->ptep)) {
+		pte_t pte = *(pt_iter->ptep + pte_index(addr));
+		unsigned long pfn;
+
+		if (pte_none(pte) || !pte_present(pte) ||
+		    unlikely(pte_special(pte)))
+			return NULL;
+
+		pfn = pte_pfn(pte);
+		if (is_zero_pfn(pfn))
+			return NULL;
+		return pfn_to_page(pfn);
+	}
+
+	if (pt_iter->ptep) {
+		pte_unmap(pt_iter->ptep);
+		pt_iter->ptep = NULL;
+		pt_iter->addr = -1UL;
+	}
+
+	pgdp = pgd_offset(pt_iter->mm, addr);
+	if (pgd_none_or_clear_bad(pgdp))
+		return NULL;
+	pudp = pud_offset(pgdp, addr);
+	if (pud_none_or_clear_bad(pudp))
+		return NULL;
+	pmdp = pmd_offset(pudp, addr);
+	/*
+	 * The lock held keeps the pmd from vanishing from under us, but it may
+	 * be huge or in the splitting transitory state. Test for a huge pmd
+	 * before pmd_bad(), which can report a huge pmd as bad.
+	 */
+	if (pmd_none(*pmdp))
+		return NULL;
+	if (pmd_trans_huge(*pmdp)) {
+		spinlock_t *ptl;
+
+		ptl = pmd_lock(pt_iter->mm, pmdp);
+		if (pmd_trans_huge(*pmdp)) {
+			struct page *page = pmd_page(*pmdp) + pte_index(addr);
+
+			spin_unlock(ptl);
+			return page;
+		}
+		/* It was morphing from thp to regular, try again. */
+		spin_unlock(ptl);
+		goto again;
+	}
+	if (unlikely(pmd_bad(*pmdp)))
+		return NULL;
+	/* Regular pmd and it can not morph: map its pte table and retry. */
+	pt_iter->ptep = pte_offset_map(pmdp, addr & PMD_MASK);
+	pt_iter->addr = addr & PMD_MASK;
+	goto again;
+}
+
+
 /* hmm_mirror - per device mirroring functions.
  *
  * Each device that mirror a process has a uniq hmm_mirror struct. A process
-- 
2.4.3

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ