Date:	Fri, 13 Sep 2013 15:36:20 +0200
From:	Peter Zijlstra <peterz@...radead.org>
To:	"Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>
Cc:	Alex Thorlton <athorlton@....com>, Ingo Molnar <mingo@...hat.com>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Naoya Horiguchi <n-horiguchi@...jp.nec.com>,
	"Eric W . Biederman" <ebiederm@...ssion.com>,
	"Paul E . McKenney" <paulmck@...ux.vnet.ibm.com>,
	Al Viro <viro@...iv.linux.org.uk>,
	Andi Kleen <ak@...ux.intel.com>,
	Andrea Arcangeli <aarcange@...hat.com>,
	Dave Hansen <dave.hansen@...el.com>,
	Dave Jones <davej@...hat.com>,
	David Howells <dhowells@...hat.com>,
	Frederic Weisbecker <fweisbec@...il.com>,
	Johannes Weiner <hannes@...xchg.org>,
	Kees Cook <keescook@...omium.org>,
	Mel Gorman <mgorman@...e.de>,
	Michael Kerrisk <mtk.manpages@...il.com>,
	Oleg Nesterov <oleg@...hat.com>,
	Rik van Riel <riel@...hat.com>,
	Robin Holt <robinmholt@...il.com>,
	Sedat Dilek <sedat.dilek@...il.com>,
	Srikar Dronamraju <srikar@...ux.vnet.ibm.com>,
	Thomas Gleixner <tglx@...utronix.de>,
	linux-kernel@...r.kernel.org, linux-mm@...ck.org
Subject: Re: [PATCH 8/9] mm: implement split page table lock for PMD level

On Fri, Sep 13, 2013 at 04:06:15PM +0300, Kirill A. Shutemov wrote:
> +#if USE_SPLIT_PMD_PTLOCKS
> +
> +static inline void pgtable_pmd_page_ctor(struct page *page)
> +{
> +	spin_lock_init(&page->ptl);
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +	page->pmd_huge_pte = NULL;
> +#endif
> +}
> +
> +static inline void pgtable_pmd_page_dtor(struct page *page)
> +{
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +	VM_BUG_ON(page->pmd_huge_pte);
> +#endif
> +}
> +
> +#define pmd_huge_pte(mm, pmd) (virt_to_page(pmd)->pmd_huge_pte)
> +
> +#else

So on -rt we have the problem that spinlock_t is rather huge (it's an
rtmutex), so instead of blowing up the page frame like that we treat
page->ptl as a pointer and allocate the spinlock separately.

Since allocations can fail, the above ctor path gets 'interesting'.

It would be good if new code assumed the ctor can fail, so we
don't have to replicate that horror-show.
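
For illustration only, a failure-aware variant of the PMD ctor quoted
above could mirror the pte_lock_init() convention used in the patch
below: return the page on success and NULL when the lock allocation
fails, so the caller only has to check the result. This is a sketch
under the assumption that -rt would switch the PMD-level ptl to a
pointer as well; nothing here is a final API:

static inline struct page *pgtable_pmd_page_ctor(struct page *page)
{
#ifndef CONFIG_PREEMPT_RT_FULL
	spin_lock_init(&page->ptl);
#else
	/* assumed: page->ptl is a pointer on -rt, as in the patch below */
	page->ptl = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
	if (!page->ptl)
		return NULL;
	spin_lock_init(page->ptl);
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	page->pmd_huge_pte = NULL;
#endif
	return page;
}

The caller would then free the page and bail out on NULL instead of
assuming the ctor always succeeds.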


---
From: Peter Zijlstra <peterz@...radead.org>
Date: Fri, 3 Jul 2009 08:44:54 -0500
Subject: mm: shrink the page frame to !-rt size

The patch below is a boot-tested hack to shrink the page frame size
back to normal.

Should be a net win, since there should be far fewer PTE pages than
page frames.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@...llo.nl>
Signed-off-by: Thomas Gleixner <tglx@...utronix.de>
---
 include/linux/mm.h       |   46 +++++++++++++++++++++++++++++++++++++++-------
 include/linux/mm_types.h |    4 ++++
 mm/memory.c              |   32 ++++++++++++++++++++++++++++++++
 3 files changed, 75 insertions(+), 7 deletions(-)

--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1241,27 +1241,59 @@ static inline pmd_t *pmd_alloc(struct mm
  * overflow into the next struct page (as it might with DEBUG_SPINLOCK).
  * When freeing, reset page->mapping so free_pages_check won't complain.
  */
+#ifndef CONFIG_PREEMPT_RT_FULL
+
 #define __pte_lockptr(page)	&((page)->ptl)
-#define pte_lock_init(_page)	do {					\
-	spin_lock_init(__pte_lockptr(_page));				\
-} while (0)
+
+static inline struct page *pte_lock_init(struct page *page)
+{
+	spin_lock_init(__pte_lockptr(page));
+	return page;
+}
+
 #define pte_lock_deinit(page)	((page)->mapping = NULL)
+
+#else /* !PREEMPT_RT_FULL */
+
+/*
+ * On PREEMPT_RT_FULL the spinlock_t's are too large to embed in the
+ * page frame, hence it only has a pointer and we need to dynamically
+ * allocate the lock when we allocate PTE-pages.
+ *
+ * This is an overall win, since only a small fraction of the pages
+ * will be PTE pages under normal circumstances.
+ */
+
+#define __pte_lockptr(page)	((page)->ptl)
+
+extern struct page *pte_lock_init(struct page *page);
+extern void pte_lock_deinit(struct page *page);
+
+#endif /* PREEMPT_RT_FULL */
+
 #define pte_lockptr(mm, pmd)	({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));})
 #else	/* !USE_SPLIT_PTLOCKS */
 /*
  * We use mm->page_table_lock to guard all pagetable pages of the mm.
  */
-#define pte_lock_init(page)	do {} while (0)
+static inline struct page *pte_lock_init(struct page *page) { return page; }
 #define pte_lock_deinit(page)	do {} while (0)
 #define pte_lockptr(mm, pmd)	({(void)(pmd); &(mm)->page_table_lock;})
 #endif /* USE_SPLIT_PTLOCKS */
 
-static inline void pgtable_page_ctor(struct page *page)
+static inline struct page *__pgtable_page_ctor(struct page *page)
 {
-	pte_lock_init(page);
-	inc_zone_page_state(page, NR_PAGETABLE);
+	page = pte_lock_init(page);
+	if (page)
+		inc_zone_page_state(page, NR_PAGETABLE);
+	return page;
 }
 
+#define pgtable_page_ctor(page)				\
+do {							\
+	page = __pgtable_page_ctor(page);		\
+} while (0)
+
 static inline void pgtable_page_dtor(struct page *page)
 {
 	pte_lock_deinit(page);
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -142,7 +142,11 @@ struct page {
 						 * system if PG_buddy is set.
 						 */
 #if USE_SPLIT_PTLOCKS
+# ifndef CONFIG_PREEMPT_RT_FULL
 		spinlock_t ptl;
+# else
+		spinlock_t *ptl;
+# endif
 #endif
 		struct kmem_cache *slab_cache;	/* SL[AU]B: Pointer to slab */
 		struct page *first_page;	/* Compound tail pages */
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4328,3 +4328,35 @@ void copy_user_huge_page(struct page *ds
 	}
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
+
+#if defined(CONFIG_PREEMPT_RT_FULL) && (USE_SPLIT_PTLOCKS > 0)
+/*
+ * Heinous hack, relies on the caller doing something like:
+ *
+ *   pte = alloc_pages(PGALLOC_GFP, 0);
+ *   if (pte)
+ *     pgtable_page_ctor(pte);
+ *   return pte;
+ *
+ * This ensures we release the page and return NULL when the
+ * lock allocation fails.
+ */
+struct page *pte_lock_init(struct page *page)
+{
+	page->ptl = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
+	if (page->ptl) {
+		spin_lock_init(__pte_lockptr(page));
+	} else {
+		__free_page(page);
+		page = NULL;
+	}
+	return page;
+}
+
+void pte_lock_deinit(struct page *page)
+{
+	kfree(page->ptl);
+	page->mapping = NULL;
+}
+
+#endif
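
For completeness, the caller pattern the comment above relies on would
look roughly like this in an arch's pte_alloc_one(); since
pgtable_page_ctor() is now a macro that reassigns its argument, the
page pointer simply becomes NULL when the lock allocation fails.
Sketch only, not lifted from any particular arch:

static pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
	struct page *pte;

	pte = alloc_pages(PGALLOC_GFP, 0);	/* PGALLOC_GFP: the arch's page-table GFP mask */
	if (pte)
		pgtable_page_ctor(pte);		/* on -rt this may free pte and set it to NULL */
	return pte;
}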