From: Thomas Gleixner

The LDT is not commonly used on 64-bit, so the overhead of populating the
fixmap entries on context switch for the rare LDT syscall users is a
reasonable trade-off vs. having extra dynamically managed mapping space
per process.

Signed-off-by: Thomas Gleixner
---
 arch/x86/include/asm/mmu_context.h |   44 ++++--------------
 arch/x86/kernel/ldt.c              |   87 +++++++++++++++++++++++++++++++------
 2 files changed, 84 insertions(+), 47 deletions(-)

--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -45,13 +45,17 @@ static inline void load_mm_cr4(struct mm
  */
 struct ldt_struct {
        /*
-        * Xen requires page-aligned LDTs with special permissions. This is
-        * needed to prevent us from installing evil descriptors such as
+        * Xen requires page-aligned LDTs with special permissions. This
+        * is needed to prevent us from installing evil descriptors such as
         * call gates. On native, we could merge the ldt_struct and LDT
-        * allocations, but it's not worth trying to optimize.
+        * allocations, but it's not worth trying to optimize and it does
+        * not work with page table isolation enabled, which requires
+        * page-aligned LDT entries as well.
         */
-       struct desc_struct *entries_va;
-       unsigned int nr_entries;
+       struct desc_struct      *entries_va;
+       phys_addr_t             entries_pa;
+       unsigned int            nr_entries;
+       unsigned int            order;
 };
 
 /*
@@ -59,6 +63,7 @@ struct ldt_struct {
  */
 int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm);
 void destroy_context_ldt(struct mm_struct *mm);
+void load_mm_ldt(struct mm_struct *mm);
 #else  /* CONFIG_MODIFY_LDT_SYSCALL */
 static inline int init_new_context_ldt(struct task_struct *tsk,
                                        struct mm_struct *mm)
@@ -66,38 +71,11 @@ static inline int init_new_context_ldt(s
        return 0;
 }
 static inline void destroy_context_ldt(struct mm_struct *mm) {}
-#endif
-
 static inline void load_mm_ldt(struct mm_struct *mm)
 {
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
-       struct ldt_struct *ldt;
-
-       /* READ_ONCE synchronizes with smp_store_release */
-       ldt = READ_ONCE(mm->context.ldt);
-
-       /*
-        * Any change to mm->context.ldt is followed by an IPI to all
-        * CPUs with the mm active. The LDT will not be freed until
-        * after the IPI is handled by all such CPUs. This means that,
-        * if the ldt_struct changes before we return, the values we see
-        * will be safe, and the new values will be loaded before we run
-        * any user code.
-        *
-        * NB: don't try to convert this to use RCU without extreme care.
-        * We would still need IRQs off, because we don't want to change
-        * the local LDT after an IPI loaded a newer value than the one
-        * that we can see.
-        */
-
-       if (unlikely(ldt))
-               set_ldt(ldt->entries_va, ldt->nr_entries);
-       else
-               clear_LDT();
-#else
        clear_LDT();
-#endif
 }
+#endif
 
 static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
 {
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include
 
 static void refresh_ldt_segments(void)
 {
@@ -42,6 +43,61 @@ static void refresh_ldt_segments(void)
 #endif
 }
 
+#ifdef CONFIG_KERNEL_PAGE_TABLE_ISOLATION
+
+#define LDT_EPP                (PAGE_SIZE / LDT_ENTRY_SIZE)
+
+static void set_ldt_and_map(struct ldt_struct *ldt)
+{
+       phys_addr_t pa = ldt->entries_pa;
+       void *fixva;
+       int idx, i;
+
+       if (!static_cpu_has_bug(X86_BUG_CPU_SECURE_MODE_KPTI)) {
+               set_ldt(ldt->entries_va, ldt->nr_entries);
+               return;
+       }
+
+       idx = get_cpu_entry_area_index(smp_processor_id(), ldt_entries);
+       fixva = (void *) __fix_to_virt(idx);
+       for (i = 0; i < ldt->nr_entries; idx--, i += LDT_EPP, pa += PAGE_SIZE)
+               __set_fixmap(idx, pa, PAGE_KERNEL);
+       set_ldt(fixva, ldt->nr_entries);
+}
+#else
+static void set_ldt_and_map(struct ldt_struct *ldt)
+{
+       set_ldt(ldt->entries_va, ldt->nr_entries);
+}
+#endif
+
+void load_mm_ldt(struct mm_struct *mm)
+{
+       struct ldt_struct *ldt;
+
+       /* READ_ONCE synchronizes with smp_store_release */
+       ldt = READ_ONCE(mm->context.ldt);
+
+       /*
+        * Any change to mm->context.ldt is followed by an IPI to all
+        * CPUs with the mm active. The LDT will not be freed until
+        * after the IPI is handled by all such CPUs. This means that,
+        * if the ldt_struct changes before we return, the values we see
+        * will be safe, and the new values will be loaded before we run
+        * any user code.
+        *
+        * NB: don't try to convert this to use RCU without extreme care.
+        * We would still need IRQs off, because we don't want to change
+        * the local LDT after an IPI loaded a newer value than the one
+        * that we can see.
+        */
+
+       if (unlikely(ldt))
+               set_ldt_and_map(ldt);
+       else
+               clear_LDT();
+}
+
 /* context.lock is held for us, so we don't need any locking. */
 static void flush_ldt(void *__mm)
 {
@@ -52,26 +108,35 @@ static void flush_ldt(void *__mm)
                return;
 
        pc = &mm->context;
-       set_ldt(pc->ldt->entries_va, pc->ldt->nr_entries);
+       set_ldt_and_map(pc->ldt);
 
        refresh_ldt_segments();
 }
 
+static void __free_ldt_struct(struct ldt_struct *ldt)
+{
+       free_pages((unsigned long)ldt->entries_va, ldt->order);
+       kfree(ldt);
+}
+
 /* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
 static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
 {
        struct ldt_struct *new_ldt;
        unsigned int alloc_size;
+       struct page *page;
+       int order;
 
        if (num_entries > LDT_ENTRIES)
                return NULL;
 
-       new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
+       new_ldt = kzalloc(sizeof(struct ldt_struct), GFP_KERNEL);
        if (!new_ldt)
                return NULL;
 
        BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
        alloc_size = num_entries * LDT_ENTRY_SIZE;
+       order = get_order(alloc_size);
 
        /*
        * Xen is very picky: it requires a page-aligned LDT that has no
@@ -79,16 +144,14 @@ static struct ldt_struct *alloc_ldt_stru
        * Keep it simple: zero the whole allocation and never allocate less
        * than PAGE_SIZE.
        */
-       if (alloc_size > PAGE_SIZE)
-               new_ldt->entries_va = vzalloc(alloc_size);
-       else
-               new_ldt->entries_va = (void *)get_zeroed_page(GFP_KERNEL);
-
-       if (!new_ldt->entries_va) {
+       page = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
+       if (!page) {
                kfree(new_ldt);
                return NULL;
        }
-
+       new_ldt->entries_va = page_address(page);
+       new_ldt->entries_pa = virt_to_phys(new_ldt->entries_va);
+       new_ldt->order = order;
        new_ldt->nr_entries = num_entries;
        return new_ldt;
 }
@@ -116,11 +179,7 @@ static void free_ldt_struct(struct ldt_s
                return;
 
        paravirt_free_ldt(ldt->entries_va, ldt->nr_entries);
-       if (ldt->nr_entries * LDT_ENTRY_SIZE > PAGE_SIZE)
-               vfree_atomic(ldt->entries_va);
-       else
-               free_page((unsigned long)ldt->entries_va);
-       kfree(ldt);
+       __free_ldt_struct(ldt);
 }
 
 /*
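
Not part of the patch, for illustration only: the "rare LDT syscall users"
mentioned in the changelog are tasks that call modify_ldt(2), which goes
through write_ldt() and, on context switch, through the load_mm_ldt() /
flush_ldt() paths touched above. A minimal user-space sketch that exercises
this path is below; the descriptor field values are purely illustrative.

/*
 * Install one LDT entry via modify_ldt(2); illustrative only, not part
 * of the patch. Build on x86/x86-64 Linux with: gcc -o ldt-test ldt-test.c
 */
#include <asm/ldt.h>            /* struct user_desc */
#include <sys/syscall.h>        /* SYS_modify_ldt */
#include <unistd.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        struct user_desc desc;

        memset(&desc, 0, sizeof(desc));
        desc.entry_number   = 0;        /* first LDT slot */
        desc.base_addr      = 0;
        desc.limit          = 0xfffff;  /* 4G with limit_in_pages set */
        desc.seg_32bit      = 1;
        desc.limit_in_pages = 1;
        desc.useable        = 1;

        /* func 0x11: write one LDT entry (modern interface) */
        if (syscall(SYS_modify_ldt, 0x11, &desc, sizeof(desc)) != 0) {
                perror("modify_ldt");
                return 1;
        }

        /* The entry is reachable via selector (index << 3) | 4 | 3 == 0x7 */
        printf("installed LDT entry %u\n", desc.entry_number);
        return 0;
}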