[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220622162230.83474-9-kirill.shutemov@linux.intel.com>
Date: Wed, 22 Jun 2022 19:22:30 +0300
From: "Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>
To: Dave Hansen <dave.hansen@...ux.intel.com>,
Andy Lutomirski <luto@...nel.org>,
Peter Zijlstra <peterz@...radead.org>
Cc: x86@...nel.org, Kostya Serebryany <kcc@...gle.com>,
Andrey Ryabinin <ryabinin.a.a@...il.com>,
Andrey Konovalov <andreyknvl@...il.com>,
Alexander Potapenko <glider@...gle.com>,
Dmitry Vyukov <dvyukov@...gle.com>,
"H . J . Lu" <hjl.tools@...il.com>,
Andi Kleen <ak@...ux.intel.com>,
Rick Edgecombe <rick.p.edgecombe@...el.com>,
linux-mm@...ck.org, linux-kernel@...r.kernel.org,
"Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>
Subject: [PATCHv4 OPTIONAL 8/8] x86/mm: Extend LAM to support LAM_U48
LAM_U48 allows encoding 15 bits of tags into an address.
LAM_U48 steals the bits above bit 47 for tags, which makes it impossible for
userspace to use the full address space on a 5-level paging machine.
Make these features mutually exclusive: whichever gets enabled first
blocks the other one.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@...ux.intel.com>
---
arch/x86/include/asm/elf.h | 3 ++-
arch/x86/include/asm/mmu_context.h | 13 +++++++++++++
arch/x86/kernel/process_64.c | 23 +++++++++++++++++++++++
arch/x86/kernel/sys_x86_64.c | 5 +++--
arch/x86/mm/hugetlbpage.c | 6 ++++--
arch/x86/mm/mmap.c | 9 ++++++++-
6 files changed, 53 insertions(+), 6 deletions(-)
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index cb0ff1055ab1..4df13497a770 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -317,7 +317,8 @@ static inline int mmap_is_ia32(void)
extern unsigned long task_size_32bit(void);
extern unsigned long task_size_64bit(int full_addr_space);
extern unsigned long get_mmap_base(int is_legacy);
-extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len);
+extern bool mmap_address_hint_valid(struct mm_struct *mm,
+ unsigned long addr, unsigned long len);
extern unsigned long get_sigframe_size(void);
#ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index a6cded0f5e64..17d31988edd6 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -263,6 +263,19 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
unsigned long __get_current_cr3_fast(void);
+#ifdef CONFIG_X86_5LEVEL
+static inline bool full_va_allowed(struct mm_struct *mm)
+{
+ /*
+ * LAM_U48 steals VA bits above 47-bit for tags, so an mm whose
+ * lam_cr3_mask is X86_CR3_LAM_U48 must not be given the full
+ * address space. Any other lam_cr3_mask value yields true.
+ */
+ return mm->context.lam_cr3_mask != X86_CR3_LAM_U48;
+}
+#else
+static inline bool full_va_allowed(struct mm_struct *mm)
+{
+ return false; /* built without CONFIG_X86_5LEVEL: always false, mm is not examined */
+}
+#endif
+
#include <asm-generic/mmu_context.h>
#endif /* _ASM_X86_MMU_CONTEXT_H */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index e328b91d1492..427ebef3f64b 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -756,6 +756,16 @@ static void enable_lam_func(void *mm)
switch_mm(loaded_mm, loaded_mm, current);
}
+static bool lam_u48_allowed(void)
+{
+ struct mm_struct *mm = current->mm; /* always checks the calling task's mm */
+
+ if (!full_va_allowed(mm)) /* full VA is already unavailable to this mm */
+ return true;
+
+ return find_vma(mm, DEFAULT_MAP_WINDOW) == NULL; /* allowed only if find_vma() finds nothing for DEFAULT_MAP_WINDOW; presumably: no mapping above the default window */
+}
+
static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits)
{
int ret = 0;
@@ -763,6 +773,10 @@ static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits)
if (!cpu_feature_enabled(X86_FEATURE_LAM))
return -ENODEV;
+ /* lam_u48_allowed() requires mmap_lock */
+ if (mmap_write_lock_killable(mm))
+ return -EINTR;
+
mutex_lock(&mm->context.lock);
/* Already enabled? */
@@ -777,6 +791,14 @@ static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits)
} else if (nr_bits <= 6) {
mm->context.lam_cr3_mask = X86_CR3_LAM_U57;
mm->context.untag_mask = ~GENMASK(62, 57);
+ } else if (nr_bits <= 15) {
+ if (!lam_u48_allowed()) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ mm->context.lam_cr3_mask = X86_CR3_LAM_U48;
+ mm->context.untag_mask = ~GENMASK(62, 48);
} else {
ret = -EINVAL;
goto out;
@@ -788,6 +810,7 @@ static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits)
on_each_cpu_mask(mm_cpumask(mm), enable_lam_func, mm, true);
out:
mutex_unlock(&mm->context.lock);
+ mmap_write_unlock(mm);
return ret;
}
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 8cc653ffdccd..5ea6aaed89ba 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -21,6 +21,7 @@
#include <asm/elf.h>
#include <asm/ia32.h>
+#include <asm/mmu_context.h>
/*
* Align a virtual address to avoid aliasing in the I$ on AMD F15h.
@@ -182,7 +183,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
/* requesting a specific address */
if (addr) {
addr &= PAGE_MASK;
- if (!mmap_address_hint_valid(addr, len))
+ if (!mmap_address_hint_valid(mm, addr, len))
goto get_unmapped_area;
vma = find_vma(mm, addr);
@@ -203,7 +204,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
* !in_32bit_syscall() check to avoid high addresses for x32
* (and make it no op on native i386).
*/
- if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall())
+ if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall() && full_va_allowed(mm))
info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
info.align_mask = 0;
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index a0d023cb4292..9fdc8db42365 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -18,6 +18,7 @@
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/elf.h>
+#include <asm/mmu_context.h>
#if 0 /* This is just for testing */
struct page *
@@ -103,6 +104,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
unsigned long pgoff, unsigned long flags)
{
struct hstate *h = hstate_file(file);
+ struct mm_struct *mm = current->mm;
struct vm_unmapped_area_info info;
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
@@ -114,7 +116,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
* If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
* in the full address space.
*/
- if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall())
+ if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall() && full_va_allowed(mm))
info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
@@ -161,7 +163,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
if (addr) {
addr &= huge_page_mask(h);
- if (!mmap_address_hint_valid(addr, len))
+ if (!mmap_address_hint_valid(mm, addr, len))
goto get_unmapped_area;
vma = find_vma(mm, addr);
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index c90c20904a60..f9ca824729de 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -21,6 +21,7 @@
#include <linux/elf-randomize.h>
#include <asm/elf.h>
#include <asm/io.h>
+#include <asm/mmu_context.h>
#include "physaddr.h"
@@ -35,6 +36,8 @@ unsigned long task_size_32bit(void)
unsigned long task_size_64bit(int full_addr_space)
{
+ if (!full_va_allowed(current->mm))
+ return DEFAULT_MAP_WINDOW; /* full VA not allowed for current->mm: full_addr_space is ignored */
return full_addr_space ? TASK_SIZE_MAX : DEFAULT_MAP_WINDOW;
}
@@ -206,11 +209,15 @@ const char *arch_vma_name(struct vm_area_struct *vma)
* the failure of such a fixed mapping request, so the restriction is not
* applied.
*/
-bool mmap_address_hint_valid(unsigned long addr, unsigned long len)
+bool mmap_address_hint_valid(struct mm_struct *mm,
+ unsigned long addr, unsigned long len)
{
if (TASK_SIZE - len < addr)
return false; /* [addr, addr + len) would extend past TASK_SIZE */
+ if (addr + len > DEFAULT_MAP_WINDOW && !full_va_allowed(mm))
+ return false; /* range ends above DEFAULT_MAP_WINDOW but mm may not use the full VA */
+
return (addr > DEFAULT_MAP_WINDOW) == (addr + len > DEFAULT_MAP_WINDOW);
}
--
2.35.1
Powered by blists - more mailing lists