[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220223052223.1202152-21-junaids@google.com>
Date: Tue, 22 Feb 2022 21:21:56 -0800
From: Junaid Shahid <junaids@...gle.com>
To: linux-kernel@...r.kernel.org
Cc: kvm@...r.kernel.org, pbonzini@...hat.com, jmattson@...gle.com,
pjt@...gle.com, oweisse@...gle.com, alexandre.chartre@...cle.com,
rppt@...ux.ibm.com, dave.hansen@...ux.intel.com,
peterz@...radead.org, tglx@...utronix.de, luto@...nel.org,
linux-mm@...ck.org
Subject: [RFC PATCH 20/47] mm: asi: Support for locally non-sensitive vmalloc allocations
A new flag, VM_LOCAL_NONSENSITIVE, is added to designate locally
non-sensitive vmalloc/vmap areas. When using the __vmalloc /
__vmalloc_node APIs, if the corresponding GFP flag is specified, the
VM flag is automatically added. When using the __vmalloc_node_range API,
either flag can be specified independently. The VM flag will only map
the vmalloc area as non-sensitive, while the GFP flag will only map the
underlying direct map area as non-sensitive.
When using the __vmalloc_node_range API, instead of VMALLOC_START/END,
VMALLOC_LOCAL_NONSENSITIVE_START/END should be used. This is the range
that will have different ASI page tables for each process, thereby
providing the local mapping.
A command line parameter vmalloc_local_nonsensitive_percent is added to
specify the approximate division between the per-process and global
vmalloc ranges. Note that regular/sensitive vmalloc/vmap allocations
are not restricted by this division and can go anywhere in the entire
vmalloc range. The division only applies to non-sensitive allocations.
Since no attempt is made to balance regular/sensitive allocations across
the division, it is possible that one of these ranges gets filled up
by regular allocations, leaving no room for the non-sensitive
allocations for which that range was designated. But since the vmalloc
range is fairly large, hopefully that will not be a problem in
practice. If that assumption turns out to be incorrect, we could
implement a more sophisticated scheme.
Signed-off-by: Junaid Shahid <junaids@...gle.com>
---
arch/x86/include/asm/asi.h | 2 +
arch/x86/include/asm/page_64.h | 2 +
arch/x86/include/asm/pgtable_64_types.h | 7 ++-
arch/x86/mm/asi.c | 57 ++++++++++++++++++
include/asm-generic/asi.h | 5 ++
include/linux/vmalloc.h | 6 ++
mm/vmalloc.c | 78 ++++++++++++++++++++-----
7 files changed, 142 insertions(+), 15 deletions(-)
diff --git a/arch/x86/include/asm/asi.h b/arch/x86/include/asm/asi.h
index f11010c0334b..e3cbf6d8801e 100644
--- a/arch/x86/include/asm/asi.h
+++ b/arch/x86/include/asm/asi.h
@@ -46,6 +46,8 @@ DECLARE_PER_CPU_ALIGNED(struct asi_state, asi_cpu_state);
extern pgd_t asi_global_nonsensitive_pgd[];
+void asi_vmalloc_init(void);
+
int asi_init_mm_state(struct mm_struct *mm);
void asi_free_mm_state(struct mm_struct *mm);
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 2845eca02552..b17574349572 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -18,6 +18,8 @@ extern unsigned long vmemmap_base;
#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+extern unsigned long vmalloc_global_nonsensitive_start;
+extern unsigned long vmalloc_local_nonsensitive_end;
extern unsigned long asi_local_map_base;
DECLARE_STATIC_KEY_FALSE(asi_local_map_initialized);
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 0fc380ba25b8..06793f7ef1aa 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -142,8 +142,13 @@ extern unsigned int ptrs_per_p4d;
#define VMALLOC_END (VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1)
#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
-#define VMALLOC_GLOBAL_NONSENSITIVE_START VMALLOC_START
+
+#define VMALLOC_LOCAL_NONSENSITIVE_START VMALLOC_START
+#define VMALLOC_LOCAL_NONSENSITIVE_END vmalloc_local_nonsensitive_end
+
+#define VMALLOC_GLOBAL_NONSENSITIVE_START vmalloc_global_nonsensitive_start
#define VMALLOC_GLOBAL_NONSENSITIVE_END VMALLOC_END
+
#endif
#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
diff --git a/arch/x86/mm/asi.c b/arch/x86/mm/asi.c
index 3ba0971a318d..91e5ff1224ff 100644
--- a/arch/x86/mm/asi.c
+++ b/arch/x86/mm/asi.c
@@ -3,6 +3,7 @@
#include <linux/init.h>
#include <linux/memblock.h>
#include <linux/memcontrol.h>
+#include <linux/moduleparam.h>
#include <asm/asi.h>
#include <asm/pgalloc.h>
@@ -28,6 +29,17 @@ EXPORT_SYMBOL(asi_local_map_initialized);
unsigned long asi_local_map_base __ro_after_init;
EXPORT_SYMBOL(asi_local_map_base);
+unsigned long vmalloc_global_nonsensitive_start __ro_after_init;
+EXPORT_SYMBOL(vmalloc_global_nonsensitive_start);
+
+unsigned long vmalloc_local_nonsensitive_end __ro_after_init;
+EXPORT_SYMBOL(vmalloc_local_nonsensitive_end);
+
+/* Approximate percent only. Rounded to PGDIR_SIZE boundary. */
+static uint vmalloc_local_nonsensitive_percent __ro_after_init = 50;
+core_param(vmalloc_local_nonsensitive_percent,
+ vmalloc_local_nonsensitive_percent, uint, 0444);
+
int asi_register_class(const char *name, uint flags,
const struct asi_hooks *ops)
{
@@ -307,6 +319,10 @@ int asi_init(struct mm_struct *mm, int asi_index, struct asi **out_asi)
i++)
set_pgd(asi->pgd + i, mm->asi[0].pgd[i]);
+ for (i = pgd_index(VMALLOC_LOCAL_NONSENSITIVE_START);
+ i <= pgd_index(VMALLOC_LOCAL_NONSENSITIVE_END); i++)
+ set_pgd(asi->pgd + i, mm->asi[0].pgd[i]);
+
for (i = pgd_index(VMALLOC_GLOBAL_NONSENSITIVE_START);
i < PTRS_PER_PGD; i++)
set_pgd(asi->pgd + i, asi_global_nonsensitive_pgd[i]);
@@ -432,6 +448,10 @@ void asi_free_mm_state(struct mm_struct *mm)
pgd_index(ASI_LOCAL_MAP +
PFN_PHYS(max_possible_pfn)) + 1);
+ asi_free_pgd_range(&mm->asi[0],
+ pgd_index(VMALLOC_LOCAL_NONSENSITIVE_START),
+ pgd_index(VMALLOC_LOCAL_NONSENSITIVE_END) + 1);
+
free_page((ulong)mm->asi[0].pgd);
}
@@ -671,3 +691,40 @@ void asi_sync_mapping(struct asi *asi, void *start, size_t len)
for (; addr < end; addr = pgd_addr_end(addr, end))
asi_clone_pgd(asi->pgd, asi->mm->asi[0].pgd, addr);
}
+
+void __init asi_vmalloc_init(void)
+{
+ uint start_index = pgd_index(VMALLOC_START);
+ uint end_index = pgd_index(VMALLOC_END);
+ uint global_start_index;
+
+ if (!boot_cpu_has(X86_FEATURE_ASI)) {
+ vmalloc_global_nonsensitive_start = VMALLOC_START;
+ vmalloc_local_nonsensitive_end = VMALLOC_END;
+ return;
+ }
+
+ if (vmalloc_local_nonsensitive_percent == 0) {
+ vmalloc_local_nonsensitive_percent = 1;
+ pr_warn("vmalloc_local_nonsensitive_percent must be non-zero");
+ }
+
+ if (vmalloc_local_nonsensitive_percent >= 100) {
+ vmalloc_local_nonsensitive_percent = 99;
+ pr_warn("vmalloc_local_nonsensitive_percent must be less than 100");
+ }
+
+ global_start_index = start_index + (end_index - start_index) *
+ vmalloc_local_nonsensitive_percent / 100;
+ global_start_index = max(global_start_index, start_index + 1);
+
+ vmalloc_global_nonsensitive_start = -(PTRS_PER_PGD - global_start_index)
+ * PGDIR_SIZE;
+ vmalloc_local_nonsensitive_end = vmalloc_global_nonsensitive_start - 1;
+
+ pr_debug("vmalloc_global_nonsensitive_start = %llx",
+ vmalloc_global_nonsensitive_start);
+
+ VM_BUG_ON(vmalloc_local_nonsensitive_end >= VMALLOC_END);
+ VM_BUG_ON(vmalloc_global_nonsensitive_start <= VMALLOC_START);
+}
diff --git a/include/asm-generic/asi.h b/include/asm-generic/asi.h
index a1c8ebff70e8..7c50d8b64fa4 100644
--- a/include/asm-generic/asi.h
+++ b/include/asm-generic/asi.h
@@ -18,6 +18,9 @@
#define VMALLOC_GLOBAL_NONSENSITIVE_START VMALLOC_START
#define VMALLOC_GLOBAL_NONSENSITIVE_END VMALLOC_END
+#define VMALLOC_LOCAL_NONSENSITIVE_START VMALLOC_START
+#define VMALLOC_LOCAL_NONSENSITIVE_END VMALLOC_END
+
#ifndef _ASSEMBLY_
struct asi_hooks {};
@@ -36,6 +39,8 @@ static inline int asi_init_mm_state(struct mm_struct *mm) { return 0; }
static inline void asi_free_mm_state(struct mm_struct *mm) { }
+static inline void asi_vmalloc_init(void) { }
+
static inline
int asi_init(struct mm_struct *mm, int asi_index, struct asi **out_asi)
{
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 5f85690f27b6..2b4eafc21fa5 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -41,8 +41,10 @@ struct notifier_block; /* in notifier.h */
#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
#define VM_GLOBAL_NONSENSITIVE 0x00000800 /* Similar to __GFP_GLOBAL_NONSENSITIVE */
+#define VM_LOCAL_NONSENSITIVE 0x00001000 /* Similar to __GFP_LOCAL_NONSENSITIVE */
#else
#define VM_GLOBAL_NONSENSITIVE 0
+#define VM_LOCAL_NONSENSITIVE 0
#endif
/* bits [20..32] reserved for arch specific ioremap internals */
@@ -67,6 +69,10 @@ struct vm_struct {
unsigned int nr_pages;
phys_addr_t phys_addr;
const void *caller;
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+ /* Valid if flags contain VM_*_NONSENSITIVE */
+ struct asi *asi;
+#endif
};
struct vmap_area {
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index f13bfe7e896b..ea94d8a1e2e9 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2391,18 +2391,25 @@ void __init vmalloc_init(void)
*/
vmap_init_free_space();
vmap_initialized = true;
+
+ asi_vmalloc_init();
}
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+
static int asi_map_vm_area(struct vm_struct *area)
{
if (!static_asi_enabled())
return 0;
if (area->flags & VM_GLOBAL_NONSENSITIVE)
- return asi_map(ASI_GLOBAL_NONSENSITIVE, area->addr,
- get_vm_area_size(area));
+ area->asi = ASI_GLOBAL_NONSENSITIVE;
+ else if (area->flags & VM_LOCAL_NONSENSITIVE)
+ area->asi = ASI_LOCAL_NONSENSITIVE;
+ else
+ return 0;
- return 0;
+ return asi_map(area->asi, area->addr, get_vm_area_size(area));
}
static void asi_unmap_vm_area(struct vm_struct *area)
@@ -2415,11 +2422,17 @@ static void asi_unmap_vm_area(struct vm_struct *area)
* the case when the existing flush from try_purge_vmap_area_lazy()
* and/or vm_unmap_aliases() happens non-lazily.
*/
- if (area->flags & VM_GLOBAL_NONSENSITIVE)
- asi_unmap(ASI_GLOBAL_NONSENSITIVE, area->addr,
- get_vm_area_size(area), true);
+ if (area->flags & (VM_GLOBAL_NONSENSITIVE | VM_LOCAL_NONSENSITIVE))
+ asi_unmap(area->asi, area->addr, get_vm_area_size(area), true);
}
+#else
+
+static inline int asi_map_vm_area(struct vm_struct *area) { return 0; }
+static inline void asi_unmap_vm_area(struct vm_struct *area) { }
+
+#endif
+
static inline void setup_vmalloc_vm_locked(struct vm_struct *vm,
struct vmap_area *va, unsigned long flags, const void *caller)
{
@@ -2463,6 +2476,15 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
if (unlikely(!size))
return NULL;
+ if (static_asi_enabled()) {
+ VM_BUG_ON((flags & VM_LOCAL_NONSENSITIVE) &&
+ !(start >= VMALLOC_LOCAL_NONSENSITIVE_START &&
+ end <= VMALLOC_LOCAL_NONSENSITIVE_END));
+
+ VM_BUG_ON((flags & VM_GLOBAL_NONSENSITIVE) &&
+ start < VMALLOC_GLOBAL_NONSENSITIVE_START);
+ }
+
if (flags & VM_IOREMAP)
align = 1ul << clamp_t(int, get_count_order_long(size),
PAGE_SHIFT, IOREMAP_MAX_ORDER);
@@ -3073,8 +3095,22 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
if (WARN_ON_ONCE(!size))
return NULL;
- if (static_asi_enabled() && (vm_flags & VM_GLOBAL_NONSENSITIVE))
- gfp_mask |= __GFP_ZERO;
+ if (static_asi_enabled()) {
+ VM_BUG_ON((vm_flags & (VM_LOCAL_NONSENSITIVE |
+ VM_GLOBAL_NONSENSITIVE)) ==
+ (VM_LOCAL_NONSENSITIVE | VM_GLOBAL_NONSENSITIVE));
+
+ if ((vm_flags & VM_LOCAL_NONSENSITIVE) &&
+ !mm_asi_enabled(current->mm)) {
+ vm_flags &= ~VM_LOCAL_NONSENSITIVE;
+
+ if (end == VMALLOC_LOCAL_NONSENSITIVE_END)
+ end = VMALLOC_END;
+ }
+
+ if (vm_flags & (VM_GLOBAL_NONSENSITIVE | VM_LOCAL_NONSENSITIVE))
+ gfp_mask |= __GFP_ZERO;
+ }
if ((size >> PAGE_SHIFT) > totalram_pages()) {
warn_alloc(gfp_mask, NULL,
@@ -3166,11 +3202,19 @@ void *__vmalloc_node(unsigned long size, unsigned long align,
gfp_t gfp_mask, int node, const void *caller)
{
ulong vm_flags = 0;
+ ulong start = VMALLOC_START, end = VMALLOC_END;
- if (static_asi_enabled() && (gfp_mask & __GFP_GLOBAL_NONSENSITIVE))
- vm_flags |= VM_GLOBAL_NONSENSITIVE;
+ if (static_asi_enabled()) {
+ if (gfp_mask & __GFP_GLOBAL_NONSENSITIVE) {
+ vm_flags |= VM_GLOBAL_NONSENSITIVE;
+ start = VMALLOC_GLOBAL_NONSENSITIVE_START;
+ } else if (gfp_mask & __GFP_LOCAL_NONSENSITIVE) {
+ vm_flags |= VM_LOCAL_NONSENSITIVE;
+ end = VMALLOC_LOCAL_NONSENSITIVE_END;
+ }
+ }
- return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
+ return __vmalloc_node_range(size, align, start, end,
gfp_mask, PAGE_KERNEL, vm_flags, node, caller);
}
/*
@@ -3678,9 +3722,15 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
/* verify parameters and allocate data structures */
BUG_ON(offset_in_page(align) || !is_power_of_2(align));
- if (static_asi_enabled() && (flags & VM_GLOBAL_NONSENSITIVE)) {
- vmalloc_start = VMALLOC_GLOBAL_NONSENSITIVE_START;
- vmalloc_end = VMALLOC_GLOBAL_NONSENSITIVE_END;
+ if (static_asi_enabled()) {
+ VM_BUG_ON((flags & (VM_LOCAL_NONSENSITIVE |
+ VM_GLOBAL_NONSENSITIVE)) ==
+ (VM_LOCAL_NONSENSITIVE | VM_GLOBAL_NONSENSITIVE));
+
+ if (flags & VM_GLOBAL_NONSENSITIVE)
+ vmalloc_start = VMALLOC_GLOBAL_NONSENSITIVE_START;
+ else if (flags & VM_LOCAL_NONSENSITIVE)
+ vmalloc_end = VMALLOC_LOCAL_NONSENSITIVE_END;
}
vmalloc_start = ALIGN(vmalloc_start, align);
--
2.35.1.473.g83b2b277ed-goog
Powered by blists - more mailing lists