diff --git a/Documentation/x86/kaiser.txt b/Documentation/x86/kaiser.txt
index 67a70d2..5b5e9c4 100644
--- a/Documentation/x86/kaiser.txt
+++ b/Documentation/x86/kaiser.txt
@@ -1,3 +1,6 @@
+Overview
+========
+
 KAISER is a countermeasure against attacks on kernel address
 information. There are at least three existing, published,
 approaches using the shared user/kernel mapping and hardware features
@@ -18,6 +21,35 @@ This helps ensure that side-channel attacks that leverage the
 paging structures do not function when KAISER is enabled. It
 can be enabled by setting CONFIG_KAISER=y

+Page Table Management
+=====================
+
+KAISER logically keeps a "copy" of the page tables which unmap
+the kernel while in userspace. The kernel manages the page
+tables as normal, but the "copying" is done with a few tricks
+that mean that we do not have to manage two full copies.
+
+The first trick is that for any new kernel mapping, we presume
+that we do not want it mapped to userspace. That means we
+normally have no copying to do. We only copy the kernel
+entries over to the shadow in response to a kaiser_add_*()
+call, which is rare.
+
+For a new userspace mapping, the kernel makes the entries in
+its page tables like normal. The only difference is when the
+kernel makes entries in the top (PGD) level. In addition to
+setting the entry in the main kernel PGD, a copy of the entry
+is made in the shadow PGD.
+
+PGD entries always point to another page table. Two PGD
+entries pointing to the same thing give us shared page tables
+for all the lower levels. This leaves a single, shared set of
+userspace page tables to manage: one PTE to lock, one set
+of accessed bits, dirty bits, etc...
+
+Overhead
+========
+
 Protection against side-channel attacks is important. But,
 this protection comes at a cost:

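The shared-lower-table property described in the documentation above can be
made concrete with a minimal user-space sketch. This is not kernel code and
not part of the patch; the structure and field names are invented purely for
illustration. Two top-level tables hold pointers to the same lower-level
table, so a leaf entry written through one view is immediately visible
through the other, which is why copying only the PGD entry is enough to share
everything below it.

/*
 * Toy model (not kernel code): two top-level tables whose entries point
 * at the same lower-level table. Writing a leaf entry through one
 * top-level table is visible through the other, which is the property
 * KAISER relies on when it copies only PGD entries into the shadow.
 * All names here are illustrative.
 */
#include <stdio.h>
#include <stdlib.h>

#define ENTRIES 4

struct lower_table {
	unsigned long pte[ENTRIES];		/* leaf entries ("PTEs") */
};

struct top_table {
	struct lower_table *entry[ENTRIES];	/* top-level ("PGD") entries */
};

int main(void)
{
	struct lower_table *shared = calloc(1, sizeof(*shared));
	struct top_table kernel_pgd = { 0 };
	struct top_table shadow_pgd = { 0 };

	if (!shared)
		return 1;

	/* The "copy": both top-level tables get the same lower-table pointer. */
	kernel_pgd.entry[0] = shared;
	shadow_pgd.entry[0] = shared;

	/* Update a leaf entry via the kernel's view only. */
	kernel_pgd.entry[0]->pte[3] = 0xabcd;

	/* The shadow view sees the same leaf entry: one set of leaf entries. */
	printf("shadow sees pte[3] = %#lx\n", shadow_pgd.entry[0]->pte[3]);

	free(shared);
	return 0;
}

Only the single top-level entry is duplicated here; the lower table, and with
it the locking and accessed/dirty state it would carry, exists exactly once.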
diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c
index 57f7637..cde9014 100644
--- a/arch/x86/mm/kaiser.c
+++ b/arch/x86/mm/kaiser.c
@@ -49,9 +49,21 @@ static DEFINE_SPINLOCK(shadow_table_allocation_lock);

 /*
+ * This is a generic page table walker used only for walking kernel
+ * addresses. We use it to help recreate the "shadow" page tables
+ * which are used while we are in userspace.
+ *
+ * This can be called on any kernel memory address and will work
+ * with any page size and any type: normal linear map memory,
+ * vmalloc(), even kmap().
+ *
+ * Note: this is only used when mapping new *kernel* entries into
+ * the user/shadow page tables. It is never used for userspace
+ * addresses.
+ *
  * Returns -1 on error.
  */
-static inline unsigned long get_pa_from_mapping(unsigned long vaddr)
+static inline unsigned long get_pa_from_kernel_map(unsigned long vaddr)
 {
 	pgd_t *pgd;
 	p4d_t *p4d;
@@ -59,6 +71,8 @@ static inline unsigned long get_pa_from_mapping(unsigned long vaddr)
 	pmd_t *pmd;
 	pte_t *pte;

+	WARN_ON_ONCE(vaddr < PAGE_OFFSET);
+
 	pgd = pgd_offset_k(vaddr);
 	/*
 	 * We made all the kernel PGDs present in kaiser_init().
@@ -111,13 +125,19 @@ static inline unsigned long get_pa_from_mapping(unsigned long vaddr)
 }

 /*
- * This is a relatively normal page table walk, except that it
- * also tries to allocate page tables pages along the way.
+ * Walk the shadow copy of the page tables (optionally) trying to
+ * allocate page table pages on the way down. Does not support
+ * large pages since the data we are mapping is (generally) not
+ * large enough or aligned to 2MB.
+ *
+ * Note: this is only used when mapping *new* kernel data into the
+ * user/shadow page tables. It is never used for userspace data.
 *
 * Returns a pointer to a PTE on success, or NULL on failure.
 */
 #define KAISER_WALK_ATOMIC  0x1
-static pte_t *kaiser_pagetable_walk(unsigned long address, unsigned long flags)
+static pte_t *kaiser_shadow_pagetable_walk(unsigned long address,
+					   unsigned long flags)
 {
 	pmd_t *pmd;
 	pud_t *pud;
@@ -207,11 +227,11 @@ int kaiser_add_user_map(const void *__start_addr, unsigned long size,
 	unsigned long target_address;

 	for (; address < end_addr; address += PAGE_SIZE) {
-		target_address = get_pa_from_mapping(address);
+		target_address = get_pa_from_kernel_map(address);
 		if (target_address == -1)
 			return -EIO;
-		pte = kaiser_pagetable_walk(address, false);
+		pte = kaiser_shadow_pagetable_walk(address, false);
 		/*
 		 * Errors come from either -ENOMEM for a page
 		 * table page, or something screwy that did a
@@ -348,7 +368,7 @@ void kaiser_remove_mapping(unsigned long start, unsigned long size)
 		 * context. This should not do any allocations because we
 		 * should only be walking things that are known to be mapped.
 		 */
-		pte_t *pte = kaiser_pagetable_walk(addr, KAISER_WALK_ATOMIC);
+		pte_t *pte = kaiser_shadow_pagetable_walk(addr, KAISER_WALK_ATOMIC);
 		/*
 		 * We are removing a mapping that should
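The same toy model can sketch what kaiser_shadow_pagetable_walk() does
conceptually: descend the shadow table for an address and allocate any
missing intermediate table on the way down, handing back a pointer to the
leaf slot for the caller to fill. Again this is an invented, user-space
illustration rather than the kernel's implementation; the real walker
descends the pud/pmd/pte levels and, per the KAISER_WALK_ATOMIC flag, can be
told not to allocate when called in atomic context.

/*
 * Toy sketch (not kernel code) of the "allocate on the way down" walk:
 * descend a two-level toy table for an address, allocate the missing
 * lower table if needed, and return a pointer to the leaf slot.
 * Structures and the index math are illustrative only.
 */
#include <stdio.h>
#include <stdlib.h>

#define LOWER_BITS	9
#define LOWER_ENTRIES	(1UL << LOWER_BITS)
#define TOP_ENTRIES	512
#define PAGE_SHIFT	12

struct lower_table {
	unsigned long pte[LOWER_ENTRIES];
};

struct top_table {
	struct lower_table *entry[TOP_ENTRIES];
};

/* Return the leaf slot for addr, allocating the lower table if absent. */
static unsigned long *walk(struct top_table *top, unsigned long addr)
{
	unsigned long top_idx = (addr >> (PAGE_SHIFT + LOWER_BITS)) % TOP_ENTRIES;
	unsigned long low_idx = (addr >> PAGE_SHIFT) % LOWER_ENTRIES;

	if (!top->entry[top_idx]) {
		top->entry[top_idx] = calloc(1, sizeof(struct lower_table));
		if (!top->entry[top_idx])
			return NULL;	/* the -ENOMEM case in the real code */
	}
	return &top->entry[top_idx]->pte[low_idx];
}

int main(void)
{
	struct top_table shadow = { 0 };
	unsigned long *pte = walk(&shadow, 0xffff880000123000UL);

	if (!pte)
		return 1;
	*pte = 0x123000UL;	/* an arbitrary value standing in for a PA */
	printf("leaf slot filled: %#lx\n", *pte);

	for (unsigned long i = 0; i < TOP_ENTRIES; i++)
		free(shadow.entry[i]);
	return 0;
}

In the kaiser_add_user_map() loop shown in the diff, the value stored at the
leaf comes from get_pa_from_kernel_map(), and a failed walk surfaces as the
-ENOMEM/-EIO cases mentioned in the error-handling comment.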