lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20171222024630.GA13875@outlook.office365.com>
Date:   Thu, 21 Dec 2017 18:46:32 -0800
From:   Andrei Vagin <avagin@...tuozzo.com>
To:     Thomas Gleixner <tglx@...utronix.de>
Cc:     LKML <linux-kernel@...r.kernel.org>, x86@...nel.org,
        Linus Torvalds <torvalds@...ux-foundation.org>,
        Andy Lutomirsky <luto@...nel.org>,
        Peter Zijlstra <peterz@...radead.org>,
        Dave Hansen <dave.hansen@...el.com>,
        Borislav Petkov <bpetkov@...e.de>,
        Greg KH <gregkh@...uxfoundation.org>, keescook@...gle.com,
        hughd@...gle.com, Brian Gerst <brgerst@...il.com>,
        Josh Poimboeuf <jpoimboe@...hat.com>,
        Denys Vlasenko <dvlasenk@...hat.com>,
        Rik van Riel <riel@...hat.com>,
        Boris Ostrovsky <boris.ostrovsky@...cle.com>,
        Juergen Gross <jgross@...e.com>,
        David Laight <David.Laight@...lab.com>,
        Eduardo Valentin <eduval@...zon.com>, aliguori@...zon.com,
        Will Deacon <will.deacon@....com>,
        Vlastimil Babka <vbabka@...e.cz>, daniel.gruss@...k.tugraz.at
Subject: Re: [V181,22/54] x86/cpu_entry_area: Move it out of fixmap

Hi Thomas,

The kernel with this patch doesn't boot if CONFIG_KASAN is set:
[    0.000000] Linux version 4.14.0-00142-g8604322546c0 (avagin@...top) (gcc version 7.2.1 20170915 (Red Hat 7.2.1-2) (GCC)) #11 SMP Thu Dec 21 18:38:44 PST 2017
[    0.000000] Command line: root=/dev/vda2 ro debug console=ttyS0,115200 LANG=en_US.UTF-8 slub_debug=FZP raid=noautodetect selinux=0 earlyprintk=serial,ttyS0,115200
[    0.000000] x86/fpu: Supporting XSAVE feature 0x001: 'x87 floating point registers'
[    0.000000] x86/fpu: Supporting XSAVE feature 0x002: 'SSE registers'
[    0.000000] x86/fpu: Supporting XSAVE feature 0x004: 'AVX registers'
[    0.000000] x86/fpu: Supporting XSAVE feature 0x008: 'MPX bounds registers'
[    0.000000] x86/fpu: Supporting XSAVE feature 0x010: 'MPX CSR'
[    0.000000] x86/fpu: xstate_offset[2]:  576, xstate_sizes[2]:  256
[    0.000000] x86/fpu: xstate_offset[3]:  832, xstate_sizes[3]:   64
[    0.000000] x86/fpu: xstate_offset[4]:  896, xstate_sizes[4]:   64
[    0.000000] x86/fpu: Enabled xstate features 0x1f, context size is 960 bytes, using 'compacted' format.
[    0.000000] e820: BIOS-provided physical RAM map:
[    0.000000] BIOS-e820: [mem 0x0000000000000000-0x000000000009fbff] usable
[    0.000000] BIOS-e820: [mem 0x000000000009fc00-0x000000000009ffff] reserved
[    0.000000] BIOS-e820: [mem 0x00000000000f0000-0x00000000000fffff] reserved
[    0.000000] BIOS-e820: [mem 0x0000000000100000-0x000000007ffd8fff] usable
[    0.000000] BIOS-e820: [mem 0x000000007ffd9000-0x000000007fffffff] reserved
[    0.000000] BIOS-e820: [mem 0x00000000feffc000-0x00000000feffffff] reserved
[    0.000000] BIOS-e820: [mem 0x00000000fffc0000-0x00000000ffffffff] reserved
[    0.000000] bootconsole [earlyser0] enabled
[    0.000000] NX (Execute Disable) protection: active
[    0.000000] random: fast init done
[    0.000000] SMBIOS 2.8 present.
[    0.000000] DMI: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1.fc26 04/01/2014
[    0.000000] Hypervisor detected: KVM
[    0.000000] tsc: Fast TSC calibration using PIT
[    0.000000] e820: update [mem 0x00000000-0x00000fff] usable ==> reserved
[    0.000000] e820: remove [mem 0x000a0000-0x000fffff] usable
[    0.000000] e820: last_pfn = 0x7ffd9 max_arch_pfn = 0x400000000
[    0.000000] MTRR default type: write-back
[    0.000000] MTRR fixed ranges enabled:
[    0.000000]   00000-9FFFF write-back
[    0.000000]   A0000-BFFFF uncachable
[    0.000000]   C0000-FFFFF write-protect
[    0.000000] MTRR variable ranges enabled:
[    0.000000]   0 base 0080000000 mask FF80000000 uncachable
[    0.000000]   1 disabled
[    0.000000]   2 disabled
[    0.000000]   3 disabled
[    0.000000]   4 disabled
[    0.000000]   5 disabled
[    0.000000]   6 disabled
[    0.000000]   7 disabled
[    0.000000] x86/PAT: Configuration [0-7]: WB  WC  UC- UC  WB  WP  UC- WT  
[    0.000000] found SMP MP-table at [mem 0x000f6bd0-0x000f6bdf] mapped at [ffffffffff200bd0]
[    0.000000] Base memory trampoline at [ffff880000099000] 99000 size 24576
[    0.000000] Using GB pages for direct mapping
[    0.000000] BRK [0x5bf4e000, 0x5bf4efff] PGTABLE
[    0.000000] BRK [0x5bf4f000, 0x5bf4ffff] PGTABLE
[    0.000000] BRK [0x5bf50000, 0x5bf50fff] PGTABLE
[    0.000000] BRK [0x5bf51000, 0x5bf51fff] PGTABLE
[    0.000000] BRK [0x5bf52000, 0x5bf52fff] PGTABLE
[    0.000000] ACPI: Early table checksum verification disabled
[    0.000000] ACPI: RSDP 0x00000000000F69C0 000014 (v00 BOCHS )
[    0.000000] ACPI: RSDT 0x000000007FFE12FF 00002C (v01 BOCHS  BXPCRSDT 00000001 BXPC 00000001)
[    0.000000] ACPI: FACP 0x000000007FFE120B 000074 (v01 BOCHS  BXPCFACP 00000001 BXPC 00000001)
[    0.000000] ACPI: DSDT 0x000000007FFE0040 0011CB (v01 BOCHS  BXPCDSDT 00000001 BXPC 00000001)
[    0.000000] ACPI: FACS 0x000000007FFE0000 000040
[    0.000000] ACPI: APIC 0x000000007FFE127F 000080 (v01 BOCHS  BXPCAPIC 00000001 BXPC 00000001)
[    0.000000] ACPI: Local APIC address 0xfee00000
[    0.000000] No NUMA configuration found
[    0.000000] Faking a node at [mem 0x0000000000000000-0x000000007ffd8fff]
[    0.000000] NODE_DATA(0) allocated [mem 0x7ffc2000-0x7ffd8fff]
[    0.000000] kvm-clock: Using msrs 4b564d01 and 4b564d00
[    0.000000] kvm-clock: cpu 0, msr 0:7ffc1001, primary cpu clock
[    0.000000] kvm-clock: using sched offset of 137192604594 cycles
[    0.000000] clocksource: kvm-clock: mask: 0xffffffffffffffff max_cycles: 0x1cd42e4dffb, max_idle_ns: 881590591483 ns
[    0.000000] Zone ranges:
[    0.000000]   DMA      [mem 0x0000000000001000-0x0000000000ffffff]
[    0.000000]   DMA32    [mem 0x0000000001000000-0x000000007ffd8fff]
[    0.000000]   Normal   empty
[    0.000000]   Device   empty
[    0.000000] Movable zone start for each node
[    0.000000] Early memory node ranges
[    0.000000]   node   0: [mem 0x0000000000001000-0x000000000009efff]
[    0.000000]   node   0: [mem 0x0000000000100000-0x000000007ffd8fff]
[    0.000000] Initmem setup node 0 [mem 0x0000000000001000-0x000000007ffd8fff]
[    0.000000] On node 0 totalpages: 524151
[    0.000000]   DMA zone: 64 pages used for memmap
[    0.000000]   DMA zone: 21 pages reserved
[    0.000000]   DMA zone: 3998 pages, LIFO batch:0
[    0.000000]   DMA32 zone: 8128 pages used for memmap
[    0.000000]   DMA32 zone: 520153 pages, LIFO batch:31

And then it starts booting again...

On Wed, Dec 20, 2017 at 10:35:25PM +0100, Thomas Gleixner wrote:
> Put the cpu_entry_area into a separate p4d entry. The fixmap gets too big
> and 0-day already hit a case where the fixmap ptes were cleared by
> cleanup_highmap().
> 
> Aside from that, the fixmap API is a pain as it's all backwards.
> 
> Signed-off-by: Thomas Gleixner <tglx@...utronix.de>
> ---
>  Documentation/x86/x86_64/mm.txt         |    2 +
>  arch/x86/include/asm/cpu_entry_area.h   |   24 ++++++++++++-
>  arch/x86/include/asm/desc.h             |    1 
>  arch/x86/include/asm/fixmap.h           |   32 -----------------
>  arch/x86/include/asm/pgtable_32_types.h |   15 ++++++--
>  arch/x86/include/asm/pgtable_64_types.h |   47 +++++++++++++++-----------
>  arch/x86/kernel/dumpstack.c             |    1 
>  arch/x86/kernel/traps.c                 |    5 +-
>  arch/x86/mm/cpu_entry_area.c            |   57 +++++++++++++++++++++++---------
>  arch/x86/mm/dump_pagetables.c           |    6 ++-
>  arch/x86/mm/init_32.c                   |    6 +++
>  arch/x86/mm/kasan_init_64.c             |    6 ++-
>  arch/x86/mm/pgtable_32.c                |    1 
>  arch/x86/xen/mmu_pv.c                   |    2 -
>  14 files changed, 128 insertions(+), 77 deletions(-)
> 
> --- a/Documentation/x86/x86_64/mm.txt
> +++ b/Documentation/x86/x86_64/mm.txt
> @@ -12,6 +12,7 @@ ffffea0000000000 - ffffeaffffffffff (=40
>  ... unused hole ...
>  ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
>  ... unused hole ...
> +fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
>  ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
>  ... unused hole ...
>  ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
> @@ -35,6 +36,7 @@ ffd4000000000000 - ffd5ffffffffffff (=49
>  ... unused hole ...
>  ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
>  ... unused hole ...
> +fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
>  ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
>  ... unused hole ...
>  ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
> --- a/arch/x86/include/asm/cpu_entry_area.h
> +++ b/arch/x86/include/asm/cpu_entry_area.h
> @@ -43,10 +43,32 @@ struct cpu_entry_area {
>  };
>  
>  #define CPU_ENTRY_AREA_SIZE	(sizeof(struct cpu_entry_area))
> -#define CPU_ENTRY_AREA_PAGES	(CPU_ENTRY_AREA_SIZE / PAGE_SIZE)
> +#define CPU_ENTRY_AREA_TOT_SIZE	(CPU_ENTRY_AREA_SIZE * NR_CPUS)
>  
>  DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
>  
>  extern void setup_cpu_entry_areas(void);
> +extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
> +
> +#define	CPU_ENTRY_AREA_RO_IDT		CPU_ENTRY_AREA_BASE
> +#define CPU_ENTRY_AREA_PER_CPU		(CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
> +
> +#define CPU_ENTRY_AREA_RO_IDT_VADDR	((void *)CPU_ENTRY_AREA_RO_IDT)
> +
> +#define CPU_ENTRY_AREA_MAP_SIZE			\
> +	(CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE)
> +
> +static inline struct cpu_entry_area *get_cpu_entry_area(int cpu)
> +{
> +	unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
> +	BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
> +
> +	return (struct cpu_entry_area *) va;
> +}
> +
> +static inline struct entry_stack *cpu_entry_stack(int cpu)
> +{
> +	return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
> +}
>  
>  #endif
> --- a/arch/x86/include/asm/desc.h
> +++ b/arch/x86/include/asm/desc.h
> @@ -7,6 +7,7 @@
>  #include <asm/mmu.h>
>  #include <asm/fixmap.h>
>  #include <asm/irq_vectors.h>
> +#include <asm/cpu_entry_area.h>
>  
>  #include <linux/smp.h>
>  #include <linux/percpu.h>
> --- a/arch/x86/include/asm/fixmap.h
> +++ b/arch/x86/include/asm/fixmap.h
> @@ -25,7 +25,6 @@
>  #else
>  #include <uapi/asm/vsyscall.h>
>  #endif
> -#include <asm/cpu_entry_area.h>
>  
>  /*
>   * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall
> @@ -84,7 +83,6 @@ enum fixed_addresses {
>  	FIX_IO_APIC_BASE_0,
>  	FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
>  #endif
> -	FIX_RO_IDT,	/* Virtual mapping for read-only IDT */
>  #ifdef CONFIG_X86_32
>  	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings */
>  	FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
> @@ -100,9 +98,6 @@ enum fixed_addresses {
>  #ifdef	CONFIG_X86_INTEL_MID
>  	FIX_LNW_VRTC,
>  #endif
> -	/* Fixmap entries to remap the GDTs, one per processor. */
> -	FIX_CPU_ENTRY_AREA_TOP,
> -	FIX_CPU_ENTRY_AREA_BOTTOM = FIX_CPU_ENTRY_AREA_TOP + (CPU_ENTRY_AREA_PAGES * NR_CPUS) - 1,
>  
>  #ifdef CONFIG_ACPI_APEI_GHES
>  	/* Used for GHES mapping from assorted contexts */
> @@ -143,7 +138,7 @@ enum fixed_addresses {
>  extern void reserve_top_address(unsigned long reserve);
>  
>  #define FIXADDR_SIZE	(__end_of_permanent_fixed_addresses << PAGE_SHIFT)
> -#define FIXADDR_START		(FIXADDR_TOP - FIXADDR_SIZE)
> +#define FIXADDR_START	(FIXADDR_TOP - FIXADDR_SIZE)
>  
>  extern int fixmaps_set;
>  
> @@ -191,30 +186,5 @@ void __init *early_memremap_decrypted_wp
>  void __early_set_fixmap(enum fixed_addresses idx,
>  			phys_addr_t phys, pgprot_t flags);
>  
> -static inline unsigned int __get_cpu_entry_area_page_index(int cpu, int page)
> -{
> -	BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
> -
> -	return FIX_CPU_ENTRY_AREA_BOTTOM - cpu*CPU_ENTRY_AREA_PAGES - page;
> -}
> -
> -#define __get_cpu_entry_area_offset_index(cpu, offset) ({		\
> -	BUILD_BUG_ON(offset % PAGE_SIZE != 0);				\
> -	__get_cpu_entry_area_page_index(cpu, offset / PAGE_SIZE);	\
> -	})
> -
> -#define get_cpu_entry_area_index(cpu, field)				\
> -	__get_cpu_entry_area_offset_index((cpu), offsetof(struct cpu_entry_area, field))
> -
> -static inline struct cpu_entry_area *get_cpu_entry_area(int cpu)
> -{
> -	return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0));
> -}
> -
> -static inline struct entry_stack *cpu_entry_stack(int cpu)
> -{
> -	return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
> -}
> -
>  #endif /* !__ASSEMBLY__ */
>  #endif /* _ASM_X86_FIXMAP_H */
> --- a/arch/x86/include/asm/pgtable_32_types.h
> +++ b/arch/x86/include/asm/pgtable_32_types.h
> @@ -38,13 +38,22 @@ extern bool __vmalloc_start_set; /* set
>  #define LAST_PKMAP 1024
>  #endif
>  
> -#define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE * (LAST_PKMAP + 1))	\
> -		    & PMD_MASK)
> +/*
> + * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
> + * to avoid include recursion hell
> + */
> +#define CPU_ENTRY_AREA_PAGES	(NR_CPUS * 40)
> +
> +#define CPU_ENTRY_AREA_BASE				\
> +	((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK)
> +
> +#define PKMAP_BASE		\
> +	((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
>  
>  #ifdef CONFIG_HIGHMEM
>  # define VMALLOC_END	(PKMAP_BASE - 2 * PAGE_SIZE)
>  #else
> -# define VMALLOC_END	(FIXADDR_START - 2 * PAGE_SIZE)
> +# define VMALLOC_END	(CPU_ENTRY_AREA_BASE - 2 * PAGE_SIZE)
>  #endif
>  
>  #define MODULES_VADDR	VMALLOC_START
> --- a/arch/x86/include/asm/pgtable_64_types.h
> +++ b/arch/x86/include/asm/pgtable_64_types.h
> @@ -76,32 +76,41 @@ typedef struct { pteval_t pte; } pte_t;
>  #define PGDIR_MASK	(~(PGDIR_SIZE - 1))
>  
>  /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
> -#define MAXMEM		_AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
> +#define MAXMEM			_AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
> +
>  #ifdef CONFIG_X86_5LEVEL
> -#define VMALLOC_SIZE_TB _AC(16384, UL)
> -#define __VMALLOC_BASE	_AC(0xff92000000000000, UL)
> -#define __VMEMMAP_BASE	_AC(0xffd4000000000000, UL)
> +# define VMALLOC_SIZE_TB	_AC(16384, UL)
> +# define __VMALLOC_BASE		_AC(0xff92000000000000, UL)
> +# define __VMEMMAP_BASE		_AC(0xffd4000000000000, UL)
>  #else
> -#define VMALLOC_SIZE_TB	_AC(32, UL)
> -#define __VMALLOC_BASE	_AC(0xffffc90000000000, UL)
> -#define __VMEMMAP_BASE	_AC(0xffffea0000000000, UL)
> +# define VMALLOC_SIZE_TB	_AC(32, UL)
> +# define __VMALLOC_BASE		_AC(0xffffc90000000000, UL)
> +# define __VMEMMAP_BASE		_AC(0xffffea0000000000, UL)
>  #endif
> +
>  #ifdef CONFIG_RANDOMIZE_MEMORY
> -#define VMALLOC_START	vmalloc_base
> -#define VMEMMAP_START	vmemmap_base
> +# define VMALLOC_START		vmalloc_base
> +# define VMEMMAP_START		vmemmap_base
>  #else
> -#define VMALLOC_START	__VMALLOC_BASE
> -#define VMEMMAP_START	__VMEMMAP_BASE
> +# define VMALLOC_START		__VMALLOC_BASE
> +# define VMEMMAP_START		__VMEMMAP_BASE
>  #endif /* CONFIG_RANDOMIZE_MEMORY */
> -#define VMALLOC_END	(VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
> -#define MODULES_VADDR    (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
> +
> +#define VMALLOC_END		(VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
> +
> +#define MODULES_VADDR		(__START_KERNEL_map + KERNEL_IMAGE_SIZE)
>  /* The module sections ends with the start of the fixmap */
> -#define MODULES_END   __fix_to_virt(__end_of_fixed_addresses + 1)
> -#define MODULES_LEN   (MODULES_END - MODULES_VADDR)
> -#define ESPFIX_PGD_ENTRY _AC(-2, UL)
> -#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
> -#define EFI_VA_START	 ( -4 * (_AC(1, UL) << 30))
> -#define EFI_VA_END	 (-68 * (_AC(1, UL) << 30))
> +#define MODULES_END		__fix_to_virt(__end_of_fixed_addresses + 1)
> +#define MODULES_LEN		(MODULES_END - MODULES_VADDR)
> +
> +#define ESPFIX_PGD_ENTRY	_AC(-2, UL)
> +#define ESPFIX_BASE_ADDR	(ESPFIX_PGD_ENTRY << P4D_SHIFT)
> +
> +#define CPU_ENTRY_AREA_PGD	_AC(-3, UL)
> +#define CPU_ENTRY_AREA_BASE	(CPU_ENTRY_AREA_PGD << P4D_SHIFT)
> +
> +#define EFI_VA_START		( -4 * (_AC(1, UL) << 30))
> +#define EFI_VA_END		(-68 * (_AC(1, UL) << 30))
>  
>  #define EARLY_DYNAMIC_PAGE_TABLES	64
>  
> --- a/arch/x86/kernel/dumpstack.c
> +++ b/arch/x86/kernel/dumpstack.c
> @@ -18,6 +18,7 @@
>  #include <linux/nmi.h>
>  #include <linux/sysfs.h>
>  
> +#include <asm/cpu_entry_area.h>
>  #include <asm/stacktrace.h>
>  #include <asm/unwind.h>
>  
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -951,8 +951,9 @@ void __init trap_init(void)
>  	 * "sidt" instruction will not leak the location of the kernel, and
>  	 * to defend the IDT against arbitrary memory write vulnerabilities.
>  	 * It will be reloaded in cpu_init() */
> -	__set_fixmap(FIX_RO_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO);
> -	idt_descr.address = fix_to_virt(FIX_RO_IDT);
> +	cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table),
> +		    PAGE_KERNEL_RO);
> +	idt_descr.address = CPU_ENTRY_AREA_RO_IDT;
>  
>  	/*
>  	 * Should be a barrier for any external CPU state:
> --- a/arch/x86/mm/cpu_entry_area.c
> +++ b/arch/x86/mm/cpu_entry_area.c
> @@ -13,11 +13,18 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char,
>  	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
>  #endif
>  
> +void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
> +{
> +	unsigned long va = (unsigned long) cea_vaddr;
> +
> +	set_pte_vaddr(va, pfn_pte(pa >> PAGE_SHIFT, flags));
> +}
> +
>  static void __init
> -set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
> +cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
>  {
> -	for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
> -		__set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
> +	for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE)
> +		cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
>  }
>  
>  /* Setup the fixmap mappings only once per-processor */
> @@ -45,10 +52,12 @@ static void __init setup_cpu_entry_area(
>  	pgprot_t tss_prot = PAGE_KERNEL;
>  #endif
>  
> -	__set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
> -	set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page),
> -				per_cpu_ptr(&entry_stack_storage, cpu), 1,
> -				PAGE_KERNEL);
> +	cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu),
> +		    gdt_prot);
> +
> +	cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page,
> +			     per_cpu_ptr(&entry_stack_storage, cpu), 1,
> +			     PAGE_KERNEL);
>  
>  	/*
>  	 * The Intel SDM says (Volume 3, 7.2.1):
> @@ -70,10 +79,9 @@ static void __init setup_cpu_entry_area(
>  	BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
>  		      offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
>  	BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
> -	set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
> -				&per_cpu(cpu_tss_rw, cpu),
> -				sizeof(struct tss_struct) / PAGE_SIZE,
> -				tss_prot);
> +	cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss,
> +			     &per_cpu(cpu_tss_rw, cpu),
> +			     sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);
>  
>  #ifdef CONFIG_X86_32
>  	per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
> @@ -83,20 +91,37 @@ static void __init setup_cpu_entry_area(
>  	BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
>  	BUILD_BUG_ON(sizeof(exception_stacks) !=
>  		     sizeof(((struct cpu_entry_area *)0)->exception_stacks));
> -	set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
> -				&per_cpu(exception_stacks, cpu),
> -				sizeof(exception_stacks) / PAGE_SIZE,
> -				PAGE_KERNEL);
> +	cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks,
> +			     &per_cpu(exception_stacks, cpu),
> +			     sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL);
>  
> -	__set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
> +	cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline,
>  		     __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
>  #endif
>  }
>  
> +static __init void setup_cpu_entry_area_ptes(void)
> +{
> +#ifdef CONFIG_X86_32
> +	unsigned long start, end;
> +
> +	BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE);
> +	BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
> +
> +	start = CPU_ENTRY_AREA_BASE;
> +	end = start + CPU_ENTRY_AREA_MAP_SIZE;
> +
> +	for (; start < end; start += PMD_SIZE)
> +		populate_extra_pte(start);
> +#endif
> +}
> +
>  void __init setup_cpu_entry_areas(void)
>  {
>  	unsigned int cpu;
>  
> +	setup_cpu_entry_area_ptes();
> +
>  	for_each_possible_cpu(cpu)
>  		setup_cpu_entry_area(cpu);
>  }
> --- a/arch/x86/mm/dump_pagetables.c
> +++ b/arch/x86/mm/dump_pagetables.c
> @@ -58,6 +58,7 @@ enum address_markers_idx {
>  	KASAN_SHADOW_START_NR,
>  	KASAN_SHADOW_END_NR,
>  #endif
> +	CPU_ENTRY_AREA_NR,
>  #ifdef CONFIG_X86_ESPFIX64
>  	ESPFIX_START_NR,
>  #endif
> @@ -81,6 +82,7 @@ static struct addr_marker address_marker
>  	[KASAN_SHADOW_START_NR]	= { KASAN_SHADOW_START,	"KASAN shadow" },
>  	[KASAN_SHADOW_END_NR]	= { KASAN_SHADOW_END,	"KASAN shadow end" },
>  #endif
> +	[CPU_ENTRY_AREA_NR]	= { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
>  #ifdef CONFIG_X86_ESPFIX64
>  	[ESPFIX_START_NR]	= { ESPFIX_BASE_ADDR,	"ESPfix Area", 16 },
>  #endif
> @@ -104,6 +106,7 @@ enum address_markers_idx {
>  #ifdef CONFIG_HIGHMEM
>  	PKMAP_BASE_NR,
>  #endif
> +	CPU_ENTRY_AREA_NR,
>  	FIXADDR_START_NR,
>  	END_OF_SPACE_NR,
>  };
> @@ -116,6 +119,7 @@ static struct addr_marker address_marker
>  #ifdef CONFIG_HIGHMEM
>  	[PKMAP_BASE_NR]		= { 0UL,		"Persistent kmap() Area" },
>  #endif
> +	[CPU_ENTRY_AREA_NR]	= { 0UL,		"CPU entry area" },
>  	[FIXADDR_START_NR]	= { 0UL,		"Fixmap area" },
>  	[END_OF_SPACE_NR]	= { -1,			NULL }
>  };
> @@ -541,8 +545,8 @@ static int __init pt_dump_init(void)
>  	address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
>  # endif
>  	address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
> +	address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE;
>  #endif
> -
>  	return 0;
>  }
>  __initcall(pt_dump_init);
> --- a/arch/x86/mm/init_32.c
> +++ b/arch/x86/mm/init_32.c
> @@ -50,6 +50,7 @@
>  #include <asm/setup.h>
>  #include <asm/set_memory.h>
>  #include <asm/page_types.h>
> +#include <asm/cpu_entry_area.h>
>  #include <asm/init.h>
>  
>  #include "mm_internal.h"
> @@ -766,6 +767,7 @@ void __init mem_init(void)
>  	mem_init_print_info(NULL);
>  	printk(KERN_INFO "virtual kernel memory layout:\n"
>  		"    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
> +		"  cpu_entry : 0x%08lx - 0x%08lx   (%4ld kB)\n"
>  #ifdef CONFIG_HIGHMEM
>  		"    pkmap   : 0x%08lx - 0x%08lx   (%4ld kB)\n"
>  #endif
> @@ -777,6 +779,10 @@ void __init mem_init(void)
>  		FIXADDR_START, FIXADDR_TOP,
>  		(FIXADDR_TOP - FIXADDR_START) >> 10,
>  
> +		CPU_ENTRY_AREA_BASE,
> +		CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE,
> +		CPU_ENTRY_AREA_MAP_SIZE >> 10,
> +
>  #ifdef CONFIG_HIGHMEM
>  		PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
>  		(LAST_PKMAP*PAGE_SIZE) >> 10,
> --- a/arch/x86/mm/kasan_init_64.c
> +++ b/arch/x86/mm/kasan_init_64.c
> @@ -15,6 +15,7 @@
>  #include <asm/tlbflush.h>
>  #include <asm/sections.h>
>  #include <asm/pgtable.h>
> +#include <asm/cpu_entry_area.h>
>  
>  extern struct range pfn_mapped[E820_MAX_ENTRIES];
>  
> @@ -330,12 +331,13 @@ void __init kasan_init(void)
>  			      (unsigned long)kasan_mem_to_shadow(_end),
>  			      early_pfn_to_nid(__pa(_stext)));
>  
> -	shadow_cpu_entry_begin = (void *)__fix_to_virt(FIX_CPU_ENTRY_AREA_BOTTOM);
> +	shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE;
>  	shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
>  	shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin,
>  						PAGE_SIZE);
>  
> -	shadow_cpu_entry_end = (void *)(__fix_to_virt(FIX_CPU_ENTRY_AREA_TOP) + PAGE_SIZE);
> +	shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE +
> +					CPU_ENTRY_AREA_TOT_SIZE);
>  	shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
>  	shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end,
>  					PAGE_SIZE);
> --- a/arch/x86/mm/pgtable_32.c
> +++ b/arch/x86/mm/pgtable_32.c
> @@ -10,6 +10,7 @@
>  #include <linux/pagemap.h>
>  #include <linux/spinlock.h>
>  
> +#include <asm/cpu_entry_area.h>
>  #include <asm/pgtable.h>
>  #include <asm/pgalloc.h>
>  #include <asm/fixmap.h>
> --- a/arch/x86/xen/mmu_pv.c
> +++ b/arch/x86/xen/mmu_pv.c
> @@ -2261,7 +2261,6 @@ static void xen_set_fixmap(unsigned idx,
>  
>  	switch (idx) {
>  	case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
> -	case FIX_RO_IDT:
>  #ifdef CONFIG_X86_32
>  	case FIX_WP_TEST:
>  # ifdef CONFIG_HIGHMEM
> @@ -2272,7 +2271,6 @@ static void xen_set_fixmap(unsigned idx,
>  #endif
>  	case FIX_TEXT_POKE0:
>  	case FIX_TEXT_POKE1:
> -	case FIX_CPU_ENTRY_AREA_TOP ... FIX_CPU_ENTRY_AREA_BOTTOM:
>  		/* All local page mappings */
>  		pte = pfn_pte(phys, prot);
>  		break;

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ