Subject: [PATCH v2] x86, 64bit: cleanup highmap tail near partial 2M range 1. Should use _brk_end instead of &_end in mark_rodata_ro(). _brk_end can move up to &_end, i.e. to __brk_limit. It's safe to use _brk_end when mark_rodata_ro() is called because extend_brk() is gone already at that point. 2. The [_brk_end, pm_end) page range is already converted memory and is not wasted. 3. Add cleanup_highmap_tail for [_brk_end, pm_end). Kernel Layout: [ 0.000000] .brk: [0x0437c000-0x043a1fff] Actually used brk: [ 0.272959] memblock_reserve: [0x0000000437c000-0x00000004382fff] flags 0x0 BRK Before patch: ---[ High Kernel Mapping ]--- ... 0xffffffff83400000-0xffffffff84200000 14M RW PSE GLB NX pmd 0xffffffff84200000-0xffffffff843a2000 1672K RW GLB NX pte 0xffffffff843a2000-0xffffffff84400000 376K RW GLB x pte 0xffffffff84400000-0xffffffffa0000000 444M pmd After patch: ---[ High Kernel Mapping ]--- ... 0xffffffff83400000-0xffffffff84200000 14M RW PSE GLB NX pmd 0xffffffff84200000-0xffffffff84383000 1548K RW GLB NX pte 0xffffffff84383000-0xffffffff84400000 500K pte 0xffffffff84400000-0xffffffffa0000000 444M pmd -v2: according to tglx, calculate the pmd position instead of passing last_pmd. cleanup_highmap_tail cannot be marked __init, as it is called from mark_rodata_ro, and mark_rodata_ro is called after free_initmem. 
highmap_end_pfn should keep PMD_SIZE alignment on !CONFIG_DEBUG_RODATA Signed-off-by: Yinghai Lu --- arch/x86/mm/init_64.c | 22 +++++++++++++++++++++- arch/x86/mm/pageattr.c | 4 ++++ 2 files changed, 25 insertions(+), 1 deletion(-) Index: linux-2.6/arch/x86/mm/init_64.c =================================================================== --- linux-2.6.orig/arch/x86/mm/init_64.c +++ linux-2.6/arch/x86/mm/init_64.c @@ -411,6 +411,23 @@ void __init cleanup_highmap(void) } } +static void cleanup_highmap_tail(unsigned long addr) +{ + int i; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + pgd = pgd_offset_k(addr); + pud = (pud_t *)pgd_page_vaddr(*pgd) + pud_index(addr); + pmd = (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr); + pte = (pte_t *)pmd_page_vaddr(*pmd) + pte_index(addr); + + for (i = pte_index(addr); i < PTRS_PER_PTE; i++, pte++) + set_pte(pte, __pte(0)); +} + static unsigned long __meminit phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, pgprot_t prot) @@ -1124,7 +1141,8 @@ void mark_rodata_ro(void) unsigned long end = (unsigned long) &__end_rodata_hpage_align; unsigned long text_end = PFN_ALIGN(&__stop___ex_table); unsigned long rodata_end = PFN_ALIGN(&__end_rodata); - unsigned long all_end = PFN_ALIGN(&_end); + unsigned long all_end = PFN_ALIGN(_brk_end); + unsigned long pmd_end = roundup(all_end, PMD_SIZE); printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", (end - start) >> 10); @@ -1137,6 +1155,8 @@ void mark_rodata_ro(void) * should also be not-executable. 
*/ set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT); + if (all_end < pmd_end) + cleanup_highmap_tail(all_end); rodata_test(); Index: linux-2.6/arch/x86/mm/pageattr.c =================================================================== --- linux-2.6.orig/arch/x86/mm/pageattr.c +++ linux-2.6/arch/x86/mm/pageattr.c @@ -100,7 +100,11 @@ static inline unsigned long highmap_star static inline unsigned long highmap_end_pfn(void) { +#ifdef CONFIG_DEBUG_RODATA + return __pa_symbol(PFN_ALIGN(_brk_end)) >> PAGE_SHIFT; +#else return __pa_symbol(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT; +#endif } #endif