Subject: [PATCH 2/2] x86, 64bit: cleanup highmap late for not needed range

1. Use _brk_end instead of &_end in mark_rodata_ro().
   _brk_end can move up to &_end, i.e. to __brk_limit. It is safe to use
   _brk_end when mark_rodata_ro() is called because extend_brk() is already
   gone at that point.

2. Add cleanup_highmap_late() to remove the high mappings of initmem, of the
   gaps around rodata, and of [_brk_end, pmd_end].

Kernel Layout:
[ 0.000000] .text: [0x01000000-0x0200d608]
[ 0.000000] .rodata: [0x02200000-0x02a1cfff]
[ 0.000000] .data: [0x02c00000-0x02e50e7f]
[ 0.000000] .init: [0x02e52000-0x03212fff]
[ 0.000000] .bss: [0x03221000-0x0437bfff]
[ 0.000000] .brk: [0x0437c000-0x043a1fff]

Actually used brk:
[ 0.272959] memblock_reserve: [0x0000000437c000-0x00000004382fff] flags 0x0 BRK

Before patch:
---[ High Kernel Mapping ]---
0xffffffff80000000-0xffffffff81000000 16M pmd
0xffffffff81000000-0xffffffff82200000 18M ro PSE GLB x pmd
0xffffffff82200000-0xffffffff82c00000 10M ro PSE GLB NX pmd
0xffffffff82c00000-0xffffffff82e00000 2M RW PSE GLB NX pmd
0xffffffff82e00000-0xffffffff83000000 2M RW GLB NX pte
0xffffffff83000000-0xffffffff83200000 2M RW PSE GLB NX pmd
0xffffffff83200000-0xffffffff83400000 2M RW GLB NX pte
0xffffffff83400000-0xffffffff84200000 14M RW PSE GLB NX pmd
0xffffffff84200000-0xffffffff843a2000 1672K RW GLB NX pte
0xffffffff843a2000-0xffffffff84400000 376K RW GLB x pte
0xffffffff84400000-0xffffffffa0000000 444M pmd

After patch:
---[ High Kernel Mapping ]---
0xffffffff80000000-0xffffffff81000000 16M pmd
0xffffffff81000000-0xffffffff82000000 16M ro PSE GLB x pmd
0xffffffff82000000-0xffffffff82011000 68K ro GLB x pte
0xffffffff82011000-0xffffffff82200000 1980K pte
0xffffffff82200000-0xffffffff82a00000 8M ro PSE GLB NX pmd
0xffffffff82a00000-0xffffffff82a1d000 116K ro GLB NX pte
0xffffffff82a1d000-0xffffffff82c00000 1932K pte
0xffffffff82c00000-0xffffffff82e00000 2M RW PSE GLB NX pmd
0xffffffff82e00000-0xffffffff82e52000 328K RW GLB NX pte
0xffffffff82e52000-0xffffffff83000000 1720K pte
0xffffffff83000000-0xffffffff83200000 2M pmd
0xffffffff83200000-0xffffffff83213000 76K pte
0xffffffff83213000-0xffffffff83400000 1972K RW GLB NX pte
0xffffffff83400000-0xffffffff84200000 14M RW PSE GLB NX pmd
0xffffffff84200000-0xffffffff84383000 1548K RW GLB NX pte
0xffffffff84383000-0xffffffff84400000 500K pte
0xffffffff84400000-0xffffffffa0000000 444M pmd

-v3: remove all not used highmap ranges.

Signed-off-by: Yinghai Lu

---
 arch/x86/mm/init_64.c | 92 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 91 insertions(+), 1 deletion(-)

Index: linux-2.6/arch/x86/mm/init_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_64.c
+++ linux-2.6/arch/x86/mm/init_64.c
@@ -1098,6 +1098,89 @@ void __init mem_init(void)
 }
 
 #ifdef CONFIG_DEBUG_RODATA
+/* Clear the kernel PMD entry that covers addr. */
+static void remove_highmap_2m(unsigned long addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	pgd = pgd_offset_k(addr);
+	pud = (pud_t *)pgd_page_vaddr(*pgd) + pud_index(addr);
+	pmd = (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr);
+
+	set_pmd(pmd, __pmd(0));
+}
+
+/*
+ * Split the mapping of [addr, end) into 4k pages and clear its PTEs.
+ * Only one PTE page is walked, so the range must stay within a single PMD.
+ */
+static void remove_highmap_2m_partial(unsigned long addr, unsigned long end)
+{
+	int i;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	int start_index = pte_index(addr);
+	int end_index = pte_index(end - 1) + 1;
+
+	set_memory_4k(addr, end_index - start_index);
+
+	pgd = pgd_offset_k(addr);
+	pud = (pud_t *)pgd_page_vaddr(*pgd) + pud_index(addr);
+	pmd = (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr);
+	pte = (pte_t *)pmd_page_vaddr(*pmd) + start_index;
+
+	for (i = start_index; i < end_index; i++, pte++)
+		set_pte(pte, __pte(0));
+}
+
+/*
+ * Unmap [start, end) from the high kernel mapping. start is page-aligned
+ * with PFN_ALIGN() and end is masked with PAGE_MASK; the resulting pfn
+ * range is also removed from pfn_highmapped[].
+ *
+ * NOTE(review): no TLB flush is issued after the entries are cleared;
+ * confirm whether one is needed here.
+ */
+static void cleanup_highmap_late(unsigned long start, unsigned long end)
+{
+	unsigned long addr;
+	unsigned long start_2m_aligned = roundup(start, PMD_SIZE);
+	unsigned long end_2m_aligned = rounddown(end, PMD_SIZE);
+
+	start = PFN_ALIGN(start);
+	end &= PAGE_MASK;
+
+	if (start >= end)
+		return;
+
+	if (start_2m_aligned > end_2m_aligned) {
+		/*
+		 * The range lies strictly inside one 2M page, so neither
+		 * the head nor the tail case below would cover it.
+		 */
+		remove_highmap_2m_partial(start, end);
+	} else {
+		if (start < start_2m_aligned)
+			remove_highmap_2m_partial(start, start_2m_aligned);
+
+		for (addr = start_2m_aligned; addr < end_2m_aligned;
+		     addr += PMD_SIZE)
+			remove_highmap_2m(addr);
+
+		if (end_2m_aligned < end)
+			remove_highmap_2m_partial(end_2m_aligned, end);
+	}
+
+	subtract_range(pfn_highmapped, NR_RANGE,
+		       __pa_symbol(start) >> PAGE_SHIFT,
+		       __pa_symbol(end) >> PAGE_SHIFT);
+	nr_pfn_highmapped = clean_sort_range(pfn_highmapped, NR_RANGE);
+}
+
 const int rodata_test_data = 0xC3;
 EXPORT_SYMBOL_GPL(rodata_test_data);
 
@@ -1146,7 +1229,8 @@ void mark_rodata_ro(void)
 	unsigned long end = (unsigned long) &__end_rodata_hpage_align;
 	unsigned long text_end = PFN_ALIGN(&__stop___ex_table);
 	unsigned long rodata_end = PFN_ALIGN(&__end_rodata);
-	unsigned long all_end = PFN_ALIGN(&_end);
+	unsigned long data_start = PFN_ALIGN(&_sdata);
+	unsigned long all_end = PFN_ALIGN(_brk_end);
 
 	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
 	       (end - start) >> 10);
@@ -1160,6 +1244,12 @@ void mark_rodata_ro(void)
 	 */
 	set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT);
 
+	cleanup_highmap_late(text_end, rodata_start);
+	cleanup_highmap_late(rodata_end, data_start);
+	cleanup_highmap_late(all_end, roundup(_brk_end, PMD_SIZE));
+	cleanup_highmap_late((unsigned long)(&__init_begin),
+			     (unsigned long)(&__init_end));
+
 	rodata_test();
 
 #ifdef CONFIG_CPA_DEBUG