lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <DM6PR04MB62013781E311A4F316AEA7F28D8E0@DM6PR04MB6201.namprd04.prod.outlook.com>
Date:   Thu, 28 May 2020 13:35:32 +0000
From:   Anup Patel <Anup.Patel@....com>
To:     Alex Ghiti <alex@...ti.fr>, Anup Patel <anup@...infault.org>
CC:     Paul Walmsley <paul.walmsley@...ive.com>,
        Palmer Dabbelt <palmer@...belt.com>,
        Zong Li <zong.li@...ive.com>, Christoph Hellwig <hch@....de>,
        linux-riscv <linux-riscv@...ts.infradead.org>,
        "linux-kernel@...r.kernel.org List" <linux-kernel@...r.kernel.org>
Subject: RE: [PATCH 5/8] riscv: Implement sv48 support



> -----Original Message-----
> From: linux-kernel-owner@...r.kernel.org <linux-kernel-
> owner@...r.kernel.org> On Behalf Of Alex Ghiti
> Sent: 26 May 2020 22:00
> To: Anup Patel <anup@...infault.org>
> Cc: Paul Walmsley <paul.walmsley@...ive.com>; Palmer Dabbelt
> <palmer@...belt.com>; Zong Li <zong.li@...ive.com>; Christoph Hellwig
> <hch@....de>; linux-riscv <linux-riscv@...ts.infradead.org>; linux-
> kernel@...r.kernel.org List <linux-kernel@...r.kernel.org>
> Subject: Re: [PATCH 5/8] riscv: Implement sv48 support
> 
> Le 5/25/20 à 2:45 AM, Anup Patel a écrit :
> > On Sun, May 24, 2020 at 2:45 PM Alexandre Ghiti <alex@...ti.fr> wrote:
> >> By adding a new 4th level of page table, give the possibility to
> >> 64bit kernel to address 2^48 bytes of virtual address: in practice,
> >> that roughly offers ~160TB of virtual address space to userspace and
> >> allows up to 64TB of physical memory.
> >>
> >> If the underlying hardware does not support sv48, we will
> >> automatically fallback to a standard 3-level page table by folding
> >> the new PUD level into PGDIR level. In order to detect HW
> >> capabilities at runtime, we use the SATP feature that ignores writes with an
> >> unsupported mode.
> >>
> >> Signed-off-by: Alexandre Ghiti <alex@...ti.fr>
> >> ---
> >>   arch/riscv/Kconfig                  |   6 +-
> >>   arch/riscv/include/asm/csr.h        |   3 +-
> >>   arch/riscv/include/asm/fixmap.h     |   1 +
> >>   arch/riscv/include/asm/page.h       |  15 +++
> >>   arch/riscv/include/asm/pgalloc.h    |  36 +++++++
> >>   arch/riscv/include/asm/pgtable-64.h |  97 ++++++++++++++++-
> >>   arch/riscv/include/asm/pgtable.h    |   9 +-
> >>   arch/riscv/kernel/head.S            |   3 +-
> >>   arch/riscv/mm/context.c             |   4 +-
> >>   arch/riscv/mm/init.c                | 159 +++++++++++++++++++++++++---
> >>   10 files changed, 309 insertions(+), 24 deletions(-)
> >>
> >> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index
> >> e167f16131f4..3f73f60e9732 100644
> >> --- a/arch/riscv/Kconfig
> >> +++ b/arch/riscv/Kconfig
> >> @@ -68,6 +68,7 @@ config RISCV
> >>          select ARCH_HAS_GCOV_PROFILE_ALL
> >>          select HAVE_COPY_THREAD_TLS
> >>          select HAVE_ARCH_KASAN if MMU && 64BIT
> >> +       select RELOCATABLE if 64BIT
> >>
> >>   config ARCH_MMAP_RND_BITS_MIN
> >>          default 18 if 64BIT
> >> @@ -106,7 +107,7 @@ config PAGE_OFFSET
> >>          default 0xC0000000 if 32BIT && MAXPHYSMEM_2GB
> >>          default 0x80000000 if 64BIT && !MMU
> >>          default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB
> >> -       default 0xffffffe000000000 if 64BIT && !MAXPHYSMEM_2GB
> >> +       default 0xffffc00000000000 if 64BIT && !MAXPHYSMEM_2GB
> >>
> >>   config ARCH_FLATMEM_ENABLE
> >>          def_bool y
> >> @@ -155,8 +156,11 @@ config GENERIC_HWEIGHT
> >>   config FIX_EARLYCON_MEM
> >>          def_bool MMU
> >>
> >> +# On a 64BIT relocatable kernel, the 4-level page table is at
> >> +runtime folded # on a 3-level page table when sv48 is not supported.
> >>   config PGTABLE_LEVELS
> >>          int
> >> +       default 4 if 64BIT && RELOCATABLE
> >>          default 3 if 64BIT
> >>          default 2
> >>
> >> diff --git a/arch/riscv/include/asm/csr.h
> >> b/arch/riscv/include/asm/csr.h index cec462e198ce..d41536c3f8d4
> >> 100644
> >> --- a/arch/riscv/include/asm/csr.h
> >> +++ b/arch/riscv/include/asm/csr.h
> >> @@ -40,11 +40,10 @@
> >>   #ifndef CONFIG_64BIT
> >>   #define SATP_PPN       _AC(0x003FFFFF, UL)
> >>   #define SATP_MODE_32   _AC(0x80000000, UL)
> >> -#define SATP_MODE      SATP_MODE_32
> >>   #else
> >>   #define SATP_PPN       _AC(0x00000FFFFFFFFFFF, UL)
> >>   #define SATP_MODE_39   _AC(0x8000000000000000, UL)
> >> -#define SATP_MODE      SATP_MODE_39
> >> +#define SATP_MODE_48   _AC(0x9000000000000000, UL)
> >>   #endif
> >>
> >>   /* Exception cause high bit - is an interrupt if set */ diff --git
> >> a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
> >> index 2368d49eb4ef..d891cf9c73c5 100644
> >> --- a/arch/riscv/include/asm/fixmap.h
> >> +++ b/arch/riscv/include/asm/fixmap.h
> >> @@ -27,6 +27,7 @@ enum fixed_addresses {
> >>          FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1,
> >>          FIX_PTE,
> >>          FIX_PMD,
> >> +       FIX_PUD,
> >>          FIX_TEXT_POKE1,
> >>          FIX_TEXT_POKE0,
> >>          FIX_EARLYCON_MEM_BASE,
> >> diff --git a/arch/riscv/include/asm/page.h
> >> b/arch/riscv/include/asm/page.h index 48bb09b6a9b7..5e77fe7f0d6d
> >> 100644
> >> --- a/arch/riscv/include/asm/page.h
> >> +++ b/arch/riscv/include/asm/page.h
> >> @@ -31,7 +31,19 @@
> >>    * When not using MMU this corresponds to the first free page in
> >>    * physical memory (aligned on a page boundary).
> >>    */
> >> +#ifdef CONFIG_RELOCATABLE
> >> +#define PAGE_OFFSET            __page_offset
> >> +
> >> +#ifdef CONFIG_64BIT
> >> +/*
> >> + * By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address
> >> +space so
> >> + * define the PAGE_OFFSET value for SV39.
> >> + */
> >> +#define PAGE_OFFSET_L3         0xffffffe000000000
> >> +#endif /* CONFIG_64BIT */
> >> +#else
> >>   #define PAGE_OFFSET            _AC(CONFIG_PAGE_OFFSET, UL)
> >> +#endif /* CONFIG_RELOCATABLE */
> >>
> >>   #define KERN_VIRT_SIZE (-PAGE_OFFSET)
> >>
> >> @@ -102,6 +114,9 @@ extern unsigned long pfn_base;
> >>   extern unsigned long max_low_pfn;
> >>   extern unsigned long min_low_pfn;
> >>   extern unsigned long kernel_virt_addr;
> >> +#ifdef CONFIG_RELOCATABLE
> >> +extern unsigned long __page_offset;
> >> +#endif
> >>
> >>   #define __pa_to_va_nodebug(x)  ((void *)((unsigned long) (x) +
> va_pa_offset))
> >>   #define linear_mapping_va_to_pa(x)     ((unsigned long)(x) - va_pa_offset)
> >> diff --git a/arch/riscv/include/asm/pgalloc.h
> >> b/arch/riscv/include/asm/pgalloc.h
> >> index 3f601ee8233f..540eaa5a8658 100644
> >> --- a/arch/riscv/include/asm/pgalloc.h
> >> +++ b/arch/riscv/include/asm/pgalloc.h
> >> @@ -36,6 +36,42 @@ static inline void pud_populate(struct mm_struct
> >> *mm, pud_t *pud, pmd_t *pmd)
> >>
> >>          set_pud(pud, __pud((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
> >>   }
> >> +
> >> +static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d,
> >> +pud_t *pud) {
> >> +       if (pgtable_l4_enabled) {
> >> +               unsigned long pfn = virt_to_pfn(pud);
> >> +
> >> +               set_p4d(p4d, __p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
> >> +       }
> >> +}
> >> +
> >> +static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d,
> >> +                                    pud_t *pud) {
> >> +       if (pgtable_l4_enabled) {
> >> +               unsigned long pfn = virt_to_pfn(pud);
> >> +
> >> +               set_p4d_safe(p4d,
> >> +                            __p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
> >> +       }
> >> +}
> >> +
> >> +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned
> >> +long addr) {
> >> +       if (pgtable_l4_enabled)
> >> +               return (pud_t *)__get_free_page(
> >> +                               GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
> >> +       return NULL;
> >> +}
> >> +
> >> +static inline void pud_free(struct mm_struct *mm, pud_t *pud) {
> >> +       if (pgtable_l4_enabled)
> >> +               free_page((unsigned long)pud); }
> >> +
> >> +#define __pud_free_tlb(tlb, pud, addr)  pud_free((tlb)->mm, pud)
> >>   #endif /* __PAGETABLE_PMD_FOLDED */
> >>
> >>   #define pmd_pgtable(pmd)       pmd_page(pmd)
> >> diff --git a/arch/riscv/include/asm/pgtable-64.h
> >> b/arch/riscv/include/asm/pgtable-64.h
> >> index b15f70a1fdfa..c84c31fbf8da 100644
> >> --- a/arch/riscv/include/asm/pgtable-64.h
> >> +++ b/arch/riscv/include/asm/pgtable-64.h
> >> @@ -8,16 +8,32 @@
> >>
> >>   #include <linux/const.h>
> >>
> >> -#define PGDIR_SHIFT     30
> >> +extern bool pgtable_l4_enabled;
> >> +
> >> +#define PGDIR_SHIFT     (pgtable_l4_enabled ? 39 : 30)
> >>   /* Size of region mapped by a page global directory */
> >>   #define PGDIR_SIZE      (_AC(1, UL) << PGDIR_SHIFT)
> >>   #define PGDIR_MASK      (~(PGDIR_SIZE - 1))
> >>
> >> +/* pud is folded into pgd in case of 3-level page table */
> >> +#define PUD_SHIFT      30
> >> +#define PUD_SIZE       (_AC(1, UL) << PUD_SHIFT)
> >> +#define PUD_MASK       (~(PUD_SIZE - 1))
> >> +
> >>   #define PMD_SHIFT       21
> >>   /* Size of region mapped by a page middle directory */
> >>   #define PMD_SIZE        (_AC(1, UL) << PMD_SHIFT)
> >>   #define PMD_MASK        (~(PMD_SIZE - 1))
> >>
> >> +/* Page Upper Directory entry */
> >> +typedef struct {
> >> +       unsigned long pud;
> >> +} pud_t;
> >> +
> >> +#define pud_val(x)      ((x).pud)
> >> +#define __pud(x)        ((pud_t) { (x) })
> >> +#define PTRS_PER_PUD    (PAGE_SIZE / sizeof(pud_t))
> >> +
> >>   /* Page Middle Directory entry */
> >>   typedef struct {
> >>          unsigned long pmd;
> >> @@ -60,6 +76,16 @@ static inline void pud_clear(pud_t *pudp)
> >>          set_pud(pudp, __pud(0));
> >>   }
> >>
> >> +static inline pud_t pfn_pud(unsigned long pfn, pgprot_t prot) {
> >> +       return __pud((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot)); }
> >> +
> >> +static inline unsigned long _pud_pfn(pud_t pud) {
> >> +       return pud_val(pud) >> _PAGE_PFN_SHIFT; }
> >> +
> >>   static inline unsigned long pud_page_vaddr(pud_t pud)
> >>   {
> >>          return (unsigned long)pfn_to_virt(pud_val(pud) >>
> >> _PAGE_PFN_SHIFT); @@ -70,6 +96,15 @@ static inline struct page
> *pud_page(pud_t pud)
> >>          return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT);
> >>   }
> >>
> >> +#define mm_pud_folded  mm_pud_folded static inline bool
> >> +mm_pud_folded(struct mm_struct *mm) {
> >> +       if (pgtable_l4_enabled)
> >> +               return false;
> >> +
> >> +       return true;
> >> +}
> >> +
> >>   #define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD -
> >> 1))
> >>
> >>   static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) @@
> >> -90,4 +125,64 @@ static inline unsigned long _pmd_pfn(pmd_t pmd)
> >>   #define pmd_ERROR(e) \
> >>          pr_err("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__,
> >> pmd_val(e))
> >>
> >> +#define pud_ERROR(e)   \
> >> +       pr_err("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__,
> >> +pud_val(e))
> >> +
> >> +static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) {
> >> +       if (pgtable_l4_enabled)
> >> +               *p4dp = p4d;
> >> +       else
> >> +               set_pud((pud_t *)p4dp, (pud_t){ p4d_val(p4d) }); }
> >> +
> >> +static inline int p4d_none(p4d_t p4d) {
> >> +       if (pgtable_l4_enabled)
> >> +               return (p4d_val(p4d) == 0);
> >> +
> >> +       return 0;
> >> +}
> >> +
> >> +static inline int p4d_present(p4d_t p4d) {
> >> +       if (pgtable_l4_enabled)
> >> +               return (p4d_val(p4d) & _PAGE_PRESENT);
> >> +
> >> +       return 1;
> >> +}
> >> +
> >> +static inline int p4d_bad(p4d_t p4d) {
> >> +       if (pgtable_l4_enabled)
> >> +               return !p4d_present(p4d);
> >> +
> >> +       return 0;
> >> +}
> >> +
> >> +static inline void p4d_clear(p4d_t *p4d) {
> >> +       if (pgtable_l4_enabled)
> >> +               set_p4d(p4d, __p4d(0)); }
> >> +
> >> +static inline unsigned long p4d_page_vaddr(p4d_t p4d) {
> >> +       if (pgtable_l4_enabled)
> >> +               return (unsigned long)pfn_to_virt(
> >> +                               p4d_val(p4d) >> _PAGE_PFN_SHIFT);
> >> +
> >> +       return pud_page_vaddr((pud_t) { p4d_val(p4d) }); }
> >> +
> >> +#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
> >> +
> >> +static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) {
> >> +       if (pgtable_l4_enabled)
> >> +               return (pud_t *)p4d_page_vaddr(*p4d) +
> >> +pud_index(address);
> >> +
> >> +       return (pud_t *)p4d;
> >> +}
> >> +
> >>   #endif /* _ASM_RISCV_PGTABLE_64_H */ diff --git
> >> a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> >> index 8e96315b3366..b8a8ba69d0a2 100644
> >> --- a/arch/riscv/include/asm/pgtable.h
> >> +++ b/arch/riscv/include/asm/pgtable.h
> >> @@ -20,12 +20,14 @@
> >>    * the kernel.
> >>    */
> >>   #define KERNEL_VIRT_ADDR       (VMALLOC_END - SZ_2G + 1)
> >> -#define KERNEL_LINK_ADDR       KERNEL_VIRT_ADDR
> >> +#define KERNEL_LINK_ADDR       (VMALLOC_LINK_END - SZ_2G + 1)
> >>
> >>   #define VMALLOC_SIZE     (KERN_VIRT_SIZE >> 1)
> >>   #define VMALLOC_END      (PAGE_OFFSET - 1)
> >>   #define VMALLOC_START    (PAGE_OFFSET - VMALLOC_SIZE)
> >>
> >> +#define VMALLOC_LINK_END       (_AC(CONFIG_PAGE_OFFSET, UL) - 1)
> >> +
> >>   #define BPF_JIT_REGION_SIZE    (SZ_128M)
> >>   #define BPF_JIT_REGION_START   (kernel_virt_addr)
> >>   #define BPF_JIT_REGION_END     (kernel_virt_addr +
> BPF_JIT_REGION_SIZE)
> >> @@ -67,8 +69,7 @@
> >>
> >>   #ifndef __ASSEMBLY__
> >>
> >> -/* Page Upper Directory not used in RISC-V */ -#include
> >> <asm-generic/pgtable-nopud.h>
> >> +#include <asm-generic/pgtable-nop4d.h>
> >>   #include <asm/page.h>
> >>   #include <asm/tlbflush.h>
> >>   #include <linux/mm_types.h>
> >> @@ -81,7 +82,7 @@
> >>
> >>   #ifdef CONFIG_MMU
> >>   #ifdef CONFIG_64BIT
> >> -#define VA_BITS                39
> >> +#define VA_BITS                (pgtable_l4_enabled ? 48 : 39)
> >>   #define PA_BITS                56
> >>   #else
> >>   #define VA_BITS                32
> >> diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
> >> index 8f5bb7731327..0632c4834c68 100644
> >> --- a/arch/riscv/kernel/head.S
> >> +++ b/arch/riscv/kernel/head.S
> >> @@ -62,7 +62,8 @@ relocate:
> >>
> >>          /* Compute satp for kernel page tables, but don't load it yet */
> >>          srl a2, a0, PAGE_SHIFT
> >> -       li a1, SATP_MODE
> >> +       la a1, satp_mode
> >> +       REG_L a1, 0(a1)
> >>          or a2, a2, a1
> >>
> >>          /*
> >> diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index
> >> 613ec81a8979..6830504f8b11 100644
> >> --- a/arch/riscv/mm/context.c
> >> +++ b/arch/riscv/mm/context.c
> >> @@ -9,6 +9,8 @@
> >>   #include <asm/cacheflush.h>
> >>   #include <asm/mmu_context.h>
> >>
> >> +extern u64 satp_mode;
> > Please move this to asm/pgtable.h next to "extern void *dtb_early_va".
> >
> > Same thing can be done for "pgtable_l4_enabled" to help PATCH7.
> >
> > I forgot to mention this in previous emails.
> 
> 
> Ok, I'll do that in v2 too, thanks.
> Anup, do you have time to take a look at the relocatable series I posted
> earlier?
> As sv48 support depends on that, it would be nice to have your review too.

Sure, I will review it tomorrow or the day after.

Thanks,
Anup

> 
> Thanks,
> 
> Alex
> 
> 
> >
> > Regards,
> > Anup
> >
> >
> >
> >> +
> >>   /*
> >>    * When necessary, performs a deferred icache flush for the given MM
> context,
> >>    * on the local CPU.  RISC-V has no direct mechanism for
> >> instruction cache @@ -59,7 +61,7 @@ void switch_mm(struct mm_struct
> *prev, struct mm_struct *next,
> >>          cpumask_set_cpu(cpu, mm_cpumask(next));
> >>
> >>   #ifdef CONFIG_MMU
> >> -       csr_write(CSR_SATP, virt_to_pfn(next->pgd) | SATP_MODE);
> >> +       csr_write(CSR_SATP, virt_to_pfn(next->pgd) | satp_mode);
> >>          local_flush_tlb_all();
> >>   #endif
> >>
> >> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index
> >> 5782cae58ac2..bad8da099ff6 100644
> >> --- a/arch/riscv/mm/init.c
> >> +++ b/arch/riscv/mm/init.c
> >> @@ -25,8 +25,23 @@
> >>
> >>   #include "../kernel/head.h"
> >>
> >> -unsigned long kernel_virt_addr = KERNEL_VIRT_ADDR;
> >> +#ifdef CONFIG_64BIT
> >> +u64 satp_mode = IS_ENABLED(CONFIG_MAXPHYSMEM_2GB) ?
> >> +                               SATP_MODE_39 : SATP_MODE_48; bool
> >> +pgtable_l4_enabled = IS_ENABLED(CONFIG_MAXPHYSMEM_2GB) ? false :
> >> +true; #else
> >> +u64 satp_mode = SATP_MODE_32;
> >> +bool pgtable_l4_enabled;
> >> +#endif
> >> +EXPORT_SYMBOL(pgtable_l4_enabled);
> >> +EXPORT_SYMBOL(satp_mode);
> >> +
> >> +unsigned long kernel_virt_addr;
> >>   EXPORT_SYMBOL(kernel_virt_addr);
> >> +#ifdef CONFIG_RELOCATABLE
> >> +unsigned long __page_offset = _AC(CONFIG_PAGE_OFFSET, UL);
> >> +EXPORT_SYMBOL(__page_offset); #endif
> >>
> >>   unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
> >>
> >> __page_aligned_bss; @@ -254,9 +269,12 @@ static void __init
> >> create_pte_mapping(pte_t *ptep,
> >>
> >>   #ifndef __PAGETABLE_PMD_FOLDED
> >>
> >> +pud_t trampoline_pud[PTRS_PER_PUD] __page_aligned_bss;
> >>   pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss;
> >> +pud_t fixmap_pud[PTRS_PER_PUD] __page_aligned_bss;
> >>   pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;
> >>   pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
> >> +pud_t early_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
> >>
> >>   static pmd_t *__init get_pmd_virt(phys_addr_t pa)
> >>   {
> >> @@ -273,7 +291,8 @@ static phys_addr_t __init alloc_pmd(uintptr_t va)
> >>          if (mmu_enabled)
> >>                  return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
> >>
> >> -       BUG_ON((va - kernel_virt_addr) >> PGDIR_SHIFT);
> >> +       /* Only one PMD is available for early mapping */
> >> +       BUG_ON((va - kernel_virt_addr) >> PUD_SHIFT);
> >>
> >>          return (uintptr_t)early_pmd;
> >>   }
> >> @@ -305,19 +324,70 @@ static void __init create_pmd_mapping(pmd_t
> *pmdp,
> >>          create_pte_mapping(ptep, va, pa, sz, prot);
> >>   }
> >>
> >> -#define pgd_next_t             pmd_t
> >> -#define alloc_pgd_next(__va)   alloc_pmd(__va)
> >> -#define get_pgd_next_virt(__pa)        get_pmd_virt(__pa)
> >> +static pud_t *__init get_pud_virt(phys_addr_t pa) {
> >> +       if (mmu_enabled) {
> >> +               clear_fixmap(FIX_PUD);
> >> +               return (pud_t *)set_fixmap_offset(FIX_PUD, pa);
> >> +       } else {
> >> +               return (pud_t *)((uintptr_t)pa);
> >> +       }
> >> +}
> >> +
> >> +static phys_addr_t __init alloc_pud(uintptr_t va) {
> >> +       if (mmu_enabled)
> >> +               return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
> >> +
> >> +       /* Only one PUD is available for early mapping */
> >> +       BUG_ON((va - kernel_virt_addr) >> PGDIR_SHIFT);
> >> +
> >> +       return (uintptr_t)early_pud;
> >> +}
> >> +
> >> +static void __init create_pud_mapping(pud_t *pudp,
> >> +                                     uintptr_t va, phys_addr_t pa,
> >> +                                     phys_addr_t sz, pgprot_t prot)
> >> +{
> >> +       pmd_t *nextp;
> >> +       phys_addr_t next_phys;
> >> +       uintptr_t pud_index = pud_index(va);
> >> +
> >> +       if (sz == PUD_SIZE) {
> >> +               if (pud_val(pudp[pud_index]) == 0)
> >> +                       pudp[pud_index] = pfn_pud(PFN_DOWN(pa), prot);
> >> +               return;
> >> +       }
> >> +
> >> +       if (pud_val(pudp[pud_index]) == 0) {
> >> +               next_phys = alloc_pmd(va);
> >> +               pudp[pud_index] = pfn_pud(PFN_DOWN(next_phys),
> PAGE_TABLE);
> >> +               nextp = get_pmd_virt(next_phys);
> >> +               memset(nextp, 0, PAGE_SIZE);
> >> +       } else {
> >> +               next_phys = PFN_PHYS(_pud_pfn(pudp[pud_index]));
> >> +               nextp = get_pmd_virt(next_phys);
> >> +       }
> >> +
> >> +       create_pmd_mapping(nextp, va, pa, sz, prot); }
> >> +
> >> +#define pgd_next_t             pud_t
> >> +#define alloc_pgd_next(__va)   alloc_pud(__va)
> >> +#define get_pgd_next_virt(__pa)        get_pud_virt(__pa)
> >>   #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot)     \
> >> -       create_pmd_mapping(__nextp, __va, __pa, __sz, __prot)
> >> -#define fixmap_pgd_next                fixmap_pmd
> >> +       create_pud_mapping(__nextp, __va, __pa, __sz, __prot)
> >> +#define fixmap_pgd_next                (pgtable_l4_enabled ?                   \
> >> +                       (uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd)
> >> +#define trampoline_pgd_next    (pgtable_l4_enabled ?                   \
> >> +                       (uintptr_t)trampoline_pud :
> >> +(uintptr_t)trampoline_pmd)
> >>   #else
> >>   #define pgd_next_t             pte_t
> >>   #define alloc_pgd_next(__va)   alloc_pte(__va)
> >>   #define get_pgd_next_virt(__pa)        get_pte_virt(__pa)
> >>   #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot)     \
> >>          create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
> >> -#define fixmap_pgd_next                fixmap_pte
> >> +#define fixmap_pgd_next                ((uintptr_t)fixmap_pte)
> >>   #endif
> >>
> >>   static void __init create_pgd_mapping(pgd_t *pgdp, @@ -328,6
> >> +398,13 @@ static void __init create_pgd_mapping(pgd_t *pgdp,
> >>          phys_addr_t next_phys;
> >>          uintptr_t pgd_index = pgd_index(va);
> >>
> >> +#ifndef __PAGETABLE_PMD_FOLDED
> >> +       if (!pgtable_l4_enabled) {
> >> +               create_pud_mapping((pud_t *)pgdp, va, pa, sz, prot);
> >> +               return;
> >> +       }
> >> +#endif
> >> +
> >>          if (sz == PGDIR_SIZE) {
> >>                  if (pgd_val(pgdp[pgd_index]) == 0)
> >>                          pgdp[pgd_index] = pfn_pgd(PFN_DOWN(pa),
> >> prot); @@ -419,6 +496,47 @@ void __init relocate_kernel(uintptr_t
> load_pa)
> >>          }
> >>   }
> >>
> >> +#if defined(CONFIG_64BIT) && !defined(CONFIG_MAXPHYSMEM_2GB)
> void
> >> +disable_pgtable_l4(void) {
> >> +       pgtable_l4_enabled = false;
> >> +       __page_offset = PAGE_OFFSET_L3;
> >> +       satp_mode = SATP_MODE_39;
> >> +}
> >> +
> >> +/* There is a simple way to determine if 4-level is supported by the
> >> + * underlying hardware: establish 1:1 mapping in 4-level page table
> >> +mode
> >> + * then read SATP to see if the configuration was taken into account
> >> + * meaning sv48 is supported.
> >> + */
> >> +asmlinkage __init void set_satp_mode(uintptr_t load_pa) {
> >> +       u64 identity_satp, hw_satp;
> >> +       int cpus_node;
> >> +
> >> +       create_pgd_mapping(early_pg_dir, load_pa, (uintptr_t)early_pud,
> >> +                          PGDIR_SIZE, PAGE_TABLE);
> >> +       create_pud_mapping(early_pud, load_pa, (uintptr_t)early_pmd,
> >> +                          PUD_SIZE, PAGE_TABLE);
> >> +       create_pmd_mapping(early_pmd, load_pa, load_pa,
> >> +                          PMD_SIZE, PAGE_KERNEL_EXEC);
> >> +
> >> +       identity_satp = PFN_DOWN((uintptr_t)&early_pg_dir) | satp_mode;
> >> +       local_flush_tlb_all();
> >> +       csr_write(CSR_SATP, identity_satp);
> >> +
> >> +       hw_satp = csr_read(CSR_SATP);
> >> +       csr_write(CSR_SATP, 0ULL);
> >> +       local_flush_tlb_all();
> >> +
> >> +       if (hw_satp != identity_satp)
> >> +               disable_pgtable_l4();
> >> +
> >> +       memset(early_pg_dir, 0, PAGE_SIZE);
> >> +       memset(early_pud, 0, PAGE_SIZE);
> >> +       memset(early_pmd, 0, PAGE_SIZE); } #endif
> >>   #endif
> >>
> >>   static uintptr_t load_pa, load_sz;
> >> @@ -442,9 +560,14 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
> >>          load_pa = (uintptr_t)(&_start);
> >>          load_sz = (uintptr_t)(&_end) - load_pa;
> >>
> >> +#if defined(CONFIG_64BIT) && !defined(CONFIG_MAXPHYSMEM_2GB)
> >> +       set_satp_mode(load_pa);
> >> +#endif
> >> +
> >> +       kernel_virt_addr = KERNEL_VIRT_ADDR;
> >> +
> >>          va_pa_offset = PAGE_OFFSET - load_pa;
> >>          va_kernel_pa_offset = kernel_virt_addr - load_pa;
> >> -
> >>          pfn_base = PFN_DOWN(load_pa);
> >>
> >>   #ifdef CONFIG_RELOCATABLE
> >> @@ -473,15 +596,22 @@ asmlinkage void __init setup_vm(uintptr_t
> >> dtb_pa)
> >>
> >>          /* Setup early PGD for fixmap */
> >>          create_pgd_mapping(early_pg_dir, FIXADDR_START,
> >> -                          (uintptr_t)fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
> >> +                          fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
> >>
> >>   #ifndef __PAGETABLE_PMD_FOLDED
> >> -       /* Setup fixmap PMD */
> >> +       /* Setup fixmap PUD and PMD */
> >> +       if (pgtable_l4_enabled)
> >> +               create_pud_mapping(fixmap_pud, FIXADDR_START,
> >> +                          (uintptr_t)fixmap_pmd, PUD_SIZE,
> >> + PAGE_TABLE);
> >>          create_pmd_mapping(fixmap_pmd, FIXADDR_START,
> >>                             (uintptr_t)fixmap_pte, PMD_SIZE,
> >> PAGE_TABLE);
> >> +
> >>          /* Setup trampoline PGD and PMD */
> >>          create_pgd_mapping(trampoline_pg_dir, kernel_virt_addr,
> >> -                          (uintptr_t)trampoline_pmd, PGDIR_SIZE, PAGE_TABLE);
> >> +                          trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE);
> >> +       if (pgtable_l4_enabled)
> >> +               create_pud_mapping(trampoline_pud, kernel_virt_addr,
> >> +                          (uintptr_t)trampoline_pmd, PUD_SIZE,
> >> + PAGE_TABLE);
> >>          create_pmd_mapping(trampoline_pmd, kernel_virt_addr,
> >>                             load_pa, PMD_SIZE, PAGE_KERNEL_EXEC);
> >>   #else
> >> @@ -558,12 +688,13 @@ static void __init setup_vm_final(void)
> >>
> >>          vm_area_add_early(&vm_kernel);
> >>
> >> -       /* Clear fixmap PTE and PMD mappings */
> >> +       /* Clear fixmap page table mappings */
> >>          clear_fixmap(FIX_PTE);
> >>          clear_fixmap(FIX_PMD);
> >> +       clear_fixmap(FIX_PUD);
> >>
> >>          /* Move to swapper page table */
> >> -       csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) |
> SATP_MODE);
> >> +       csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) |
> >> + satp_mode);
> >>          local_flush_tlb_all();
> >>   }
> >>
> >> --
> >> 2.20.1
> >>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ