lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 16 May 2023 21:13:00 +0800
From:   WANG Xuerui <kernel@...0n.name>
To:     Huacai Chen <chenhuacai@...ngson.cn>,
        Huacai Chen <chenhuacai@...nel.org>
Cc:     loongarch@...ts.linux.dev, linux-arch@...r.kernel.org,
        Xuefeng Li <lixuefeng@...ngson.cn>,
        Guo Ren <guoren@...nel.org>,
        Jiaxun Yang <jiaxun.yang@...goat.com>,
        linux-kernel@...r.kernel.org, loongson-kernel@...ts.loongnix.cn,
        Liang Gao <gaoliang@...ngson.cn>, Jun Yi <yijun@...ngson.cn>
Subject: Re: [PATCH] LoongArch: Introduce hardware page table walker

On 2023/5/16 20:46, Huacai Chen wrote:
> Loongson-3A6000 and newer processors have hardware page table walker
> (PTW) support. PTW can handle all fastpaths of TLBI/TLBL/TLBS/TLBM
> exceptions by hardware, software only need to handle slowpaths (page
> faults).
> 
> BTW, PTW doesn't append _PAGE_MODIFIED for page table entries, so we
> change pmd_dirty() and pte_dirty() to also check _PAGE_DIRTY for the
> "dirty" attribute.

Too bad we can't get a sneak peek at the new manuals... reviewing some 
of the code anyway.

> 
> Signed-off-by: Liang Gao <gaoliang@...ngson.cn>
> Signed-off-by: Jun Yi <yijun@...ngson.cn>
> Signed-off-by: Huacai Chen <chenhuacai@...ngson.cn>
> ---
>   arch/loongarch/include/asm/cpu-features.h |  2 +-
>   arch/loongarch/include/asm/cpu.h          |  2 ++
>   arch/loongarch/include/asm/loongarch.h    |  4 ++++
>   arch/loongarch/include/asm/pgtable.h      |  4 ++--
>   arch/loongarch/include/asm/tlb.h          |  3 +++
>   arch/loongarch/include/uapi/asm/hwcap.h   |  1 +
>   arch/loongarch/kernel/cpu-probe.c         |  4 ++++
>   arch/loongarch/kernel/proc.c              |  1 +
>   arch/loongarch/mm/tlb.c                   | 24 ++++++++++++++++++-----
>   arch/loongarch/mm/tlbex.S                 | 21 ++++++++++++++++++++
>   10 files changed, 58 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h
> index f6177f133477..2eafe6a6aca8 100644
> --- a/arch/loongarch/include/asm/cpu-features.h
> +++ b/arch/loongarch/include/asm/cpu-features.h
> @@ -64,6 +64,6 @@
>   #define cpu_has_eiodecode	cpu_opt(LOONGARCH_CPU_EIODECODE)
>   #define cpu_has_guestid		cpu_opt(LOONGARCH_CPU_GUESTID)
>   #define cpu_has_hypervisor	cpu_opt(LOONGARCH_CPU_HYPERVISOR)
> -
> +#define cpu_has_ptw		cpu_opt(LOONGARCH_CPU_PTW)
>   
>   #endif /* __ASM_CPU_FEATURES_H */
> diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h
> index 88773d849e33..48b9f7168bcc 100644
> --- a/arch/loongarch/include/asm/cpu.h
> +++ b/arch/loongarch/include/asm/cpu.h
> @@ -98,6 +98,7 @@ enum cpu_type_enum {
>   #define CPU_FEATURE_EIODECODE		23	/* CPU has EXTIOI interrupt pin decode mode */
>   #define CPU_FEATURE_GUESTID		24	/* CPU has GuestID feature */
>   #define CPU_FEATURE_HYPERVISOR		25	/* CPU has hypervisor (running in VM) */
> +#define CPU_FEATURE_PTW			26	/* CPU has hardware page table walker */
>   
>   #define LOONGARCH_CPU_CPUCFG		BIT_ULL(CPU_FEATURE_CPUCFG)
>   #define LOONGARCH_CPU_LAM		BIT_ULL(CPU_FEATURE_LAM)
> @@ -125,5 +126,6 @@ enum cpu_type_enum {
>   #define LOONGARCH_CPU_EIODECODE		BIT_ULL(CPU_FEATURE_EIODECODE)
>   #define LOONGARCH_CPU_GUESTID		BIT_ULL(CPU_FEATURE_GUESTID)
>   #define LOONGARCH_CPU_HYPERVISOR	BIT_ULL(CPU_FEATURE_HYPERVISOR)
> +#define LOONGARCH_CPU_PTW		BIT_ULL(CPU_FEATURE_PTW)
>   
>   #endif /* _ASM_CPU_H */
> diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
> index b3323ab5b78d..93b22a7af654 100644
> --- a/arch/loongarch/include/asm/loongarch.h
> +++ b/arch/loongarch/include/asm/loongarch.h
> @@ -138,6 +138,7 @@ static inline u32 read_cpucfg(u32 reg)
>   #define  CPUCFG2_MIPSBT			BIT(20)
>   #define  CPUCFG2_LSPW			BIT(21)
>   #define  CPUCFG2_LAM			BIT(22)
> +#define  CPUCFG2_PTW			BIT(24)
>   
>   #define LOONGARCH_CPUCFG3		0x3
>   #define  CPUCFG3_CCDMA			BIT(0)
> @@ -453,6 +454,9 @@ static __always_inline void iocsr_write64(u64 val, u32 reg)
>   #define  CSR_PWCTL0_PTBASE		(_ULCAST_(0x1f) << CSR_PWCTL0_PTBASE_SHIFT)
>   
>   #define LOONGARCH_CSR_PWCTL1		0x1d	/* PWCtl1 */
> +#define  CSR_PWCTL1_PTW_SHIFT		24
> +#define  CSR_PWCTL1_PTW_WIDTH		1
> +#define  CSR_PWCTL1_PTW			(_ULCAST_(0x1) << CSR_PWCTL1_PTW_SHIFT)
>   #define  CSR_PWCTL1_DIR3WIDTH_SHIFT	18
>   #define  CSR_PWCTL1_DIR3WIDTH_WIDTH	5
>   #define  CSR_PWCTL1_DIR3WIDTH		(_ULCAST_(0x1f) << CSR_PWCTL1_DIR3WIDTH_SHIFT)
> diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
> index d28fb9dbec59..5f93d6eef657 100644
> --- a/arch/loongarch/include/asm/pgtable.h
> +++ b/arch/loongarch/include/asm/pgtable.h
> @@ -362,7 +362,7 @@ extern pgd_t invalid_pg_dir[];
>    */
>   static inline int pte_write(pte_t pte)	{ return pte_val(pte) & _PAGE_WRITE; }
>   static inline int pte_young(pte_t pte)	{ return pte_val(pte) & _PAGE_ACCESSED; }
> -static inline int pte_dirty(pte_t pte)	{ return pte_val(pte) & _PAGE_MODIFIED; }
> +static inline int pte_dirty(pte_t pte)	{ return pte_val(pte) & (_PAGE_DIRTY | _PAGE_MODIFIED); }

Will this affect existing models? I can't seem to remember the details.

>   
>   static inline pte_t pte_mkold(pte_t pte)
>   {
> @@ -506,7 +506,7 @@ static inline pmd_t pmd_wrprotect(pmd_t pmd)
>   
>   static inline int pmd_dirty(pmd_t pmd)
>   {
> -	return !!(pmd_val(pmd) & _PAGE_MODIFIED);
> +	return !!(pmd_val(pmd) & (_PAGE_DIRTY | _PAGE_MODIFIED));
>   }
>   
>   static inline pmd_t pmd_mkclean(pmd_t pmd)
> diff --git a/arch/loongarch/include/asm/tlb.h b/arch/loongarch/include/asm/tlb.h
> index f5e4deb97402..0dc9ee2b05d2 100644
> --- a/arch/loongarch/include/asm/tlb.h
> +++ b/arch/loongarch/include/asm/tlb.h
> @@ -163,6 +163,9 @@ extern void handle_tlb_store(void);
>   extern void handle_tlb_modify(void);
>   extern void handle_tlb_refill(void);
>   extern void handle_tlb_protect(void);
> +extern void handle_tlb_load_ptw(void);
> +extern void handle_tlb_store_ptw(void);
> +extern void handle_tlb_modify_ptw(void);
>   
>   extern void dump_tlb_all(void);
>   extern void dump_tlb_regs(void);
> diff --git a/arch/loongarch/include/uapi/asm/hwcap.h b/arch/loongarch/include/uapi/asm/hwcap.h
> index 8840b72fa8e8..6955a7cb2c65 100644
> --- a/arch/loongarch/include/uapi/asm/hwcap.h
> +++ b/arch/loongarch/include/uapi/asm/hwcap.h
> @@ -16,5 +16,6 @@
>   #define HWCAP_LOONGARCH_LBT_X86		(1 << 10)
>   #define HWCAP_LOONGARCH_LBT_ARM		(1 << 11)
>   #define HWCAP_LOONGARCH_LBT_MIPS	(1 << 12)
> +#define HWCAP_LOONGARCH_PTW		(1 << 13)

This feature should be irrevelant to userspace, right? We should 
probably not add it here.

>   
>   #endif /* _UAPI_ASM_HWCAP_H */
> diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c
> index f42acc6c8df6..e925579c7a71 100644
> --- a/arch/loongarch/kernel/cpu-probe.c
> +++ b/arch/loongarch/kernel/cpu-probe.c
> @@ -136,6 +136,10 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c)
>   		c->options |= LOONGARCH_CPU_CRYPTO;
>   		elf_hwcap |= HWCAP_LOONGARCH_CRYPTO;
>   	}
> +	if (config & CPUCFG2_PTW) {
> +		c->options |= LOONGARCH_CPU_PTW;
> +		elf_hwcap |= HWCAP_LOONGARCH_PTW;
> +	}
>   	if (config & CPUCFG2_LVZP) {
>   		c->options |= LOONGARCH_CPU_LVZ;
>   		elf_hwcap |= HWCAP_LOONGARCH_LVZ;
> diff --git a/arch/loongarch/kernel/proc.c b/arch/loongarch/kernel/proc.c
> index 0d82907b5404..782a34e7336e 100644
> --- a/arch/loongarch/kernel/proc.c
> +++ b/arch/loongarch/kernel/proc.c
> @@ -79,6 +79,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
>   	if (cpu_has_crc32)	seq_printf(m, " crc32");
>   	if (cpu_has_complex)	seq_printf(m, " complex");
>   	if (cpu_has_crypto)	seq_printf(m, " crypto");
> +	if (cpu_has_ptw)	seq_printf(m, " ptw");
>   	if (cpu_has_lvz)	seq_printf(m, " lvz");
>   	if (cpu_has_lbt_x86)	seq_printf(m, " lbt_x86");
>   	if (cpu_has_lbt_arm)	seq_printf(m, " lbt_arm");
> diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c
> index 8bad6b0cff59..3e1d1d3c7058 100644
> --- a/arch/loongarch/mm/tlb.c
> +++ b/arch/loongarch/mm/tlb.c
> @@ -167,6 +167,9 @@ void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep
>   	int idx;
>   	unsigned long flags;
>   
> +	if (cpu_has_ptw)
> +		return;
> +
>   	/*
>   	 * Handle debugger faulting in for debugee.
>   	 */
> @@ -220,7 +223,11 @@ static void setup_ptwalker(void)
>   	pte_w = PAGE_SHIFT - 3;
>   
>   	pwctl0 = pte_i | pte_w << 5 | pmd_i << 10 | pmd_w << 15 | pud_i << 20 | pud_w << 25;
> -	pwctl1 = pgd_i | pgd_w << 6;
> +
> +	if (!cpu_has_ptw)
> +		pwctl1 = pgd_i | pgd_w << 6;
> +	else
> +		pwctl1 = pgd_i | pgd_w << 6 | CSR_PWCTL1_PTW;

This is better in avoiding the duplication:

if (cpu_has_ptw)
     pwctl1 |= CSR_PWCTL1_PTW;

>   
>   	csr_write64(pwctl0, LOONGARCH_CSR_PWCTL0);
>   	csr_write64(pwctl1, LOONGARCH_CSR_PWCTL1);
> @@ -264,10 +271,17 @@ void setup_tlb_handler(int cpu)
>   	if (cpu == 0) {
>   		memcpy((void *)tlbrentry, handle_tlb_refill, 0x80);
>   		local_flush_icache_range(tlbrentry, tlbrentry + 0x80);
> -		set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load, VECSIZE);
> -		set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load, VECSIZE);
> -		set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store, VECSIZE);
> -		set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify, VECSIZE);
> +		if (!cpu_has_ptw) {

Better not have the condition inverted for less cognitive burden (have 
to go "no [hardware] page table walker" -> "take the software approach" 
otherwise).

> +			set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load, VECSIZE);
> +			set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load, VECSIZE);
> +			set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store, VECSIZE);
> +			set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify, VECSIZE);
> +		} else {
> +			set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load_ptw, VECSIZE);
> +			set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load_ptw, VECSIZE);
> +			set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store_ptw, VECSIZE);
> +			set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify_ptw, VECSIZE);
> +		}
>   		set_handler(EXCCODE_TLBNR * VECSIZE, handle_tlb_protect, VECSIZE);
>   		set_handler(EXCCODE_TLBNX * VECSIZE, handle_tlb_protect, VECSIZE);
>   		set_handler(EXCCODE_TLBPE * VECSIZE, handle_tlb_protect, VECSIZE);
> diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S
> index 240ced55586e..01df8a0b87f6 100644
> --- a/arch/loongarch/mm/tlbex.S
> +++ b/arch/loongarch/mm/tlbex.S
> @@ -190,6 +190,13 @@ nopage_tlb_load:
>   	jr		t0
>   SYM_FUNC_END(handle_tlb_load)
>   
> +SYM_FUNC_START(handle_tlb_load_ptw)
> +	csrwr	t0, LOONGARCH_CSR_KS0
> +	csrwr	t1, LOONGARCH_CSR_KS1
> +	la_abs	t0, tlb_do_page_fault_0
> +	jirl	zero, t0, 0

jr t0

> +SYM_FUNC_END(handle_tlb_load_ptw)
> +
>   SYM_FUNC_START(handle_tlb_store)
>   	csrwr		t0, EXCEPTION_KS0
>   	csrwr		t1, EXCEPTION_KS1
> @@ -339,6 +346,13 @@ nopage_tlb_store:
>   	jr		t0
>   SYM_FUNC_END(handle_tlb_store)
>   
> +SYM_FUNC_START(handle_tlb_store_ptw)
> +	csrwr	t0, LOONGARCH_CSR_KS0
> +	csrwr	t1, LOONGARCH_CSR_KS1
> +	la_abs	t0, tlb_do_page_fault_1
> +	jirl	zero, t0, 0

Same here.

> +SYM_FUNC_END(handle_tlb_store_ptw)
> +
>   SYM_FUNC_START(handle_tlb_modify)
>   	csrwr		t0, EXCEPTION_KS0
>   	csrwr		t1, EXCEPTION_KS1
> @@ -486,6 +500,13 @@ nopage_tlb_modify:
>   	jr		t0
>   SYM_FUNC_END(handle_tlb_modify)
>   
> +SYM_FUNC_START(handle_tlb_modify_ptw)
> +	csrwr	t0, LOONGARCH_CSR_KS0
> +	csrwr	t1, LOONGARCH_CSR_KS1
> +	la_abs	t0, tlb_do_page_fault_1
> +	jirl	zero, t0, 0

And here.

> +SYM_FUNC_END(handle_tlb_modify_ptw)
> +
>   SYM_FUNC_START(handle_tlb_refill)
>   	csrwr		t0, LOONGARCH_CSR_TLBRSAVE
>   	csrrd		t0, LOONGARCH_CSR_PGD

-- 
WANG "xen0n" Xuerui

Linux/LoongArch mailing list: https://lore.kernel.org/loongarch/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ