[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <mhng-2543F392-0764-425E-ACB2-C897B7834C6C@palmerdabbelt-mac>
Date: Mon, 23 Jun 2025 16:19:46 -0700 (PDT)
From: Palmer Dabbelt <palmer@...belt.com>
To: zhangchunyan@...as.ac.cn
CC: Paul Walmsley <paul.walmsley@...ive.com>, aou@...s.berkeley.edu,
Alexandre Ghiti <alex@...ti.fr>, akpm@...ux-foundation.org, linux-riscv@...ts.infradead.org, debug@...osinc.com,
Vedvyas Shanbhogue <ved@...osinc.com>, linux-kernel@...r.kernel.org, zhang.lyra@...il.com
Subject: Re: [PATCH V8 2/3] riscv: mm: Add soft-dirty page tracking support
On Wed, 18 Jun 2025 23:52:31 PDT (-0700), zhangchunyan@...as.ac.cn wrote:
> The Svrsw60t59b extension allows to free the PTE reserved bits 60 and 59
> for software, this patch uses bit 59 for soft-dirty.
>
> To add swap PTE soft-dirty tracking, we borrow bit 3 which is available
> for swap PTEs on RISC-V systems.
>
> Signed-off-by: Chunyan Zhang <zhangchunyan@...as.ac.cn>
> ---
> arch/riscv/Kconfig | 1 +
> arch/riscv/include/asm/pgtable-bits.h | 19 +++++++
> arch/riscv/include/asm/pgtable.h | 71 ++++++++++++++++++++++++++-
> 3 files changed, 89 insertions(+), 2 deletions(-)
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index 01e4c15bee12..5c787c09f4dc 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -141,6 +141,7 @@ config RISCV
> select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
> select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
> select HAVE_ARCH_SECCOMP_FILTER
> + select HAVE_ARCH_SOFT_DIRTY if 64BIT && MMU && RISCV_ISA_SVRSW60T59B
> select HAVE_ARCH_STACKLEAK
> select HAVE_ARCH_THREAD_STRUCT_WHITELIST
> select HAVE_ARCH_TRACEHOOK
> diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h
> index a8f5205cea54..a6fa871dc19e 100644
> --- a/arch/riscv/include/asm/pgtable-bits.h
> +++ b/arch/riscv/include/asm/pgtable-bits.h
> @@ -20,6 +20,25 @@
>
> #define _PAGE_SPECIAL (1 << 8) /* RSW: 0x1 */
> #define _PAGE_DEVMAP (1 << 9) /* RSW, devmap */
> +
> +#ifdef CONFIG_MEM_SOFT_DIRTY
> +
> +/* ext_svrsw60t59b: bit 59 for software dirty tracking */
> +#define _PAGE_SOFT_DIRTY \
> + ((riscv_has_extension_unlikely(RISCV_ISA_EXT_SVRSW60T59B)) ? \
> + (1UL << 59) : 0)
I haven't fully followed the code paths here, but it looks like we'll
unconditionally enable VM_SOFTDIRTY and thus allow
vma_soft_dirty_enabled() to return true even when we don't have support
in hardware.
I'm not exactly sure what could go wrong there, but it seems like at
least userspace could be told that soft dirty is around and thus get
confused. It also looks like debug_vm_pgtable() will start failing
tests on these systems, which will probably trigger some bug reports.
At a bare minimum someone who understands the core MM code should look
here. Maybe the best way to do that is to convert those static
CONFIG_MEM_SOFT_DIRTY checks to some sort of arch-implemented callback
so this can be dynamic, and then see what people say?
> +/*
> + * Bit 3 is always zero for swap entry computation, so we
> + * can borrow it for swap page soft-dirty tracking.
> + */
> +#define _PAGE_SWP_SOFT_DIRTY \
> + ((riscv_has_extension_unlikely(RISCV_ISA_EXT_SVRSW60T59B)) ? \
> + _PAGE_EXEC : 0)
> +#else
> +#define _PAGE_SOFT_DIRTY 0
> +#define _PAGE_SWP_SOFT_DIRTY 0
> +#endif /* CONFIG_MEM_SOFT_DIRTY */
> +
> #define _PAGE_TABLE _PAGE_PRESENT
>
> /*
> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> index a11816bbf9e7..efc2da97f124 100644
> --- a/arch/riscv/include/asm/pgtable.h
> +++ b/arch/riscv/include/asm/pgtable.h
> @@ -434,7 +434,7 @@ static inline pte_t pte_mkwrite_novma(pte_t pte)
>
> static inline pte_t pte_mkdirty(pte_t pte)
> {
> - return __pte(pte_val(pte) | _PAGE_DIRTY);
> + return __pte(pte_val(pte) | _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
> }
>
> static inline pte_t pte_mkclean(pte_t pte)
> @@ -467,6 +467,38 @@ static inline pte_t pte_mkhuge(pte_t pte)
> return pte;
> }
>
> +#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
> +static inline bool pte_soft_dirty(pte_t pte)
> +{
> + return !!(pte_val(pte) & _PAGE_SOFT_DIRTY);
> +}
> +
> +static inline pte_t pte_mksoft_dirty(pte_t pte)
> +{
> + return __pte(pte_val(pte) | _PAGE_SOFT_DIRTY);
> +}
> +
> +static inline pte_t pte_clear_soft_dirty(pte_t pte)
> +{
> + return __pte(pte_val(pte) & ~(_PAGE_SOFT_DIRTY));
> +}
> +
> +static inline bool pte_swp_soft_dirty(pte_t pte)
> +{
> + return !!(pte_val(pte) & _PAGE_SWP_SOFT_DIRTY);
> +}
> +
> +static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
> +{
> + return __pte(pte_val(pte) | _PAGE_SWP_SOFT_DIRTY);
> +}
> +
> +static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
> +{
> + return __pte(pte_val(pte) & ~(_PAGE_SWP_SOFT_DIRTY));
> +}
> +#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
> +
> #ifdef CONFIG_RISCV_ISA_SVNAPOT
> #define pte_leaf_size(pte) (pte_napot(pte) ? \
> napot_cont_size(napot_cont_order(pte)) :\
> @@ -819,6 +851,40 @@ static inline pud_t pud_mkspecial(pud_t pud)
> }
> #endif
>
> +#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
> +static inline bool pmd_soft_dirty(pmd_t pmd)
> +{
> + return pte_soft_dirty(pmd_pte(pmd));
> +}
> +
> +static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
> +{
> + return pte_pmd(pte_mksoft_dirty(pmd_pte(pmd)));
> +}
> +
> +static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
> +{
> + return pte_pmd(pte_clear_soft_dirty(pmd_pte(pmd)));
> +}
> +
> +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
> +static inline bool pmd_swp_soft_dirty(pmd_t pmd)
> +{
> + return pte_swp_soft_dirty(pmd_pte(pmd));
> +}
> +
> +static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
> +{
> + return pte_pmd(pte_swp_mksoft_dirty(pmd_pte(pmd)));
> +}
> +
> +static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
> +{
> + return pte_pmd(pte_swp_clear_soft_dirty(pmd_pte(pmd)));
> +}
> +#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
> +#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
> +
> static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
> pmd_t *pmdp, pmd_t pmd)
> {
> @@ -1005,7 +1071,8 @@ static inline pud_t pud_modify(pud_t pud, pgprot_t newprot)
> *
> * Format of swap PTE:
> * bit 0: _PAGE_PRESENT (zero)
> - * bit 1 to 3: _PAGE_LEAF (zero)
> + * bit 1 to 2: (zero)
> + * bit 3: _PAGE_SWP_SOFT_DIRTY
> * bit 5: _PAGE_PROT_NONE (zero)
> * bit 6: exclusive marker
> * bits 7 to 11: swap type
Powered by blists - more mailing lists