lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAMn1gO5pGVRCErVF+Ca-4JgHRKEcq9sDGyEe--gEjj5ZLrB8sA@mail.gmail.com>
Date: Mon, 29 Jan 2024 16:04:18 -0800
From: Peter Collingbourne <pcc@...gle.com>
To: Alexandru Elisei <alexandru.elisei@....com>
Cc: catalin.marinas@....com, will@...nel.org, oliver.upton@...ux.dev, 
	maz@...nel.org, james.morse@....com, suzuki.poulose@....com, 
	yuzenghui@...wei.com, arnd@...db.de, akpm@...ux-foundation.org, 
	mingo@...hat.com, peterz@...radead.org, juri.lelli@...hat.com, 
	vincent.guittot@...aro.org, dietmar.eggemann@....com, rostedt@...dmis.org, 
	bsegall@...gle.com, mgorman@...e.de, bristot@...hat.com, vschneid@...hat.com, 
	mhiramat@...nel.org, rppt@...nel.org, hughd@...gle.com, steven.price@....com, 
	anshuman.khandual@....com, vincenzo.frascino@....com, david@...hat.com, 
	eugenis@...gle.com, kcc@...gle.com, hyesoo.yu@...sung.com, 
	linux-arm-kernel@...ts.infradead.org, linux-kernel@...r.kernel.org, 
	kvmarm@...ts.linux.dev, linux-fsdevel@...r.kernel.org, 
	linux-arch@...r.kernel.org, linux-mm@...ck.org, 
	linux-trace-kernel@...r.kernel.org
Subject: Re: [PATCH RFC v3 23/35] arm64: mte: Try to reserve tag storage in arch_alloc_page()

On Thu, Jan 25, 2024 at 8:45 AM Alexandru Elisei
<alexandru.elisei@....com> wrote:
>
> Reserve tag storage for a page that is being allocated as tagged. This
> is a best effort approach, and failing to reserve tag storage is
> allowed.
>
> When all the associated tagged pages have been freed, return the tag
> storage pages back to the page allocator, where they can be used again for
> data allocations.
>
> Signed-off-by: Alexandru Elisei <alexandru.elisei@....com>
> ---
>
> Changes since rfc v2:
>
> * Based on rfc v2 patch #16 ("arm64: mte: Manage tag storage on page
> allocation").
> * Fixed calculation of the number of associated tag storage blocks (Hyesoo
> Yu).
> * Tag storage is reserved in arch_alloc_page() instead of
> arch_prep_new_page().
>
>  arch/arm64/include/asm/mte.h             |  16 +-
>  arch/arm64/include/asm/mte_tag_storage.h |  31 +++
>  arch/arm64/include/asm/page.h            |   5 +
>  arch/arm64/include/asm/pgtable.h         |  19 ++
>  arch/arm64/kernel/mte_tag_storage.c      | 234 +++++++++++++++++++++++
>  arch/arm64/mm/fault.c                    |   7 +
>  fs/proc/page.c                           |   1 +
>  include/linux/kernel-page-flags.h        |   1 +
>  include/linux/page-flags.h               |   1 +
>  include/trace/events/mmflags.h           |   3 +-
>  mm/huge_memory.c                         |   1 +
>  11 files changed, 316 insertions(+), 3 deletions(-)
>
> diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
> index 8034695b3dd7..6457b7899207 100644
> --- a/arch/arm64/include/asm/mte.h
> +++ b/arch/arm64/include/asm/mte.h
> @@ -40,12 +40,24 @@ void mte_free_tag_buf(void *buf);
>  #ifdef CONFIG_ARM64_MTE
>
>  /* track which pages have valid allocation tags */
> -#define PG_mte_tagged  PG_arch_2
> +#define PG_mte_tagged          PG_arch_2
>  /* simple lock to avoid multiple threads tagging the same page */
> -#define PG_mte_lock    PG_arch_3
> +#define PG_mte_lock            PG_arch_3
> +/* Track if a tagged page has tag storage reserved */
> +#define PG_tag_storage_reserved        PG_arch_4
> +
> +#ifdef CONFIG_ARM64_MTE_TAG_STORAGE
> +DECLARE_STATIC_KEY_FALSE(tag_storage_enabled_key);
> +extern bool page_tag_storage_reserved(struct page *page);
> +#endif
>
>  static inline void set_page_mte_tagged(struct page *page)
>  {
> +#ifdef CONFIG_ARM64_MTE_TAG_STORAGE
> +       /* Open code mte_tag_storage_enabled() */
> +       WARN_ON_ONCE(static_branch_likely(&tag_storage_enabled_key) &&
> +                    !page_tag_storage_reserved(page));
> +#endif
>         /*
>          * Ensure that the tags written prior to this function are visible
>          * before the page flags update.
> diff --git a/arch/arm64/include/asm/mte_tag_storage.h b/arch/arm64/include/asm/mte_tag_storage.h
> index 7b3f6bff8e6f..09f1318d924e 100644
> --- a/arch/arm64/include/asm/mte_tag_storage.h
> +++ b/arch/arm64/include/asm/mte_tag_storage.h
> @@ -5,6 +5,12 @@
>  #ifndef __ASM_MTE_TAG_STORAGE_H
>  #define __ASM_MTE_TAG_STORAGE_H
>
> +#ifndef __ASSEMBLY__
> +
> +#include <linux/mm_types.h>
> +
> +#include <asm/mte.h>
> +
>  #ifdef CONFIG_ARM64_MTE_TAG_STORAGE
>
>  DECLARE_STATIC_KEY_FALSE(tag_storage_enabled_key);
> @@ -15,6 +21,15 @@ static inline bool tag_storage_enabled(void)
>  }
>
>  void mte_init_tag_storage(void);
> +
> +static inline bool alloc_requires_tag_storage(gfp_t gfp)
> +{
> +       return gfp & __GFP_TAGGED;
> +}
> +int reserve_tag_storage(struct page *page, int order, gfp_t gfp);
> +void free_tag_storage(struct page *page, int order);
> +
> +bool page_tag_storage_reserved(struct page *page);
>  #else
>  static inline bool tag_storage_enabled(void)
>  {
> @@ -23,6 +38,22 @@ static inline bool tag_storage_enabled(void)
>  static inline void mte_init_tag_storage(void)
>  {
>  }
> +static inline bool alloc_requires_tag_storage(struct page *page)

This function should take a gfp_t to match the
CONFIG_ARM64_MTE_TAG_STORAGE case.

Peter

> +{
> +       return false;
> +}
> +static inline int reserve_tag_storage(struct page *page, int order, gfp_t gfp)
> +{
> +       return 0;
> +}
> +static inline void free_tag_storage(struct page *page, int order)
> +{
> +}
> +static inline bool page_tag_storage_reserved(struct page *page)
> +{
> +       return true;
> +}
>  #endif /* CONFIG_ARM64_MTE_TAG_STORAGE */
>
> +#endif /* !__ASSEMBLY__ */
>  #endif /* __ASM_MTE_TAG_STORAGE_H  */
> diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
> index 88bab032a493..3a656492f34a 100644
> --- a/arch/arm64/include/asm/page.h
> +++ b/arch/arm64/include/asm/page.h
> @@ -35,6 +35,11 @@ void copy_highpage(struct page *to, struct page *from);
>  void tag_clear_highpage(struct page *to);
>  #define __HAVE_ARCH_TAG_CLEAR_HIGHPAGE
>
> +#ifdef CONFIG_ARM64_MTE_TAG_STORAGE
> +void arch_alloc_page(struct page *, int order, gfp_t gfp);
> +#define HAVE_ARCH_ALLOC_PAGE
> +#endif
> +
>  #define clear_user_page(page, vaddr, pg)       clear_page(page)
>  #define copy_user_page(to, from, vaddr, pg)    copy_page(to, from)
>
> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index 2499cc4fa4f2..f30466199a9b 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -10,6 +10,7 @@
>
>  #include <asm/memory.h>
>  #include <asm/mte.h>
> +#include <asm/mte_tag_storage.h>
>  #include <asm/pgtable-hwdef.h>
>  #include <asm/pgtable-prot.h>
>  #include <asm/tlbflush.h>
> @@ -1069,6 +1070,24 @@ static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
>                 mte_restore_page_tags_by_swp_entry(entry, &folio->page);
>  }
>
> +#ifdef CONFIG_ARM64_MTE_TAG_STORAGE
> +
> +#define __HAVE_ARCH_FREE_PAGES_PREPARE
> +static inline void arch_free_pages_prepare(struct page *page, int order)
> +{
> +       if (tag_storage_enabled() && page_mte_tagged(page))
> +               free_tag_storage(page, order);
> +}
> +
> +#define __HAVE_ARCH_ALLOC_CMA
> +static inline bool arch_alloc_cma(gfp_t gfp_mask)
> +{
> +       if (tag_storage_enabled() && alloc_requires_tag_storage(gfp_mask))
> +               return false;
> +       return true;
> +}
> +
> +#endif /* CONFIG_ARM64_MTE_TAG_STORAGE */
>  #endif /* CONFIG_ARM64_MTE */
>
>  #define __HAVE_ARCH_CALC_VMA_GFP
> diff --git a/arch/arm64/kernel/mte_tag_storage.c b/arch/arm64/kernel/mte_tag_storage.c
> index d58c68b4a849..762c7c803a70 100644
> --- a/arch/arm64/kernel/mte_tag_storage.c
> +++ b/arch/arm64/kernel/mte_tag_storage.c
> @@ -34,6 +34,31 @@ struct tag_region {
>  static struct tag_region tag_regions[MAX_TAG_REGIONS];
>  static int num_tag_regions;
>
> +/*
> + * A note on locking. Reserving tag storage takes the tag_blocks_lock mutex,
> + * because alloc_contig_range() might sleep.
> + *
> + * Freeing tag storage takes the xa_lock spinlock with interrupts disabled
> + * because pages can be freed from non-preemptible contexts, including from an
> + * interrupt handler.
> + *
> + * Because tag storage can be freed from interrupt contexts, the xarray is
> + * defined with the XA_FLAGS_LOCK_IRQ flag to disable interrupts when calling
> + * xa_store(). This is done to prevent a deadlock with free_tag_storage() being
> + * called from an interrupt raised before xa_store() releases the xa_lock.
> + *
> + * All of the above means that reserve_tag_storage() cannot run concurrently
> + * with itself (no concurrent insertions), but it can run at the same time as
> + * free_tag_storage(). The first thing that reserve_tag_storage() does after
> + * taking the mutex is increase the refcount on all present tag storage blocks
> + * with the xa_lock held, to serialize against freeing the blocks. This is an
> + * optimization to avoid taking and releasing the xa_lock after each iteration
> + * if the refcount operation was moved inside the loop, where it would have had
> + * to be executed for each block.
> + */
> +static DEFINE_XARRAY_FLAGS(tag_blocks_reserved, XA_FLAGS_LOCK_IRQ);
> +static DEFINE_MUTEX(tag_blocks_lock);
> +
>  static u32 __init get_block_size_pages(u32 block_size_bytes)
>  {
>         u32 a = PAGE_SIZE;
> @@ -364,3 +389,212 @@ static int __init mte_enable_tag_storage(void)
>         return -EINVAL;
>  }
>  arch_initcall(mte_enable_tag_storage);
> +
> +static void page_set_tag_storage_reserved(struct page *page, int order)
> +{
> +       int i;
> +
> +       for (i = 0; i < (1 << order); i++)
> +               set_bit(PG_tag_storage_reserved, &(page + i)->flags);
> +}
> +
> +static void block_ref_add(unsigned long block, struct tag_region *region, int order)
> +{
> +       int count;
> +
> +       count = min(1u << order, 32 * region->block_size_pages);
> +       page_ref_add(pfn_to_page(block), count);
> +}
> +
> +static int block_ref_sub_return(unsigned long block, struct tag_region *region, int order)
> +{
> +       int count;
> +
> +       count = min(1u << order, 32 * region->block_size_pages);
> +       return page_ref_sub_return(pfn_to_page(block), count);
> +}
> +
> +static bool tag_storage_block_is_reserved(unsigned long block)
> +{
> +       return xa_load(&tag_blocks_reserved, block) != NULL;
> +}
> +
> +static int tag_storage_reserve_block(unsigned long block, struct tag_region *region, int order)
> +{
> +       int ret;
> +
> +       ret = xa_err(xa_store(&tag_blocks_reserved, block, pfn_to_page(block), GFP_KERNEL));
> +       if (!ret)
> +               block_ref_add(block, region, order);
> +
> +       return ret;
> +}
> +
> +static int order_to_num_blocks(int order, u32 block_size_pages)
> +{
> +       int num_tag_storage_pages = max((1 << order) / 32, 1);
> +
> +       return DIV_ROUND_UP(num_tag_storage_pages, block_size_pages);
> +}
> +
> +static int tag_storage_find_block_in_region(struct page *page, unsigned long *blockp,
> +                                           struct tag_region *region)
> +{
> +       struct range *tag_range = &region->tag_range;
> +       struct range *mem_range = &region->mem_range;
> +       u64 page_pfn = page_to_pfn(page);
> +       u64 block, block_offset;
> +
> +       if (!(mem_range->start <= page_pfn && page_pfn <= mem_range->end))
> +               return -ERANGE;
> +
> +       block_offset = (page_pfn - mem_range->start) / 32;
> +       block = tag_range->start + rounddown(block_offset, region->block_size_pages);
> +
> +       if (block + region->block_size_pages - 1 > tag_range->end) {
> +               pr_err("Block 0x%llx-0x%llx is outside tag region 0x%llx-0x%llx\n",
> +                       PFN_PHYS(block), PFN_PHYS(block + region->block_size_pages + 1) - 1,
> +                       PFN_PHYS(tag_range->start), PFN_PHYS(tag_range->end + 1) - 1);
> +               return -ERANGE;
> +       }
> +       *blockp = block;
> +
> +       return 0;
> +
> +}
> +
> +static int tag_storage_find_block(struct page *page, unsigned long *block,
> +                                 struct tag_region **region)
> +{
> +       int i, ret;
> +
> +       for (i = 0; i < num_tag_regions; i++) {
> +               ret = tag_storage_find_block_in_region(page, block, &tag_regions[i]);
> +               if (ret == 0) {
> +                       *region = &tag_regions[i];
> +                       return 0;
> +               }
> +       }
> +
> +       return -EINVAL;
> +}
> +
> +bool page_tag_storage_reserved(struct page *page)
> +{
> +       return test_bit(PG_tag_storage_reserved, &page->flags);
> +}
> +
> +int reserve_tag_storage(struct page *page, int order, gfp_t gfp)
> +{
> +       unsigned long start_block, end_block;
> +       struct tag_region *region;
> +       unsigned long block;
> +       unsigned long flags;
> +       int ret = 0;
> +
> +       VM_WARN_ON_ONCE(!preemptible());
> +
> +       if (page_tag_storage_reserved(page))
> +               return 0;
> +
> +       /*
> +        * __alloc_contig_migrate_range() ignores gfp when allocating the
> +        * destination page for migration. Regardless, massage gfp flags and
> +        * remove __GFP_TAGGED to avoid recursion in case gfp stops being
> +        * ignored.
> +        */
> +       gfp &= ~__GFP_TAGGED;
> +       if (!(gfp & __GFP_NORETRY))
> +               gfp |= __GFP_RETRY_MAYFAIL;
> +
> +       ret = tag_storage_find_block(page, &start_block, &region);
> +       if (WARN_ONCE(ret, "Missing tag storage block for pfn 0x%lx", page_to_pfn(page)))
> +               return -EINVAL;
> +       end_block = start_block + order_to_num_blocks(order, region->block_size_pages);
> +
> +       mutex_lock(&tag_blocks_lock);
> +
> +       /* Check again, this time with the lock held. */
> +       if (page_tag_storage_reserved(page))
> +               goto out_unlock;
> +
> +       /* Make sure existing entries are not freed from out under out feet. */
> +       xa_lock_irqsave(&tag_blocks_reserved, flags);
> +       for (block = start_block; block < end_block; block += region->block_size_pages) {
> +               if (tag_storage_block_is_reserved(block))
> +                       block_ref_add(block, region, order);
> +       }
> +       xa_unlock_irqrestore(&tag_blocks_reserved, flags);
> +
> +       for (block = start_block; block < end_block; block += region->block_size_pages) {
> +               /* Refcount incremented above. */
> +               if (tag_storage_block_is_reserved(block))
> +                       continue;
> +
> +               ret = cma_alloc_range(region->cma, block, region->block_size_pages, 3, gfp);
> +               /* Should never happen. */
> +               VM_WARN_ON_ONCE(ret == -EEXIST);
> +               if (ret)
> +                       goto out_error;
> +
> +               ret = tag_storage_reserve_block(block, region, order);
> +               if (ret) {
> +                       cma_release(region->cma, pfn_to_page(block), region->block_size_pages);
> +                       goto out_error;
> +               }
> +       }
> +
> +       page_set_tag_storage_reserved(page, order);
> +out_unlock:
> +       mutex_unlock(&tag_blocks_lock);
> +
> +       return 0;
> +
> +out_error:
> +       xa_lock_irqsave(&tag_blocks_reserved, flags);
> +       for (block = start_block; block < end_block; block += region->block_size_pages) {
> +               if (tag_storage_block_is_reserved(block) &&
> +                   block_ref_sub_return(block, region, order) == 1) {
> +                       __xa_erase(&tag_blocks_reserved, block);
> +                       cma_release(region->cma, pfn_to_page(block), region->block_size_pages);
> +               }
> +       }
> +       xa_unlock_irqrestore(&tag_blocks_reserved, flags);
> +
> +       mutex_unlock(&tag_blocks_lock);
> +
> +       return ret;
> +}
> +
> +void free_tag_storage(struct page *page, int order)
> +{
> +       unsigned long block, start_block, end_block;
> +       struct tag_region *region;
> +       unsigned long flags;
> +       int ret;
> +
> +       ret = tag_storage_find_block(page, &start_block, &region);
> +       if (WARN_ONCE(ret, "Missing tag storage block for pfn 0x%lx", page_to_pfn(page)))
> +               return;
> +
> +       end_block = start_block + order_to_num_blocks(order, region->block_size_pages);
> +
> +       xa_lock_irqsave(&tag_blocks_reserved, flags);
> +       for (block = start_block; block < end_block; block += region->block_size_pages) {
> +               if (WARN_ONCE(!tag_storage_block_is_reserved(block),
> +                   "Block 0x%lx is not reserved for pfn 0x%lx", block, page_to_pfn(page)))
> +                       continue;
> +
> +               if (block_ref_sub_return(block, region, order) == 1) {
> +                       __xa_erase(&tag_blocks_reserved, block);
> +                       cma_release(region->cma, pfn_to_page(block), region->block_size_pages);
> +               }
> +       }
> +       xa_unlock_irqrestore(&tag_blocks_reserved, flags);
> +}
> +
> +void arch_alloc_page(struct page *page, int order, gfp_t gfp)
> +{
> +       if (tag_storage_enabled() && alloc_requires_tag_storage(gfp))
> +               reserve_tag_storage(page, order, gfp);
> +}
> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> index c022e473c17c..1ffaeccecda2 100644
> --- a/arch/arm64/mm/fault.c
> +++ b/arch/arm64/mm/fault.c
> @@ -37,6 +37,7 @@
>  #include <asm/esr.h>
>  #include <asm/kprobes.h>
>  #include <asm/mte.h>
> +#include <asm/mte_tag_storage.h>
>  #include <asm/processor.h>
>  #include <asm/sysreg.h>
>  #include <asm/system_misc.h>
> @@ -950,6 +951,12 @@ gfp_t arch_calc_vma_gfp(struct vm_area_struct *vma, gfp_t gfp)
>
>  void tag_clear_highpage(struct page *page)
>  {
> +       if (tag_storage_enabled() && !page_tag_storage_reserved(page)) {
> +               /* Don't zero the tags if tag storage is not reserved */
> +               clear_page(page_address(page));
> +               return;
> +       }
> +
>         /* Newly allocated page, shouldn't have been tagged yet */
>         WARN_ON_ONCE(!try_page_mte_tagging(page));
>         mte_zero_clear_page_tags(page_address(page));
> diff --git a/fs/proc/page.c b/fs/proc/page.c
> index 195b077c0fac..e7eb584a9234 100644
> --- a/fs/proc/page.c
> +++ b/fs/proc/page.c
> @@ -221,6 +221,7 @@ u64 stable_page_flags(struct page *page)
>  #ifdef CONFIG_ARCH_USES_PG_ARCH_X
>         u |= kpf_copy_bit(k, KPF_ARCH_2,        PG_arch_2);
>         u |= kpf_copy_bit(k, KPF_ARCH_3,        PG_arch_3);
> +       u |= kpf_copy_bit(k, KPF_ARCH_4,        PG_arch_4);
>  #endif
>
>         return u;
> diff --git a/include/linux/kernel-page-flags.h b/include/linux/kernel-page-flags.h
> index 859f4b0c1b2b..4a0d719ffdd4 100644
> --- a/include/linux/kernel-page-flags.h
> +++ b/include/linux/kernel-page-flags.h
> @@ -19,5 +19,6 @@
>  #define KPF_SOFTDIRTY          40
>  #define KPF_ARCH_2             41
>  #define KPF_ARCH_3             42
> +#define KPF_ARCH_4             43
>
>  #endif /* LINUX_KERNEL_PAGE_FLAGS_H */
> diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
> index b7237bce7446..03f03e6d735e 100644
> --- a/include/linux/page-flags.h
> +++ b/include/linux/page-flags.h
> @@ -135,6 +135,7 @@ enum pageflags {
>  #ifdef CONFIG_ARCH_USES_PG_ARCH_X
>         PG_arch_2,
>         PG_arch_3,
> +       PG_arch_4,
>  #endif
>         __NR_PAGEFLAGS,
>
> diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
> index 6ca0d5ed46c0..ba962fd10a2c 100644
> --- a/include/trace/events/mmflags.h
> +++ b/include/trace/events/mmflags.h
> @@ -125,7 +125,8 @@ IF_HAVE_PG_HWPOISON(hwpoison)                                               \
>  IF_HAVE_PG_IDLE(idle)                                                  \
>  IF_HAVE_PG_IDLE(young)                                                 \
>  IF_HAVE_PG_ARCH_X(arch_2)                                              \
> -IF_HAVE_PG_ARCH_X(arch_3)
> +IF_HAVE_PG_ARCH_X(arch_3)                                              \
> +IF_HAVE_PG_ARCH_X(arch_4)
>
>  #define show_page_flags(flags)                                         \
>         (flags) ? __print_flags(flags, "|",                             \
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 2bad63a7ec16..47932539cc50 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -2804,6 +2804,7 @@ static void __split_huge_page_tail(struct folio *folio, int tail,
>  #ifdef CONFIG_ARCH_USES_PG_ARCH_X
>                          (1L << PG_arch_2) |
>                          (1L << PG_arch_3) |
> +                        (1L << PG_arch_4) |
>  #endif
>                          (1L << PG_dirty) |
>                          LRU_GEN_MASK | LRU_REFS_MASK));
> --
> 2.43.0
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ