lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CANpmjNOaUdBOX1z1TST5djOLuL2DWj1Vus=ot_F_e_-8am3qZQ@mail.gmail.com>
Date: Tue, 13 Feb 2024 09:30:25 +0100
From: Marco Elver <elver@...gle.com>
To: Oscar Salvador <osalvador@...e.de>
Cc: Andrew Morton <akpm@...ux-foundation.org>, linux-kernel@...r.kernel.org, 
	linux-mm@...ck.org, Michal Hocko <mhocko@...e.com>, Vlastimil Babka <vbabka@...e.cz>, 
	Andrey Konovalov <andreyknvl@...il.com>, Alexander Potapenko <glider@...gle.com>
Subject: Re: [PATCH v8 2/5] mm,page_owner: Implement the tracking of the
 stacks count

On Mon, 12 Feb 2024 at 23:29, Oscar Salvador <osalvador@...e.de> wrote:
>
> page_owner needs to increment a stack_record refcount when a new allocation
> occurs, and decrement it on a free operation.
> In order to do that, we need to have a way to get a stack_record from a
> handle.
> Implement __stack_depot_get_stack_record() which does just that, and make
> it public so page_owner can use it.
>
> Also implement {inc,dec}_stack_record_count() which increments
> or decrements on respective allocation and free operations, via
> __reset_page_owner() (free operation) and __set_page_owner() (alloc
> operation).
>
> Traversing all stackdepot buckets comes with its own complexity,
> plus we would have to implement a way to mark only those stack_records
> that originated from page_owner, as those are the ones we are
> interested in.
> For that reason, page_owner maintains its own list of stack_records,
> because traversing that list is faster than traversing all buckets
> while keeping at the same time a low complexity.
> inc_stack_record_count() is responsible for adding new stack_records
> into the list stack_list.
>
> Modifications on the list are protected via a spinlock with irqs
> disabled, since this code can also be reached from IRQ context.
>
> Signed-off-by: Oscar Salvador <osalvador@...e.de>

For the code:

Reviewed-by: Marco Elver <elver@...gle.com>

But see minor comments below.

> ---
>  include/linux/stackdepot.h |  9 +++++
>  lib/stackdepot.c           |  8 +++++
>  mm/page_owner.c            | 73 ++++++++++++++++++++++++++++++++++++++
>  3 files changed, 90 insertions(+)
>
> diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
> index 90274860fd8e..f3c2162bf615 100644
> --- a/include/linux/stackdepot.h
> +++ b/include/linux/stackdepot.h
> @@ -175,6 +175,15 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
>  depot_stack_handle_t stack_depot_save(unsigned long *entries,
>                                       unsigned int nr_entries, gfp_t gfp_flags);
>
> +/**
> + * __stack_depot_get_stack_record - Get a pointer to a stack_record struct
> + * This function is only for internal purposes.

I think the body of the kernel-doc comment needs to go after the argument descriptions.

> + * @handle: Stack depot handle
> + *
> + * Return: Returns a pointer to a stack_record struct
> + */
> +struct stack_record *__stack_depot_get_stack_record(depot_stack_handle_t handle);
> +
>  /**
>   * stack_depot_fetch - Fetch a stack trace from stack depot
>   *
> diff --git a/lib/stackdepot.c b/lib/stackdepot.c
> index 6f9095374847..fdb09450a538 100644
> --- a/lib/stackdepot.c
> +++ b/lib/stackdepot.c
> @@ -685,6 +685,14 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries,
>  }
>  EXPORT_SYMBOL_GPL(stack_depot_save);
>
> +struct stack_record *__stack_depot_get_stack_record(depot_stack_handle_t handle)
> +{
> +       if (!handle)
> +               return NULL;
> +
> +       return depot_fetch_stack(handle);
> +}
> +
>  unsigned int stack_depot_fetch(depot_stack_handle_t handle,
>                                unsigned long **entries)
>  {
> diff --git a/mm/page_owner.c b/mm/page_owner.c
> index 5634e5d890f8..7d1b3f75cef3 100644
> --- a/mm/page_owner.c
> +++ b/mm/page_owner.c
> @@ -36,6 +36,14 @@ struct page_owner {
>         pid_t free_tgid;
>  };
>
> +struct stack {
> +       struct stack_record *stack_record;
> +       struct stack *next;
> +};
> +
> +static struct stack *stack_list;
> +static DEFINE_SPINLOCK(stack_list_lock);
> +
>  static bool page_owner_enabled __initdata;
>  DEFINE_STATIC_KEY_FALSE(page_owner_inited);
>
> @@ -61,6 +69,57 @@ static __init bool need_page_owner(void)
>         return page_owner_enabled;
>  }
>
> +static void add_stack_record_to_list(struct stack_record *stack_record)
> +{
> +       unsigned long flags;
> +       struct stack *stack;
> +
> +       stack = kmalloc(sizeof(*stack), GFP_KERNEL);
> +       if (stack) {

It's usually more elegant to write

if (!stack)
  return;

when the rest of the function is conditional.

> +               stack->stack_record = stack_record;
> +               stack->next = NULL;
> +
> +               spin_lock_irqsave(&stack_list_lock, flags);
> +               if (!stack_list) {
> +                       stack_list = stack;
> +               } else {
> +                       stack->next = stack_list;
> +                       stack_list = stack;
> +               }
> +               spin_unlock_irqrestore(&stack_list_lock, flags);
> +       }
> +}
> +
> +static void inc_stack_record_count(depot_stack_handle_t handle)
> +{
> +       struct stack_record *stack_record = __stack_depot_get_stack_record(handle);
> +
> +       if (stack_record) {
> +               /*
> +                * New stack_record's that do not use STACK_DEPOT_FLAG_GET start
> +                * with REFCOUNT_SATURATED to catch spurious increments of their
> +                * refcount.
> +                * Since we do not use STACK_DEPOT_FLAG_{GET,PUT} API, let us

I think I mentioned this in the other email: there is no
STACK_DEPOT_FLAG_PUT, only stack_depot_put().

> +                * set a refcount of 1 ourselves.
> +                */
> +               if (refcount_read(&stack_record->count) == REFCOUNT_SATURATED) {
> +                       refcount_set(&stack_record->count, 1);
> +
> +                       /* Add the new stack_record to our list */
> +                       add_stack_record_to_list(stack_record);
> +               }
> +               refcount_inc(&stack_record->count);
> +       }
> +}
> +
> +static void dec_stack_record_count(depot_stack_handle_t handle)
> +{
> +       struct stack_record *stack_record = __stack_depot_get_stack_record(handle);
> +
> +       if (stack_record)
> +               refcount_dec(&stack_record->count);
> +}
> +
>  static __always_inline depot_stack_handle_t create_dummy_stack(void)
>  {
>         unsigned long entries[4];
> @@ -140,6 +199,7 @@ void __reset_page_owner(struct page *page, unsigned short order)
>         int i;
>         struct page_ext *page_ext;
>         depot_stack_handle_t handle;
> +       depot_stack_handle_t alloc_handle;
>         struct page_owner *page_owner;
>         u64 free_ts_nsec = local_clock();
>
> @@ -147,6 +207,9 @@ void __reset_page_owner(struct page *page, unsigned short order)
>         if (unlikely(!page_ext))
>                 return;
>
> +       page_owner = get_page_owner(page_ext);
> +       alloc_handle = page_owner->handle;
> +
>         handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
>         for (i = 0; i < (1 << order); i++) {
>                 __clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
> @@ -158,6 +221,15 @@ void __reset_page_owner(struct page *page, unsigned short order)
>                 page_ext = page_ext_next(page_ext);
>         }
>         page_ext_put(page_ext);
> +       if (alloc_handle != early_handle)
> +               /*
> +                * early_handle is being set as a handle for all those
> +                * early allocated pages. See init_pages_in_zone().
> +                * Since their refcount is not being incremented because
> +                * the machinery is not ready yet, we cannot decrement
> +                * their refcount either.
> +                */
> +               dec_stack_record_count(alloc_handle);
>  }
>
>  static inline void __set_page_owner_handle(struct page_ext *page_ext,
> @@ -199,6 +271,7 @@ noinline void __set_page_owner(struct page *page, unsigned short order,
>                 return;
>         __set_page_owner_handle(page_ext, handle, order, gfp_mask);
>         page_ext_put(page_ext);
> +       inc_stack_record_count(handle);
>  }
>
>  void __set_page_owner_migrate_reason(struct page *page, int reason)
> --
> 2.43.0
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ