lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240212223029.30769-3-osalvador@suse.de>
Date: Mon, 12 Feb 2024 23:30:26 +0100
From: Oscar Salvador <osalvador@...e.de>
To: Andrew Morton <akpm@...ux-foundation.org>
Cc: linux-kernel@...r.kernel.org,
	linux-mm@...ck.org,
	Michal Hocko <mhocko@...e.com>,
	Vlastimil Babka <vbabka@...e.cz>,
	Marco Elver <elver@...gle.com>,
	Andrey Konovalov <andreyknvl@...il.com>,
	Alexander Potapenko <glider@...gle.com>,
	Oscar Salvador <osalvador@...e.de>
Subject: [PATCH v8 2/5] mm,page_owner: Implement the tracking of the stacks count

page_owner needs to increment a stack_record refcount when a new allocation
occurs, and decrement it on a free operation.
In order to do that, we need to have a way to get a stack_record from a
handle.
Implement __stack_depot_get_stack_record() which does just that, and make
it public so page_owner can use it.

Also implement {inc,dec}_stack_record_count(), which increment or
decrement the refcount on allocation and free operations respectively,
via __set_page_owner() (alloc operation) and __reset_page_owner() (free
operation).

Traversing all stackdepot buckets comes with its own complexity,
plus we would have to implement a way to mark only those stack_records
that originated from page_owner, as those are the ones we are
interested in.
For that reason, page_owner maintains its own list of stack_records:
traversing that list is faster than traversing all buckets, and it keeps
the complexity low.
inc_stack_record_count() is responsible for adding new stack_records
to the list stack_list.

Modifications to the list are protected by a spinlock with IRQs
disabled, since this code can also be reached from IRQ context.

Signed-off-by: Oscar Salvador <osalvador@...e.de>
---
 include/linux/stackdepot.h |  9 +++++
 lib/stackdepot.c           |  8 +++++
 mm/page_owner.c            | 73 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 90 insertions(+)

diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
index 90274860fd8e..f3c2162bf615 100644
--- a/include/linux/stackdepot.h
+++ b/include/linux/stackdepot.h
@@ -175,6 +175,15 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
 depot_stack_handle_t stack_depot_save(unsigned long *entries,
 				      unsigned int nr_entries, gfp_t gfp_flags);
 
+/**
+ * __stack_depot_get_stack_record - Get a pointer to a stack_record struct
+ * @handle: Stack depot handle
+ *
+ * This function is only for internal purposes.
+ * Return: Pointer to a stack_record struct, or NULL if @handle is zero
+ */
+struct stack_record *__stack_depot_get_stack_record(depot_stack_handle_t handle);
+
 /**
  * stack_depot_fetch - Fetch a stack trace from stack depot
  *
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index 6f9095374847..fdb09450a538 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -685,6 +685,14 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries,
 }
 EXPORT_SYMBOL_GPL(stack_depot_save);
 
+/*
+ * Look up the stack_record for @handle; a zero handle yields NULL.
+ */
+struct stack_record *__stack_depot_get_stack_record(depot_stack_handle_t handle)
+{
+	return handle ? depot_fetch_stack(handle) : NULL;
+}
+
 unsigned int stack_depot_fetch(depot_stack_handle_t handle,
 			       unsigned long **entries)
 {
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 5634e5d890f8..7d1b3f75cef3 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -36,6 +36,14 @@ struct page_owner {
 	pid_t free_tgid;
 };
 
+struct stack {
+	struct stack_record *stack_record;	/* record this node refers to */
+	struct stack *next;	/* next node; NULL at the tail */
+};
+
+static struct stack *stack_list;	/* list head; changed under stack_list_lock */
+static DEFINE_SPINLOCK(stack_list_lock);
+
 static bool page_owner_enabled __initdata;
 DEFINE_STATIC_KEY_FALSE(page_owner_inited);
 
@@ -61,6 +69,57 @@ static __init bool need_page_owner(void)
 	return page_owner_enabled;
 }
 
+/* Best-effort: link @stack_record at the head of stack_list. */
+static void add_stack_record_to_list(struct stack_record *stack_record)
+{
+	unsigned long flags;
+	struct stack *stack;
+
+	/*
+	 * Reached from the page allocation path, possibly in IRQ context:
+	 * never sleep here. On failure the record is just not listed.
+	 */
+	stack = kmalloc(sizeof(*stack), GFP_NOWAIT | __GFP_NOWARN);
+	if (!stack)
+		return;
+	stack->stack_record = stack_record;
+
+	spin_lock_irqsave(&stack_list_lock, flags);
+	stack->next = stack_list;
+	stack_list = stack;
+	spin_unlock_irqrestore(&stack_list_lock, flags);
+}
+
+/* Count one more allocation against @handle's stack_record. */
+static void inc_stack_record_count(depot_stack_handle_t handle)
+{
+	struct stack_record *stack_record = __stack_depot_get_stack_record(handle);
+	int old = REFCOUNT_SATURATED;
+
+	if (!stack_record)
+		return;
+	/*
+	 * New stack_record's that do not use STACK_DEPOT_FLAG_GET start with
+	 * REFCOUNT_SATURATED to catch spurious increments of their refcount.
+	 * We do not use the STACK_DEPOT_FLAG_{GET,PUT} API, so set a base
+	 * count of 1 ourselves. Use cmpxchg so that exactly one of several
+	 * racing CPUs resets the count and lists the record; a plain
+	 * read-then-set could lose increments and list it twice.
+	 */
+	if (refcount_read(&stack_record->count) == REFCOUNT_SATURATED &&
+	    atomic_try_cmpxchg_relaxed(&stack_record->count.refs, &old, 1))
+		add_stack_record_to_list(stack_record);
+	refcount_inc(&stack_record->count);
+}
+
+static void dec_stack_record_count(depot_stack_handle_t handle)
+{
+	struct stack_record *rec = __stack_depot_get_stack_record(handle);
+
+	if (rec)	/* no record (e.g. a zero handle): nothing to drop */
+		refcount_dec(&rec->count);
+}
+
 static __always_inline depot_stack_handle_t create_dummy_stack(void)
 {
 	unsigned long entries[4];
@@ -140,6 +199,7 @@ void __reset_page_owner(struct page *page, unsigned short order)
 	int i;
 	struct page_ext *page_ext;
 	depot_stack_handle_t handle;
+	depot_stack_handle_t alloc_handle;
 	struct page_owner *page_owner;
 	u64 free_ts_nsec = local_clock();
 
@@ -147,6 +207,9 @@ void __reset_page_owner(struct page *page, unsigned short order)
 	if (unlikely(!page_ext))
 		return;
 
+	page_owner = get_page_owner(page_ext);
+	alloc_handle = page_owner->handle;
+
 	handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
 	for (i = 0; i < (1 << order); i++) {
 		__clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
@@ -158,6 +221,15 @@ void __reset_page_owner(struct page *page, unsigned short order)
 		page_ext = page_ext_next(page_ext);
 	}
 	page_ext_put(page_ext);
+	if (alloc_handle != early_handle)
+		/*
+		 * early_handle is being set as a handle for all those
+		 * early allocated pages. See init_pages_in_zone().
+		 * Since their refcount is not being incremented because
+		 * the machinery is not ready yet, we cannot decrement
+		 * their refcount either.
+		 */
+		dec_stack_record_count(alloc_handle);
 }
 
 static inline void __set_page_owner_handle(struct page_ext *page_ext,
@@ -199,6 +271,7 @@ noinline void __set_page_owner(struct page *page, unsigned short order,
 		return;
 	__set_page_owner_handle(page_ext, handle, order, gfp_mask);
 	page_ext_put(page_ext);
+	inc_stack_record_count(handle);
 }
 
 void __set_page_owner_migrate_reason(struct page *page, int reason)
-- 
2.43.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ