Message-Id: <20210401223010.3580480-1-slyfox@gentoo.org>
Date:   Thu,  1 Apr 2021 23:30:10 +0100
From:   Sergei Trofimovich <slyfox@...too.org>
To:     linux-mm@...ck.org
Cc:     linux-kernel@...r.kernel.org,
        Sergei Trofimovich <slyfox@...too.org>,
        Ingo Molnar <mingo@...hat.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Juri Lelli <juri.lelli@...hat.com>,
        Vincent Guittot <vincent.guittot@...aro.org>,
        Dietmar Eggemann <dietmar.eggemann@....com>,
        Steven Rostedt <rostedt@...dmis.org>,
        Ben Segall <bsegall@...gle.com>, Mel Gorman <mgorman@...e.de>,
        Daniel Bristot de Oliveira <bristot@...hat.com>,
        Andrew Morton <akpm@...ux-foundation.org>
Subject: [PATCH] mm: page_owner: detect page_owner recursion via task_struct

Before the change, page_owner recursion was detected by fetching a
backtrace and scanning it for the current instruction pointer.
This has a few problems:
- it is slightly slow, as it requires an extra backtrace and a linear
  scan of the resulting stack
- the check can only happen after the backtrace has been fetched, which
  is too late if fetching the backtrace itself needed a memory
  allocation (ia64's unwinder does exactly that)
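
For reference, the check being removed below is just a linear scan of
the saved trace. As a stand-alone, compilable sketch of that pattern
(user-space, made-up addresses; not the kernel code itself):

  #include <stdbool.h>
  #include <stdio.h>

  /* Return true if 'ip' shows up among the saved trace entries. */
  static bool check_recursive_alloc(const unsigned long *entries,
                                    unsigned int nr_entries,
                                    unsigned long ip)
  {
          for (unsigned int i = 0; i < nr_entries; i++)
                  if (entries[i] == ip)
                          return true;
          return false;
  }

  int main(void)
  {
          /* Pretend these are return addresses captured earlier. */
          unsigned long trace[] = { 0x1000, 0x2000, 0x3000 };

          printf("%d\n", check_recursive_alloc(trace, 3, 0x2000)); /* 1: recursion */
          printf("%d\n", check_recursive_alloc(trace, 3, 0x4000)); /* 0: clean */
          return 0;
  }

Note that such a check can only run after the trace has already been
captured, which is exactly the "too late" problem above.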

To simplify recursion tracking, let's track page_owner recursion depth
as a counter in 'struct task_struct'.

The change makes page_owner=on work on ia64 by avoiding infinite
recursion in:
  kmalloc()
  -> __set_page_owner()
  -> save_stack()
  -> unwind() [ia64-specific]
  -> build_script()
  -> kmalloc()
  -> __set_page_owner() [we short-circuit here]
  -> save_stack()
  -> unwind() [recursion]
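
The guard itself is just a per-task depth counter. A minimal user-space
analogue, with a thread-local counter standing in for the new
task_struct field (illustrative only, not part of the patch):

  #include <stdio.h>

  #define MAX_RECURSION_DEPTH 1

  /* Per-thread, playing the role of task_struct::page_owner_depth. */
  static __thread unsigned int depth;

  static void save_stack(int level)
  {
          if (depth >= MAX_RECURSION_DEPTH) {
                  printf("level %d: short-circuited\n", level);
                  return;
          }

          depth++;
          /* Models the unwind() -> kmalloc() -> __set_page_owner() re-entry. */
          printf("level %d: doing work that re-enters save_stack()\n", level);
          save_stack(level + 1);
          depth--;
  }

  int main(void)
  {
          save_stack(0);
          return 0;
  }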

CC: Ingo Molnar <mingo@...hat.com>
CC: Peter Zijlstra <peterz@...radead.org>
CC: Juri Lelli <juri.lelli@...hat.com>
CC: Vincent Guittot <vincent.guittot@...aro.org>
CC: Dietmar Eggemann <dietmar.eggemann@....com>
CC: Steven Rostedt <rostedt@...dmis.org>
CC: Ben Segall <bsegall@...gle.com>
CC: Mel Gorman <mgorman@...e.de>
CC: Daniel Bristot de Oliveira <bristot@...hat.com>
CC: Andrew Morton <akpm@...ux-foundation.org>
CC: linux-mm@...ck.org
Signed-off-by: Sergei Trofimovich <slyfox@...too.org>
---
 include/linux/sched.h |  9 +++++++++
 init/init_task.c      |  3 +++
 mm/page_owner.c       | 41 +++++++++++++++++------------------------
 3 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ef00bb22164c..35771703fd89 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1371,6 +1371,15 @@ struct task_struct {
 	struct llist_head               kretprobe_instances;
 #endif
 
+#ifdef CONFIG_PAGE_OWNER
+	/*
+	 * Used by page_owner=on to detect recursion in page tracking.
 +	 * Is it fine to use non-atomic ops here, given that this field
 +	 * is only ever accessed via current->page_owner_depth?
+	 */
+	unsigned int page_owner_depth;
+#endif
+
 	/*
 	 * New fields for task_struct should be added above here, so that
 	 * they are included in the randomized portion of task_struct.
diff --git a/init/init_task.c b/init/init_task.c
index 3711cdaafed2..f579f2b2eca8 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -213,6 +213,9 @@ struct task_struct init_task
 #ifdef CONFIG_SECCOMP
 	.seccomp	= { .filter_count = ATOMIC_INIT(0) },
 #endif
+#ifdef CONFIG_PAGE_OWNER
+	.page_owner_depth	= 0,
+#endif
 };
 EXPORT_SYMBOL(init_task);
 
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 7147fd34a948..422558605fcc 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -20,6 +20,16 @@
  */
 #define PAGE_OWNER_STACK_DEPTH (16)
 
+/*
 + * How many reentries into page_owner we allow.
+ *
+ * Sometimes metadata allocation tracking requires more memory to be allocated:
 + * - when a new stack trace is saved to the stack depot
 + * - when the backtrace itself is calculated (ia64)
 + * Instead of falling into infinite recursion, give it a chance to recover.
+ */
+#define PAGE_OWNER_MAX_RECURSION_DEPTH (1)
+
 struct page_owner {
 	unsigned short order;
 	short last_migrate_reason;
@@ -97,42 +107,25 @@ static inline struct page_owner *get_page_owner(struct page_ext *page_ext)
 	return (void *)page_ext + page_owner_ops.offset;
 }
 
-static inline bool check_recursive_alloc(unsigned long *entries,
-					 unsigned int nr_entries,
-					 unsigned long ip)
-{
-	unsigned int i;
-
-	for (i = 0; i < nr_entries; i++) {
-		if (entries[i] == ip)
-			return true;
-	}
-	return false;
-}
-
 static noinline depot_stack_handle_t save_stack(gfp_t flags)
 {
 	unsigned long entries[PAGE_OWNER_STACK_DEPTH];
 	depot_stack_handle_t handle;
 	unsigned int nr_entries;
 
-	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
-
-	/*
-	 * We need to check recursion here because our request to
-	 * stackdepot could trigger memory allocation to save new
-	 * entry. New memory allocation would reach here and call
-	 * stack_depot_save_entries() again if we don't catch it. There is
-	 * still not enough memory in stackdepot so it would try to
-	 * allocate memory again and loop forever.
-	 */
-	if (check_recursive_alloc(entries, nr_entries, _RET_IP_))
 +	/* Avoid recursion: stack trace generation below may itself allocate. */
+	if (current->page_owner_depth >= PAGE_OWNER_MAX_RECURSION_DEPTH)
 		return dummy_handle;
 
+	current->page_owner_depth++;
+
+	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
+
 	handle = stack_depot_save(entries, nr_entries, flags);
 	if (!handle)
 		handle = failure_handle;
 
+	current->page_owner_depth--;
 	return handle;
 }
 
-- 
2.31.1
