Message-ID: <20240313033417.447216-1-pasha.tatashin@soleen.com>
Date: Wed, 13 Mar 2024 03:34:17 +0000
From: Pasha Tatashin <pasha.tatashin@...een.com>
To: akpm@...ux-foundation.org,
jpoimboe@...nel.org,
pasha.tatashin@...een.com,
kent.overstreet@...ux.dev,
peterz@...radead.org,
nphamcs@...il.com,
cerasuolodomenico@...il.com,
surenb@...gle.com,
lizhijian@...itsu.com,
willy@...radead.org,
shakeel.butt@...ux.dev,
vbabka@...e.cz,
ziy@...dia.com,
linux-kernel@...r.kernel.org,
linux-mm@...ck.org
Subject: [PATCH] vmstat: Keep count of the maximum page reached by the kernel stack

CONFIG_DEBUG_STACK_USAGE provides a mechanism to know the minimum
amount of memory that was ever left free on a kernel stack. Every time
a new record low is reached, a message is printed to the console.

However, this says nothing about how often each page within a stack is
actually used. Provide a mechanism to count the number of times each
stack page was the deepest page reached over the lifetime of a stack:
$ grep kstack /proc/vmstat
kstack_page_1 19974
kstack_page_2 94
kstack_page_3 0
kstack_page_4 0
In the above example, out of the ~20K threads that ever exited on that
machine, only 94 reached the second page of the stack, and none touched
pages three and four.

Signed-off-by: Pasha Tatashin <pasha.tatashin@...een.com>
---
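
Note: the counters are cumulative across all threads that have exited.
A hypothetical userspace reader (not part of this patch) could
summarize them as below; it assumes only the "name value" line format
of /proc/vmstat shown above:

	/* Hypothetical helper: print each kstack_page_* bucket's share. */
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		char name[16][64], key[64];
		unsigned long val[16], v, total = 0;
		int i, n = 0;
		FILE *f = fopen("/proc/vmstat", "r");

		if (!f) {
			perror("/proc/vmstat");
			return 1;
		}
		/* Each /proc/vmstat line is "name value". */
		while (fscanf(f, "%63s %lu", key, &v) == 2) {
			if (!strncmp(key, "kstack_page_", 12) && n < 16) {
				strcpy(name[n], key);
				val[n] = v;
				total += v;
				n++;
			}
		}
		fclose(f);
		for (i = 0; i < n; i++)
			printf("%-16s %10lu (%5.2f%%)\n", name[i], val[i],
			       total ? 100.0 * val[i] / total : 0.0);
		return 0;
	}
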
include/linux/sched/task_stack.h | 39 ++++++++++++++++++++++++++++++--
include/linux/vm_event_item.h | 29 ++++++++++++++++++++++++
include/linux/vmstat.h | 16 -------------
mm/vmstat.c | 11 +++++++++
4 files changed, 77 insertions(+), 18 deletions(-)
diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
index ccd72b978e1f..7ff7f9997266 100644
--- a/include/linux/sched/task_stack.h
+++ b/include/linux/sched/task_stack.h
@@ -95,9 +95,41 @@ static inline int object_is_on_stack(const void *obj)
extern void thread_stack_cache_init(void);
#ifdef CONFIG_DEBUG_STACK_USAGE
+#ifdef CONFIG_VM_EVENT_COUNTERS
+#include <linux/vm_event_item.h>
+
+/* Count the maximum page reached in kernel stacks */
+static inline void count_kstack_page(int stack_max_page)
+{
+ switch (stack_max_page) {
+ case 1:
+ this_cpu_inc(vm_event_states.event[KSTACK_PAGE_1]);
+ break;
+ case 2:
+ this_cpu_inc(vm_event_states.event[KSTACK_PAGE_2]);
+ break;
+#if THREAD_SIZE >= (4 * PAGE_SIZE)
+ case 3:
+ this_cpu_inc(vm_event_states.event[KSTACK_PAGE_3]);
+ break;
+ case 4:
+ this_cpu_inc(vm_event_states.event[KSTACK_PAGE_4]);
+ break;
+#endif
+#if THREAD_SIZE > (4 * PAGE_SIZE)
+ default:
+ this_cpu_inc(vm_event_states.event[KSTACK_PAGE_5]);
+#endif
+ }
+}
+#else /* !CONFIG_VM_EVENT_COUNTERS */
+static inline void count_kstack_page(int stack_max_page) {}
+#endif /* CONFIG_VM_EVENT_COUNTERS */
+
static inline unsigned long stack_not_used(struct task_struct *p)
{
unsigned long *n = end_of_stack(p);
+ unsigned long unused_stack;
do { /* Skip over canary */
# ifdef CONFIG_STACK_GROWSUP
@@ -108,10 +140,13 @@ static inline unsigned long stack_not_used(struct task_struct *p)
} while (!*n);
# ifdef CONFIG_STACK_GROWSUP
- return (unsigned long)end_of_stack(p) - (unsigned long)n;
+ unused_stack = (unsigned long)end_of_stack(p) - (unsigned long)n;
# else
- return (unsigned long)n - (unsigned long)end_of_stack(p);
+ unused_stack = (unsigned long)n - (unsigned long)end_of_stack(p);
# endif
+ count_kstack_page(((THREAD_SIZE - unused_stack) >> PAGE_SHIFT) + 1);
+
+ return unused_stack;
}
#endif
extern void set_task_stack_end_magic(struct task_struct *tsk);
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 747943bc8cc2..1dbfe47ff048 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -153,10 +153,39 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
VMA_LOCK_ABORT,
VMA_LOCK_RETRY,
VMA_LOCK_MISS,
+#endif
+#ifdef CONFIG_DEBUG_STACK_USAGE
+ KSTACK_PAGE_1,
+ KSTACK_PAGE_2,
+#if THREAD_SIZE >= (4 * PAGE_SIZE)
+ KSTACK_PAGE_3,
+ KSTACK_PAGE_4,
+#endif
+#if THREAD_SIZE > (4 * PAGE_SIZE)
+ KSTACK_PAGE_REST,
+#endif
#endif
NR_VM_EVENT_ITEMS
};
+#ifdef CONFIG_VM_EVENT_COUNTERS
+/*
+ * Light weight per cpu counter implementation.
+ *
+ * Counters should only be incremented and no critical kernel component
+ * should rely on the counter values.
+ *
+ * Counters are handled completely inline. On many platforms the code
+ * generated will simply be the increment of a global address.
+ */
+
+struct vm_event_state {
+ unsigned long event[NR_VM_EVENT_ITEMS];
+};
+
+DECLARE_PER_CPU(struct vm_event_state, vm_event_states);
+#endif
+
#ifndef CONFIG_TRANSPARENT_HUGEPAGE
#define THP_FILE_ALLOC ({ BUILD_BUG(); 0; })
#define THP_FILE_FALLBACK ({ BUILD_BUG(); 0; })
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 343906a98d6e..18d4a97d3afd 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -41,22 +41,6 @@ enum writeback_stat_item {
};
#ifdef CONFIG_VM_EVENT_COUNTERS
-/*
- * Light weight per cpu counter implementation.
- *
- * Counters should only be incremented and no critical kernel component
- * should rely on the counter values.
- *
- * Counters are handled completely inline. On many platforms the code
- * generated will simply be the increment of a global address.
- */
-
-struct vm_event_state {
- unsigned long event[NR_VM_EVENT_ITEMS];
-};
-
-DECLARE_PER_CPU(struct vm_event_state, vm_event_states);
-
/*
* vm counters are allowed to be racy. Use raw_cpu_ops to avoid the
* local_irq_disable overhead.
diff --git a/mm/vmstat.c b/mm/vmstat.c
index db79935e4a54..737c85689251 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1413,6 +1413,17 @@ const char * const vmstat_text[] = {
"vma_lock_retry",
"vma_lock_miss",
#endif
+#ifdef CONFIG_DEBUG_STACK_USAGE
+ "kstack_page_1",
+ "kstack_page_2",
+#if THREAD_SIZE >= (4 * PAGE_SIZE)
+ "kstack_page_3",
+ "kstack_page_4",
+#endif
+#if THREAD_SIZE > (4 * PAGE_SIZE)
+ "kstack_page_rest",
+#endif
+#endif
#endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
};
#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */
--
2.44.0.278.ge034bb2e1d-goog