Message-Id: <20250610233053.973796-1-cachen@purestorage.com>
Date: Tue, 10 Jun 2025 17:30:53 -0600
From: Casey Chen <cachen@...estorage.com>
To: akpm@...ux-foundation.org,
	surenb@...gle.com
Cc: kent.overstreet@...ux.dev,
	corbet@....net,
	dennis@...nel.org,
	tj@...nel.org,
	cl@...two.org,
	vbabka@...e.cz,
	mhocko@...e.com,
	jackmanb@...gle.com,
	hannes@...xchg.org,
	ziy@...dia.com,
	rientjes@...gle.com,
	roman.gushchin@...ux.dev,
	harry.yoo@...cle.com,
	linux-mm@...ck.org,
	linux-kernel@...r.kernel.org,
	linux-doc@...r.kernel.org,
	yzhong@...estorage.com,
	Casey Chen <cachen@...estorage.com>
Subject: [PATCH] alloc_tag: add per-NUMA node stats

Add support for tracking per-NUMA node statistics in /proc/allocinfo.
Previously, each alloc_tag had a single per-CPU pair of counters (bytes
and calls) that was aggregated across all CPUs at read time. With this
change, each CPU maintains a separate pair of counters for every NUMA
node, allowing finer-grained memory allocation profiling.
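
To make the relationship concrete (a sketch built on the helpers this
patch adds, not code taken from the patch itself), the old aggregate
view is simply the per-node reads summed over all tracked nodes, which
is exactly how alloc_tag_read() is reworked below:

	struct alloc_tag_counters total = { 0, 0 };
	int nid;

	/* pcpu_counters_num is num_possible_nodes() with the option on, else 1 */
	for (nid = 0; nid < pcpu_counters_num; nid++) {
		struct alloc_tag_counters c = alloc_tag_read_nid(tag, nid);

		total.bytes += c.bytes;
		total.calls += c.calls;
	}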

This feature is controlled by the new
CONFIG_MEM_ALLOC_PROFILING_PER_NUMA_STATS option:

* When enabled (=y), the output includes per-node statistics following
  each tag's total bytes/calls line:

<size> <calls> <tag info>
...
315456       9858     mm/dmapool.c:338 func:pool_alloc_page
        nid0     94912        2966
        nid1     220544       6892
7680         60       mm/dmapool.c:254 func:dma_pool_create
        nid0     4224         33
        nid1     3456         27

(The per-node rows for each tag add up to its totals, e.g. 94912 +
220544 = 315456 bytes and 2966 + 6892 = 9858 calls above; a small
user-space parsing sketch follows these examples.)

* When disabled (=n), the output remains unchanged:
<size> <calls> <tag info>
...
315456       9858     mm/dmapool.c:338 func:pool_alloc_page
7680         60       mm/dmapool.c:254 func:dma_pool_create
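
A minimal user-space sketch of consuming this format (not part of the
patch; it assumes only the "nidN <bytes> <calls>" column layout shown
above):

	#include <stdio.h>

	int main(void)
	{
		char line[512];
		unsigned long long bytes, calls;
		unsigned int nid;
		FILE *f = fopen("/proc/allocinfo", "r");

		if (!f)
			return 1;
		while (fgets(line, sizeof(line), f)) {
			/* per-node rows are indented and start with "nidN" */
			if (sscanf(line, " nid%u %llu %llu", &nid, &bytes, &calls) == 3)
				printf("  node %u: %llu bytes, %llu calls\n",
				       nid, bytes, calls);
			else
				fputs(line, stdout);	/* header or per-tag total line */
		}
		fclose(f);
		return 0;
	}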

To minimize memory overhead, the per-node counters are allocated
dynamically from the percpu allocator. PERCPU_DYNAMIC_SIZE_SHIFT is
raised so that PERCPU_DYNAMIC_RESERVE is large enough to hold the
in-kernel alloc_tag counters.
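
As a rough sizing sketch (mirroring alloc_tag_sec_init() in the diff
below; the two-node figure is only an example, and the 16-byte struct
size follows from its two u64 fields):

	/*
	 * Example with the option enabled on a 2-node machine:
	 *   sizeof(struct alloc_tag_counters) == 2 * sizeof(u64) == 16 bytes,
	 *   so each tag needs 2 * 16 == 32 bytes of percpu space per CPU
	 *   (a single 16-byte pair per CPU when the option is off).
	 */
	pcpu_counters_num  = num_possible_nodes();
	pcpu_counters_size = pcpu_counters_num * sizeof(struct alloc_tag_counters);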

For in-kernel alloc_tag instances, the counters are allocated with
pcpu_alloc_noprof(), the non-profiled percpu allocation path, so the
counter allocations themselves are excluded from the profiling
statistics.

Signed-off-by: Casey Chen <cachen@...estorage.com>
Reviewed-by: Yuanyuan Zhong <yzhong@...estorage.com>
---
 Documentation/mm/allocation-profiling.rst |  3 ++
 include/linux/alloc_tag.h                 | 49 ++++++++++++------
 include/linux/codetag.h                   |  4 ++
 include/linux/percpu.h                    |  2 +-
 lib/Kconfig.debug                         |  7 +++
 lib/alloc_tag.c                           | 61 ++++++++++++++++++++---
 mm/page_alloc.c                           | 35 ++++++-------
 mm/percpu.c                               |  8 ++-
 mm/show_mem.c                             | 27 +++++++---
 mm/slub.c                                 | 11 ++--
 10 files changed, 156 insertions(+), 51 deletions(-)

diff --git a/Documentation/mm/allocation-profiling.rst b/Documentation/mm/allocation-profiling.rst
index 316311240e6a..13d1d0cb91bf 100644
--- a/Documentation/mm/allocation-profiling.rst
+++ b/Documentation/mm/allocation-profiling.rst
@@ -17,6 +17,9 @@ kconfig options:
   adds warnings for allocations that weren't accounted because of a
   missing annotation
 
+- CONFIG_MEM_ALLOC_PROFILING_PER_NUMA_STATS
+  adds memory allocation profiling stats for each numa node, off by default.
+
 Boot parameter:
   sysctl.vm.mem_profiling={0|1|never}[,compressed]
 
diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h
index 8f7931eb7d16..04f5beb44ef9 100644
--- a/include/linux/alloc_tag.h
+++ b/include/linux/alloc_tag.h
@@ -15,6 +15,8 @@
 #include <linux/static_key.h>
 #include <linux/irqflags.h>
 
+extern int pcpu_counters_num;
+
 struct alloc_tag_counters {
 	u64 bytes;
 	u64 calls;
@@ -134,16 +136,34 @@ static inline bool mem_alloc_profiling_enabled(void)
 				   &mem_alloc_profiling_key);
 }
 
+static inline struct alloc_tag_counters alloc_tag_read_nid(struct alloc_tag *tag, int nid)
+{
+	struct alloc_tag_counters v = { 0, 0 };
+	struct alloc_tag_counters *counters;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		counters = per_cpu_ptr(tag->counters, cpu);
+		v.bytes += counters[nid].bytes;
+		v.calls += counters[nid].calls;
+	}
+
+	return v;
+}
+
 static inline struct alloc_tag_counters alloc_tag_read(struct alloc_tag *tag)
 {
 	struct alloc_tag_counters v = { 0, 0 };
-	struct alloc_tag_counters *counter;
+	struct alloc_tag_counters *counters;
 	int cpu;
+	int nid;
 
 	for_each_possible_cpu(cpu) {
-		counter = per_cpu_ptr(tag->counters, cpu);
-		v.bytes += counter->bytes;
-		v.calls += counter->calls;
+		counters = per_cpu_ptr(tag->counters, cpu);
+		for (nid = 0; nid < pcpu_counters_num; nid++) {
+			v.bytes += counters[nid].bytes;
+			v.calls += counters[nid].calls;
+		}
 	}
 
 	return v;
@@ -179,7 +199,7 @@ static inline bool __alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag
 	return true;
 }
 
-static inline bool alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag)
+static inline bool alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag, int nid)
 {
 	if (unlikely(!__alloc_tag_ref_set(ref, tag)))
 		return false;
@@ -190,17 +210,18 @@ static inline bool alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *t
 	 * Each new reference for every sub-allocation needs to increment call
 	 * counter because when we free each part the counter will be decremented.
 	 */
-	this_cpu_inc(tag->counters->calls);
+	this_cpu_inc(tag->counters[nid].calls);
 	return true;
 }
 
-static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag, size_t bytes)
+static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag,
+				 int nid, size_t bytes)
 {
-	if (likely(alloc_tag_ref_set(ref, tag)))
-		this_cpu_add(tag->counters->bytes, bytes);
+	if (likely(alloc_tag_ref_set(ref, tag, nid)))
+		this_cpu_add(tag->counters[nid].bytes, bytes);
 }
 
-static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes)
+static inline void alloc_tag_sub(union codetag_ref *ref, int nid, size_t bytes)
 {
 	struct alloc_tag *tag;
 
@@ -215,8 +236,8 @@ static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes)
 
 	tag = ct_to_alloc_tag(ref->ct);
 
-	this_cpu_sub(tag->counters->bytes, bytes);
-	this_cpu_dec(tag->counters->calls);
+	this_cpu_sub(tag->counters[nid].bytes, bytes);
+	this_cpu_dec(tag->counters[nid].calls);
 
 	ref->ct = NULL;
 }
@@ -228,8 +249,8 @@ static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes)
 #define DEFINE_ALLOC_TAG(_alloc_tag)
 static inline bool mem_alloc_profiling_enabled(void) { return false; }
 static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag,
-				 size_t bytes) {}
-static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes) {}
+				 int nid, size_t bytes) {}
+static inline void alloc_tag_sub(union codetag_ref *ref, int nid, size_t bytes) {}
 #define alloc_tag_record(p)	do {} while (0)
 
 #endif /* CONFIG_MEM_ALLOC_PROFILING */
diff --git a/include/linux/codetag.h b/include/linux/codetag.h
index 5f2b9a1f722c..79d6b96c61f6 100644
--- a/include/linux/codetag.h
+++ b/include/linux/codetag.h
@@ -16,6 +16,10 @@ struct module;
 #define CODETAG_SECTION_START_PREFIX	"__start_"
 #define CODETAG_SECTION_STOP_PREFIX	"__stop_"
 
+enum codetag_flags {
+	CODETAG_PERCPU_ALLOC	= (1 << 0), /* codetag tracking percpu allocation */
+};
+
 /*
  * An instance of this structure is created in a special ELF section at every
  * code location being tagged.  At runtime, the special section is treated as
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 85bf8dd9f087..d92c27fbcd0d 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -43,7 +43,7 @@
 # define PERCPU_DYNAMIC_SIZE_SHIFT      12
 #endif /* LOCKDEP and PAGE_SIZE > 4KiB */
 #else
-#define PERCPU_DYNAMIC_SIZE_SHIFT      10
+#define PERCPU_DYNAMIC_SIZE_SHIFT      13
 #endif
 
 /*
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 166b9d830a85..ba2d9c7e050b 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1037,6 +1037,13 @@ config MEM_ALLOC_PROFILING_DEBUG
 	  Adds warnings with helpful error messages for memory allocation
 	  profiling.
 
+config MEM_ALLOC_PROFILING_PER_NUMA_STATS
+	bool "Memory allocation profiling per-NUMA stats"
+	default n
+	depends on MEM_ALLOC_PROFILING
+	help
+	  Display allocation stats on every NUMA node.
+
 source "lib/Kconfig.kasan"
 source "lib/Kconfig.kfence"
 source "lib/Kconfig.kmsan"
diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
index d48b80f3f007..b503685dff73 100644
--- a/lib/alloc_tag.c
+++ b/lib/alloc_tag.c
@@ -42,6 +42,9 @@ struct allocinfo_private {
 	bool print_header;
 };
 
+int pcpu_counters_num;
+static unsigned long pcpu_counters_size;
+
 static void *allocinfo_start(struct seq_file *m, loff_t *pos)
 {
 	struct allocinfo_private *priv;
@@ -88,7 +91,7 @@ static void print_allocinfo_header(struct seq_buf *buf)
 {
 	/* Output format version, so we can change it. */
 	seq_buf_printf(buf, "allocinfo - version: 1.0\n");
-	seq_buf_printf(buf, "#     <size>  <calls> <tag info>\n");
+	seq_buf_printf(buf, "<size> <calls> <tag info>\n");
 }
 
 static void alloc_tag_to_text(struct seq_buf *out, struct codetag *ct)
@@ -97,12 +100,29 @@ static void alloc_tag_to_text(struct seq_buf *out, struct codetag *ct)
 	struct alloc_tag_counters counter = alloc_tag_read(tag);
 	s64 bytes = counter.bytes;
 
-	seq_buf_printf(out, "%12lli %8llu ", bytes, counter.calls);
+	seq_buf_printf(out, "%-12lli %-8llu ", bytes, counter.calls);
 	codetag_to_text(out, ct);
 	seq_buf_putc(out, ' ');
 	seq_buf_putc(out, '\n');
 }
 
+#ifdef CONFIG_MEM_ALLOC_PROFILING_PER_NUMA_STATS
+static void alloc_tag_to_text_all_nids(struct seq_buf *out, struct codetag *ct)
+{
+	struct alloc_tag *tag = ct_to_alloc_tag(ct);
+	struct alloc_tag_counters counter;
+	s64 bytes;
+	int nid;
+
+	for (nid = 0; nid < pcpu_counters_num; nid++) {
+		counter = alloc_tag_read_nid(tag, nid);
+		bytes = counter.bytes;
+		seq_buf_printf(out, "        nid%-5u %-12lli %-8llu\n",
+				nid, bytes, counter.calls);
+	}
+}
+#endif
+
 static int allocinfo_show(struct seq_file *m, void *arg)
 {
 	struct allocinfo_private *priv = (struct allocinfo_private *)arg;
@@ -116,6 +136,9 @@ static int allocinfo_show(struct seq_file *m, void *arg)
 		priv->print_header = false;
 	}
 	alloc_tag_to_text(&buf, priv->iter.ct);
+#ifdef CONFIG_MEM_ALLOC_PROFILING_PER_NUMA_STATS
+	alloc_tag_to_text_all_nids(&buf, priv->iter.ct);
+#endif
 	seq_commit(m, seq_buf_used(&buf));
 	return 0;
 }
@@ -184,7 +207,7 @@ void pgalloc_tag_split(struct folio *folio, int old_order, int new_order)
 
 		if (get_page_tag_ref(folio_page(folio, i), &ref, &handle)) {
 			/* Set new reference to point to the original tag */
-			alloc_tag_ref_set(&ref, tag);
+			alloc_tag_ref_set(&ref, tag, folio_nid(folio));
 			update_page_tag_ref(handle, &ref);
 			put_page_tag_ref(handle);
 		}
@@ -247,19 +270,41 @@ static void shutdown_mem_profiling(bool remove_file)
 void __init alloc_tag_sec_init(void)
 {
 	struct alloc_tag *last_codetag;
+	int i;
 
 	if (!mem_profiling_support)
 		return;
 
-	if (!static_key_enabled(&mem_profiling_compressed))
-		return;
-
 	kernel_tags.first_tag = (struct alloc_tag *)kallsyms_lookup_name(
 					SECTION_START(ALLOC_TAG_SECTION_NAME));
 	last_codetag = (struct alloc_tag *)kallsyms_lookup_name(
 					SECTION_STOP(ALLOC_TAG_SECTION_NAME));
 	kernel_tags.count = last_codetag - kernel_tags.first_tag;
 
+#ifdef CONFIG_MEM_ALLOC_PROFILING_PER_NUMA_STATS
+	pcpu_counters_num = num_possible_nodes();
+#else
+	pcpu_counters_num = 1;
+#endif
+
+	pcpu_counters_size = pcpu_counters_num * sizeof(struct alloc_tag_counters);
+	for (i = 0; i < kernel_tags.count; i++) {
+		/* Each CPU has one alloc_tag_counters per numa node */
+		kernel_tags.first_tag[i].counters =
+			pcpu_alloc_noprof(pcpu_counters_size,
+					  sizeof(struct alloc_tag_counters),
+					  false, GFP_KERNEL | __GFP_ZERO);
+		if (!kernel_tags.first_tag[i].counters) {
+			while (--i >= 0)
+				free_percpu(kernel_tags.first_tag[i].counters);
+			pr_info("Failed to allocate per-cpu alloc_tag counters\n");
+			return;
+		}
+	}
+
+	if (!static_key_enabled(&mem_profiling_compressed))
+		return;
+
 	/* Check if kernel tags fit into page flags */
 	if (kernel_tags.count > (1UL << NR_UNUSED_PAGEFLAG_BITS)) {
 		shutdown_mem_profiling(false); /* allocinfo file does not exist yet */
@@ -622,7 +667,9 @@ static int load_module(struct module *mod, struct codetag *start, struct codetag
 	stop_tag = ct_to_alloc_tag(stop);
 	for (tag = start_tag; tag < stop_tag; tag++) {
 		WARN_ON(tag->counters);
-		tag->counters = alloc_percpu(struct alloc_tag_counters);
+		tag->counters = __alloc_percpu_gfp(pcpu_counters_size,
+						   sizeof(struct alloc_tag_counters),
+						   GFP_KERNEL | __GFP_ZERO);
 		if (!tag->counters) {
 			while (--tag >= start_tag) {
 				free_percpu(tag->counters);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 90b06f3d004c..8219d8de6f97 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1107,58 +1107,59 @@ void __clear_page_tag_ref(struct page *page)
 /* Should be called only if mem_alloc_profiling_enabled() */
 static noinline
 void __pgalloc_tag_add(struct page *page, struct task_struct *task,
-		       unsigned int nr)
+		       int nid, unsigned int nr)
 {
 	union pgtag_ref_handle handle;
 	union codetag_ref ref;
 
 	if (get_page_tag_ref(page, &ref, &handle)) {
-		alloc_tag_add(&ref, task->alloc_tag, PAGE_SIZE * nr);
+		alloc_tag_add(&ref, task->alloc_tag, nid, PAGE_SIZE * nr);
 		update_page_tag_ref(handle, &ref);
 		put_page_tag_ref(handle);
 	}
 }
 
 static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
-				   unsigned int nr)
+				   int nid, unsigned int nr)
 {
 	if (mem_alloc_profiling_enabled())
-		__pgalloc_tag_add(page, task, nr);
+		__pgalloc_tag_add(page, task, nid, nr);
 }
 
 /* Should be called only if mem_alloc_profiling_enabled() */
 static noinline
-void __pgalloc_tag_sub(struct page *page, unsigned int nr)
+void __pgalloc_tag_sub(struct page *page, int nid, unsigned int nr)
 {
 	union pgtag_ref_handle handle;
 	union codetag_ref ref;
 
 	if (get_page_tag_ref(page, &ref, &handle)) {
-		alloc_tag_sub(&ref, PAGE_SIZE * nr);
+		alloc_tag_sub(&ref, nid, PAGE_SIZE * nr);
 		update_page_tag_ref(handle, &ref);
 		put_page_tag_ref(handle);
 	}
 }
 
-static inline void pgalloc_tag_sub(struct page *page, unsigned int nr)
+static inline void pgalloc_tag_sub(struct page *page, int nid, unsigned int nr)
 {
 	if (mem_alloc_profiling_enabled())
-		__pgalloc_tag_sub(page, nr);
+		__pgalloc_tag_sub(page, nid, nr);
 }
 
 /* When tag is not NULL, assuming mem_alloc_profiling_enabled */
-static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr)
+static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag,
+					 int nid, unsigned int nr)
 {
 	if (tag)
-		this_cpu_sub(tag->counters->bytes, PAGE_SIZE * nr);
+		this_cpu_sub(tag->counters[nid].bytes, PAGE_SIZE * nr);
 }
 
 #else /* CONFIG_MEM_ALLOC_PROFILING */
 
 static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
-				   unsigned int nr) {}
-static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {}
-static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {}
+				   int nid, unsigned int nr) {}
+static inline void pgalloc_tag_sub(struct page *page, int nid, unsigned int nr) {}
+static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, int nid, unsigned int nr) {}
 
 #endif /* CONFIG_MEM_ALLOC_PROFILING */
 
@@ -1197,7 +1198,7 @@ __always_inline bool free_pages_prepare(struct page *page,
 		/* Do not let hwpoison pages hit pcplists/buddy */
 		reset_page_owner(page, order);
 		page_table_check_free(page, order);
-		pgalloc_tag_sub(page, 1 << order);
+		pgalloc_tag_sub(page, page_to_nid(page), 1 << order);
 
 		/*
 		 * The page is isolated and accounted for.
@@ -1251,7 +1252,7 @@ __always_inline bool free_pages_prepare(struct page *page,
 	page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
 	reset_page_owner(page, order);
 	page_table_check_free(page, order);
-	pgalloc_tag_sub(page, 1 << order);
+	pgalloc_tag_sub(page, page_to_nid(page), 1 << order);
 
 	if (!PageHighMem(page)) {
 		debug_check_no_locks_freed(page_address(page),
@@ -1707,7 +1708,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
 
 	set_page_owner(page, order, gfp_flags);
 	page_table_check_alloc(page, order);
-	pgalloc_tag_add(page, current, 1 << order);
+	pgalloc_tag_add(page, current, page_to_nid(page), 1 << order);
 }
 
 static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
@@ -5064,7 +5065,7 @@ static void ___free_pages(struct page *page, unsigned int order,
 	if (put_page_testzero(page))
 		__free_frozen_pages(page, order, fpi_flags);
 	else if (!head) {
-		pgalloc_tag_sub_pages(tag, (1 << order) - 1);
+		pgalloc_tag_sub_pages(tag, page_to_nid(page), (1 << order) - 1);
 		while (order-- > 0)
 			__free_frozen_pages(page + (1 << order), order,
 					    fpi_flags);
diff --git a/mm/percpu.c b/mm/percpu.c
index b35494c8ede2..130450e9718e 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1691,15 +1691,19 @@ static void pcpu_alloc_tag_alloc_hook(struct pcpu_chunk *chunk, int off,
 				      size_t size)
 {
 	if (mem_alloc_profiling_enabled() && likely(chunk->obj_exts)) {
+		/* For percpu allocation, store all alloc_tag stats on numa node 0 */
 		alloc_tag_add(&chunk->obj_exts[off >> PCPU_MIN_ALLOC_SHIFT].tag,
-			      current->alloc_tag, size);
+			      current->alloc_tag, 0, size);
+		if (current->alloc_tag)
+			current->alloc_tag->ct.flags |= CODETAG_PERCPU_ALLOC;
 	}
 }
 
 static void pcpu_alloc_tag_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
 {
+	/* percpu alloc_tag stats is stored on numa node 0 so subtract from node 0 */
 	if (mem_alloc_profiling_enabled() && likely(chunk->obj_exts))
-		alloc_tag_sub(&chunk->obj_exts[off >> PCPU_MIN_ALLOC_SHIFT].tag, size);
+		alloc_tag_sub(&chunk->obj_exts[off >> PCPU_MIN_ALLOC_SHIFT].tag, 0, size);
 }
 #else
 static void pcpu_alloc_tag_alloc_hook(struct pcpu_chunk *chunk, int off,
diff --git a/mm/show_mem.c b/mm/show_mem.c
index 03e8d968fd1a..b2ff55afb3cc 100644
--- a/mm/show_mem.c
+++ b/mm/show_mem.c
@@ -5,6 +5,7 @@
  * Copyright (C) 2008 Johannes Weiner <hannes@...urebad.de>
  */
 
+#include <linux/alloc_tag.h>
 #include <linux/blkdev.h>
 #include <linux/cma.h>
 #include <linux/cpuset.h>
@@ -428,6 +429,7 @@ void __show_mem(unsigned int filter, nodemask_t *nodemask, int max_zone_idx)
 		nr = alloc_tag_top_users(tags, ARRAY_SIZE(tags), false);
 		if (nr) {
 			pr_notice("Memory allocations:\n");
+			pr_notice("<size> <calls> <tag info>\n");
 			for (i = 0; i < nr; i++) {
 				struct codetag *ct = tags[i].ct;
 				struct alloc_tag *tag = ct_to_alloc_tag(ct);
@@ -435,16 +437,27 @@ void __show_mem(unsigned int filter, nodemask_t *nodemask, int max_zone_idx)
 				char bytes[10];
 
 				string_get_size(counter.bytes, 1, STRING_UNITS_2, bytes, sizeof(bytes));
-
 				/* Same as alloc_tag_to_text() but w/o intermediate buffer */
 				if (ct->modname)
-					pr_notice("%12s %8llu %s:%u [%s] func:%s\n",
-						  bytes, counter.calls, ct->filename,
-						  ct->lineno, ct->modname, ct->function);
+					pr_notice("%-12s %-8llu %s:%u [%s] func:%s\n",
+						bytes, counter.calls, ct->filename,
+						ct->lineno, ct->modname, ct->function);
 				else
-					pr_notice("%12s %8llu %s:%u func:%s\n",
-						  bytes, counter.calls, ct->filename,
-						  ct->lineno, ct->function);
+					pr_notice("%-12s %-8llu %s:%u func:%s\n",
+						bytes, counter.calls,
+						ct->filename, ct->lineno, ct->function);
+
+#ifdef CONFIG_MEM_ALLOC_PROFILING_PER_NUMA_STATS
+				int nid;
+
+				for (nid = 0; nid < pcpu_counters_num; nid++) {
+					counter = alloc_tag_read_nid(tag, nid);
+					string_get_size(counter.bytes, 1, STRING_UNITS_2,
+							bytes, sizeof(bytes));
+					pr_notice("        nid%-5u %-12lld %-8lld\n",
+						  nid, counter.bytes, counter.calls);
+				}
+#endif
 			}
 		}
 	}
diff --git a/mm/slub.c b/mm/slub.c
index be8b09e09d30..068b88b85d80 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2104,8 +2104,12 @@ __alloc_tagging_slab_alloc_hook(struct kmem_cache *s, void *object, gfp_t flags)
 	 * If other users appear then mem_alloc_profiling_enabled()
 	 * check should be added before alloc_tag_add().
 	 */
-	if (likely(obj_exts))
-		alloc_tag_add(&obj_exts->ref, current->alloc_tag, s->size);
+	if (likely(obj_exts)) {
+		struct page *page = virt_to_page(object);
+
+		alloc_tag_add(&obj_exts->ref, current->alloc_tag,
+				page_to_nid(page), s->size);
+	}
 }
 
 static inline void
@@ -2133,8 +2137,9 @@ __alloc_tagging_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p
 
 	for (i = 0; i < objects; i++) {
 		unsigned int off = obj_to_index(s, slab, p[i]);
+		struct page *page = virt_to_page(p[i]);
 
-		alloc_tag_sub(&obj_exts[off].ref, s->size);
+		alloc_tag_sub(&obj_exts[off].ref, page_to_nid(page), s->size);
 	}
 }
 
-- 
2.34.1

