linux-kernel - [PATCH v4 1/2] mm: add per-order mTHP anon_alloc and anon_alloc_fallback counters

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]

Message-Id: <20240405102704.77559-2-21cnbao@gmail.com>
Date: Fri,  5 Apr 2024 23:27:03 +1300
From: Barry Song <21cnbao@...il.com>
To: david@...hat.com,
	akpm@...ux-foundation.org,
	linux-mm@...ck.org,
	ryan.roberts@....com
Cc: cerasuolodomenico@...il.com,
	chrisl@...nel.org,
	kasong@...cent.com,
	peterx@...hat.com,
	surenb@...gle.com,
	v-songbaohua@...o.com,
	willy@...radead.org,
	yosryahmed@...gle.com,
	yuzhao@...gle.com,
	linux-kernel@...r.kernel.org
Subject: [PATCH v4 1/2] mm: add per-order mTHP anon_alloc and anon_alloc_fallback counters

From: Barry Song <v-songbaohua@...o.com>

Profiling a system blindly with mTHP has become challenging due to the
lack of visibility into its operations. Presenting the success rate of
mTHP allocations appears to be a pressing need.

Recently, I've been experiencing significant difficulty debugging
performance improvements and regressions without these figures.
It's crucial for us to understand the true effectiveness of mTHP in
real-world scenarios, especially in systems with fragmented memory.

This patch sets up the framework for per-order mTHP counters, starting
with the introduction of anon_alloc and anon_alloc_fallback counters.
Incorporating additional counters should now be straightforward as well.

Signed-off-by: Barry Song <v-songbaohua@...o.com>
---
 include/linux/huge_mm.h | 19 ++++++++++++++++
 mm/huge_memory.c        | 48 +++++++++++++++++++++++++++++++++++++++++
 mm/memory.c             |  2 ++
 3 files changed, 69 insertions(+)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index e896ca4760f6..c5d33017a4dd 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -264,6 +264,25 @@ unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
 					  enforce_sysfs, orders);
 }
 
+enum mthp_stat_item {
+	MTHP_STAT_ANON_ALLOC,
+	MTHP_STAT_ANON_ALLOC_FALLBACK,
+	__MTHP_STAT_COUNT
+};
+
+struct mthp_stat {
+	unsigned long stats[PMD_ORDER + 1][__MTHP_STAT_COUNT];
+};
+
+DECLARE_PER_CPU(struct mthp_stat, mthp_stats);
+
+static inline void count_mthp_stat(int order, enum mthp_stat_item item)
+{
+	if (unlikely(order > PMD_ORDER))
+		return;
+	this_cpu_inc(mthp_stats.stats[order][item]);
+}
+
 #define transparent_hugepage_use_zero_page()				\
 	(transparent_hugepage_flags &					\
 	 (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG))
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9d4b2fbf6872..5b875f0fc923 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -526,6 +526,46 @@ static const struct kobj_type thpsize_ktype = {
 	.sysfs_ops = &kobj_sysfs_ops,
 };
 
+DEFINE_PER_CPU(struct mthp_stat, mthp_stats) = {{{0}}};
+
+static unsigned long sum_mthp_stat(int order, enum mthp_stat_item item)
+{
+	unsigned long sum = 0;
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		struct mthp_stat *this = &per_cpu(mthp_stats, cpu);
+
+		sum += this->stats[order][item];
+	}
+
+	return sum;
+}
+
+#define DEFINE_MTHP_STAT_ATTR(_name, _index)					\
+static ssize_t _name##_show(struct kobject *kobj,			\
+			struct kobj_attribute *attr, char *buf)		\
+{									\
+	int order = to_thpsize(kobj)->order;				\
+									\
+	return sysfs_emit(buf, "%lu\n", sum_mthp_stat(order, _index));	\
+}									\
+static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
+
+DEFINE_MTHP_STAT_ATTR(anon_alloc, MTHP_STAT_ANON_ALLOC);
+DEFINE_MTHP_STAT_ATTR(anon_alloc_fallback, MTHP_STAT_ANON_ALLOC_FALLBACK);
+
+static struct attribute *stats_attrs[] = {
+	&anon_alloc_attr.attr,
+	&anon_alloc_fallback_attr.attr,
+	NULL,
+};
+
+static struct attribute_group stats_attr_group = {
+	.name = "stats",
+	.attrs = stats_attrs,
+};
+
 static struct thpsize *thpsize_create(int order, struct kobject *parent)
 {
 	unsigned long size = (PAGE_SIZE << order) / SZ_1K;
@@ -549,6 +589,12 @@ static struct thpsize *thpsize_create(int order, struct kobject *parent)
 		return ERR_PTR(ret);
 	}
 
+	ret = sysfs_create_group(&thpsize->kobj, &stats_attr_group);
+	if (ret) {
+		kobject_put(&thpsize->kobj);
+		return ERR_PTR(ret);
+	}
+
 	thpsize->order = order;
 	return thpsize;
 }
@@ -1050,8 +1096,10 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 	folio = vma_alloc_folio(gfp, HPAGE_PMD_ORDER, vma, haddr, true);
 	if (unlikely(!folio)) {
 		count_vm_event(THP_FAULT_FALLBACK);
+		count_mthp_stat(HPAGE_PMD_ORDER, MTHP_STAT_ANON_ALLOC_FALLBACK);
 		return VM_FAULT_FALLBACK;
 	}
+	count_mthp_stat(HPAGE_PMD_ORDER, MTHP_STAT_ANON_ALLOC);
 	return __do_huge_pmd_anonymous_page(vmf, &folio->page, gfp);
 }
 
diff --git a/mm/memory.c b/mm/memory.c
index 649e3ed94487..1723c8ddf9cb 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4374,8 +4374,10 @@ static struct folio *alloc_anon_folio(struct vm_fault *vmf)
 			}
 			folio_throttle_swaprate(folio, gfp);
 			clear_huge_page(&folio->page, vmf->address, 1 << order);
+			count_mthp_stat(order, MTHP_STAT_ANON_ALLOC);
 			return folio;
 		}
+		count_mthp_stat(order, MTHP_STAT_ANON_ALLOC_FALLBACK);
 next:
 		order = next_order(&orders, order);
 	}
-- 
2.34.1