lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250522090243.758943-2-p.raghav@samsung.com>
Date: Thu, 22 May 2025 11:02:42 +0200
From: Pankaj Raghav <p.raghav@...sung.com>
To: Suren Baghdasaryan <surenb@...gle.com>,
	Vlastimil Babka <vbabka@...e.cz>,
	Ryan Roberts <ryan.roberts@....com>,
	Mike Rapoport <rppt@...nel.org>,
	Michal Hocko <mhocko@...e.com>,
	Thomas Gleixner <tglx@...utronix.de>,
	Nico Pache <npache@...hat.com>,
	Dev Jain <dev.jain@....com>,
	Baolin Wang <baolin.wang@...ux.alibaba.com>,
	Borislav Petkov <bp@...en8.de>,
	Ingo Molnar <mingo@...hat.com>,
	"H . Peter Anvin" <hpa@...or.com>,
	Zi Yan <ziy@...dia.com>,
	Dave Hansen <dave.hansen@...ux.intel.com>,
	David Hildenbrand <david@...hat.com>,
	Lorenzo Stoakes <lorenzo.stoakes@...cle.com>,
	Andrew Morton <akpm@...ux-foundation.org>,
	"Liam R . Howlett" <Liam.Howlett@...cle.com>,
	Jens Axboe <axboe@...nel.dk>
Cc: linux-block@...r.kernel.org,
	linux-fsdevel@...r.kernel.org,
	"Darrick J . Wong" <djwong@...nel.org>,
	gost.dev@...sung.com,
	kernel@...kajraghav.com,
	hch@....de,
	linux-kernel@...r.kernel.org,
	linux-mm@...ck.org,
	willy@...radead.org,
	x86@...nel.org,
	mcgrof@...nel.org,
	Pankaj Raghav <p.raghav@...sung.com>
Subject: [RFC v2 1/2] mm: add THP_HUGE_ZERO_PAGE_ALWAYS config option

There are many places in the kernel where we need to zero out larger
chunks, but the maximum segment we can zero out at a time with ZERO_PAGE
is limited to PAGE_SIZE.

This is especially annoying in block devices and filesystems, where we
attach multiple ZERO_PAGEs to the bio in different bvecs. With multipage
bvec support in the block layer, it is much more efficient to send out
larger zero pages as part of a single bvec.

This concern was raised during the review of adding LBS support to
XFS[1][2].

Usually huge_zero_folio is allocated on demand, and it will be
deallocated by the shrinker if there are no users of it left.

Add a config option THP_ZERO_PAGE_ALWAYS that will always allocate
the huge_zero_folio, and it will never be freed. This makes it possible
to use the huge_zero_folio without having to pass any mm struct or call
put_folio in the destructor.

We can enable it by default for x86_64, where the PMD size is 2M.
It is a good compromise between memory usage and efficiency.
As a THP zero page might be wasteful for architectures with bigger page
sizes, let's not enable it for them.

[1] https://lore.kernel.org/linux-xfs/20231027051847.GA7885@lst.de/
[2] https://lore.kernel.org/linux-xfs/ZitIK5OnR7ZNY0IG@infradead.org/

Suggested-by: David Hildenbrand <david@...hat.com>
Signed-off-by: Pankaj Raghav <p.raghav@...sung.com>
---
 arch/x86/Kconfig |  1 +
 mm/Kconfig       | 12 +++++++++
 mm/huge_memory.c | 63 ++++++++++++++++++++++++++++++++++++++----------
 3 files changed, 63 insertions(+), 13 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 055204dc211d..2e1527580746 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -152,6 +152,7 @@ config X86
 	select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP	if X86_64
 	select ARCH_WANT_HUGETLB_VMEMMAP_PREINIT if X86_64
 	select ARCH_WANTS_THP_SWAP		if X86_64
+	select ARCH_WANTS_THP_ZERO_PAGE_ALWAYS	if X86_64
 	select ARCH_HAS_PARANOID_L1D_FLUSH
 	select BUILDTIME_TABLE_SORT
 	select CLKEVT_I8253
diff --git a/mm/Kconfig b/mm/Kconfig
index bd08e151fa1b..a2994e7d55ba 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -823,6 +823,9 @@ config ARCH_WANT_GENERAL_HUGETLB
 config ARCH_WANTS_THP_SWAP
 	def_bool n
 
+config ARCH_WANTS_THP_ZERO_PAGE_ALWAYS
+	def_bool n
+
 config MM_ID
 	def_bool n
 
@@ -895,6 +898,15 @@ config READ_ONLY_THP_FOR_FS
 	  support of file THPs will be developed in the next few release
 	  cycles.
 
+config THP_ZERO_PAGE_ALWAYS
+	def_bool y
+	depends on TRANSPARENT_HUGEPAGE && ARCH_WANTS_THP_ZERO_PAGE_ALWAYS
+	help
+	  Typically huge_zero_folio, which is a THP of zeroes, is allocated
+	  on demand and deallocated when not in use. This option will always
+	  allocate huge_zero_folio for zeroing and it is never deallocated.
+	  Not suitable for memory constrained systems.
+
 config NO_PAGE_MAPCOUNT
 	bool "No per-page mapcount (EXPERIMENTAL)"
 	help
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d3e66136e41a..1a0556ca3839 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -247,9 +247,16 @@ static void put_huge_zero_page(void)
 	BUG_ON(atomic_dec_and_test(&huge_zero_refcount));
 }
 
+/*
+ * If THP_ZERO_PAGE_ALWAYS is enabled, @mm can be NULL, i.e, the huge_zero_folio
+ * is not associated with any mm_struct.
+ */
 struct folio *mm_get_huge_zero_folio(struct mm_struct *mm)
 {
-	if (test_bit(MMF_HUGE_ZERO_PAGE, &mm->flags))
+	if (!IS_ENABLED(CONFIG_THP_ZERO_PAGE_ALWAYS) && !mm)
+		return NULL;
+
+	if (IS_ENABLED(CONFIG_THP_ZERO_PAGE_ALWAYS) || test_bit(MMF_HUGE_ZERO_PAGE, &mm->flags))
 		return READ_ONCE(huge_zero_folio);
 
 	if (!get_huge_zero_page())
@@ -263,6 +270,9 @@ struct folio *mm_get_huge_zero_folio(struct mm_struct *mm)
 
 void mm_put_huge_zero_folio(struct mm_struct *mm)
 {
+	if (IS_ENABLED(CONFIG_THP_ZERO_PAGE_ALWAYS))
+		return;
+
 	if (test_bit(MMF_HUGE_ZERO_PAGE, &mm->flags))
 		put_huge_zero_page();
 }
@@ -274,14 +284,21 @@ static unsigned long shrink_huge_zero_page_count(struct shrinker *shrink,
 	return atomic_read(&huge_zero_refcount) == 1 ? HPAGE_PMD_NR : 0;
 }
 
+static void _put_huge_zero_folio(void)
+{
+	struct folio *zero_folio;
+
+	zero_folio = xchg(&huge_zero_folio, NULL);
+	BUG_ON(zero_folio == NULL);
+	WRITE_ONCE(huge_zero_pfn, ~0UL);
+	folio_put(zero_folio);
+}
+
 static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink,
 				       struct shrink_control *sc)
 {
 	if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) {
-		struct folio *zero_folio = xchg(&huge_zero_folio, NULL);
-		BUG_ON(zero_folio == NULL);
-		WRITE_ONCE(huge_zero_pfn, ~0UL);
-		folio_put(zero_folio);
+		_put_huge_zero_folio();
 		return HPAGE_PMD_NR;
 	}
 
@@ -850,10 +867,6 @@ static inline void hugepage_exit_sysfs(struct kobject *hugepage_kobj)
 
 static int __init thp_shrinker_init(void)
 {
-	huge_zero_page_shrinker = shrinker_alloc(0, "thp-zero");
-	if (!huge_zero_page_shrinker)
-		return -ENOMEM;
-
 	deferred_split_shrinker = shrinker_alloc(SHRINKER_NUMA_AWARE |
 						 SHRINKER_MEMCG_AWARE |
 						 SHRINKER_NONSLAB,
@@ -863,14 +876,21 @@ static int __init thp_shrinker_init(void)
 		return -ENOMEM;
 	}
 
-	huge_zero_page_shrinker->count_objects = shrink_huge_zero_page_count;
-	huge_zero_page_shrinker->scan_objects = shrink_huge_zero_page_scan;
-	shrinker_register(huge_zero_page_shrinker);
-
 	deferred_split_shrinker->count_objects = deferred_split_count;
 	deferred_split_shrinker->scan_objects = deferred_split_scan;
 	shrinker_register(deferred_split_shrinker);
 
+	if (IS_ENABLED(CONFIG_THP_ZERO_PAGE_ALWAYS))
+		return 0;
+
+	huge_zero_page_shrinker = shrinker_alloc(0, "thp-zero");
+	if (!huge_zero_page_shrinker)
+		return -ENOMEM;
+
+	huge_zero_page_shrinker->count_objects = shrink_huge_zero_page_count;
+	huge_zero_page_shrinker->scan_objects = shrink_huge_zero_page_scan;
+	shrinker_register(huge_zero_page_shrinker);
+
 	return 0;
 }
 
@@ -880,6 +900,17 @@ static void __init thp_shrinker_exit(void)
 	shrinker_free(deferred_split_shrinker);
 }
 
+static int __init huge_zero_page_init(void) {
+
+	if (!IS_ENABLED(CONFIG_THP_ZERO_PAGE_ALWAYS))
+		return 0;
+
+	if (!get_huge_zero_page()) {
+		return -ENOMEM;
+	}
+	return 0;
+}
+
 static int __init hugepage_init(void)
 {
 	int err;
@@ -903,6 +934,10 @@ static int __init hugepage_init(void)
 	if (err)
 		goto err_slab;
 
+	err = huge_zero_page_init();
+	if (err)
+		goto err_huge_zero_page;
+
 	err = thp_shrinker_init();
 	if (err)
 		goto err_shrinker;
@@ -925,6 +960,8 @@ static int __init hugepage_init(void)
 err_khugepaged:
 	thp_shrinker_exit();
 err_shrinker:
+	_put_huge_zero_folio();
+err_huge_zero_page:
 	khugepaged_destroy();
 err_slab:
 	hugepage_exit_sysfs(hugepage_kobj);
-- 
2.47.2


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ