Message-Id: <20251225082059.1632-8-lizhe.67@bytedance.com>
Date: Thu, 25 Dec 2025 16:20:58 +0800
From: Li Zhe <lizhe.67@...edance.com>
To: <muchun.song@...ux.dev>, <osalvador@...e.de>, <david@...nel.org>, 
	<akpm@...ux-foundation.org>, <fvdl@...gle.com>
Cc: <linux-mm@...ck.org>, <linux-kernel@...r.kernel.org>, 
	<lizhe.67@...edance.com>
Subject: [PATCH 7/8] mm/hugetlb: add epoll support for interface "zeroable_hugepages"

From: Li Zhe <lizhe.67@...edance.com>

Add epoll support for the "zeroable_hugepages" interface. When no huge
folios are available for pre-zeroing, user space can block on the
zeroable_hugepages file with epoll and will be woken as soon as one or
more huge folios become eligible for pre-zeroing.
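
For reference, a minimal user-space sketch of the intended usage follows
(a hedged example, not part of this patch; the sysfs path below is an
assumption based on the per-node hstate layout, only the attribute name
"zeroable_hugepages" comes from this series). sysfs_notify() wakes pollers
with EPOLLPRI, so the file is registered for EPOLLPRI and re-read from
offset 0 after each wakeup:

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/epoll.h>
	#include <unistd.h>

	int main(void)
	{
		/* Hypothetical per-node path; adjust node and hugepage size. */
		const char *path = "/sys/devices/system/node/node0/hugepages/"
				   "hugepages-2048kB/zeroable_hugepages";
		struct epoll_event ev = { .events = EPOLLPRI }, out;
		char buf[64];
		int fd = open(path, O_RDONLY);
		int epfd = epoll_create1(0);

		if (fd < 0 || epfd < 0)
			return 1;

		/* Prime the attribute: read once before waiting for events. */
		read(fd, buf, sizeof(buf));
		epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev);

		for (;;) {
			/* Woken via sysfs_notify() when folios become zeroable. */
			if (epoll_wait(epfd, &out, 1, -1) < 1)
				continue;
			lseek(fd, 0, SEEK_SET);
			memset(buf, 0, sizeof(buf));
			read(fd, buf, sizeof(buf) - 1);
			printf("zeroable_hugepages: %s", buf);
		}
	}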

Signed-off-by: Li Zhe <lizhe.67@...edance.com>
---
 mm/hugetlb.c          | 13 +++++++++++++
 mm/hugetlb_internal.h |  6 ++++++
 mm/hugetlb_sysfs.c    | 22 +++++++++++++++++++++-
 3 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8d36487659f8..c2df0317fe15 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1868,6 +1868,7 @@ void free_huge_folio(struct folio *folio)
 		arch_clear_hugetlb_flags(folio);
 		enqueue_hugetlb_folio(h, folio);
 		spin_unlock_irqrestore(&hugetlb_lock, flags);
+		do_zero_free_notify(h, folio_nid(folio));
 	}
 }
 
@@ -1999,8 +2000,10 @@ static struct folio *alloc_fresh_hugetlb_folio(struct hstate *h,
 void prep_and_add_allocated_folios(struct hstate *h,
 				   struct list_head *folio_list)
 {
+	nodemask_t allocated_mask = NODE_MASK_NONE;
 	unsigned long flags;
 	struct folio *folio, *tmp_f;
+	int nid;
 
 	/* Send list for bulk vmemmap optimization processing */
 	hugetlb_vmemmap_optimize_folios(h, folio_list);
@@ -2010,8 +2013,12 @@ void prep_and_add_allocated_folios(struct hstate *h,
 	list_for_each_entry_safe(folio, tmp_f, folio_list, lru) {
 		prep_account_new_hugetlb_folio(h, folio);
 		enqueue_hugetlb_folio(h, folio);
+		node_set(folio_nid(folio), allocated_mask);
 	}
 	spin_unlock_irqrestore(&hugetlb_lock, flags);
+
+	for_each_node_mask(nid, allocated_mask)
+		do_zero_free_notify(h, nid);
 }
 
 /*
@@ -2383,6 +2390,8 @@ static int gather_surplus_pages(struct hstate *h, long delta)
 	long needed, allocated;
 	bool alloc_ok = true;
 	nodemask_t *mbind_nodemask, alloc_nodemask;
+	nodemask_t allocated_mask = NODE_MASK_NONE;
+	int nid;
 
 	mbind_nodemask = policy_mbind_nodemask(htlb_alloc_mask(h));
 	if (mbind_nodemask)
@@ -2455,9 +2464,12 @@ static int gather_surplus_pages(struct hstate *h, long delta)
 			break;
 		/* Add the page to the hugetlb allocator */
 		enqueue_hugetlb_folio(h, folio);
+		node_set(folio_nid(folio), allocated_mask);
 	}
 free:
 	spin_unlock_irq(&hugetlb_lock);
+	for_each_node_mask(nid, allocated_mask)
+		do_zero_free_notify(h, nid);
 
 	/*
 	 * Free unnecessary surplus pages to the buddy allocator.
@@ -2841,6 +2853,7 @@ static int alloc_and_dissolve_hugetlb_folio(struct folio *old_folio,
 		 * Folio has been replaced, we can safely free the old one.
 		 */
 		spin_unlock_irq(&hugetlb_lock);
+		do_zero_free_notify(h, folio_nid(new_folio));
 		update_and_free_hugetlb_folio(h, old_folio, false);
 	}
 
diff --git a/mm/hugetlb_internal.h b/mm/hugetlb_internal.h
index 1d2f870deccf..9c60661283c7 100644
--- a/mm/hugetlb_internal.h
+++ b/mm/hugetlb_internal.h
@@ -106,6 +106,12 @@ extern ssize_t __nr_hugepages_store_common(bool obey_mempolicy,
 					   struct hstate *h, int nid,
 					   unsigned long count, size_t len);
 
+#ifdef CONFIG_NUMA
+extern void do_zero_free_notify(struct hstate *h, int nid);
+#else
+static inline void do_zero_free_notify(struct hstate *h, int nid) {}
+#endif
+
 extern void hugetlb_sysfs_init(void) __init;
 
 #ifdef CONFIG_SYSCTL
diff --git a/mm/hugetlb_sysfs.c b/mm/hugetlb_sysfs.c
index 08ad39d3e022..c063237249f6 100644
--- a/mm/hugetlb_sysfs.c
+++ b/mm/hugetlb_sysfs.c
@@ -340,6 +340,7 @@ static bool hugetlb_sysfs_initialized __ro_after_init;
 
 struct node_hstate_item {
 	struct kobject *hstate_kobj;
+	struct work_struct notify_work;
 };
 
 /*
@@ -355,6 +356,21 @@ struct node_hstate {
 };
 static struct node_hstate node_hstates[MAX_NUMNODES];
 
+static void pre_zero_notify_fun(struct work_struct *work)
+{
+	struct node_hstate_item *item =
+		container_of(work, struct node_hstate_item, notify_work);
+
+	sysfs_notify(item->hstate_kobj, NULL, "zeroable_hugepages");
+}
+
+void do_zero_free_notify(struct hstate *h, int nid)
+{
+	struct node_hstate *nhs = &node_hstates[nid];
+
+	schedule_work(&nhs->items[hstate_index(h)].notify_work);
+}
+
 static ssize_t zeroable_hugepages_show(struct kobject *kobj,
 					struct kobj_attribute *attr, char *buf)
 {
@@ -564,8 +580,11 @@ void hugetlb_register_node(struct node *node)
 		return;
 
 	for_each_hstate(h) {
+		int index = hstate_index(h);
+		struct node_hstate_item *item = &nhs->items[index];
+
 		err = hugetlb_sysfs_add_hstate(h, nhs->hugepages_kobj,
-				&nhs->items[hstate_index(h)].hstate_kobj,
+				&item->hstate_kobj,
 				&per_node_hstate_attr_group);
 		if (err) {
 			pr_err("HugeTLB: Unable to add hstate %s for node %d\n",
@@ -573,6 +592,7 @@ void hugetlb_register_node(struct node *node)
 			hugetlb_unregister_node(node);
 			break;
 		}
+		INIT_WORK(&item->notify_work, pre_zero_notify_fun);
 	}
 }
 
-- 
2.20.1
