[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250507141132.2773275-2-usamaarif642@gmail.com>
Date: Wed, 7 May 2025 15:00:34 +0100
From: Usama Arif <usamaarif642@...il.com>
To: Andrew Morton <akpm@...ux-foundation.org>,
david@...hat.com,
linux-mm@...ck.org
Cc: hannes@...xchg.org,
shakeel.butt@...ux.dev,
riel@...riel.com,
ziy@...dia.com,
baolin.wang@...ux.alibaba.com,
lorenzo.stoakes@...cle.com,
Liam.Howlett@...cle.com,
npache@...hat.com,
ryan.roberts@....com,
linux-kernel@...r.kernel.org,
kernel-team@...a.com,
Usama Arif <usamaarif642@...il.com>
Subject: [PATCH 1/1] prctl: allow overriding system THP policy to always per process
Allowing the global THP policy to be overridden per process lets workloads
that have been shown to benefit from hugepages use them, without regressing
workloads that wouldn't benefit. This allows both types of workloads
to be run/stacked on the same machine.
It also helps in rolling out hugepages in hyperscaler configurations
for workloads that benefit from them: a single THP policy is likely
to be used across the entire fleet, and the new prctl lets individual
processes override it.
Signed-off-by: Usama Arif <usamaarif642@...il.com>
---
include/linux/huge_mm.h | 3 ++-
include/linux/mm_types.h | 7 ++-----
include/uapi/linux/prctl.h | 3 +++
kernel/sys.c | 16 ++++++++++++++++
tools/include/uapi/linux/prctl.h | 3 +++
.../perf/trace/beauty/include/uapi/linux/prctl.h | 3 +++
6 files changed, 29 insertions(+), 6 deletions(-)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 2f190c90192d..0587dc4b8e2d 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -293,7 +293,8 @@ unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
if (vm_flags & VM_HUGEPAGE)
mask |= READ_ONCE(huge_anon_orders_madvise);
if (hugepage_global_always() ||
- ((vm_flags & VM_HUGEPAGE) && hugepage_global_enabled()))
+ ((vm_flags & VM_HUGEPAGE) && hugepage_global_enabled()) ||
+ test_bit(MMF_THP_ALWAYS, &vma->vm_mm->flags))
mask |= READ_ONCE(huge_anon_orders_inherit);
orders &= mask;
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index e76bade9ebb1..9bcd72b2b191 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1704,11 +1704,8 @@ enum {
#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */
#define MMF_VM_HUGEPAGE 17 /* set when mm is available for khugepaged */
-/*
- * This one-shot flag is dropped due to necessity of changing exe once again
- * on NFS restore
- */
-//#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */
+/* override inherited page sizes to always for the entire process */
+ #define MMF_THP_ALWAYS 18
#define MMF_HAS_UPROBES 19 /* has uprobes */
#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 15c18ef4eb11..22c526681562 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -364,4 +364,7 @@ struct prctl_mm_map {
# define PR_TIMER_CREATE_RESTORE_IDS_ON 1
# define PR_TIMER_CREATE_RESTORE_IDS_GET 2
+#define PR_SET_THP_ALWAYS 78
+#define PR_GET_THP_ALWAYS 79
+
#endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index c434968e9f5d..ee56b059ff1f 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2658,6 +2658,22 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
clear_bit(MMF_DISABLE_THP, &me->mm->flags);
mmap_write_unlock(me->mm);
break;
+ case PR_GET_THP_ALWAYS:
+ if (arg2 || arg3 || arg4 || arg5)
+ return -EINVAL;
+ error = !!test_bit(MMF_THP_ALWAYS, &me->mm->flags);
+ break;
+ case PR_SET_THP_ALWAYS:
+ if (arg3 || arg4 || arg5)
+ return -EINVAL;
+ if (mmap_write_lock_killable(me->mm))
+ return -EINTR;
+ if (arg2)
+ set_bit(MMF_THP_ALWAYS, &me->mm->flags);
+ else
+ clear_bit(MMF_THP_ALWAYS, &me->mm->flags);
+ mmap_write_unlock(me->mm);
+ break;
case PR_MPX_ENABLE_MANAGEMENT:
case PR_MPX_DISABLE_MANAGEMENT:
/* No longer implemented: */
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index 35791791a879..f5f6cff42b3f 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -328,4 +328,7 @@ struct prctl_mm_map {
# define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC 0x10 /* Clear the aspect on exec */
# define PR_PPC_DEXCR_CTRL_MASK 0x1f
+#define PR_SET_THP_ALWAYS 78
+#define PR_GET_THP_ALWAYS 79
+
#endif /* _LINUX_PRCTL_H */
diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h
index 15c18ef4eb11..680996d56faf 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h
@@ -364,4 +364,7 @@ struct prctl_mm_map {
# define PR_TIMER_CREATE_RESTORE_IDS_ON 1
# define PR_TIMER_CREATE_RESTORE_IDS_GET 2
+#define PR_SET_THP_ALWAYS 78
+#define PR_GET_THP_ALWAYS 79
+
#endif /* _LINUX_PRCTL_H */
--
2.47.1
Powered by blists - more mailing lists