[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20130802205735.GS26476@sgi.com>
Date: Fri, 2 Aug 2013 15:57:35 -0500
From: Alex Thorlton <athorlton@....com>
To: linux-kernel@...r.kernel.org
Cc: Ingo Molnar <mingo@...hat.com>,
Peter Zijlstra <peterz@...radead.org>,
Andrew Morton <akpm@...ux-foundation.org>,
Mel Gorman <mgorman@...e.de>,
"Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>,
Rik van Riel <riel@...hat.com>,
Johannes Weiner <hannes@...xchg.org>,
"Eric W. Biederman" <ebiederm@...ssion.com>,
Sedat Dilek <sedat.dilek@...il.com>,
Frederic Weisbecker <fweisbec@...il.com>,
Dave Jones <davej@...hat.com>,
Michael Kerrisk <mtk.manpages@...il.com>,
"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>,
David Howells <dhowells@...hat.com>,
Thomas Gleixner <tglx@...utronix.de>,
Al Viro <viro@...iv.linux.org.uk>,
Oleg Nesterov <oleg@...hat.com>,
Srikar Dronamraju <srikar@...ux.vnet.ibm.com>,
Kees Cook <keescook@...omium.org>, Robin Holt <holt@....com>,
linux-kernel@...r.kernel.org
Subject: [PATCHv2] Add per-process flag to control thp
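This patch implements functionality to allow processes to disable the use of
transparent hugepages (THP) through the prctl syscall, via two new options:
PR_SET_THP_DISABLED sets a per-task thp_disabled flag, and PR_GET_THP_DISABLED
reports its current value. The flag is copied from the parent to each new task
in copy_process().
We've determined that some jobs perform significantly better with THP disabled,
and we needed a way to control THP on a per-process basis, without relying on
madvise.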
v2 - tweaked thp_disabled flag to be a single bit instead of an int
---
include/linux/huge_mm.h | 14 +++++++++++++-
include/linux/init_task.h | 8 ++++++++
include/linux/sched.h | 4 ++++
include/uapi/linux/prctl.h | 3 +++
kernel/fork.c | 4 ++++
kernel/sys.c | 31 +++++++++++++++++++++++++++++++
6 files changed, 63 insertions(+), 1 deletion(-)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index b60de92..53af3ca 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -1,6 +1,8 @@
#ifndef _LINUX_HUGE_MM_H
#define _LINUX_HUGE_MM_H
+#include <linux/sched.h>
+
extern int do_huge_pmd_anonymous_page(struct mm_struct *mm,
struct vm_area_struct *vma,
unsigned long address, pmd_t *pmd,
@@ -66,7 +68,7 @@ extern pmd_t *page_check_address_pmd(struct page *page,
extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
-#define transparent_hugepage_enabled(__vma) \
+#define _transparent_hugepage_enabled(__vma) \
((transparent_hugepage_flags & \
(1<<TRANSPARENT_HUGEPAGE_FLAG) || \
(transparent_hugepage_flags & \
@@ -177,6 +179,11 @@ static inline struct page *compound_trans_head(struct page *page)
return page;
}
+static inline int transparent_hugepage_enabled(struct vm_area_struct *vma)
+{
+ return !current->thp_disabled && _transparent_hugepage_enabled(vma);
+}
+
extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, pmd_t pmd, pmd_t *pmdp);
@@ -230,6 +237,11 @@ static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_str
return 0;
}
+static inline int transparent_hugepage_enabled(struct vm_area_struct *vma)
+{
+ return _transparent_hugepage_enabled(vma);
+}
+
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif /* _LINUX_HUGE_MM_H */
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 5cd0f09..aae74fd 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -152,6 +152,13 @@ extern struct task_group root_task_group;
# define INIT_VTIME(tsk)
#endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+# define INIT_THP_DISABLED \
+ .thp_disabled = 0,
+#else
+# define INIT_THP_DISABLED
+#endif
+
#define INIT_TASK_COMM "swapper"
/*
@@ -222,6 +229,7 @@ extern struct task_group root_task_group;
INIT_TASK_RCU_PREEMPT(tsk) \
INIT_CPUSET_SEQ \
INIT_VTIME(tsk) \
+ INIT_THP_DISABLED \
}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 50d04b9..c14cf47 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1117,6 +1117,10 @@ struct task_struct {
unsigned sched_reset_on_fork:1;
unsigned sched_contributes_to_load:1;
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ unsigned thp_disabled:1;
+#endif
+
pid_t pid;
pid_t tgid;
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 289760f..f69780d 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -149,4 +149,7 @@
#define PR_GET_TID_ADDRESS 40
+#define PR_SET_THP_DISABLED 41
+#define PR_GET_THP_DISABLED 42
+
#endif /* _LINUX_PRCTL_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index 403d2bb..0b4afb5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1311,6 +1311,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->sequential_io_avg = 0;
#endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ p->thp_disabled = current->thp_disabled;
+#endif
+
/* Perform scheduler related setup. Assign this task to a CPU. */
sched_fork(p);
diff --git a/kernel/sys.c b/kernel/sys.c
index 771129b..416c8a6 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1836,6 +1836,31 @@ static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
}
#endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static int prctl_set_thp_disabled(struct task_struct *me)
+{
+ me->thp_disabled = 1;
+ return 0;
+}
+
+static int prctl_get_thp_disabled(struct task_struct *me,
+ int __user *thp_disabled)
+{
+ return put_user(me->thp_disabled, thp_disabled);
+}
+#else
+static int prctl_set_thp_disabled(struct task_struct *me)
+{
+ return -EINVAL;
+}
+
+static int prctl_get_thp_disabled(struct task_struct *me,
+ int __user *thp_disabled)
+{
+ return -EINVAL;
+}
+#endif
+
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
unsigned long, arg4, unsigned long, arg5)
{
@@ -1999,6 +2024,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
if (arg2 || arg3 || arg4 || arg5)
return -EINVAL;
return current->no_new_privs ? 1 : 0;
+ case PR_SET_THP_DISABLED:
+ error = prctl_set_thp_disabled(me);
+ break;
+ case PR_GET_THP_DISABLED:
+ error = prctl_get_thp_disabled(me, (int __user *) arg2);
+ break;
default:
error = -EINVAL;
break;
--
1.7.12.4
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists