[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20241122193305.7316-1-torvalds@linux-foundation.org>
Date: Fri, 22 Nov 2024 11:33:05 -0800
From: Linus Torvalds <torvalds@...ux-foundation.org>
To: Josh Poimboeuf <jpoimboe@...nel.org>
Cc: LKML <linux-kernel@...r.kernel.org>,
x86@...nel.org,
Linus Torvalds <torvalds@...ux-foundation.org>
Subject: [PATCH] futex: improve user space accesses
Josh Poimboeuf reports that he got a "will-it-scale.per_process_ops 1.9%
improvement" report for his patch that changed __get_user() to use
pointer masking instead of the explicit speculation barrier. However,
that patch doesn't actually work in the general case, because some (very
bad) architecture-specific code actually depends on __get_user() also
working on kernel addresses.
A profile showed that the offending __get_user() was the futex code,
which really should be fixed up to not use that horrid legacy case.
Rewrite futex_get_value_locked() to use the modern user acccess helpers,
and inline it so that the compiler not only avoids the function call for
a few instructions, but can do CSE on the address masking.
It also turns out the x86 futex functions have unnecessary barriers in
other places, so let's fix those up too.
Link: https://lore.kernel.org/all/20241115230653.hfvzyf3aqqntgp63@jpoimboe/
Reported-by: Josh Poimboeuf <jpoimboe@...nel.org>
Signed-off-by: Linus Torvalds <torvalds@...ux-foundation.org>
---
arch/x86/include/asm/futex.h | 8 +++--
kernel/futex/core.c | 22 --------------
kernel/futex/futex.h | 59 ++++++++++++++++++++++++++++++++++--
3 files changed, 63 insertions(+), 26 deletions(-)
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
index 99d345b686fa..6e2458088800 100644
--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -48,7 +48,9 @@ do { \
static __always_inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
u32 __user *uaddr)
{
- if (!user_access_begin(uaddr, sizeof(u32)))
+ if (can_do_masked_user_access())
+ uaddr = masked_user_access_begin(uaddr);
+ else if (!user_access_begin(uaddr, sizeof(u32)))
return -EFAULT;
switch (op) {
@@ -84,7 +86,9 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
{
int ret = 0;
- if (!user_access_begin(uaddr, sizeof(u32)))
+ if (can_do_masked_user_access())
+ uaddr = masked_user_access_begin(uaddr);
+ else if (!user_access_begin(uaddr, sizeof(u32)))
return -EFAULT;
asm volatile("\n"
"1:\t" LOCK_PREFIX "cmpxchgl %3, %2\n"
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index 326bfe6549d7..9833fdb63e39 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -451,28 +451,6 @@ struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, union futex_key *
return NULL;
}
-int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval)
-{
- int ret;
-
- pagefault_disable();
- ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
- pagefault_enable();
-
- return ret;
-}
-
-int futex_get_value_locked(u32 *dest, u32 __user *from)
-{
- int ret;
-
- pagefault_disable();
- ret = __get_user(*dest, from);
- pagefault_enable();
-
- return ret ? -EFAULT : 0;
-}
-
/**
* wait_for_owner_exiting - Block until the owner has exited
* @ret: owner's current futex lock status
diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h
index 8b195d06f4e8..323572014b32 100644
--- a/kernel/futex/futex.h
+++ b/kernel/futex/futex.h
@@ -6,6 +6,7 @@
#include <linux/rtmutex.h>
#include <linux/sched/wake_q.h>
#include <linux/compat.h>
+#include <linux/uaccess.h>
#ifdef CONFIG_PREEMPT_RT
#include <linux/rcuwait.h>
@@ -225,10 +226,64 @@ extern bool __futex_wake_mark(struct futex_q *q);
extern void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q);
extern int fault_in_user_writeable(u32 __user *uaddr);
-extern int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval);
-extern int futex_get_value_locked(u32 *dest, u32 __user *from);
extern struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, union futex_key *key);
+static inline int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval)
+{
+ int ret;
+
+ pagefault_disable();
+ ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
+ pagefault_enable();
+
+ return ret;
+}
+
+/*
+ * This does a plain atomic user space read, and the user pointer has
+ * already been verified earlier by get_futex_key() to be both aligned
+ * and actually in user space, just like futex_atomic_cmpxchg_inatomic().
+ *
+ * We still want to avoid any speculation, and while __get_user() is
+ * the traditional model for this, it's actually slower then doing
+ * this manually these days.
+ *
+ * We could just have a per-architecture special function for it,
+ * the same way we do futex_atomic_cmpxchg_inatomic(), but rather
+ * than force everybody to do that, write it out long-hand using
+ * the low-level user-access infrastructure.
+ *
+ * This looks a bit overkill, but generally just results in a couple
+ * of instructions.
+ */
+static __always_inline int futex_read_inatomic(u32 *dest, u32 __user *from)
+{
+ u32 val;
+
+ if (can_do_masked_user_access())
+ from = masked_user_access_begin(from);
+ else if (!user_read_access_begin(from, sizeof(*from)))
+ return -EFAULT;
+ unsafe_get_user(val, from, Efault);
+ user_access_end();
+ *dest = val;
+ return 0;
+Efault:
+ user_access_end();
+ return -EFAULT;
+}
+
+static inline int futex_get_value_locked(u32 *dest, u32 __user *from)
+{
+ int ret;
+
+ pagefault_disable();
+ ret = futex_read_inatomic(dest, from);
+ pagefault_enable();
+
+ return ret;
+}
+
extern void __futex_unqueue(struct futex_q *q);
extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb);
extern int futex_unqueue(struct futex_q *q);
--
2.47.0.293.g502c15559b
Powered by blists - more mailing lists