[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <2fded39d933cb51f0992fd51416ddef5f0e81493.1622558659.git.asml.silence@gmail.com>
Date: Tue, 1 Jun 2021 15:58:26 +0100
From: Pavel Begunkov <asml.silence@...il.com>
To: io-uring@...r.kernel.org
Cc: Jens Axboe <axboe@...nel.dk>, Andres Freund <andres@...razel.de>,
Thomas Gleixner <tglx@...utronix.de>,
Ingo Molnar <mingo@...hat.com>,
Peter Zijlstra <peterz@...radead.org>,
Darren Hart <dvhart@...radead.org>,
Davidlohr Bueso <dave@...olabs.net>,
linux-kernel@...r.kernel.org
Subject: [RFC 1/4] futex: add op wake for a single key
Add a new futex wake function futex_wake_op_single(), which works
similar to futex_wake_op() but only for a single futex address as it
takes too many arguments. Also export it and other functions that will
be used by io_uring.
Signed-off-by: Pavel Begunkov <asml.silence@...il.com>
---
include/linux/futex.h | 15 ++++++++++
kernel/futex.c | 64 +++++++++++++++++++++++++++++++++++++++++--
2 files changed, 77 insertions(+), 2 deletions(-)
diff --git a/include/linux/futex.h b/include/linux/futex.h
index b70df27d7e85..04d500ae5983 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -77,6 +77,10 @@ void futex_exec_release(struct task_struct *tsk);
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
u32 __user *uaddr2, u32 val2, u32 val3);
+int futex_wake_op_single(u32 __user *uaddr, int nr_wake, unsigned int op,
+ bool shared, bool try);
+int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
+ ktime_t *abs_time, u32 bitset);
#else
static inline void futex_init_task(struct task_struct *tsk) { }
static inline void futex_exit_recursive(struct task_struct *tsk) { }
@@ -88,6 +92,17 @@ static inline long do_futex(u32 __user *uaddr, int op, u32 val,
{
return -EINVAL;
}
+static inline int futex_wake_op_single(u32 __user *uaddr, int nr_wake,
+ unsigned int op, bool shared, bool try)
+{
+ return -EINVAL;
+}
+static inline int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
+ ktime_t *abs_time, u32 bitset)
+{
+ return -EINVAL;
+}
+
#endif
#endif
diff --git a/kernel/futex.c b/kernel/futex.c
index 4938a00bc785..75dc600062a4 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1681,6 +1681,66 @@ static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
}
}
+int futex_wake_op_single(u32 __user *uaddr, int nr_wake, unsigned int op,
+ bool shared, bool try)
+{
+ union futex_key key;
+ struct futex_hash_bucket *hb;
+ struct futex_q *this, *next;
+ int ret, op_ret;
+ DEFINE_WAKE_Q(wake_q);
+
+retry:
+ ret = get_futex_key(uaddr, shared, &key, FUTEX_WRITE);
+ if (unlikely(ret != 0))
+ return ret;
+ hb = hash_futex(&key);
+retry_private:
+ spin_lock(&hb->lock);
+ op_ret = futex_atomic_op_inuser(op, uaddr);
+ if (unlikely(op_ret < 0)) {
+ spin_unlock(&hb->lock);
+
+ if (!IS_ENABLED(CONFIG_MMU) ||
+ unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) {
+ /*
+ * we don't get EFAULT from MMU faults if we don't have
+ * an MMU, but we might get them from range checking
+ */
+ ret = op_ret;
+ return ret;
+ }
+ if (try)
+ return -EAGAIN;
+
+ if (op_ret == -EFAULT) {
+ ret = fault_in_user_writeable(uaddr);
+ if (ret)
+ return ret;
+ }
+ cond_resched();
+ if (shared)
+ goto retry;
+ goto retry_private;
+ }
+ if (op_ret) {
+ plist_for_each_entry_safe(this, next, &hb->chain, list) {
+ if (match_futex(&this->key, &key)) {
+ if (this->pi_state || this->rt_waiter) {
+ ret = -EINVAL;
+ break;
+ }
+ mark_wake_futex(&wake_q, this);
+ if (++ret >= nr_wake)
+ break;
+ }
+ }
+ }
+ spin_unlock(&hb->lock);
+ wake_up_q(&wake_q);
+ return ret;
+}
+
/*
* Wake up all waiters hashed on the physical page that is mapped
* to this virtual address:
@@ -2680,8 +2740,8 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
return ret;
}
-static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
- ktime_t *abs_time, u32 bitset)
+int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
+ ktime_t *abs_time, u32 bitset)
{
struct hrtimer_sleeper timeout, *to;
struct restart_block *restart;
--
2.31.1
Powered by blists - more mailing lists