[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <79fa9cc2-e0cc-922f-89d3-9ace59abb2e8@gmail.com>
Date: Tue, 24 Sep 2019 20:33:26 +0300
From: Pavel Begunkov <asml.silence@...il.com>
To: Jens Axboe <axboe@...nel.dk>, Peter Zijlstra <peterz@...radead.org>
Cc: Ingo Molnar <mingo@...nel.org>, Ingo Molnar <mingo@...hat.com>,
linux-block@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2 0/2] Optimise io_uring completion waiting
On 24/09/2019 16:13, Jens Axboe wrote:
> On 9/24/19 5:23 AM, Pavel Begunkov wrote:
>>> Yep that should do it, and saves 8 bytes of stack as well.
>>>
>>> BTW, did you test my patch, this one or the previous? Just curious if it
>>> worked for you.
>>>
>> Not yet, going to do that tonight
>
> Thanks! For reference, the final version is below. There was still a
> signal mishap in there, now it should all be correct afaict.
>
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 9b84232e5cc4..d2a86164d520 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -2768,6 +2768,38 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit,
> return submit;
> }
>
> +struct io_wait_queue {
> + struct wait_queue_entry wq;
> + struct io_ring_ctx *ctx;
> + unsigned to_wait;
> + unsigned nr_timeouts;
> +};
> +
> +static inline bool io_should_wake(struct io_wait_queue *iowq)
> +{
> + struct io_ring_ctx *ctx = iowq->ctx;
> +
> + /*
> + * Wake up if we have enough events, or if a timeout occurred since we
> + * started waiting. For timeouts, we always want to return to userspace,
> + * regardless of event count.
> + */
> + return io_cqring_events(ctx->rings) >= iowq->to_wait ||
> + atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts;
> +}
> +
> +static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
> + int wake_flags, void *key)
> +{
> + struct io_wait_queue *iowq = container_of(curr, struct io_wait_queue,
> + wq);
> +
> + if (!io_should_wake(iowq))
> + return -1;
This would wake up only the first task in the wait list. Is that the
semantics you want?
E.g. for exclusive waiters=[32,8] and nr_events == 8, io_wake_function()
returns -1 for the first waiter (@32), so the wake-up walk stops there
and the second waiter (which could proceed) is never woken.
> +
> + return autoremove_wake_function(curr, mode, wake_flags, key);
> +}
> +
> /*
> * Wait until events become available, if we don't already have some. The
> * application must reap them itself, as they reside on the shared cq ring.
> @@ -2775,8 +2807,16 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit,
> static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
> const sigset_t __user *sig, size_t sigsz)
> {
> + struct io_wait_queue iowq = {
> + .wq = {
> + .private = current,
> + .func = io_wake_function,
> + .entry = LIST_HEAD_INIT(iowq.wq.entry),
> + },
> + .ctx = ctx,
> + .to_wait = min_events,
> + };
> struct io_rings *rings = ctx->rings;
> - unsigned nr_timeouts;
> int ret;
>
> if (io_cqring_events(rings) >= min_events)
> @@ -2795,15 +2835,20 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
> return ret;
> }
>
> - nr_timeouts = atomic_read(&ctx->cq_timeouts);
> - /*
> - * Return if we have enough events, or if a timeout occured since
> - * we started waiting. For timeouts, we always want to return to
> - * userspace.
> - */
> - ret = wait_event_interruptible(ctx->wait,
> - io_cqring_events(rings) >= min_events ||
> - atomic_read(&ctx->cq_timeouts) != nr_timeouts);
> + iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
> + do {
> + prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
> + TASK_INTERRUPTIBLE);
> + if (io_should_wake(&iowq))
> + break;
> + schedule();
> + if (signal_pending(current)) {
> + ret = -ERESTARTSYS;
> + break;
> + }
> + } while (1);
> + finish_wait(&ctx->wait, &iowq.wq);
> +
> restore_saved_sigmask_unless(ret == -ERESTARTSYS);
> if (ret == -ERESTARTSYS)
> ret = -EINTR;
>
--
Yours sincerely,
Pavel Begunkov
Download attachment "signature.asc" of type "application/pgp-signature" (834 bytes)
Powered by blists - more mailing lists