Message-ID: <6facf552-924f-2af1-03e5-99957a90bfd0@gmail.com>
Date: Sat, 28 Dec 2019 14:15:54 +0300
From: Pavel Begunkov <asml.silence@...il.com>
To: Jens Axboe <axboe@...nel.dk>, io-uring@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH v4 2/2] io_uring: batch getting pcpu references
On 28/12/2019 14:13, Pavel Begunkov wrote:
> percpu_ref_tryget() has its own overhead. Instead of getting a reference
> for each request, grab a bunch once per io_submit_sqes().
>
> ~5% throughput boost for a "submit and wait 128 nops" benchmark.
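
To make the batching idea concrete outside the kernel, here is a minimal
user-space sketch of the same pattern: take references for a whole batch
up front with a single atomic operation, then hand back whatever was not
consumed. It uses a plain C11 atomic counter instead of percpu_ref, and
all names in it (struct ctx_refs, refs_tryget_many, submit_batch, ...)
are made up for illustration only, not the kernel API.

/* Illustrative user-space analogue of batched reference counting. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct ctx_refs {
	atomic_long refs;	/* stands in for percpu_ref */
	atomic_bool dying;	/* stands in for the ref being killed */
};

static bool refs_tryget_many(struct ctx_refs *r, long nr)
{
	if (atomic_load(&r->dying))
		return false;
	atomic_fetch_add(&r->refs, nr);
	return true;
}

static void refs_put_many(struct ctx_refs *r, long nr)
{
	atomic_fetch_sub(&r->refs, nr);
}

static long submit_batch(struct ctx_refs *r, long nr)
{
	long submitted = 0;

	/* one atomic op for the whole batch instead of one per request */
	if (!refs_tryget_many(r, nr))
		return -1;

	for (long i = 0; i < nr; i++) {
		/* pretend the SQ ring ran dry halfway through */
		if (i == nr / 2)
			break;
		submitted++;
	}

	/* hand back the references of requests that were never submitted;
	 * submitted requests keep theirs until they complete */
	if (submitted != nr)
		refs_put_many(r, nr - submitted);
	return submitted;
}

int main(void)
{
	struct ctx_refs r = { .refs = 1, .dying = false };

	printf("submitted %ld\n", submit_batch(&r, 128));
	printf("refs held %ld\n", atomic_load(&r.refs));
	return 0;
}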
>
> Signed-off-by: Pavel Begunkov <asml.silence@...il.com>
> ---
> fs/io_uring.c | 26 +++++++++++++++++---------
> 1 file changed, 17 insertions(+), 9 deletions(-)
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 7fc1158bf9a4..404946080e86 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -1080,9 +1080,6 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
> gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
> struct io_kiocb *req;
>
> - if (!percpu_ref_tryget(&ctx->refs))
> - return NULL;
> -
> if (!state) {
> req = kmem_cache_alloc(req_cachep, gfp);
> if (unlikely(!req))
> @@ -1141,6 +1138,14 @@ static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr)
> }
> }
>
> +static void __io_req_free_empty(struct io_kiocb *req)
If anybody has better naming (or a better approach altogether), I'm all ears.
> +{
> + if (likely(!io_is_fallback_req(req)))
> + kmem_cache_free(req_cachep, req);
> + else
> + clear_bit_unlock(0, (unsigned long *) req->ctx->fallback_req);
> +}
> +
> static void __io_free_req(struct io_kiocb *req)
> {
> struct io_ring_ctx *ctx = req->ctx;
> @@ -1162,11 +1167,9 @@ static void __io_free_req(struct io_kiocb *req)
> wake_up(&ctx->inflight_wait);
> spin_unlock_irqrestore(&ctx->inflight_lock, flags);
> }
> - percpu_ref_put(&ctx->refs);
> - if (likely(!io_is_fallback_req(req)))
> - kmem_cache_free(req_cachep, req);
> - else
> - clear_bit_unlock(0, (unsigned long *) ctx->fallback_req);
> +
> + percpu_ref_put(&req->ctx->refs);
> + __io_req_free_empty(req);
> }
>
> static bool io_link_cancel_timeout(struct io_kiocb *req)
> @@ -4551,6 +4554,9 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
> return -EBUSY;
> }
>
> + if (!percpu_ref_tryget_many(&ctx->refs, nr))
> + return -EAGAIN;
> +
> if (nr > IO_PLUG_THRESHOLD) {
> io_submit_state_start(&state, nr);
> statep = &state;
> @@ -4567,7 +4573,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
> break;
> }
> if (!io_get_sqring(ctx, req, &sqe)) {
> - __io_free_req(req);
> + __io_req_free_empty(req);
> break;
> }
>
> @@ -4598,6 +4604,8 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
> break;
> }
>
> + if (submitted != nr)
> + percpu_ref_put_many(&ctx->refs, nr - submitted);
> if (link)
> io_queue_link_head(link);
> if (statep)
>
--
Pavel Begunkov