Date:   Fri, 21 May 2021 00:06:59 +0000
From:   Song Liu <songliubraving@...com>
To:     Pavel Begunkov <asml.silence@...il.com>
CC:     "io-uring@...r.kernel.org" <io-uring@...r.kernel.org>,
        Networking <netdev@...r.kernel.org>,
        "bpf@...r.kernel.org" <bpf@...r.kernel.org>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        Jens Axboe <axboe@...nel.dk>,
        Alexei Starovoitov <ast@...nel.org>,
        Daniel Borkmann <daniel@...earbox.net>,
        Andrii Nakryiko <andrii@...nel.org>, Martin Lau <kafai@...com>,
        Yonghong Song <yhs@...com>,
        John Fastabend <john.fastabend@...il.com>,
        KP Singh <kpsingh@...nel.org>,
        Horst Schirmeier <horst.schirmeier@...dortmund.de>,
        "Franz-B . Tuneke" <franz-bernhard.tuneke@...dortmund.de>,
        Christian Dietrich <stettberger@...ucode.de>
Subject: Re: [PATCH 15/23] io_uring: enable BPF to submit SQEs



> On May 19, 2021, at 7:13 AM, Pavel Begunkov <asml.silence@...il.com> wrote:
> 
> Add a BPF_FUNC_iouring_queue_sqe BPF function as a demonstration of
> submitting a new request from a BPF request.
> 
> Signed-off-by: Pavel Begunkov <asml.silence@...il.com>
> ---
> fs/io_uring.c            | 51 ++++++++++++++++++++++++++++++++++++----
> include/uapi/linux/bpf.h |  1 +
> 2 files changed, 48 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 20fddc5945f2..aae786291c57 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -882,6 +882,7 @@ struct io_defer_entry {
> };
> 
> struct io_bpf_ctx {
> +	struct io_ring_ctx	*ctx;
> };
> 
> struct io_op_def {
> @@ -6681,7 +6682,8 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
> 			ret = -EBADF;
> 	}
> 
> -	state->ios_left--;
> +	if (state->ios_left > 1)
> +		state->ios_left--;
> 	return ret;
> }
> 
> @@ -10345,10 +10347,50 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
> 	return ret;
> }
> 
> +BPF_CALL_3(io_bpf_queue_sqe, struct io_bpf_ctx *,		bpf_ctx,
> +			     const struct io_uring_sqe *,	sqe,
> +			     u32,				sqe_len)
> +{
> +	struct io_ring_ctx *ctx = bpf_ctx->ctx;
> +	struct io_kiocb *req;
> +
> +	if (sqe_len != sizeof(struct io_uring_sqe))
> +		return -EINVAL;
> +
> +	req = io_alloc_req(ctx);
> +	if (unlikely(!req))
> +		return -ENOMEM;
> +	if (!percpu_ref_tryget_many(&ctx->refs, 1)) {
> +		kmem_cache_free(req_cachep, req);
> +		return -EAGAIN;
> +	}
> +	percpu_counter_add(&current->io_uring->inflight, 1);
> +	refcount_add(1, &current->usage);
> +
> +	/* returns number of submitted SQEs or an error */
> +	return !io_submit_sqe(ctx, req, sqe);
> +}
> +
> +const struct bpf_func_proto io_bpf_queue_sqe_proto = {
> +	.func = io_bpf_queue_sqe,
> +	.gpl_only = false,
> +	.ret_type = RET_INTEGER,
> +	.arg1_type = ARG_PTR_TO_CTX,
> +	.arg2_type = ARG_PTR_TO_MEM,
> +	.arg3_type = ARG_CONST_SIZE,
> +};
> +
> static const struct bpf_func_proto *
> io_bpf_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> {
> -	return bpf_base_func_proto(func_id);
> +	switch (func_id) {
> +	case BPF_FUNC_copy_from_user:
> +		return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
> +	case BPF_FUNC_iouring_queue_sqe:
> +		return prog->aux->sleepable ? &io_bpf_queue_sqe_proto : NULL;
> +	default:
> +		return bpf_base_func_proto(func_id);
> +	}
> }
> 
> static bool io_bpf_is_valid_access(int off, int size,
> @@ -10379,9 +10421,10 @@ static void io_bpf_run(struct io_kiocb *req, unsigned int issue_flags)
> 		     atomic_read(&req->task->io_uring->in_idle)))
> 		goto done;
> 
> -	memset(&bpf_ctx, 0, sizeof(bpf_ctx));
> +	bpf_ctx.ctx = ctx;
> 	prog = req->bpf.prog;
> 
> +	io_submit_state_start(&ctx->submit_state, 1);
> 	if (prog->aux->sleepable) {
> 		rcu_read_lock();
> 		bpf_prog_run_pin_on_cpu(req->bpf.prog, &bpf_ctx);
> @@ -10389,7 +10432,7 @@ static void io_bpf_run(struct io_kiocb *req, unsigned int issue_flags)
> 	} else {
> 		bpf_prog_run_pin_on_cpu(req->bpf.prog, &bpf_ctx);
> 	}
> -
> +	io_submit_state_end(&ctx->submit_state, ctx);
> 	ret = 0;
> done:
> 	__io_req_complete(req, issue_flags, ret, 0);
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index de544f0fbeef..cc268f749a7d 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -4082,6 +4082,7 @@ union bpf_attr {
> 	FN(ima_inode_hash),		\
> 	FN(sock_from_file),		\
> 	FN(check_mtu),			\
> +	FN(iouring_queue_sqe),		\

We need to describe this function in the comment above, just like 20/23 does. 
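
Something along these lines, perhaps (just a rough sketch following the format of
the other helper descriptions in include/uapi/linux/bpf.h; the exact wording,
signature spelling, and return convention should of course match what the final
helper actually does):

 * long bpf_iouring_queue_sqe(void *ctx, const struct io_uring_sqe *sqe, u32 sqe_len)
 *	Description
 *		Queue a new io_uring request described by *sqe* (where *sqe_len*
 *		must equal sizeof(struct io_uring_sqe)) on the ring associated
 *		with the program's context *ctx*. Only available to sleepable
 *		io_uring BPF programs.
 *	Return
 *		Number of SQEs queued (0 or 1), or a negative error such as
 *		-EINVAL for a bad *sqe_len*, or -ENOMEM/-EAGAIN if the request
 *		could not be allocated.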

> 	/* */
> 
> /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> -- 
> 2.31.1
> 
