Message-ID: <5864844f-db69-d025-1eb3-f856257415be@fb.com>
Date: Tue, 17 Dec 2019 06:14:09 +0000
From: Yonghong Song <yhs@...com>
To: Martin Lau <kafai@...com>,
"bpf@...r.kernel.org" <bpf@...r.kernel.org>
CC: Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
David Miller <davem@...emloft.net>,
Kernel Team <Kernel-team@...com>,
"netdev@...r.kernel.org" <netdev@...r.kernel.org>
Subject: Re: [PATCH bpf-next 05/13] bpf: Introduce BPF_PROG_TYPE_STRUCT_OPS
On 12/13/19 4:47 PM, Martin KaFai Lau wrote:
> This patch allows the kernel's struct ops (i.e. func ptr) to be
> implemented in BPF. The first use case in this series is the
> "struct tcp_congestion_ops" which will be introduced in a
> later patch.
>
> This patch introduces a new prog type BPF_PROG_TYPE_STRUCT_OPS.
> The BPF_PROG_TYPE_STRUCT_OPS prog is verified against a particular
> func ptr of a kernel struct. The attr->attach_btf_id is the btf id
> of a kernel struct. The attr->expected_attach_type is the member
> "index" of that kernel struct. The first member of a struct starts
> with member index 0. That will avoid ambiguity when a kernel struct
> has multiple func ptrs with the same func signature.
>
> For example, a BPF_PROG_TYPE_STRUCT_OPS prog is written
> to implement the "init" func ptr of the "struct tcp_congestion_ops".
> The attr->attach_btf_id is the btf id of the "struct tcp_congestion_ops"
> of the _running_ kernel. The attr->expected_attach_type is 3.
>
> The ctx of a BPF_PROG_TYPE_STRUCT_OPS prog is an array of u64 args
> saved by arch_prepare_bpf_trampoline, which will be added in the
> next patch when introducing BPF_MAP_TYPE_STRUCT_OPS.
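IIUC, from the prog's point of view that means roughly the following
(a sketch; assuming the func ptr being implemented takes a single
"struct sock *" argument, and the prog name is made up):

        /* ctx is the u64 args array prepared by the trampoline */
        int a_struct_ops_prog(u64 *ctx)
        {
                struct sock *sk = (struct sock *)ctx[0];

                /* ... */
                return 0;
        }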
>
> "struct bpf_struct_ops" is introduced as a common interface for the kernel
> struct that supports BPF_PROG_TYPE_STRUCT_OPS prog. The supporting kernel
> struct will need to implement an instance of the "struct bpf_struct_ops".
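So for the tcp_congestion_ops use case, the supporting code would
provide something along these lines (a sketch; the instance and
callback names here are made up, the later patches may differ):

        struct bpf_struct_ops bpf_tcp_congestion_ops = {
                .verifier_ops   = &bpf_tcp_ca_verifier_ops,
                .init           = bpf_tcp_ca_init,
                .check_member   = bpf_tcp_ca_check_member,
                .name           = "tcp_congestion_ops",
        };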
>
> The supporting kernel struct also needs to implement a bpf_verifier_ops.
> During BPF_PROG_LOAD, bpf_struct_ops_find() will find the right
> bpf_verifier_ops by searching the attr->attach_btf_id.
>
> A new "btf_struct_access" is also added to the bpf_verifier_ops such
> that the supporting kernel struct can optionally provide its own specific
> check on accessing the func arg (e.g. provide limited write access).
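i.e. a supporting struct could do something like this in its
verifier_ops (a sketch; the callback name and the writable-range
macro are made up):

        static int example_btf_struct_access(struct bpf_verifier_log *log,
                                             const struct btf_type *t,
                                             int off, int size,
                                             enum bpf_access_type atype,
                                             u32 *next_btf_id)
        {
                if (atype == BPF_READ)
                        return btf_struct_access(log, t, off, size, atype,
                                                 next_btf_id);

                /* BPF_WRITE: only allow a specific member range.  For
                 * writes the verifier only checks for a negative return
                 * (see the check_ptr_to_btf_access() change below).
                 */
                if (off + size > EXAMPLE_WRITABLE_END)
                        return -EACCES;

                return 0;
        }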
>
> After btf_vmlinux is parsed, the new bpf_struct_ops_init() is called
> to initialize some values (e.g. the btf id of the supporting kernel
> struct) and it can only be done once the btf_vmlinux is available.
>
> The R0 check at BPF_EXIT is excluded for the BPF_PROG_TYPE_STRUCT_OPS prog
> if the return type of the prog->aux->attach_func_proto is "void".
>
> Signed-off-by: Martin KaFai Lau <kafai@...com>
> ---
> include/linux/bpf.h | 30 +++++++
> include/linux/bpf_types.h | 4 +
> include/linux/btf.h | 34 ++++++++
> include/uapi/linux/bpf.h | 1 +
> kernel/bpf/Makefile | 2 +-
> kernel/bpf/bpf_struct_ops.c | 124 +++++++++++++++++++++++++++
> kernel/bpf/bpf_struct_ops_types.h | 4 +
> kernel/bpf/btf.c | 88 ++++++++++++++------
> kernel/bpf/syscall.c | 17 ++--
> kernel/bpf/verifier.c | 134 +++++++++++++++++++++++-------
> 10 files changed, 374 insertions(+), 64 deletions(-)
> create mode 100644 kernel/bpf/bpf_struct_ops.c
> create mode 100644 kernel/bpf/bpf_struct_ops_types.h
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index d467983e61bb..1f0a5fc8c5ee 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -349,6 +349,10 @@ struct bpf_verifier_ops {
> const struct bpf_insn *src,
> struct bpf_insn *dst,
> struct bpf_prog *prog, u32 *target_size);
> + int (*btf_struct_access)(struct bpf_verifier_log *log,
> + const struct btf_type *t, int off, int size,
> + enum bpf_access_type atype,
> + u32 *next_btf_id);
> };
>
> struct bpf_prog_offload_ops {
> @@ -667,6 +671,32 @@ struct bpf_array_aux {
> struct work_struct work;
> };
>
> +struct btf_type;
> +struct btf_member;
> +
> +#define BPF_STRUCT_OPS_MAX_NR_MEMBERS 64
> +struct bpf_struct_ops {
> + const struct bpf_verifier_ops *verifier_ops;
> + int (*init)(struct btf *_btf_vmlinux);
> + int (*check_member)(const struct btf_type *t,
> + const struct btf_member *member);
> + const struct btf_type *type;
> + const char *name;
> + struct btf_func_model func_models[BPF_STRUCT_OPS_MAX_NR_MEMBERS];
> + u32 type_id;
> +};
> +
> +#if defined(CONFIG_BPF_JIT)
> +const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id);
> +void bpf_struct_ops_init(struct btf *_btf_vmlinux);
> +#else
> +static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id)
> +{
> + return NULL;
> +}
> +static inline void bpf_struct_ops_init(struct btf *_btf_vmlinux) { }
> +#endif
> +
> struct bpf_array {
> struct bpf_map map;
> u32 elem_size;
[...]
> +const struct bpf_verifier_ops bpf_struct_ops_verifier_ops = {
> +};
> +
> +const struct bpf_prog_ops bpf_struct_ops_prog_ops = {
> +};
> +
> +void bpf_struct_ops_init(struct btf *_btf_vmlinux)
> +{
> + const struct btf_member *member;
> + struct bpf_struct_ops *st_ops;
> + struct bpf_verifier_log log = {};
> + const struct btf_type *t;
> + const char *mname;
> + s32 type_id;
> + u32 i, j;
> +
> + for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) {
> + st_ops = bpf_struct_ops[i];
> +
> + type_id = btf_find_by_name_kind(_btf_vmlinux, st_ops->name,
> + BTF_KIND_STRUCT);
> + if (type_id < 0) {
> + pr_warn("Cannot find struct %s in btf_vmlinux\n",
> + st_ops->name);
> + continue;
> + }
> + t = btf_type_by_id(_btf_vmlinux, type_id);
> + if (btf_type_vlen(t) > BPF_STRUCT_OPS_MAX_NR_MEMBERS) {
> + pr_warn("Cannot support #%u members in struct %s\n",
> + btf_type_vlen(t), st_ops->name);
> + continue;
> + }
> +
> + for_each_member(j, t, member) {
> + const struct btf_type *func_proto;
> +
> + mname = btf_name_by_offset(_btf_vmlinux,
> + member->name_off);
> + if (!*mname) {
> + pr_warn("anon member in struct %s is not supported\n",
> + st_ops->name);
> + break;
> + }
> +
> + if (btf_member_bitfield_size(t, member)) {
> + pr_warn("bit field member %s in struct %s is not supported\n",
> + mname, st_ops->name);
> + break;
> + }
> +
> + func_proto = btf_type_resolve_func_ptr(_btf_vmlinux,
> + member->type,
> + NULL);
> + if (func_proto &&
> + btf_distill_func_proto(&log, _btf_vmlinux,
> + func_proto, mname,
> + &st_ops->func_models[j])) {
> + pr_warn("Error in parsing func ptr %s in struct %s\n",
> + mname, st_ops->name);
> + break;
> + }
> + }
> +
> + if (j == btf_type_vlen(t)) {
> + if (st_ops->init(_btf_vmlinux)) {
Is it possible that st_ops->init might be a NULL pointer?
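If ->init is meant to be optional for a supporting struct, maybe guard
the call, e.g. (just a sketch):

                        if (st_ops->init && st_ops->init(_btf_vmlinux)) {

or alternatively document/enforce that every bpf_struct_ops instance
must provide ->init.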
> + pr_warn("Error in init bpf_struct_ops %s\n",
> + st_ops->name);
> + } else {
> + st_ops->type_id = type_id;
> + st_ops->type = t;
> + }
> + }
> + }
> +}
> +
> +extern struct btf *btf_vmlinux;
> +
[...]
> index 408264c1d55b..4c1eaa1a2965 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -2858,11 +2858,6 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
> u32 btf_id;
> int ret;
>
> - if (atype != BPF_READ) {
> - verbose(env, "only read is supported\n");
> - return -EACCES;
> - }
> -
> if (off < 0) {
> verbose(env,
> "R%d is ptr_%s invalid negative access: off=%d\n",
> @@ -2879,17 +2874,32 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
> return -EACCES;
> }
>
> - ret = btf_struct_access(&env->log, t, off, size, atype, &btf_id);
> + if (env->ops->btf_struct_access) {
> + ret = env->ops->btf_struct_access(&env->log, t, off, size,
> + atype, &btf_id);
> + } else {
> + if (atype != BPF_READ) {
> + verbose(env, "only read is supported\n");
> + return -EACCES;
> + }
> +
> + ret = btf_struct_access(&env->log, t, off, size, atype,
> + &btf_id);
> + }
> +
> if (ret < 0)
> return ret;
>
> - if (ret == SCALAR_VALUE) {
> - mark_reg_unknown(env, regs, value_regno);
> - return 0;
> + if (atype == BPF_READ) {
> + if (ret == SCALAR_VALUE) {
> + mark_reg_unknown(env, regs, value_regno);
> + return 0;
> + }
> + mark_reg_known_zero(env, regs, value_regno);
> + regs[value_regno].type = PTR_TO_BTF_ID;
> + regs[value_regno].btf_id = btf_id;
> }
> - mark_reg_known_zero(env, regs, value_regno);
> - regs[value_regno].type = PTR_TO_BTF_ID;
> - regs[value_regno].btf_id = btf_id;
> +
> return 0;
> }
>
> @@ -6343,8 +6353,30 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
> static int check_return_code(struct bpf_verifier_env *env)
> {
> struct tnum enforce_attach_type_range = tnum_unknown;
> + const struct bpf_prog *prog = env->prog;
> struct bpf_reg_state *reg;
> struct tnum range = tnum_range(0, 1);
> + int err;
> +
> + /* The struct_ops func-ptr's return type could be "void" */
> + if (env->prog->type == BPF_PROG_TYPE_STRUCT_OPS &&
> + !prog->aux->attach_func_proto->type)
> + return 0;
> +
> + /* eBPF calling convetion is such that R0 is used
> + * to return the value from eBPF program.
> + * Make sure that it's readable at this time
> + * of bpf_exit, which means that program wrote
> + * something into it earlier
> + */
> + err = check_reg_arg(env, BPF_REG_0, SRC_OP);
> + if (err)
> + return err;
> +
> + if (is_pointer_value(env, BPF_REG_0)) {
> + verbose(env, "R0 leaks addr as return value\n");
> + return -EACCES;
> + }
>
> switch (env->prog->type) {
> case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
> @@ -8010,21 +8042,6 @@ static int do_check(struct bpf_verifier_env *env)
> if (err)
> return err;
>
> - /* eBPF calling convetion is such that R0 is used
> - * to return the value from eBPF program.
> - * Make sure that it's readable at this time
> - * of bpf_exit, which means that program wrote
> - * something into it earlier
> - */
> - err = check_reg_arg(env, BPF_REG_0, SRC_OP);
> - if (err)
> - return err;
> -
> - if (is_pointer_value(env, BPF_REG_0)) {
> - verbose(env, "R0 leaks addr as return value\n");
> - return -EACCES;
> - }
> -
> err = check_return_code(env);
> if (err)
> return err;
> @@ -8833,12 +8850,14 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
> convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
> break;
> case PTR_TO_BTF_ID:
> - if (type == BPF_WRITE) {
> + if (type == BPF_READ) {
> + insn->code = BPF_LDX | BPF_PROBE_MEM |
> + BPF_SIZE((insn)->code);
> + env->prog->aux->num_exentries++;
> + } else if (env->prog->type != BPF_PROG_TYPE_STRUCT_OPS) {
> verbose(env, "Writes through BTF pointers are not allowed\n");
> return -EINVAL;
> }
> - insn->code = BPF_LDX | BPF_PROBE_MEM | BPF_SIZE((insn)->code);
> - env->prog->aux->num_exentries++;
Do we need to increase num_exentries for BPF_WRITE as well?
> continue;
> default:
> continue;
> @@ -9505,6 +9524,58 @@ static void print_verification_stats(struct bpf_verifier_env *env)
> env->peak_states, env->longest_mark_read_walk);
> }
>
[...]