[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20210608220839.c3xuapju2efn2k24@kafai-mbp>
Date: Tue, 8 Jun 2021 15:08:39 -0700
From: Martin KaFai Lau <kafai@...com>
To: Tanner Love <tannerlove.kernel@...il.com>
CC: <netdev@...r.kernel.org>, <davem@...emloft.net>,
Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
Andrii Nakryiko <andrii@...nel.org>,
Eric Dumazet <edumazet@...gle.com>,
Willem de Bruijn <willemb@...gle.com>,
Petar Penkov <ppenkov@...gle.com>,
Jakub Kicinski <kuba@...nel.org>,
"Michael S . Tsirkin" <mst@...hat.com>,
Jason Wang <jasowang@...hat.com>,
Tanner Love <tannerlove@...gle.com>,
Stanislav Fomichev <sdf@...gle.com>
Subject: Re: [PATCH net-next v4 1/3] net: flow_dissector: extend bpf flow
dissector support with vnet hdr
On Tue, Jun 08, 2021 at 01:02:22PM -0400, Tanner Love wrote:
[ ... ]
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 9dc44ba97584..a333e6177de1 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -430,6 +430,8 @@ enum bpf_reg_type {
> PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */
> PTR_TO_FUNC, /* reg points to a bpf program function */
> PTR_TO_MAP_KEY, /* reg points to a map element key */
> + PTR_TO_VNET_HDR, /* reg points to struct virtio_net_hdr */
> + PTR_TO_VNET_HDR_OR_NULL, /* reg points to virtio_net_hdr or NULL */
> __BPF_REG_TYPE_MAX,
> };
>
[ ... ]
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 418b9b813d65..e1ac34548f9a 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -6017,6 +6017,8 @@ struct bpf_flow_keys {
> };
> __u32 flags;
> __be32 flow_label;
> + __bpf_md_ptr(const struct virtio_net_hdr *, vhdr);
> + __u8 vhdr_is_little_endian;
> };
>
> struct bpf_func_info {
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 331b170d9fcc..2962b537da28 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -22,6 +22,7 @@
> #include <linux/error-injection.h>
> #include <linux/bpf_lsm.h>
> #include <linux/btf_ids.h>
> +#include <linux/virtio_net.h>
>
> #include "disasm.h"
>
> @@ -441,7 +442,8 @@ static bool reg_type_not_null(enum bpf_reg_type type)
> type == PTR_TO_TCP_SOCK ||
> type == PTR_TO_MAP_VALUE ||
> type == PTR_TO_MAP_KEY ||
> - type == PTR_TO_SOCK_COMMON;
> + type == PTR_TO_SOCK_COMMON ||
> + type == PTR_TO_VNET_HDR;
> }
>
> static bool reg_type_may_be_null(enum bpf_reg_type type)
> @@ -453,7 +455,8 @@ static bool reg_type_may_be_null(enum bpf_reg_type type)
> type == PTR_TO_BTF_ID_OR_NULL ||
> type == PTR_TO_MEM_OR_NULL ||
> type == PTR_TO_RDONLY_BUF_OR_NULL ||
> - type == PTR_TO_RDWR_BUF_OR_NULL;
> + type == PTR_TO_RDWR_BUF_OR_NULL ||
> + type == PTR_TO_VNET_HDR_OR_NULL;
> }
>
> static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
> @@ -576,6 +579,8 @@ static const char * const reg_type_str[] = {
> [PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null",
> [PTR_TO_FUNC] = "func",
> [PTR_TO_MAP_KEY] = "map_key",
> + [PTR_TO_VNET_HDR] = "virtio_net_hdr",
> + [PTR_TO_VNET_HDR_OR_NULL] = "virtio_net_hdr_or_null",
> };
>
> static char slot_type_char[] = {
> @@ -1166,6 +1171,9 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
> case PTR_TO_RDWR_BUF_OR_NULL:
> reg->type = PTR_TO_RDWR_BUF;
> break;
> + case PTR_TO_VNET_HDR_OR_NULL:
> + reg->type = PTR_TO_VNET_HDR;
> + break;
> default:
> WARN_ONCE(1, "unknown nullable register type");
> }
> @@ -2528,6 +2536,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
> case PTR_TO_MEM_OR_NULL:
> case PTR_TO_FUNC:
> case PTR_TO_MAP_KEY:
> + case PTR_TO_VNET_HDR:
> + case PTR_TO_VNET_HDR_OR_NULL:
> return true;
> default:
> return false;
> @@ -3384,6 +3394,18 @@ static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
> return 0;
> }
>
> +static int check_virtio_net_hdr_access(struct bpf_verifier_env *env, int off,
> + int size)
> +{
> + if (size < 0 || off < 0 ||
> + (u64)off + size > sizeof(struct virtio_net_hdr)) {
> + verbose(env, "invalid access to virtio_net_hdr off=%d size=%d\n",
> + off, size);
> + return -EACCES;
> + }
> + return 0;
> +}
> +
> static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
> u32 regno, int off, int size,
> enum bpf_access_type t)
> @@ -3568,6 +3590,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
> case PTR_TO_XDP_SOCK:
> pointer_desc = "xdp_sock ";
> break;
> + case PTR_TO_VNET_HDR:
> + pointer_desc = "virtio_net_hdr ";
> + break;
> default:
> break;
> }
> @@ -4218,6 +4243,23 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
> }
>
> err = check_flow_keys_access(env, off, size);
> + if (!err && t == BPF_READ && value_regno >= 0) {
> + if (off == offsetof(struct bpf_flow_keys, vhdr)) {
> + regs[value_regno].type = PTR_TO_VNET_HDR_OR_NULL;
check_flow_keys_access() needs some modifications
1. What if t == BPF_WRITE? I think "keys->vhdr = 0xdead" has to be rejected.
2. It needs to check loading keys->vhdr is in sizeof(__u64) like other
pointer loading does. Take a look at the flow_keys case in
flow_dissector_is_valid_access().
It also needs to convert the pointer loading like how
flow_dissector_convert_ctx_access() does on flow_keys.
A high level design question. "struct virtio_net_hdr" is in uapi and
there is no need to do convert_ctx. I think using PTR_TO_BTF_ID_OR_NULL
will be easier here and the new PTR_TO_VNET_HDR* related changes will go away.
The "else if (reg->type == PTR_TO_CTX)" case earlier could be a good example.
To get the btf_id for "struct virtio_net_hdr", take a look at
the BTF_ID_LIST_SINGLE() usage in filter.c
> + /* required for dropping or_null */
> + regs[value_regno].id = ++env->id_gen;
> + } else {
> + mark_reg_unknown(env, regs, value_regno);
> + }
> + }
> + } else if (reg->type == PTR_TO_VNET_HDR) {
> + if (t == BPF_WRITE) {
> + verbose(env, "R%d cannot write into %s\n",
> + regno, reg_type_str[reg->type]);
> + return -EACCES;
> + }
> +
> + err = check_virtio_net_hdr_access(env, off, size);
> if (!err && t == BPF_READ && value_regno >= 0)
> mark_reg_unknown(env, regs, value_regno);
> } else if (type_is_sk_pointer(reg->type)) {
[ ... ]
> @@ -8390,6 +8392,30 @@ static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type,
> si->dst_reg, si->src_reg,
> offsetof(struct bpf_flow_dissector, flow_keys));
> break;
> +
> + case offsetof(struct __sk_buff, len):
> + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, skb),
> + si->dst_reg, si->src_reg,
> + offsetof(struct bpf_flow_dissector, skb));
> + *insn++ = BPF_JMP_IMM(BPF_JNE, si->dst_reg, 0, 4);
> + /* bpf_flow_dissector->skb == NULL */
> + /* dst_reg = bpf_flow_dissector->data_end */
> + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data_end),
> + si->dst_reg, si->src_reg,
> + offsetof(struct bpf_flow_dissector, data_end));
> + /* TMP = bpf_flow_dissector->data */
> + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data),
> + BPF_REG_TMP, si->src_reg,
I don't think BPF_REG_TMP can be used here. My understanding is that is for
classic bpf. Try BPF_REG_AX instead.
It will be a good idea to cover this case if it has not been done in patch 3.
> + offsetof(struct bpf_flow_dissector, data));
> + /* dst_reg -= bpf_flow_dissector->data */
> + *insn++ = BPF_ALU64_REG(BPF_SUB, si->dst_reg, BPF_REG_TMP);
> + *insn++ = BPF_JMP_A(1);
> + /* bpf_flow_dissector->skb != NULL */
> + /* bpf_flow_dissector->skb->len */
> + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len),
> + si->dst_reg, si->dst_reg,
> + offsetof(struct sk_buff, len));
> + break;
> }
>
> return insn - insn_buf;
Powered by blists - more mailing lists