[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20180912034632.bkxpktzvze242rvr@ast-mbp>
Date: Tue, 11 Sep 2018 20:46:34 -0700
From: Alexei Starovoitov <alexei.starovoitov@...il.com>
To: Petar Penkov <peterpenkov96@...il.com>
Cc: netdev@...r.kernel.org, davem@...emloft.net, ast@...nel.org,
daniel@...earbox.net, simon.horman@...ronome.com,
ecree@...arflare.com, songliubraving@...com, tom@...bertland.com,
Petar Penkov <ppenkov@...gle.com>,
Willem de Bruijn <willemb@...gle.com>
Subject: Re: [bpf-next, v2 1/3] flow_dissector: implements flow dissector BPF
hook
On Fri, Sep 07, 2018 at 05:11:08PM -0700, Petar Penkov wrote:
> From: Petar Penkov <ppenkov@...gle.com>
>
> Adds a hook for programs of type BPF_PROG_TYPE_FLOW_DISSECTOR and
> attach type BPF_FLOW_DISSECTOR that is executed in the flow dissector
> path. The BPF program is per-network namespace.
>
> Signed-off-by: Petar Penkov <ppenkov@...gle.com>
> Signed-off-by: Willem de Bruijn <willemb@...gle.com>
> ---
> include/linux/bpf.h | 1 +
> include/linux/bpf_types.h | 1 +
> include/linux/skbuff.h | 7 ++
> include/net/net_namespace.h | 3 +
> include/net/sch_generic.h | 12 ++-
> include/uapi/linux/bpf.h | 25 ++++++
> kernel/bpf/syscall.c | 8 ++
> kernel/bpf/verifier.c | 32 ++++++++
> net/core/filter.c | 67 ++++++++++++++++
> net/core/flow_dissector.c | 136 +++++++++++++++++++++++++++++++++
> tools/bpf/bpftool/prog.c | 1 +
> tools/include/uapi/linux/bpf.h | 25 ++++++
> tools/lib/bpf/libbpf.c | 2 +
Please split the update to tools/include/uapi/linux/bpf.h out as a separate patch 2.
We often have conflicts in there, so it is best to keep it separate.
Also, please split the tools/lib and tools/bpf changes into patch 3.
> 13 files changed, 317 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 523481a3471b..988a00797bcd 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -212,6 +212,7 @@ enum bpf_reg_type {
> PTR_TO_PACKET_META, /* skb->data - meta_len */
> PTR_TO_PACKET, /* reg points to skb->data */
> PTR_TO_PACKET_END, /* skb->data + headlen */
> + PTR_TO_FLOW_KEYS, /* reg points to bpf_flow_keys */
> };
>
> /* The information passed from prog-specific *_is_valid_access
> diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
> index cd26c090e7c0..22083712dd18 100644
> --- a/include/linux/bpf_types.h
> +++ b/include/linux/bpf_types.h
> @@ -32,6 +32,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
> #ifdef CONFIG_INET
> BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport)
> #endif
> +BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector)
>
> BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
> BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 17a13e4785fc..ce0e863f02a2 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -243,6 +243,8 @@ struct scatterlist;
> struct pipe_inode_info;
> struct iov_iter;
> struct napi_struct;
> +struct bpf_prog;
> +union bpf_attr;
>
> #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
> struct nf_conntrack {
> @@ -1192,6 +1194,11 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
> const struct flow_dissector_key *key,
> unsigned int key_count);
>
> +int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
> + struct bpf_prog *prog);
> +
> +int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr);
> +
> bool __skb_flow_dissect(const struct sk_buff *skb,
> struct flow_dissector *flow_dissector,
> void *target_container,
> diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
> index 9b5fdc50519a..99d4148e0f90 100644
> --- a/include/net/net_namespace.h
> +++ b/include/net/net_namespace.h
> @@ -43,6 +43,7 @@ struct ctl_table_header;
> struct net_generic;
> struct uevent_sock;
> struct netns_ipvs;
> +struct bpf_prog;
>
>
> #define NETDEV_HASHBITS 8
> @@ -145,6 +146,8 @@ struct net {
> #endif
> struct net_generic __rcu *gen;
>
> + struct bpf_prog __rcu *flow_dissector_prog;
> +
> /* Note : following structs are cache line aligned */
> #ifdef CONFIG_XFRM
> struct netns_xfrm xfrm;
> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> index a6d00093f35e..1b81ba85fd2d 100644
> --- a/include/net/sch_generic.h
> +++ b/include/net/sch_generic.h
> @@ -19,6 +19,7 @@ struct Qdisc_ops;
> struct qdisc_walker;
> struct tcf_walker;
> struct module;
> +struct bpf_flow_keys;
>
> typedef int tc_setup_cb_t(enum tc_setup_type type,
> void *type_data, void *cb_priv);
> @@ -307,9 +308,14 @@ struct tcf_proto {
> };
>
> struct qdisc_skb_cb {
> - unsigned int pkt_len;
> - u16 slave_dev_queue_mapping;
> - u16 tc_classid;
> + union {
> + struct {
> + unsigned int pkt_len;
> + u16 slave_dev_queue_mapping;
> + u16 tc_classid;
> + };
> + struct bpf_flow_keys *flow_keys;
> + };
Is this magic really necessary? flow_dissector runs very early in the recv path.
There is no qdisc and no conflict with the tcp/ip use of cb.
I think the whole cb block can be used.
> #define QDISC_CB_PRIV_LEN 20
> unsigned char data[QDISC_CB_PRIV_LEN];
> };
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 66917a4eba27..3064706fcaaa 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -152,6 +152,7 @@ enum bpf_prog_type {
> BPF_PROG_TYPE_LWT_SEG6LOCAL,
> BPF_PROG_TYPE_LIRC_MODE2,
> BPF_PROG_TYPE_SK_REUSEPORT,
> + BPF_PROG_TYPE_FLOW_DISSECTOR,
> };
>
> enum bpf_attach_type {
> @@ -172,6 +173,7 @@ enum bpf_attach_type {
> BPF_CGROUP_UDP4_SENDMSG,
> BPF_CGROUP_UDP6_SENDMSG,
> BPF_LIRC_MODE2,
> + BPF_FLOW_DISSECTOR,
> __MAX_BPF_ATTACH_TYPE
> };
>
> @@ -2333,6 +2335,7 @@ struct __sk_buff {
> /* ... here. */
>
> __u32 data_meta;
> + __u32 flow_keys;
please use
struct bpf_flow_keys *flow_keys;
instead.
See what we did in 'struct sk_msg_md' and in 'struct sk_reuseport_md'.
There is no need to hide pointers in u32.
> };
>
> struct bpf_tunnel_key {
> @@ -2778,4 +2781,26 @@ enum bpf_task_fd_type {
> BPF_FD_TYPE_URETPROBE, /* filename + offset */
> };
>
> +struct bpf_flow_keys {
> + __u16 thoff;
> + __u16 addr_proto; /* ETH_P_* of valid addrs */
> + __u8 is_frag;
> + __u8 is_first_frag;
> + __u8 is_encap;
> + __be16 n_proto;
> + __u8 ip_proto;
> + union {
> + struct {
> + __be32 ipv4_src;
> + __be32 ipv4_dst;
> + };
> + struct {
> + __u32 ipv6_src[4]; /* in6_addr; network order */
> + __u32 ipv6_dst[4]; /* in6_addr; network order */
> + };
> + };
> + __be16 sport;
> + __be16 dport;
> +};
> +
> #endif /* _UAPI__LINUX_BPF_H__ */
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 3c9636f03bb2..b3c2d09bcf7a 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -1615,6 +1615,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
> case BPF_LIRC_MODE2:
> ptype = BPF_PROG_TYPE_LIRC_MODE2;
> break;
> + case BPF_FLOW_DISSECTOR:
> + ptype = BPF_PROG_TYPE_FLOW_DISSECTOR;
> + break;
> default:
> return -EINVAL;
> }
> @@ -1636,6 +1639,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
> case BPF_PROG_TYPE_LIRC_MODE2:
> ret = lirc_prog_attach(attr, prog);
> break;
> + case BPF_PROG_TYPE_FLOW_DISSECTOR:
> + ret = skb_flow_dissector_bpf_prog_attach(attr, prog);
> + break;
> default:
> ret = cgroup_bpf_prog_attach(attr, ptype, prog);
> }
> @@ -1688,6 +1694,8 @@ static int bpf_prog_detach(const union bpf_attr *attr)
> return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, NULL);
> case BPF_LIRC_MODE2:
> return lirc_prog_detach(attr);
> + case BPF_FLOW_DISSECTOR:
> + return skb_flow_dissector_bpf_prog_detach(attr);
> default:
> return -EINVAL;
> }
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 6ff1bac1795d..8ccbff4fff93 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -261,6 +261,7 @@ static const char * const reg_type_str[] = {
> [PTR_TO_PACKET] = "pkt",
> [PTR_TO_PACKET_META] = "pkt_meta",
> [PTR_TO_PACKET_END] = "pkt_end",
> + [PTR_TO_FLOW_KEYS] = "flow_keys",
> };
>
> static char slot_type_char[] = {
> @@ -965,6 +966,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
> case PTR_TO_PACKET:
> case PTR_TO_PACKET_META:
> case PTR_TO_PACKET_END:
> + case PTR_TO_FLOW_KEYS:
> case CONST_PTR_TO_MAP:
> return true;
> default:
> @@ -1238,6 +1240,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
> case BPF_PROG_TYPE_LWT_XMIT:
> case BPF_PROG_TYPE_SK_SKB:
> case BPF_PROG_TYPE_SK_MSG:
> + case BPF_PROG_TYPE_FLOW_DISSECTOR:
> if (meta)
> return meta->pkt_access;
>
> @@ -1321,6 +1324,18 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
> return -EACCES;
> }
>
> +static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
> + int size)
> +{
> + if (size < 0 || off < 0 ||
> + (u64)off + size > sizeof(struct bpf_flow_keys)) {
> + verbose(env, "invalid access to flow keys off=%d size=%d\n",
> + off, size);
> + return -EACCES;
> + }
> + return 0;
> +}
> +
> static bool __is_pointer_value(bool allow_ptr_leaks,
> const struct bpf_reg_state *reg)
> {
> @@ -1422,6 +1437,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
> * right in front, treat it the very same way.
> */
> return check_pkt_ptr_alignment(env, reg, off, size, strict);
> + case PTR_TO_FLOW_KEYS:
> + pointer_desc = "flow keys ";
> + break;
> case PTR_TO_MAP_VALUE:
> pointer_desc = "value ";
> break;
> @@ -1692,6 +1710,17 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
> err = check_packet_access(env, regno, off, size, false);
> if (!err && t == BPF_READ && value_regno >= 0)
> mark_reg_unknown(env, regs, value_regno);
> + } else if (reg->type == PTR_TO_FLOW_KEYS) {
> + if (t == BPF_WRITE && value_regno >= 0 &&
> + is_pointer_value(env, value_regno)) {
> + verbose(env, "R%d leaks addr into flow keys\n",
> + value_regno);
> + return -EACCES;
> + }
> +
> + err = check_flow_keys_access(env, off, size);
> + if (!err && t == BPF_READ && value_regno >= 0)
> + mark_reg_unknown(env, regs, value_regno);
> } else {
> verbose(env, "R%d invalid mem access '%s'\n", regno,
> reg_type_str[reg->type]);
> @@ -1839,6 +1868,8 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
> case PTR_TO_PACKET_META:
> return check_packet_access(env, regno, reg->off, access_size,
> zero_size_allowed);
> + case PTR_TO_FLOW_KEYS:
> + return check_flow_keys_access(env, reg->off, access_size);
> case PTR_TO_MAP_VALUE:
> return check_map_access(env, regno, reg->off, access_size,
> zero_size_allowed);
> @@ -4366,6 +4397,7 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
> case PTR_TO_CTX:
> case CONST_PTR_TO_MAP:
> case PTR_TO_PACKET_END:
> + case PTR_TO_FLOW_KEYS:
> /* Only valid matches are exact, which memcmp() above
> * would have accepted
> */
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 8cb242b4400f..bc3725c26794 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -5122,6 +5122,17 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> }
> }
>
> +static const struct bpf_func_proto *
> +flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> +{
> + switch (func_id) {
> + case BPF_FUNC_skb_load_bytes:
> + return &bpf_skb_load_bytes_proto;
> + default:
> + return bpf_base_func_proto(func_id);
> + }
> +}
> +
> static const struct bpf_func_proto *
> lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> {
> @@ -5237,6 +5248,7 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
> case bpf_ctx_range(struct __sk_buff, data):
> case bpf_ctx_range(struct __sk_buff, data_meta):
> case bpf_ctx_range(struct __sk_buff, data_end):
> + case bpf_ctx_range(struct __sk_buff, flow_keys):
> if (size != size_default)
> return false;
> break;
> @@ -5265,6 +5277,7 @@ static bool sk_filter_is_valid_access(int off, int size,
> case bpf_ctx_range(struct __sk_buff, data):
> case bpf_ctx_range(struct __sk_buff, data_meta):
> case bpf_ctx_range(struct __sk_buff, data_end):
> + case bpf_ctx_range(struct __sk_buff, flow_keys):
> case bpf_ctx_range_till(struct __sk_buff, family, local_port):
> return false;
> }
> @@ -5290,6 +5303,7 @@ static bool lwt_is_valid_access(int off, int size,
> case bpf_ctx_range(struct __sk_buff, tc_classid):
> case bpf_ctx_range_till(struct __sk_buff, family, local_port):
> case bpf_ctx_range(struct __sk_buff, data_meta):
> + case bpf_ctx_range(struct __sk_buff, flow_keys):
> return false;
> }
>
> @@ -5500,6 +5514,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
> case bpf_ctx_range(struct __sk_buff, data_end):
> info->reg_type = PTR_TO_PACKET_END;
> break;
> + case bpf_ctx_range(struct __sk_buff, flow_keys):
> case bpf_ctx_range_till(struct __sk_buff, family, local_port):
> return false;
> }
> @@ -5701,6 +5716,7 @@ static bool sk_skb_is_valid_access(int off, int size,
> switch (off) {
> case bpf_ctx_range(struct __sk_buff, tc_classid):
> case bpf_ctx_range(struct __sk_buff, data_meta):
> + case bpf_ctx_range(struct __sk_buff, flow_keys):
> return false;
> }
>
> @@ -5760,6 +5776,39 @@ static bool sk_msg_is_valid_access(int off, int size,
> return true;
> }
>
> +static bool flow_dissector_is_valid_access(int off, int size,
> + enum bpf_access_type type,
> + const struct bpf_prog *prog,
> + struct bpf_insn_access_aux *info)
> +{
> + if (type == BPF_WRITE) {
> + switch (off) {
> + case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
> + break;
> + default:
> + return false;
> + }
> + }
> +
> + switch (off) {
> + case bpf_ctx_range(struct __sk_buff, data):
> + info->reg_type = PTR_TO_PACKET;
> + break;
> + case bpf_ctx_range(struct __sk_buff, data_end):
> + info->reg_type = PTR_TO_PACKET_END;
> + break;
> + case bpf_ctx_range(struct __sk_buff, flow_keys):
> + info->reg_type = PTR_TO_FLOW_KEYS;
> + break;
> + case bpf_ctx_range(struct __sk_buff, tc_classid):
> + case bpf_ctx_range(struct __sk_buff, data_meta):
> + case bpf_ctx_range_till(struct __sk_buff, family, local_port):
> + return false;
> + }
> +
> + return bpf_skb_is_valid_access(off, size, type, prog, info);
> +}
> +
> static u32 bpf_convert_ctx_access(enum bpf_access_type type,
> const struct bpf_insn *si,
> struct bpf_insn *insn_buf,
> @@ -6054,6 +6103,15 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
> bpf_target_off(struct sock_common,
> skc_num, 2, target_size));
> break;
> +
> + case offsetof(struct __sk_buff, flow_keys):
> + off = si->off;
> + off -= offsetof(struct __sk_buff, flow_keys);
> + off += offsetof(struct sk_buff, cb);
> + off += offsetof(struct qdisc_skb_cb, flow_keys);
> + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
> + si->src_reg, off);
> + break;
> }
>
> return insn - insn_buf;
> @@ -7017,6 +7075,15 @@ const struct bpf_verifier_ops sk_msg_verifier_ops = {
> const struct bpf_prog_ops sk_msg_prog_ops = {
> };
>
> +const struct bpf_verifier_ops flow_dissector_verifier_ops = {
> + .get_func_proto = flow_dissector_func_proto,
> + .is_valid_access = flow_dissector_is_valid_access,
> + .convert_ctx_access = bpf_convert_ctx_access,
> +};
> +
> +const struct bpf_prog_ops flow_dissector_prog_ops = {
> +};
> +
> int sk_detach_filter(struct sock *sk)
> {
> int ret = -ENOENT;
> diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
> index ce9eeeb7c024..7eed48c46a94 100644
> --- a/net/core/flow_dissector.c
> +++ b/net/core/flow_dissector.c
> @@ -25,6 +25,9 @@
> #include <net/flow_dissector.h>
> #include <scsi/fc/fc_fcoe.h>
> #include <uapi/linux/batadv_packet.h>
> +#include <linux/bpf.h>
> +
> +static DEFINE_MUTEX(flow_dissector_mutex);
>
> static void dissector_set_key(struct flow_dissector *flow_dissector,
> enum flow_dissector_key_id key_id)
> @@ -62,6 +65,44 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
> }
> EXPORT_SYMBOL(skb_flow_dissector_init);
>
> +int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
> + struct bpf_prog *prog)
> +{
> + struct bpf_prog *attached;
> + struct net *net;
> +
> + net = current->nsproxy->net_ns;
> + mutex_lock(&flow_dissector_mutex);
> + attached = rcu_dereference_protected(net->flow_dissector_prog,
> + lockdep_is_held(&flow_dissector_mutex));
> + if (attached) {
> + /* Only one BPF program can be attached at a time */
> + mutex_unlock(&flow_dissector_mutex);
> + return -EEXIST;
> + }
> + rcu_assign_pointer(net->flow_dissector_prog, prog);
> + mutex_unlock(&flow_dissector_mutex);
> + return 0;
> +}
> +
> +int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
> +{
> + struct bpf_prog *attached;
> + struct net *net;
> +
> + net = current->nsproxy->net_ns;
> + mutex_lock(&flow_dissector_mutex);
> + attached = rcu_dereference_protected(net->flow_dissector_prog,
> + lockdep_is_held(&flow_dissector_mutex));
> + if (!attached) {
> + mutex_unlock(&flow_dissector_mutex);
> + return -ENOENT;
> + }
> + bpf_prog_put(attached);
> + RCU_INIT_POINTER(net->flow_dissector_prog, NULL);
> + mutex_unlock(&flow_dissector_mutex);
> + return 0;
> +}
> /**
> * skb_flow_get_be16 - extract be16 entity
> * @skb: sk_buff to extract from
> @@ -588,6 +629,60 @@ static bool skb_flow_dissect_allowed(int *num_hdrs)
> return (*num_hdrs <= MAX_FLOW_DISSECT_HDRS);
> }
>
> +static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
> + struct flow_dissector *flow_dissector,
> + void *target_container)
> +{
> + struct flow_dissector_key_control *key_control;
> + struct flow_dissector_key_basic *key_basic;
> + struct flow_dissector_key_addrs *key_addrs;
> + struct flow_dissector_key_ports *key_ports;
> +
> + key_control = skb_flow_dissector_target(flow_dissector,
> + FLOW_DISSECTOR_KEY_CONTROL,
> + target_container);
> + key_control->thoff = flow_keys->thoff;
> + if (flow_keys->is_frag)
> + key_control->flags |= FLOW_DIS_IS_FRAGMENT;
> + if (flow_keys->is_first_frag)
> + key_control->flags |= FLOW_DIS_FIRST_FRAG;
> + if (flow_keys->is_encap)
> + key_control->flags |= FLOW_DIS_ENCAPSULATION;
> +
> + key_basic = skb_flow_dissector_target(flow_dissector,
> + FLOW_DISSECTOR_KEY_BASIC,
> + target_container);
> + key_basic->n_proto = flow_keys->n_proto;
> + key_basic->ip_proto = flow_keys->ip_proto;
> +
> + if (flow_keys->addr_proto == ETH_P_IP &&
> + dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
> + key_addrs = skb_flow_dissector_target(flow_dissector,
> + FLOW_DISSECTOR_KEY_IPV4_ADDRS,
> + target_container);
> + key_addrs->v4addrs.src = flow_keys->ipv4_src;
> + key_addrs->v4addrs.dst = flow_keys->ipv4_dst;
> + key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
> + } else if (flow_keys->addr_proto == ETH_P_IPV6 &&
> + dissector_uses_key(flow_dissector,
> + FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
> + key_addrs = skb_flow_dissector_target(flow_dissector,
> + FLOW_DISSECTOR_KEY_IPV6_ADDRS,
> + target_container);
> + memcpy(&key_addrs->v6addrs, &flow_keys->ipv6_src,
> + sizeof(key_addrs->v6addrs));
> + key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
> + }
> +
> + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) {
> + key_ports = skb_flow_dissector_target(flow_dissector,
> + FLOW_DISSECTOR_KEY_PORTS,
> + target_container);
> + key_ports->src = flow_keys->sport;
> + key_ports->dst = flow_keys->dport;
> + }
> +}
> +
> /**
> * __skb_flow_dissect - extract the flow_keys struct and return it
> * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
> @@ -619,6 +714,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
> struct flow_dissector_key_vlan *key_vlan;
> enum flow_dissect_ret fdret;
> enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX;
> + struct bpf_prog *attached;
> int num_hdrs = 0;
> u8 ip_proto = 0;
> bool ret;
> @@ -658,6 +754,46 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
> FLOW_DISSECTOR_KEY_BASIC,
> target_container);
>
> + rcu_read_lock();
> + attached = skb ? rcu_dereference(dev_net(skb->dev)->flow_dissector_prog)
> + : NULL;
> + if (attached) {
> + /* Note that even though the const qualifier is discarded
> + * throughout the execution of the BPF program, all changes(the
> + * control block) are reverted after the BPF program returns.
> + * Therefore, __skb_flow_dissect does not alter the skb.
> + */
> + struct bpf_flow_keys flow_keys = {};
> + struct qdisc_skb_cb cb_saved;
> + struct qdisc_skb_cb *cb;
> + u16 *pseudo_cb;
> + u32 result;
> +
> + cb = qdisc_skb_cb(skb);
> + pseudo_cb = (u16 *)bpf_skb_cb((struct sk_buff *)skb);
> +
> + /* Save Control Block */
> + memcpy(&cb_saved, cb, sizeof(cb_saved));
> + memset(cb, 0, sizeof(cb_saved));
> +
> + /* Pass parameters to the BPF program */
> + cb->flow_keys = &flow_keys;
> + *pseudo_cb = nhoff;
I don't understand this bit.
What is this pseudo_cb, and why does nhoff go in there?
Is it some odd way to pass it into the prog?
> +
> + bpf_compute_data_pointers((struct sk_buff *)skb);
> + result = BPF_PROG_RUN(attached, skb);
> +
> + /* Restore state */
> + memcpy(cb, &cb_saved, sizeof(cb_saved));
> +
> + __skb_flow_bpf_to_target(&flow_keys, flow_dissector,
> + target_container);
> + key_control->thoff = min_t(u16, key_control->thoff, skb->len);
> + rcu_read_unlock();
> + return result == BPF_OK;
> + }
> + rcu_read_unlock();
> +
> if (dissector_uses_key(flow_dissector,
> FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
> struct ethhdr *eth = eth_hdr(skb);
Powered by blists - more mailing lists