[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20200930064811.mroafbnrrnb77qki@kafai-mbp.dhcp.thefacebook.com>
Date: Tue, 29 Sep 2020 23:48:11 -0700
From: Martin KaFai Lau <kafai@...com>
To: Daniel Borkmann <daniel@...earbox.net>
CC: <ast@...nel.org>, <john.fastabend@...il.com>,
<netdev@...r.kernel.org>, <bpf@...r.kernel.org>,
David Ahern <dsahern@...nel.org>
Subject: Re: [PATCH bpf-next v3 3/6] bpf: add redirect_neigh helper as
redirect drop-in
On Tue, Sep 29, 2020 at 11:23:03PM +0200, Daniel Borkmann wrote:
[ ... ]
> ---
> include/linux/skbuff.h | 5 +
> include/uapi/linux/bpf.h | 14 ++
> net/core/filter.c | 273 +++++++++++++++++++++++++++++++--
> tools/include/uapi/linux/bpf.h | 14 ++
> 4 files changed, 293 insertions(+), 13 deletions(-)
>
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 04a18e01b362..3d0cf3722bb4 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -2548,6 +2548,11 @@ static inline int skb_mac_header_was_set(const struct sk_buff *skb)
> return skb->mac_header != (typeof(skb->mac_header))~0U;
> }
>
> +static inline void skb_unset_mac_header(struct sk_buff *skb)
> +{
> + skb->mac_header = (typeof(skb->mac_header))~0U;
> +}
> +
> static inline void skb_reset_mac_header(struct sk_buff *skb)
> {
> skb->mac_header = skb->data - skb->head;
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 6116a7f54c8f..1f17c6752deb 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -3652,6 +3652,19 @@ union bpf_attr {
> * associated socket instead of the current process.
> * Return
> * The id is returned or 0 in case the id could not be retrieved.
> + *
> + * long bpf_redirect_neigh(u32 ifindex, u64 flags)
> + * Description
> + * Redirect the packet to another net device of index *ifindex*
> + * and fill in L2 addresses from neighboring subsystem. This helper
> + * is somewhat similar to **bpf_redirect**\ (), except that it
> + * fills in e.g. MAC addresses based on the L3 information from
> + * the packet. This helper is supported for IPv4 and IPv6 protocols.
> + * The *flags* argument is reserved and must be 0. The helper is
> + * currently only supported for tc BPF program types.
> + * Return
> + * The helper returns **TC_ACT_REDIRECT** on success or
> + * **TC_ACT_SHOT** on error.
> */
> #define __BPF_FUNC_MAPPER(FN) \
> FN(unspec), \
> @@ -3806,6 +3819,7 @@ union bpf_attr {
> FN(snprintf_btf), \
> FN(seq_printf_btf), \
> FN(skb_cgroup_classid), \
> + FN(redirect_neigh), \
> /* */
>
> /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> diff --git a/net/core/filter.c b/net/core/filter.c
> index a0776e48dcc9..14b1534f6b46 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -2163,6 +2163,222 @@ static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
> return __bpf_redirect_no_mac(skb, dev, flags);
> }
>
> +#if IS_ENABLED(CONFIG_IPV6)
> +static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
> +{
> + struct dst_entry *dst = skb_dst(skb);
> + struct net_device *dev = dst->dev;
> + u32 hh_len = LL_RESERVED_SPACE(dev);
> + const struct in6_addr *nexthop;
> + struct neighbour *neigh;
> +
> + if (dev_xmit_recursion())
> + goto out_rec;
> +
> + skb->dev = dev;
> + skb->tstamp = 0;
> +
> + if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
> + struct sk_buff *skb2;
> +
> + skb2 = skb_realloc_headroom(skb, hh_len);
> + if (!skb2) {
> + kfree_skb(skb);
> + return -ENOMEM;
> + }
> + if (skb->sk)
> + skb_set_owner_w(skb2, skb->sk);
> + consume_skb(skb);
> + skb = skb2;
> + }
> +
> + rcu_read_lock_bh();
> + nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
> + &ipv6_hdr(skb)->daddr);
> + neigh = ip_neigh_gw6(dev, nexthop);
> + if (likely(!IS_ERR(neigh))) {
> + int ret;
> +
> + sock_confirm_neigh(skb, neigh);
> + dev_xmit_recursion_inc();
> + ret = neigh_output(neigh, skb, false);
> + dev_xmit_recursion_dec();
> + rcu_read_unlock_bh();
> + return ret;
> + }
> + rcu_read_unlock_bh();
> + IP6_INC_STATS(dev_net(dst->dev),
> + ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
> +out_drop:
> + kfree_skb(skb);
> + return -EINVAL;
> +out_rec:
> + net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
> + goto out_drop;
nit. may be log this at the earlier "if (dev_xmit_recursion)" and
then directly goto out_drop.
> +}
> +
[ ... ]
> +/* Internal, non-exposed redirect flags. */
> +enum {
> + BPF_F_NEIGH = (1ULL << 1),
> +};
It will be useful to ensure the future "flags" of BPF_FUNC_redirect
will not overlap with this. May be a BUILD_BUG_ON?
Others LGTM.
Acked-by: Martin KaFai Lau <kafai@...com>
>
> int skb_do_redirect(struct sk_buff *skb)
> {
> struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
> struct net_device *dev;
> + u32 flags = ri->flags;
>
> dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->tgt_index);
> ri->tgt_index = 0;
> @@ -2231,7 +2440,22 @@ int skb_do_redirect(struct sk_buff *skb)
> return -EINVAL;
> }
>
> - return __bpf_redirect(skb, dev, ri->flags);
> + return flags & BPF_F_NEIGH ?
> + __bpf_redirect_neigh(skb, dev) :
> + __bpf_redirect(skb, dev, flags);
> +}
Powered by blists - more mailing lists