Message-ID: <e7eb6be7-2c03-0377-0712-90a3bb289594@gmail.com>
Date: Mon, 28 Sep 2020 21:13:01 -0700
From: David Ahern <dsahern@...il.com>
To: Daniel Borkmann <daniel@...earbox.net>, ast@...nel.org
Cc: john.fastabend@...il.com, netdev@...r.kernel.org,
bpf@...r.kernel.org, David Ahern <dsahern@...nel.org>
Subject: Re: [PATCH bpf-next v2 3/6] bpf: add redirect_neigh helper as redirect drop-in
On 9/28/20 7:38 AM, Daniel Borkmann wrote:
> diff --git a/net/core/filter.c b/net/core/filter.c
> index a0776e48dcc9..64c6e5ec97d7 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> +static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
> +{
> + const struct ipv6hdr *ip6h = ipv6_hdr(skb);
> + struct net *net = dev_net(dev);
> + int err, ret = NET_XMIT_DROP;
> + struct flowi6 fl6 = {
> + .flowi6_flags = FLOWI_FLAG_ANYSRC,
> + .flowi6_mark = skb->mark,
> + .flowlabel = ip6_flowinfo(ip6h),
> + .flowi6_proto = ip6h->nexthdr,
> + .flowi6_oif = dev->ifindex,
> + .daddr = ip6h->daddr,
> + .saddr = ip6h->saddr,
> + };
> + struct dst_entry *dst;
> +
> + skb->dev = dev;
This is not needed here: you set skb->dev in bpf_out_neigh_v6 to the dst
dev, and everything else is an error path where the skb is dropped.
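(For reference, the earlier bpf_out_neigh_v6 hunk already does, roughly:

    struct dst_entry *dst = skb_dst(skb);
    struct net_device *dev = dst->dev;
    ...
    skb->dev = dev;

so the success path overwrites the assignment above anyway.)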
> + skb->tstamp = 0;
> +
> + dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
> + if (IS_ERR(dst))
> + goto out_drop;
> +
> + skb_dst_set(skb, dst);
> +
> + err = bpf_out_neigh_v6(net, skb);
> + if (unlikely(net_xmit_eval(err)))
> + dev->stats.tx_errors++;
> + else
> + ret = NET_XMIT_SUCCESS;
> + goto out_xmit;
> +out_drop:
> + dev->stats.tx_errors++;
> + kfree_skb(skb);
> +out_xmit:
> + return ret;
> +}
> +#else
> +static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
> +{
> + kfree_skb(skb);
> + return NET_XMIT_DROP;
> +}
> +#endif /* CONFIG_IPV6 */
> +
> +#if IS_ENABLED(CONFIG_INET)
> +static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb)
> +{
> + struct dst_entry *dst = skb_dst(skb);
> + struct rtable *rt = container_of(dst, struct rtable, dst);
> + struct net_device *dev = dst->dev;
> + u32 hh_len = LL_RESERVED_SPACE(dev);
> + struct neighbour *neigh;
> + bool is_v6gw = false;
> +
> + if (dev_xmit_recursion())
> + goto out_rec;
> + if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
Why is this check needed for v4 but not v6?
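If the headroom problem can hit the v6 path as well, bpf_out_neigh_v6
presumably needs the equivalent of this (untested sketch, just mirroring
the v4 logic quoted here):

    u32 hh_len = LL_RESERVED_SPACE(dev);

    if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
        struct sk_buff *skb2;

        /* grow headroom so the link-layer header fits */
        skb2 = skb_realloc_headroom(skb, hh_len);
        if (!skb2) {
            kfree_skb(skb);
            return -ENOMEM;
        }
        if (skb->sk)
            skb_set_owner_w(skb2, skb->sk);
        consume_skb(skb);
        skb = skb2;
    }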
> + struct sk_buff *skb2;
> +
> + skb2 = skb_realloc_headroom(skb, hh_len);
> + if (!skb2) {
> + kfree_skb(skb);
> + return -ENOMEM;
> + }
> + if (skb->sk)
> + skb_set_owner_w(skb2, skb->sk);
> + consume_skb(skb);
> + skb = skb2;
> + }
> + rcu_read_lock_bh();
> + neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
> + if (likely(!IS_ERR(neigh))) {
> + int ret;
> +
> + sock_confirm_neigh(skb, neigh);
> + dev_xmit_recursion_inc();
> + ret = neigh_output(neigh, skb, is_v6gw);
> + dev_xmit_recursion_dec();
> + rcu_read_unlock_bh();
> + return ret;
> + }
> + rcu_read_unlock_bh();
> +out_drop:
> + kfree_skb(skb);
> + return -EINVAL;
> +out_rec:
> + net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
> + goto out_drop;
> +}
> +
> +static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
> +{
> + const struct iphdr *ip4h = ip_hdr(skb);
> + struct net *net = dev_net(dev);
> + int err, ret = NET_XMIT_DROP;
> + struct flowi4 fl4 = {
> + .flowi4_flags = FLOWI_FLAG_ANYSRC,
> + .flowi4_mark = skb->mark,
> + .flowi4_tos = RT_TOS(ip4h->tos),
Set flowi4_proto here, like the v6 version sets flowi6_proto.
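i.e., something like (untested; the analog of .flowlabel/.flowi6_proto
above):

    .flowi4_proto = ip4h->protocol,

so FIB rules that match on the L4 protocol see the right value for the
route lookup below.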
> + .flowi4_oif = dev->ifindex,
> + .daddr = ip4h->daddr,
> + .saddr = ip4h->saddr,
> + };
> + struct rtable *rt;
> +
> + skb->dev = dev;
> + skb->tstamp = 0;
> +
> + rt = ip_route_output_flow(net, &fl4, NULL);
> + if (IS_ERR(rt))
> + goto out_drop;
> + if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
> + ip_rt_put(rt);
> + goto out_drop;
> + }
> +
> + skb_dst_set(skb, &rt->dst);
> +
> + err = bpf_out_neigh_v4(net, skb);
> + if (unlikely(net_xmit_eval(err)))
> + dev->stats.tx_errors++;
> + else
> + ret = NET_XMIT_SUCCESS;
> + goto out_xmit;
> +out_drop:
> + dev->stats.tx_errors++;
> + kfree_skb(skb);
> +out_xmit:
> + return ret;
> +}