[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20190130194811.239760-4-posk@google.com>
Date: Wed, 30 Jan 2019 11:48:09 -0800
From: Peter Oskolkov <posk@...gle.com>
To: Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>, netdev@...r.kernel.org
Cc: Peter Oskolkov <posk.devel@...il.com>,
David Ahern <dsahern@...il.com>,
Peter Oskolkov <posk@...gle.com>
Subject: [PATCH bpf-next v4 3/5] bpf: add handling of BPF_LWT_REROUTE to lwt_bpf.c
This patch builds on top of the previous patch in the patchset,
which added BPF_LWT_ENCAP_IP mode to bpf_lwt_push_encap. As the
encapping can result in the skb needing to go via a different
interface/route/dst, bpf programs can indicate this by returning
BPF_LWT_REROUTE, which triggers a new route lookup for the skb.
Signed-off-by: Peter Oskolkov <posk@...gle.com>
---
net/core/lwt_bpf.c | 116 +++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 116 insertions(+)
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 6a6e9acab73d..21ffafd4eabb 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -16,6 +16,7 @@
#include <linux/types.h>
#include <linux/bpf.h>
#include <net/lwtunnel.h>
+#include <net/ip6_route.h>
struct bpf_lwt_prog {
struct bpf_prog *prog;
@@ -55,6 +56,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
switch (ret) {
case BPF_OK:
+ case BPF_LWT_REROUTE:
break;
case BPF_REDIRECT:
@@ -87,6 +89,32 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
return ret;
}
+static int bpf_lwt_input_reroute(struct sk_buff *skb)
+{
+ int err = -EINVAL;
+
+ if (skb->protocol == htons(ETH_P_IP)) {
+ struct iphdr *iph = ip_hdr(skb);
+
+ err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
+ iph->tos, skb_dst(skb)->dev);
+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ ip6_route_input(skb);
+ err = skb_dst(skb)->error;
+ } else {
+ pr_warn_once("BPF_LWT_REROUTE input: unsupported proto %d\n",
+ skb->protocol);
+ }
+
+ if (err)
+ goto err;
+ return dst_input(skb);
+
+err:
+ kfree_skb(skb);
+ return err;
+}
+
static int bpf_input(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
@@ -98,6 +126,8 @@ static int bpf_input(struct sk_buff *skb)
ret = run_lwt_bpf(skb, &bpf->in, dst, NO_REDIRECT);
if (ret < 0)
return ret;
+ if (ret == BPF_LWT_REROUTE)
+ return bpf_lwt_input_reroute(skb);
}
if (unlikely(!dst->lwtstate->orig_input)) {
@@ -147,6 +177,90 @@ static int xmit_check_hhlen(struct sk_buff *skb)
return 0;
}
+static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
+{
+ struct net_device *l3mdev = l3mdev_master_dev_rcu(skb_dst(skb)->dev);
+ int oif = l3mdev ? l3mdev->ifindex : 0;
+ struct dst_entry *dst = NULL;
+ struct sock *sk;
+ struct net *net;
+ bool ipv4;
+ int err;
+
+ if (skb->protocol == htons(ETH_P_IP)) {
+ ipv4 = true;
+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ ipv4 = false;
+ } else {
+ pr_warn_once("BPF_LWT_REROUTE xmit: unsupported proto %d\n",
+ skb->protocol);
+ return -EINVAL;
+ }
+
+ sk = sk_to_full_sk(skb->sk);
+ if (sk) {
+ if (sk->sk_bound_dev_if)
+ oif = sk->sk_bound_dev_if;
+ net = sock_net(sk);
+ } else {
+ net = dev_net(skb_dst(skb)->dev);
+ }
+
+ if (ipv4) {
+ struct iphdr *iph = ip_hdr(skb);
+ struct flowi4 fl4 = {0};
+ struct rtable *rt;
+
+ fl4.flowi4_oif = oif;
+ fl4.flowi4_mark = skb->mark;
+ fl4.flowi4_uid = sock_net_uid(net, sk);
+ fl4.flowi4_tos = RT_TOS(iph->tos);
+ fl4.flowi4_flags = FLOWI_FLAG_ANYSRC;
+ fl4.flowi4_proto = iph->protocol;
+ fl4.daddr = iph->daddr;
+ fl4.saddr = iph->saddr;
+
+ rt = ip_route_output_key(net, &fl4);
+ if (IS_ERR(rt) || rt->dst.error)
+ return -EINVAL;
+ dst = &rt->dst;
+ } else {
+ struct ipv6hdr *iph6 = ipv6_hdr(skb);
+ struct flowi6 fl6 = {0};
+
+ fl6.flowi6_oif = oif;
+ fl6.flowi6_mark = skb->mark;
+ fl6.flowi6_uid = sock_net_uid(net, sk);
+ fl6.flowlabel = ip6_flowinfo(iph6);
+ fl6.flowi6_proto = iph6->nexthdr;
+ fl6.daddr = iph6->daddr;
+ fl6.saddr = iph6->saddr;
+
+ dst = ip6_route_output(net, skb->sk, &fl6);
+ if (IS_ERR(dst) || dst->error)
+ return -EINVAL;
+ }
+
+ /* Although skb header was reserved in bpf_lwt_push_ip_encap(), it
+ * was done for the previous dst, so we are doing it here again, in
+ * case the new dst needs much more space. The call below is a noop
+ * if there is enough header space in skb.
+ */
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ if (unlikely(err))
+ return err;
+
+ skb_dst_drop(skb);
+ skb_dst_set(skb, dst);
+
+ err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb);
+ if (unlikely(err))
+ return err;
+
+ /* ip[6]_finish_output2 understand LWTUNNEL_XMIT_DONE */
+ return LWTUNNEL_XMIT_DONE;
+}
+
static int bpf_xmit(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
@@ -169,6 +283,8 @@ static int bpf_xmit(struct sk_buff *skb)
return LWTUNNEL_XMIT_CONTINUE;
case BPF_REDIRECT:
return LWTUNNEL_XMIT_DONE;
+ case BPF_LWT_REROUTE:
+ return bpf_lwt_xmit_reroute(skb);
default:
return ret;
}
--
2.20.1.495.gaa96b0ce6b-goog
Powered by blists - more mailing lists