[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <160319106331.15822.2945713836148003890.stgit@toke.dk>
Date: Tue, 20 Oct 2020 12:51:03 +0200
From: Toke Høiland-Jørgensen <toke@...hat.com>
To: Daniel Borkmann <daniel@...earbox.net>
Cc: David Ahern <dsahern@...nel.org>, netdev@...r.kernel.org,
bpf@...r.kernel.org
Subject: [PATCH bpf v2 2/3] bpf_fib_lookup: optionally skip neighbour lookup
From: Toke Høiland-Jørgensen <toke@...hat.com>
The bpf_fib_lookup() helper performs a neighbour lookup for the destination
IP and returns BPF_FIB_LKUP_RET_NO_NEIGH if this fails, with the expectation
that the BPF program will deal with this condition, either by passing the
packet up the stack, or by using bpf_redirect_neigh().
The neighbour lookup is done via a hash table (through ___neigh_lookup_noref()),
which incurs some overhead. If the caller knows the lookup is likely to fail
anyway, it may want to skip it and go unconditionally to
bpf_redirect_neigh(). For this use case, add a flag to bpf_fib_lookup()
that will make it skip the neighbour lookup and instead always return
BPF_FIB_LKUP_RET_NO_NEIGH (but still populate the gateway and target
ifindex).
Signed-off-by: Toke Høiland-Jørgensen <toke@...hat.com>
---
include/uapi/linux/bpf.h | 10 ++++++----
net/core/filter.c | 16 ++++++++++++++--
tools/include/uapi/linux/bpf.h | 10 ++++++----
3 files changed, 26 insertions(+), 10 deletions(-)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 9668cde9d684..4bfd3c72dae6 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4841,12 +4841,14 @@ struct bpf_raw_tracepoint_args {
__u64 args[0];
};
-/* DIRECT: Skip the FIB rules and go to FIB table associated with device
- * OUTPUT: Do lookup from egress perspective; default is ingress
+/* DIRECT: Skip the FIB rules and go to FIB table associated with device
+ * OUTPUT: Do lookup from egress perspective; default is ingress
+ * SKIP_NEIGH: Skip neighbour lookup and return BPF_FIB_LKUP_RET_NO_NEIGH on success
*/
enum {
- BPF_FIB_LOOKUP_DIRECT = (1U << 0),
- BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
+ BPF_FIB_LOOKUP_DIRECT = (1U << 0),
+ BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
+ BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
};
enum {
diff --git a/net/core/filter.c b/net/core/filter.c
index fa09b4f141ae..9791e6311afa 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5382,6 +5382,9 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
if (nhc->nhc_gw_family)
params->ipv4_dst = nhc->nhc_gw.ipv4;
+ if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
+ return BPF_FIB_LKUP_RET_NO_NEIGH;
+
neigh = __ipv4_neigh_lookup_noref(dev,
(__force u32)params->ipv4_dst);
} else {
@@ -5389,6 +5392,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
params->family = AF_INET6;
*dst = nhc->nhc_gw.ipv6;
+
+ if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
+ return BPF_FIB_LKUP_RET_NO_NEIGH;
+
neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
}
@@ -5501,6 +5508,9 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
params->rt_metric = res.f6i->fib6_metric;
params->ifindex = dev->ifindex;
+ if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
+ return BPF_FIB_LKUP_RET_NO_NEIGH;
+
/* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
* not needed here.
*/
@@ -5518,7 +5528,8 @@ BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
if (plen < sizeof(*params))
return -EINVAL;
- if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
+ if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT |
+ BPF_FIB_LOOKUP_SKIP_NEIGH))
return -EINVAL;
switch (params->family) {
@@ -5555,7 +5566,8 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
if (plen < sizeof(*params))
return -EINVAL;
- if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
+ if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT |
+ BPF_FIB_LOOKUP_SKIP_NEIGH))
return -EINVAL;
switch (params->family) {
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 9668cde9d684..4bfd3c72dae6 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4841,12 +4841,14 @@ struct bpf_raw_tracepoint_args {
__u64 args[0];
};
-/* DIRECT: Skip the FIB rules and go to FIB table associated with device
- * OUTPUT: Do lookup from egress perspective; default is ingress
+/* DIRECT: Skip the FIB rules and go to FIB table associated with device
+ * OUTPUT: Do lookup from egress perspective; default is ingress
+ * SKIP_NEIGH: Skip neighbour lookup and return BPF_FIB_LKUP_RET_NO_NEIGH on success
*/
enum {
- BPF_FIB_LOOKUP_DIRECT = (1U << 0),
- BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
+ BPF_FIB_LOOKUP_DIRECT = (1U << 0),
+ BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
+ BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
};
enum {
Powered by blists - more mailing lists