[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230929150717.120463-2-m@lambda.lt>
Date: Fri, 29 Sep 2023 17:07:16 +0200
From: Martynas Pumputis <m@...bda.lt>
To: bpf@...r.kernel.org
Cc: Daniel Borkmann <daniel@...earbox.net>,
netdev@...r.kernel.org,
Martin KaFai Lau <martin.lau@...ux.dev>,
Nikolay Aleksandrov <razor@...ckwall.org>,
Martynas Pumputis <m@...bda.lt>
Subject: [PATCH bpf 1/2] bpf: Derive source IP addr via bpf_*_fib_lookup()
Extend the bpf_fib_lookup() helper by making it to return the source
IPv4/IPv6 address if the BPF_FIB_LOOKUP_SET_SRC flag is set.
For example, the following snippet can be used to derive the desired
source IP address:
struct bpf_fib_lookup p = { .ipv4_dst = ip4->daddr };
ret = bpf_skb_fib_lookup(skb, p, sizeof(p),
BPF_FIB_LOOKUP_SET_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH);
if (ret != BPF_FIB_LKUP_RET_SUCCESS)
return TC_ACT_SHOT;
/* the p.ipv4_src now contains the source address */
The inability to derive the proper source address may cause malfunctions
in BPF-based dataplanes for hosts containing netdevs with more than one
routable IP address or for multi-homed hosts.
For example, Cilium implements packet masquerading in BPF. If an
egressing netdev to which the Cilium's BPF prog is attached has
multiple IP addresses, then only one [hardcoded] IP address can be used for
masquerading. This breaks connectivity if any other IP address should have
been selected instead, for example, when a public and private addresses
are attached to the same egress interface.
The change was tested with Cilium [1].
Nikolay Aleksandrov helped to figure out the IPv6 addr selection.
[1]: https://github.com/cilium/cilium/pull/28283
Signed-off-by: Martynas Pumputis <m@...bda.lt>
---
include/uapi/linux/bpf.h | 9 +++++++++
net/core/filter.c | 13 ++++++++++++-
tools/include/uapi/linux/bpf.h | 10 ++++++++++
3 files changed, 31 insertions(+), 1 deletion(-)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0448700890f7..a6bf686eecbc 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3257,6 +3257,10 @@ union bpf_attr {
* and *params*->smac will not be set as output. A common
* use case is to call **bpf_redirect_neigh**\ () after
* doing **bpf_fib_lookup**\ ().
+ * **BPF_FIB_LOOKUP_SET_SRC**
+ * Derive and set source IP addr in *params*->ipv{4,6}_src
+ * for the nexthop. If the src addr cannot be derived,
+ * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned.
*
* *ctx* is either **struct xdp_md** for XDP programs or
* **struct sk_buff** tc cls_act programs.
@@ -6953,6 +6957,7 @@ enum {
BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
BPF_FIB_LOOKUP_TBID = (1U << 3),
+ BPF_FIB_LOOKUP_SET_SRC = (1U << 4),
};
enum {
@@ -6965,6 +6970,7 @@ enum {
BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */
BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */
BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */
+ BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */
};
struct bpf_fib_lookup {
@@ -6999,6 +7005,9 @@ struct bpf_fib_lookup {
__u32 rt_metric;
};
+ /* input: source address to consider for lookup
+ * output: source address result from lookup
+ */
union {
__be32 ipv4_src;
__u32 ipv6_src[4]; /* in6_addr; network order */
diff --git a/net/core/filter.c b/net/core/filter.c
index a094694899c9..e9cdcf49df62 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5850,6 +5850,9 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
params->rt_metric = res.fi->fib_priority;
params->ifindex = dev->ifindex;
+ if (flags & BPF_FIB_LOOKUP_SET_SRC)
+ params->ipv4_src = fib_result_prefsrc(net, &res);
+
/* xdp and cls_bpf programs are run in RCU-bh so
* rcu_read_lock_bh is not needed here
*/
@@ -5992,6 +5995,13 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
params->rt_metric = res.f6i->fib6_metric;
params->ifindex = dev->ifindex;
+ if (flags & BPF_FIB_LOOKUP_SET_SRC) {
+ err = ip6_route_get_saddr(net, res.f6i, &fl6.daddr, 0,
+ (struct in6_addr *)¶ms->ipv6_src);
+ if (err)
+ return BPF_FIB_LKUP_RET_NO_SRC_ADDR;
+ }
+
if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
goto set_fwd_params;
@@ -6010,7 +6020,8 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
#endif
#define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \
- BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID)
+ BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \
+ BPF_FIB_LOOKUP_SET_SRC)
BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
struct bpf_fib_lookup *, params, int, plen, u32, flags)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 0448700890f7..72cd0ca97689 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3257,6 +3257,10 @@ union bpf_attr {
* and *params*->smac will not be set as output. A common
* use case is to call **bpf_redirect_neigh**\ () after
* doing **bpf_fib_lookup**\ ().
+ * **BPF_FIB_LOOKUP_SET_SRC**
+ * Derive and set source IP addr in *params*->ipv{4,6}_src
+ * for the nexthop. If the src addr cannot be derived,
+ * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned.
*
* *ctx* is either **struct xdp_md** for XDP programs or
* **struct sk_buff** tc cls_act programs.
@@ -6953,6 +6957,7 @@ enum {
BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
BPF_FIB_LOOKUP_TBID = (1U << 3),
+ BPF_FIB_LOOKUP_SET_SRC = (1U << 4),
};
enum {
@@ -6965,6 +6970,7 @@ enum {
BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */
BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */
BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */
+ BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */
};
struct bpf_fib_lookup {
@@ -6999,6 +7005,10 @@ struct bpf_fib_lookup {
__u32 rt_metric;
};
+
+ /* input: source address to consider for lookup
+ * output: source address result from lookup
+ */
union {
__be32 ipv4_src;
__u32 ipv6_src[4]; /* in6_addr; network order */
--
2.42.0
Powered by blists - more mailing lists