[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240929170219.1881536-1-jrife@google.com>
Date: Sun, 29 Sep 2024 17:02:16 +0000
From: Jordan Rife <jrife@...gle.com>
To: bpf@...r.kernel.org
Cc: Jordan Rife <jrife@...gle.com>, netdev@...r.kernel.org,
Daniel Borkmann <daniel@...earbox.net>, Alexei Starovoitov <ast@...nel.org>,
Andrii Nakryiko <andrii@...nel.org>, Martin KaFai Lau <martin.lau@...ux.dev>,
Kui-Feng Lee <thinker.li@...il.com>, Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>, "David S. Miller" <davem@...emloft.net>, Paolo Abeni <pabeni@...hat.com>,
stable@...r.kernel.org
Subject: [PATCH v1] bpf: Prevent infinite loops with bpf_redirect_peer
It is possible to create cycles using bpf_redirect_peer which lead to an
an infinite loop inside __netif_receive_skb_core. The simplest way to
illustrate this is by attaching a TC program to the ingress hook on both
sides of a veth or netkit device pair which redirects to its own peer,
although other cycles are possible. This patch places an upper limit on
the number of iterations allowed inside __netif_receive_skb_core to
prevent this.
Signed-off-by: Jordan Rife <jrife@...gle.com>
Fixes: 9aa1206e8f48 ("bpf: Add redirect_peer helper")
Cc: stable@...r.kernel.org
---
net/core/dev.c | 11 +++-
net/core/dev.h | 1 +
.../selftests/bpf/prog_tests/tc_redirect.c | 51 +++++++++++++++++++
.../selftests/bpf/progs/test_tc_peer.c | 13 +++++
4 files changed, 75 insertions(+), 1 deletion(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index cd479f5f22f6..753f8d27f47c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5455,6 +5455,7 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
struct net_device *orig_dev;
bool deliver_exact = false;
int ret = NET_RX_DROP;
+ int loops = 0;
__be16 type;
net_timestamp_check(!READ_ONCE(net_hotdata.tstamp_prequeue), skb);
@@ -5521,8 +5522,16 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
nf_skip_egress(skb, true);
skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev,
&another);
- if (another)
+ if (another) {
+ loops++;
+ if (unlikely(loops == RX_LOOP_LIMIT)) {
+ ret = NET_RX_DROP;
+ net_crit_ratelimited("bpf: loop limit reached on datapath, buggy bpf program?\n");
+ goto out;
+ }
+
goto another_round;
+ }
if (!skb)
goto out;
diff --git a/net/core/dev.h b/net/core/dev.h
index 5654325c5b71..28d1cf2f9069 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -150,6 +150,7 @@ struct napi_struct *napi_by_id(unsigned int napi_id);
void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu);
#define XMIT_RECURSION_LIMIT 8
+#define RX_LOOP_LIMIT 8
#ifndef CONFIG_PREEMPT_RT
static inline bool dev_xmit_recursion(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index c85798966aec..db1b36090d6c 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -1081,6 +1081,55 @@ static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
close_netns(nstoken);
}
+static void test_tc_redirect_peer_loop(struct netns_setup_result *setup_result)
+{
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_src);
+ struct test_tc_peer *skel;
+ struct nstoken *nstoken;
+ int err;
+
+ /* Set up an infinite redirect loop using bpf_redirect_peer with ingress
+ * hooks on either side of a veth/netkit pair redirecting to its own
+ * peer. This should not lock up the kernel.
+ */
+ nstoken = open_netns(NS_SRC);
+ if (!ASSERT_OK_PTR(nstoken, "setns src"))
+ return;
+
+ skel = test_tc_peer__open();
+ if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
+ goto done;
+
+ skel->rodata->IFINDEX_SRC = setup_result->ifindex_src;
+ skel->rodata->IFINDEX_SRC_FWD = setup_result->ifindex_src_fwd;
+
+ err = test_tc_peer__load(skel);
+ if (!ASSERT_OK(err, "test_tc_peer__load"))
+ goto done;
+
+ QDISC_CLSACT_CREATE(&qdisc_src, setup_result->ifindex_src);
+ XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_INGRESS, skel->progs.tc_src_self, 0);
+ close_netns(nstoken);
+
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ return;
+ QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
+ XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, skel->progs.tc_src_fwd_self, 0);
+
+ if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+ goto done;
+
+ SYS_NOFAIL("ip netns exec " NS_SRC " %s -c 1 -W 1 -q %s > /dev/null",
+ ping_command(AF_INET), IP4_DST);
+fail:
+done:
+ if (skel)
+ test_tc_peer__destroy(skel);
+ close_netns(nstoken);
+}
+
static int tun_open(char *name)
{
struct ifreq ifr;
@@ -1280,6 +1329,8 @@ static void *test_tc_redirect_run_tests(void *arg)
RUN_TEST(tc_redirect_peer, MODE_VETH);
RUN_TEST(tc_redirect_peer, MODE_NETKIT);
RUN_TEST(tc_redirect_peer_l3, MODE_VETH);
+ RUN_TEST(tc_redirect_peer_loop, MODE_VETH);
+ RUN_TEST(tc_redirect_peer_loop, MODE_NETKIT);
RUN_TEST(tc_redirect_peer_l3, MODE_NETKIT);
RUN_TEST(tc_redirect_neigh, MODE_VETH);
RUN_TEST(tc_redirect_neigh_fib, MODE_VETH);
diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c
index 365eacb5dc34..9b8a00ccad42 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_peer.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c
@@ -10,6 +10,7 @@
#include <bpf/bpf_helpers.h>
+volatile const __u32 IFINDEX_SRC_FWD;
volatile const __u32 IFINDEX_SRC;
volatile const __u32 IFINDEX_DST;
@@ -34,6 +35,18 @@ int tc_src(struct __sk_buff *skb)
return bpf_redirect_peer(IFINDEX_DST, 0);
}
+SEC("tc")
+int tc_src_self(struct __sk_buff *skb)
+{
+ return bpf_redirect_peer(IFINDEX_SRC, 0);
+}
+
+SEC("tc")
+int tc_src_fwd_self(struct __sk_buff *skb)
+{
+ return bpf_redirect_peer(IFINDEX_SRC_FWD, 0);
+}
+
SEC("tc")
int tc_dst_l3(struct __sk_buff *skb)
{
--
2.46.1.824.gd892dcdcdd-goog
Powered by blists - more mailing lists