lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20260107-skb-meta-safeproof-netdevs-rx-only-v3-17-0d461c5e4764@cloudflare.com>
Date: Wed, 07 Jan 2026 15:28:17 +0100
From: Jakub Sitnicki <jakub@...udflare.com>
To: bpf@...r.kernel.org
Cc: netdev@...r.kernel.org, "David S. Miller" <davem@...emloft.net>, 
 Eric Dumazet <edumazet@...gle.com>, Jakub Kicinski <kuba@...nel.org>, 
 Paolo Abeni <pabeni@...hat.com>, Alexei Starovoitov <ast@...nel.org>, 
 Daniel Borkmann <daniel@...earbox.net>, 
 Jesper Dangaard Brouer <hawk@...nel.org>, 
 John Fastabend <john.fastabend@...il.com>, 
 Stanislav Fomichev <sdf@...ichev.me>, Simon Horman <horms@...nel.org>, 
 Andrii Nakryiko <andrii@...nel.org>, 
 Martin KaFai Lau <martin.lau@...ux.dev>, 
 Eduard Zingerman <eddyz87@...il.com>, Song Liu <song@...nel.org>, 
 Yonghong Song <yonghong.song@...ux.dev>, KP Singh <kpsingh@...nel.org>, 
 Hao Luo <haoluo@...gle.com>, Jiri Olsa <jolsa@...nel.org>, 
 kernel-team@...udflare.com
Subject: [PATCH bpf-next v3 17/17] selftests/bpf: Test skb metadata access
 after L2 decapsulation

Add tests to verify that XDP/skb metadata remains accessible after L2
decapsulation now that metadata tracking has been decoupled from the MAC
header offset.

Use a 2-netns setup to test each L2 decapsulation path that resets the MAC
header offset: GRE (IPv4/IPv6), VXLAN, GENEVE, L2TPv3, VLAN, QinQ, and
MPLS. SRv6 and NSH are not tested due to setup complexity (SRv6 requires 4
namespaces according to selftests/net/srv6_hl2encap_red_l2vpn_test.sh, NSH
requires Open vSwitch).

For each encapsulation type, test three access scenarios after L2 decap:
- direct skb->data_meta pointer access
- dynptr-based metadata access (bpf_dynptr_from_skb_meta)
- metadata access after move or skb head realloc (bpf_skb_adjust_room)

Change test_xdp_meta.c to identify test packets by payload content instead
of source MAC address, since L2 encapsulation pushes its own MAC header.

Signed-off-by: Jakub Sitnicki <jakub@...udflare.com>
---
 tools/testing/selftests/bpf/config                 |   6 +-
 .../bpf/prog_tests/xdp_context_test_run.c          | 292 +++++++++++++++++++++
 tools/testing/selftests/bpf/progs/test_xdp_meta.c  |  48 ++--
 3 files changed, 325 insertions(+), 21 deletions(-)

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 558839e3c185..f867b7eff085 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -130,4 +130,8 @@ CONFIG_INFINIBAND=y
 CONFIG_SMC=y
 CONFIG_SMC_HS_CTRL_BPF=y
 CONFIG_DIBS=y
-CONFIG_DIBS_LO=y
\ No newline at end of file
+CONFIG_DIBS_LO=y
+CONFIG_L2TP=y
+CONFIG_L2TP_V3=y
+CONFIG_L2TP_IP=y
+CONFIG_L2TP_ETH=y
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
index ee94c281888a..dc7f936216db 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
@@ -8,9 +8,14 @@
 #define TX_NAME "veth1"
 #define TX_NETNS "xdp_context_tx"
 #define RX_NETNS "xdp_context_rx"
+#define RX_MAC "02:00:00:00:00:01"
+#define TX_MAC "02:00:00:00:00:02"
 #define TAP_NAME "tap0"
 #define DUMMY_NAME "dum0"
 #define TAP_NETNS "xdp_context_tuntap"
+#define ENCAP_DEV "encap"
+#define DECAP_RX_NETNS "xdp_context_decap_rx"
+#define DECAP_TX_NETNS "xdp_context_decap_tx"
 
 #define TEST_PAYLOAD_LEN 32
 static const __u8 test_payload[TEST_PAYLOAD_LEN] = {
@@ -127,6 +132,7 @@ static int send_test_packet(int ifindex)
 	/* We use the Ethernet header only to identify the test packet */
 	struct ethhdr eth = {
 		.h_source = { 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF },
+		.h_proto = htons(ETH_P_IP),
 	};
 
 	memcpy(packet, &eth, sizeof(eth));
@@ -155,6 +161,34 @@ static int send_test_packet(int ifindex)
 	return -1;
 }
 
+static int send_routed_packet(int af, const char *ip)
+{
+	struct sockaddr_storage addr;
+	socklen_t alen;
+	int r, sock = -1;
+
+	r = make_sockaddr(af, ip, 42, &addr, &alen);
+	if (!ASSERT_OK(r, "make_sockaddr"))
+		goto err;
+
+	sock = socket(af, SOCK_DGRAM, 0);
+	if (!ASSERT_OK_FD(sock, "socket"))
+		goto err;
+
+	r = sendto(sock, test_payload, sizeof(test_payload), 0,
+		   (struct sockaddr *)&addr, alen);
+	if (!ASSERT_EQ(r, sizeof(test_payload), "sendto"))
+		goto err;
+
+	close(sock);
+	return 0;
+
+err:
+	if (sock >= 0)
+		close(sock);
+	return -1;
+}
+
 static int write_test_packet(int tap_fd)
 {
 	__u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN];
@@ -510,3 +544,261 @@ void test_xdp_context_tuntap(void)
 
 	test_xdp_meta__destroy(skel);
 }
+
+enum l2_encap_type {
+	GRE4_ENCAP,
+	GRE6_ENCAP,
+	VXLAN_ENCAP,
+	GENEVE_ENCAP,
+	L2TPV3_ENCAP,
+	VLAN_ENCAP,
+	QINQ_ENCAP,
+	MPLS_ENCAP,
+};
+
+static bool l2_encap_uses_ipv6(enum l2_encap_type encap_type)
+{
+	return encap_type == GRE6_ENCAP;
+}
+
+static bool l2_encap_uses_routing(enum l2_encap_type encap_type)
+{
+	return encap_type == MPLS_ENCAP;
+}
+
+static bool setup_l2_encap_dev(enum l2_encap_type encap_type,
+			       const char *encap_dev, const char *lower_dev,
+			       const char *net_prefix, const char *local_addr,
+			       const char *remote_addr)
+{
+	switch (encap_type) {
+	case GRE4_ENCAP:
+		SYS(fail, "ip link add %s type gretap local %s remote %s",
+		    encap_dev, local_addr, remote_addr);
+		return true;
+
+	case GRE6_ENCAP:
+		SYS(fail, "ip link add %s type ip6gretap local %s remote %s",
+		    encap_dev, local_addr, remote_addr);
+		return true;
+
+	case VXLAN_ENCAP:
+		SYS(fail,
+		    "ip link add %s type vxlan id 42 local %s remote %s dstport 4789",
+		    encap_dev, local_addr, remote_addr);
+		return true;
+
+	case GENEVE_ENCAP:
+		SYS(fail,
+		    "ip link add %s type geneve id 42 remote %s",
+		    encap_dev, remote_addr);
+		return true;
+
+	case L2TPV3_ENCAP:
+		SYS(fail,
+		    "ip l2tp add tunnel tunnel_id 42 peer_tunnel_id 42 encap ip local %s remote %s",
+		    local_addr, remote_addr);
+		SYS(fail,
+		    "ip l2tp add session name %s tunnel_id 42 session_id 42 peer_session_id 42",
+		    encap_dev);
+		return true;
+
+	case VLAN_ENCAP:
+		SYS(fail, "ip link set dev %s down", lower_dev);
+		SYS(fail, "ethtool -K %s rx-vlan-hw-parse off", lower_dev);
+		SYS(fail, "ethtool -K %s tx-vlan-hw-insert off", lower_dev);
+		SYS(fail, "ip link set dev %s up", lower_dev);
+		SYS(fail, "ip link add %s link %s type vlan id 42", encap_dev,
+		    lower_dev);
+		return true;
+
+	case QINQ_ENCAP:
+		SYS(fail, "ip link set dev %s down", lower_dev);
+		SYS(fail, "ethtool -K %s rx-vlan-hw-parse off", lower_dev);
+		SYS(fail, "ethtool -K %s tx-vlan-hw-insert off", lower_dev);
+		SYS(fail, "ethtool -K %s rx-vlan-stag-hw-parse off", lower_dev);
+		SYS(fail, "ethtool -K %s tx-vlan-stag-hw-insert off", lower_dev);
+		SYS(fail, "ip link set dev %s up", lower_dev);
+		SYS(fail, "ip link add vlan.100 link %s type vlan proto 802.1ad id 100", lower_dev);
+		SYS(fail, "ip link set dev vlan.100 up");
+		SYS(fail, "ip link add %s link vlan.100 type vlan id 42", encap_dev);
+		return true;
+
+	case MPLS_ENCAP:
+		SYS(fail, "sysctl -wq net.mpls.platform_labels=65535");
+		SYS(fail, "sysctl -wq net.mpls.conf.%s.input=1", lower_dev);
+		SYS(fail, "ip route change %s encap mpls 42 via %s", net_prefix, remote_addr);
+		SYS(fail, "ip -f mpls route add 42 dev lo");
+		SYS(fail, "ip link set dev lo name %s", encap_dev);
+
+		return true;
+	}
+fail:
+	return false;
+}
+
+static void test_l2_decap(enum l2_encap_type encap_type,
+			  struct bpf_program *xdp_prog,
+			  struct bpf_program *tc_prog, bool *test_pass)
+{
+	LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
+	LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+	const char *net, *rx_ip, *tx_ip, *addr_opts;
+	int af, plen;
+	struct netns_obj *rx_ns = NULL, *tx_ns = NULL;
+	struct nstoken *nstoken = NULL;
+	int lower_ifindex, upper_ifindex;
+	int ret;
+
+	if (l2_encap_uses_ipv6(encap_type)) {
+		af = AF_INET6;
+		net = "fd00::/64";
+		rx_ip = "fd00::1";
+		tx_ip = "fd00::2";
+		plen = 64;
+		addr_opts = "nodad";
+	} else {
+		af = AF_INET;
+		net = "192.0.2.0/24";
+		rx_ip = "192.0.2.1";
+		tx_ip = "192.0.2.2";
+		plen = 24;
+		addr_opts = "";
+	}
+
+	*test_pass = false;
+
+	rx_ns = netns_new(DECAP_RX_NETNS, false);
+	if (!ASSERT_OK_PTR(rx_ns, "create rx_ns"))
+		return;
+
+	tx_ns = netns_new(DECAP_TX_NETNS, false);
+	if (!ASSERT_OK_PTR(tx_ns, "create tx_ns"))
+		goto close;
+
+	SYS(close, "ip link add " RX_NAME " address " RX_MAC " netns " DECAP_RX_NETNS
+		   " type veth peer name " TX_NAME " address " TX_MAC " netns " DECAP_TX_NETNS);
+
+	nstoken = open_netns(DECAP_RX_NETNS);
+	if (!ASSERT_OK_PTR(nstoken, "setns rx_ns"))
+		goto close;
+
+	SYS(close, "ip addr add %s/%u dev %s %s", rx_ip, plen, RX_NAME, addr_opts);
+	SYS(close, "ip link set dev %s up", RX_NAME);
+
+	if (!setup_l2_encap_dev(encap_type, ENCAP_DEV, RX_NAME, net, rx_ip, tx_ip))
+		goto close;
+	SYS(close, "ip link set dev %s up", ENCAP_DEV);
+
+	lower_ifindex = if_nametoindex(RX_NAME);
+	if (!ASSERT_GE(lower_ifindex, 0, "if_nametoindex lower"))
+		goto close;
+
+	upper_ifindex = if_nametoindex(ENCAP_DEV);
+	if (!ASSERT_GE(upper_ifindex, 0, "if_nametoindex upper"))
+		goto close;
+
+	ret = bpf_xdp_attach(lower_ifindex, bpf_program__fd(xdp_prog), 0, NULL);
+	if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
+		goto close;
+
+	tc_hook.ifindex = upper_ifindex;
+	ret = bpf_tc_hook_create(&tc_hook);
+	if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
+		goto close;
+
+	tc_opts.prog_fd = bpf_program__fd(tc_prog);
+	ret = bpf_tc_attach(&tc_hook, &tc_opts);
+	if (!ASSERT_OK(ret, "bpf_tc_attach"))
+		goto close;
+
+	close_netns(nstoken);
+
+	nstoken = open_netns(DECAP_TX_NETNS);
+	if (!ASSERT_OK_PTR(nstoken, "setns tx_ns"))
+		goto close;
+
+	SYS(close, "ip addr add %s/%u dev %s %s", tx_ip, plen, TX_NAME, addr_opts);
+	SYS(close, "ip neigh add %s lladdr %s nud permanent dev %s", rx_ip, RX_MAC, TX_NAME);
+	SYS(close, "ip link set dev %s up", TX_NAME);
+
+	if (!setup_l2_encap_dev(encap_type, ENCAP_DEV, TX_NAME, net, tx_ip, rx_ip))
+		goto close;
+	SYS(close, "ip link set dev %s up", ENCAP_DEV);
+
+	upper_ifindex = if_nametoindex(ENCAP_DEV);
+	if (!ASSERT_GE(upper_ifindex, 0, "if_nametoindex upper"))
+		goto close;
+
+	if (l2_encap_uses_routing(encap_type))
+		ret = send_routed_packet(af, rx_ip);
+	else
+		ret = send_test_packet(upper_ifindex);
+	if (!ASSERT_OK(ret, "send packet"))
+		goto close;
+
+	if (!ASSERT_TRUE(*test_pass, "test_pass"))
+		dump_err_stream(tc_prog);
+
+close:
+	close_netns(nstoken);
+	netns_free(rx_ns);
+	netns_free(tx_ns);
+}
+
+__printf(1, 2) static bool start_subtest(const char *fmt, ...)
+{
+	char *subtest_name;
+	va_list ap;
+	int r;
+
+	va_start(ap, fmt);
+	r = vasprintf(&subtest_name, fmt, ap);
+	va_end(ap);
+	if (!ASSERT_GE(r, 0, "format string"))
+		return false;
+
+	r = test__start_subtest(subtest_name);
+	free(subtest_name);
+	return r;
+}
+
+void test_xdp_context_l2_decap(void)
+{
+	const struct test {
+		enum l2_encap_type encap_type;
+		const char *encap_name;
+	} tests[] = {
+		{ GRE4_ENCAP, "gre4" },
+		{ GRE6_ENCAP, "gre6" },
+		{ VXLAN_ENCAP, "vxlan" },
+		{ GENEVE_ENCAP, "geneve" },
+		{ L2TPV3_ENCAP, "l2tpv3" },
+		{ VLAN_ENCAP, "vlan" },
+		{ QINQ_ENCAP, "qinq" },
+		{ MPLS_ENCAP, "mpls" },
+	};
+	struct test_xdp_meta *skel;
+	const struct test *t;
+
+	skel = test_xdp_meta__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open and load skeleton"))
+		return;
+
+	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+		if (start_subtest("%s_direct_access", t->encap_name))
+			test_l2_decap(t->encap_type, skel->progs.ing_xdp,
+				      skel->progs.ing_cls,
+				      &skel->bss->test_pass);
+		if (start_subtest("%s_dynptr_read", t->encap_name))
+			test_l2_decap(t->encap_type, skel->progs.ing_xdp,
+				      skel->progs.ing_cls_dynptr_read,
+				      &skel->bss->test_pass);
+		if (start_subtest("%s_helper_adjust_room", t->encap_name))
+			test_l2_decap(t->encap_type, skel->progs.ing_xdp,
+				      skel->progs.helper_skb_adjust_room,
+				      &skel->bss->test_pass);
+	}
+
+	test_xdp_meta__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
index 0a0f371a2dec..69130e250e84 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
@@ -280,18 +280,35 @@ int ing_cls_dynptr_offset_oob(struct __sk_buff *ctx)
 	return TC_ACT_SHOT;
 }
 
+/* Test packets carry test metadata pattern as payload. */
+static bool is_test_packet(struct xdp_md *ctx)
+{
+	__u8 meta_have[META_SIZE];
+	__u32 len;
+	int ret;
+
+	len = bpf_xdp_get_buff_len(ctx);
+	if (len < META_SIZE)
+		return false;
+	ret = bpf_xdp_load_bytes(ctx, len - META_SIZE, meta_have, META_SIZE);
+	if (ret)
+		return false;
+	ret = __builtin_memcmp(meta_have, meta_want, META_SIZE);
+	if (ret)
+		return false;
+
+	return true;
+}
+
 /* Reserve and clear space for metadata but don't populate it */
 SEC("xdp")
 int ing_xdp_zalloc_meta(struct xdp_md *ctx)
 {
-	struct ethhdr *eth = ctx_ptr(ctx, data);
 	__u8 *meta;
 	int ret;
 
 	/* Drop any non-test packets */
-	if (eth + 1 > ctx_ptr(ctx, data_end))
-		return XDP_DROP;
-	if (!check_smac(eth))
+	if (!is_test_packet(ctx))
 		return XDP_DROP;
 
 	ret = bpf_xdp_adjust_meta(ctx, -META_SIZE);
@@ -310,33 +327,24 @@ int ing_xdp_zalloc_meta(struct xdp_md *ctx)
 SEC("xdp")
 int ing_xdp(struct xdp_md *ctx)
 {
-	__u8 *data, *data_meta, *data_end, *payload;
-	struct ethhdr *eth;
+	__u8 *data, *data_meta;
 	int ret;
 
+	/* Drop any non-test packets */
+	if (!is_test_packet(ctx))
+		return XDP_DROP;
+
 	ret = bpf_xdp_adjust_meta(ctx, -META_SIZE);
 	if (ret < 0)
 		return XDP_DROP;
 
 	data_meta = ctx_ptr(ctx, data_meta);
-	data_end  = ctx_ptr(ctx, data_end);
 	data      = ctx_ptr(ctx, data);
 
-	eth = (struct ethhdr *)data;
-	payload = data + sizeof(struct ethhdr);
-
-	if (payload + META_SIZE > data_end ||
-	    data_meta + META_SIZE > data)
-		return XDP_DROP;
-
-	/* The Linux networking stack may send other packets on the test
-	 * interface that interfere with the test. Just drop them.
-	 * The test packets can be recognized by their source MAC address.
-	 */
-	if (!check_smac(eth))
+	if (data_meta + META_SIZE > data)
 		return XDP_DROP;
 
-	__builtin_memcpy(data_meta, payload, META_SIZE);
+	__builtin_memcpy(data_meta, meta_want, META_SIZE);
 	return XDP_PASS;
 }
 

-- 
2.43.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ