[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250630-skb-metadata-thru-dynptr-v1-12-f17da13625d8@cloudflare.com>
Date: Mon, 30 Jun 2025 16:55:45 +0200
From: Jakub Sitnicki <jakub@...udflare.com>
To: bpf@...r.kernel.org
Cc: Alexei Starovoitov <ast@...nel.org>,
Arthur Fabre <arthur@...hurfabre.com>, Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>, Jesper Dangaard Brouer <hawk@...nel.org>,
Jesse Brandeburg <jbrandeburg@...udflare.com>,
Joanne Koong <joannelkoong@...il.com>,
Lorenzo Bianconi <lorenzo@...nel.org>,
Toke Høiland-Jørgensen <thoiland@...hat.com>,
Yan Zhai <yan@...udflare.com>, netdev@...r.kernel.org,
kernel-team@...udflare.com, Stanislav Fomichev <sdf@...ichev.me>
Subject: [PATCH bpf-next 12/13] selftests/bpf: Cover lack of access to skb
metadata at ip layer
Currently we don't expect skb metadata to persist beyond the device hooks.
Extend the test to run a BPF program from the Netfilter pre-routing hook to
verify this behavior.
Note that the added test has no observable side effect yet. This will be
addressed by the next change.
Signed-off-by: Jakub Sitnicki <jakub@...udflare.com>
---
.../bpf/prog_tests/xdp_context_test_run.c | 94 ++++++++++++++++------
tools/testing/selftests/bpf/progs/test_xdp_meta.c | 62 +++++++++-----
tools/testing/selftests/bpf/test_progs.h | 1 +
3 files changed, 115 insertions(+), 42 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
index 79c4c58276e6..4cf8e009a054 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
@@ -19,6 +19,9 @@ static const __u8 test_payload[TEST_PAYLOAD_LEN] = {
0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
};
+#define PACKET_LEN \
+ (sizeof(struct ethhdr) + sizeof(struct iphdr) + TEST_PAYLOAD_LEN)
+
void test_xdp_context_error(int prog_fd, struct bpf_test_run_opts opts,
__u32 data_meta, __u32 data, __u32 data_end,
__u32 ingress_ifindex, __u32 rx_queue_index,
@@ -120,18 +123,38 @@ void test_xdp_context_test_run(void)
test_xdp_context_test_run__destroy(skel);
}
+static void init_test_packet(__u8 *pkt)
+{
+ struct ethhdr *eth = &(struct ethhdr){
+ .h_dest = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 },
+ .h_source = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x02 },
+ .h_proto = htons(ETH_P_IP),
+ };
+ struct iphdr *iph = &(struct iphdr){
+ .ihl = 5,
+ .version = IPVERSION,
+ .ttl = IPDEFTTL,
+ .protocol = 61, /* host internal protocol */
+ .saddr = inet_addr("10.0.0.2"),
+ .daddr = inet_addr("10.0.0.1"),
+ };
+
+ eth = memcpy(pkt, eth, sizeof(*eth));
+ pkt += sizeof(*eth);
+ iph = memcpy(pkt, iph, sizeof(*iph));
+ pkt += sizeof(*iph);
+ memcpy(pkt, test_payload, sizeof(test_payload));
+
+ iph->tot_len = htons(sizeof(*iph) + sizeof(test_payload));
+ iph->check = build_ip_csum(iph);
+}
+
static int send_test_packet(int ifindex)
{
+ __u8 packet[PACKET_LEN];
int n, sock = -1;
- __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN];
-
- /* The ethernet header is not relevant for this test and doesn't need to
- * be meaningful.
- */
- struct ethhdr eth = { 0 };
- memcpy(packet, &eth, sizeof(eth));
- memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN);
+ init_test_packet(packet);
sock = socket(AF_PACKET, SOCK_RAW, IPPROTO_RAW);
if (!ASSERT_GE(sock, 0, "socket"))
@@ -271,17 +294,18 @@ void test_xdp_context_veth(void)
static void test_tuntap(struct bpf_program *xdp_prog,
struct bpf_program *tc_prio_1_prog,
struct bpf_program *tc_prio_2_prog,
+ struct bpf_program *nf_prog,
struct bpf_map *result_map)
{
LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
- LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ struct bpf_link *nf_link = NULL;
struct netns_obj *ns = NULL;
- __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN];
+ __u8 packet[PACKET_LEN];
int tap_fd = -1;
int tap_ifindex;
int ret;
- if (!clear_test_result(result_map))
+ if (result_map && !clear_test_result(result_map))
return;
ns = netns_new(TAP_NETNS, true);
@@ -292,6 +316,8 @@ static void test_tuntap(struct bpf_program *xdp_prog,
if (!ASSERT_GE(tap_fd, 0, "open_tuntap"))
goto close;
+ SYS(close, "ip link set dev " TAP_NAME " addr 02:00:00:00:00:01");
+ SYS(close, "ip addr add dev " TAP_NAME " 10.0.0.1/24");
SYS(close, "ip link set dev " TAP_NAME " up");
tap_ifindex = if_nametoindex(TAP_NAME);
@@ -303,10 +329,14 @@ static void test_tuntap(struct bpf_program *xdp_prog,
if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
goto close;
- tc_opts.prog_fd = bpf_program__fd(tc_prio_1_prog);
- ret = bpf_tc_attach(&tc_hook, &tc_opts);
- if (!ASSERT_OK(ret, "bpf_tc_attach"))
- goto close;
+ if (tc_prio_1_prog) {
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1,
+ .prog_fd = bpf_program__fd(tc_prio_1_prog));
+
+ ret = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ goto close;
+ }
if (tc_prio_2_prog) {
LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 2,
@@ -317,28 +347,33 @@ static void test_tuntap(struct bpf_program *xdp_prog,
goto close;
}
+ if (nf_prog) {
+ LIBBPF_OPTS(bpf_netfilter_opts, nf_opts,
+ .pf = NFPROTO_IPV4, .hooknum = NF_INET_PRE_ROUTING);
+
+ nf_link = bpf_program__attach_netfilter(nf_prog, &nf_opts);
+ if (!ASSERT_OK_PTR(nf_link, "attach_netfilter"))
+ goto close;
+ }
+
ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(xdp_prog),
0, NULL);
if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
goto close;
- /* The ethernet header is not relevant for this test and doesn't need to
- * be meaningful.
- */
- struct ethhdr eth = { 0 };
-
- memcpy(packet, &eth, sizeof(eth));
- memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN);
-
+ init_test_packet(packet);
ret = write(tap_fd, packet, sizeof(packet));
if (!ASSERT_EQ(ret, sizeof(packet), "write packet"))
goto close;
- assert_test_result(result_map);
+ if (result_map)
+ assert_test_result(result_map);
close:
if (tap_fd >= 0)
close(tap_fd);
+ if (nf_link)
+ bpf_link__destroy(nf_link);
netns_free(ns);
}
@@ -354,27 +389,38 @@ void test_xdp_context_tuntap(void)
test_tuntap(skel->progs.ing_xdp,
skel->progs.ing_cls,
NULL, /* tc prio 2 */
+ NULL, /* netfilter */
skel->maps.test_result);
if (test__start_subtest("dynptr_read"))
test_tuntap(skel->progs.ing_xdp,
skel->progs.ing_cls_dynptr_read,
NULL, /* tc prio 2 */
+ NULL, /* netfilter */
skel->maps.test_result);
if (test__start_subtest("dynptr_slice"))
test_tuntap(skel->progs.ing_xdp,
skel->progs.ing_cls_dynptr_slice,
NULL, /* tc prio 2 */
+ NULL, /* netfilter */
skel->maps.test_result);
if (test__start_subtest("dynptr_write"))
test_tuntap(skel->progs.ing_xdp_zalloc_meta,
skel->progs.ing_cls_dynptr_write,
skel->progs.ing_cls_dynptr_read,
+ NULL, /* netfilter */
skel->maps.test_result);
if (test__start_subtest("dynptr_slice_rdwr"))
test_tuntap(skel->progs.ing_xdp_zalloc_meta,
skel->progs.ing_cls_dynptr_slice_rdwr,
skel->progs.ing_cls_dynptr_slice,
+ NULL, /* netfilter */
skel->maps.test_result);
+ if (test__start_subtest("dynptr_nf_hook"))
+ test_tuntap(skel->progs.ing_xdp,
+ NULL, /* tc prio 1 */
+ NULL, /* tc prio 2 */
+ skel->progs.ing_nf,
+ NULL /* ignore result for now */);
test_xdp_meta__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
index b6fed72b1005..41411d164190 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
@@ -1,15 +1,25 @@
#include <stdbool.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
+#include <linux/ip.h>
#include <linux/pkt_cls.h>
+#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
#include "bpf_kfuncs.h"
+#define META_OFFSET (sizeof(struct ethhdr) + sizeof(struct iphdr))
#define META_SIZE 32
+#define NF_DROP 0
+#define NF_ACCEPT 1
+
#define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem
+struct bpf_nf_ctx {
+ struct sk_buff *skb;
+} __attribute__((preserve_access_index));
+
/* Demonstrates how metadata can be passed from an XDP program to a TC program
* using bpf_xdp_adjust_meta.
* For the sake of testing the metadata support in drivers, the XDP program uses
@@ -60,6 +70,20 @@ int ing_cls_dynptr_read(struct __sk_buff *ctx)
return TC_ACT_SHOT;
}
+/* Check that we can't get a dynptr slice to skb metadata yet */
+SEC("netfilter")
+int ing_nf(struct bpf_nf_ctx *ctx)
+{
+ struct __sk_buff *skb = (struct __sk_buff *)ctx->skb;
+ struct bpf_dynptr meta;
+
+ bpf_dynptr_from_skb(skb, BPF_DYNPTR_F_SKB_METADATA, &meta);
+ if (bpf_dynptr_size(&meta) != 0)
+ return NF_DROP;
+
+ return NF_ACCEPT;
+}
+
/* Write to metadata using bpf_dynptr_write helper */
SEC("tc")
int ing_cls_dynptr_write(struct __sk_buff *ctx)
@@ -68,7 +92,7 @@ int ing_cls_dynptr_write(struct __sk_buff *ctx)
__u8 *src;
bpf_dynptr_from_skb(ctx, 0, &data);
- src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE);
+ src = bpf_dynptr_slice(&data, META_OFFSET, NULL, META_SIZE);
if (!src)
return TC_ACT_SHOT;
@@ -108,7 +132,7 @@ int ing_cls_dynptr_slice_rdwr(struct __sk_buff *ctx)
__u8 *src, *dst;
bpf_dynptr_from_skb(ctx, 0, &data);
- src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE);
+ src = bpf_dynptr_slice(&data, META_OFFSET, NULL, META_SIZE);
if (!src)
return TC_ACT_SHOT;
@@ -126,14 +150,18 @@ int ing_cls_dynptr_slice_rdwr(struct __sk_buff *ctx)
SEC("xdp")
int ing_xdp_zalloc_meta(struct xdp_md *ctx)
{
- struct ethhdr *eth = ctx_ptr(ctx, data);
+ const void *data_end = ctx_ptr(ctx, data_end);
+ const struct ethhdr *eth;
+ const struct iphdr *iph;
__u8 *meta;
int ret;
- /* Drop any non-test packets */
- if (eth + 1 > ctx_ptr(ctx, data_end))
+ /* Expect Eth | IPv4 (proto=61) | ... */
+ eth = ctx_ptr(ctx, data);
+ if (eth + 1 > data_end || eth->h_proto != bpf_htons(ETH_P_IP))
return XDP_DROP;
- if (eth->h_proto != 0)
+ iph = (void *)(eth + 1);
+ if (iph + 1 > data_end || iph->protocol != 61)
return XDP_DROP;
ret = bpf_xdp_adjust_meta(ctx, -META_SIZE);
@@ -153,7 +181,8 @@ SEC("xdp")
int ing_xdp(struct xdp_md *ctx)
{
__u8 *data, *data_meta, *data_end, *payload;
- struct ethhdr *eth;
+ const struct ethhdr *eth;
+ const struct iphdr *iph;
int ret;
ret = bpf_xdp_adjust_meta(ctx, -META_SIZE);
@@ -164,18 +193,15 @@ int ing_xdp(struct xdp_md *ctx)
data_end = ctx_ptr(ctx, data_end);
data = ctx_ptr(ctx, data);
- eth = (struct ethhdr *)data;
- payload = data + sizeof(struct ethhdr);
-
- if (payload + META_SIZE > data_end ||
- data_meta + META_SIZE > data)
+ /* Expect Eth | IPv4 (proto=61) | meta blob */
+ eth = (void *)data;
+ if (eth + 1 > data_end || eth->h_proto != bpf_htons(ETH_P_IP))
return XDP_DROP;
-
- /* The Linux networking stack may send other packets on the test
- * interface that interfere with the test. Just drop them.
- * The test packets can be recognized by their ethertype of zero.
- */
- if (eth->h_proto != 0)
+ iph = (void *)(eth + 1);
+ if (iph + 1 > data_end || iph->protocol != 61)
+ return XDP_DROP;
+ payload = (void *)(iph + 1);
+ if (payload + META_SIZE > data_end || data_meta + META_SIZE > data)
return XDP_DROP;
__builtin_memcpy(data_meta, payload, META_SIZE);
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index df2222a1806f..204f54cdaab1 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -20,6 +20,7 @@ typedef __u16 __sum16;
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/filter.h>
+#include <linux/netfilter.h>
#include <linux/perf_event.h>
#include <linux/socket.h>
#include <linux/unistd.h>
--
2.43.0
Powered by blists - more mailing lists