[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <67eed9f680b5f_15e1b3294f4@willemb.c.googlers.com.notmuch>
Date: Thu, 03 Apr 2025 14:56:54 -0400
From: Willem de Bruijn <willemdebruijn.kernel@...il.com>
To: Stanislav Fomichev <stfomichev@...il.com>,
Willem de Bruijn <willemdebruijn.kernel@...il.com>
Cc: bpf@...r.kernel.org,
netdev@...r.kernel.org,
ast@...nel.org,
daniel@...earbox.net,
john.fastabend@...il.com,
Willem de Bruijn <willemb@...gle.com>
Subject: Re: [PATCH bpf 2/2] selftests/net: test sk_filter support for
SKF_NET_OFF on frags
Stanislav Fomichev wrote:
> On 04/03, Willem de Bruijn wrote:
> > From: Willem de Bruijn <willemb@...gle.com>
> >
> > Verify that a classic BPF linux socket filter correctly matches
> > packet contents. Including when accessing contents in an
> > skb_frag.
> >
> > 1. Open a SOCK_RAW socket with a classic BPF filter on UDP dport 8000.
> > 2. Open a tap device with IFF_NAPI_FRAGS to inject skbs with frags.
> > 3. Send a packet for which the UDP header is in frag[0].
> > 4. Receive this packet to demonstrate that the socket accepted it.
> >
> > Signed-off-by: Willem de Bruijn <willemb@...gle.com>
>
> Acked-by: Stanislav Fomichev <sdf@...ichev.me>
Thanks for the review :)
> My (weak) preference is to put (most) bpf-related things under
> selftests/bpf, but since you already have it working, not sure
> it's worth the effort.
I wasn't sure since this is exclusively legacy linux socket filters,
and needs a tun network stack to exercise it.
Will keep as is if you indeed don't mind.
> > ---
> > tools/testing/selftests/net/.gitignore | 1 +
> > tools/testing/selftests/net/Makefile | 2 +
> > tools/testing/selftests/net/skf_net_off.c | 244 +++++++++++++++++++++
> > tools/testing/selftests/net/skf_net_off.sh | 28 +++
> > 4 files changed, 275 insertions(+)
> > create mode 100644 tools/testing/selftests/net/skf_net_off.c
> > create mode 100755 tools/testing/selftests/net/skf_net_off.sh
> >
> > diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
> > index 679542f565a4..532bb732bc6d 100644
> > --- a/tools/testing/selftests/net/.gitignore
> > +++ b/tools/testing/selftests/net/.gitignore
> > @@ -39,6 +39,7 @@ scm_rights
> > sk_bind_sendto_listen
> > sk_connect_zero_addr
> > sk_so_peek_off
> > +skf_net_off
> > socket
> > so_incoming_cpu
> > so_netns_cookie
> > diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
> > index 6d718b478ed8..124078b56fa4 100644
> > --- a/tools/testing/selftests/net/Makefile
> > +++ b/tools/testing/selftests/net/Makefile
> > @@ -106,6 +106,8 @@ TEST_PROGS += ipv6_route_update_soft_lockup.sh
> > TEST_PROGS += busy_poll_test.sh
> > TEST_GEN_PROGS += proc_net_pktgen
> > TEST_PROGS += lwt_dst_cache_ref_loop.sh
> > +TEST_PROGS += skf_net_off.sh
> > +TEST_GEN_FILES += skf_net_off
> >
> > # YNL files, must be before "include ..lib.mk"
> > YNL_GEN_FILES := busy_poller netlink-dumps
> > diff --git a/tools/testing/selftests/net/skf_net_off.c b/tools/testing/selftests/net/skf_net_off.c
> > new file mode 100644
> > index 000000000000..1fdf61d6cd7f
> > --- /dev/null
> > +++ b/tools/testing/selftests/net/skf_net_off.c
> > @@ -0,0 +1,244 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +
> > +/* Open a tun device.
> > + *
> > + * [modifications: use IFF_NAPI_FRAGS, add sk filter]
> > + *
> > + * Expects the device to have been configured previously, e.g.:
> > + * sudo ip tuntap add name tap1 mode tap
> > + * sudo ip link set tap1 up
> > + * sudo ip link set dev tap1 addr 02:00:00:00:00:01
> > + * sudo ip -6 addr add fdab::1 peer fdab::2 dev tap1 nodad
> > + *
> > + * And to avoid premature pskb_may_pull:
> > + *
> > + * sudo ethtool -K tap1 gro off
> > + * sudo bash -c 'echo 0 > /proc/sys/net/ipv4/ip_early_demux'
> > + */
> > +
> > +#define _GNU_SOURCE
> > +
> > +#include <arpa/inet.h>
> > +#include <errno.h>
> > +#include <error.h>
> > +#include <fcntl.h>
> > +#include <getopt.h>
> > +#include <linux/filter.h>
> > +#include <linux/if.h>
> > +#include <linux/if_packet.h>
> > +#include <linux/if_tun.h>
> > +#include <linux/ipv6.h>
> > +#include <netinet/if_ether.h>
> > +#include <netinet/in.h>
> > +#include <netinet/ip.h>
> > +#include <netinet/ip6.h>
> > +#include <netinet/udp.h>
> > +#include <poll.h>
> > +#include <signal.h>
> > +#include <stdbool.h>
> > +#include <stddef.h>
> > +#include <stdio.h>
> > +#include <stdlib.h>
> > +#include <string.h>
> > +#include <sys/ioctl.h>
> > +#include <sys/socket.h>
> > +#include <sys/poll.h>
> > +#include <sys/types.h>
> > +#include <sys/uio.h>
> > +#include <unistd.h>
> > +
> > +static bool cfg_do_filter;
> > +static bool cfg_do_frags;
> > +static int cfg_dst_port = 8000;
> > +static char *cfg_ifname;
> > +
> > +static int tun_open(const char *tun_name)
> > +{
> > + struct ifreq ifr = {0};
> > + int fd, ret;
> > +
> > + fd = open("/dev/net/tun", O_RDWR);
> > + if (fd == -1)
> > + error(1, errno, "open /dev/net/tun");
> > +
> > + ifr.ifr_flags = IFF_TAP;
> > + if (cfg_do_frags)
> > + ifr.ifr_flags |= IFF_NAPI | IFF_NAPI_FRAGS;
> > +
> > + strncpy(ifr.ifr_name, tun_name, IFNAMSIZ - 1);
> > +
> > + ret = ioctl(fd, TUNSETIFF, &ifr);
> > + if (ret)
> > + error(1, ret, "ioctl TUNSETIFF");
> > +
> > + return fd;
> > +}
> > +
> > +static void sk_set_filter(int fd)
> > +{
> > + const int offset_proto = offsetof(struct ip6_hdr, ip6_nxt);
> > + const int offset_dport = sizeof(struct ip6_hdr) + offsetof(struct udphdr, dest);
> > +
> > + /* Filter UDP packets with destination port cfg_dst_port */
> > + struct sock_filter filter_code[] = {
> > + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
> > + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4),
> > + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_NET_OFF + offset_proto),
> > + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_UDP, 0, 2),
> > + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, SKF_NET_OFF + offset_dport),
> > + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dst_port, 1, 0),
> > + BPF_STMT(BPF_RET + BPF_K, 0),
> > + BPF_STMT(BPF_RET + BPF_K, 0xFFFF),
> > + };
> > +
> > + struct sock_fprog filter = {
> > + sizeof(filter_code) / sizeof(filter_code[0]),
> > + filter_code,
> > + };
> > +
> > + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &filter, sizeof(filter)))
> > + error(1, errno, "setsockopt attach filter");
> > +}
> > +
> > +static int raw_open(void)
> > +{
> > + int fd;
> > +
> > + fd = socket(PF_INET6, SOCK_RAW, IPPROTO_UDP);
> > + if (fd == -1)
> > + error(1, errno, "socket raw (udp)");
> > +
> > + if (cfg_do_filter)
> > + sk_set_filter(fd);
> > +
> > + return fd;
> > +}
> > +
> > +static void tun_write(int fd)
> > +{
> > + const char eth_src[] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x02 };
> > + const char eth_dst[] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
> > + struct tun_pi pi = {0};
> > + struct ipv6hdr ip6h = {0};
> > + struct udphdr uh = {0};
> > + struct ethhdr eth = {0};
> > + uint32_t payload;
> > + struct iovec iov[5];
> > + int ret;
> > +
> > + pi.proto = htons(ETH_P_IPV6);
> > +
> > + memcpy(eth.h_source, eth_src, sizeof(eth_src));
> > + memcpy(eth.h_dest, eth_dst, sizeof(eth_dst));
> > + eth.h_proto = htons(ETH_P_IPV6);
> > +
> > + ip6h.version = 6;
> > + ip6h.payload_len = htons(sizeof(uh) + sizeof(uint32_t));
> > + ip6h.nexthdr = IPPROTO_UDP;
> > + ip6h.hop_limit = 8;
> > + if (inet_pton(AF_INET6, "fdab::2", &ip6h.saddr) != 1)
> > + error(1, errno, "inet_pton src");
> > + if (inet_pton(AF_INET6, "fdab::1", &ip6h.daddr) != 1)
> > + error(1, errno, "inet_pton src");
> > +
> > + uh.source = htons(8000);
> > + uh.dest = htons(cfg_dst_port);
> > + uh.len = ip6h.payload_len;
> > + uh.check = 0;
> > +
> > + payload = htonl(0xABABABAB); /* Covered in IPv6 length */
> > +
> > + iov[0].iov_base = &pi;
> > + iov[0].iov_len = sizeof(pi);
> > + iov[1].iov_base = &eth;
> > + iov[1].iov_len = sizeof(eth);
> > + iov[2].iov_base = &ip6h;
> > + iov[2].iov_len = sizeof(ip6h);
> > + iov[3].iov_base = &uh;
> > + iov[3].iov_len = sizeof(uh);
> > + iov[4].iov_base = &payload;
> > + iov[4].iov_len = sizeof(payload);
> > +
> > + ret = writev(fd, iov, sizeof(iov) / sizeof(iov[0]));
> > + if (ret <= 0)
> > + error(1, errno, "writev");
> > +}
> > +
> > +static void raw_read(int fd)
> > +{
> > + struct timeval tv = { .tv_usec = 100 * 1000 };
> > + struct msghdr msg = {0};
> > + struct iovec iov[2];
> > + struct udphdr uh;
> > + uint32_t payload[2];
> > + int ret;
> > +
> > + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
> > + error(1, errno, "setsockopt rcvtimeo udp");
> > +
> > + iov[0].iov_base = &uh;
> > + iov[0].iov_len = sizeof(uh);
> > +
> > + iov[1].iov_base = payload;
> > + iov[1].iov_len = sizeof(payload);
> > +
> > + msg.msg_iov = iov;
> > + msg.msg_iovlen = sizeof(iov) / sizeof(iov[0]);
> > +
> > + ret = recvmsg(fd, &msg, 0);
> > + if (ret <= 0)
> > + error(1, errno, "read raw");
> > + if (ret != sizeof(uh) + sizeof(payload[0]))
> > + error(1, errno, "read raw: len=%d\n", ret);
> > +
> > + fprintf(stderr, "raw recv: 0x%x\n", payload[0]);
> > +}
> > +
> > +static void parse_opts(int argc, char **argv)
> > +{
> > + int c;
> > +
> > + while ((c = getopt(argc, argv, "fFi:")) != -1) {
> > + switch (c) {
> > + case 'f':
> > + cfg_do_filter = true;
> > + printf("bpf filter enabled\n");
> > + break;
> > + case 'F':
> > + cfg_do_frags = true;
> > + printf("napi frags mode enabled\n");
> > + break;
> > + case 'i':
> > + cfg_ifname = optarg;
> > + break;
> > + default:
> > + error(1, 0, "unknown option %c", optopt);
> > + break;
> > + }
> > + }
> > +
> > + if (!cfg_ifname)
> > + error(1, 0, "must specify tap interface name (-i)");
> > +}
> > +
> > +int main(int argc, char **argv)
> > +{
> > + int fdt, fdr;
> > +
> > + parse_opts(argc, argv);
> > +
> > + fdr = raw_open();
> > + fdt = tun_open(cfg_ifname);
> > +
> > + tun_write(fdt);
> > + raw_read(fdr);
> > +
> > + if (close(fdt))
> > + error(1, errno, "close tun");
> > + if (close(fdr))
> > + error(1, errno, "close udp");
> > +
> > + fprintf(stderr, "OK\n");
> > + return 0;
> > +}
> > +
> > diff --git a/tools/testing/selftests/net/skf_net_off.sh b/tools/testing/selftests/net/skf_net_off.sh
> > new file mode 100755
> > index 000000000000..e9cce93a0258
> > --- /dev/null
> > +++ b/tools/testing/selftests/net/skf_net_off.sh
> > @@ -0,0 +1,28 @@
> > +#!/bin/bash
> > +# SPDX-License-Identifier: GPL-2.0
> > +
> > +readonly NS="ns-$(mktemp -u XXXXXX)"
> > +
> > +cleanup() {
> > + ip netns del $NS
> > +}
> > +
> > +ip netns add $NS
> > +trap cleanup EXIT
> > +
> > +ip -netns $NS link set lo up
> > +ip -netns $NS tuntap add name tap1 mode tap
> > +ip -netns $NS link set tap1 up
> > +ip -netns $NS link set dev tap1 addr 02:00:00:00:00:01
> > +ip -netns $NS -6 addr add fdab::1 peer fdab::2 dev tap1 nodad
> > +ip netns exec $NS ethtool -K tap1 gro off
> > +ip netns exec $NS sysctl -w net.ipv4.ip_early_demux=0
>
> Curious: why disable ip_early_demux here?
Otherwise early demux will pull the headers into linear, in
udp_v6_early_demux
Powered by blists - more mailing lists