[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20220308055959.ltrzq3kkq7joslv2@kafai-mbp.dhcp.thefacebook.com>
Date: Mon, 7 Mar 2022 21:59:59 -0800
From: Martin KaFai Lau <kafai@...com>
To: Toke Høiland-Jørgensen <toke@...hat.com>
Cc: Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
"David S. Miller" <davem@...emloft.net>,
Jakub Kicinski <kuba@...nel.org>,
Jesper Dangaard Brouer <hawk@...nel.org>,
John Fastabend <john.fastabend@...il.com>,
Andrii Nakryiko <andrii@...nel.org>,
Song Liu <songliubraving@...com>, Yonghong Song <yhs@...com>,
KP Singh <kpsingh@...nel.org>, Shuah Khan <shuah@...nel.org>,
netdev@...r.kernel.org, bpf@...r.kernel.org
Subject: Re: [PATCH bpf-next v9 5/5] selftests/bpf: Add selftest for
XDP_REDIRECT in BPF_PROG_RUN
On Sun, Mar 06, 2022 at 11:34:04PM +0100, Toke Høiland-Jørgensen wrote:
> +#define NUM_PKTS 1000000
It took my qemu 30s to run.
Would it have the same test coverage if NUM_PKTS were lowered to
something like 10000?
> +void test_xdp_do_redirect(void)
> +{
> + int err, xdp_prog_fd, tc_prog_fd, ifindex_src, ifindex_dst;
> + char data[sizeof(pkt_udp) + sizeof(__u32)];
> + struct test_xdp_do_redirect *skel = NULL;
> + struct nstoken *nstoken = NULL;
> + struct bpf_link *link;
> +
> + struct xdp_md ctx_in = { .data = sizeof(__u32),
> + .data_end = sizeof(data) };
> + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
> + .data_in = &data,
> + .data_size_in = sizeof(data),
> + .ctx_in = &ctx_in,
> + .ctx_size_in = sizeof(ctx_in),
> + .flags = BPF_F_TEST_XDP_LIVE_FRAMES,
> + .repeat = NUM_PKTS,
> + .batch_size = 64,
> + );
> + DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
> + .attach_point = BPF_TC_INGRESS);
> +
> + memcpy(&data[sizeof(__u32)], &pkt_udp, sizeof(pkt_udp));
> + *((__u32 *)data) = 0x42; /* metadata test value */
> +
> + skel = test_xdp_do_redirect__open();
> + if (!ASSERT_OK_PTR(skel, "skel"))
> + return;
> +
> + /* The XDP program we run with bpf_prog_run() will cycle through all
> + * three xmit (PASS/TX/REDIRECT) return codes starting from above, and
> + * ending up with PASS, so we should end up with two packets on the dst
> + * iface and NUM_PKTS-2 in the TC hook. We match the packets on the UDP
> + * payload.
> + */
> + SYS("ip netns add testns");
> + nstoken = open_netns("testns");
> + if (!ASSERT_OK_PTR(nstoken, "setns"))
> + goto out;
> +
> + SYS("ip link add veth_src type veth peer name veth_dst");
> + SYS("ip link set dev veth_src address 00:11:22:33:44:55");
> + SYS("ip link set dev veth_dst address 66:77:88:99:aa:bb");
> + SYS("ip link set dev veth_src up");
> + SYS("ip link set dev veth_dst up");
> + SYS("ip addr add dev veth_src fc00::1/64");
> + SYS("ip addr add dev veth_dst fc00::2/64");
> + SYS("ip neigh add fc00::2 dev veth_src lladdr 66:77:88:99:aa:bb");
> +
> + /* We enable forwarding in the test namespace because that will cause
> + * the packets that go through the kernel stack (with XDP_PASS) to be
> + * forwarded back out the same interface (because of the packet dst
> + * combined with the interface addresses). When this happens, the
> + * regular forwarding path will end up going through the same
> + * veth_xdp_xmit() call as the XDP_REDIRECT code, which can cause a
> + * deadlock if it happens on the same CPU. There's a local_bh_disable()
> + * in the test_run code to prevent this, but an earlier version of the
> + * code didn't have this, so we keep the test behaviour to make sure the
> + * bug doesn't resurface.
> + */
> + SYS("sysctl -qw net.ipv6.conf.all.forwarding=1");
> +
> + ifindex_src = if_nametoindex("veth_src");
> + ifindex_dst = if_nametoindex("veth_dst");
> + if (!ASSERT_NEQ(ifindex_src, 0, "ifindex_src") ||
> + !ASSERT_NEQ(ifindex_dst, 0, "ifindex_dst"))
> + goto out;
> +
> + memcpy(skel->rodata->expect_dst, &pkt_udp.eth.h_dest, ETH_ALEN);
> + skel->rodata->ifindex_out = ifindex_src; /* redirect back to the same iface */
> + skel->rodata->ifindex_in = ifindex_src;
> + ctx_in.ingress_ifindex = ifindex_src;
> + tc_hook.ifindex = ifindex_src;
> +
> + if (!ASSERT_OK(test_xdp_do_redirect__load(skel), "load"))
> + goto out;
> +
> + link = bpf_program__attach_xdp(skel->progs.xdp_count_pkts, ifindex_dst);
> + if (!ASSERT_OK_PTR(link, "prog_attach"))
> + goto out;
> + skel->links.xdp_count_pkts = link;
> +
> + tc_prog_fd = bpf_program__fd(skel->progs.tc_count_pkts);
> + if (attach_tc_prog(&tc_hook, tc_prog_fd))
> + goto out;
> +
> + xdp_prog_fd = bpf_program__fd(skel->progs.xdp_redirect);
> + err = bpf_prog_test_run_opts(xdp_prog_fd, &opts);
> + if (!ASSERT_OK(err, "prog_run"))
> + goto out_tc;
> +
> + /* wait for the packets to be flushed */
> + kern_sync_rcu();
> +
> + /* There will be one packet sent through XDP_REDIRECT and one through
> + * XDP_TX; these will show up on the XDP counting program, while the
> + * rest will be counted at the TC ingress hook (and the counting program
> + * resets the packet payload so they don't get counted twice even though
> + * they are re-xmited out the veth device
> + */
> + ASSERT_EQ(skel->bss->pkts_seen_xdp, 2, "pkt_count_xdp");
> + ASSERT_EQ(skel->bss->pkts_seen_tc, NUM_PKTS - 2, "pkt_count_tc");
> +
> +out_tc:
> + bpf_tc_hook_destroy(&tc_hook);
> +out:
> + if (nstoken)
> + close_netns(nstoken);
> + system("ip netns del testns");
> + test_xdp_do_redirect__destroy(skel);
> +}
> diff --git a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
> new file mode 100644
> index 000000000000..d785f48304ea
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
> @@ -0,0 +1,92 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <vmlinux.h>
> +#include <bpf/bpf_helpers.h>
> +
> +#define ETH_ALEN 6
> +#define HDR_SZ (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct udphdr))
> +const volatile int ifindex_out;
> +const volatile int ifindex_in;
> +const volatile __u8 expect_dst[ETH_ALEN];
> +volatile int pkts_seen_xdp = 0;
> +volatile int pkts_seen_tc = 0;
> +volatile int retcode = XDP_REDIRECT;
> +
> +SEC("xdp")
> +int xdp_redirect(struct xdp_md *xdp)
> +{
> + __u32 *metadata = (void *)(long)xdp->data_meta;
> + void *data_end = (void *)(long)xdp->data_end;
> + void *data = (void *)(long)xdp->data;
> +
> + __u8 *payload = data + HDR_SZ;
> + int ret = retcode;
> +
> + if (payload + 1 > data_end)
> + return XDP_ABORTED;
> +
> + if (xdp->ingress_ifindex != ifindex_in)
> + return XDP_ABORTED;
> +
> + if (metadata + 1 > data)
> + return XDP_ABORTED;
> +
> + if (*metadata != 0x42)
> + return XDP_ABORTED;
> +
> + *payload = 0x42;
nit. How about also adding a pkts_seen_zero counter here, like
if (*payload == 0) {
*payload = 0x42;
pkts_seen_zero++;
}
and adding ASSERT_EQ(skel->bss->pkts_seen_zero, 2, "pkt_count_zero")
to the prog_tests. That would better show that the recycled page's
data is not re-initialized.
> +
> + if (bpf_xdp_adjust_meta(xdp, 4))
> + return XDP_ABORTED;
> +
> + if (retcode > XDP_PASS)
> + retcode--;
> +
> + if (ret == XDP_REDIRECT)
> + return bpf_redirect(ifindex_out, 0);
> +
> + return ret;
> +}
> +
> +static bool check_pkt(void *data, void *data_end)
> +{
> + struct ipv6hdr *iph = data + sizeof(struct ethhdr);
> + __u8 *payload = data + HDR_SZ;
> +
> + if (payload + 1 > data_end)
> + return false;
> +
> + if (iph->nexthdr != IPPROTO_UDP || *payload != 0x42)
> + return false;
> +
> + /* reset the payload so the same packet doesn't get counted twice when
> + * it cycles back through the kernel path and out the dst veth
> + */
> + *payload = 0;
> + return true;
> +}
> +
> +SEC("xdp")
> +int xdp_count_pkts(struct xdp_md *xdp)
> +{
> + void *data = (void *)(long)xdp->data;
> + void *data_end = (void *)(long)xdp->data_end;
> +
> + if (check_pkt(data, data_end))
> + pkts_seen_xdp++;
> +
> + return XDP_DROP;
nit. A comment here would be useful to explain that XDP_DROP from
the xdp prog at the veth ingress will put the page back into the
recycle pool, which is similar to xmit-ing out of a real NIC.
> +}
> +
> +SEC("tc")
> +int tc_count_pkts(struct __sk_buff *skb)
> +{
> + void *data = (void *)(long)skb->data;
> + void *data_end = (void *)(long)skb->data_end;
> +
> + if (check_pkt(data, data_end))
> + pkts_seen_tc++;
> +
> + return 0;
> +}
> +
> +char _license[] SEC("license") = "GPL";
> --
> 2.35.1
>
Powered by blists - more mailing lists