netdev - Re: [RFC PATCH net-next v6 13/13] bpf: add simple bpf tests in the tx path for so

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAL+tcoCpJESydmRXp9ASeXYjFkjOyXn+dF+7dYa0Ek6DdnMHKw@mail.gmail.com>
Date: Sat, 25 Jan 2025 11:42:18 +0800
From: Jason Xing <kerneljasonxing@...il.com>
To: Martin KaFai Lau <martin.lau@...ux.dev>
Cc: davem@...emloft.net, edumazet@...gle.com, kuba@...nel.org, 
	pabeni@...hat.com, dsahern@...nel.org, willemdebruijn.kernel@...il.com, 
	willemb@...gle.com, ast@...nel.org, daniel@...earbox.net, andrii@...nel.org, 
	eddyz87@...il.com, song@...nel.org, yonghong.song@...ux.dev, 
	john.fastabend@...il.com, kpsingh@...nel.org, sdf@...ichev.me, 
	haoluo@...gle.com, jolsa@...nel.org, horms@...nel.org, bpf@...r.kernel.org, 
	netdev@...r.kernel.org
Subject: Re: [RFC PATCH net-next v6 13/13] bpf: add simple bpf tests in the tx
 path for so_timestamping feature

On Sat, Jan 25, 2025 at 11:08 AM Martin KaFai Lau <martin.lau@...ux.dev> wrote:
>
> On 1/20/25 5:29 PM, Jason Xing wrote:
> > Only check if we pass those three key points after we enable the
> > bpf extension for so_timestamping. During each point, we can choose
> > whether to print the current timestamp.
> >
> > Signed-off-by: Jason Xing <kerneljasonxing@...il.com>
> > ---
> >   .../bpf/prog_tests/so_timestamping.c          |  98 ++++++++
> >   .../selftests/bpf/progs/so_timestamping.c     | 227 ++++++++++++++++++
> >   2 files changed, 325 insertions(+)
> >   create mode 100644 tools/testing/selftests/bpf/prog_tests/so_timestamping.c
> >   create mode 100644 tools/testing/selftests/bpf/progs/so_timestamping.c
> >
> > diff --git a/tools/testing/selftests/bpf/prog_tests/so_timestamping.c b/tools/testing/selftests/bpf/prog_tests/so_timestamping.c
> > new file mode 100644
> > index 000000000000..bbfa7eb38cfb
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/prog_tests/so_timestamping.c
> > @@ -0,0 +1,98 @@
> > +#define _GNU_SOURCE
> > +#include <sched.h>
> > +#include <linux/socket.h>
> > +#include <linux/tls.h>
> > +#include <net/if.h>
> > +
> > +#include "test_progs.h"
> > +#include "cgroup_helpers.h"
> > +#include "network_helpers.h"
> > +
> > +#include "so_timestamping.skel.h"
> > +
> > +#define CG_NAME "/so-timestamping-test"
> > +
> > +static const char addr4_str[] = "127.0.0.1";
> > +static const char addr6_str[] = "::1";
> > +static struct so_timestamping *skel;
> > +static int cg_fd;
> > +
> > +static int create_netns(void)
>
> Reuse the netns_new("so_timestamping_ns", true) from test_progs.c.
>
> > +{
> > +     if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
> > +             return -1;
> > +
> > +     if (!ASSERT_OK(system("ip link set dev lo up"), "set lo up"))
> > +             return -1;
> > +
> > +     return 0;
> > +}
> > +
> > +static void test_tcp(int family)
> > +{
> > +     struct so_timestamping__bss *bss = skel->bss;
> > +     char buf[] = "testing testing";
> > +     int sfd = -1, cfd = -1;
> > +     int n;
> > +
> > +     memset(bss, 0, sizeof(*bss));
> > +
> > +     sfd = start_server(family, SOCK_STREAM,
> > +                        family == AF_INET6 ? addr6_str : addr4_str, 0, 0);
> > +     if (!ASSERT_GE(sfd, 0, "start_server"))
>
> nit. ASSERT_OK_FD.
>
> > +             goto out;
> > +
> > +     cfd = connect_to_fd(sfd, 0);
> > +     if (!ASSERT_GE(cfd, 0, "connect_to_fd_server")) {
>
> Same here. ASSERT_OK_FD.
>
> > +             close(sfd);
>
> This close is unnecessary. It will cause a double close at "out:" also.
>
> > +             goto out;
> > +     }
> > +
> > +     n = write(cfd, buf, sizeof(buf));
> > +     if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
> > +             goto out;
> > +
> > +     ASSERT_EQ(bss->nr_active, 1, "nr_active");
> > +     ASSERT_EQ(bss->nr_snd, 2, "nr_snd");
> > +     ASSERT_EQ(bss->nr_sched, 1, "nr_sched");
> > +     ASSERT_EQ(bss->nr_txsw, 1, "nr_txsw");
> > +     ASSERT_EQ(bss->nr_ack, 1, "nr_ack");
> > +
> > +out:
> > +     if (sfd >= 0)
> > +             close(sfd);
> > +     if (cfd >= 0)
> > +             close(cfd);
> > +}
> > +
> > +void test_so_timestamping(void)
> > +{
> > +     cg_fd = test__join_cgroup(CG_NAME);
> > +     if (cg_fd < 0)
> > +             return;
> > +
> > +     if (create_netns())
> > +             goto done;
> > +
> > +     skel = so_timestamping__open();
>
> nit. so_timestamping__open_and_load()
>
> > +     if (!ASSERT_OK_PTR(skel, "open skel"))
> > +             goto done;
> > +
> > +     if (!ASSERT_OK(so_timestamping__load(skel), "load skel"))
>
> Then this __load() is not needed.
>
> > +             goto done;
> > +
> > +     if (!ASSERT_OK(so_timestamping__attach(skel), "attach skel"))
> > +             goto done;
> > +
> > +     skel->links.skops_sockopt =
> > +             bpf_program__attach_cgroup(skel->progs.skops_sockopt, cg_fd);
> > +     if (!ASSERT_OK_PTR(skel->links.skops_sockopt, "attach cgroup"))
> > +             goto done;
> > +
> > +     test_tcp(AF_INET6);
> > +     test_tcp(AF_INET);
> > +
> > +done:
> > +     so_timestamping__destroy(skel);
> > +     close(cg_fd);
> > +}
> > diff --git a/tools/testing/selftests/bpf/progs/so_timestamping.c b/tools/testing/selftests/bpf/progs/so_timestamping.c
> > new file mode 100644
> > index 000000000000..f4708e84c243
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/progs/so_timestamping.c
> > @@ -0,0 +1,227 @@
> > +#include "vmlinux.h"
> > +#include "bpf_tracing_net.h"
> > +#include <bpf/bpf_core_read.h>
> > +#include <bpf/bpf_helpers.h>
> > +#include <bpf/bpf_tracing.h>
> > +//#include <bpf/bpf_core_read.h>
> > +#include "bpf_misc.h"
> > +#include "bpf_kfuncs.h"
> > +
> > +#define SK_BPF_CB_FLAGS 1009
> > +#define SK_BPF_CB_TX_TIMESTAMPING 1
> > +
> > +int nr_active;
> > +int nr_snd;
> > +int nr_passive;
> > +int nr_sched;
> > +int nr_txsw;
> > +int nr_ack;
> > +
> > +struct sockopt_test {
> > +     int opt;
> > +     int new;
> > +};
> > +
> > +static const struct sockopt_test sol_socket_tests[] = {
> > +     { .opt = SK_BPF_CB_FLAGS, .new = SK_BPF_CB_TX_TIMESTAMPING, },
> > +     { .opt = 0, },
> > +};
> > +
> > +struct loop_ctx {
> > +     void *ctx;
> > +     const struct sock *sk;
> > +};
> > +
> > +struct sk_stg {
> > +     __u64 sendmsg_ns;       /* record ts when sendmsg is called */
> > +};
> > +
> > +struct {
> > +     __uint(type, BPF_MAP_TYPE_SK_STORAGE);
> > +     __uint(map_flags, BPF_F_NO_PREALLOC);
> > +     __type(key, int);
> > +     __type(value, struct sk_stg);
> > +} sk_stg_map SEC(".maps");
> > +
> > +
> > +struct delay_info {
> > +     u64 sendmsg_ns;         /* record ts when sendmsg is called */
> > +     u32 sched_delay;        /* SCHED_OPT_CB - sendmsg_ns */
> > +     u32 sw_snd_delay;       /* SW_OPT_CB - SCHED_OPT_CB */
> > +     u32 ack_delay;          /* ACK_OPT_CB - SW_OPT_CB */
> > +};
> > +
> > +struct {
> > +     __uint(type, BPF_MAP_TYPE_HASH);
> > +     __type(key, u32);
> > +     __type(value, struct delay_info);
> > +     __uint(max_entries, 1024);
> > +} time_map SEC(".maps");
> > +
> > +static u64 delay_tolerance_nsec = 1000000000; /* 1 second as an example */
> > +
> > +static int bpf_test_sockopt_int(void *ctx, const struct sock *sk,
> > +                             const struct sockopt_test *t,
> > +                             int level)
> > +{
> > +     int new, opt, tmp;
> > +
> > +     opt = t->opt;
> > +     new = t->new;
> > +
> > +     if (bpf_setsockopt(ctx, level, opt, &new, sizeof(new)))
> > +             return 1;
> > +
> > +     if (bpf_getsockopt(ctx, level, opt, &tmp, sizeof(tmp)) ||
> > +         tmp != new) {
> > +             return 1;
> > +     }
> > +
> > +     return 0;
> > +}
> > +
> > +static int bpf_test_socket_sockopt(__u32 i, struct loop_ctx *lc)
> > +{
> > +     const struct sockopt_test *t;
> > +
> > +     if (i >= ARRAY_SIZE(sol_socket_tests))
> > +             return 1;
> > +
> > +     t = &sol_socket_tests[i];
> > +     if (!t->opt)
> > +             return 1;
> > +
> > +     return bpf_test_sockopt_int(lc->ctx, lc->sk, t, SOL_SOCKET);
> > +}
> > +
> > +static int bpf_test_sockopt(void *ctx, const struct sock *sk)
> > +{
> > +     struct loop_ctx lc = { .ctx = ctx, .sk = sk, };
> > +     int n;
> > +
> > +     n = bpf_loop(ARRAY_SIZE(sol_socket_tests), bpf_test_socket_sockopt, &lc, 0);
> > +     if (n != ARRAY_SIZE(sol_socket_tests))
> > +             return -1;
> > +
> > +     return 0;
> > +}
> > +
> > +static bool bpf_test_delay(struct bpf_sock_ops *skops, const struct sock *sk)
> > +{
> > +     struct bpf_sock_ops_kern *skops_kern;
> > +     u64 timestamp = bpf_ktime_get_ns();
> > +     struct skb_shared_info *shinfo;
> > +     struct delay_info dinfo = {0};
> > +     struct delay_info *val;
> > +     struct sk_buff *skb;
> > +     struct sk_stg *stg;
> > +     u32 delay, tskey;
> > +     u64 prior_ts;
> > +
> > +     skops_kern = bpf_cast_to_kern_ctx(skops);
> > +     skb = skops_kern->skb;
> > +     shinfo = bpf_core_cast(skb->head + skb->end, struct skb_shared_info);
> > +     tskey = shinfo->tskey;
> > +     if (!tskey)
> > +             return false;
> > +
> > +     if (skops->op == BPF_SOCK_OPS_TS_TCP_SND_CB) {
> > +             stg = bpf_sk_storage_get(&sk_stg_map, (void *)sk, 0, 0);
> > +             if (!stg)
> > +                     return false;
> > +             dinfo.sendmsg_ns = stg->sendmsg_ns;
> > +             val = &dinfo;
>
> Move the map_update here instead.
>
>                 bpf_map_update_elem(&time_map, &tskey, val, BPF_ANY);
>
> > +             goto out;
> > +     }
> > +
> > +     val = bpf_map_lookup_elem(&time_map, &tskey);
> > +     if (!val)
> > +             return false;
> > +
> > +     switch (skops->op) {
> > +     case BPF_SOCK_OPS_TS_SCHED_OPT_CB:
> > +             delay = val->sched_delay = timestamp - val->sendmsg_ns;
> > +             break;
> > +     case BPF_SOCK_OPS_TS_SW_OPT_CB:
> > +             prior_ts = val->sched_delay + val->sendmsg_ns;
> > +             delay = val->sw_snd_delay = timestamp - prior_ts;
> > +             break;
> > +     case BPF_SOCK_OPS_TS_ACK_OPT_CB:
> > +             prior_ts = val->sw_snd_delay + val->sched_delay + val->sendmsg_ns;
> > +             delay = val->ack_delay = timestamp - prior_ts;
> > +             break;
> > +     }
> > +
> > +     if (delay <= 0 || delay >= delay_tolerance_nsec)
>
> Regarding the "delay <= 0" check, note that delay is defined as u32, so it can never be negative.
>
> delay_tolerance_nsec is 1 sec, which could be too short for the bpf CI. Maybe
> raise it to something like 10s and only check "if (delay >= delay_tolerance_nsec)".
> It would also be useful to bump a nr_long_delay++ counter and ASSERT on it in userspace.
>
> btw, it is in nsec, is u32 enough?
>
>
> > +             return false;
> > +
> > +     /* Since it's the last one, remove from the map after latency check */
> > +     if (skops->op == BPF_SOCK_OPS_TS_ACK_OPT_CB) {
> > +             bpf_map_delete_elem(&time_map, &tskey);
> > +             return true;
> > +     }
> > +
> > +out:
> > +     bpf_map_update_elem(&time_map, &tskey, val, BPF_ANY);
>
> Then there is no need to do update_elem here for the other ops.

I'll address all the points you mentioned above. Thanks!

>
> Overall, I think the set looks good. Only a few things left. Thanks for
> revamping the test also. The test should be pretty close to how it will be used.
>
> Please add tests to ensure the new timestamping callbacks cannot use the helpers
> that we discussed in the earlier patch and also cannot directly read/write the
> sock fields through the bpf_sock_ops.

No problem. Will do it in the next respin.

>
> Please also add some details on how the UDP BPF_SOCK_OPS_TS_TCP_SND_CB (or to be
> renamed to BPF_SOCK_OPS_TS_SND_CB ?) will look like. It is the only callback
> that I don't have a clear idea for UDP.

I think I will rename it as you suggested. But I wonder if I could add more
details about UDP after this series gets merged, which should not be
too late. After this series, I will carefully consider and test how the
callback should be used for UDP.

>
> Please tag the set to bpf-next. Then the bpf CI can pick it up automatically and
> continue testing it whenever other bpf patches land.

Got it!

>
> [ I will reply other followup later ]

Thanks for your work.

>
> > +     return true;
> > +}
> > +
> > +SEC("fentry/tcp_sendmsg_locked")
> > +int BPF_PROG(trace_tcp_sendmsg_locked, struct sock *sk, struct msghdr *msg, size_t size)
> > +{
> > +     u64 timestamp = bpf_ktime_get_ns();
> > +     u32 flag = sk->sk_bpf_cb_flags;
> > +     struct sk_stg *stg;
> > +
> > +     if (!flag)
> > +             return 0;
> > +
> > +     stg = bpf_sk_storage_get(&sk_stg_map, sk, 0,
> > +                              BPF_SK_STORAGE_GET_F_CREATE);
> > +     if (!stg)
> > +             return 0;
> > +
> > +     stg->sendmsg_ns = timestamp;
> > +     nr_snd += 1;
> > +     return 0;
> > +}
> > +
> > +SEC("sockops")
> > +int skops_sockopt(struct bpf_sock_ops *skops)
> > +{
> > +     struct bpf_sock *bpf_sk = skops->sk;
> > +     const struct sock *sk;
> > +
> > +     if (!bpf_sk)
> > +             return 1;
> > +
> > +     sk = (struct sock *)bpf_skc_to_tcp_sock(bpf_sk);
> > +     if (!sk)
> > +             return 1;
> > +
> > +     switch (skops->op) {
> > +     case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
> > +             nr_active += !bpf_test_sockopt(skops, sk);
> > +             break;
> > +     case BPF_SOCK_OPS_TS_TCP_SND_CB:
> > +             if (bpf_test_delay(skops, sk))
> > +                     nr_snd += 1;
> > +             break;
> > +     case BPF_SOCK_OPS_TS_SCHED_OPT_CB:
> > +             if (bpf_test_delay(skops, sk))
> > +                     nr_sched += 1;
> > +             break;
> > +     case BPF_SOCK_OPS_TS_SW_OPT_CB:
> > +             if (bpf_test_delay(skops, sk))
> > +                     nr_txsw += 1;
> > +             break;
> > +     case BPF_SOCK_OPS_TS_ACK_OPT_CB:
> > +             if (bpf_test_delay(skops, sk))
> > +                     nr_ack += 1;
> > +             break;
> > +     }
> > +
> > +     return 1;
> > +}
> > +
> > +char _license[] SEC("license") = "GPL";
>