lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Tue, 21 May 2024 10:57:47 -0700
From: Stanislav Fomichev <sdf@...gle.com>
To: Amery Hung <ameryhung@...il.com>
Cc: netdev@...r.kernel.org, bpf@...r.kernel.org, yangpeihao@...u.edu.cn, 
	daniel@...earbox.net, andrii@...nel.org, martin.lau@...nel.org, 
	sinquersw@...il.com, toke@...hat.com, jhs@...atatu.com, jiri@...nulli.us, 
	xiyou.wangcong@...il.com, yepeilin.cs@...il.com
Subject: Re: [RFC PATCH v8 17/20] selftests: Add a basic fifo qdisc test

On Tue, May 21, 2024 at 8:03 AM Amery Hung <ameryhung@...il.com> wrote:
>
> On Mon, May 20, 2024 at 8:15 PM Stanislav Fomichev <sdf@...gle.com> wrote:
> >
> > On 05/10, Amery Hung wrote:
> > > This selftest shows a bare minimum fifo qdisc, which simply enqueues skbs
> > > into the back of a bpf list and dequeues from the front of the list.
> > >
> > > Signed-off-by: Amery Hung <amery.hung@...edance.com>
> > > ---
> > >  .../selftests/bpf/prog_tests/bpf_qdisc.c      | 161 ++++++++++++++++++
> > >  .../selftests/bpf/progs/bpf_qdisc_common.h    |  23 +++
> > >  .../selftests/bpf/progs/bpf_qdisc_fifo.c      |  83 +++++++++
> > >  3 files changed, 267 insertions(+)
> > >  create mode 100644 tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c
> > >  create mode 100644 tools/testing/selftests/bpf/progs/bpf_qdisc_common.h
> > >  create mode 100644 tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c
> > >
> > > diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c
> > > new file mode 100644
> > > index 000000000000..295d0216e70f
> > > --- /dev/null
> > > +++ b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c
> > > @@ -0,0 +1,161 @@
> > > +#include <linux/pkt_sched.h>
> > > +#include <linux/rtnetlink.h>
> > > +#include <test_progs.h>
> > > +
> > > +#include "network_helpers.h"
> > > +#include "bpf_qdisc_fifo.skel.h"
> > > +
> > > +#ifndef ENOTSUPP
> > > +#define ENOTSUPP 524
> > > +#endif
> > > +
> > > +#define LO_IFINDEX 1
> > > +
> > > +static const unsigned int total_bytes = 10 * 1024 * 1024;
> > > +static int stop;
> > > +
> > > +static void *server(void *arg)
> > > +{
> > > +     int lfd = (int)(long)arg, err = 0, fd;
> > > +     ssize_t nr_sent = 0, bytes = 0;
> > > +     char batch[1500];
> > > +
> > > +     fd = accept(lfd, NULL, NULL);
> > > +     while (fd == -1) {
> > > +             if (errno == EINTR)
> > > +                     continue;
> > > +             err = -errno;
> > > +             goto done;
> > > +     }
> > > +
> > > +     if (settimeo(fd, 0)) {
> > > +             err = -errno;
> > > +             goto done;
> > > +     }
> > > +
> > > +     while (bytes < total_bytes && !READ_ONCE(stop)) {
> > > +             nr_sent = send(fd, &batch,
> > > +                            MIN(total_bytes - bytes, sizeof(batch)), 0);
> > > +             if (nr_sent == -1 && errno == EINTR)
> > > +                     continue;
> > > +             if (nr_sent == -1) {
> > > +                     err = -errno;
> > > +                     break;
> > > +             }
> > > +             bytes += nr_sent;
> > > +     }
> > > +
> > > +     ASSERT_EQ(bytes, total_bytes, "send");
> > > +
> > > +done:
> > > +     if (fd >= 0)
> > > +             close(fd);
> > > +     if (err) {
> > > +             WRITE_ONCE(stop, 1);
> > > +             return ERR_PTR(err);
> > > +     }
> > > +     return NULL;
> > > +}
> > > +
> > > +static void do_test(char *qdisc)
> > > +{
> > > +     DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX,
> > > +                         .attach_point = BPF_TC_QDISC,
> > > +                         .parent = TC_H_ROOT,
> > > +                         .handle = 0x8000000,
> > > +                         .qdisc = qdisc);
> > > +     struct sockaddr_in6 sa6 = {};
> > > +     ssize_t nr_recv = 0, bytes = 0;
> > > +     int lfd = -1, fd = -1;
> > > +     pthread_t srv_thread;
> > > +     socklen_t addrlen = sizeof(sa6);
> > > +     void *thread_ret;
> > > +     char batch[1500];
> > > +     int err;
> > > +
> > > +     WRITE_ONCE(stop, 0);
> > > +
> > > +     err = bpf_tc_hook_create(&hook);
> > > +     if (!ASSERT_OK(err, "attach qdisc"))
> > > +             return;
> > > +
> > > +     lfd = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
> > > +     if (!ASSERT_NEQ(lfd, -1, "socket")) {
> > > +             bpf_tc_hook_destroy(&hook);
> > > +             return;
> > > +     }
> > > +
> > > +     fd = socket(AF_INET6, SOCK_STREAM, 0);
> > > +     if (!ASSERT_NEQ(fd, -1, "socket")) {
> > > +             bpf_tc_hook_destroy(&hook);
> > > +             close(lfd);
> > > +             return;
> > > +     }
> > > +
> > > +     if (settimeo(lfd, 0) || settimeo(fd, 0))
> > > +             goto done;
> > > +
> > > +     err = getsockname(lfd, (struct sockaddr *)&sa6, &addrlen);
> > > +     if (!ASSERT_NEQ(err, -1, "getsockname"))
> > > +             goto done;
> > > +
> > > +     /* connect to server */
> > > +     err = connect(fd, (struct sockaddr *)&sa6, addrlen);
> > > +     if (!ASSERT_NEQ(err, -1, "connect"))
> > > +             goto done;
> > > +
> > > +     err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd);
> > > +     if (!ASSERT_OK(err, "pthread_create"))
> > > +             goto done;
> > > +
> > > +     /* recv total_bytes */
> > > +     while (bytes < total_bytes && !READ_ONCE(stop)) {
> > > +             nr_recv = recv(fd, &batch,
> > > +                            MIN(total_bytes - bytes, sizeof(batch)), 0);
> > > +             if (nr_recv == -1 && errno == EINTR)
> > > +                     continue;
> > > +             if (nr_recv == -1)
> > > +                     break;
> > > +             bytes += nr_recv;
> > > +     }
> > > +
> > > +     ASSERT_EQ(bytes, total_bytes, "recv");
> > > +
> > > +     WRITE_ONCE(stop, 1);
> > > +     pthread_join(srv_thread, &thread_ret);
> > > +     ASSERT_OK(IS_ERR(thread_ret), "thread_ret");
> > > +
> > > +done:
> > > +     close(lfd);
> > > +     close(fd);
> > > +
> > > +     bpf_tc_hook_destroy(&hook);
> > > +     return;
> > > +}
> > > +
> > > +static void test_fifo(void)
> > > +{
> > > +     struct bpf_qdisc_fifo *fifo_skel;
> > > +     struct bpf_link *link;
> > > +
> > > +     fifo_skel = bpf_qdisc_fifo__open_and_load();
> > > +     if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
> > > +             return;
> > > +
> > > +     link = bpf_map__attach_struct_ops(fifo_skel->maps.fifo);
> > > +     if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) {
> > > +             bpf_qdisc_fifo__destroy(fifo_skel);
> > > +             return;
> > > +     }
> > > +
> > > +     do_test("bpf_fifo");
> > > +
> > > +     bpf_link__destroy(link);
> > > +     bpf_qdisc_fifo__destroy(fifo_skel);
> > > +}
> > > +
> > > +void test_bpf_qdisc(void)
> > > +{
> > > +     if (test__start_subtest("fifo"))
> > > +             test_fifo();
> > > +}
> > > diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h b/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h
> > > new file mode 100644
> > > index 000000000000..96ab357de28e
> > > --- /dev/null
> > > +++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h
> > > @@ -0,0 +1,23 @@
> > > +#ifndef _BPF_QDISC_COMMON_H
> > > +#define _BPF_QDISC_COMMON_H
> > > +
> > > +#define NET_XMIT_SUCCESS        0x00
> > > +#define NET_XMIT_DROP           0x01    /* skb dropped                  */
> > > +#define NET_XMIT_CN             0x02    /* congestion notification      */
> > > +
> > > +#define TC_PRIO_CONTROL  7
> > > +#define TC_PRIO_MAX      15
> > > +
> > > +void bpf_skb_set_dev(struct sk_buff *skb, struct Qdisc *sch) __ksym;
> > > +u32 bpf_skb_get_hash(struct sk_buff *p) __ksym;
> > > +void bpf_skb_release(struct sk_buff *p) __ksym;
> > > +void bpf_qdisc_skb_drop(struct sk_buff *p, struct bpf_sk_buff_ptr *to_free) __ksym;
> > > +void bpf_qdisc_watchdog_schedule(struct Qdisc *sch, u64 expire, u64 delta_ns) __ksym;
> > > +bool bpf_qdisc_find_class(struct Qdisc *sch, u32 classid) __ksym;
> > > +int bpf_qdisc_create_child(struct Qdisc *sch, u32 min,
> > > +                        struct netlink_ext_ack *extack) __ksym;
> > > +int bpf_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, u32 classid,
> > > +                   struct bpf_sk_buff_ptr *to_free_list) __ksym;
> > > +struct sk_buff *bpf_qdisc_dequeue(struct Qdisc *sch, u32 classid) __ksym;
> > > +
> > > +#endif
> > > diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c
> > > new file mode 100644
> > > index 000000000000..433fd9c3639c
> > > --- /dev/null
> > > +++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c
> > > @@ -0,0 +1,83 @@
> > > +#include <vmlinux.h>
> > > +#include "bpf_experimental.h"
> > > +#include "bpf_qdisc_common.h"
> > > +
> > > +char _license[] SEC("license") = "GPL";
> > > +
> > > +#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
> > > +
> > > +private(B) struct bpf_spin_lock q_fifo_lock;
> > > +private(B) struct bpf_list_head q_fifo __contains_kptr(sk_buff, bpf_list);
> > > +
> > > +unsigned int q_limit = 1000;
> > > +unsigned int q_qlen = 0;
> > > +
> > > +SEC("struct_ops/bpf_fifo_enqueue")
> > > +int BPF_PROG(bpf_fifo_enqueue, struct sk_buff *skb, struct Qdisc *sch,
> > > +          struct bpf_sk_buff_ptr *to_free)
> > > +{
> > > +     q_qlen++;
> > > +     if (q_qlen > q_limit) {
> > > +             bpf_qdisc_skb_drop(skb, to_free);
> > > +             return NET_XMIT_DROP;
> > > +     }
> >
> > [..]
> >
> > > +     bpf_spin_lock(&q_fifo_lock);
> > > +     bpf_list_excl_push_back(&q_fifo, &skb->bpf_list);
> > > +     bpf_spin_unlock(&q_fifo_lock);
> >
> > Can you also expand a bit on the locking here and elsewhere? And how it
> > interplays with TCQ_F_NOLOCK?
> >
> > As I mentioned at lsfmmbpf, I don't think there is a lot of similar
> > locking in the existing C implementations? So why do we need it here?
>
> The locks are required to prevent catastrophic concurrent accesses to
> bpf graphs. The verifier will check 1) if there is a spin_lock in the
> same struct with a list head or rbtree root, and 2) the lock is held
> when accessing the list or rbtree.
>
> Since we have the safety guarantee provided by the verifier, I think
> there is an opportunity to allow qdisc users to set TCQ_F_NOLOCK. I will
> check if qdisc kfuncs are TCQ_F_NOLOCK safe though. Let me know if I
> missed anything.

Ah, so these locking constraints come from the verifier....
In this case, yes, it would be nice to have special treatment for bpf
qdisc (or maybe allow passing TCQ_F_NOLOCK somehow). If the verifier
enforces the locking for the underlying data structures, we should try
to remove the ones from the generic qdisc layer.

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ