lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAMB2axOmZbZgqZdWjdAL0__uHgZCtK8G0ABoKCizk16NkQdpTQ@mail.gmail.com>
Date: Tue, 21 May 2024 08:03:04 -0700
From: Amery Hung <ameryhung@...il.com>
To: Stanislav Fomichev <sdf@...gle.com>
Cc: netdev@...r.kernel.org, bpf@...r.kernel.org, yangpeihao@...u.edu.cn, 
	daniel@...earbox.net, andrii@...nel.org, martin.lau@...nel.org, 
	sinquersw@...il.com, toke@...hat.com, jhs@...atatu.com, jiri@...nulli.us, 
	xiyou.wangcong@...il.com, yepeilin.cs@...il.com
Subject: Re: [RFC PATCH v8 17/20] selftests: Add a basic fifo qdisc test

On Mon, May 20, 2024 at 8:15 PM Stanislav Fomichev <sdf@...gle.com> wrote:
>
> On 05/10, Amery Hung wrote:
> > This selftest shows a bare minimum fifo qdisc, which simply enqueues skbs
> > into the back of a bpf list and dequeues from the front of the list.
> >
> > Signed-off-by: Amery Hung <amery.hung@...edance.com>
> > ---
> >  .../selftests/bpf/prog_tests/bpf_qdisc.c      | 161 ++++++++++++++++++
> >  .../selftests/bpf/progs/bpf_qdisc_common.h    |  23 +++
> >  .../selftests/bpf/progs/bpf_qdisc_fifo.c      |  83 +++++++++
> >  3 files changed, 267 insertions(+)
> >  create mode 100644 tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/bpf_qdisc_common.h
> >  create mode 100644 tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c
> >
> > diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c
> > new file mode 100644
> > index 000000000000..295d0216e70f
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c
> > @@ -0,0 +1,161 @@
> > +#include <linux/pkt_sched.h>
> > +#include <linux/rtnetlink.h>
> > +#include <test_progs.h>
> > +
> > +#include "network_helpers.h"
> > +#include "bpf_qdisc_fifo.skel.h"
> > +
> > +#ifndef ENOTSUPP
> > +#define ENOTSUPP 524
> > +#endif
> > +
> > +#define LO_IFINDEX 1
> > +
> > +static const unsigned int total_bytes = 10 * 1024 * 1024;
> > +static int stop;
> > +
> > +static void *server(void *arg)
> > +{
> > +     int lfd = (int)(long)arg, err = 0, fd;
> > +     ssize_t nr_sent = 0, bytes = 0;
> > +     char batch[1500];
> > +
> > +     fd = accept(lfd, NULL, NULL);
> > +     while (fd == -1) {
> > +             if (errno == EINTR)
> > +                     continue;
> > +             err = -errno;
> > +             goto done;
> > +     }
> > +
> > +     if (settimeo(fd, 0)) {
> > +             err = -errno;
> > +             goto done;
> > +     }
> > +
> > +     while (bytes < total_bytes && !READ_ONCE(stop)) {
> > +             nr_sent = send(fd, &batch,
> > +                            MIN(total_bytes - bytes, sizeof(batch)), 0);
> > +             if (nr_sent == -1 && errno == EINTR)
> > +                     continue;
> > +             if (nr_sent == -1) {
> > +                     err = -errno;
> > +                     break;
> > +             }
> > +             bytes += nr_sent;
> > +     }
> > +
> > +     ASSERT_EQ(bytes, total_bytes, "send");
> > +
> > +done:
> > +     if (fd >= 0)
> > +             close(fd);
> > +     if (err) {
> > +             WRITE_ONCE(stop, 1);
> > +             return ERR_PTR(err);
> > +     }
> > +     return NULL;
> > +}
> > +
> > +static void do_test(char *qdisc)
> > +{
> > +     DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX,
> > +                         .attach_point = BPF_TC_QDISC,
> > +                         .parent = TC_H_ROOT,
> > +                         .handle = 0x8000000,
> > +                         .qdisc = qdisc);
> > +     struct sockaddr_in6 sa6 = {};
> > +     ssize_t nr_recv = 0, bytes = 0;
> > +     int lfd = -1, fd = -1;
> > +     pthread_t srv_thread;
> > +     socklen_t addrlen = sizeof(sa6);
> > +     void *thread_ret;
> > +     char batch[1500];
> > +     int err;
> > +
> > +     WRITE_ONCE(stop, 0);
> > +
> > +     err = bpf_tc_hook_create(&hook);
> > +     if (!ASSERT_OK(err, "attach qdisc"))
> > +             return;
> > +
> > +     lfd = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
> > +     if (!ASSERT_NEQ(lfd, -1, "socket")) {
> > +             bpf_tc_hook_destroy(&hook);
> > +             return;
> > +     }
> > +
> > +     fd = socket(AF_INET6, SOCK_STREAM, 0);
> > +     if (!ASSERT_NEQ(fd, -1, "socket")) {
> > +             bpf_tc_hook_destroy(&hook);
> > +             close(lfd);
> > +             return;
> > +     }
> > +
> > +     if (settimeo(lfd, 0) || settimeo(fd, 0))
> > +             goto done;
> > +
> > +     err = getsockname(lfd, (struct sockaddr *)&sa6, &addrlen);
> > +     if (!ASSERT_NEQ(err, -1, "getsockname"))
> > +             goto done;
> > +
> > +     /* connect to server */
> > +     err = connect(fd, (struct sockaddr *)&sa6, addrlen);
> > +     if (!ASSERT_NEQ(err, -1, "connect"))
> > +             goto done;
> > +
> > +     err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd);
> > +     if (!ASSERT_OK(err, "pthread_create"))
> > +             goto done;
> > +
> > +     /* recv total_bytes */
> > +     while (bytes < total_bytes && !READ_ONCE(stop)) {
> > +             nr_recv = recv(fd, &batch,
> > +                            MIN(total_bytes - bytes, sizeof(batch)), 0);
> > +             if (nr_recv == -1 && errno == EINTR)
> > +                     continue;
> > +             if (nr_recv == -1)
> > +                     break;
> > +             bytes += nr_recv;
> > +     }
> > +
> > +     ASSERT_EQ(bytes, total_bytes, "recv");
> > +
> > +     WRITE_ONCE(stop, 1);
> > +     pthread_join(srv_thread, &thread_ret);
> > +     ASSERT_OK(IS_ERR(thread_ret), "thread_ret");
> > +
> > +done:
> > +     close(lfd);
> > +     close(fd);
> > +
> > +     bpf_tc_hook_destroy(&hook);
> > +     return;
> > +}
> > +
> > +static void test_fifo(void)
> > +{
> > +     struct bpf_qdisc_fifo *fifo_skel;
> > +     struct bpf_link *link;
> > +
> > +     fifo_skel = bpf_qdisc_fifo__open_and_load();
> > +     if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
> > +             return;
> > +
> > +     link = bpf_map__attach_struct_ops(fifo_skel->maps.fifo);
> > +     if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) {
> > +             bpf_qdisc_fifo__destroy(fifo_skel);
> > +             return;
> > +     }
> > +
> > +     do_test("bpf_fifo");
> > +
> > +     bpf_link__destroy(link);
> > +     bpf_qdisc_fifo__destroy(fifo_skel);
> > +}
> > +
> > +void test_bpf_qdisc(void)
> > +{
> > +     if (test__start_subtest("fifo"))
> > +             test_fifo();
> > +}
> > diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h b/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h
> > new file mode 100644
> > index 000000000000..96ab357de28e
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h
> > @@ -0,0 +1,23 @@
> > +#ifndef _BPF_QDISC_COMMON_H
> > +#define _BPF_QDISC_COMMON_H
> > +
> > +#define NET_XMIT_SUCCESS        0x00
> > +#define NET_XMIT_DROP           0x01    /* skb dropped                  */
> > +#define NET_XMIT_CN             0x02    /* congestion notification      */
> > +
> > +#define TC_PRIO_CONTROL  7
> > +#define TC_PRIO_MAX      15
> > +
> > +void bpf_skb_set_dev(struct sk_buff *skb, struct Qdisc *sch) __ksym;
> > +u32 bpf_skb_get_hash(struct sk_buff *p) __ksym;
> > +void bpf_skb_release(struct sk_buff *p) __ksym;
> > +void bpf_qdisc_skb_drop(struct sk_buff *p, struct bpf_sk_buff_ptr *to_free) __ksym;
> > +void bpf_qdisc_watchdog_schedule(struct Qdisc *sch, u64 expire, u64 delta_ns) __ksym;
> > +bool bpf_qdisc_find_class(struct Qdisc *sch, u32 classid) __ksym;
> > +int bpf_qdisc_create_child(struct Qdisc *sch, u32 min,
> > +                        struct netlink_ext_ack *extack) __ksym;
> > +int bpf_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, u32 classid,
> > +                   struct bpf_sk_buff_ptr *to_free_list) __ksym;
> > +struct sk_buff *bpf_qdisc_dequeue(struct Qdisc *sch, u32 classid) __ksym;
> > +
> > +#endif
> > diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c
> > new file mode 100644
> > index 000000000000..433fd9c3639c
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c
> > @@ -0,0 +1,83 @@
> > +#include <vmlinux.h>
> > +#include "bpf_experimental.h"
> > +#include "bpf_qdisc_common.h"
> > +
> > +char _license[] SEC("license") = "GPL";
> > +
> > +#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
> > +
> > +private(B) struct bpf_spin_lock q_fifo_lock;
> > +private(B) struct bpf_list_head q_fifo __contains_kptr(sk_buff, bpf_list);
> > +
> > +unsigned int q_limit = 1000;
> > +unsigned int q_qlen = 0;
> > +
> > +SEC("struct_ops/bpf_fifo_enqueue")
> > +int BPF_PROG(bpf_fifo_enqueue, struct sk_buff *skb, struct Qdisc *sch,
> > +          struct bpf_sk_buff_ptr *to_free)
> > +{
> > +     q_qlen++;
> > +     if (q_qlen > q_limit) {
> > +             bpf_qdisc_skb_drop(skb, to_free);
> > +             return NET_XMIT_DROP;
> > +     }
>
> [..]
>
> > +     bpf_spin_lock(&q_fifo_lock);
> > +     bpf_list_excl_push_back(&q_fifo, &skb->bpf_list);
> > +     bpf_spin_unlock(&q_fifo_lock);
>
> Can you also expand a bit on the locking here and elsewhere? And how it
> interplays with TCQ_F_NOLOCK?
>
> As I mentioned at lsfmmbpf, I don't think there is a lot of similar
> locking in the existing C implementations? So why do we need it here?

The locks are required to prevent catastrophic concurrent accesses to
bpf graphs. The verifier will check 1) if there is a spin_lock in the
same struct with a list head or rbtree root, and 2) the lock is held
when accessing the list or rbtree.

Since we have the safety guarantee provided by the verifier, I think
there is an opportunity to allow qdisc users to set TCQ_F_NOLOCK. I will
check if qdisc kfuncs are TCQ_F_NOLOCK safe though. Let me know if I
missed anything.

Thanks,
Amery

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ