lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAJ8uoz3MNrz_f7dy6+U=zj7GeywUda9E9pP2s9uU1jVW5O2zHw@mail.gmail.com>
Date:   Mon, 27 May 2019 08:58:52 +0200
From:   Magnus Karlsson <magnus.karlsson@...il.com>
To:     Ye Xiaolong <xiaolong.ye@...el.com>
Cc:     Magnus Karlsson <magnus.karlsson@...el.com>,
        Björn Töpel <bjorn.topel@...el.com>,
        Alexei Starovoitov <ast@...nel.org>,
        Daniel Borkmann <daniel@...earbox.net>,
        Network Development <netdev@...r.kernel.org>,
        Jesper Dangaard Brouer <brouer@...hat.com>,
        bpf@...r.kernel.org, bruce.richardson@...el.com,
        ciara.loftus@...el.com,
        Jakub Kicinski <jakub.kicinski@...ronome.com>,
        "Zhang, Qi Z" <qi.z.zhang@...el.com>,
        Maxim Mikityanskiy <maximmi@...lanox.com>,
        "Samudrala, Sridhar" <sridhar.samudrala@...el.com>,
        kevin.laatz@...el.com
Subject: Re: [RFC bpf-next 7/7] samples/bpf: add busy-poll support to xdpsock sample

On Fri, May 24, 2019 at 4:59 AM Ye Xiaolong <xiaolong.ye@...el.com> wrote:
>
> Hi, Magnus
>
> On 05/02, Magnus Karlsson wrote:
> >This patch adds busy-poll support to the xdpsock sample
> >application. It is enabled by the "-b" or the "--busy-poll" command
> >line options.
> >
> >Signed-off-by: Magnus Karlsson <magnus.karlsson@...el.com>
> >---
> > samples/bpf/xdpsock_user.c | 203 ++++++++++++++++++++++++++++-----------------
> > 1 file changed, 125 insertions(+), 78 deletions(-)
> >
> >diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
> >index d08ee1a..1272edf 100644
> >--- a/samples/bpf/xdpsock_user.c
> >+++ b/samples/bpf/xdpsock_user.c
> >@@ -66,6 +66,7 @@ static const char *opt_if = "";
> > static int opt_ifindex;
> > static int opt_queue;
> > static int opt_poll;
> >+static int opt_busy_poll;
> > static int opt_interval = 1;
> > static u32 opt_xdp_bind_flags;
> > static __u32 prog_id;
> >@@ -119,8 +120,11 @@ static void print_benchmark(bool running)
> >       else
> >               printf("        ");
> >
> >-      if (opt_poll)
> >+      if (opt_poll) {
> >+              if (opt_busy_poll)
> >+                      printf("busy-");
> >               printf("poll() ");
> >+      }
> >
> >       if (running) {
> >               printf("running...");
> >@@ -306,7 +310,7 @@ static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem)
> >       xsk->umem = umem;
> >       cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
> >       cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
> >-      cfg.libbpf_flags = 0;
>
> Any purpose for removing this line, as cfg here is a local variable, cfg.libbpf_flags
> can be random and may lead to xdpsock failure as `Invalid no_argument`.

No, that was a mistake. Thanks for catching.

I will produce a new patch set with the improvements needed to make
execution of application and softirq/ksoftirqd on the same core
efficient, make a v2 of the busy poll patch set (with all your
suggested improvements) on top of that, and then see what performance
we have.

Thanks: Magnus

> Thanks,
> Xiaolong
>
> >+      cfg.busy_poll = (opt_busy_poll ? BATCH_SIZE : 0);
> >       cfg.xdp_flags = opt_xdp_flags;
> >       cfg.bind_flags = opt_xdp_bind_flags;
> >       ret = xsk_socket__create(&xsk->xsk, opt_if, opt_queue, umem->umem,
> >@@ -319,17 +323,17 @@ static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem)
> >               exit_with_error(-ret);
> >
> >       ret = xsk_ring_prod__reserve(&xsk->umem->fq,
> >-                                   XSK_RING_PROD__DEFAULT_NUM_DESCS,
> >+                                   1024,
> >                                    &idx);
> >-      if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
> >+      if (ret != 1024)
> >               exit_with_error(-ret);
> >       for (i = 0;
> >-           i < XSK_RING_PROD__DEFAULT_NUM_DESCS *
> >+           i < 1024 *
> >                    XSK_UMEM__DEFAULT_FRAME_SIZE;
> >            i += XSK_UMEM__DEFAULT_FRAME_SIZE)
> >               *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx++) = i;
> >       xsk_ring_prod__submit(&xsk->umem->fq,
> >-                            XSK_RING_PROD__DEFAULT_NUM_DESCS);
> >+                            1024);
> >
> >       return xsk;
> > }
> >@@ -341,6 +345,7 @@ static struct option long_options[] = {
> >       {"interface", required_argument, 0, 'i'},
> >       {"queue", required_argument, 0, 'q'},
> >       {"poll", no_argument, 0, 'p'},
> >+      {"busy-poll", no_argument, 0, 'b'},
> >       {"xdp-skb", no_argument, 0, 'S'},
> >       {"xdp-native", no_argument, 0, 'N'},
> >       {"interval", required_argument, 0, 'n'},
> >@@ -360,6 +365,7 @@ static void usage(const char *prog)
> >               "  -i, --interface=n    Run on interface n\n"
> >               "  -q, --queue=n        Use queue n (default 0)\n"
> >               "  -p, --poll           Use poll syscall\n"
> >+              "  -b, --busy-poll      Use poll syscall with busy poll\n"
> >               "  -S, --xdp-skb=n      Use XDP skb-mod\n"
> >               "  -N, --xdp-native=n   Enfore XDP native mode\n"
> >               "  -n, --interval=n     Specify statistics update interval (default 1 sec).\n"
> >@@ -377,7 +383,7 @@ static void parse_command_line(int argc, char **argv)
> >       opterr = 0;
> >
> >       for (;;) {
> >-              c = getopt_long(argc, argv, "Frtli:q:psSNn:cz", long_options,
> >+              c = getopt_long(argc, argv, "Frtli:q:pbsSNn:cz", long_options,
> >                               &option_index);
> >               if (c == -1)
> >                       break;
> >@@ -401,6 +407,10 @@ static void parse_command_line(int argc, char **argv)
> >               case 'p':
> >                       opt_poll = 1;
> >                       break;
> >+              case 'b':
> >+                      opt_busy_poll = 1;
> >+                      opt_poll = 1;
> >+                      break;
> >               case 'S':
> >                       opt_xdp_flags |= XDP_FLAGS_SKB_MODE;
> >                       opt_xdp_bind_flags |= XDP_COPY;
> >@@ -444,7 +454,8 @@ static void kick_tx(struct xsk_socket_info *xsk)
> >       exit_with_error(errno);
> > }
> >
> >-static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk)
> >+static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk,
> >+                                   struct pollfd *fds)
> > {
> >       u32 idx_cq = 0, idx_fq = 0;
> >       unsigned int rcvd;
> >@@ -453,7 +464,8 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk)
> >       if (!xsk->outstanding_tx)
> >               return;
> >
> >-      kick_tx(xsk);
> >+      if (!opt_poll)
> >+              kick_tx(xsk);
> >       ndescs = (xsk->outstanding_tx > BATCH_SIZE) ? BATCH_SIZE :
> >               xsk->outstanding_tx;
> >
> >@@ -467,6 +479,8 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk)
> >               while (ret != rcvd) {
> >                       if (ret < 0)
> >                               exit_with_error(-ret);
> >+                      if (opt_busy_poll)
> >+                              ret = poll(fds, num_socks, 0);
> >                       ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd,
> >                                                    &idx_fq);
> >               }
> >@@ -490,7 +504,8 @@ static inline void complete_tx_only(struct xsk_socket_info *xsk)
> >       if (!xsk->outstanding_tx)
> >               return;
> >
> >-      kick_tx(xsk);
> >+      if (!opt_busy_poll)
> >+              kick_tx(xsk);
> >
> >       rcvd = xsk_ring_cons__peek(&xsk->umem->cq, BATCH_SIZE, &idx);
> >       if (rcvd > 0) {
> >@@ -500,10 +515,10 @@ static inline void complete_tx_only(struct xsk_socket_info *xsk)
> >       }
> > }
> >
> >-static void rx_drop(struct xsk_socket_info *xsk)
> >+static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds)
> > {
> >-      unsigned int rcvd, i;
> >       u32 idx_rx = 0, idx_fq = 0;
> >+      unsigned int rcvd, i;
> >       int ret;
> >
> >       rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
> >@@ -514,6 +529,8 @@ static void rx_drop(struct xsk_socket_info *xsk)
> >       while (ret != rcvd) {
> >               if (ret < 0)
> >                       exit_with_error(-ret);
> >+              if (opt_busy_poll)
> >+                      ret = poll(fds, num_socks, 0);
> >               ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
> >       }
> >
> >@@ -533,43 +550,68 @@ static void rx_drop(struct xsk_socket_info *xsk)
> >
> > static void rx_drop_all(void)
> > {
> >-      struct pollfd fds[MAX_SOCKS + 1];
> >-      int i, ret, timeout, nfds = 1;
> >+      struct pollfd fds[MAX_SOCKS];
> >+      int i, ret;
> >
> >       memset(fds, 0, sizeof(fds));
> >
> >       for (i = 0; i < num_socks; i++) {
> >               fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
> >               fds[i].events = POLLIN;
> >-              timeout = 1000; /* 1sn */
> >       }
> >
> >       for (;;) {
> >               if (opt_poll) {
> >-                      ret = poll(fds, nfds, timeout);
> >+                      ret = poll(fds, num_socks, 0);
> >                       if (ret <= 0)
> >                               continue;
> >               }
> >
> >               for (i = 0; i < num_socks; i++)
> >-                      rx_drop(xsks[i]);
> >+                      rx_drop(xsks[i], fds);
> >+      }
> >+}
> >+
> >+static void tx_only(struct xsk_socket_info *xsk, u32 frame_nb)
> >+{
> >+      u32 idx;
> >+
> >+      if (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) ==
> >+          BATCH_SIZE) {
> >+              unsigned int i;
> >+
> >+              for (i = 0; i < BATCH_SIZE; i++) {
> >+                      xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr
> >+                              = (frame_nb + i) <<
> >+                              XSK_UMEM__DEFAULT_FRAME_SHIFT;
> >+                      xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len =
> >+                              sizeof(pkt_data) - 1;
> >+              }
> >+
> >+              xsk_ring_prod__submit(&xsk->tx, BATCH_SIZE);
> >+              xsk->outstanding_tx += BATCH_SIZE;
> >+              frame_nb += BATCH_SIZE;
> >+              frame_nb %= NUM_FRAMES;
> >       }
> >+
> >+      complete_tx_only(xsk);
> > }
> >
> >-static void tx_only(struct xsk_socket_info *xsk)
> >+static void tx_only_all(void)
> > {
> >-      int timeout, ret, nfds = 1;
> >-      struct pollfd fds[nfds + 1];
> >-      u32 idx, frame_nb = 0;
> >+      struct pollfd fds[MAX_SOCKS];
> >+      u32 frame_nb[MAX_SOCKS] = {};
> >+      int i, ret;
> >
> >       memset(fds, 0, sizeof(fds));
> >-      fds[0].fd = xsk_socket__fd(xsk->xsk);
> >-      fds[0].events = POLLOUT;
> >-      timeout = 1000; /* 1sn */
> >+      for (i = 0; i < num_socks; i++) {
> >+              fds[0].fd = xsk_socket__fd(xsks[i]->xsk);
> >+              fds[0].events = POLLOUT;
> >+      }
> >
> >       for (;;) {
> >               if (opt_poll) {
> >-                      ret = poll(fds, nfds, timeout);
> >+                      ret = poll(fds, num_socks, 0);
> >                       if (ret <= 0)
> >                               continue;
> >
> >@@ -577,70 +619,75 @@ static void tx_only(struct xsk_socket_info *xsk)
> >                               continue;
> >               }
> >
> >-              if (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) ==
> >-                  BATCH_SIZE) {
> >-                      unsigned int i;
> >-
> >-                      for (i = 0; i < BATCH_SIZE; i++) {
> >-                              xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr
> >-                                      = (frame_nb + i) <<
> >-                                      XSK_UMEM__DEFAULT_FRAME_SHIFT;
> >-                              xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len =
> >-                                      sizeof(pkt_data) - 1;
> >-                      }
> >-
> >-                      xsk_ring_prod__submit(&xsk->tx, BATCH_SIZE);
> >-                      xsk->outstanding_tx += BATCH_SIZE;
> >-                      frame_nb += BATCH_SIZE;
> >-                      frame_nb %= NUM_FRAMES;
> >-              }
> >-
> >-              complete_tx_only(xsk);
> >+              for (i = 0; i < num_socks; i++)
> >+                      tx_only(xsks[i], frame_nb[i]);
> >       }
> > }
> >
> >-static void l2fwd(struct xsk_socket_info *xsk)
> >+static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds)
> > {
> >-      for (;;) {
> >-              unsigned int rcvd, i;
> >-              u32 idx_rx = 0, idx_tx = 0;
> >-              int ret;
> >+      unsigned int rcvd, i;
> >+      u32 idx_rx = 0, idx_tx = 0;
> >+      int ret;
> >
> >-              for (;;) {
> >-                      complete_tx_l2fwd(xsk);
> >+      complete_tx_l2fwd(xsk, fds);
> >
> >-                      rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE,
> >-                                                 &idx_rx);
> >-                      if (rcvd > 0)
> >-                              break;
> >-              }
> >+      rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE,
> >+                                 &idx_rx);
> >+      if (!rcvd)
> >+              return;
> >
> >+      ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
> >+      while (ret != rcvd) {
> >+              if (ret < 0)
> >+                      exit_with_error(-ret);
> >+              if (opt_busy_poll)
> >+                      ret = poll(fds, num_socks, 0);
> >               ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
> >-              while (ret != rcvd) {
> >-                      if (ret < 0)
> >-                              exit_with_error(-ret);
> >-                      ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
> >-              }
> >+      }
> >+
> >+      for (i = 0; i < rcvd; i++) {
> >+              u64 addr = xsk_ring_cons__rx_desc(&xsk->rx,
> >+                                                idx_rx)->addr;
> >+              u32 len = xsk_ring_cons__rx_desc(&xsk->rx,
> >+                                               idx_rx++)->len;
> >+              char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
> >
> >-              for (i = 0; i < rcvd; i++) {
> >-                      u64 addr = xsk_ring_cons__rx_desc(&xsk->rx,
> >-                                                        idx_rx)->addr;
> >-                      u32 len = xsk_ring_cons__rx_desc(&xsk->rx,
> >-                                                       idx_rx++)->len;
> >-                      char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
> >+              swap_mac_addresses(pkt);
> >
> >-                      swap_mac_addresses(pkt);
> >+              hex_dump(pkt, len, addr);
> >+              xsk_ring_prod__tx_desc(&xsk->tx, idx_tx)->addr = addr;
> >+              xsk_ring_prod__tx_desc(&xsk->tx, idx_tx++)->len = len;
> >+      }
> >
> >-                      hex_dump(pkt, len, addr);
> >-                      xsk_ring_prod__tx_desc(&xsk->tx, idx_tx)->addr = addr;
> >-                      xsk_ring_prod__tx_desc(&xsk->tx, idx_tx++)->len = len;
> >-              }
> >+      xsk_ring_prod__submit(&xsk->tx, rcvd);
> >+      xsk_ring_cons__release(&xsk->rx, rcvd);
> >
> >-              xsk_ring_prod__submit(&xsk->tx, rcvd);
> >-              xsk_ring_cons__release(&xsk->rx, rcvd);
> >+      xsk->rx_npkts += rcvd;
> >+      xsk->outstanding_tx += rcvd;
> >+}
> >
> >-              xsk->rx_npkts += rcvd;
> >-              xsk->outstanding_tx += rcvd;
> >+static void l2fwd_all(void)
> >+{
> >+      struct pollfd fds[MAX_SOCKS];
> >+      int i, ret;
> >+
> >+      memset(fds, 0, sizeof(fds));
> >+
> >+      for (i = 0; i < num_socks; i++) {
> >+              fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
> >+              fds[i].events = POLLOUT | POLLIN;
> >+      }
> >+
> >+      for (;;) {
> >+              if (opt_poll) {
> >+                      ret = poll(fds, num_socks, 0);
> >+                      if (ret <= 0)
> >+                              continue;
> >+              }
> >+
> >+              for (i = 0; i < num_socks; i++)
> >+                      l2fwd(xsks[i], fds);
> >       }
> > }
> >
> >@@ -693,9 +740,9 @@ int main(int argc, char **argv)
> >       if (opt_bench == BENCH_RXDROP)
> >               rx_drop_all();
> >       else if (opt_bench == BENCH_TXONLY)
> >-              tx_only(xsks[0]);
> >+              tx_only_all();
> >       else
> >-              l2fwd(xsks[0]);
> >+              l2fwd_all();
> >
> >       return 0;
> > }
> >--
> >2.7.4
> >

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ