[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAJ8uoz3wPOWRBokCMw8oz66jUE6eB8uYkkui+Gba06=2SZi5bA@mail.gmail.com>
Date: Thu, 21 Feb 2019 08:09:43 +0100
From: Magnus Karlsson <magnus.karlsson@...il.com>
To: Ye Xiaolong <xiaolong.ye@...el.com>
Cc: Magnus Karlsson <magnus.karlsson@...el.com>,
Björn Töpel <bjorn.topel@...el.com>,
ast@...nel.org, Daniel Borkmann <daniel@...earbox.net>,
Network Development <netdev@...r.kernel.org>,
Jakub Kicinski <jakub.kicinski@...ronome.com>,
Björn Töpel <bjorn.topel@...il.com>,
"Zhang, Qi Z" <qi.z.zhang@...el.com>,
Jesper Dangaard Brouer <brouer@...hat.com>
Subject: Re: [PATCH bpf-next v5 1/3] libbpf: add support for using AF_XDP sockets
On Thu, Feb 21, 2019 at 7:06 AM Ye Xiaolong <xiaolong.ye@...el.com> wrote:
>
> Hi Magnus
>
> On 02/19, Magnus Karlsson wrote:
> [snip]
> >+static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
> >+{
> >+ bool prog_attached = false;
> >+ __u32 prog_id = 0;
> >+ int err;
> >+
> >+ err = bpf_get_link_xdp_id(xsk->ifindex, &prog_id,
> >+ xsk->config.xdp_flags);
> >+ if (err)
> >+ return err;
> >+
> >+ if (!prog_id) {
> >+ prog_attached = true;
> >+ err = xsk_create_bpf_maps(xsk);
> >+ if (err)
> >+ return err;
> >+
> >+ err = xsk_load_xdp_prog(xsk);
> >+ if (err)
> >+ goto out_maps;
> >+ } else {
> >+ xsk->fd = bpf_prog_get_fd_by_id(prog_id);
>
> I suppose it should be
>
> xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id);
My bad, that is correct. Will spin a v6.
Thanks: Magnus
> >+ }
> >+
> >+ err = xsk_update_bpf_maps(xsk, true, xsk->fd);
> >+ if (err)
> >+ goto out_load;
> >+
> >+ return 0;
> >+
> >+out_load:
> >+ if (prog_attached)
> >+ close(xsk->prog_fd);
> >+out_maps:
> >+ if (prog_attached)
> >+ xsk_delete_bpf_maps(xsk);
> >+ return err;
> >+}
> >+
> >+int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
> >+ __u32 queue_id, struct xsk_umem *umem,
> >+ struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
> >+ const struct xsk_socket_config *usr_config)
> >+{
> >+ struct sockaddr_xdp sxdp = {};
> >+ struct xdp_mmap_offsets off;
> >+ struct xsk_socket *xsk;
> >+ socklen_t optlen;
> >+ void *map;
> >+ int err;
> >+
> >+ if (!umem || !xsk_ptr || !rx || !tx)
> >+ return -EFAULT;
> >+
> >+ if (umem->refcount) {
> >+ pr_warning("Error: shared umems not supported by libbpf.\n");
> >+ return -EBUSY;
> >+ }
> >+
> >+ xsk = calloc(1, sizeof(*xsk));
> >+ if (!xsk)
> >+ return -ENOMEM;
> >+
> >+ if (umem->refcount++ > 0) {
> >+ xsk->fd = socket(AF_XDP, SOCK_RAW, 0);
> >+ if (xsk->fd < 0) {
> >+ err = -errno;
> >+ goto out_xsk_alloc;
> >+ }
> >+ } else {
> >+ xsk->fd = umem->fd;
> >+ }
> >+
> >+ xsk->outstanding_tx = 0;
> >+ xsk->queue_id = queue_id;
> >+ xsk->umem = umem;
> >+ xsk->ifindex = if_nametoindex(ifname);
> >+ if (!xsk->ifindex) {
> >+ err = -errno;
> >+ goto out_socket;
> >+ }
> >+ strncpy(xsk->ifname, ifname, IFNAMSIZ);
> >+
> >+ xsk_set_xdp_socket_config(&xsk->config, usr_config);
> >+
> >+ if (rx) {
> >+ err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
> >+ &xsk->config.rx_size,
> >+ sizeof(xsk->config.rx_size));
> >+ if (err) {
> >+ err = -errno;
> >+ goto out_socket;
> >+ }
> >+ }
> >+ if (tx) {
> >+ err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING,
> >+ &xsk->config.tx_size,
> >+ sizeof(xsk->config.tx_size));
> >+ if (err) {
> >+ err = -errno;
> >+ goto out_socket;
> >+ }
> >+ }
> >+
> >+ optlen = sizeof(off);
> >+ err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
> >+ if (err) {
> >+ err = -errno;
> >+ goto out_socket;
> >+ }
> >+
> >+ if (rx) {
> >+ map = xsk_mmap(NULL, off.rx.desc +
> >+ xsk->config.rx_size * sizeof(struct xdp_desc),
> >+ PROT_READ | PROT_WRITE,
> >+ MAP_SHARED | MAP_POPULATE,
> >+ xsk->fd, XDP_PGOFF_RX_RING);
> >+ if (map == MAP_FAILED) {
> >+ err = -errno;
> >+ goto out_socket;
> >+ }
> >+
> >+ rx->mask = xsk->config.rx_size - 1;
> >+ rx->size = xsk->config.rx_size;
> >+ rx->producer = map + off.rx.producer;
> >+ rx->consumer = map + off.rx.consumer;
> >+ rx->ring = map + off.rx.desc;
> >+ }
> >+ xsk->rx = rx;
> >+
> >+ if (tx) {
> >+ map = xsk_mmap(NULL, off.tx.desc +
> >+ xsk->config.tx_size * sizeof(struct xdp_desc),
> >+ PROT_READ | PROT_WRITE,
> >+ MAP_SHARED | MAP_POPULATE,
> >+ xsk->fd, XDP_PGOFF_TX_RING);
> >+ if (map == MAP_FAILED) {
> >+ err = -errno;
> >+ goto out_mmap_rx;
> >+ }
> >+
> >+ tx->mask = xsk->config.tx_size - 1;
> >+ tx->size = xsk->config.tx_size;
> >+ tx->producer = map + off.tx.producer;
> >+ tx->consumer = map + off.tx.consumer;
> >+ tx->ring = map + off.tx.desc;
> >+ tx->cached_cons = xsk->config.tx_size;
> >+ }
> >+ xsk->tx = tx;
> >+
> >+ sxdp.sxdp_family = PF_XDP;
> >+ sxdp.sxdp_ifindex = xsk->ifindex;
> >+ sxdp.sxdp_queue_id = xsk->queue_id;
> >+ sxdp.sxdp_flags = xsk->config.bind_flags;
> >+
> >+ err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
> >+ if (err) {
> >+ err = -errno;
> >+ goto out_mmap_tx;
> >+ }
> >+
> >+ if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
> >+ err = xsk_setup_xdp_prog(xsk);
> >+ if (err)
> >+ goto out_mmap_tx;
> >+ }
> >+
> >+ *xsk_ptr = xsk;
> >+ return 0;
> >+
> >+out_mmap_tx:
> >+ if (tx)
> >+ munmap(xsk->tx,
> >+ off.tx.desc +
> >+ xsk->config.tx_size * sizeof(struct xdp_desc));
> >+out_mmap_rx:
> >+ if (rx)
> >+ munmap(xsk->rx,
> >+ off.rx.desc +
> >+ xsk->config.rx_size * sizeof(struct xdp_desc));
> >+out_socket:
> >+ if (--umem->refcount)
> >+ close(xsk->fd);
> >+out_xsk_alloc:
> >+ free(xsk);
> >+ return err;
> >+}
> >+
> >+int xsk_umem__delete(struct xsk_umem *umem)
> >+{
> >+ struct xdp_mmap_offsets off;
> >+ socklen_t optlen;
> >+ int err;
> >+
> >+ if (!umem)
> >+ return 0;
> >+
> >+ if (umem->refcount)
> >+ return -EBUSY;
> >+
> >+ optlen = sizeof(off);
> >+ err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
> >+ if (!err) {
> >+ munmap(umem->fill->ring,
> >+ off.fr.desc + umem->config.fill_size * sizeof(__u64));
> >+ munmap(umem->comp->ring,
> >+ off.cr.desc + umem->config.comp_size * sizeof(__u64));
> >+ }
> >+
> >+ close(umem->fd);
> >+ free(umem);
> >+
> >+ return 0;
> >+}
> >+
> >+void xsk_socket__delete(struct xsk_socket *xsk)
> >+{
> >+ struct xdp_mmap_offsets off;
> >+ socklen_t optlen;
> >+ int err;
> >+
> >+ if (!xsk)
> >+ return;
> >+
> >+ (void)xsk_update_bpf_maps(xsk, 0, 0);
> >+
> >+ optlen = sizeof(off);
> >+ err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
> >+ if (!err) {
> >+ if (xsk->rx)
> >+ munmap(xsk->rx->ring,
> >+ off.rx.desc +
> >+ xsk->config.rx_size * sizeof(struct xdp_desc));
> >+ if (xsk->tx)
> >+ munmap(xsk->tx->ring,
> >+ off.tx.desc +
> >+ xsk->config.tx_size * sizeof(struct xdp_desc));
> >+ }
> >+
> >+ xsk->umem->refcount--;
> >+ /* Do not close an fd that also has an associated umem connected
> >+ * to it.
> >+ */
> >+ if (xsk->fd != xsk->umem->fd)
> >+ close(xsk->fd);
> >+ free(xsk);
> >+}
> >diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
> >new file mode 100644
> >index 0000000..a497f00
> >--- /dev/null
> >+++ b/tools/lib/bpf/xsk.h
> >@@ -0,0 +1,203 @@
> >+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
> >+
> >+/*
> >+ * AF_XDP user-space access library.
> >+ *
> >+ * Copyright(c) 2018 - 2019 Intel Corporation.
> >+ *
> >+ * Author(s): Magnus Karlsson <magnus.karlsson@...el.com>
> >+ */
> >+
> >+#ifndef __LIBBPF_XSK_H
> >+#define __LIBBPF_XSK_H
> >+
> >+#include <stdio.h>
> >+#include <stdint.h>
> >+#include <linux/if_xdp.h>
> >+
> >+#include "libbpf.h"
> >+
> >+#ifdef __cplusplus
> >+extern "C" {
> >+#endif
> >+
> >+/* Do not access these members directly. Use the functions below. */
> >+#define DEFINE_XSK_RING(name) \
> >+struct name { \
> >+ __u32 cached_prod; \
> >+ __u32 cached_cons; \
> >+ __u32 mask; \
> >+ __u32 size; \
> >+ __u32 *producer; \
> >+ __u32 *consumer; \
> >+ void *ring; \
> >+}
> >+
> >+DEFINE_XSK_RING(xsk_ring_prod);
> >+DEFINE_XSK_RING(xsk_ring_cons);
> >+
> >+struct xsk_umem;
> >+struct xsk_socket;
> >+
> >+static inline __u64 *xsk_ring_prod__fill_addr(struct xsk_ring_prod *fill,
> >+ __u32 idx)
> >+{
> >+ __u64 *addrs = (__u64 *)fill->ring;
> >+
> >+ return &addrs[idx & fill->mask];
> >+}
> >+
> >+static inline const __u64 *
> >+xsk_ring_cons__comp_addr(const struct xsk_ring_cons *comp, __u32 idx)
> >+{
> >+ const __u64 *addrs = (const __u64 *)comp->ring;
> >+
> >+ return &addrs[idx & comp->mask];
> >+}
> >+
> >+static inline struct xdp_desc *xsk_ring_prod__tx_desc(struct xsk_ring_prod *tx,
> >+ __u32 idx)
> >+{
> >+ struct xdp_desc *descs = (struct xdp_desc *)tx->ring;
> >+
> >+ return &descs[idx & tx->mask];
> >+}
> >+
> >+static inline const struct xdp_desc *
> >+xsk_ring_cons__rx_desc(const struct xsk_ring_cons *rx, __u32 idx)
> >+{
> >+ const struct xdp_desc *descs = (const struct xdp_desc *)rx->ring;
> >+
> >+ return &descs[idx & rx->mask];
> >+}
> >+
> >+static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
> >+{
> >+ __u32 free_entries = r->cached_cons - r->cached_prod;
> >+
> >+ if (free_entries >= nb)
> >+ return free_entries;
> >+
> >+ /* Refresh the local tail pointer.
> >+ * cached_cons is r->size bigger than the real consumer pointer so
> >+ * that this addition can be avoided in the more frequently
> >+ * executed code that computes free_entries in the beginning of
> >+ * this function. Without this optimization it would have been
> >+ * free_entries = r->cached_prod - r->cached_cons + r->size.
> >+ */
> >+ r->cached_cons = *r->consumer + r->size;
> >+
> >+ return r->cached_cons - r->cached_prod;
> >+}
> >+
> >+static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb)
> >+{
> >+ __u32 entries = r->cached_prod - r->cached_cons;
> >+
> >+ if (entries == 0) {
> >+ r->cached_prod = *r->producer;
> >+ entries = r->cached_prod - r->cached_cons;
> >+ }
> >+
> >+ return (entries > nb) ? nb : entries;
> >+}
> >+
> >+static inline size_t xsk_ring_prod__reserve(struct xsk_ring_prod *prod,
> >+ size_t nb, __u32 *idx)
> >+{
> >+ if (unlikely(xsk_prod_nb_free(prod, nb) < nb))
> >+ return 0;
> >+
> >+ *idx = prod->cached_prod;
> >+ prod->cached_prod += nb;
> >+
> >+ return nb;
> >+}
> >+
> >+static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, size_t nb)
> >+{
> >+ /* Make sure everything has been written to the ring before signalling
> >+ * this to the kernel.
> >+ */
> >+ smp_wmb();
> >+
> >+ *prod->producer += nb;
> >+}
> >+
> >+static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons,
> >+ size_t nb, __u32 *idx)
> >+{
> >+ size_t entries = xsk_cons_nb_avail(cons, nb);
> >+
> >+ if (likely(entries > 0)) {
> >+ /* Make sure we do not speculatively read the data before
> >+ * we have received the packet buffers from the ring.
> >+ */
> >+ smp_rmb();
> >+
> >+ *idx = cons->cached_cons;
> >+ cons->cached_cons += entries;
> >+ }
> >+
> >+ return entries;
> >+}
> >+
> >+static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, size_t nb)
> >+{
> >+ *cons->consumer += nb;
> >+}
> >+
> >+static inline void *xsk_umem__get_data(void *umem_area, __u64 addr)
> >+{
> >+ return &((char *)umem_area)[addr];
> >+}
> >+
> >+LIBBPF_API int xsk_umem__fd(const struct xsk_umem *umem);
> >+LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk);
> >+
> >+#define XSK_RING_CONS__DEFAULT_NUM_DESCS 2048
> >+#define XSK_RING_PROD__DEFAULT_NUM_DESCS 2048
> >+#define XSK_UMEM__DEFAULT_FRAME_SHIFT 11 /* 2048 bytes */
> >+#define XSK_UMEM__DEFAULT_FRAME_SIZE (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT)
> >+#define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0
> >+
> >+struct xsk_umem_config {
> >+ __u32 fill_size;
> >+ __u32 comp_size;
> >+ __u32 frame_size;
> >+ __u32 frame_headroom;
> >+};
> >+
> >+/* Flags for the libbpf_flags field. */
> >+#define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0)
> >+
> >+struct xsk_socket_config {
> >+ __u32 rx_size;
> >+ __u32 tx_size;
> >+ __u32 libbpf_flags;
> >+ __u32 xdp_flags;
> >+ __u16 bind_flags;
> >+};
> >+
> >+/* Set config to NULL to get the default configuration. */
> >+LIBBPF_API int xsk_umem__create(struct xsk_umem **umem,
> >+ void *umem_area, __u64 size,
> >+ struct xsk_ring_prod *fill,
> >+ struct xsk_ring_cons *comp,
> >+ const struct xsk_umem_config *config);
> >+LIBBPF_API int xsk_socket__create(struct xsk_socket **xsk,
> >+ const char *ifname, __u32 queue_id,
> >+ struct xsk_umem *umem,
> >+ struct xsk_ring_cons *rx,
> >+ struct xsk_ring_prod *tx,
> >+ const struct xsk_socket_config *config);
> >+
> >+/* Returns 0 for success and -EBUSY if the umem is still in use. */
> >+LIBBPF_API int xsk_umem__delete(struct xsk_umem *umem);
> >+LIBBPF_API void xsk_socket__delete(struct xsk_socket *xsk);
> >+
> >+#ifdef __cplusplus
> >+} /* extern "C" */
> >+#endif
> >+
> >+#endif /* __LIBBPF_XSK_H */
> >--
> >2.7.4
> >
Powered by blists - more mailing lists