Message-Id: <20210301104318.263262-3-bjorn.topel@gmail.com>
Date: Mon, 1 Mar 2021 11:43:18 +0100
From: Björn Töpel <bjorn.topel@...il.com>
To: ast@...nel.org, daniel@...earbox.net, netdev@...r.kernel.org,
bpf@...r.kernel.org
Cc: Björn Töpel <bjorn.topel@...el.com>,
magnus.karlsson@...el.com, jonathan.lemon@...il.com,
maximmi@...dia.com, andrii@...nel.org
Subject: [PATCH bpf-next 2/2] libbpf, xsk: add libbpf_smp_store_release libbpf_smp_load_acquire
From: Björn Töpel <bjorn.topel@...el.com>
Now that the AF_XDP rings have load-acquire/store-release semantics,
move libbpf to that as well.
The library-internal libbpf_smp_{load_acquire,store_release} are only
valid for 32-bit words on ARM64, since the inline assembly there uses
the 32-bit (%w) register forms.
Also, remove the barriers that are no longer in use.
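As an illustration of the pairing these macros express (not the libbpf
code itself; the ring layout and function names below are made up for
the example), the same contract can be written with C11 atomics:

/* Illustrative only: acquire/release pairing on a single-producer,
 * single-consumer ring. The producer-side free-space check is omitted.
 */
#include <stdatomic.h>
#include <stdint.h>

#define RING_SIZE 64

struct demo_ring {
	_Atomic uint32_t producer;
	_Atomic uint32_t consumer;
	uint64_t descs[RING_SIZE];
};

static void demo_produce(struct demo_ring *r, uint64_t desc)
{
	uint32_t prod = atomic_load_explicit(&r->producer, memory_order_relaxed);

	/* Plain store of the payload... */
	r->descs[prod & (RING_SIZE - 1)] = desc;
	/* ...made visible before the index update by the release store,
	 * which is what the store-release in xsk_ring_prod__submit() relies on.
	 */
	atomic_store_explicit(&r->producer, prod + 1, memory_order_release);
}

static int demo_consume(struct demo_ring *r, uint64_t *desc)
{
	uint32_t cons = atomic_load_explicit(&r->consumer, memory_order_relaxed);
	/* The acquire load pairs with the producer's release store, so the
	 * payload read below cannot be speculated ahead of it.
	 */
	uint32_t prod = atomic_load_explicit(&r->producer, memory_order_acquire);

	if (cons == prod)
		return 0;

	*desc = r->descs[cons & (RING_SIZE - 1)];
	atomic_store_explicit(&r->consumer, cons + 1, memory_order_release);
	return 1;
}

The per-architecture asm variants added below implement the same
acquire/release ordering for 32-bit ring pointers without depending on
C11 <stdatomic.h>.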
Signed-off-by: Björn Töpel <bjorn.topel@...el.com>
---
tools/lib/bpf/libbpf_util.h | 72 +++++++++++++++++++++++++------------
tools/lib/bpf/xsk.h | 17 +++------
2 files changed, 55 insertions(+), 34 deletions(-)
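For reference, a minimal consumer-side sketch of how the helpers touched
below are typically used. Socket and umem setup, the fill ring and error
handling are omitted; the helper names are those already exported by
xsk.h, and the installed header path bpf/xsk.h is assumed:

/* Sketch: drain up to 32 RX descriptors. xsk_ring_cons__peek() now
 * load-acquires the producer pointer (via xsk_cons_nb_avail()) and
 * xsk_ring_cons__release() now store-releases the consumer pointer, so
 * no explicit libbpf_smp_*mb() calls are needed around this loop.
 */
#include <bpf/xsk.h>

static void drain_rx(struct xsk_ring_cons *rx)
{
	__u32 idx, i;
	__u32 rcvd = xsk_ring_cons__peek(rx, 32, &idx);

	for (i = 0; i < rcvd; i++) {
		const struct xdp_desc *desc = xsk_ring_cons__rx_desc(rx, idx + i);

		/* desc->addr / desc->len reference the frame in the umem;
		 * process the packet here.
		 */
		(void)desc;
	}

	if (rcvd)
		xsk_ring_cons__release(rx, rcvd);
}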
diff --git a/tools/lib/bpf/libbpf_util.h b/tools/lib/bpf/libbpf_util.h
index 59c779c5790c..94a0d7bb6f3c 100644
--- a/tools/lib/bpf/libbpf_util.h
+++ b/tools/lib/bpf/libbpf_util.h
@@ -5,6 +5,7 @@
#define __LIBBPF_LIBBPF_UTIL_H
#include <stdbool.h>
+#include <linux/compiler.h>
#ifdef __cplusplus
extern "C" {
@@ -15,29 +16,56 @@ extern "C" {
* application that uses libbpf.
*/
#if defined(__i386__) || defined(__x86_64__)
-# define libbpf_smp_rmb() asm volatile("" : : : "memory")
-# define libbpf_smp_wmb() asm volatile("" : : : "memory")
-# define libbpf_smp_mb() \
- asm volatile("lock; addl $0,-4(%%rsp)" : : : "memory", "cc")
-/* Hinders stores to be observed before older loads. */
-# define libbpf_smp_rwmb() asm volatile("" : : : "memory")
+# define libbpf_smp_store_release(p, v) \
+ do { \
+ asm volatile("" : : : "memory"); \
+ WRITE_ONCE(*p, v); \
+ } while (0)
+# define libbpf_smp_load_acquire(p) \
+ ({ \
+ typeof(*p) ___p1 = READ_ONCE(*p); \
+ asm volatile("" : : : "memory"); \
+ ___p1; \
+ })
#elif defined(__aarch64__)
-# define libbpf_smp_rmb() asm volatile("dmb ishld" : : : "memory")
-# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory")
-# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory")
-# define libbpf_smp_rwmb() libbpf_smp_mb()
-#elif defined(__arm__)
-/* These are only valid for armv7 and above */
-# define libbpf_smp_rmb() asm volatile("dmb ish" : : : "memory")
-# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory")
-# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory")
-# define libbpf_smp_rwmb() libbpf_smp_mb()
-#else
-/* Architecture missing native barrier functions. */
-# define libbpf_smp_rmb() __sync_synchronize()
-# define libbpf_smp_wmb() __sync_synchronize()
-# define libbpf_smp_mb() __sync_synchronize()
-# define libbpf_smp_rwmb() __sync_synchronize()
+# define libbpf_smp_store_release(p, v) \
+ asm volatile ("stlr %w1, %0" : "=Q" (*p) : "r" (v) : "memory")
+# define libbpf_smp_load_acquire(p) \
+ ({ \
+ typeof(*p) ___p1; \
+ asm volatile ("ldar %w0, %1" \
+ : "=r" (___p1) : "Q" (*p) : "memory"); \
+ ___p1; \
+ })
+#elif defined(__riscv)
+# define libbpf_smp_store_release(p, v) \
+ do { \
+ asm volatile ("fence rw,w" : : : "memory"); \
+ WRITE_ONCE(*p, v); \
+ } while (0)
+# define libbpf_smp_load_acquire(p) \
+ ({ \
+ typeof(*p) ___p1 = READ_ONCE(*p); \
+ asm volatile ("fence r,rw" : : : "memory"); \
+ ___p1; \
+ })
+#endif
+
+#ifndef libbpf_smp_store_release
+#define libbpf_smp_store_release(p, v) \
+ do { \
+ __sync_synchronize(); \
+ WRITE_ONCE(*p, v); \
+ } while (0)
+#endif
+
+#ifndef libbpf_smp_load_acquire
+#define libbpf_smp_load_acquire(p) \
+ ({ \
+ typeof(*p) ___p1 = READ_ONCE(*p); \
+ __sync_synchronize(); \
+ ___p1; \
+ })
#endif
#ifdef __cplusplus
diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
index e9f121f5d129..a9fdea87b5cd 100644
--- a/tools/lib/bpf/xsk.h
+++ b/tools/lib/bpf/xsk.h
@@ -96,7 +96,8 @@ static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
 * this function. Without this optimization it would have been
* free_entries = r->cached_prod - r->cached_cons + r->size.
*/
- r->cached_cons = *r->consumer + r->size;
+ r->cached_cons = libbpf_smp_load_acquire(r->consumer);
+ r->cached_cons += r->size;
return r->cached_cons - r->cached_prod;
}
@@ -106,7 +107,7 @@ static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb)
__u32 entries = r->cached_prod - r->cached_cons;
if (entries == 0) {
- r->cached_prod = *r->producer;
+ r->cached_prod = libbpf_smp_load_acquire(r->producer);
entries = r->cached_prod - r->cached_cons;
}
@@ -129,9 +130,7 @@ static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb)
/* Make sure everything has been written to the ring before indicating
* this to the kernel by writing the producer pointer.
*/
- libbpf_smp_wmb();
-
- *prod->producer += nb;
+ libbpf_smp_store_release(prod->producer, *prod->producer + nb);
}
static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx)
@@ -139,11 +138,6 @@ static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __
__u32 entries = xsk_cons_nb_avail(cons, nb);
if (entries > 0) {
- /* Make sure we do not speculatively read the data before
- * we have received the packet buffers from the ring.
- */
- libbpf_smp_rmb();
-
*idx = cons->cached_cons;
cons->cached_cons += entries;
}
@@ -161,9 +155,8 @@ static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, __u32 nb)
/* Make sure data has been read before indicating we are done
* with the entries by updating the consumer pointer.
*/
- libbpf_smp_rwmb();
+ libbpf_smp_store_release(cons->consumer, *cons->consumer + nb);
- *cons->consumer += nb;
}
static inline void *xsk_umem__get_data(void *umem_area, __u64 addr)
--
2.27.0