lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240619072253.504963-16-bigeasy@linutronix.de>
Date: Wed, 19 Jun 2024 09:17:06 +0200
From: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
To: linux-kernel@...r.kernel.org,
	netdev@...r.kernel.org
Cc: "David S. Miller" <davem@...emloft.net>,
	Daniel Bristot de Oliveira <bristot@...nel.org>,
	Boqun Feng <boqun.feng@...il.com>,
	Daniel Borkmann <daniel@...earbox.net>,
	Eric Dumazet <edumazet@...gle.com>,
	Frederic Weisbecker <frederic@...nel.org>,
	Ingo Molnar <mingo@...hat.com>,
	Jakub Kicinski <kuba@...nel.org>,
	Paolo Abeni <pabeni@...hat.com>,
	Peter Zijlstra <peterz@...radead.org>,
	Thomas Gleixner <tglx@...utronix.de>,
	Waiman Long <longman@...hat.com>,
	Will Deacon <will@...nel.org>,
	Sebastian Andrzej Siewior <bigeasy@...utronix.de>,
	Björn Töpel <bjorn@...nel.org>,
	Alexei Starovoitov <ast@...nel.org>,
	Andrii Nakryiko <andrii@...nel.org>,
	Eduard Zingerman <eddyz87@...il.com>,
	Hao Luo <haoluo@...gle.com>,
	Jesper Dangaard Brouer <hawk@...nel.org>,
	Jiri Olsa <jolsa@...nel.org>,
	John Fastabend <john.fastabend@...il.com>,
	Jonathan Lemon <jonathan.lemon@...il.com>,
	KP Singh <kpsingh@...nel.org>,
	Maciej Fijalkowski <maciej.fijalkowski@...el.com>,
	Magnus Karlsson <magnus.karlsson@...el.com>,
	Martin KaFai Lau <martin.lau@...ux.dev>,
	Song Liu <song@...nel.org>,
	Stanislav Fomichev <sdf@...gle.com>,
	Toke Høiland-Jørgensen <toke@...hat.com>,
	Yonghong Song <yonghong.song@...ux.dev>,
	bpf@...r.kernel.org
Subject: [PATCH v8 net-next 15/15] net: Move per-CPU flush-lists to bpf_net_context on PREEMPT_RT.

The flush lists, which are accessed from within the NAPI callback
(xdp_do_flush() for instance), are per-CPU. They are subject to the
same problem as struct bpf_redirect_info.

Add the per-CPU lists cpu_map_flush_list, dev_map_flush_list and
xskmap_map_flush_list to struct bpf_net_context. Add wrappers for
accessing them. The lists are initialized on first use (similar to
bpf_net_ctx_get_ri()).

Cc: "Björn Töpel" <bjorn@...nel.org>
Cc: Alexei Starovoitov <ast@...nel.org>
Cc: Andrii Nakryiko <andrii@...nel.org>
Cc: Eduard Zingerman <eddyz87@...il.com>
Cc: Hao Luo <haoluo@...gle.com>
Cc: Jesper Dangaard Brouer <hawk@...nel.org>
Cc: Jiri Olsa <jolsa@...nel.org>
Cc: John Fastabend <john.fastabend@...il.com>
Cc: Jonathan Lemon <jonathan.lemon@...il.com>
Cc: KP Singh <kpsingh@...nel.org>
Cc: Maciej Fijalkowski <maciej.fijalkowski@...el.com>
Cc: Magnus Karlsson <magnus.karlsson@...el.com>
Cc: Martin KaFai Lau <martin.lau@...ux.dev>
Cc: Song Liu <song@...nel.org>
Cc: Stanislav Fomichev <sdf@...gle.com>
Cc: Toke Høiland-Jørgensen <toke@...hat.com>
Cc: Yonghong Song <yonghong.song@...ux.dev>
Cc: bpf@...r.kernel.org
Acked-by: Jesper Dangaard Brouer <hawk@...nel.org>
Reviewed-by: Toke Høiland-Jørgensen <toke@...hat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
---
 include/linux/filter.h | 42 ++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/cpumap.c    | 19 +++----------------
 kernel/bpf/devmap.c    | 11 +++--------
 net/xdp/xsk.c          | 12 ++++--------
 4 files changed, 52 insertions(+), 32 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 0a7f6e4a00b60..c0349522de8fb 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -736,6 +736,9 @@ struct bpf_nh_params {
 /* flags for bpf_redirect_info kern_flags */
 #define BPF_RI_F_RF_NO_DIRECT	BIT(0)	/* no napi_direct on return_frame */
 #define BPF_RI_F_RI_INIT	BIT(1)
+#define BPF_RI_F_CPU_MAP_INIT	BIT(2)
+#define BPF_RI_F_DEV_MAP_INIT	BIT(3)
+#define BPF_RI_F_XSK_MAP_INIT	BIT(4)
 
 struct bpf_redirect_info {
 	u64 tgt_index;
@@ -750,6 +753,9 @@ struct bpf_redirect_info {
 
 struct bpf_net_context {
 	struct bpf_redirect_info ri;
+	struct list_head cpu_map_flush_list;
+	struct list_head dev_map_flush_list;
+	struct list_head xskmap_map_flush_list;
 };
 
 static inline struct bpf_net_context *bpf_net_ctx_set(struct bpf_net_context *bpf_net_ctx)
@@ -787,6 +793,42 @@ static inline struct bpf_redirect_info *bpf_net_ctx_get_ri(void)
 	return &bpf_net_ctx->ri;
 }
 
+static inline struct list_head *bpf_net_ctx_get_cpu_map_flush_list(void)
+{
+	struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get();
+
+	if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_CPU_MAP_INIT)) {
+		INIT_LIST_HEAD(&bpf_net_ctx->cpu_map_flush_list);
+		bpf_net_ctx->ri.kern_flags |= BPF_RI_F_CPU_MAP_INIT;
+	}
+
+	return &bpf_net_ctx->cpu_map_flush_list;
+}
+
+static inline struct list_head *bpf_net_ctx_get_dev_flush_list(void)
+{
+	struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get();
+
+	if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_DEV_MAP_INIT)) {
+		INIT_LIST_HEAD(&bpf_net_ctx->dev_map_flush_list);
+		bpf_net_ctx->ri.kern_flags |= BPF_RI_F_DEV_MAP_INIT;
+	}
+
+	return &bpf_net_ctx->dev_map_flush_list;
+}
+
+static inline struct list_head *bpf_net_ctx_get_xskmap_flush_list(void)
+{
+	struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get();
+
+	if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_XSK_MAP_INIT)) {
+		INIT_LIST_HEAD(&bpf_net_ctx->xskmap_map_flush_list);
+		bpf_net_ctx->ri.kern_flags |= BPF_RI_F_XSK_MAP_INIT;
+	}
+
+	return &bpf_net_ctx->xskmap_map_flush_list;
+}
+
 /* Compute the linear packet data range [data, data_end) which
  * will be accessed by various program types (cls_bpf, act_bpf,
  * lwt, ...). Subsystems allowing direct data access must (!)
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 66974bd027109..068e994ed781a 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -79,8 +79,6 @@ struct bpf_cpu_map {
 	struct bpf_cpu_map_entry __rcu **cpu_map;
 };
 
-static DEFINE_PER_CPU(struct list_head, cpu_map_flush_list);
-
 static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
 {
 	u32 value_size = attr->value_size;
@@ -709,7 +707,7 @@ static void bq_flush_to_queue(struct xdp_bulk_queue *bq)
  */
 static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
 {
-	struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list);
+	struct list_head *flush_list = bpf_net_ctx_get_cpu_map_flush_list();
 	struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
 
 	if (unlikely(bq->count == CPU_MAP_BULK_SIZE))
@@ -761,7 +759,7 @@ int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
 
 void __cpu_map_flush(void)
 {
-	struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list);
+	struct list_head *flush_list = bpf_net_ctx_get_cpu_map_flush_list();
 	struct xdp_bulk_queue *bq, *tmp;
 
 	list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
@@ -775,20 +773,9 @@ void __cpu_map_flush(void)
 #ifdef CONFIG_DEBUG_NET
 bool cpu_map_check_flush(void)
 {
-	if (list_empty(this_cpu_ptr(&cpu_map_flush_list)))
+	if (list_empty(bpf_net_ctx_get_cpu_map_flush_list()))
 		return false;
 	__cpu_map_flush();
 	return true;
 }
 #endif
-
-static int __init cpu_map_init(void)
-{
-	int cpu;
-
-	for_each_possible_cpu(cpu)
-		INIT_LIST_HEAD(&per_cpu(cpu_map_flush_list, cpu));
-	return 0;
-}
-
-subsys_initcall(cpu_map_init);
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index fbfdfb60db8d7..317ac2d66ebd1 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -83,7 +83,6 @@ struct bpf_dtab {
 	u32 n_buckets;
 };
 
-static DEFINE_PER_CPU(struct list_head, dev_flush_list);
 static DEFINE_SPINLOCK(dev_map_lock);
 static LIST_HEAD(dev_map_list);
 
@@ -415,7 +414,7 @@ static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
  */
 void __dev_flush(void)
 {
-	struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
+	struct list_head *flush_list = bpf_net_ctx_get_dev_flush_list();
 	struct xdp_dev_bulk_queue *bq, *tmp;
 
 	list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
@@ -429,7 +428,7 @@ void __dev_flush(void)
 #ifdef CONFIG_DEBUG_NET
 bool dev_check_flush(void)
 {
-	if (list_empty(this_cpu_ptr(&dev_flush_list)))
+	if (list_empty(bpf_net_ctx_get_dev_flush_list()))
 		return false;
 	__dev_flush();
 	return true;
@@ -460,7 +459,7 @@ static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
 static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
 		       struct net_device *dev_rx, struct bpf_prog *xdp_prog)
 {
-	struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
+	struct list_head *flush_list = bpf_net_ctx_get_dev_flush_list();
 	struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);
 
 	if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
@@ -1160,15 +1159,11 @@ static struct notifier_block dev_map_notifier = {
 
 static int __init dev_map_init(void)
 {
-	int cpu;
-
 	/* Assure tracepoint shadow struct _bpf_dtab_netdev is in sync */
 	BUILD_BUG_ON(offsetof(struct bpf_dtab_netdev, dev) !=
 		     offsetof(struct _bpf_dtab_netdev, dev));
 	register_netdevice_notifier(&dev_map_notifier);
 
-	for_each_possible_cpu(cpu)
-		INIT_LIST_HEAD(&per_cpu(dev_flush_list, cpu));
 	return 0;
 }
 
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 7d1c0986f9bb3..ed062e0383896 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -35,8 +35,6 @@
 #define TX_BATCH_SIZE 32
 #define MAX_PER_SOCKET_BUDGET (TX_BATCH_SIZE)
 
-static DEFINE_PER_CPU(struct list_head, xskmap_flush_list);
-
 void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
 {
 	if (pool->cached_need_wakeup & XDP_WAKEUP_RX)
@@ -372,7 +370,7 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 
 int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
-	struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
+	struct list_head *flush_list = bpf_net_ctx_get_xskmap_flush_list();
 	int err;
 
 	err = xsk_rcv(xs, xdp);
@@ -387,7 +385,7 @@ int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
 
 void __xsk_map_flush(void)
 {
-	struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
+	struct list_head *flush_list = bpf_net_ctx_get_xskmap_flush_list();
 	struct xdp_sock *xs, *tmp;
 
 	list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
@@ -399,7 +397,7 @@ void __xsk_map_flush(void)
 #ifdef CONFIG_DEBUG_NET
 bool xsk_map_check_flush(void)
 {
-	if (list_empty(this_cpu_ptr(&xskmap_flush_list)))
+	if (list_empty(bpf_net_ctx_get_xskmap_flush_list()))
 		return false;
 	__xsk_map_flush();
 	return true;
@@ -1772,7 +1770,7 @@ static struct pernet_operations xsk_net_ops = {
 
 static int __init xsk_init(void)
 {
-	int err, cpu;
+	int err;
 
 	err = proto_register(&xsk_proto, 0 /* no slab */);
 	if (err)
@@ -1790,8 +1788,6 @@ static int __init xsk_init(void)
 	if (err)
 		goto out_pernet;
 
-	for_each_possible_cpu(cpu)
-		INIT_LIST_HEAD(&per_cpu(xskmap_flush_list, cpu));
 	return 0;
 
 out_pernet:
-- 
2.45.2


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ