Message-ID: <20240610165014.uWp_yZuW@linutronix.de>
Date: Mon, 10 Jun 2024 18:50:14 +0200
From: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
To: Jesper Dangaard Brouer <hawk@...nel.org>
Cc: linux-kernel@...r.kernel.org, netdev@...r.kernel.org,
	"David S. Miller" <davem@...emloft.net>,
	Daniel Bristot de Oliveira <bristot@...nel.org>,
	Boqun Feng <boqun.feng@...il.com>,
	Daniel Borkmann <daniel@...earbox.net>,
	Eric Dumazet <edumazet@...gle.com>,
	Frederic Weisbecker <frederic@...nel.org>,
	Ingo Molnar <mingo@...hat.com>, Jakub Kicinski <kuba@...nel.org>,
	Paolo Abeni <pabeni@...hat.com>,
	Peter Zijlstra <peterz@...radead.org>,
	Thomas Gleixner <tglx@...utronix.de>,
	Waiman Long <longman@...hat.com>, Will Deacon <will@...nel.org>,
	Alexei Starovoitov <ast@...nel.org>,
	Andrii Nakryiko <andrii@...nel.org>,
	Eduard Zingerman <eddyz87@...il.com>, Hao Luo <haoluo@...gle.com>,
	Jiri Olsa <jolsa@...nel.org>,
	John Fastabend <john.fastabend@...il.com>,
	KP Singh <kpsingh@...nel.org>,
	Martin KaFai Lau <martin.lau@...ux.dev>, Song Liu <song@...nel.org>,
	Stanislav Fomichev <sdf@...gle.com>,
	Toke Høiland-Jørgensen <toke@...hat.com>,
	Yonghong Song <yonghong.song@...ux.dev>, bpf@...r.kernel.org
Subject: Re: [PATCH v5 net-next 14/15] net: Reference bpf_redirect_info via
 task_struct on PREEMPT_RT.

On 2024-06-07 13:51:25 [+0200], Jesper Dangaard Brouer wrote:
> The memset can be further optimized as it currently clears 64 bytes, but
> it only needs to clear 40 bytes; see the pahole output below.
> 
> Replace memset with something like:
>  memset(&bpf_net_ctx->ri, 0, offsetof(struct bpf_net_context, ri.nh));
> 
> This is an optimization: with 64 bytes the compiler emits a rep-stos
> (repeated string store) operation, which on Intel touches the CPU flags
> (to be IRQ safe) and is slow, while clearing 40 bytes doesn't make the
> compiler use this instruction, which is faster.  Memset benchmarked with [1]

I've been playing around with this and have to say that "rep stosq" is
roughly 3x slower than "movq" for 64 bytes on every x86 machine I've
looked at.
For gcc, the choice between stosq and movq depends on the CPU tuning: the
generic tuning uses movq for up to 40 bytes, skylake uses movq even for
64 bytes. clang…
This can be tuned via -mmemset-strategy=libcall:64:align,rep_8byte:-1:align
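
As a side note, this is easy to reproduce in userspace. Here is a minimal
sketch (the struct layout and sizes are made up to mirror the 64-byte vs
40-byte case, and -mno-sse stands in for the kernel's build constraints);
compile with gcc -O2 -mno-sse -S and compare the stores the two functions
get, keeping in mind that the 64-byte result depends on -mtune as
described above:

/* memset-size.c: compare codegen for a 64-byte vs a 40-byte clear.
 * Build: gcc -O2 -mno-sse -S memset-size.c
 * (optionally add -mtune=... or -mmemset-strategy=... to tune it)
 */
#include <stddef.h>
#include <string.h>

struct ctx {
	unsigned long head[5];	/* 40 bytes that must be cleared */
	unsigned long tail[3];	/* 24 bytes that may stay stale  */
};

void clear_all(struct ctx *c)
{
	/* 64 bytes: may be expanded to rep stosq, depending on tuning */
	memset(c, 0, sizeof(*c));
}

void clear_head(struct ctx *c)
{
	/* 40 bytes: expanded to a short sequence of movq stores */
	memset(c, 0, offsetof(struct ctx, tail));
}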

I folded this into the last two patches:

diff --git a/include/linux/filter.h b/include/linux/filter.h
index d2b4260d9d0be..1588d208f1348 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -744,27 +744,40 @@ struct bpf_redirect_info {
 	struct bpf_nh_params nh;
 };
 
+enum bpf_ctx_init_type {
+	bpf_ctx_ri_init,
+	bpf_ctx_cpu_map_init,
+	bpf_ctx_dev_map_init,
+	bpf_ctx_xsk_map_init,
+};
+
 struct bpf_net_context {
 	struct bpf_redirect_info ri;
 	struct list_head cpu_map_flush_list;
 	struct list_head dev_map_flush_list;
 	struct list_head xskmap_map_flush_list;
+	unsigned int flags;
 };
 
+static inline bool bpf_net_ctx_need_init(struct bpf_net_context *bpf_net_ctx,
+					 enum bpf_ctx_init_type flag)
+{
+	return !(bpf_net_ctx->flags & (1 << flag));
+}
+
+static inline void bpf_net_ctx_set_flag(struct bpf_net_context *bpf_net_ctx,
+					enum bpf_ctx_init_type flag)
+{
+	bpf_net_ctx->flags |= 1 << flag;
+}
+
 static inline struct bpf_net_context *bpf_net_ctx_set(struct bpf_net_context *bpf_net_ctx)
 {
 	struct task_struct *tsk = current;
 
 	if (tsk->bpf_net_context != NULL)
 		return NULL;
-	memset(&bpf_net_ctx->ri, 0, sizeof(bpf_net_ctx->ri));
-
-	if (IS_ENABLED(CONFIG_BPF_SYSCALL)) {
-		INIT_LIST_HEAD(&bpf_net_ctx->cpu_map_flush_list);
-		INIT_LIST_HEAD(&bpf_net_ctx->dev_map_flush_list);
-	}
-	if (IS_ENABLED(CONFIG_XDP_SOCKETS))
-		INIT_LIST_HEAD(&bpf_net_ctx->xskmap_map_flush_list);
+	bpf_net_ctx->flags = 0;
 
 	tsk->bpf_net_context = bpf_net_ctx;
 	return bpf_net_ctx;
@@ -785,6 +798,11 @@ static inline struct bpf_redirect_info *bpf_net_ctx_get_ri(void)
 {
 	struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get();
 
+	if (bpf_net_ctx_need_init(bpf_net_ctx, bpf_ctx_ri_init)) {
+		memset(&bpf_net_ctx->ri, 0, offsetof(struct bpf_net_context, ri.nh));
+		bpf_net_ctx_set_flag(bpf_net_ctx, bpf_ctx_ri_init);
+	}
+
 	return &bpf_net_ctx->ri;
 }
 
@@ -792,6 +810,11 @@ static inline struct list_head *bpf_net_ctx_get_cpu_map_flush_list(void)
 {
 	struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get();
 
+	if (bpf_net_ctx_need_init(bpf_net_ctx, bpf_ctx_cpu_map_init)) {
+		INIT_LIST_HEAD(&bpf_net_ctx->cpu_map_flush_list);
+		bpf_net_ctx_set_flag(bpf_net_ctx, bpf_ctx_cpu_map_init);
+	}
+
 	return &bpf_net_ctx->cpu_map_flush_list;
 }
 
@@ -799,6 +822,11 @@ static inline struct list_head *bpf_net_ctx_get_dev_flush_list(void)
 {
 	struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get();
 
+	if (bpf_net_ctx_need_init(bpf_net_ctx, bpf_ctx_dev_map_init)) {
+		INIT_LIST_HEAD(&bpf_net_ctx->dev_map_flush_list);
+		bpf_net_ctx_set_flag(bpf_net_ctx, bpf_ctx_dev_map_init);
+	}
+
 	return &bpf_net_ctx->dev_map_flush_list;
 }
 
@@ -806,6 +834,11 @@ static inline struct list_head *bpf_net_ctx_get_xskmap_flush_list(void)
 {
 	struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get();
 
+	if (bpf_net_ctx_need_init(bpf_net_ctx, bpf_ctx_xsk_map_init)) {
+		INIT_LIST_HEAD(&bpf_net_ctx->xskmap_map_flush_list);
+		bpf_net_ctx_set_flag(bpf_net_ctx, bpf_ctx_xsk_map_init);
+	}
+
 	return &bpf_net_ctx->xskmap_map_flush_list;
 }
 

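The net effect is a generic lazy-init pattern: each member of the context
gets one bit in ->flags and is set up on first access, so resetting the
context is just zeroing the flags word. A minimal userspace sketch of the
idea (demo_ctx, demo_get_list and the tiny list_head are made-up
illustrations, not kernel API):

#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static inline void INIT_LIST_HEAD(struct list_head *list)
{
	list->next = list->prev = list;
}

enum demo_init_type { demo_list_init };

struct demo_ctx {
	struct list_head list;
	unsigned int flags;	/* one bit per lazily-initialized member */
};

static struct list_head *demo_get_list(struct demo_ctx *ctx)
{
	if (!(ctx->flags & (1 << demo_list_init))) {	/* first access? */
		INIT_LIST_HEAD(&ctx->list);
		ctx->flags |= 1 << demo_list_init;
	}
	return &ctx->list;
}

int main(void)
{
	struct demo_ctx ctx = { .flags = 0 };	/* the whole "reset" */
	struct list_head *l = demo_get_list(&ctx);

	printf("initialized: %d\n", l->next == l && l->prev == l);
	return 0;
}
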
Sebastian
