Message-ID: <20260122211854.5508-2-adubey@linux.ibm.com>
Date: Fri, 23 Jan 2026 02:48:49 +0530
From: adubey@...ux.ibm.com
To: bpf@...r.kernel.org, linuxppc-dev@...ts.ozlabs.org,
linux-kselftest@...r.kernel.org, linux-kernel@...r.kernel.org
Cc: hbathini@...ux.ibm.com, sachinpb@...ux.ibm.com, venkat88@...ux.ibm.com,
andrii@...nel.org, eddyz87@...il.com, mykolal@...com, ast@...nel.org,
daniel@...earbox.net, martin.lau@...ux.dev, song@...nel.org,
yonghong.song@...ux.dev, john.fastabend@...il.com, kpsingh@...nel.org,
sdf@...ichev.me, haoluo@...gle.com, jolsa@...nel.org,
christophe.leroy@...roup.eu, naveen@...nel.org, maddy@...ux.ibm.com,
mpe@...erman.id.au, npiggin@...il.com, memxor@...il.com,
iii@...ux.ibm.com, shuah@...nel.org
Subject: [PATCH v4 1/6] powerpc64/bpf: Move tail_call_cnt to bottom of frame
From: Abhishek Dubey <adubey@...ux.ibm.com>

In the conventional stack frame, tail_call_cnt is placed after the
NVR save area (BPF_PPC_STACK_SAVE), whereas in the trampoline frame
it sits after the stack alignment padding. The BPF JIT logic becomes
needlessly complex when the offset calculation for tail_call_cnt is
frame-sensitive; the objective is to have the same offset in both
frames.
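
For example, with the old constants in the redzone (no-frame) case,
the offsets relative to r1 work out to:

  local_tmp_var = -(BPF_PPC_STACK_SAVE + 32) = -(48 + 32) = -80
  tail_call_cnt = local_tmp_var + 24         = -56 = -(7 * 8)

which is where the hard-coded 7 * 8 in the trampoline code came from.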

The trampoline frame does not have a BPF_PPC_STACK_SAVE area, and
introducing one only to align the tail_call_cnt offset would leave
that extra memory unused.

Another challenge is the variable alignment padding sitting at the
bottom of the trampoline frame, which requires additional handling
to compute the tail_call_cnt offset.
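
For instance, since the trampoline frame size is rounded up to a
16-byte boundary (round_up(bpf_frame_size, SZL * 2)), the padding
can be 0 or 8 bytes depending on what precedes it, so no fixed
offset from the frame base reliably reaches tail_call_cnt.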

Address the above issues by moving tail_call_cnt to the bottom of
the stack frame, at offset 0 for both types of frames. This avoids
the additional bytes BPF_PPC_STACK_SAVE would require in the
trampoline frame, and a common offset computation for tail_call_cnt
serves both frames.
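
As a sanity check, here is a minimal userspace sketch (plain C, not
kernel code; the constants mirror the ones this patch defines) of
the new offset arithmetic for the redzone case:

  #include <stdio.h>

  #define BPF_PPC_TAILCALL     8         /* tail_call_cnt slot */
  #define BPF_PPC_STACK_SAVE   (6 * 8)   /* NVR save area */
  #define BPF_PPC_STACK_LOCALS 24        /* JIT-internal locals */

  int main(void)
  {
          /* Redzone case: offsets are negative wrt r1 */
          int local = -(BPF_PPC_TAILCALL + BPF_PPC_STACK_SAVE +
                        BPF_PPC_STACK_LOCALS);
          int tcc = local + BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE;

          printf("local_tmp_var: %d, tail_call_cnt: %d\n", local, tcc);
          /* tail_call_cnt lands at -8 == -BPF_PPC_TAILCALL, the same
           * constant the trampoline now uses for both frames.
           */
          return 0;
  }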

These changes are required by the second patch in the series, where
the 'reference to tail_call_info' of the main frame is copied into
the trampoline frame from the previous frame.

Signed-off-by: Abhishek Dubey <adubey@...ux.ibm.com>
---
arch/powerpc/net/bpf_jit.h | 1 +
arch/powerpc/net/bpf_jit_comp.c | 15 ++++++++++++---
arch/powerpc/net/bpf_jit_comp64.c | 31 ++++++++++++++++++++-----------
3 files changed, 33 insertions(+), 14 deletions(-)
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 8334cd667bba..9f6ec00bd02e 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -24,6 +24,7 @@
#define SZL sizeof(unsigned long)
#define BPF_INSN_SAFETY 64
+#define BPF_PPC_TAILCALL 8
#define PLANT_INSTR(d, idx, instr) \
do { if (d) { (d)[idx] = instr; } idx++; } while (0)
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 5e976730b2f5..d51c696221d7 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -604,8 +604,8 @@ static void bpf_trampoline_setup_tail_call_cnt(u32 *image, struct codegen_contex
int func_frame_offset, int r4_off)
{
if (IS_ENABLED(CONFIG_PPC64)) {
- /* See bpf_jit_stack_tailcallcnt() */
- int tailcallcnt_offset = 7 * 8;
+ /* See generated stack layout */
+ int tailcallcnt_offset = BPF_PPC_TAILCALL;
EMIT(PPC_RAW_LL(_R3, _R1, func_frame_offset - tailcallcnt_offset));
EMIT(PPC_RAW_STL(_R3, _R1, -tailcallcnt_offset));
@@ -620,7 +620,7 @@ static void bpf_trampoline_restore_tail_call_cnt(u32 *image, struct codegen_cont
{
if (IS_ENABLED(CONFIG_PPC64)) {
/* See bpf_jit_stack_tailcallcnt() */
- int tailcallcnt_offset = 7 * 8;
+ int tailcallcnt_offset = BPF_PPC_TAILCALL;
EMIT(PPC_RAW_LL(_R3, _R1, -tailcallcnt_offset));
EMIT(PPC_RAW_STL(_R3, _R1, func_frame_offset - tailcallcnt_offset));
@@ -714,6 +714,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
* LR save area [ r0 save (64-bit) ] | header
* [ r0 save (32-bit) ] |
* dummy frame for unwind [ back chain 1 ] --
+ * [ tail_call_cnt ] optional - 64-bit powerpc
* [ padding ] align stack frame
* r4_off [ r4 (tailcallcnt) ] optional - 32-bit powerpc
* alt_lr_off [ real lr (ool stub)] optional - actual lr
@@ -795,6 +796,14 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
}
}
+ /*
+ * Save tailcall count pointer at the same offset on the
+ * stack where subprogs expect it
+ */
+ if ((flags & BPF_TRAMP_F_CALL_ORIG) &&
+ (flags & BPF_TRAMP_F_TAIL_CALL_CTX))
+ bpf_frame_size += BPF_PPC_TAILCALL;
+
/* Padding to align stack frame, if any */
bpf_frame_size = round_up(bpf_frame_size, SZL * 2);
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 1fe37128c876..296e9ea14f2e 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -20,13 +20,15 @@
#include "bpf_jit.h"
/*
- * Stack layout:
+ * Stack layout with frame:
+ * Layout when setting up our own stack frame.
+ * Note: r1 is at the bottom; component offsets are positive wrt r1.
* Ensure the top half (upto local_tmp_var) stays consistent
* with our redzone usage.
*
* [ prev sp ] <-------------
- * [ nv gpr save area ] 6*8 |
* [ tail_call_cnt ] 8 |
+ * [ nv gpr save area ] 6*8 |
* [ local_tmp_var ] 24 |
* fp (r31) --> [ ebpf stack space ] upto 512 |
* [ frame header ] 32/112 |
@@ -36,10 +38,12 @@
/* for gpr non volatile registers BPG_REG_6 to 10 */
#define BPF_PPC_STACK_SAVE (6*8)
/* for bpf JIT code internal usage */
-#define BPF_PPC_STACK_LOCALS 32
+#define BPF_PPC_STACK_LOCALS 24
/* stack frame excluding BPF stack, ensure this is quadword aligned */
#define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + \
- BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE)
+ BPF_PPC_STACK_LOCALS + \
+ BPF_PPC_STACK_SAVE + \
+ BPF_PPC_TAILCALL)
/* BPF register usage */
#define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
@@ -87,27 +91,32 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
}
/*
+ * Stack layout with redzone:
* When not setting up our own stackframe, the redzone (288 bytes) usage is:
+ * Note: r1 is from the prev frame; component offsets are negative wrt r1.
*
* [ prev sp ] <-------------
* [ ... ] |
* sp (r1) ---> [ stack pointer ] --------------
- * [ nv gpr save area ] 6*8
* [ tail_call_cnt ] 8
+ * [ nv gpr save area ] 6*8
* [ local_tmp_var ] 24
* [ unused red zone ] 224
*/
static int bpf_jit_stack_local(struct codegen_context *ctx)
{
- if (bpf_has_stack_frame(ctx))
+ if (bpf_has_stack_frame(ctx)) {
+ /* Stack layout with frame */
return STACK_FRAME_MIN_SIZE + ctx->stack_size;
- else
- return -(BPF_PPC_STACK_SAVE + 32);
+ } else {
+ /* Stack layout with redzone */
+ return -(BPF_PPC_TAILCALL + BPF_PPC_STACK_SAVE + BPF_PPC_STACK_LOCALS);
+ }
}
static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx)
{
- return bpf_jit_stack_local(ctx) + 24;
+ return bpf_jit_stack_local(ctx) + BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE;
}
static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
@@ -115,7 +124,7 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
if (reg >= BPF_PPC_NVR_MIN && reg < 32)
return (bpf_has_stack_frame(ctx) ?
(BPF_PPC_STACKFRAME + ctx->stack_size) : 0)
- - (8 * (32 - reg));
+ - (8 * (32 - reg)) - BPF_PPC_TAILCALL;
pr_err("BPF JIT is asking about unknown registers");
BUG();
@@ -145,7 +154,7 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
if (ctx->seen & SEEN_TAILCALL) {
EMIT(PPC_RAW_LI(bpf_to_ppc(TMP_REG_1), 0));
/* this goes in the redzone */
- EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), _R1, -(BPF_PPC_STACK_SAVE + 8)));
+ EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), _R1, -(BPF_PPC_TAILCALL)));
} else {
EMIT(PPC_RAW_NOP());
EMIT(PPC_RAW_NOP());
--
2.48.1