[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <e1883afa-5ec3-4a92-ad7e-26cc896a853b@linux.ibm.com>
Date: Sat, 17 Jan 2026 16:11:04 +0530
From: Hari Bathini <hbathini@...ux.ibm.com>
To: adubey@...ux.ibm.com, bpf@...r.kernel.org, linuxppc-dev@...ts.ozlabs.org,
linux-kselftest@...r.kernel.org, linux-kernel@...r.kernel.org
Cc: sachinpb@...ux.ibm.com, venkat88@...ux.ibm.com, andrii@...nel.org,
eddyz87@...il.com, mykolal@...com, ast@...nel.org,
daniel@...earbox.net, martin.lau@...ux.dev, song@...nel.org,
yonghong.song@...ux.dev, john.fastabend@...il.com, kpsingh@...nel.org,
sdf@...ichev.me, haoluo@...gle.com, jolsa@...nel.org,
christophe.leroy@...roup.eu, naveen@...nel.org, maddy@...ux.ibm.com,
mpe@...erman.id.au, npiggin@...il.com, memxor@...il.com,
iii@...ux.ibm.com, shuah@...nel.org
Subject: Re: [PATCH v2 3/6] powerpc64/bpf: Tailcall handling with trampolines
On 17/01/26 4:09 pm, Hari Bathini wrote:
>
>
> On 14/01/26 5:14 pm, adubey@...ux.ibm.com wrote:
>> From: Abhishek Dubey <adubey@...ux.ibm.com>
>>
>> The trampoline mechanism sets up its own stack frame and
>> an additional dummy frame. We need to have additional JIT
>> instructions handling tailcall dereferencing in the
>> trampoline's context.
>>
>> We don't add the two stack frames pointed above, rather
>> add space for tail_call_info at bottom in trampoline frame
>> for ppc64. This makes the trampoline's frame consistent with
>> layout of all other frames wrt tail_call_info offset.
>>
>> Signed-off-by: Abhishek Dubey <adubey@...ux.ibm.com>
>> ---
>> arch/powerpc/net/bpf_jit_comp.c | 83 ++++++++++++++++++++++-----------
>> 1 file changed, 56 insertions(+), 27 deletions(-)
>>
>> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/
>> bpf_jit_comp.c
>> index 069a8822c30d..e3088cf089d1 100644
>> --- a/arch/powerpc/net/bpf_jit_comp.c
>> +++ b/arch/powerpc/net/bpf_jit_comp.c
>> @@ -606,33 +606,58 @@ static int invoke_bpf_mod_ret(u32 *image, u32
>> *ro_image, struct codegen_context
>> return 0;
>> }
>> -static void bpf_trampoline_setup_tail_call_cnt(u32 *image, struct
>> codegen_context *ctx,
>> - int func_frame_offset, int r4_off)
>> -{
>> - if (IS_ENABLED(CONFIG_PPC64)) {
>> - /* See bpf_jit_stack_tailcallinfo_offset() */
>> - int tailcallcnt_offset = 7 * 8;
>> -
>> - EMIT(PPC_RAW_LL(_R3, _R1, func_frame_offset -
>> tailcallcnt_offset));
>> - EMIT(PPC_RAW_STL(_R3, _R1, -tailcallcnt_offset));
>> - } else {
>> - /* See bpf_jit_stack_offsetof() and BPF_PPC_TC */
>> - EMIT(PPC_RAW_LL(_R4, _R1, r4_off));
>> - }
>> -}
>> +/*
>> + * Refer the label 'Generated stack layout' in this file for actual
>> stack
>> + * layout during trampoline invocation.
>> + *
>> + * Refer __arch_prepare_bpf_trampoline() for stack component details.
>> + *
>> + * The tailcall count/reference is present in caller's stack frame.
>> Its required
>> + * to copy the content of tail_call_info before calling the actual
>> function
>> + * to which the trampoline is attached.
>> + *
>> + */
>> -static void bpf_trampoline_restore_tail_call_cnt(u32 *image, struct
>> codegen_context *ctx,
>> - int func_frame_offset, int r4_off)
>> +static void bpf_trampoline_setup_tail_call_info(u32 *image, struct
>> codegen_context *ctx,
>> + int func_frame_offset,
>> + int bpf_dummy_frame_size, int r4_off)
>> {
>> if (IS_ENABLED(CONFIG_PPC64)) {
>> /* See bpf_jit_stack_tailcallinfo_offset() */
>
>> - int tailcallcnt_offset = 7 * 8;
>> + int tailcallinfo_offset = BPF_PPC_TAILCALL;
>
> This offset update should have been part of patch#1
>
>> + /*
>> + * func_frame_offset = ...(1)
>> + * bpf_dummy_frame_size + trampoline_frame_size
>> + */
>> + EMIT(PPC_RAW_LD(_R4, _R1, func_frame_offset));
>> + EMIT(PPC_RAW_LD(_R3, _R4, -tailcallinfo_offset));
>> +
>> + /*
>> + * Setting the tail_call_info in trampoline's frame
>> + * depending on if previous frame had value or reference.
>> + */
>> + EMIT(PPC_RAW_CMPLWI(_R3, MAX_TAIL_CALL_CNT));
>> + PPC_COND_BRANCH(COND_GT, CTX_NIA(ctx) + 8);
>> + EMIT(PPC_RAW_ADDI(_R3, _R4,
>> bpf_jit_stack_tailcallinfo_offset(ctx)));
>> + /*
>> + * From ...(1) above:
>> + * trampoline_frame_bottom = ...(2)
>> + * func_frame_offset - bpf_dummy_frame_size
>> + *
>> + * Using ...(2) derived above:
>> + * trampoline_tail_call_info_offset = ...(3)
>> + * trampoline_frame_bottom - tailcallinfo_offset
>> + *
>> + * From ...(3):
>> + * Use trampoline_tail_call_info_offset to write reference of
>> main's
>> + * tail_call_info in trampoline frame.
>> + */
>> + EMIT(PPC_RAW_STL(_R3, _R1, (func_frame_offset -
>> bpf_dummy_frame_size)
>> + - tailcallinfo_offset));
>> - EMIT(PPC_RAW_LL(_R3, _R1, -tailcallcnt_offset));
>> - EMIT(PPC_RAW_STL(_R3, _R1, func_frame_offset -
>> tailcallcnt_offset));
>> } else {
>> /* See bpf_jit_stack_offsetof() and BPF_PPC_TC */
>> - EMIT(PPC_RAW_STL(_R4, _R1, r4_off));
>> + EMIT(PPC_RAW_LL(_R4, _R1, r4_off));
>> }
>> }
>> @@ -720,6 +745,7 @@ static int __arch_prepare_bpf_trampoline(struct
>> bpf_tramp_image *im, void *rw_im
>> * LR save area [ r0 save (64-bit) ] | header
>> * [ r0 save (32-bit) ] |
>> * dummy frame for unwind [ back chain 1 ] --
>> + * [ tail_call_info ] non
>> optional - 64-bit powerpc
Also, why is this non-optional? Couldn't it be made optional, depending on
the BPF_TRAMP_F_CALL_ORIG and BPF_TRAMP_F_TAIL_CALL_CTX flags?
>> * [ padding ] align
>> stack frame
>> * r4_off [ r4 (tailcallcnt) ] optional -
>> 32-bit powerpc
>> * alt_lr_off [ real lr (ool stub)] optional -
>> actual lr
>
>> @@ -801,8 +827,14 @@ static int __arch_prepare_bpf_trampoline(struct
>> bpf_tramp_image *im, void *rw_im
>> }
>> }
>> - /* Padding to align stack frame, if any */
>> - bpf_frame_size = round_up(bpf_frame_size, SZL * 2);
>> + if (!(bpf_frame_size % (2 * SZL))) {
>> + /* Stack is 16-byte aligned */
>> + /* Room for padding followed by 64-bit tail_call_info */
>> + bpf_frame_size += SZL + BPF_PPC_TAILCALL;
>> + } else {
>> + /* Room for 64-bit tail_call_info */
>> + bpf_frame_size += BPF_PPC_TAILCALL;
>> + }
>> /* Dummy frame size for proper unwind - includes 64-bytes red
>> zone for 64-bit powerpc */
>> bpf_dummy_frame_size = STACK_FRAME_MIN_SIZE + 64;
>
> This change assumes the size is at least 8-byte aligned, which is
> true today, but it is better to avoid that assumption by not touching
> the padding part. The above hunk could simply be:
>
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/
> bpf_jit_comp.c
> index 5e976730b2f5..266cc6f17dcc 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -795,6 +795,10 @@ static int __arch_prepare_bpf_trampoline(struct
> bpf_tramp_image *im, void *rw_im
> }
> }
>
> + /* Save tailcall count pointer at the same offset on the stack
> where subprogs expect it */
> + if ((flags & BPF_TRAMP_F_CALL_ORIG) && (flags &
> BPF_TRAMP_F_TAIL_CALL_CTX))
> + bpf_frame_size += SZL;
> +
> /* Padding to align stack frame, if any */
> bpf_frame_size = round_up(bpf_frame_size, SZL * 2);
>
>
> Patch#2 is not complete without this change. Please fold this patch
> into patch#2 itself.
>
- Hari
Powered by blists - more mailing lists