Message-ID: <da3aef2d-44ab-4642-9114-15ef1e724bac@linux.ibm.com>
Date: Thu, 14 Aug 2025 14:24:35 +0530
From: Hari Bathini <hbathini@...ux.ibm.com>
To: Saket Kumar Bhaskar <skb99@...ux.ibm.com>, bpf@...r.kernel.org,
linuxppc-dev@...ts.ozlabs.org, linux-kselftest@...r.kernel.org,
linux-kernel@...r.kernel.org
Cc: sachinpb@...ux.ibm.com, venkat88@...ux.ibm.com, andrii@...nel.org,
eddyz87@...il.com, mykolal@...com, ast@...nel.org,
daniel@...earbox.net, martin.lau@...ux.dev, song@...nel.org,
yonghong.song@...ux.dev, john.fastabend@...il.com, kpsingh@...nel.org,
sdf@...ichev.me, haoluo@...gle.com, jolsa@...nel.org,
christophe.leroy@...roup.eu, naveen@...nel.org, maddy@...ux.ibm.com,
mpe@...erman.id.au, npiggin@...il.com, memxor@...il.com,
iii@...ux.ibm.com, shuah@...nel.org
Subject: Re: [bpf-next 2/6] bpf,powerpc: Implement PROBE_MEM32 pseudo
instructions
On 05/08/25 11:57 am, Saket Kumar Bhaskar wrote:
> Add support for [LDX | STX | ST], PROBE_MEM32, [B | H | W | DW]
> instructions. They are similar to PROBE_MEM instructions with the
> following differences:
> - PROBE_MEM32 supports store.
> - PROBE_MEM32 relies on the verifier to clear the upper 32 bits of the
>   src/dst register.
> - PROBE_MEM32 adds the 64-bit kern_vm_start address (which is stored in _R26
>   in the prologue). Due to bpf_arena construction, such a _R26 + reg +
>   off16 access is guaranteed to be within the arena virtual range, so no
>   address check is needed at run time.
> - PROBE_MEM32 allows STX and ST. If they fault, the store is a nop. When
>   LDX faults, the destination register is zeroed.
>
> To support these on powerpc, we do tmp1 = _R26 + src/dst reg and then use
> tmp1 as the new src/dst register. This allows us to reuse most of the
> code for normal [LDX | STX | ST].
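Just to restate the approach in code for context: each PROBE_MEM32 access
gains one extra instruction in front of the regular load/store path, roughly
like the below (illustrative sketch only, not the exact hunk from this patch;
tmp1_reg/src_reg/ARENA_VM_START follow the naming used in bpf_jit_comp64.c):

	/* Rebase a PROBE_MEM32 address into the arena kernel mapping */
	if (BPF_MODE(code) == BPF_PROBE_MEM32) {
		/* tmp1 = kern_vm_start (_R26) + src/dst reg */
		EMIT(PPC_RAW_ADD(tmp1_reg, src_reg, bpf_to_ppc(ARENA_VM_START)));
		/* reuse the normal [LDX | STX | ST] code with tmp1 as src/dst */
		src_reg = tmp1_reg;
	}
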
>
> Signed-off-by: Saket Kumar Bhaskar <skb99@...ux.ibm.com>
> ---
> arch/powerpc/net/bpf_jit.h | 5 +-
> arch/powerpc/net/bpf_jit_comp.c | 10 ++-
> arch/powerpc/net/bpf_jit_comp32.c | 2 +-
> arch/powerpc/net/bpf_jit_comp64.c | 108 ++++++++++++++++++++++++++++--
> 4 files changed, 114 insertions(+), 11 deletions(-)
>
> diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
> index 4c26912c2e3c..2d095a873305 100644
> --- a/arch/powerpc/net/bpf_jit.h
> +++ b/arch/powerpc/net/bpf_jit.h
> @@ -161,9 +161,10 @@ struct codegen_context {
> unsigned int seen;
> unsigned int idx;
> unsigned int stack_size;
> - int b2p[MAX_BPF_JIT_REG + 2];
> + int b2p[MAX_BPF_JIT_REG + 3];
> unsigned int exentry_idx;
> unsigned int alt_exit_addr;
> + u64 arena_vm_start;
> };
>
> #define bpf_to_ppc(r) (ctx->b2p[r])
> @@ -201,7 +202,7 @@ int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg,
>
> int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
> struct codegen_context *ctx, int insn_idx,
> - int jmp_off, int dst_reg);
> + int jmp_off, int dst_reg, u32 code);
>
> #endif
>
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index c0684733e9d6..35bfdf4d8785 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -204,6 +204,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>
> /* Make sure that the stack is quadword aligned. */
> cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
> + cgctx.arena_vm_start = bpf_arena_get_kern_vm_start(fp->aux->arena);
>
> /* Scouting faux-generate pass 0 */
> if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
> @@ -326,7 +327,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
> */
> int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
> struct codegen_context *ctx, int insn_idx, int jmp_off,
> - int dst_reg)
> + int dst_reg, u32 code)
> {
> off_t offset;
> unsigned long pc;
> @@ -354,7 +355,12 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass
> (fp->aux->num_exentries * BPF_FIXUP_LEN * 4) +
> (ctx->exentry_idx * BPF_FIXUP_LEN * 4);
>
> - fixup[0] = PPC_RAW_LI(dst_reg, 0);
> + if ((BPF_CLASS(code) == BPF_LDX && BPF_MODE(code) == BPF_PROBE_MEM32) ||
> + (BPF_CLASS(code) == BPF_LDX && BPF_MODE(code) == BPF_PROBE_MEM))
> + fixup[0] = PPC_RAW_LI(dst_reg, 0);
> + else if (BPF_CLASS(code) == BPF_ST || BPF_CLASS(code) == BPF_STX)
> + fixup[0] = PPC_RAW_NOP();
> +
> if (IS_ENABLED(CONFIG_PPC32))
> fixup[1] = PPC_RAW_LI(dst_reg - 1, 0); /* clear higher 32-bit register too */
>
> diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
> index 0aace304dfe1..3087e744fb25 100644
> --- a/arch/powerpc/net/bpf_jit_comp32.c
> +++ b/arch/powerpc/net/bpf_jit_comp32.c
> @@ -1087,7 +1087,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
> }
>
> ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx, insn_idx,
> - jmp_off, dst_reg);
> + jmp_off, dst_reg, code);
> if (ret)
> return ret;
> }
> diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
> index 489de21fe3d6..16e62766c757 100644
> --- a/arch/powerpc/net/bpf_jit_comp64.c
> +++ b/arch/powerpc/net/bpf_jit_comp64.c
> @@ -44,6 +44,7 @@
> /* BPF register usage */
> #define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
> #define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
> +#define ARENA_VM_START (MAX_BPF_JIT_REG + 2)
>
> /* BPF to ppc register mappings */
> void bpf_jit_init_reg_mapping(struct codegen_context *ctx)
> @@ -61,6 +62,8 @@ void bpf_jit_init_reg_mapping(struct codegen_context *ctx)
> ctx->b2p[BPF_REG_7] = _R28;
> ctx->b2p[BPF_REG_8] = _R29;
> ctx->b2p[BPF_REG_9] = _R30;
> + /* non volatile register for kern_vm_start address */
> + ctx->b2p[ARENA_VM_START] = _R26;
> /* frame pointer aka BPF_REG_10 */
> ctx->b2p[BPF_REG_FP] = _R31;
> /* eBPF jit internal registers */
> @@ -69,8 +72,8 @@ void bpf_jit_init_reg_mapping(struct codegen_context *ctx)
> ctx->b2p[TMP_REG_2] = _R10;
> }
>
> -/* PPC NVR range -- update this if we ever use NVRs below r27 */
> -#define BPF_PPC_NVR_MIN _R27
> +/* PPC NVR range -- update this if we ever use NVRs below r26 */
> +#define BPF_PPC_NVR_MIN _R26
>
> static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
> {
> @@ -170,10 +173,17 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
> if (bpf_is_seen_register(ctx, bpf_to_ppc(i)))
> EMIT(PPC_RAW_STD(bpf_to_ppc(i), _R1, bpf_jit_stack_offsetof(ctx, bpf_to_ppc(i))));
>
> + if (ctx->arena_vm_start)
> + EMIT(PPC_RAW_STD(bpf_to_ppc(ARENA_VM_START), _R1,
> + bpf_jit_stack_offsetof(ctx, bpf_to_ppc(ARENA_VM_START))));
> +
I don't see a selftest that exercises arena and tail calls
together, but the above change is going to clobber the tail call count.
That is because the current stack layout does not account for this
new non-volatile register usage:
 /*
  *              [       prev sp       ] <--------------
  *              [  nv gpr save area   ] 5*8           |
  *              [    tail_call_cnt    ] 8             |
  *              [    local_tmp_var    ] 16            |
  * fp (r31) --> [  ebpf stack space   ] upto 512      |
  *              [     frame header    ] 32/112        |
  * sp (r1) ---> [    stack pointer    ] ---------------
  */
Please rework the above stack layout and the corresponding macros
accordingly.
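
Something along these lines, perhaps (untested sketch; the layout comment and
the macro name are the existing ones in bpf_jit_comp64.c, while the 6*8 value
and the alignment note below are my suggestion, not code from this series):

 /*
  *              [       prev sp       ] <--------------
  *              [  nv gpr save area   ] 6*8           |
  *              [    tail_call_cnt    ] 8             |
  *              [    local_tmp_var    ] 16            |
  * fp (r31) --> [  ebpf stack space   ] upto 512      |
  *              [     frame header    ] 32/112        |
  * sp (r1) ---> [    stack pointer    ] ---------------
  */

 /* for gpr non volatile registers BPF_REG_6 to 10, plus _R26 for arena_vm_start */
 #define BPF_PPC_STACK_SAVE	(6 * 8)

Note that BPF_PPC_STACKFRAME is expected to stay quadword aligned, so the
extra 8 bytes in the save area would need compensating padding (or a bump
in the locals area) to go along with this.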
> /* Setup frame pointer to point to the bpf stack area */
> if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP)))
> EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), _R1,
> STACK_FRAME_MIN_SIZE + ctx->stack_size));
> +
> + if (ctx->arena_vm_start)
> + PPC_LI64(bpf_to_ppc(ARENA_VM_START), ctx->arena_vm_start);
> }
>
> static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
> @@ -185,6 +195,10 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
> if (bpf_is_seen_register(ctx, bpf_to_ppc(i)))
> EMIT(PPC_RAW_LD(bpf_to_ppc(i), _R1, bpf_jit_stack_offsetof(ctx, bpf_to_ppc(i))));
>
> + if (ctx->arena_vm_start)
> + EMIT(PPC_RAW_LD(bpf_to_ppc(ARENA_VM_START), _R1,
> + bpf_jit_stack_offsetof(ctx, bpf_to_ppc(ARENA_VM_START))));
> +
> /* Tear down our stack frame */
> if (bpf_has_stack_frame(ctx)) {
> EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME + ctx->stack_size));
- Hari