lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAEyhmHQOo4ZC8tS549zChU3ozvsh0WAJ9SnQOeG=9mX8g2Gt5w@mail.gmail.com>
Date: Wed, 16 Jul 2025 20:32:54 +0800
From: Hengqi Chen <hengqi.chen@...il.com>
To: Chenghao Duan <duanchenghao@...inos.cn>
Cc: ast@...nel.org, daniel@...earbox.net, andrii@...nel.org, 
	yangtiezhu@...ngson.cn, chenhuacai@...nel.org, martin.lau@...ux.dev, 
	eddyz87@...il.com, song@...nel.org, yonghong.song@...ux.dev, 
	john.fastabend@...il.com, kpsingh@...nel.org, sdf@...ichev.me, 
	haoluo@...gle.com, jolsa@...nel.org, kernel@...0n.name, 
	linux-kernel@...r.kernel.org, loongarch@...ts.linux.dev, bpf@...r.kernel.org, 
	guodongtai@...inos.cn, youling.tang@...ux.dev, jianghaoran@...inos.cn
Subject: Re: [PATCH v3 5/5] LoongArch: BPF: Add bpf trampoline support for Loongarch

On Wed, Jul 9, 2025 at 1:51 PM Chenghao Duan <duanchenghao@...inos.cn> wrote:
>
> BPF trampoline is the critical infrastructure of the BPF subsystem, acting
> as a mediator between kernel functions and BPF programs. Numerous important
> features, such as using BPF programs for zero-overhead kernel introspection,
> rely on this key component.
>
> The related tests have passed, including the following technical points:
> 1. fentry
> 2. fmod_ret
> 3. fexit
>
> Co-developed-by: George Guo <guodongtai@...inos.cn>
> Signed-off-by: George Guo <guodongtai@...inos.cn>
> Signed-off-by: Chenghao Duan <duanchenghao@...inos.cn>
> ---
>  arch/loongarch/net/bpf_jit.c | 391 +++++++++++++++++++++++++++++++++++
>  arch/loongarch/net/bpf_jit.h |   6 +
>  2 files changed, 397 insertions(+)
>
> diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
> index 9cb01f0b0..6820558af 100644
> --- a/arch/loongarch/net/bpf_jit.c
> +++ b/arch/loongarch/net/bpf_jit.c
> @@ -7,6 +7,10 @@
>  #include <linux/memory.h>
>  #include "bpf_jit.h"
>
> +#define LOONGARCH_MAX_REG_ARGS 8
> +#define LOONGARCH_FENTRY_NINSNS 2
> +#define LOONGARCH_FENTRY_NBYTES (LOONGARCH_FENTRY_NINSNS * 4)
> +
>  #define REG_TCC                LOONGARCH_GPR_A6
>  #define TCC_SAVED      LOONGARCH_GPR_S5
>
> @@ -1400,6 +1404,16 @@ static int gen_jump_or_nops(void *target, void *ip, u32 *insns, bool is_call)
>                                   (unsigned long)ip, (unsigned long)target);
>  }
>
> +static int emit_call(struct jit_ctx *ctx, u64 addr)
> +{
> +       u64 ip;
> +
> +       if (addr && ctx->image && ctx->ro_image)
> +               ip = (u64)(ctx->image + ctx->idx);
> +
> +       return emit_jump_and_link(ctx, LOONGARCH_GPR_RA, ip, addr);
> +}
> +
>  int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
>                        void *old_addr, void *new_addr)
>  {
> @@ -1457,3 +1471,380 @@ void *bpf_arch_text_copy(void *dst, void *src, size_t len)
>
>         return dst;
>  }
> +
> +static void store_args(struct jit_ctx *ctx, int nargs, int args_off)
> +{
> +       int i;
> +
> +       for (i = 0; i < nargs; i++) {
> +               emit_insn(ctx, std, LOONGARCH_GPR_A0 + i, LOONGARCH_GPR_FP, -args_off);
> +               args_off -= 8;
> +       }
> +}
> +
> +static void restore_args(struct jit_ctx *ctx, int nargs, int args_off)
> +{
> +       int i;
> +
> +       for (i = 0; i < nargs; i++) {
> +               emit_insn(ctx, ldd, LOONGARCH_GPR_A0 + i, LOONGARCH_GPR_FP, -args_off);
> +               args_off -= 8;
> +       }
> +}
> +
> +static int invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
> +                          int args_off, int retval_off,
> +                          int run_ctx_off, bool save_ret)
> +{
> +       int ret;
> +       u32 *branch;
> +       struct bpf_prog *p = l->link.prog;
> +       int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
> +
> +       if (l->cookie) {
> +               move_imm(ctx, LOONGARCH_GPR_T1, l->cookie, false);
> +               emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -run_ctx_off + cookie_off);
> +       } else {
> +               emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP,
> +                         -run_ctx_off + cookie_off);
> +       }
> +
> +       /* arg1: prog */
> +       move_imm(ctx, LOONGARCH_GPR_A0, (const s64)p, false);
> +       /* arg2: &run_ctx */
> +       emit_insn(ctx, addid, LOONGARCH_GPR_A1, LOONGARCH_GPR_FP, -run_ctx_off);
> +       ret = emit_call(ctx, (const u64)bpf_trampoline_enter(p));
> +       if (ret)
> +               return ret;
> +
> +       /* store prog start time */
> +       move_reg(ctx, LOONGARCH_GPR_S1, LOONGARCH_GPR_A0);
> +
> +       /* if (__bpf_prog_enter(prog) == 0)
> +        *      goto skip_exec_of_prog;
> +        *
> +        */
> +       branch = (u32 *)ctx->image + ctx->idx;
> +       /* nop reserved for conditional jump */
> +       emit_insn(ctx, nop);
> +
> +       /* arg1: &args_off */
> +       emit_insn(ctx, addid, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -args_off);
> +       if (!p->jited)
> +               move_imm(ctx, LOONGARCH_GPR_A1, (const s64)p->insnsi, false);
> +       ret = emit_call(ctx, (const u64)p->bpf_func);
> +       if (ret)
> +               return ret;
> +
> +       if (save_ret) {
> +               emit_insn(ctx, std, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off);
> +               emit_insn(ctx, std, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8));
> +       }
> +
> +       /* update branch with beqz */
> +       if (ctx->image) {
> +               int offset = (void *)(&ctx->image[ctx->idx]) - (void *)branch;
> +               *branch = larch_insn_gen_beq(LOONGARCH_GPR_A0, LOONGARCH_GPR_ZERO, offset);
> +       }
> +
> +       /* arg1: prog */
> +       move_imm(ctx, LOONGARCH_GPR_A0, (const s64)p, false);
> +       /* arg2: prog start time */
> +       move_reg(ctx, LOONGARCH_GPR_A1, LOONGARCH_GPR_S1);
> +       /* arg3: &run_ctx */
> +       emit_insn(ctx, addid, LOONGARCH_GPR_A2, LOONGARCH_GPR_FP, -run_ctx_off);
> +       ret = emit_call(ctx, (const u64)bpf_trampoline_exit(p));
> +
> +       return ret;
> +}
> +
> +static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
> +                              int args_off, int retval_off, int run_ctx_off, u32 **branches)
> +{
> +       int i;
> +
> +       emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -retval_off);
> +       for (i = 0; i < tl->nr_links; i++) {
> +               invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off,
> +                               run_ctx_off, true);
> +               emit_insn(ctx, ldd, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -retval_off);
> +               branches[i] = (u32 *)ctx->image + ctx->idx;
> +               emit_insn(ctx, nop);
> +       }
> +}
> +
> +u64 bpf_jit_alloc_exec_limit(void)
> +{
> +       return VMALLOC_END - VMALLOC_START;
> +}
> +
> +void *arch_alloc_bpf_trampoline(unsigned int size)
> +{
> +       return bpf_prog_pack_alloc(size, jit_fill_hole);
> +}
> +
> +void arch_free_bpf_trampoline(void *image, unsigned int size)
> +{
> +       bpf_prog_pack_free(image, size);
> +}
> +
> +static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
> +                                        const struct btf_func_model *m,
> +                                        struct bpf_tramp_links *tlinks,
> +                                        void *func_addr, u32 flags)
> +{
> +       int i;
> +       int stack_size = 0, nargs = 0;
> +       int retval_off, args_off, nargs_off, ip_off, run_ctx_off, sreg_off;
> +       struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
> +       struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
> +       struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
> +       int ret, save_ret;
> +       void *orig_call = func_addr;
> +       u32 **branches = NULL;
> +
> +       if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY))
> +               return -ENOTSUPP;
> +
> +       /*
> +        * FP + 8       [ RA to parent func ] return address to parent
> +        *                    function
> +        * FP + 0       [ FP of parent func ] frame pointer of parent
> +        *                    function
> +        * FP - 8       [ T0 to traced func ] return address of traced
> +        *                    function
> +        * FP - 16      [ FP of traced func ] frame pointer of traced
> +        *                    function
> +        *
> +        * FP - retval_off  [ return value      ] BPF_TRAMP_F_CALL_ORIG or
> +        *                    BPF_TRAMP_F_RET_FENTRY_RET
> +        *                  [ argN              ]
> +        *                  [ ...               ]
> +        * FP - args_off    [ arg1              ]
> +        *
> +        * FP - nargs_off   [ regs count        ]
> +        *
> +        * FP - ip_off      [ traced func   ] BPF_TRAMP_F_IP_ARG
> +        *
> +        * FP - run_ctx_off [ bpf_tramp_run_ctx ]
> +        *
> +        * FP - sreg_off    [ callee saved reg  ]
> +        *
> +        */
> +
> +       if (m->nr_args > LOONGARCH_MAX_REG_ARGS)
> +               return -ENOTSUPP;
> +
> +       if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY))
> +               return -ENOTSUPP;
> +
> +       stack_size = 0;
> +
> +       /* room of trampoline frame to store return address and frame pointer */
> +       stack_size += 16;
> +
> +       save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
> +       if (save_ret) {
> +               /* Save BPF R0 and A0 */
> +               stack_size += 16;
> +               retval_off = stack_size;
> +       }
> +
> +       /* room of trampoline frame to store args */
> +       nargs = m->nr_args;
> +       stack_size += nargs * 8;
> +       args_off = stack_size;
> +
> +       /* room of trampoline frame to store args number */
> +       stack_size += 8;
> +       nargs_off = stack_size;
> +
> +       /* room of trampoline frame to store ip address */
> +       if (flags & BPF_TRAMP_F_IP_ARG) {
> +               stack_size += 8;
> +               ip_off = stack_size;
> +       }
> +
> +       /* room of trampoline frame to store struct bpf_tramp_run_ctx */
> +       stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8);
> +       run_ctx_off = stack_size;
> +
> +       stack_size += 8;
> +       sreg_off = stack_size;
> +
> +       stack_size = round_up(stack_size, 16);
> +
> +       /* For the trampoline called from function entry */
> +       /* RA and FP for parent function*/
> +       emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -16);
> +       emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, 8);
> +       emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 0);
> +       emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 16);
> +
> +       /* RA and FP for traced function*/
> +       emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_size);
> +       emit_insn(ctx, std, LOONGARCH_GPR_T0, LOONGARCH_GPR_SP, stack_size - 8);
> +       emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16);
> +       emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size);
> +
> +       /* callee saved register S1 to pass start time */
> +       emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_FP, -sreg_off);
> +
> +       /* store ip address of the traced function */
> +       if (flags & BPF_TRAMP_F_IP_ARG) {
> +               move_imm(ctx, LOONGARCH_GPR_T1, (const s64)func_addr, false);
> +               emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -ip_off);
> +       }
> +
> +       /* store nargs number*/
> +       move_imm(ctx, LOONGARCH_GPR_T1, nargs, false);
> +       emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -nargs_off);
> +
> +       store_args(ctx, nargs, args_off);
> +
> +       /* To traced function */
> +       orig_call += LOONGARCH_FENTRY_NBYTES;
> +       if (flags & BPF_TRAMP_F_CALL_ORIG) {
> +               move_imm(ctx, LOONGARCH_GPR_A0, (const s64)im, false);
> +               ret = emit_call(ctx, (const u64)__bpf_tramp_enter);
> +               if (ret)
> +                       return ret;
> +       }
> +
> +       for (i = 0; i < fentry->nr_links; i++) {
> +               ret = invoke_bpf_prog(ctx, fentry->links[i], args_off, retval_off,
> +                                     run_ctx_off, flags & BPF_TRAMP_F_RET_FENTRY_RET);
> +               if (ret)
> +                       return ret;
> +       }
> +       if (fmod_ret->nr_links) {
> +               branches  = kcalloc(fmod_ret->nr_links, sizeof(u32 *), GFP_KERNEL);
> +               if (!branches)
> +                       return -ENOMEM;
> +
> +               invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off,
> +                                  run_ctx_off, branches);
> +       }
> +
> +       if (flags & BPF_TRAMP_F_CALL_ORIG) {
> +               restore_args(ctx, m->nr_args, args_off);
> +               ret = emit_call(ctx, (const u64)orig_call);
> +               if (ret)
> +                       goto out;
> +               emit_insn(ctx, std, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off);
> +               emit_insn(ctx, std, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8));
> +               im->ip_after_call = ctx->ro_image + ctx->idx;
> +               /* Reserve space for the move_imm + jirl instruction */
> +               emit_insn(ctx, nop);
> +               emit_insn(ctx, nop);
> +               emit_insn(ctx, nop);
> +               emit_insn(ctx, nop);
> +               emit_insn(ctx, nop);
> +       }
> +
> +       for (i = 0; ctx->image && i < fmod_ret->nr_links; i++) {
> +               int offset = (void *)(&ctx->image[ctx->idx]) - (void *)branches[i];
> +               *branches[i] = larch_insn_gen_bne(LOONGARCH_GPR_T1, LOONGARCH_GPR_ZERO, offset);
> +       }
> +
> +       for (i = 0; i < fexit->nr_links; i++) {
> +               ret = invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off,
> +                                     run_ctx_off, false);
> +               if (ret)
> +                       goto out;
> +       }
> +
> +       if (flags & BPF_TRAMP_F_CALL_ORIG) {
> +               im->ip_epilogue = ctx->ro_image + ctx->idx;
> +               move_imm(ctx, LOONGARCH_GPR_A0, (const s64)im, false);
> +               ret = emit_call(ctx, (const u64)__bpf_tramp_exit);
> +               if (ret)
> +                       goto out;
> +       }
> +
> +       if (flags & BPF_TRAMP_F_RESTORE_REGS)
> +               restore_args(ctx, m->nr_args, args_off);
> +
> +       if (save_ret) {
> +               emit_insn(ctx, ldd, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off);
> +               emit_insn(ctx, ldd, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8));
> +       }
> +
> +       emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_FP, -sreg_off);
> +
> +       /* trampoline called from function entry */
> +       emit_insn(ctx, ldd, LOONGARCH_GPR_T0, LOONGARCH_GPR_SP, stack_size - 8);
> +       emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16);
> +       emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_size);
> +
> +       emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, 8);
> +       emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 0);
> +       emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, 16);
> +
> +       if (flags & BPF_TRAMP_F_SKIP_FRAME)
> +               /* return to parent function */
> +               emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_RA, 0);
> +       else
> +               /* return to traced function */
> +               emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T0, 0);
> +
> +       ret = ctx->idx;
> +out:
> +       kfree(branches);
> +
> +       return ret;
> +}
> +
> +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
> +                               void *ro_image_end, const struct btf_func_model *m,
> +                               u32 flags, struct bpf_tramp_links *tlinks,
> +                               void *func_addr)
> +{
> +       int ret;
> +       void *image, *tmp;
> +       u32 size = ro_image_end - ro_image;
> +
> +       image = kvmalloc(size, GFP_KERNEL);
> +       if (!image)
> +               return -ENOMEM;
> +
> +       struct jit_ctx ctx = {
> +               .image = (union loongarch_instruction *)image,
> +               .ro_image = (union loongarch_instruction *)ro_image,
> +               .idx = 0,
> +       };
> +

Should ctx be declared at function entry, together with the other local variables, rather than after the kvmalloc() check?

> +       jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image));
> +       ret = __arch_prepare_bpf_trampoline(&ctx, im, m, tlinks, func_addr, flags);
> +       if (ret > 0 && validate_code(&ctx) < 0) {
> +               ret = -EINVAL;
> +               goto out;
> +       }
> +
> +       tmp = bpf_arch_text_copy(ro_image, image, size);
> +       if (IS_ERR(tmp)) {
> +               ret = PTR_ERR(tmp);
> +               goto out;
> +       }
> +
> +       bpf_flush_icache(ro_image, ro_image_end);
> +out:
> +       kvfree(image);
> +       return ret < 0 ? ret : size;
> +}
> +
> +int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
> +                            struct bpf_tramp_links *tlinks, void *func_addr)
> +{
> +       struct bpf_tramp_image im;
> +       struct jit_ctx ctx;
> +       int ret;
> +
> +       ctx.image = NULL;
> +       ctx.idx = 0;
> +
> +       ret = __arch_prepare_bpf_trampoline(&ctx, &im, m, tlinks, func_addr, flags);
> +
> +       /* Page align */
> +       return ret < 0 ? ret : round_up(ret * LOONGARCH_INSN_SIZE, PAGE_SIZE);
> +}
> diff --git a/arch/loongarch/net/bpf_jit.h b/arch/loongarch/net/bpf_jit.h
> index f9c569f53..5697158fd 100644
> --- a/arch/loongarch/net/bpf_jit.h
> +++ b/arch/loongarch/net/bpf_jit.h
> @@ -18,6 +18,7 @@ struct jit_ctx {
>         u32 *offset;
>         int num_exentries;
>         union loongarch_instruction *image;
> +       union loongarch_instruction *ro_image;
>         u32 stack_size;
>  };
>
> @@ -308,3 +309,8 @@ static inline int emit_tailcall_jmp(struct jit_ctx *ctx, u8 cond, enum loongarch
>
>         return -EINVAL;
>  }
> +
> +static inline void bpf_flush_icache(void *start, void *end)
> +{
> +       flush_icache_range((unsigned long)start, (unsigned long)end);
> +}
> --
> 2.43.0
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ