[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <9941341e8bd78f3563e0027a59cac8966f1e3666.camel@gmail.com>
Date: Fri, 03 Jan 2025 16:12:08 -0800
From: Eduard Zingerman <eddyz87@...il.com>
To: Peilin Ye <yepeilin@...gle.com>, bpf@...r.kernel.org
Cc: Alexei Starovoitov <ast@...nel.org>, Song Liu <song@...nel.org>,
Yonghong Song <yonghong.song@...ux.dev>, Daniel Borkmann
<daniel@...earbox.net>, Andrii Nakryiko <andrii@...nel.org>, Martin KaFai
Lau <martin.lau@...ux.dev>, John Fastabend <john.fastabend@...il.com>, KP
Singh <kpsingh@...nel.org>, Stanislav Fomichev <sdf@...ichev.me>, Hao Luo
<haoluo@...gle.com>, Jiri Olsa <jolsa@...nel.org>, "Paul E. McKenney"
<paulmck@...nel.org>, Puranjay Mohan <puranjay@...nel.org>, Xu Kuohai
<xukuohai@...weicloud.com>, Catalin Marinas <catalin.marinas@....com>, Will
Deacon <will@...nel.org>, Quentin Monnet <qmo@...nel.org>, Mykola Lysenko
<mykolal@...com>, Shuah Khan <shuah@...nel.org>, Josh Don
<joshdon@...gle.com>, Barret Rhoden <brho@...gle.com>, Neel Natu
<neelnatu@...gle.com>, Benjamin Segall <bsegall@...gle.com>, David Vernet
<dvernet@...a.com>, Dave Marchevsky <davemarchevsky@...a.com>,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH RFC bpf-next v1 2/4] bpf: Introduce load-acquire and
store-release instructions
On Sat, 2024-12-21 at 01:25 +0000, Peilin Ye wrote:
> Introduce BPF instructions with load-acquire and store-release
> semantics, as discussed in [1]. The following new flags are defined:
The '[1]' link is missing.
> BPF_ATOMIC_LOAD 0x10
> BPF_ATOMIC_STORE 0x20
> BPF_ATOMIC_TYPE(imm) ((imm) & 0xf0)
>
> BPF_RELAXED 0x0
> BPF_ACQUIRE 0x1
> BPF_RELEASE 0x2
> BPF_ACQ_REL 0x3
> BPF_SEQ_CST 0x4
>
> BPF_LOAD_ACQ (BPF_ATOMIC_LOAD | BPF_ACQUIRE)
> BPF_STORE_REL (BPF_ATOMIC_STORE | BPF_RELEASE)
>
> A "load-acquire" is a BPF_STX | BPF_ATOMIC instruction with the 'imm'
> field set to BPF_LOAD_ACQ (0x11).
>
> Similarly, a "store-release" is a BPF_STX | BPF_ATOMIC instruction with
> the 'imm' field set to BPF_STORE_REL (0x22).
[...]
> diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
> index 309c4aa1b026..2a354a44f209 100644
> --- a/kernel/bpf/disasm.c
> +++ b/kernel/bpf/disasm.c
> @@ -267,6 +267,20 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
> BPF_SIZE(insn->code) == BPF_DW ? "64" : "",
> bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
> insn->dst_reg, insn->off, insn->src_reg);
> + } else if (BPF_MODE(insn->code) == BPF_ATOMIC &&
> + insn->imm == BPF_LOAD_ACQ) {
> + verbose(cbs->private_data, "(%02x) %s%d = load_acquire((%s *)(r%d %+d))\n",
> + insn->code,
> + BPF_SIZE(insn->code) == BPF_DW ? "r" : "w", insn->dst_reg,
Nit: I think that 'BPF_DW ? "r" : "w"' part is not really necessary.
> + bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
> + insn->src_reg, insn->off);
> + } else if (BPF_MODE(insn->code) == BPF_ATOMIC &&
> + insn->imm == BPF_STORE_REL) {
> + verbose(cbs->private_data, "(%02x) store_release((%s *)(r%d %+d), %s%d)\n",
> + insn->code,
> + bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
> + insn->dst_reg, insn->off,
> + BPF_SIZE(insn->code) == BPF_DW ? "r" : "w", insn->src_reg);
> } else {
> verbose(cbs->private_data, "BUG_%02x\n", insn->code);
> }
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index fa40a0440590..dc3ecc925b97 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -3480,7 +3480,7 @@ static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
> }
>
> if (class == BPF_STX) {
> - /* BPF_STX (including atomic variants) has multiple source
> + /* BPF_STX (including atomic variants) has one or more source
> * operands, one of which is a ptr. Check whether the caller is
> * asking about it.
> */
> @@ -7550,6 +7550,8 @@ static int check_load(struct bpf_verifier_env *env, struct bpf_insn *insn, const
>
> static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
> {
> + const int bpf_size = BPF_SIZE(insn->code);
> + bool write_only = false;
> int load_reg;
> int err;
>
> @@ -7564,17 +7566,21 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
> case BPF_XOR | BPF_FETCH:
> case BPF_XCHG:
> case BPF_CMPXCHG:
> + if (bpf_size != BPF_W && bpf_size != BPF_DW) {
> + verbose(env, "invalid atomic operand size\n");
> + return -EINVAL;
> + }
> + break;
> + case BPF_LOAD_ACQ:
Several notes here:
- This skips the 'bpf_jit_supports_insn()' call at the end of the function.
- Also 'check_load()' allows source register to be PTR_TO_CTX,
but convert_ctx_access() is not adjusted to handle these atomic instructions.
(Just in case: context access is special, context structures are not "real",
e.g. during runtime real sk_buff is passed to the program, not __sk_buff,
in convert_ctx_access() verifier adjusts offsets of load and store instructions
to point to real fields, this is done per program type, e.g. see
filter.c:bpf_convert_ctx_access);
- backtrack_insn() needs special rules to handle BPF_LOAD_ACQ same way
it handles loads.
> + return check_load(env, insn, "atomic");
> + case BPF_STORE_REL:
> + write_only = true;
> break;
> default:
> verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
> return -EINVAL;
> }
>
> - if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
> - verbose(env, "invalid atomic operand size\n");
> - return -EINVAL;
> - }
> -
> /* check src1 operand */
> err = check_reg_arg(env, insn->src_reg, SRC_OP);
> if (err)
Note: this code fragment looks as follows:
/* check src1 operand */
err = check_reg_arg(env, insn->src_reg, SRC_OP);
if (err)
return err;
/* check src2 operand */
err = check_reg_arg(env, insn->dst_reg, SRC_OP);
if (err)
return err;
And there is no need for 'check_reg_arg(env, insn->dst_reg, SRC_OP)'
for BPF_STORE_REL.
> @@ -7615,6 +7621,9 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
> return -EACCES;
> }
>
> + if (write_only)
> + goto skip_read_check;
> +
> if (insn->imm & BPF_FETCH) {
> if (insn->imm == BPF_CMPXCHG)
> load_reg = BPF_REG_0;
> @@ -7636,14 +7645,15 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
> * case to simulate the register fill.
> */
> err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
> - BPF_SIZE(insn->code), BPF_READ, -1, true, false);
> + bpf_size, BPF_READ, -1, true, false);
> if (!err && load_reg >= 0)
> err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
> - BPF_SIZE(insn->code), BPF_READ, load_reg,
> - true, false);
> + bpf_size, BPF_READ, load_reg, true,
> + false);
> if (err)
> return err;
>
> +skip_read_check:
> if (is_arena_reg(env, insn->dst_reg)) {
> err = save_aux_ptr_type(env, PTR_TO_ARENA, false);
> if (err)
[...]
Powered by blists - more mailing lists