[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <790ec09d-cdbc-96cc-e7f7-2b0d7bb6dd35@fb.com>
Date: Mon, 31 Dec 2018 21:41:22 +0000
From: Yonghong Song <yhs@...com>
To: Jakub Kicinski <jakub.kicinski@...ronome.com>
CC: "alexei.starovoitov@...il.com" <alexei.starovoitov@...il.com>,
"daniel@...earbox.net" <daniel@...earbox.net>,
"netdev@...r.kernel.org" <netdev@...r.kernel.org>,
"oss-drivers@...ronome.com" <oss-drivers@...ronome.com>
Subject: Re: [RFC bpf-next v3 03/12] bpf: verifier: remove dead code
On 12/31/18 12:31 PM, Jakub Kicinski wrote:
> On Sun, 30 Dec 2018 22:02:10 +0000, Yonghong Song wrote:
>> On 12/28/18 7:09 PM, Jakub Kicinski wrote:
>>> +static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
>>> + u32 cnt)
>>> +{
>>> + struct bpf_subprog_info *need_first_linfo;
>>> + struct bpf_prog *prog = env->prog;
>>> + u32 i, l_off, l_cnt, nr_linfo;
>>> + struct bpf_line_info *linfo;
>>> +
>>> + nr_linfo = prog->aux->nr_linfo;
>>> + if (!nr_linfo || !cnt)
>>> + return 0;
>>> +
>>> + linfo = prog->aux->linfo;
>>> +
>>> + /* progs are already adjusted/removed, if program starts on off, it may
>>> + * had its start cut off and its line info may need to be preserved
>>> + */
>>> + for (i = 0; i < env->subprog_cnt; i++)
>>> + if (env->subprog_info[i].start >= off)
>>> + break;
>>> + if (i < env->subprog_cnt && env->subprog_info[i].start == off)
>>> + need_first_linfo = &env->subprog_info[i];
>>> + else
>>> + need_first_linfo = NULL;
>>> +
>>> + /* find first line info to remove */
>>> + for (i = 0; i < nr_linfo; i++)
>>> + if (linfo[i].insn_off >= off)
>>> + break;
>>> +
>>> + /* count lines to be removed */
>>> + l_off = i;
>>> + l_cnt = 0;
>>> + for (; i < nr_linfo; i++)
>>> + if (linfo[i].insn_off < off + cnt)
>>> + l_cnt++;
>>> + else
>>> + break;
>>> +
>>> + /* either we didn't actually cut the start or we can just use line info
>>> + * of first instruction if it exists
>>> + */
>>> + if (i < nr_linfo && linfo[i].insn_off == off + cnt)
>>> + need_first_linfo = NULL;
>>> + if (need_first_linfo) {
>>> + if (WARN_ONCE(!l_cnt,
>>> + "verifier bug - no linfo removed, yet its missing"))
>>> + return -EINVAL;
>>> + if (WARN_ONCE(need_first_linfo->linfo_idx < l_off ||
>>> + need_first_linfo->linfo_idx >= l_off + l_cnt,
>>> + "verifier bug - removed prog linfo not in removed range"))
>>> + return -EINVAL;
>>> + /* subprog linfo_idx is not adjusted yet, so just find out
>>> + * which line it used to be and swap it
>>> + */
>>> + memmove(linfo + l_off, linfo + need_first_linfo->linfo_idx,
>>> + sizeof(*linfo));
>>> + need_first_linfo->linfo_idx = l_off;
>>> + linfo[l_off].insn_off = off;
>>> +
>>> + l_off++;
>>> + l_cnt--;
>>> + }
>>
>> In this routine, we handle need_first_linfo if the first insn of a
>> subprogram is deleted.
>> I wonder whether we need to handle the last deleted linfo as well. For
>> example:
>> func1:
>> line_info1:
>> insn1
>> insn2
>> func2:
>> line_info2:
>> insn3
>> insn4:
>>
>> if insn2 and insn3 are deleted, the above algorithm will produce:
>> func1:
>> line_info1:
>> insn1
>> func2:
>> insn4
>>
>> func2 is missing the first line info, which should be line_info2 with
>> adjusted insn offset to the start of func2.
>
> Mm.. should not happen, we should end up with:
>
> func1:
> line_info1:
> insn1
> func2:
> line_info2:
> insn4
>
> func2 after func adjust will start at off and there is no line info for
> off + cnt (insn4), so we will preserve line_info2.
Thanks for verification, I missed that
>>> + /* count lines to be removed */
>>> + l_off = i;
>>> + l_cnt = 0;
>>> + for (; i < nr_linfo; i++)
>>> + if (linfo[i].insn_off < off + cnt)
>>> + l_cnt++;
>>> + else
>>> + break;
once you reached linfo[i].insn_off >= off + cnt, no more counting.
since l_cnt starts from 0, so the last one is actually preserved.
>
> I've added the following test_btf test, just to be sure:
>
> {
> .descr = "line_info (dead end + subprog start w/ no linfo)",
> .raw_types = {
> BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
> BTF_FUNC_PROTO_ENC(1, 1), /* [2] */
> BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
> BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */
> BTF_FUNC_ENC(NAME_TBD, 2), /* [4] */
> BTF_END_RAW,
> },
> BTF_STR_SEC("\0int\0x\0main\0func\0/* main linfo */\0/* func linfo */"),
> .insns = {
> BPF_MOV64_IMM(BPF_REG_0, 0),
> BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 1, 2), /* dead */
> BPF_CALL_REL(2),
> BPF_EXIT_INSN(),
> BPF_EXIT_INSN(), /* dead */
> /* func */
> BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* dead */
> BPF_MOV64_IMM(BPF_REG_0, 0),
> BPF_EXIT_INSN(),
> },
> .prog_type = BPF_PROG_TYPE_TRACEPOINT,
> .func_info_cnt = 2,
> .func_info_rec_size = 8,
> .func_info = { {0, 3}, {5, 4}, },
> .line_info = {
> BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10),
> BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10),
> BTF_END_RAW,
> },
> .line_info_rec_size = sizeof(struct bpf_line_info),
> .nr_jited_ksyms = 2,
> },
>
> ~# bpftool prog dump xlated id 48
> int main(int x):
> ; /* main linfo */
> 0: (b7) r0 = 0
> 1: (85) call pc+1#0xffffffffc041e782
> 2: (95) exit
> int func(int x):
> ; /* func linfo */
> 3: (b7) r0 = 0
> 4: (95) exit
>
> ~# bpftool prog dump jited id 48
> int main(int x):
> 0xffffffffc041cb2f:
> ; /* main linfo */
> 0: push %rbp
> 1: mov %rsp,%rbp
> 4: sub $0x28,%rsp
> b: sub $0x28,%rbp
> f: mov %rbx,0x0(%rbp)
> 13: mov %r13,0x8(%rbp)
> 17: mov %r14,0x10(%rbp)
> 1b: mov %r15,0x18(%rbp)
> 1f: xor %eax,%eax
> 21: mov %rax,0x20(%rbp)
> 25: xor %eax,%eax
> 27: callq 0x0000000000001c53
> 2c: mov 0x0(%rbp),%rbx
> 30: mov 0x8(%rbp),%r13
> 34: mov 0x10(%rbp),%r14
> 38: mov 0x18(%rbp),%r15
> 3c: add $0x28,%rbp
> 40: leaveq
> 41: retq
>
> int func(int x):
> 0xffffffffc041e782:
> ; /* func linfo */
> 0: push %rbp
> 1: mov %rsp,%rbp
> 4: sub $0x28,%rsp
> b: sub $0x28,%rbp
> f: mov %rbx,0x0(%rbp)
> 13: mov %r13,0x8(%rbp)
> 17: mov %r14,0x10(%rbp)
> 1b: mov %r15,0x18(%rbp)
> 1f: xor %eax,%eax
> 21: mov %rax,0x20(%rbp)
> 25: xor %eax,%eax
> 27: mov 0x0(%rbp),%rbx
> 2b: mov 0x8(%rbp),%r13
> 2f: mov 0x10(%rbp),%r14
> 33: mov 0x18(%rbp),%r15
> 37: add $0x28,%rbp
> 3b: leaveq
> 3c: retq
>
>> Another example:
>> func1:
>> line_info1:
>> insn1
>> insn2
>> line_info2:
>> insn3
>> insn4
>> If insn2 and insn3 are deleted, we will get
>> func1:
>> line_info1:
>> insn1
>> insn4
>> This is not ideal either. We should attribute insn4 to line_info2.
>
> Cool, I'm a little in the dark on what the definition of line info is,
> so your suggestions are very much appreciated! If you're saying that
> all instructions below line info are considered part of that "line",
> that'd simplify the code.
>
> So should I always preserve "last" line info if first live instruction
> doesn't have one?
>
> linfo1:
> insn1
> insn2 /* dead */
> linfo2:
> insn3 /* dead */
> insn4
> linfo3:
> insn5
>
> ||
> \/
>
> linfo1:
> insn1
> linfo2:
> insn4
> linfo3:
> insn5
Yes, the above is the desirable output.
>
>
> That'd simplify things quite a bit!
>
>>> +
>>> + /* remove the line info which refers to the removed instructions */
>>> + if (l_cnt) {
>>> + memmove(linfo + l_off, linfo + i,
>>> + sizeof(*linfo) * (nr_linfo - i));
>>> +
>>> + prog->aux->nr_linfo -= l_cnt;
>>> + nr_linfo = prog->aux->nr_linfo;
>>> + }
>>> +
>>> + /* pull all linfo[i].insn_off >= off + cnt in by cnt */
>>> + for (i = l_off; i < nr_linfo; i++)
>>> + linfo[i].insn_off -= cnt;
>>> +
>>> + /* fix up all subprogs (incl. 'exit') which start >= off */
>>> + for (i = 0; i <= env->subprog_cnt; i++)
>>> + if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
>>> + env->subprog_info[i].linfo_idx -= l_cnt;
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
>>> +{
>>> + struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
>>> + unsigned int orig_prog_len = env->prog->len;
>>> + int err;
>>> +
>>> + if (!cnt)
>>> + return 0;
>>
>> This check is probably not needed as all call sites (including
>> patch #4) guarantees non-zero cnt.
>
> Ack, thanks for the review, I was unsure if this is not convention.
>
Powered by blists - more mailing lists