[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <34E2BF41-03E0-4DEC-ABF3-72C8FF7B4E4A@redhat.com>
Date: Thu, 24 Jun 2021 11:42:12 +0200
From: Eelco Chaudron <echaudro@...hat.com>
To: John Fastabend <john.fastabend@...il.com>
Cc: Lorenzo Bianconi <lorenzo@...nel.org>, bpf@...r.kernel.org,
netdev@...r.kernel.org, lorenzo.bianconi@...hat.com,
davem@...emloft.net, kuba@...nel.org, ast@...nel.org,
daniel@...earbox.net, shayagr@...zon.com, sameehj@...zon.com,
dsahern@...nel.org, brouer@...hat.com, jasowang@...hat.com,
alexander.duyck@...il.com, saeed@...nel.org,
maciej.fijalkowski@...el.com, magnus.karlsson@...el.com,
tirthendu.sarkar@...el.com
Subject: Re: [PATCH v9 bpf-next 10/14] bpf: add multi-buffer support to xdp copy helpers
On 23 Jun 2021, at 1:49, John Fastabend wrote:
> Lorenzo Bianconi wrote:
>> From: Eelco Chaudron <echaudro@...hat.com>
>>
>> This patch adds support for multi-buffer for the following helpers:
>> - bpf_xdp_output()
>> - bpf_perf_event_output()
>>
>> Signed-off-by: Eelco Chaudron <echaudro@...hat.com>
>> Signed-off-by: Lorenzo Bianconi <lorenzo@...nel.org>
>> ---
>
> Ah ok so at least xdp_output will work with all bytes. But this is
> getting close to having access into the frags so I think doing
> the last bit shouldn't be too hard?
Guess you are talking about multi-buffer access in the XDP program?
I did suggest an API a while back, https://lore.kernel.org/bpf/FD3E6E08-DE78-4FBA-96F6-646C93E88631@redhat.com/, but I have not had time to work on it. I guess the difficult part is convincing the verifier to allow the data to be accessed.
>
>> kernel/trace/bpf_trace.c | 3 +
>> net/core/filter.c | 72 +++++++++-
>> .../selftests/bpf/prog_tests/xdp_bpf2bpf.c | 127 ++++++++++++------
>> .../selftests/bpf/progs/test_xdp_bpf2bpf.c | 2 +-
>> 4 files changed, 160 insertions(+), 44 deletions(-)
>>
>> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
>> index d2d7cf6cfe83..ee926ec64f78 100644
>> --- a/kernel/trace/bpf_trace.c
>> +++ b/kernel/trace/bpf_trace.c
>> @@ -1365,6 +1365,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
>>
>> extern const struct bpf_func_proto bpf_skb_output_proto;
>> extern const struct bpf_func_proto bpf_xdp_output_proto;
>> +extern const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto;
>>
>> BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
>> struct bpf_map *, map, u64, flags)
>> @@ -1460,6 +1461,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>> return &bpf_sock_from_file_proto;
>> case BPF_FUNC_get_socket_cookie:
>> return &bpf_get_socket_ptr_cookie_proto;
>> + case BPF_FUNC_xdp_get_buff_len:
>> + return &bpf_xdp_get_buff_len_trace_proto;
>> #endif
>> case BPF_FUNC_seq_printf:
>> return prog->expected_attach_type == BPF_TRACE_ITER ?
>> diff --git a/net/core/filter.c b/net/core/filter.c
>> index b0855f2d4726..f7211b7908a9 100644
>> --- a/net/core/filter.c
>> +++ b/net/core/filter.c
>> @@ -3939,6 +3939,15 @@ const struct bpf_func_proto bpf_xdp_get_buff_len_proto = {
>> .arg1_type = ARG_PTR_TO_CTX,
>> };
>>
>> +BTF_ID_LIST_SINGLE(bpf_xdp_get_buff_len_bpf_ids, struct, xdp_buff)
>> +
>> +const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto = {
>> + .func = bpf_xdp_get_buff_len,
>> + .gpl_only = false,
>> + .arg1_type = ARG_PTR_TO_BTF_ID,
>> + .arg1_btf_id = &bpf_xdp_get_buff_len_bpf_ids[0],
>> +};
>> +
>> BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
>> {
>> void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */
>> @@ -4606,10 +4615,56 @@ static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = {
>> };
>> #endif
>>
>> -static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
>> +static unsigned long bpf_xdp_copy(void *dst_buff, const void *ctx,
>> unsigned long off, unsigned long len)
>> {
>> - memcpy(dst_buff, src_buff + off, len);
>> + struct xdp_buff *xdp = (struct xdp_buff *)ctx;
>> + struct skb_shared_info *sinfo;
>> + unsigned long base_len;
>> +
>> + if (likely(!xdp_buff_is_mb(xdp))) {
>> + memcpy(dst_buff, xdp->data + off, len);
>> + return 0;
>> + }
>> +
>> + base_len = xdp->data_end - xdp->data;
>> + sinfo = xdp_get_shared_info_from_buff(xdp);
>> + do {
>> + const void *src_buff = NULL;
>> + unsigned long copy_len = 0;
>> +
>> + if (off < base_len) {
>> + src_buff = xdp->data + off;
>> + copy_len = min(len, base_len - off);
>> + } else {
>> + unsigned long frag_off_total = base_len;
>> + int i;
>> +
>> + for (i = 0; i < sinfo->nr_frags; i++) {
>> + skb_frag_t *frag = &sinfo->frags[i];
>> + unsigned long frag_len, frag_off;
>> +
>> + frag_len = skb_frag_size(frag);
>> + frag_off = off - frag_off_total;
>> + if (frag_off < frag_len) {
>> + src_buff = skb_frag_address(frag) +
>> + frag_off;
>> + copy_len = min(len,
>> + frag_len - frag_off);
>> + break;
>> + }
>> + frag_off_total += frag_len;
>> + }
>> + }
>> + if (!src_buff)
>> + break;
>> +
>> + memcpy(dst_buff, src_buff, copy_len);
>> + off += copy_len;
>> + len -= copy_len;
>> + dst_buff += copy_len;
>
> This block reads odd to be because it requires looping over the frags
> multiple times? Why not something like this,
>
> if (off < base_len) {
> src_buff = xdp->data + off
> copy_len = min...
> memcpy(dst_buff, src_buff, copy_len)
> off += copylen
> len -= copylen
> dst_buff += copylen;
> }
>
> for (i = 0; i , nr_frags; i++) {
> frag = ...
> ...
> if frag_off < fraglen
> ...
> memcpy()
> update(off, len, dst_buff)
> }
>
>
> Maybe use a helper to set off,len and dst_buff if worried about the
> duplication. Seems cleaner than walking through 0..n-1 frags for
> each copy.
You are right, it does look odd; I will re-write this in the next iteration.
>> + } while (len);
>> +
>> return 0;
>> }
Powered by blists - more mailing lists