[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <9c87a535-a08a-e4d4-969e-2e8eae348846@iogearbox.net>
Date: Thu, 15 Mar 2018 21:25:57 +0100
From: Daniel Borkmann <daniel@...earbox.net>
To: John Fastabend <john.fastabend@...il.com>, davem@...emloft.net,
ast@...nel.org, davejwatson@...com
Cc: netdev@...r.kernel.org
Subject: Re: [bpf-next PATCH v2 08/18] bpf: sk_msg program helper
bpf_sk_msg_pull_data
On 03/12/2018 08:23 PM, John Fastabend wrote:
[...]
> /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> * function eBPF program intends to call
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 2c73af0..7b9e63e 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -1956,6 +1956,134 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
> .arg2_type = ARG_ANYTHING,
> };
>
> +BPF_CALL_4(bpf_msg_pull_data,
> + struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
> +{
> + unsigned int len = 0, offset = 0, copy = 0;
> + struct scatterlist *sg = msg->sg_data;
> + int first_sg, last_sg, i, shift;
> + unsigned char *p, *to, *from;
> + int bytes = end - start;
> + struct page *page;
> +
> + if (unlikely(end < start))
> + return -EINVAL;
Actually should be:
if (unlikely(flags || end <= start))
return -EINVAL;
> + /* First find the starting scatterlist element */
> + i = msg->sg_start;
> + do {
> + len = sg[i].length;
> + offset += len;
> + if (start < offset + len)
> + break;
> + i++;
> + if (i == MAX_SKB_FRAGS)
> + i = 0;
> + } while (i != msg->sg_end);
> +
> + if (unlikely(start >= offset + len))
> + return -EINVAL;
> +
> + if (!msg->sg_copy[i] && bytes <= len)
> + goto out;
> +
> + first_sg = i;
> +
> + /* At this point we need to linearize multiple scatterlist
> + * elements or a single shared page. Either way we need to
> + * copy into a linear buffer exclusively owned by BPF. Then
> + * place the buffer in the scatterlist and fixup the original
> + * entries by removing the entries now in the linear buffer
> + * and shifting the remaining entries. For now we do not try
> + * to copy partial entries to avoid complexity of running out
> + * of sg_entry slots. The downside is reading a single byte
> + * will copy the entire sg entry.
> + */
> + do {
> + copy += sg[i].length;
> + i++;
> + if (i == MAX_SKB_FRAGS)
> + i = 0;
> + if (bytes < copy)
> + break;
> + } while (i != msg->sg_end);
> + last_sg = i;
> +
> + if (unlikely(copy < end - start))
> + return -EINVAL;
> +
> + page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC, get_order(copy));
Also, the allocation result is not checked here; this needs:

	if (unlikely(!page))
		return -ENOMEM;
> + p = page_address(page);
> + offset = 0;
> +
> + i = first_sg;
> + do {
> + from = sg_virt(&sg[i]);
> + len = sg[i].length;
> + to = p + offset;
> +
> + memcpy(to, from, len);
> + offset += len;
> + sg[i].length = 0;
> + put_page(sg_page(&sg[i]));
> +
> + i++;
> + if (i == MAX_SKB_FRAGS)
> + i = 0;
> + } while (i != last_sg);
> +
> + sg[first_sg].length = copy;
> + sg_set_page(&sg[first_sg], page, copy, 0);
> +
> + /* To repair sg ring we need to shift entries. If we only
> + * had a single entry though we can just replace it and
> + * be done. Otherwise walk the ring and shift the entries.
> + */
> + shift = last_sg - first_sg - 1;
> + if (!shift)
> + goto out;
> +
> + i = first_sg + 1;
> + do {
> + int move_from;
> +
> + if (i + shift >= MAX_SKB_FRAGS)
> + move_from = i + shift - MAX_SKB_FRAGS;
> + else
> + move_from = i + shift;
> +
> + if (move_from == msg->sg_end)
> + break;
> +
> + sg[i] = sg[move_from];
> + sg[move_from].length = 0;
> + sg[move_from].page_link = 0;
> + sg[move_from].offset = 0;
> +
> + i++;
> + if (i == MAX_SKB_FRAGS)
> + i = 0;
> + } while (1);
> + msg->sg_end -= shift;
> + if (msg->sg_end < 0)
> + msg->sg_end += MAX_SKB_FRAGS;
> +out:
> + msg->data = sg_virt(&sg[i]) + start - offset;
> + msg->data_end = msg->data + bytes;
> +
> + return 0;
> +}
> +
> +static const struct bpf_func_proto bpf_msg_pull_data_proto = {
> + .func = bpf_msg_pull_data,
> + .gpl_only = false,
> + .ret_type = RET_INTEGER,
> + .arg1_type = ARG_PTR_TO_CTX,
> + .arg2_type = ARG_ANYTHING,
> + .arg3_type = ARG_ANYTHING,
> + .arg4_type = ARG_ANYTHING,
> +};
> +
> BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
> {
> return task_get_classid(skb);
> @@ -2897,7 +3025,8 @@ bool bpf_helper_changes_pkt_data(void *func)
> func == bpf_l3_csum_replace ||
> func == bpf_l4_csum_replace ||
> func == bpf_xdp_adjust_head ||
> - func == bpf_xdp_adjust_meta)
> + func == bpf_xdp_adjust_meta ||
> + func == bpf_msg_pull_data)
> return true;
>
> return false;
> @@ -3666,6 +3795,8 @@ static const struct bpf_func_proto *sk_msg_func_proto(enum bpf_func_id func_id)
> return &bpf_msg_apply_bytes_proto;
> case BPF_FUNC_msg_cork_bytes:
> return &bpf_msg_cork_bytes_proto;
> + case BPF_FUNC_msg_pull_data:
> + return &bpf_msg_pull_data_proto;
> default:
> return bpf_base_func_proto(func_id);
> }
>
Powered by blists - more mailing lists