[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <9c87a535-a08a-e4d4-969e-2e8eae348846@iogearbox.net>
Date: Thu, 15 Mar 2018 21:25:57 +0100
From: Daniel Borkmann <daniel@...earbox.net>
To: John Fastabend <john.fastabend@...il.com>, davem@...emloft.net,
ast@...nel.org, davejwatson@...com
Cc: netdev@...r.kernel.org
Subject: Re: [bpf-next PATCH v2 08/18] bpf: sk_msg program helper
bpf_sk_msg_pull_data
On 03/12/2018 08:23 PM, John Fastabend wrote:
[...]
> /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> * function eBPF program intends to call
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 2c73af0..7b9e63e 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -1956,6 +1956,134 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
> .arg2_type = ARG_ANYTHING,
> };
>
> +BPF_CALL_4(bpf_msg_pull_data,
> + struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
> +{
> + unsigned int len = 0, offset = 0, copy = 0;
> + struct scatterlist *sg = msg->sg_data;
> + int first_sg, last_sg, i, shift;
> + unsigned char *p, *to, *from;
> + int bytes = end - start;
> + struct page *page;
> +
> + if (unlikely(end < start))
> + return -EINVAL;
Actually should be:
if (unlikely(flags || end <= start))
return -EINVAL;
> + /* First find the starting scatterlist element */
> + i = msg->sg_start;
> + do {
> + len = sg[i].length;
> + offset += len;
> + if (start < offset + len)
> + break;
> + i++;
> + if (i == MAX_SKB_FRAGS)
> + i = 0;
> + } while (i != msg->sg_end);
> +
> + if (unlikely(start >= offset + len))
> + return -EINVAL;
> +
> + if (!msg->sg_copy[i] && bytes <= len)
> + goto out;
> +
> + first_sg = i;
> +
> + /* At this point we need to linearize multiple scatterlist
> + * elements or a single shared page. Either way we need to
> + * copy into a linear buffer exclusively owned by BPF. Then
> + * place the buffer in the scatterlist and fixup the original
> + * entries by removing the entries now in the linear buffer
> + * and shifting the remaining entries. For now we do not try
> + * to copy partial entries to avoid complexity of running out
> + * of sg_entry slots. The downside is reading a single byte
> + * will copy the entire sg entry.
> + */
> + do {
> + copy += sg[i].length;
> + i++;
> + if (i == MAX_SKB_FRAGS)
> + i = 0;
> + if (bytes < copy)
> + break;
> + } while (i != msg->sg_end);
> + last_sg = i;
> +
> + if (unlikely(copy < end - start))
> + return -EINVAL;
> +
> + page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC, get_order(copy));
Also, the allocation result is not checked here; this needs:

	if (unlikely(!page))
		return -ENOMEM;
> + p = page_address(page);
> + offset = 0;
> +
> + i = first_sg;
> + do {
> + from = sg_virt(&sg[i]);
> + len = sg[i].length;
> + to = p + offset;
> +
> + memcpy(to, from, len);
> + offset += len;
> + sg[i].length = 0;
> + put_page(sg_page(&sg[i]));
> +
> + i++;
> + if (i == MAX_SKB_FRAGS)
> + i = 0;
> + } while (i != last_sg);
> +
> + sg[first_sg].length = copy;
> + sg_set_page(&sg[first_sg], page, copy, 0);
> +
> + /* To repair sg ring we need to shift entries. If we only
> + * had a single entry though we can just replace it and
> + * be done. Otherwise walk the ring and shift the entries.
> + */
> + shift = last_sg - first_sg - 1;
> + if (!shift)
> + goto out;
> +
> + i = first_sg + 1;
> + do {
> + int move_from;
> +
> + if (i + shift >= MAX_SKB_FRAGS)
> + move_from = i + shift - MAX_SKB_FRAGS;
> + else
> + move_from = i + shift;
> +
> + if (move_from == msg->sg_end)
> + break;
> +
> + sg[i] = sg[move_from];
> + sg[move_from].length = 0;
> + sg[move_from].page_link = 0;
> + sg[move_from].offset = 0;
> +
> + i++;
> + if (i == MAX_SKB_FRAGS)
> + i = 0;
> + } while (1);
> + msg->sg_end -= shift;
> + if (msg->sg_end < 0)
> + msg->sg_end += MAX_SKB_FRAGS;
> +out:
> + msg->data = sg_virt(&sg[i]) + start - offset;
> + msg->data_end = msg->data + bytes;
> +
> + return 0;
> +}
> +
> +static const struct bpf_func_proto bpf_msg_pull_data_proto = {
> + .func = bpf_msg_pull_data,
> + .gpl_only = false,
> + .ret_type = RET_INTEGER,
> + .arg1_type = ARG_PTR_TO_CTX,
> + .arg2_type = ARG_ANYTHING,
> + .arg3_type = ARG_ANYTHING,
> + .arg4_type = ARG_ANYTHING,
> +};
> +
> BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
> {
> return task_get_classid(skb);
> @@ -2897,7 +3025,8 @@ bool bpf_helper_changes_pkt_data(void *func)
> func == bpf_l3_csum_replace ||
> func == bpf_l4_csum_replace ||
> func == bpf_xdp_adjust_head ||
> - func == bpf_xdp_adjust_meta)
> + func == bpf_xdp_adjust_meta ||
> + func == bpf_msg_pull_data)
> return true;
>
> return false;
> @@ -3666,6 +3795,8 @@ static const struct bpf_func_proto *sk_msg_func_proto(enum bpf_func_id func_id)
> return &bpf_msg_apply_bytes_proto;
> case BPF_FUNC_msg_cork_bytes:
> return &bpf_msg_cork_bytes_proto;
> + case BPF_FUNC_msg_pull_data:
> + return &bpf_msg_pull_data_proto;
> default:
> return bpf_base_func_proto(func_id);
> }
>
Powered by blists - more mailing lists