lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:   Mon, 26 Nov 2018 07:43:16 -0800
From:   John Fastabend <john.fastabend@...il.com>
To:     Daniel Borkmann <daniel@...earbox.net>, ast@...nel.org
Cc:     netdev@...r.kernel.org
Subject: Re: [PATCH bpf-next 1/3] bpf: helper to pop data from messages

On 11/25/18 5:05 PM, Daniel Borkmann wrote:
> On 11/23/2018 02:38 AM, John Fastabend wrote:
>> This adds a BPF SK_MSG program helper so that we can pop data from a
>> msg. We use this to pop metadata from a previous push data call.
>>
>> Signed-off-by: John Fastabend <john.fastabend@...il.com>
>> ---
>>  include/uapi/linux/bpf.h |  13 +++-
>>  net/core/filter.c        | 169 +++++++++++++++++++++++++++++++++++++++++++++++
>>  net/ipv4/tcp_bpf.c       |  14 +++-
>>  3 files changed, 192 insertions(+), 4 deletions(-)
>>
>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> index c1554aa..64681f8 100644
>> --- a/include/uapi/linux/bpf.h
>> +++ b/include/uapi/linux/bpf.h
>> @@ -2268,6 +2268,16 @@ union bpf_attr {
>>   *
>>   *	Return
>>   *		0 on success, or a negative error in case of failure.
>> + *
>> + * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags)
>> + *	 Description
>> + *		Will remove 'pop' bytes from a msg starting at byte 'start'.
>> + *		This result in ENOMEM errors under certain situations where
>> + *		a allocation and copy are required due to a full ring buffer.
>> + *		However, the helper will try to avoid doing the allocation
>> + *		if possible. Other errors can occur if input parameters are
>> + *		invalid either do to start byte not being valid part of msg
>> + *		payload and/or pop value being to large.
>>   */
>>  #define __BPF_FUNC_MAPPER(FN)		\
>>  	FN(unspec),			\
>> @@ -2360,7 +2370,8 @@ union bpf_attr {
>>  	FN(map_push_elem),		\
>>  	FN(map_pop_elem),		\
>>  	FN(map_peek_elem),		\
>> -	FN(msg_push_data),
>> +	FN(msg_push_data),		\
>> +	FN(msg_pop_data),
>>  
>>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>>   * function eBPF program intends to call
>> diff --git a/net/core/filter.c b/net/core/filter.c
>> index f6ca38a..c6b35b5 100644
>> --- a/net/core/filter.c
>> +++ b/net/core/filter.c
>> @@ -2428,6 +2428,173 @@ static const struct bpf_func_proto bpf_msg_push_data_proto = {
>>  	.arg4_type	= ARG_ANYTHING,
>>  };
>>  
>> +static void sk_msg_shift_left(struct sk_msg *msg, int i)
>> +{
>> +	int prev;
>> +
>> +	do {
>> +		prev = i;
>> +		sk_msg_iter_var_next(i);
>> +		msg->sg.data[prev] = msg->sg.data[i];
>> +	} while (i != msg->sg.end);
>> +
>> +	sk_msg_iter_prev(msg, end);
>> +}
>> +
>> +static void sk_msg_shift_right(struct sk_msg *msg, int i)
>> +{
>> +	struct scatterlist tmp, sge;
>> +
>> +	sk_msg_iter_next(msg, end);
>> +	sge = sk_msg_elem_cpy(msg, i);
>> +	sk_msg_iter_var_next(i);
>> +	tmp = sk_msg_elem_cpy(msg, i);
>> +
>> +	while (i != msg->sg.end) {
>> +		msg->sg.data[i] = sge;
>> +		sk_msg_iter_var_next(i);
>> +		sge = tmp;
>> +		tmp = sk_msg_elem_cpy(msg, i);
>> +	}
>> +}
>> +
>> +BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
>> +	   u32, len, u64, flags)
>> +{
>> +	u32 i = 0, l, space, offset = 0;
>> +	u64 last = start + len;
>> +	int pop;
>> +
>> +	if (unlikely(flags))
>> +		return -EINVAL;
>> +
>> +	/* First find the starting scatterlist element */
>> +	i = msg->sg.start;
>> +	do {
>> +		l = sk_msg_elem(msg, i)->length;
>> +
>> +		if (start < offset + l)
>> +			break;
>> +		offset += l;
>> +		sk_msg_iter_var_next(i);
>> +	} while (i != msg->sg.end);
>> +
>> +	/* Bounds checks: start and pop must be inside message */
>> +	if (start >= offset + l || last >= msg->sg.size)
>> +		return -EINVAL;
>> +
>> +	space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
>> +
>> +	pop = len;
>> +	/* --------------| offset
>> +	 * -| start      |------- len ------|
>> +	 *
>> +	 *  |----- a ----|-------- pop -------|----- b ----|
>> +	 *  |______________________________________________| length
>> +	 *
>> +	 *
>> +	 * a:   region at front of scatter element to save
>> +	 * b:   region at back of scatter element to save when length > A + pop
>> +	 * pop: region to pop from element, same as input 'pop' here will be
>> +	 *      decremented below per iteration.
>> +	 *
>> +	 * Two top-level cases to handle when start != offset, first B is non
>> +	 * zero and second B is zero corresponding to when a pop includes more
>> +	 * than one element.
>> +	 *
>> +	 * Then if B is non-zero AND there is no space allocate space and
>> +	 * compact A, B regions into page. If there is space shift ring to
>> +	 * the rigth free'ing the next element in ring to place B, leaving
>> +	 * A untouched except to reduce length.
>> +	 */
>> +	if (start != offset) {
>> +		struct scatterlist *nsge, *sge = sk_msg_elem(msg, i);
>> +		int a = start;
>> +		int b = sge->length - pop - a;
>> +
>> +		sk_msg_iter_var_next(i);
>> +
>> +		if (pop < sge->length - a) {
>> +			if (space) {
>> +				sge->length = a;
>> +				sk_msg_shift_right(msg, i);
>> +				nsge = sk_msg_elem(msg, i);
>> +				get_page(sg_page(sge));
>> +				sg_set_page(nsge,
>> +					    sg_page(sge), b, offset + pop);
>> +			} else {
>> +				struct page *page, *orig;
>> +				u8 *to, *from;
>> +
>> +				page = alloc_pages(__GFP_NOWARN |
>> +						   __GFP_COMP   | GFP_ATOMIC,
>> +						   get_order(a + b));
>> +				if (unlikely(!page))
>> +					return -ENOMEM;
>> +
>> +				sge->length = a;
>> +				orig = sg_page(sge);
>> +				from = sg_virt(sge);
>> +				to = page_address(page);
>> +				memcpy(to, from, a);
>> +				memcpy(to + a, from + a + pop, b);
>> +				sg_set_page(sge, page, a + b, 0);
>> +				put_page(orig);
>> +			}
>> +			pop = 0;
>> +		} else if (pop >= sge->length - a) {
>> +			sge->length = a;
>> +			pop -= (sge->length - a);
>> +		}
>> +	}
>> +
>> +	/* From above the current layout _must_ be as follows,
>> +	 *
>> +	 * -| offset
>> +	 * -| start
>> +	 *
>> +	 *  |---- pop ---|---------------- b ------------|
>> +	 *  |____________________________________________| length
>> +	 *
>> +	 * Offset and start of the current msg elem are equal because in the
>> +	 * previous case we handled offset != start and either consumed the
>> +	 * entire element and advanced to the next element OR pop == 0.
>> +	 *
>> +	 * Two cases to handle here are first pop is less than the length
>> +	 * leaving some remainder b above. Simply adjust the element's layout
>> +	 * in this case. Or pop >= length of the element so that b = 0. In this
>> +	 * case advance to next element decrementing pop.
>> +	 */
>> +	while (pop) {
>> +		struct scatterlist *sge = sk_msg_elem(msg, i);
>> +
>> +		if (pop < sge->length) {
>> +			sge->length -= pop;
>> +			sge->offset += pop;
>> +			pop = 0;
>> +		} else {
>> +			pop -= sge->length;
>> +			sk_msg_shift_left(msg, i);
>> +		}
>> +		sk_msg_iter_var_next(i);
>> +	}
>> +
>> +	sk_mem_uncharge(msg->sk, len - pop);
>> +	msg->sg.size -= (len - pop);
>> +	sk_msg_compute_data_pointers(msg);
>> +	return 0;
>> +}
> 
> Hmm, had to revert. I noticed this will have a use after free. While you do the
> sk_msg_compute_data_pointers() there is nothing from verifier that forces a
> reload of the payload pointers that were set up before the helper call, so they
> can still be used after the bpf_msg_pop_data() even though we dropped ref from
> page etc.

Thanks for catching this. I will send a v2 with bpf_msg_pop_data() added to
the list in bpf_helper_changes_pkt_data.

> Thanks,
> Daniel
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ