Message-ID: <1f4af1f3-10cf-57ca-4171-11d3bff51c99@iogearbox.net>
Date: Thu, 2 Jul 2020 01:39:40 +0200
From: Daniel Borkmann <daniel@...earbox.net>
To: Lorenzo Bianconi <lorenzo@...nel.org>, netdev@...r.kernel.org,
bpf@...r.kernel.org
Cc: davem@...emloft.net, ast@...nel.org, brouer@...hat.com,
toke@...hat.com, lorenzo.bianconi@...hat.com, dsahern@...nel.org,
andrii.nakryiko@...il.com
Subject: Re: [PATCH v5 bpf-next 5/9] bpf: cpumap: add the possibility to
attach an eBPF program to cpumap
On 6/30/20 2:49 PM, Lorenzo Bianconi wrote:
[...]
> +static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
> +				     void **frames, int n,
> +				     struct xdp_cpumap_stats *stats)
> +{
> +	struct xdp_rxq_info rxq;
> +	struct bpf_prog *prog;
> +	struct xdp_buff xdp;
> +	int i, nframes = 0;
> +
> +	if (!rcpu->prog)
> +		return n;
> +
> +	rcu_read_lock();
> +
> +	xdp_set_return_frame_no_direct();
> +	xdp.rxq = &rxq;
> +
> +	prog = READ_ONCE(rcpu->prog);
> +	for (i = 0; i < n; i++) {
> +		struct xdp_frame *xdpf = frames[i];
> +		u32 act;
> +		int err;
> +
> +		rxq.dev = xdpf->dev_rx;
> +		rxq.mem = xdpf->mem;
> +		/* TODO: report queue_index to xdp_rxq_info */
> +
> +		xdp_convert_frame_to_buff(xdpf, &xdp);
> +
> +		act = bpf_prog_run_xdp(prog, &xdp);
> +		switch (act) {
> +		case XDP_PASS:
> +			err = xdp_update_frame_from_buff(&xdp, xdpf);
> +			if (err < 0) {
> +				xdp_return_frame(xdpf);
> +				stats->drop++;
> +			} else {
> +				frames[nframes++] = xdpf;
> +				stats->pass++;
> +			}
> +			break;
> +		default:
> +			bpf_warn_invalid_xdp_action(act);
> +			/* fallthrough */
> +		case XDP_DROP:
> +			xdp_return_frame(xdpf);
> +			stats->drop++;
> +			break;
> +		}
> +	}
> +
> +	xdp_clear_return_frame_no_direct();
> +
> +	rcu_read_unlock();
> +
> +	return nframes;
> +}
> +
>  #define CPUMAP_BATCH 8
>
>  static int cpu_map_kthread_run(void *data)
> @@ -235,11 +297,12 @@ static int cpu_map_kthread_run(void *data)
>  	 * kthread_stop signal until queue is empty.
>  	 */
>  	while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
> +		struct xdp_cpumap_stats stats = {}; /* zero stats */
> +		gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
>  		unsigned int drops = 0, sched = 0;
>  		void *frames[CPUMAP_BATCH];
>  		void *skbs[CPUMAP_BATCH];
> -		gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
> -		int i, n, m;
> +		int i, n, m, nframes;
>
>  		/* Release CPU reschedule checks */
>  		if (__ptr_ring_empty(rcpu->queue)) {
> @@ -260,8 +323,8 @@ static int cpu_map_kthread_run(void *data)
>  		 * kthread CPU pinned. Lockless access to ptr_ring
>  		 * consume side valid as no-resize allowed of queue.
>  		 */
> -		n = __ptr_ring_consume_batched(rcpu->queue, frames, CPUMAP_BATCH);
> -
> +		n = __ptr_ring_consume_batched(rcpu->queue, frames,
> +					       CPUMAP_BATCH);
>  		for (i = 0; i < n; i++) {
>  			void *f = frames[i];
>  			struct page *page = virt_to_page(f);
> @@ -273,15 +336,19 @@ static int cpu_map_kthread_run(void *data)
>  			prefetchw(page);
>  		}
>
> -		m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, n, skbs);
> -		if (unlikely(m == 0)) {
> -			for (i = 0; i < n; i++)
> -				skbs[i] = NULL; /* effect: xdp_return_frame */
> -			drops = n;
> +		/* Support running another XDP prog on this CPU */
> +		nframes = cpu_map_bpf_prog_run_xdp(rcpu, frames, n, &stats);
> +		if (nframes) {
> +			m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, nframes, skbs);
> +			if (unlikely(m == 0)) {
> +				for (i = 0; i < nframes; i++)
> +					skbs[i] = NULL; /* effect: xdp_return_frame */
> +				drops += nframes;
> +			}
>  		}
>
>  		local_bh_disable();
> -		for (i = 0; i < n; i++) {
> +		for (i = 0; i < nframes; i++) {
>  			struct xdp_frame *xdpf = frames[i];
>  			struct sk_buff *skb = skbs[i];
>  			int ret;
> @@ -298,7 +365,7 @@ static int cpu_map_kthread_run(void *data)
>  				drops++;
>  		}
>  		/* Feedback loop via tracepoint */
> -		trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched);
> +		trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched, &stats);
>
>  		local_bh_enable(); /* resched point, may call do_softirq() */
>  	}
> @@ -308,13 +375,38 @@ static int cpu_map_kthread_run(void *data)
>  	return 0;
>  }
[...]
> @@ -415,6 +510,8 @@ static void __cpu_map_entry_replace(struct bpf_cpu_map *cmap,
>
>  	old_rcpu = xchg(&cmap->cpu_map[key_cpu], rcpu);
>  	if (old_rcpu) {
> +		if (old_rcpu->prog)
> +			bpf_prog_put(old_rcpu->prog);
>  		call_rcu(&old_rcpu->rcu, __cpu_map_entry_free);
>  		INIT_WORK(&old_rcpu->kthread_stop_wq, cpu_map_kthread_stop);
>  		schedule_work(&old_rcpu->kthread_stop_wq);
Hm, not quite sure I follow the logic here. Why is the bpf_prog_put() not placed inside
__cpu_map_entry_free(), for example? Wouldn't this at least leave a small potential race
window for a UAF, given the rest of the entry is still live? If we already piggy-back on
the RCU side for the rcpu entry, why not have the bpf_prog_put() in __cpu_map_entry_free()
as well?
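
Roughly what I have in mind (just a sketch, untested; the rest of the
__cpu_map_entry_free() body abbreviated from the existing cpumap code), i.e.
drop the prog reference from the RCU callback that already tears down the
rcpu entry instead of doing it in __cpu_map_entry_replace():

static void __cpu_map_entry_free(struct rcu_head *rcu)
{
	struct bpf_cpu_map_entry *rcpu;

	rcpu = container_of(rcu, struct bpf_cpu_map_entry, rcu);

	/* Entry has been disconnected from the map and one RCU grace
	 * period has elapsed; release the attached prog together with
	 * the rest of the entry's resources.
	 */
	if (rcpu->prog)
		bpf_prog_put(rcpu->prog);

	free_percpu(rcpu->bulkq);
	/* Cannot kthread_stop() here, last put frees rcpu resources */
	put_cpu_map_entry(rcpu);
}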
Thanks,
Daniel