netdev - Re: [PATCHv12 bpf-next 1/6] bpf: run devmap xdp_prog on flush instead of bulk enqueue

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <913a8e62-3f17-84ed-e4f5-099ba441508c@gmail.com>
Date:   Thu, 17 Dec 2020 09:07:03 -0700
From:   David Ahern <dsahern@...il.com>
To:     Hangbin Liu <liuhangbin@...il.com>, bpf@...r.kernel.org
Cc:     netdev@...r.kernel.org,
        Toke Høiland-Jørgensen <toke@...hat.com>,
        Jiri Benc <jbenc@...hat.com>,
        Jesper Dangaard Brouer <brouer@...hat.com>,
        Eelco Chaudron <echaudro@...hat.com>, ast@...nel.org,
        Daniel Borkmann <daniel@...earbox.net>,
        Lorenzo Bianconi <lorenzo.bianconi@...hat.com>,
        Andrii Nakryiko <andrii.nakryiko@...il.com>,
        Alexei Starovoitov <alexei.starovoitov@...il.com>,
        Hangbin Liu <haliu@...hat.com>
Subject: Re: [PATCHv12 bpf-next 1/6] bpf: run devmap xdp_prog on flush instead
 of bulk enqueue

On 12/16/20 7:30 AM, Hangbin Liu wrote:
> @@ -327,40 +328,92 @@ bool dev_map_can_have_prog(struct bpf_map *map)
>  	return false;
>  }
>  
> +static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog,
> +				struct xdp_frame **frames, int n,
> +				struct net_device *dev)
> +{
> +	struct xdp_txq_info txq = { .dev = dev };
> +	struct xdp_buff xdp;
> +	int i, nframes = 0;
> +
> +	for (i = 0; i < n; i++) {
> +		struct xdp_frame *xdpf = frames[i];
> +		u32 act;
> +		int err;
> +
> +		xdp_convert_frame_to_buff(xdpf, &xdp);
> +		xdp.txq = &txq;
> +
> +		act = bpf_prog_run_xdp(xdp_prog, &xdp);
> +		switch (act) {
> +		case XDP_PASS:
> +			err = xdp_update_frame_from_buff(&xdp, xdpf);
> +			if (unlikely(err < 0))
> +				xdp_return_frame_rx_napi(xdpf);
> +			else
> +				frames[nframes++] = xdpf;
> +			break;
> +		default:
> +			bpf_warn_invalid_xdp_action(act);
> +			fallthrough;
> +		case XDP_ABORTED:
> +			trace_xdp_exception(dev, xdp_prog, act);
> +			fallthrough;
> +		case XDP_DROP:
> +			xdp_return_frame_rx_napi(xdpf);
> +			break;
> +		}
> +	}
> +	return n - nframes; /* dropped frames count */

just return nframes here, since ...

> +}
> +
>  static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
>  {
>  	struct net_device *dev = bq->dev;
>  	int sent = 0, drops = 0, err = 0;
> +	unsigned int cnt = bq->count;
> +	unsigned int xdp_drop;
>  	int i;
>  
> -	if (unlikely(!bq->count))
> +	if (unlikely(!cnt))
>  		return;
>  
> -	for (i = 0; i < bq->count; i++) {
> +	for (i = 0; i < cnt; i++) {
>  		struct xdp_frame *xdpf = bq->q[i];
>  
>  		prefetch(xdpf);
>  	}
>  
> -	sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q, flags);
> +	if (unlikely(bq->xdp_prog)) {
> +		xdp_drop = dev_map_bpf_prog_run(bq->xdp_prog, bq->q, cnt, dev);
> +		cnt -= xdp_drop;

... that is apparently what you really want.

> +		if (!cnt) {
> +			sent = 0;
> +			drops = xdp_drop;
> +			goto out;
> +		}
> +	}
> +
> +	sent = dev->netdev_ops->ndo_xdp_xmit(dev, cnt, bq->q, flags);
>  	if (sent < 0) {
>  		err = sent;
>  		sent = 0;
>  		goto error;
>  	}
> -	drops = bq->count - sent;
> +	drops = (cnt - sent) + xdp_drop;
>  out:
>  	bq->count = 0;
>  
>  	trace_xdp_devmap_xmit(bq->dev_rx, dev, sent, drops, err);
>  	bq->dev_rx = NULL;
> +	bq->xdp_prog = NULL;
>  	__list_del_clearprev(&bq->flush_node);
>  	return;
>  error:
>  	/* If ndo_xdp_xmit fails with an errno, no frames have been
>  	 * xmit'ed and it's our responsibility to them free all.
>  	 */
> -	for (i = 0; i < bq->count; i++) {
> +	for (i = 0; i < cnt; i++) {
>  		struct xdp_frame *xdpf = bq->q[i];
>  
>  		xdp_return_frame_rx_napi(xdpf);
> @@ -408,7 +461,8 @@ struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
>   * Thus, safe percpu variable access.
>   */
>  static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
> -		       struct net_device *dev_rx)
> +		       struct net_device *dev_rx,
> +		       struct bpf_dtab_netdev *dst)
>  {
>  	struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
>  	struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);
> @@ -423,6 +477,14 @@ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
>  	if (!bq->dev_rx)
>  		bq->dev_rx = dev_rx;
>  
> +	/* Store (potential) xdp_prog that run before egress to dev as
> +	 * part of bulk_queue.  This will be same xdp_prog for all
> +	 * xdp_frame's in bulk_queue, because this per-CPU store must
> +	 * be flushed from net_device drivers NAPI func end.
> +	 */
> +	if (dst && dst->xdp_prog && !bq->xdp_prog)
> +		bq->xdp_prog = dst->xdp_prog;


if you pass in xdp_prog through __xdp_enqueue you can reduce that to just:

	if (!bq->xdp_prog)
		bq->xdp_prog = xdp_prog;


>  	bq->q[bq->count++] = xdpf;
>  
>  	if (!bq->flush_node.prev)
> @@ -430,7 +492,8 @@ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
>  }
>  
>  static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
> -			       struct net_device *dev_rx)
> +				struct net_device *dev_rx,
> +				struct bpf_dtab_netdev *dst)
>  {
>  	struct xdp_frame *xdpf;
>  	int err;
> @@ -446,42 +509,14 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
>  	if (unlikely(!xdpf))
>  		return -EOVERFLOW;
>  
> -	bq_enqueue(dev, xdpf, dev_rx);
> +	bq_enqueue(dev, xdpf, dev_rx, dst);
>  	return 0;
>  }
>  
> -static struct xdp_buff *dev_map_run_prog(struct net_device *dev,
> -					 struct xdp_buff *xdp,
> -					 struct bpf_prog *xdp_prog)
> -{
> -	struct xdp_txq_info txq = { .dev = dev };
> -	u32 act;
> -
> -	xdp_set_data_meta_invalid(xdp);
> -	xdp->txq = &txq;
> -
> -	act = bpf_prog_run_xdp(xdp_prog, xdp);
> -	switch (act) {
> -	case XDP_PASS:
> -		return xdp;
> -	case XDP_DROP:
> -		break;
> -	default:
> -		bpf_warn_invalid_xdp_action(act);
> -		fallthrough;
> -	case XDP_ABORTED:
> -		trace_xdp_exception(dev, xdp_prog, act);
> -		break;
> -	}
> -
> -	xdp_return_buff(xdp);
> -	return NULL;
> -}
> -
>  int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
>  		    struct net_device *dev_rx)
>  {
> -	return __xdp_enqueue(dev, xdp, dev_rx);
> +	return __xdp_enqueue(dev, xdp, dev_rx, NULL);
>  }
>  
>  int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
> @@ -489,12 +524,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
>  {
>  	struct net_device *dev = dst->dev;
>  
> -	if (dst->xdp_prog) {
> -		xdp = dev_map_run_prog(dev, xdp, dst->xdp_prog);
> -		if (!xdp)
> -			return 0;
> -	}
> -	return __xdp_enqueue(dev, xdp, dev_rx);
> +	return __xdp_enqueue(dev, xdp, dev_rx, dst);
>  }
>  
>  int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
>