[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20210415001711.dpbt2lej75ry6v7a@kafai-mbp.dhcp.thefacebook.com>
Date: Wed, 14 Apr 2021 17:17:11 -0700
From: Martin KaFai Lau <kafai@...com>
To: Hangbin Liu <liuhangbin@...il.com>
CC: <bpf@...r.kernel.org>, <netdev@...r.kernel.org>,
Toke Høiland-Jørgensen <toke@...hat.com>,
Jiri Benc <jbenc@...hat.com>,
Jesper Dangaard Brouer <brouer@...hat.com>,
Eelco Chaudron <echaudro@...hat.com>, <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
Lorenzo Bianconi <lorenzo.bianconi@...hat.com>,
David Ahern <dsahern@...il.com>,
Andrii Nakryiko <andrii.nakryiko@...il.com>,
Alexei Starovoitov <alexei.starovoitov@...il.com>,
John Fastabend <john.fastabend@...il.com>,
Maciej Fijalkowski <maciej.fijalkowski@...el.com>,
Björn Töpel <bjorn.topel@...il.com>
Subject: Re: [PATCHv7 bpf-next 1/4] bpf: run devmap xdp_prog on flush instead
of bulk enqueue
On Wed, Apr 14, 2021 at 08:26:07PM +0800, Hangbin Liu wrote:
[ ... ]
> diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
> index aa516472ce46..3980fb3bfb09 100644
> --- a/kernel/bpf/devmap.c
> +++ b/kernel/bpf/devmap.c
> @@ -57,6 +57,7 @@ struct xdp_dev_bulk_queue {
> struct list_head flush_node;
> struct net_device *dev;
> struct net_device *dev_rx;
> + struct bpf_prog *xdp_prog;
> unsigned int count;
> };
>
> @@ -326,22 +327,71 @@ bool dev_map_can_have_prog(struct bpf_map *map)
> return false;
> }
>
> +static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog,
> + struct xdp_frame **frames, int n,
> + struct net_device *dev)
> +{
> + struct xdp_txq_info txq = { .dev = dev };
> + struct xdp_buff xdp;
> + int i, nframes = 0;
> +
> + for (i = 0; i < n; i++) {
> + struct xdp_frame *xdpf = frames[i];
> + u32 act;
> + int err;
> +
> + xdp_convert_frame_to_buff(xdpf, &xdp);
> + xdp.txq = &txq;
> +
> + act = bpf_prog_run_xdp(xdp_prog, &xdp);
> + switch (act) {
> + case XDP_PASS:
> + err = xdp_update_frame_from_buff(&xdp, xdpf);
> + if (unlikely(err < 0))
> + xdp_return_frame_rx_napi(xdpf);
> + else
> + frames[nframes++] = xdpf;
> + break;
> + default:
> + bpf_warn_invalid_xdp_action(act);
> + fallthrough;
> + case XDP_ABORTED:
> + trace_xdp_exception(dev, xdp_prog, act);
> + fallthrough;
> + case XDP_DROP:
> + xdp_return_frame_rx_napi(xdpf);
> + break;
> + }
> + }
> + return nframes; /* sent frames count */
> +}
> +
> static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
> {
> struct net_device *dev = bq->dev;
> - int sent = 0, err = 0;
> + int sent = 0, drops = 0, err = 0;
> + unsigned int cnt = bq->count;
> + int to_send = cnt;
> int i;
>
> - if (unlikely(!bq->count))
> + if (unlikely(!cnt))
> return;
>
> - for (i = 0; i < bq->count; i++) {
> + for (i = 0; i < cnt; i++) {
> struct xdp_frame *xdpf = bq->q[i];
>
> prefetch(xdpf);
> }
>
> - sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q, flags);
> + if (bq->xdp_prog) {
bq->xdp_prog is used here
> + to_send = dev_map_bpf_prog_run(bq->xdp_prog, bq->q, cnt, dev);
> + if (!to_send)
> + goto out;
> +
> + drops = cnt - to_send;
> + }
> +
[ ... ]
> static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
> - struct net_device *dev_rx)
> + struct net_device *dev_rx, struct bpf_prog *xdp_prog)
> {
> struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
> struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);
> @@ -412,18 +466,22 @@ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
> /* Ingress dev_rx will be the same for all xdp_frame's in
> * bulk_queue, because bq stored per-CPU and must be flushed
> * from net_device drivers NAPI func end.
> + *
> + * Do the same with xdp_prog and flush_list since these fields
> + * are only ever modified together.
> */
> - if (!bq->dev_rx)
> + if (!bq->dev_rx) {
> bq->dev_rx = dev_rx;
> + bq->xdp_prog = xdp_prog;
bq->xdp_prog is assigned here and could be used later in bq_xmit_all().
How is bq->xdp_prog protected? Are they all under one rcu_read_lock()?
It is not very obvious after taking a quick look at xdp_do_flush[_map].
E.g., what if the devmap elem gets deleted?
[ ... ]
> static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
> - struct net_device *dev_rx)
> + struct net_device *dev_rx,
> + struct bpf_prog *xdp_prog)
> {
> struct xdp_frame *xdpf;
> int err;
> @@ -439,42 +497,14 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
> if (unlikely(!xdpf))
> return -EOVERFLOW;
>
> - bq_enqueue(dev, xdpf, dev_rx);
> + bq_enqueue(dev, xdpf, dev_rx, xdp_prog);
> return 0;
> }
>
[ ... ]
> @@ -482,12 +512,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
> {
> struct net_device *dev = dst->dev;
>
> - if (dst->xdp_prog) {
> - xdp = dev_map_run_prog(dev, xdp, dst->xdp_prog);
> - if (!xdp)
> - return 0;
> - }
> - return __xdp_enqueue(dev, xdp, dev_rx);
> + return __xdp_enqueue(dev, xdp, dev_rx, dst->xdp_prog);
> }
Powered by blists - more mailing lists