[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CALDO+SbeLUWCQ8iFVrB641zsXX8+h4hhMzYR-0+1CR-KQBhw=A@mail.gmail.com>
Date: Fri, 27 Jan 2023 06:28:05 -0800
From: William Tu <u9012063@...il.com>
To: Alexander Lobakin <alexandr.lobakin@...el.com>
Cc: netdev@...r.kernel.org, jsankararama@...are.com, gyang@...are.com,
doshir@...are.com, alexander.duyck@...il.com, bang@...are.com,
Yifeng Sun <yifengs@...are.com>,
Alexander Duyck <alexanderduyck@...com>
Subject: Re: [RFC PATCH net-next v14] vmxnet3: Add XDP support.
Hi Alexander,
Thanks for finding more issues in my code!
On Tue, Jan 24, 2023 at 3:10 AM Alexander Lobakin
<alexandr.lobakin@...el.com> wrote:
>
> From: William Tu <u9012063@...il.com>
> Date: Mon, 23 Jan 2023 20:45:15 -0800
>
> > The patch adds native-mode XDP support: XDP DROP, PASS, TX, and REDIRECT.
> >
> > Background:
>
> [...]
>
> > @@ -414,26 +427,34 @@ static void
> > vmxnet3_tq_cleanup(struct vmxnet3_tx_queue *tq,
> > struct vmxnet3_adapter *adapter)
> > {
> > + struct xdp_frame_bulk bq;
> > + u32 map_type;
> > int i;
> >
> > + xdp_frame_bulk_init(&bq);
> > +
> > while (tq->tx_ring.next2comp != tq->tx_ring.next2fill) {
> > struct vmxnet3_tx_buf_info *tbi;
> >
> > tbi = tq->buf_info + tq->tx_ring.next2comp;
> > + map_type = tbi->map_type;
> >
> > vmxnet3_unmap_tx_buf(tbi, adapter->pdev);
> > if (tbi->skb) {
> > - dev_kfree_skb_any(tbi->skb);
> > + if (map_type & VMXNET3_MAP_XDP)
> > + xdp_return_frame_bulk(tbi->xdpf, &bq);
> > + else
> > + dev_kfree_skb_any(tbi->skb);
> > tbi->skb = NULL;
> > }
> > vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
> > }
> >
> > - /* sanity check, verify all buffers are indeed unmapped and freed */
> > - for (i = 0; i < tq->tx_ring.size; i++) {
> > - BUG_ON(tq->buf_info[i].skb != NULL ||
> > - tq->buf_info[i].map_type != VMXNET3_MAP_NONE);
> > - }
> > + xdp_flush_frame_bulk(&bq);
>
> Breh, I forgot you need to lock RCU read for bulk-flushing...
Thanks, I didn't notice that, will do it.
>
> xdp_frame_bulk_init();
> rcu_read_lock();
>
> ...
>
> xdp_flush_frame_bulk();
> rcu_read_unlock();
>
> In both places where you use it.
>
> > +
> > + /* sanity check, verify all buffers are indeed unmapped */
> > + for (i = 0; i < tq->tx_ring.size; i++)
> > + BUG_ON(tq->buf_info[i].map_type != VMXNET3_MAP_NONE);
> >
> > tq->tx_ring.gen = VMXNET3_INIT_GEN;
> > tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;
>
> [...]
>
> > +static int
> > +vmxnet3_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf,
> > + struct netlink_ext_ack *extack)
> > +{
> > + struct vmxnet3_adapter *adapter = netdev_priv(netdev);
> > + struct bpf_prog *new_bpf_prog = bpf->prog;
> > + struct bpf_prog *old_bpf_prog;
> > + unsigned int max_mtu;
> > + bool need_update;
> > + bool running;
> > + int err;
> > +
> > + max_mtu = SKB_WITH_OVERHEAD(PAGE_SIZE - VMXNET3_XDP_HEADROOM) -
> > + netdev->hard_header_len;
>
> Wrong alignment. You aligned it to the opening brace, but the last
> variable itself is not inside the braces.
> Either align it to 'SKB' or include into the expression inside the
> braces, both is fine.
sorry, my mistake again. Will fix it.
>
> > + if (new_bpf_prog && netdev->mtu > max_mtu) {
> > + NL_SET_ERR_MSG_MOD(extack, "MTU too large for XDP");
> > + return -EOPNOTSUPP;
> > + }
>
> [...]
>
> > + default:
> > + bpf_warn_invalid_xdp_action(rq->adapter->netdev, prog, act);
> > + fallthrough;
> > + case XDP_ABORTED:
> > + trace_xdp_exception(rq->adapter->netdev, prog, act);
> > + rq->stats.xdp_aborted++;
> > + break;
> > + case XDP_DROP:
> > + rq->stats.xdp_drops++;
> > + break;
> > + }
> > +
> > + page_pool_recycle_direct(rq->page_pool,
> > + virt_to_head_page(xdp->data_hard_start));
>
> Nit: you can reuse the @page variable to make this one more elegant, too :)
Will do.
>
> page = virt_to ...
> page_pool_recycle_direct(pool, page);
>
> > +
> > + return act;
> > +}
> > +
> > +static struct sk_buff *
> > +vmxnet3_build_skb(struct vmxnet3_rx_queue *rq, struct page *page,
> > + const struct xdp_buff *xdp)
>
> [...]
>
> > + page = page_pool_alloc_pages(rq->page_pool, GFP_ATOMIC);
> > + if (unlikely(!page)) {
> > + rq->stats.rx_buf_alloc_failure++;
> > + return XDP_DROP;
> > + }
> > +
> > + xdp_init_buff(&xdp, PAGE_SIZE, &rq->xdp_rxq);
> > + xdp_prepare_buff(&xdp, page_address(page), XDP_PACKET_HEADROOM,
>
> Isn't page_pool->p.offset more correct here instead of just
> %XDP_PACKET_HEADROOM? You included %NET_IP_ALIGN there.
Yes, will fix here and others below.
>
> > + len, false);
> > + xdp_buff_clear_frags_flag(&xdp);
> > +
> > + /* Must copy the data because it's at dataring. */
> > + memcpy(xdp.data, data, len);
>
> Aren't you missing dma_sync_single_for_cpu() before this memcpy()?
We don't need to dma_sync_single_for_cpu here because the xdp.data is
part of the rx ring (%rq->data_ring.base) and using dma_alloc_coherent.
>
> > +
> > + rcu_read_lock();
>
> [...]
>
> > +int
> > +vmxnet3_process_xdp(struct vmxnet3_adapter *adapter,
> > + struct vmxnet3_rx_queue *rq,
> > + struct Vmxnet3_RxCompDesc *rcd,
> > + struct vmxnet3_rx_buf_info *rbi,
> > + struct Vmxnet3_RxDesc *rxd,
> > + struct sk_buff **skb_xdp_pass)
> > +{
> > + struct bpf_prog *xdp_prog;
> > + dma_addr_t new_dma_addr;
> > + struct xdp_buff xdp;
> > + struct page *page;
> > + void *new_data;
> > + int act;
> > +
> > + page = rbi->page;
> > + dma_sync_single_for_cpu(&adapter->pdev->dev,
> > + page_pool_get_dma_addr(page) +
> > + XDP_PACKET_HEADROOM, rcd->len,
>
> Same.
>
> > + page_pool_get_dma_dir(rq->page_pool));
> > +
> > + xdp_init_buff(&xdp, rbi->len, &rq->xdp_rxq);
> > + xdp_prepare_buff(&xdp, page_address(page), XDP_PACKET_HEADROOM,
>
> Same.
>
> > + rcd->len, false);
> > + xdp_buff_clear_frags_flag(&xdp);
> > +
> > + rcu_read_lock();
> > + xdp_prog = rcu_dereference(rq->adapter->xdp_bpf_prog);
> > + if (!xdp_prog) {
> > + rcu_read_unlock();
> > + act = XDP_PASS;
> > + goto refill;
>
> Is it correct to go to 'refill' here? You miss vmxnet3_build_skb() this
> way. And when you call vmxne3_process_xdp_small() later during the NAPI
> poll, you also don't build an skb and go to 'sop_done' directly instead.
> So I feel you must add a new label below and go to it instead, just like
> you do in process_xdp():
good catch and you're right, will add another label below.
>
> > + }
> > + act = vmxnet3_run_xdp(rq, &xdp);
> > + rcu_read_unlock();
> > +
> > + if (act == XDP_PASS) {
>
> here:
>
> > + *skb_xdp_pass = vmxnet3_build_skb(rq, page, &xdp);
> > + if (!skb_xdp_pass)
> > + act = XDP_DROP;
> > + }
> > +
> > +refill:
> > + new_data = vmxnet3_pp_get_buff(rq->page_pool, &new_dma_addr,
> > + GFP_ATOMIC);
> > + if (!new_data) {
> > + rq->stats.rx_buf_alloc_failure++;
> > + return XDP_DROP;
> > + }
> > + rbi->page = virt_to_head_page(new_data);
> > + rbi->dma_addr = new_dma_addr;
> > + rxd->addr = cpu_to_le64(rbi->dma_addr);
> > + rxd->len = rbi->len;
> > +
> > + return act;
> > +}
> + a couple notes in the reply to your previous revision. I feel like one
> more spin and I won't have any more comments, will be good to me :)
Will start working on next version, thanks again!
William
Powered by blists - more mailing lists