lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 24 Jan 2023 12:10:33 +0100
From:   Alexander Lobakin <alexandr.lobakin@...el.com>
To:     William Tu <u9012063@...il.com>
CC:     <netdev@...r.kernel.org>, <jsankararama@...are.com>,
        <gyang@...are.com>, <doshir@...are.com>,
        <alexander.duyck@...il.com>, <bang@...are.com>,
        "Yifeng Sun" <yifengs@...are.com>,
        Alexander Duyck <alexanderduyck@...com>
Subject: Re: [RFC PATCH net-next v14] vmxnet3: Add XDP support.

From: William Tu <u9012063@...il.com>
Date: Mon, 23 Jan 2023 20:45:15 -0800

> The patch adds native-mode XDP support: XDP DROP, PASS, TX, and REDIRECT.
> 
> Background:

[...]

> @@ -414,26 +427,34 @@ static void
>  vmxnet3_tq_cleanup(struct vmxnet3_tx_queue *tq,
>  		   struct vmxnet3_adapter *adapter)
>  {
> +	struct xdp_frame_bulk bq;
> +	u32 map_type;
>  	int i;
>  
> +	xdp_frame_bulk_init(&bq);
> +
>  	while (tq->tx_ring.next2comp != tq->tx_ring.next2fill) {
>  		struct vmxnet3_tx_buf_info *tbi;
>  
>  		tbi = tq->buf_info + tq->tx_ring.next2comp;
> +		map_type = tbi->map_type;
>  
>  		vmxnet3_unmap_tx_buf(tbi, adapter->pdev);
>  		if (tbi->skb) {
> -			dev_kfree_skb_any(tbi->skb);
> +			if (map_type & VMXNET3_MAP_XDP)
> +				xdp_return_frame_bulk(tbi->xdpf, &bq);
> +			else
> +				dev_kfree_skb_any(tbi->skb);
>  			tbi->skb = NULL;
>  		}
>  		vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
>  	}
>  
> -	/* sanity check, verify all buffers are indeed unmapped and freed */
> -	for (i = 0; i < tq->tx_ring.size; i++) {
> -		BUG_ON(tq->buf_info[i].skb != NULL ||
> -		       tq->buf_info[i].map_type != VMXNET3_MAP_NONE);
> -	}
> +	xdp_flush_frame_bulk(&bq);

Breh, I forgot you need to lock RCU read for bulk-flushing...

xdp_frame_bulk_init();
rcu_read_lock();

...

xdp_flush_frame_bulk();
rcu_read_unlock();

In both places where you use it.

> +
> +	/* sanity check, verify all buffers are indeed unmapped */
> +	for (i = 0; i < tq->tx_ring.size; i++)
> +		BUG_ON(tq->buf_info[i].map_type != VMXNET3_MAP_NONE);
>  
>  	tq->tx_ring.gen = VMXNET3_INIT_GEN;
>  	tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;

[...]

> +static int
> +vmxnet3_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf,
> +		struct netlink_ext_ack *extack)
> +{
> +	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
> +	struct bpf_prog *new_bpf_prog = bpf->prog;
> +	struct bpf_prog *old_bpf_prog;
> +	unsigned int max_mtu;
> +	bool need_update;
> +	bool running;
> +	int err;
> +
> +	max_mtu = SKB_WITH_OVERHEAD(PAGE_SIZE - VMXNET3_XDP_HEADROOM) -
> +				    netdev->hard_header_len;

Wrong alignment. You aligned it to the opening brace, but the last
variable itself is not inside the braces.
Either align it to 'SKB' or include into the expression inside the
braces, both is fine.

> +	if (new_bpf_prog && netdev->mtu > max_mtu) {
> +		NL_SET_ERR_MSG_MOD(extack, "MTU too large for XDP");
> +		return -EOPNOTSUPP;
> +	}

[...]

> +	default:
> +		bpf_warn_invalid_xdp_action(rq->adapter->netdev, prog, act);
> +		fallthrough;
> +	case XDP_ABORTED:
> +		trace_xdp_exception(rq->adapter->netdev, prog, act);
> +		rq->stats.xdp_aborted++;
> +		break;
> +	case XDP_DROP:
> +		rq->stats.xdp_drops++;
> +		break;
> +	}
> +
> +	page_pool_recycle_direct(rq->page_pool,
> +				 virt_to_head_page(xdp->data_hard_start));

Nit: you can reuse the @page variable to make this one more elegant, too :)

	page = virt_to ...
	page_pool_recycle_direct(pool, page);

> +
> +	return act;
> +}
> +
> +static struct sk_buff *
> +vmxnet3_build_skb(struct vmxnet3_rx_queue *rq, struct page *page,
> +		  const struct xdp_buff *xdp)

[...]

> +	page = page_pool_alloc_pages(rq->page_pool, GFP_ATOMIC);
> +	if (unlikely(!page)) {
> +		rq->stats.rx_buf_alloc_failure++;
> +		return XDP_DROP;
> +	}
> +
> +	xdp_init_buff(&xdp, PAGE_SIZE, &rq->xdp_rxq);
> +	xdp_prepare_buff(&xdp, page_address(page), XDP_PACKET_HEADROOM,

Isn't page_pool->p.offset more correct here instead of just
%XDP_PACKET_HEADROOM? You included %NET_IP_ALIGN there.

> +			 len, false);
> +	xdp_buff_clear_frags_flag(&xdp);
> +
> +	/* Must copy the data because it's at dataring. */
> +	memcpy(xdp.data, data, len);

Aren't you missing dma_sync_single_for_cpu() before this memcpy()?

> +
> +	rcu_read_lock();

[...]

> +int
> +vmxnet3_process_xdp(struct vmxnet3_adapter *adapter,
> +		    struct vmxnet3_rx_queue *rq,
> +		    struct Vmxnet3_RxCompDesc *rcd,
> +		    struct vmxnet3_rx_buf_info *rbi,
> +		    struct Vmxnet3_RxDesc *rxd,
> +		    struct sk_buff **skb_xdp_pass)
> +{
> +	struct bpf_prog *xdp_prog;
> +	dma_addr_t new_dma_addr;
> +	struct xdp_buff xdp;
> +	struct page *page;
> +	void *new_data;
> +	int act;
> +
> +	page = rbi->page;
> +	dma_sync_single_for_cpu(&adapter->pdev->dev,
> +				page_pool_get_dma_addr(page) +
> +				XDP_PACKET_HEADROOM, rcd->len,

Same.

> +				page_pool_get_dma_dir(rq->page_pool));
> +
> +	xdp_init_buff(&xdp, rbi->len, &rq->xdp_rxq);
> +	xdp_prepare_buff(&xdp, page_address(page), XDP_PACKET_HEADROOM,

Same.

> +			 rcd->len, false);
> +	xdp_buff_clear_frags_flag(&xdp);
> +
> +	rcu_read_lock();
> +	xdp_prog = rcu_dereference(rq->adapter->xdp_bpf_prog);
> +	if (!xdp_prog) {
> +		rcu_read_unlock();
> +		act = XDP_PASS;
> +		goto refill;

Is it correct to go to 'refill' here? You miss vmxnet3_build_skb() this
way. And when you call vmxne3_process_xdp_small() later during the NAPI
poll, you also don't build an skb and go to 'sop_done' directly instead.
So I feel you must add a new label below and go to it instead, just like
you do in process_xdp():

> +	}
> +	act = vmxnet3_run_xdp(rq, &xdp);
> +	rcu_read_unlock();
> +
> +	if (act == XDP_PASS) {

here:

> +		*skb_xdp_pass = vmxnet3_build_skb(rq, page, &xdp);
> +		if (!skb_xdp_pass)
> +			act = XDP_DROP;
> +	}
> +
> +refill:
> +	new_data = vmxnet3_pp_get_buff(rq->page_pool, &new_dma_addr,
> +				       GFP_ATOMIC);
> +	if (!new_data) {
> +		rq->stats.rx_buf_alloc_failure++;
> +		return XDP_DROP;
> +	}
> +	rbi->page = virt_to_head_page(new_data);
> +	rbi->dma_addr = new_dma_addr;
> +	rxd->addr = cpu_to_le64(rbi->dma_addr);
> +	rxd->len = rbi->len;
> +
> +	return act;
> +}
+ a couple notes in the reply to your previous revision. I feel like one
more spin and I won't have any more comments, will be good to me :)

Thanks,
Olek

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ