lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <37577230-b6b8-641c-6b77-c28b8b6fcb8b@intel.com>
Date:   Wed, 15 Feb 2023 16:54:38 +0100
From:   Alexander Lobakin <aleksander.lobakin@...el.com>
To:     William Tu <u9012063@...il.com>
CC:     <netdev@...r.kernel.org>, <jsankararama@...are.com>,
        <gyang@...are.com>, <doshir@...are.com>,
        <alexander.duyck@...il.com>, <alexandr.lobakin@...el.com>,
        <bang@...are.com>, <maciej.fijalkowski@...el.com>,
        <witu@...dia.com>, Yifeng Sun <yifengs@...are.com>,
        Alexander Duyck <alexanderduyck@...com>
Subject: Re: [RFC PATCH net-next v16] vmxnet3: Add XDP support.

From: William Tu <u9012063@...il.com>
Date: Sun,  5 Feb 2023 07:59:04 -0800

> The patch adds native-mode XDP support: XDP DROP, PASS, TX, and REDIRECT.
> 
> Background:
> The vmxnet3 rx consists of three rings: ring0, ring1, and dataring.

[...]

> Signed-off-by: William Tu <u9012063@...il.com>
> Tested-by: Yifeng Sun <yifengs@...are.com>
> Reviewed-by: Alexander Duyck <alexanderduyck@...com>
> Reviewed-by: Alexander Lobakin <alexandr.lobakin@...el.com>

Hmmm, did I give it? Can't remember :D

> @@ -326,14 +327,16 @@ static void
>  vmxnet3_unmap_tx_buf(struct vmxnet3_tx_buf_info *tbi,
>  		     struct pci_dev *pdev)
>  {
> -	if (tbi->map_type == VMXNET3_MAP_SINGLE)
> +	u32 map_type = tbi->map_type;
> +
> +	if (map_type & VMXNET3_MAP_SINGLE)
>  		dma_unmap_single(&pdev->dev, tbi->dma_addr, tbi->len,
>  				 DMA_TO_DEVICE);
> -	else if (tbi->map_type == VMXNET3_MAP_PAGE)
> +	else if (map_type & VMXNET3_MAP_PAGE)
>  		dma_unmap_page(&pdev->dev, tbi->dma_addr, tbi->len,
>  			       DMA_TO_DEVICE);
>  	else
> -		BUG_ON(tbi->map_type != VMXNET3_MAP_NONE);
> +		BUG_ON((map_type & ~VMXNET3_MAP_XDP));

Excesssive braces around the condition :s

>  
>  	tbi->map_type = VMXNET3_MAP_NONE; /* to help debugging */
>  }
> @@ -341,19 +344,20 @@ vmxnet3_unmap_tx_buf(struct vmxnet3_tx_buf_info *tbi,
>  
>  static int
>  vmxnet3_unmap_pkt(u32 eop_idx, struct vmxnet3_tx_queue *tq,
> -		  struct pci_dev *pdev,	struct vmxnet3_adapter *adapter)
> +		  struct pci_dev *pdev,	struct vmxnet3_adapter *adapter,
> +		  struct xdp_frame_bulk *bq)
>  {
> -	struct sk_buff *skb;
> +	struct vmxnet3_tx_buf_info *tbi;
>  	int entries = 0;
> +	u32 map_type;
>  
>  	/* no out of order completion */
>  	BUG_ON(tq->buf_info[eop_idx].sop_idx != tq->tx_ring.next2comp);
>  	BUG_ON(VMXNET3_TXDESC_GET_EOP(&(tq->tx_ring.base[eop_idx].txd)) != 1);
>  
> -	skb = tq->buf_info[eop_idx].skb;
> -	BUG_ON(skb == NULL);
> -	tq->buf_info[eop_idx].skb = NULL;
> -
> +	tbi = &tq->buf_info[eop_idx];
> +	BUG_ON(tbi->skb == NULL);

Prefer `!ptr` over `ptr == NULL`.

> +	map_type = tbi->map_type;
>  	VMXNET3_INC_RING_IDX_ONLY(eop_idx, tq->tx_ring.size);
>  
>  	while (tq->tx_ring.next2comp != eop_idx) {

[...]

> @@ -1253,6 +1290,62 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
>  	return NETDEV_TX_OK;
>  }
>  
> +static int
> +vmxnet3_create_pp(struct vmxnet3_adapter *adapter,
> +		  struct vmxnet3_rx_queue *rq, int size)
> +{
> +	const struct page_pool_params pp_params = {
> +		.order = 0,
> +		.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
> +		.pool_size = size,
> +		.nid = NUMA_NO_NODE,
> +		.dev = &adapter->pdev->dev,
> +		.offset = VMXNET3_XDP_RX_OFFSET,
> +		.max_len = VMXNET3_XDP_MAX_FRSIZE,
> +		.dma_dir = DMA_BIDIRECTIONAL,

Don't you want to set the direction depending on whether you have XDP
enabled? Bidir sync is slower than 1-dir on certain architectures
without DMA coherence engines, as you need to not only drop the page
from the cache, but also do a writeback.

> +	};
> +	struct page_pool *pp;
> +	int err;
> +
> +	pp = page_pool_create(&pp_params);
> +	if (IS_ERR(pp))
> +		return PTR_ERR(pp);
> +
> +	err = xdp_rxq_info_reg(&rq->xdp_rxq, adapter->netdev, rq->qid,
> +			       rq->napi.napi_id);
> +	if (err < 0)
> +		goto err_free_pp;
> +
> +	err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, MEM_TYPE_PAGE_POOL, pp);
> +	if (err)
> +		goto err_unregister_rxq;
> +
> +	rq->page_pool = pp;
> +
> +	return 0;
> +
> +err_unregister_rxq:
> +	xdp_rxq_info_unreg(&rq->xdp_rxq);
> +err_free_pp:
> +	page_pool_destroy(pp);
> +
> +	return err;
> +}

[...]

> +			if (rbi->buf_type != VMXNET3_RX_BUF_XDP)
> +				goto rcd_done;
> +
> +			act = vmxnet3_process_xdp(adapter, rq, rcd, rbi, rxd,
> +						  &skb_xdp_pass);
> +			if (act == XDP_PASS) {
> +				ctx->skb = skb_xdp_pass;
> +				goto sop_done;
> +			}
> +			ctx->skb = NULL;
> +			need_flush |= (act == XDP_REDIRECT);

Also excessive braces :D

> +
> +			goto rcd_done;
> +		}
> +skip_xdp:
> +
>  		if (rcd->sop) { /* first buf of the pkt */
>  			bool rxDataRingUsed;
>  			u16 len;

[...]

> @@ -1470,6 +1591,25 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
>  			rxDataRingUsed =
>  				VMXNET3_RX_DATA_RING(adapter, rcd->rqID);
>  			len = rxDataRingUsed ? rcd->len : rbi->len;
> +
> +			if (rxDataRingUsed && vmxnet3_xdp_enabled(adapter)) {
> +				struct sk_buff *skb_xdp_pass;
> +				size_t sz;
> +				int act;
> +
> +				sz = rcd->rxdIdx * rq->data_ring.desc_size;
> +				act = vmxnet3_process_xdp_small(adapter, rq,
> +								&rq->data_ring.base[sz],
> +								rcd->len,
> +								&skb_xdp_pass);
> +				if (act == XDP_PASS) {
> +					ctx->skb = skb_xdp_pass;
> +					goto sop_done;
> +				}
> +				need_flush |= (act == XDP_REDIRECT);

(same)

> +
> +				goto rcd_done;
> +			}
>  			new_skb = netdev_alloc_skb_ip_align(adapter->netdev,
>  							    len);
>  			if (new_skb == NULL) {

[...]

> @@ -1755,13 +1898,20 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq,
>  				&rq->rx_ring[ring_idx].base[i].rxd, &rxDesc);
>  
>  			if (rxd->btype == VMXNET3_RXD_BTYPE_HEAD &&
> -					rq->buf_info[ring_idx][i].skb) {
> +			    rq->buf_info[ring_idx][i].page &&
> +			    rq->buf_info[ring_idx][i].buf_type ==
> +			    VMXNET3_RX_BUF_XDP) {
> +				page_pool_recycle_direct(rq->page_pool,
> +							 rq->buf_info[ring_idx][i].page);

Too long line. Maybe add a `struct page *` variable in this block to
avoid this?

> +				rq->buf_info[ring_idx][i].page = NULL;
> +			} else if (rxd->btype == VMXNET3_RXD_BTYPE_HEAD &&
> +				   rq->buf_info[ring_idx][i].skb) {
>  				dma_unmap_single(&adapter->pdev->dev, rxd->addr,
>  						 rxd->len, DMA_FROM_DEVICE);
>  				dev_kfree_skb(rq->buf_info[ring_idx][i].skb);
>  				rq->buf_info[ring_idx][i].skb = NULL;
>  			} else if (rxd->btype == VMXNET3_RXD_BTYPE_BODY &&
> -					rq->buf_info[ring_idx][i].page) {
> +				   rq->buf_info[ring_idx][i].page) {
>  				dma_unmap_page(&adapter->pdev->dev, rxd->addr,
>  					       rxd->len, DMA_FROM_DEVICE);
>  				put_page(rq->buf_info[ring_idx][i].page);
[...]

You also need to resubmit the series to add XDP features advertisement,
which Lorenzo introduced recently.
Other that those, looks fine to me, my Rev-by can stay I guess :D

Thanks,
Olek

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ