lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <f8270765-a27b-6ccf-33ea-cda097168d79@redhat.com>
Date: Mon, 10 Jul 2023 12:12:29 +0200
From: Jesper Dangaard Brouer <jbrouer@...hat.com>
To: Jakub Kicinski <kuba@...nel.org>, netdev@...r.kernel.org
Cc: brouer@...hat.com, almasrymina@...gle.com, hawk@...nel.org,
 ilias.apalodimas@...aro.org, edumazet@...gle.com, dsahern@...il.com,
 michael.chan@...adcom.com, willemb@...gle.com
Subject: Re: [RFC 08/12] eth: bnxt: let the page pool manage the DMA mapping



On 07/07/2023 20.39, Jakub Kicinski wrote:
> Use the page pool's ability to maintain DMA mappings for us.
> This avoids re-mapping recycled pages.
> 

For DMA using IOMMU mappings, using page_pool like this patch solves the
main bottleneck.  Thus, I suspect this patch will give the biggest
performance boost on its own.

As you have already discovered, the next bottleneck then becomes the
IOMMU's address resolution, which the IOTLB (I/O Translation Lookaside
Buffer) hardware helps speed up.

There are a number of techniques for reducing IOTLB misses.
I recommend reading:
  IOMMU: Strategies for Mitigating the IOTLB Bottleneck
  - https://inria.hal.science/inria-00493752/document


> Note that pages in the pool are always mapped DMA_BIDIRECTIONAL,
> so we should use that instead of looking at bp->rx_dir.
> 
> The syncing is probably wrong, TBH, I haven't studied the page
> pool rules, they always confused me. But for a hack, who cares,
> x86 :D
> 
> Signed-off-by: Jakub Kicinski <kuba@...nel.org>
> ---
>   drivers/net/ethernet/broadcom/bnxt/bnxt.c | 24 ++++++++---------------
>   1 file changed, 8 insertions(+), 16 deletions(-)

Love seeing these stats, where page_pool reduces lines in drivers.

> 
> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> index e5b54e6025be..6512514cd498 100644
> --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> @@ -706,12 +706,9 @@ static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping,
>   	if (!page)
>   		return NULL;
>   
> -	*mapping = dma_map_page_attrs(dev, page, 0, PAGE_SIZE, bp->rx_dir,
> -				      DMA_ATTR_WEAK_ORDERING);
> -	if (dma_mapping_error(dev, *mapping)) {
> -		page_pool_recycle_direct(rxr->page_pool, page);
> -		return NULL;
> -	}
> +	*mapping = page_pool_get_dma_addr(page);
> +	dma_sync_single_for_device(dev, *mapping, PAGE_SIZE, DMA_BIDIRECTIONAL);
> +

You can keep this as-is, but I just wanted to mention that page_pool
supports doing the "dma_sync_for_device" via PP_FLAG_DMA_SYNC_DEV,
which would remove even more lines from the driver code.

>   	return page;
>   }
>   
> @@ -951,6 +948,7 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp,
>   					      unsigned int offset_and_len)
>   {
>   	unsigned int len = offset_and_len & 0xffff;
> +	struct device *dev = &bp->pdev->dev;
>   	struct page *page = data;
>   	u16 prod = rxr->rx_prod;
>   	struct sk_buff *skb;
> @@ -962,8 +960,7 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp,
>   		return NULL;
>   	}
>   	dma_addr -= bp->rx_dma_offset;
> -	dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir,
> -			     DMA_ATTR_WEAK_ORDERING);
> +	dma_sync_single_for_cpu(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
>   	skb = build_skb(page_address(page), PAGE_SIZE);
>   	if (!skb) {
>   		page_pool_recycle_direct(rxr->page_pool, page);
> @@ -984,6 +981,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp,
>   {
>   	unsigned int payload = offset_and_len >> 16;
>   	unsigned int len = offset_and_len & 0xffff;
> +	struct device *dev = &bp->pdev->dev;
>   	skb_frag_t *frag;
>   	struct page *page = data;
>   	u16 prod = rxr->rx_prod;
> @@ -996,8 +994,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp,
>   		return NULL;
>   	}
>   	dma_addr -= bp->rx_dma_offset;
> -	dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir,
> -			     DMA_ATTR_WEAK_ORDERING);
> +	dma_sync_single_for_cpu(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
>   
>   	if (unlikely(!payload))
>   		payload = eth_get_headlen(bp->dev, data_ptr, len);
> @@ -2943,9 +2940,6 @@ static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp, int ring_nr)
>   		rx_buf->data = NULL;
>   		if (BNXT_RX_PAGE_MODE(bp)) {
>   			mapping -= bp->rx_dma_offset;
> -			dma_unmap_page_attrs(&pdev->dev, mapping, PAGE_SIZE,
> -					     bp->rx_dir,
> -					     DMA_ATTR_WEAK_ORDERING);
>   			page_pool_recycle_direct(rxr->page_pool, data);
>   		} else {
>   			dma_unmap_single_attrs(&pdev->dev, mapping,
> @@ -2967,9 +2961,6 @@ static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp, int ring_nr)
>   			continue;
>   
>   		if (BNXT_RX_PAGE_MODE(bp)) {
> -			dma_unmap_page_attrs(&pdev->dev, rx_agg_buf->mapping,
> -					     BNXT_RX_PAGE_SIZE, bp->rx_dir,
> -					     DMA_ATTR_WEAK_ORDERING);
>   			rx_agg_buf->page = NULL;
>   			__clear_bit(i, rxr->rx_agg_bmap);
>   
> @@ -3208,6 +3199,7 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp,
>   {
>   	struct page_pool_params pp = { 0 };
>   
> +	pp.flags = PP_FLAG_DMA_MAP;
>   	pp.pool_size = bp->rx_ring_size;
>   	pp.nid = dev_to_node(&bp->pdev->dev);
>   	pp.napi = &rxr->bnapi->napi;


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ