Message-ID: <f8270765-a27b-6ccf-33ea-cda097168d79@redhat.com>
Date: Mon, 10 Jul 2023 12:12:29 +0200
From: Jesper Dangaard Brouer <jbrouer@...hat.com>
To: Jakub Kicinski <kuba@...nel.org>, netdev@...r.kernel.org
Cc: brouer@...hat.com, almasrymina@...gle.com, hawk@...nel.org,
ilias.apalodimas@...aro.org, edumazet@...gle.com, dsahern@...il.com,
michael.chan@...adcom.com, willemb@...gle.com
Subject: Re: [RFC 08/12] eth: bnxt: let the page pool manage the DMA mapping
On 07/07/2023 20.39, Jakub Kicinski wrote:
> Use the page pool's ability to maintain DMA mappings for us.
> This avoids re-mapping recycled pages.
>
For DMA with IOMMU mappings, using page_pool like this patch does solves
the main bottleneck. Thus, I suspect this patch will give the biggest
performance boost on its own.
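To make the pattern concrete for readers following along: with
PP_FLAG_DMA_MAP the pool maps each page once at allocation time, and the
mapping survives recycling. A minimal sketch of the pattern (not the
exact bnxt code; pool size, dev pointer and error handling are
illustrative only):

  struct page_pool_params pp = { 0 };
  struct page_pool *pool;
  struct page *page;
  dma_addr_t dma;

  pp.flags     = PP_FLAG_DMA_MAP;    /* pool maps each page once */
  pp.pool_size = 1024;               /* illustrative ring size */
  pp.dev       = &pdev->dev;         /* device doing DMA (assumed in scope) */
  pp.dma_dir   = DMA_BIDIRECTIONAL;  /* how the pool maps pages */

  pool = page_pool_create(&pp);
  if (IS_ERR(pool))
          return PTR_ERR(pool);

  page = page_pool_dev_alloc_pages(pool);
  dma  = page_pool_get_dma_addr(page); /* reuses the pool's mapping,
                                        * no per-packet dma_map_page() */

On recycle (page_pool_recycle_direct() etc.) the mapping is kept, so the
next page_pool_get_dma_addr() on a recycled page is essentially free.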
As you have already discovered, the next bottleneck then becomes the
IOMMU's address resolution, which the IOTLB (I/O Translation Lookaside
Buffer) hardware helps speed up.
There are a number of techniques for reducing IOTLB misses.
I recommend reading:
IOMMU: Strategies for Mitigating the IOTLB Bottleneck
- https://inria.hal.science/inria-00493752/document
> Note that pages in the pool are always mapped DMA_BIDIRECTIONAL,
> so we should use that instead of looking at bp->rx_dir.
>
> The syncing is probably wrong, TBH, I haven't studied the page
> pool rules, they always confused me. But for a hack, who cares,
> x86 :D
>
> Signed-off-by: Jakub Kicinski <kuba@...nel.org>
> ---
> drivers/net/ethernet/broadcom/bnxt/bnxt.c | 24 ++++++++---------------
> 1 file changed, 8 insertions(+), 16 deletions(-)
Love seeing these stats, where page_pool reduces lines in drivers.
>
> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> index e5b54e6025be..6512514cd498 100644
> --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> @@ -706,12 +706,9 @@ static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping,
> if (!page)
> return NULL;
>
> - *mapping = dma_map_page_attrs(dev, page, 0, PAGE_SIZE, bp->rx_dir,
> - DMA_ATTR_WEAK_ORDERING);
> - if (dma_mapping_error(dev, *mapping)) {
> - page_pool_recycle_direct(rxr->page_pool, page);
> - return NULL;
> - }
> + *mapping = page_pool_get_dma_addr(page);
> + dma_sync_single_for_device(dev, *mapping, PAGE_SIZE, DMA_BIDIRECTIONAL);
> +
You can keep this as-is, but I just wanted to mention that page_pool
supports doing the "dma_sync_for_device" via PP_FLAG_DMA_SYNC_DEV,
removing even more lines from driver code.
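A sketch of what that could look like in bnxt_alloc_rx_page_pool()
(the offset/max_len values are illustrative, not tuned for bnxt):

  pp.flags   = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
  pp.dma_dir = DMA_BIDIRECTIONAL;
  pp.max_len = PAGE_SIZE;  /* sync at most the whole page for device */
  pp.offset  = 0;          /* HW may write from the start of the page */

With that, the pool performs the dma_sync_single_for_device() internally
when a page is first mapped and whenever it is recycled back into the
pool, so the explicit sync in __bnxt_alloc_rx_page() can go away.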
> return page;
> }
>
> @@ -951,6 +948,7 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp,
> unsigned int offset_and_len)
> {
> unsigned int len = offset_and_len & 0xffff;
> + struct device *dev = &bp->pdev->dev;
> struct page *page = data;
> u16 prod = rxr->rx_prod;
> struct sk_buff *skb;
> @@ -962,8 +960,7 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp,
> return NULL;
> }
> dma_addr -= bp->rx_dma_offset;
> - dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir,
> - DMA_ATTR_WEAK_ORDERING);
> + dma_sync_single_for_cpu(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
> skb = build_skb(page_address(page), PAGE_SIZE);
> if (!skb) {
> page_pool_recycle_direct(rxr->page_pool, page);
> @@ -984,6 +981,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp,
> {
> unsigned int payload = offset_and_len >> 16;
> unsigned int len = offset_and_len & 0xffff;
> + struct device *dev = &bp->pdev->dev;
> skb_frag_t *frag;
> struct page *page = data;
> u16 prod = rxr->rx_prod;
> @@ -996,8 +994,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp,
> return NULL;
> }
> dma_addr -= bp->rx_dma_offset;
> - dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir,
> - DMA_ATTR_WEAK_ORDERING);
> + dma_sync_single_for_cpu(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
>
> if (unlikely(!payload))
> payload = eth_get_headlen(bp->dev, data_ptr, len);
> @@ -2943,9 +2940,6 @@ static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp, int ring_nr)
> rx_buf->data = NULL;
> if (BNXT_RX_PAGE_MODE(bp)) {
> mapping -= bp->rx_dma_offset;
> - dma_unmap_page_attrs(&pdev->dev, mapping, PAGE_SIZE,
> - bp->rx_dir,
> - DMA_ATTR_WEAK_ORDERING);
> page_pool_recycle_direct(rxr->page_pool, data);
> } else {
> dma_unmap_single_attrs(&pdev->dev, mapping,
> @@ -2967,9 +2961,6 @@ static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp, int ring_nr)
> continue;
>
> if (BNXT_RX_PAGE_MODE(bp)) {
> - dma_unmap_page_attrs(&pdev->dev, rx_agg_buf->mapping,
> - BNXT_RX_PAGE_SIZE, bp->rx_dir,
> - DMA_ATTR_WEAK_ORDERING);
> rx_agg_buf->page = NULL;
> __clear_bit(i, rxr->rx_agg_bmap);
>
> @@ -3208,6 +3199,7 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp,
> {
> struct page_pool_params pp = { 0 };
>
> + pp.flags = PP_FLAG_DMA_MAP;
> pp.pool_size = bp->rx_ring_size;
> pp.nid = dev_to_node(&bp->pdev->dev);
> pp.napi = &rxr->bnapi->napi;