Message-ID: <CAKgT0Ufx0Y=9kjLax36Gx4e7Y-A7sKZDNYxgJ9wbCT4_vxHhGA@mail.gmail.com>
Date: Mon, 13 Feb 2017 12:51:05 -0800
From: Alexander Duyck <alexander.duyck@...il.com>
To: Eric Dumazet <edumazet@...gle.com>
Cc: "David S . Miller" <davem@...emloft.net>,
netdev <netdev@...r.kernel.org>,
Tariq Toukan <tariqt@...lanox.com>,
Martin KaFai Lau <kafai@...com>,
Saeed Mahameed <saeedm@...lanox.com>,
Willem de Bruijn <willemb@...gle.com>,
Jesper Dangaard Brouer <brouer@...hat.com>,
Brenden Blanco <bblanco@...mgrid.com>,
Alexei Starovoitov <ast@...nel.org>,
Eric Dumazet <eric.dumazet@...il.com>
Subject: Re: [PATCH v3 net-next 08/14] mlx4: use order-0 pages for RX
On Mon, Feb 13, 2017 at 11:58 AM, Eric Dumazet <edumazet@...gle.com> wrote:
> Use of order-3 pages is problematic in some cases.
>
> This patch might add three kinds of regressions:
>
> 1) a CPU performance regression, but we will add page recycling
> later, and performance should come back.
>
> 2) A TCP receiver could grow its receive window slightly more slowly,
> because the skb->len/skb->truesize ratio will decrease.
> This is mostly OK; we prefer being conservative rather than risking OOM,
> and we can eventually tune TCP better in the future.
> This is consistent with other drivers using 2048 bytes per Ethernet frame.
>
> 3) Because we allocate one page per RX slot, we consume more
> memory for the ring buffers. XDP already had this constraint anyway.
>
> Signed-off-by: Eric Dumazet <edumazet@...gle.com>
> ---
> drivers/net/ethernet/mellanox/mlx4/en_rx.c | 72 +++++++++++++---------------
> drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 4 --
> 2 files changed, 33 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
> index 0c61c1200f2aec4c74d7403d9ec3ed06821c1bac..069ea09185fb0669d5c1f9b1b88f517b2d69c5ed 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
> @@ -53,38 +53,26 @@
> static int mlx4_alloc_pages(struct mlx4_en_priv *priv,
> struct mlx4_en_rx_alloc *page_alloc,
> const struct mlx4_en_frag_info *frag_info,
> - gfp_t _gfp)
> + gfp_t gfp)
> {
> - int order;
> struct page *page;
> dma_addr_t dma;
>
> - for (order = priv->rx_page_order; ;) {
> - gfp_t gfp = _gfp;
> -
> - if (order)
> - gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NOMEMALLOC;
> - page = alloc_pages(gfp, order);
> - if (likely(page))
> - break;
> - if (--order < 0 ||
> - ((PAGE_SIZE << order) < frag_info->frag_size))
> - return -ENOMEM;
> - }
> - dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order,
> - priv->dma_dir);
> + page = alloc_page(gfp);
> + if (unlikely(!page))
> + return -ENOMEM;
> + dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE, priv->dma_dir);
> if (unlikely(dma_mapping_error(priv->ddev, dma))) {
> put_page(page);
> return -ENOMEM;
> }
> - page_alloc->page_size = PAGE_SIZE << order;
> page_alloc->page = page;
> page_alloc->dma = dma;
> page_alloc->page_offset = 0;
> /* Not doing get_page() for each frag is a big win
> * on asymetric workloads. Note we can not use atomic_set().
> */
> - page_ref_add(page, page_alloc->page_size / frag_info->frag_stride - 1);
> + page_ref_add(page, PAGE_SIZE / frag_info->frag_stride - 1);
> return 0;
> }
>
> @@ -105,7 +93,7 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
> page_alloc[i].page_offset += frag_info->frag_stride;
>
> if (page_alloc[i].page_offset + frag_info->frag_stride <=
> - ring_alloc[i].page_size)
> + PAGE_SIZE)
> continue;
>
> if (unlikely(mlx4_alloc_pages(priv, &page_alloc[i],
> @@ -127,11 +115,10 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
> while (i--) {
> if (page_alloc[i].page != ring_alloc[i].page) {
> dma_unmap_page(priv->ddev, page_alloc[i].dma,
> - page_alloc[i].page_size,
> - priv->dma_dir);
> + PAGE_SIZE, priv->dma_dir);
> page = page_alloc[i].page;
> /* Revert changes done by mlx4_alloc_pages */
> - page_ref_sub(page, page_alloc[i].page_size /
> + page_ref_sub(page, PAGE_SIZE /
> priv->frag_info[i].frag_stride - 1);
> put_page(page);
> }
> @@ -147,8 +134,8 @@ static void mlx4_en_free_frag(struct mlx4_en_priv *priv,
> u32 next_frag_end = frags[i].page_offset + 2 * frag_info->frag_stride;
>
>
> - if (next_frag_end > frags[i].page_size)
> - dma_unmap_page(priv->ddev, frags[i].dma, frags[i].page_size,
> + if (next_frag_end > PAGE_SIZE)
> + dma_unmap_page(priv->ddev, frags[i].dma, PAGE_SIZE,
> priv->dma_dir);
>
> if (frags[i].page)
> @@ -168,9 +155,8 @@ static int mlx4_en_init_allocator(struct mlx4_en_priv *priv,
> frag_info, GFP_KERNEL | __GFP_COLD))
> goto out;
>
> - en_dbg(DRV, priv, " frag %d allocator: - size:%d frags:%d\n",
> - i, ring->page_alloc[i].page_size,
> - page_ref_count(ring->page_alloc[i].page));
> + en_dbg(DRV, priv, " frag %d allocator: - frags:%d\n",
> + i, page_ref_count(ring->page_alloc[i].page));
> }
> return 0;
>
> @@ -180,11 +166,10 @@ static int mlx4_en_init_allocator(struct mlx4_en_priv *priv,
>
> page_alloc = &ring->page_alloc[i];
> dma_unmap_page(priv->ddev, page_alloc->dma,
> - page_alloc->page_size,
> - priv->dma_dir);
> + PAGE_SIZE, priv->dma_dir);
> page = page_alloc->page;
> /* Revert changes done by mlx4_alloc_pages */
> - page_ref_sub(page, page_alloc->page_size /
> + page_ref_sub(page, PAGE_SIZE /
> priv->frag_info[i].frag_stride - 1);
> put_page(page);
> page_alloc->page = NULL;
You can probably simplify this by using __page_frag_cache_drain, since
that way you can just do the sub and test for the whole set instead of
doing N - 1 and then 1. See the sketch below.
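
For instance, the unwind loop in mlx4_en_alloc_frags() could collapse
the page_ref_sub() + put_page() pair into a single call. Completely
untested sketch, with variable names taken from the hunk above:

	while (i--) {
		if (page_alloc[i].page != ring_alloc[i].page) {
			dma_unmap_page(priv->ddev, page_alloc[i].dma,
				       PAGE_SIZE, priv->dma_dir);
			page = page_alloc[i].page;
			/* Drop all PAGE_SIZE / frag_stride references
			 * taken in mlx4_alloc_pages() in one
			 * sub-and-test; the page is freed when the
			 * refcount reaches zero.
			 */
			__page_frag_cache_drain(page,
						PAGE_SIZE /
						priv->frag_info[i].frag_stride);
		}
	}

__page_frag_cache_drain() does a page_ref_sub_and_test() and frees the
page on zero, so the separate put_page() goes away as well. The same
transformation would apply to the teardown path in
mlx4_en_init_allocator().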