Message-ID: <20160907204501.08cc4ede@redhat.com>
Date: Wed, 7 Sep 2016 20:45:01 +0200
From: Jesper Dangaard Brouer <brouer@...hat.com>
To: Saeed Mahameed <saeedm@...lanox.com>
Cc: iovisor-dev <iovisor-dev@...ts.iovisor.org>,
netdev@...r.kernel.org, Tariq Toukan <tariqt@...lanox.com>,
Brenden Blanco <bblanco@...mgrid.com>,
Alexei Starovoitov <alexei.starovoitov@...il.com>,
Tom Herbert <tom@...bertland.com>,
Martin KaFai Lau <kafai@...com>,
Daniel Borkmann <daniel@...earbox.net>,
Eric Dumazet <edumazet@...gle.com>,
Jamal Hadi Salim <jhs@...atatu.com>, brouer@...hat.com
Subject: Re: [PATCH RFC 03/11] net/mlx5e: Implement RX mapped page cache for
page recycle
On Wed, 7 Sep 2016 15:42:24 +0300 Saeed Mahameed <saeedm@...lanox.com> wrote:
> From: Tariq Toukan <tariqt@...lanox.com>
>
> Instead of reallocating and mapping pages for RX data-path,
> recycle already used pages in a per ring cache.
>
> We ran pktgen single-stream benchmarks, with iptables-raw-drop:
>
> Single stride, 64 bytes:
> * 4,739,057 - baseline
> * 4,749,550 - order0 no cache
> * 4,786,899 - order0 with cache
> 1% gain
>
> Larger packets, no page cross, 1024 bytes:
> * 3,982,361 - baseline
> * 3,845,682 - order0 no cache
> * 4,127,852 - order0 with cache
> 3.7% gain
>
> Larger packets, every 3rd packet crosses a page, 1500 bytes:
> * 3,731,189 - baseline
> * 3,579,414 - order0 no cache
> * 3,931,708 - order0 with cache
> 5.4% gain
>
> Signed-off-by: Tariq Toukan <tariqt@...lanox.com>
> Signed-off-by: Saeed Mahameed <saeedm@...lanox.com>
> ---
> drivers/net/ethernet/mellanox/mlx5/core/en.h | 16 ++++++
> drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 15 ++++++
> drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 57 ++++++++++++++++++++--
> drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 16 ++++++
> 4 files changed, 99 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
> index 075cdfc..afbdf70 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
> @@ -287,6 +287,18 @@ struct mlx5e_rx_am { /* Adaptive Moderation */
> u8 tired;
> };
>
> +/* a single cache unit is capable to serve one napi call (for non-striding rq)
> + * or a MPWQE (for striding rq).
> + */
> +#define MLX5E_CACHE_UNIT (MLX5_MPWRQ_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \
> + MLX5_MPWRQ_PAGES_PER_WQE : NAPI_POLL_WEIGHT)
> +#define MLX5E_CACHE_SIZE (2 * roundup_pow_of_two(MLX5E_CACHE_UNIT))
> +struct mlx5e_page_cache {
> + u32 head;
> + u32 tail;
> + struct mlx5e_dma_info page_cache[MLX5E_CACHE_SIZE];
> +};
> +
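(For reference: NAPI_POLL_WEIGHT is 64, and with 4K pages
MLX5_MPWRQ_PAGES_PER_WQE should also work out to 64 here if I read the
defines correctly, so MLX5E_CACHE_UNIT ends up as 64 and
MLX5E_CACHE_SIZE = 2 * roundup_pow_of_two(64) = 128 entries per RQ.)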
[...]
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
> index c1cb510..8e02af3 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
> @@ -305,11 +305,55 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix)
> mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0);
> }
>
> +static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq,
> + struct mlx5e_dma_info *dma_info)
> +{
> + struct mlx5e_page_cache *cache = &rq->page_cache;
> + u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1);
> +
> + if (tail_next == cache->head) {
> + rq->stats.cache_full++;
> + return false;
> + }
> +
> + cache->page_cache[cache->tail] = *dma_info;
> + cache->tail = tail_next;
> + return true;
> +}
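Just noting the ring convention here for readers: head == tail means
empty and tail_next == head means full, so one slot always stays
unused and the cache holds at most MLX5E_CACHE_SIZE - 1 pages.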
> +
> +static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq,
> + struct mlx5e_dma_info *dma_info)
> +{
> + struct mlx5e_page_cache *cache = &rq->page_cache;
> +
> + if (unlikely(cache->head == cache->tail)) {
> + rq->stats.cache_empty++;
> + return false;
> + }
> +
> + if (page_ref_count(cache->page_cache[cache->head].page) != 1) {
> + rq->stats.cache_busy++;
> + return false;
> + }
Hmmm... doesn't this "block" the page_cache recycle facility until the
page at the head of the queue gets its (page) refcnt decremented? A
real use-case could fairly easily trigger this, e.g. a single page that
stays referenced for a long time would stall recycling for the whole
ring...
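One way around that (just a rough sketch from my side, untested; I'm
assuming a mlx5e_page_release()-style helper that unmaps and puts the
page, adjust to whatever this series actually provides) would be to
evict the busy head entry instead of giving up, so a single long-lived
page cannot stall the entries queued behind it:

	if (page_ref_count(cache->page_cache[cache->head].page) != 1) {
		rq->stats.cache_busy++;
		/* Evict the busy entry so it cannot block later,
		 * possibly already-idle, entries behind it in the FIFO.
		 */
		mlx5e_page_release(rq, &cache->page_cache[cache->head],
				   false); /* false: unmap + put, no recycle */
		cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1);
		return false;
	}

The cost is that the busy page loses its DMA mapping instead of being
reused later, but the cache itself keeps working for the pages queued
behind it.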
> +
> + *dma_info = cache->page_cache[cache->head];
> + cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1);
> + rq->stats.cache_reuse++;
> +
> + dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE,
> + DMA_FROM_DEVICE);
> + return true;
> +}
> +
> static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
> struct mlx5e_dma_info *dma_info)
> {
> - struct page *page = dev_alloc_page();
> + struct page *page;
> +
> + if (mlx5e_rx_cache_get(rq, dma_info))
> + return 0;
>
> + page = dev_alloc_page();
> if (unlikely(!page))
> return -ENOMEM;
--
Best regards,
Jesper Dangaard Brouer
MSc.CS, Principal Kernel Engineer at Red Hat
Author of http://www.iptv-analyzer.org
LinkedIn: http://www.linkedin.com/in/brouer