lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Wed, 11 May 2016 00:29:15 +0300
From:	Saeed Mahameed <saeedm@...lanox.com>
To:	"David S. Miller" <davem@...emloft.net>
Cc:	netdev@...r.kernel.org, Or Gerlitz <ogerlitz@...lanox.com>,
	Tal Alon <talal@...lanox.com>,
	Eran Ben Elisha <eranbe@...lanox.com>,
	Tariq Toukan <tariqt@...lanox.com>,
	Saeed Mahameed <saeedm@...lanox.com>
Subject: [PATCH net-next 2/3] net/mlx5e: Expand WQE stride when CQE compression is enabled

From: Tariq Toukan <tariqt@...lanox.com>

Make the MPWQE/Striding RQ default configuration dynamic and not
statically set at compile time.  Now at driver load we set
stride size and num strides dynamically.

By default we use same values as before, but when CQE compression
is enabled, we set larger stride size to benefit from CQE
compression for larger packets.

Signed-off-by: Tariq Toukan <tariqt@...lanox.com>
Signed-off-by: Saeed Mahameed <saeedm@...lanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h      | 13 ++++----
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 23 ++++++++++---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c   | 39 ++++++++++++-----------
 3 files changed, 46 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 19f0d8d..e05abad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -64,12 +64,9 @@
 #define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW            0x4
 #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW            0x6
 
-#define MLX5_MPWRQ_LOG_NUM_STRIDES		11 /* >= 9, HW restriction */
 #define MLX5_MPWRQ_LOG_STRIDE_SIZE		6  /* >= 6, HW restriction */
-#define MLX5_MPWRQ_NUM_STRIDES			BIT(MLX5_MPWRQ_LOG_NUM_STRIDES)
-#define MLX5_MPWRQ_STRIDE_SIZE			BIT(MLX5_MPWRQ_LOG_STRIDE_SIZE)
-#define MLX5_MPWRQ_LOG_WQE_SZ			(MLX5_MPWRQ_LOG_NUM_STRIDES +\
-						 MLX5_MPWRQ_LOG_STRIDE_SIZE)
+#define MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS	8  /* >= 6, HW restriction */
+#define MLX5_MPWRQ_LOG_WQE_SZ			17
 #define MLX5_MPWRQ_WQE_PAGE_ORDER  (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
 				    MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0)
 #define MLX5_MPWRQ_PAGES_PER_WQE		BIT(MLX5_MPWRQ_WQE_PAGE_ORDER)
@@ -154,6 +151,8 @@ struct mlx5e_umr_wqe {
 struct mlx5e_params {
 	u8  log_sq_size;
 	u8  rq_wq_type;
+	u8  mpwqe_log_stride_sz;
+	u8  mpwqe_log_num_strides;
 	u8  log_rq_size;
 	u16 num_channels;
 	u8  num_tc;
@@ -249,6 +248,8 @@ struct mlx5e_rq {
 	/* control */
 	struct mlx5_wq_ctrl    wq_ctrl;
 	u8                     wq_type;
+	u32                    mpwqe_stride_sz;
+	u32                    mpwqe_num_strides;
 	u32                    rqn;
 	struct mlx5e_channel  *channel;
 	struct mlx5e_priv     *priv;
@@ -272,7 +273,7 @@ struct mlx5e_mpw_info {
 	void (*dma_pre_sync)(struct device *pdev,
 			     struct mlx5e_mpw_info *wi,
 			     u32 wqe_offset, u32 len);
-	void (*add_skb_frag)(struct device *pdev,
+	void (*add_skb_frag)(struct mlx5e_rq *rq,
 			     struct sk_buff *skb,
 			     struct mlx5e_mpw_info *wi,
 			     u32 page_idx, u32 frag_offset, u32 len);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 0ea4c03..e40e59a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -307,7 +307,9 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
 		rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq;
 		rq->alloc_wqe = mlx5e_alloc_rx_mpwqe;
 
-		rq->wqe_sz = MLX5_MPWRQ_NUM_STRIDES * MLX5_MPWRQ_STRIDE_SIZE;
+		rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz);
+		rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides);
+		rq->wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides;
 		byte_count = rq->wqe_sz;
 		break;
 	default: /* MLX5_WQ_TYPE_LINKED_LIST */
@@ -1130,9 +1132,9 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
 	switch (priv->params.rq_wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
 		MLX5_SET(wq, wq, log_wqe_num_of_strides,
-			 MLX5_MPWRQ_LOG_NUM_STRIDES - 9);
+			 priv->params.mpwqe_log_num_strides - 9);
 		MLX5_SET(wq, wq, log_wqe_stride_size,
-			 MLX5_MPWRQ_LOG_STRIDE_SIZE - 6);
+			 priv->params.mpwqe_log_stride_sz - 6);
 		MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ);
 		break;
 	default: /* MLX5_WQ_TYPE_LINKED_LIST */
@@ -1199,7 +1201,7 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
 	switch (priv->params.rq_wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
 		log_cq_size = priv->params.log_rq_size +
-			MLX5_MPWRQ_LOG_NUM_STRIDES;
+			priv->params.mpwqe_log_num_strides;
 		break;
 	default: /* MLX5_WQ_TYPE_LINKED_LIST */
 		log_cq_size = priv->params.log_rq_size;
@@ -2729,12 +2731,25 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
 	switch (priv->params.rq_wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
 		priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
+		priv->params.mpwqe_log_stride_sz =
+			priv->params.rx_cqe_compress ?
+			MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS :
+			MLX5_MPWRQ_LOG_STRIDE_SIZE;
+		priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
+			priv->params.mpwqe_log_stride_sz;
 		priv->params.lro_en = true;
 		break;
 	default: /* MLX5_WQ_TYPE_LINKED_LIST */
 		priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
 	}
 
+	mlx5_core_info(mdev,
+		       "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
+		       priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
+		       BIT(priv->params.log_rq_size),
+		       BIT(priv->params.mpwqe_log_stride_sz),
+		       priv->params.rx_cqe_compress_admin);
+
 	priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type,
 					    BIT(priv->params.log_rq_size));
 	priv->params.rx_cq_moderation_usec =
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index c8b8d45..f345679 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -127,7 +127,7 @@ static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq,
 
 	for (i = update_owner_only; i < cqe_count;
 	     i++, cq->mini_arr_idx++, cqcc++) {
-		if (unlikely(cq->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE))
+		if (cq->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE)
 			mlx5e_read_mini_arr_slot(cq, cqcc);
 
 		mlx5e_decompress_cqe_no_hash(rq, cq, cqcc);
@@ -212,6 +212,11 @@ err_free_skb:
 	return -ENOMEM;
 }
 
+static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq)
+{
+	return rq->mpwqe_num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER;
+}
+
 static inline void
 mlx5e_dma_pre_sync_linear_mpwqe(struct device *pdev,
 				struct mlx5e_mpw_info *wi,
@@ -230,13 +235,13 @@ mlx5e_dma_pre_sync_fragmented_mpwqe(struct device *pdev,
 }
 
 static inline void
-mlx5e_add_skb_frag_linear_mpwqe(struct device *pdev,
+mlx5e_add_skb_frag_linear_mpwqe(struct mlx5e_rq *rq,
 				struct sk_buff *skb,
 				struct mlx5e_mpw_info *wi,
 				u32 page_idx, u32 frag_offset,
 				u32 len)
 {
-	unsigned int truesize =	ALIGN(len, MLX5_MPWRQ_STRIDE_SIZE);
+	unsigned int truesize =	ALIGN(len, rq->mpwqe_stride_sz);
 
 	wi->skbs_frags[page_idx]++;
 	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
@@ -245,15 +250,15 @@ mlx5e_add_skb_frag_linear_mpwqe(struct device *pdev,
 }
 
 static inline void
-mlx5e_add_skb_frag_fragmented_mpwqe(struct device *pdev,
+mlx5e_add_skb_frag_fragmented_mpwqe(struct mlx5e_rq *rq,
 				    struct sk_buff *skb,
 				    struct mlx5e_mpw_info *wi,
 				    u32 page_idx, u32 frag_offset,
 				    u32 len)
 {
-	unsigned int truesize =	ALIGN(len, MLX5_MPWRQ_STRIDE_SIZE);
+	unsigned int truesize =	ALIGN(len, rq->mpwqe_stride_sz);
 
-	dma_sync_single_for_cpu(pdev,
+	dma_sync_single_for_cpu(rq->pdev,
 				wi->umr.dma_info[page_idx].addr + frag_offset,
 				len, DMA_FROM_DEVICE);
 	wi->skbs_frags[page_idx]++;
@@ -293,7 +298,6 @@ mlx5e_copy_skb_header_fragmented_mpwqe(struct device *pdev,
 	skb_copy_to_linear_data_offset(skb, 0,
 				       page_address(dma_info->page) + offset,
 				       len);
-#if (MLX5_MPWRQ_SMALL_PACKET_THRESHOLD >= MLX5_MPWRQ_STRIDE_SIZE)
 	if (unlikely(offset + headlen > PAGE_SIZE)) {
 		dma_info++;
 		headlen_pg = len;
@@ -304,7 +308,6 @@ mlx5e_copy_skb_header_fragmented_mpwqe(struct device *pdev,
 					       page_address(dma_info->page),
 					       len);
 	}
-#endif
 }
 
 static u16 mlx5e_get_wqe_mtt_offset(u16 rq_ix, u16 wqe_ix)
@@ -430,7 +433,7 @@ static int mlx5e_alloc_rx_fragmented_mpwqe(struct mlx5e_rq *rq,
 	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
 		if (unlikely(mlx5e_alloc_and_map_page(rq, wi, i)))
 			goto err_unmap;
-		atomic_add(MLX5_MPWRQ_STRIDES_PER_PAGE,
+		atomic_add(mlx5e_mpwqe_strides_per_page(rq),
 			   &wi->umr.dma_info[i].page->_count);
 		wi->skbs_frags[i] = 0;
 	}
@@ -449,7 +452,7 @@ err_unmap:
 	while (--i >= 0) {
 		dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE,
 			       PCI_DMA_FROMDEVICE);
-		atomic_sub(MLX5_MPWRQ_STRIDES_PER_PAGE,
+		atomic_sub(mlx5e_mpwqe_strides_per_page(rq),
 			   &wi->umr.dma_info[i].page->_count);
 		put_page(wi->umr.dma_info[i].page);
 	}
@@ -474,7 +477,7 @@ void mlx5e_free_rx_fragmented_mpwqe(struct mlx5e_rq *rq,
 	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
 		dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE,
 			       PCI_DMA_FROMDEVICE);
-		atomic_sub(MLX5_MPWRQ_STRIDES_PER_PAGE - wi->skbs_frags[i],
+		atomic_sub(mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i],
 			   &wi->umr.dma_info[i].page->_count);
 		put_page(wi->umr.dma_info[i].page);
 	}
@@ -524,7 +527,7 @@ static int mlx5e_alloc_rx_linear_mpwqe(struct mlx5e_rq *rq,
 	 */
 	split_page(wi->dma_info.page, MLX5_MPWRQ_WQE_PAGE_ORDER);
 	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
-		atomic_add(MLX5_MPWRQ_STRIDES_PER_PAGE,
+		atomic_add(mlx5e_mpwqe_strides_per_page(rq),
 			   &wi->dma_info.page[i]._count);
 		wi->skbs_frags[i] = 0;
 	}
@@ -548,7 +551,7 @@ void mlx5e_free_rx_linear_mpwqe(struct mlx5e_rq *rq,
 	dma_unmap_page(rq->pdev, wi->dma_info.addr, rq->wqe_sz,
 		       PCI_DMA_FROMDEVICE);
 	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
-		atomic_sub(MLX5_MPWRQ_STRIDES_PER_PAGE - wi->skbs_frags[i],
+		atomic_sub(mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i],
 			   &wi->dma_info.page[i]._count);
 		put_page(&wi->dma_info.page[i]);
 	}
@@ -793,9 +796,9 @@ static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq,
 					   u32 cqe_bcnt,
 					   struct sk_buff *skb)
 {
-	u32 consumed_bytes = ALIGN(cqe_bcnt, MLX5_MPWRQ_STRIDE_SIZE);
+	u32 consumed_bytes = ALIGN(cqe_bcnt, rq->mpwqe_stride_sz);
 	u16 stride_ix      = mpwrq_get_cqe_stride_index(cqe);
-	u32 wqe_offset     = stride_ix * MLX5_MPWRQ_STRIDE_SIZE;
+	u32 wqe_offset     = stride_ix * rq->mpwqe_stride_sz;
 	u32 head_offset    = wqe_offset & (PAGE_SIZE - 1);
 	u32 page_idx       = wqe_offset >> PAGE_SHIFT;
 	u32 head_page_idx  = page_idx;
@@ -803,19 +806,17 @@ static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq,
 	u32 frag_offset    = head_offset + headlen;
 	u16 byte_cnt       = cqe_bcnt - headlen;
 
-#if (MLX5_MPWRQ_SMALL_PACKET_THRESHOLD >= MLX5_MPWRQ_STRIDE_SIZE)
 	if (unlikely(frag_offset >= PAGE_SIZE)) {
 		page_idx++;
 		frag_offset -= PAGE_SIZE;
 	}
-#endif
 	wi->dma_pre_sync(rq->pdev, wi, wqe_offset, consumed_bytes);
 
 	while (byte_cnt) {
 		u32 pg_consumed_bytes =
 			min_t(u32, PAGE_SIZE - frag_offset, byte_cnt);
 
-		wi->add_skb_frag(rq->pdev, skb, wi, page_idx, frag_offset,
+		wi->add_skb_frag(rq, skb, wi, page_idx, frag_offset,
 				 pg_consumed_bytes);
 		byte_cnt -= pg_consumed_bytes;
 		frag_offset = 0;
@@ -865,7 +866,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
 
 mpwrq_cqe_out:
-	if (likely(wi->consumed_strides < MLX5_MPWRQ_NUM_STRIDES))
+	if (likely(wi->consumed_strides < rq->mpwqe_num_strides))
 		return;
 
 	wi->free_wqe(rq, wi);
-- 
2.8.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ