Date:	Mon, 16 Nov 2009 12:01:09 +0200
From:	"Vladislav Zolotarov" <vladz@...adcom.com>
To:	davem@...emloft.net
cc:	netdev@...r.kernel.org, eilong@...adcom.com
Subject: [net-next] bnx2x: Handle Rx and Tx together in NAPI

Handle both Tx and Rx completions together in the NAPI context:
  - Saves status blocks.
  - Moves the Tx work from hard IRQ context into the NAPI handler.
  - Limits the Tx work done in one iteration to the same number of
packets as Rx, in order to ensure both fair workload balancing
relative to the other devices scheduled for NAPI on the local CPU and
sane Tx/Rx skb resource management from a single queue pair's
perspective.

Signed-off-by: Vladislav Zolotarov <vladz@...adcom.com>
Signed-off-by: Eilon Greenstein <eilong@...adcom.com>
---
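(Reviewer note, not part of the change: the per-queue poll flow this
patch converges on is roughly the sketch below. Illustration only - the
my_* names are hypothetical stand-ins for the bnx2x helpers; the real
logic is in bnx2x_poll() further down in the diff.)

static int my_poll(struct napi_struct *napi, int budget)
{
	struct my_fastpath *fp = container_of(napi, struct my_fastpath,
					      napi);
	int work_done = 0, more_tx = 0;

	/* Tx completions are bounded by the same packet count as Rx */
	if (my_has_tx_work(fp))
		more_tx = my_tx_int(fp, budget);

	if (my_has_rx_work(fp))
		work_done = my_rx_int(fp, budget);

	/* Unfinished Tx work: claim the full budget so the NAPI core
	 * keeps us scheduled, behind the other NAPIs on this CPU */
	if (more_tx)
		work_done = budget;

	if (work_done < budget &&
	    !my_has_rx_work(fp) && !my_has_tx_work(fp)) {
		napi_complete(napi);
		my_reenable_sb_irq(fp);	/* ack SB indices back to the IGU */
	}
	return work_done;
}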
 drivers/net/bnx2x.h      |   21 +--
 drivers/net/bnx2x_main.c |  356 +++++++++++++++++++---------------------
 2 files changed, 155 insertions(+), 222 deletions(-)
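(A second reviewer note on the interrupt re-enable path: it keeps the
existing "read the status block, then re-check for work" discipline.
Schematically, using the helpers from this patch - see the full comment
now placed inside bnx2x_poll():

	bnx2x_update_fpsb_idx(fp);  /* read the SB indices the chip DMA-writes */
	rmb();                      /* order those reads before the re-check */
	if (!bnx2x_has_rx_work(fp) && !bnx2x_has_tx_work(fp)) {
		napi_complete(napi);
		bnx2x_ack_sb(...);  /* write the consumed indices to the IGU */
	}

Without the rmb(), the SB index reads could be deferred until just
before bnx2x_ack_sb(); a "newer" index could then be acked to the IGU
while work is still pending, and no further interrupt would arrive for
this queue until the next SB update.)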

diff --git a/drivers/net/bnx2x.h b/drivers/net/bnx2x.h
index 928942b..602ab86 100644
--- a/drivers/net/bnx2x.h
+++ b/drivers/net/bnx2x.h
@@ -259,9 +259,6 @@ struct bnx2x_eth_q_stats {
 struct bnx2x_fastpath {
 
 	struct napi_struct	napi;
-
-	u8			is_rx_queue;
-
 	struct host_status_block *status_blk;
 	dma_addr_t		status_blk_mapping;
 
@@ -970,8 +967,7 @@ struct bnx2x {
 #define BNX2X_STATE_ERROR		0xf000
 
 	int			multi_mode;
-	int			num_rx_queues;
-	int			num_tx_queues;
+	int			num_queues;
 
 	u32			rx_mode;
 #define BNX2X_RX_MODE_NONE		0
@@ -1074,20 +1070,15 @@ struct bnx2x {
 };
 
 
-#define BNX2X_MAX_QUEUES(bp)	(IS_E1HMF(bp) ? (MAX_CONTEXT/(2 * E1HVN_MAX)) \
-					      : (MAX_CONTEXT/2))
-#define BNX2X_NUM_QUEUES(bp)	(bp->num_rx_queues + bp->num_tx_queues)
-#define is_multi(bp)		(BNX2X_NUM_QUEUES(bp) > 2)
+#define BNX2X_MAX_QUEUES(bp)	(IS_E1HMF(bp) ? (MAX_CONTEXT/E1HVN_MAX) \
+					      : MAX_CONTEXT)
+#define BNX2X_NUM_QUEUES(bp)	(bp->num_queues)
+#define is_multi(bp)		(BNX2X_NUM_QUEUES(bp) > 1)
 
-#define for_each_rx_queue(bp, var) \
-			for (var = 0; var < bp->num_rx_queues; var++)
-#define for_each_tx_queue(bp, var) \
-			for (var = bp->num_rx_queues; \
-			     var < BNX2X_NUM_QUEUES(bp); var++)
 #define for_each_queue(bp, var) \
 			for (var = 0; var < BNX2X_NUM_QUEUES(bp); var++)
 #define for_each_nondefault_queue(bp, var) \
-			for (var = 1; var < bp->num_rx_queues; var++)
+			for (var = 1; var < BNX2X_NUM_QUEUES(bp); var++)
 
 
 void bnx2x_read_dmae(struct bnx2x *bp, u32 src_addr, u32 len32);
diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c
index e2cf686..75af1d4 100644
--- a/drivers/net/bnx2x_main.c
+++ b/drivers/net/bnx2x_main.c
@@ -57,7 +57,7 @@
 #include "bnx2x_init_ops.h"
 #include "bnx2x_dump.h"
 
-#define DRV_MODULE_VERSION	"1.52.1-4"
+#define DRV_MODULE_VERSION	"1.52.1-5"
 #define DRV_MODULE_RELDATE	"2009/11/09"
 #define BNX2X_BC_VER		0x040200
 
@@ -91,15 +91,10 @@ module_param(multi_mode, int, 0);
 MODULE_PARM_DESC(multi_mode, " Multi queue mode "
 			     "(0 Disable; 1 Enable (default))");
 
-static int num_rx_queues;
-module_param(num_rx_queues, int, 0);
-MODULE_PARM_DESC(num_rx_queues, " Number of Rx queues for multi_mode=1"
-				" (default is half number of CPUs)");
-
-static int num_tx_queues;
-module_param(num_tx_queues, int, 0);
-MODULE_PARM_DESC(num_tx_queues, " Number of Tx queues for multi_mode=1"
-				" (default is half number of CPUs)");
+static int num_queues;
+module_param(num_queues, int, 0);
+MODULE_PARM_DESC(num_queues, " Number of queues for multi_mode=1"
+				" (default is the number of CPUs)");
 
 static int disable_tpa;
 module_param(disable_tpa, int, 0);
@@ -558,7 +553,7 @@ static void bnx2x_panic_dump(struct bnx2x *bp)
 		  bp->def_att_idx, bp->attn_state, bp->spq_prod_idx);
 
 	/* Rx */
-	for_each_rx_queue(bp, i) {
+	for_each_queue(bp, i) {
 		struct bnx2x_fastpath *fp = &bp->fp[i];
 
 		BNX2X_ERR("fp%d: rx_bd_prod(%x)  rx_bd_cons(%x)"
@@ -575,7 +570,7 @@ static void bnx2x_panic_dump(struct bnx2x *bp)
 	}
 
 	/* Tx */
-	for_each_tx_queue(bp, i) {
+	for_each_queue(bp, i) {
 		struct bnx2x_fastpath *fp = &bp->fp[i];
 
 		BNX2X_ERR("fp%d: tx_pkt_prod(%x)  tx_pkt_cons(%x)"
@@ -590,7 +585,7 @@ static void bnx2x_panic_dump(struct bnx2x *bp)
 
 	/* Rings */
 	/* Rx */
-	for_each_rx_queue(bp, i) {
+	for_each_queue(bp, i) {
 		struct bnx2x_fastpath *fp = &bp->fp[i];
 
 		start = RX_BD(le16_to_cpu(*fp->rx_cons_sb) - 10);
@@ -624,7 +619,7 @@ static void bnx2x_panic_dump(struct bnx2x *bp)
 	}
 
 	/* Tx */
-	for_each_tx_queue(bp, i) {
+	for_each_queue(bp, i) {
 		struct bnx2x_fastpath *fp = &bp->fp[i];
 
 		start = TX_BD(le16_to_cpu(*fp->tx_cons_sb) - 10);
@@ -792,21 +787,13 @@ static inline void bnx2x_ack_sb(struct bnx2x *bp, u8 sb_id,
 	barrier();
 }
 
-static inline u16 bnx2x_update_fpsb_idx(struct bnx2x_fastpath *fp)
+static inline void bnx2x_update_fpsb_idx(struct bnx2x_fastpath *fp)
 {
 	struct host_status_block *fpsb = fp->status_blk;
-	u16 rc = 0;
 
 	barrier(); /* status block is written to by the chip */
-	if (fp->fp_c_idx != fpsb->c_status_block.status_block_index) {
-		fp->fp_c_idx = fpsb->c_status_block.status_block_index;
-		rc |= 1;
-	}
-	if (fp->fp_u_idx != fpsb->u_status_block.status_block_index) {
-		fp->fp_u_idx = fpsb->u_status_block.status_block_index;
-		rc |= 2;
-	}
-	return rc;
+	fp->fp_c_idx = fpsb->c_status_block.status_block_index;
+	fp->fp_u_idx = fpsb->u_status_block.status_block_index;
 }
 
 static u16 bnx2x_ack_int(struct bnx2x *bp)
@@ -846,6 +833,9 @@ static u16 bnx2x_free_tx_pkt(struct bnx2x *bp, struct bnx2x_fastpath *fp,
 	u16 bd_idx = TX_BD(tx_buf->first_bd), new_cons;
 	int nbd;
 
+	/* prefetch skb end pointer to speed up dev_kfree_skb() */
+	prefetch(&skb->end);
+
 	DP(BNX2X_MSG_OFF, "pkt_idx %d  buff @(%p)->skb %p\n",
 	   idx, tx_buf, skb);
 
@@ -890,7 +880,7 @@ static u16 bnx2x_free_tx_pkt(struct bnx2x *bp, struct bnx2x_fastpath *fp,
 
 	/* release skb */
 	WARN_ON(!skb);
-	dev_kfree_skb_any(skb);
+	dev_kfree_skb(skb);
 	tx_buf->first_bd = 0;
 	tx_buf->skb = NULL;
 
@@ -920,23 +910,33 @@ static inline u16 bnx2x_tx_avail(struct bnx2x_fastpath *fp)
 	return (s16)(fp->bp->tx_ring_size) - used;
 }
 
-static void bnx2x_tx_int(struct bnx2x_fastpath *fp)
+static inline int bnx2x_has_tx_work(struct bnx2x_fastpath *fp)
+{
+	u16 hw_cons;
+
+	/* Tell compiler that status block fields can change */
+	barrier();
+	hw_cons = le16_to_cpu(*fp->tx_cons_sb);
+	return hw_cons != fp->tx_pkt_cons;
+}
+
+static int bnx2x_tx_int(struct bnx2x_fastpath *fp, int num)
 {
 	struct bnx2x *bp = fp->bp;
 	struct netdev_queue *txq;
 	u16 hw_cons, sw_cons, bd_cons = fp->tx_bd_cons;
-	int done = 0;
+	int cnt;
 
 #ifdef BNX2X_STOP_ON_ERROR
 	if (unlikely(bp->panic))
-		return;
+		return -1;
 #endif
 
-	txq = netdev_get_tx_queue(bp->dev, fp->index - bp->num_rx_queues);
+	txq = netdev_get_tx_queue(bp->dev, fp->index);
 	hw_cons = le16_to_cpu(*fp->tx_cons_sb);
 	sw_cons = fp->tx_pkt_cons;
 
-	while (sw_cons != hw_cons) {
+	for (cnt = 0; (cnt < num) && (sw_cons != hw_cons); cnt++) {
 		u16 pkt_cons;
 
 		pkt_cons = TX_BD(sw_cons);
@@ -953,7 +953,6 @@ static void bnx2x_tx_int(struct bnx2x_fastpath *fp)
 */
 		bd_cons = bnx2x_free_tx_pkt(bp, fp, pkt_cons);
 		sw_cons++;
-		done++;
 	}
 
 	fp->tx_pkt_cons = sw_cons;
@@ -975,6 +974,7 @@ static void bnx2x_tx_int(struct bnx2x_fastpath *fp)
 		    (bnx2x_tx_avail(fp) >= MAX_SKB_FRAGS + 3))
 			netif_tx_wake_queue(txq);
 	}
+	return (cnt >= num) ? 1 : 0;
 }
 
 #ifdef BCM_CNIC
@@ -1742,27 +1742,13 @@ static irqreturn_t bnx2x_msix_fp_int(int irq, void *fp_cookie)
 	if (unlikely(bp->panic))
 		return IRQ_HANDLED;
 #endif
-	/* Handle Rx or Tx according to MSI-X vector */
-	if (fp->is_rx_queue) {
-		prefetch(fp->rx_cons_sb);
-		prefetch(&fp->status_blk->u_status_block.status_block_index);
-
-		napi_schedule(&bnx2x_fp(bp, fp->index, napi));
-
-	} else {
-		prefetch(fp->tx_cons_sb);
-		prefetch(&fp->status_blk->c_status_block.status_block_index);
 
-		bnx2x_update_fpsb_idx(fp);
-		rmb();
-		bnx2x_tx_int(fp);
-
-		/* Re-enable interrupts */
-		bnx2x_ack_sb(bp, fp->sb_id, USTORM_ID,
-			     le16_to_cpu(fp->fp_u_idx), IGU_INT_NOP, 1);
-		bnx2x_ack_sb(bp, fp->sb_id, CSTORM_ID,
-			     le16_to_cpu(fp->fp_c_idx), IGU_INT_ENABLE, 1);
-	}
+	/* Handle Rx and Tx according to MSI-X vector */
+	prefetch(fp->rx_cons_sb);
+	prefetch(fp->tx_cons_sb);
+	prefetch(&fp->status_blk->u_status_block.status_block_index);
+	prefetch(&fp->status_blk->c_status_block.status_block_index);
+	napi_schedule(&bnx2x_fp(bp, fp->index, napi));
 
 	return IRQ_HANDLED;
 }
@@ -1797,31 +1783,14 @@ static irqreturn_t bnx2x_interrupt(int irq, void *dev_instance)
 
 		mask = 0x2 << fp->sb_id;
 		if (status & mask) {
-			/* Handle Rx or Tx according to SB id */
-			if (fp->is_rx_queue) {
-				prefetch(fp->rx_cons_sb);
-				prefetch(&fp->status_blk->u_status_block.
-							status_block_index);
-
-				napi_schedule(&bnx2x_fp(bp, fp->index, napi));
-
-			} else {
-				prefetch(fp->tx_cons_sb);
-				prefetch(&fp->status_blk->c_status_block.
-							status_block_index);
-
-				bnx2x_update_fpsb_idx(fp);
-				rmb();
-				bnx2x_tx_int(fp);
-
-				/* Re-enable interrupts */
-				bnx2x_ack_sb(bp, fp->sb_id, USTORM_ID,
-					     le16_to_cpu(fp->fp_u_idx),
-					     IGU_INT_NOP, 1);
-				bnx2x_ack_sb(bp, fp->sb_id, CSTORM_ID,
-					     le16_to_cpu(fp->fp_c_idx),
-					     IGU_INT_ENABLE, 1);
-			}
+			/* Handle Rx and Tx according to SB id */
+			prefetch(fp->rx_cons_sb);
+			prefetch(&fp->status_blk->u_status_block.
+						status_block_index);
+			prefetch(fp->tx_cons_sb);
+			prefetch(&fp->status_blk->c_status_block.
+						status_block_index);
+			napi_schedule(&bnx2x_fp(bp, fp->index, napi));
 			status &= ~mask;
 		}
 	}
@@ -4027,7 +3996,7 @@ static int bnx2x_storm_stats_update(struct bnx2x *bp)
 	estats->no_buff_discard_hi = 0;
 	estats->no_buff_discard_lo = 0;
 
-	for_each_rx_queue(bp, i) {
+	for_each_queue(bp, i) {
 		struct bnx2x_fastpath *fp = &bp->fp[i];
 		int cl_id = fp->cl_id;
 		struct tstorm_per_client_stats *tclient =
@@ -4244,7 +4213,7 @@ static void bnx2x_net_stats_update(struct bnx2x *bp)
 	nstats->tx_bytes = bnx2x_hilo(&estats->total_bytes_transmitted_hi);
 
 	nstats->rx_dropped = estats->mac_discard;
-	for_each_rx_queue(bp, i)
+	for_each_queue(bp, i)
 		nstats->rx_dropped +=
 			le32_to_cpu(bp->fp[i].old_tclient.checksum_discard);
 
@@ -4298,7 +4267,7 @@ static void bnx2x_drv_stats_update(struct bnx2x *bp)
 	estats->rx_err_discard_pkt = 0;
 	estats->rx_skb_alloc_failed = 0;
 	estats->hw_csum_err = 0;
-	for_each_rx_queue(bp, i) {
+	for_each_queue(bp, i) {
 		struct bnx2x_eth_q_stats *qstats = &bp->fp[i].eth_q_stats;
 
 		estats->driver_xoff += qstats->driver_xoff;
@@ -4329,7 +4298,7 @@ static void bnx2x_stats_update(struct bnx2x *bp)
 
 	if (bp->msglevel & NETIF_MSG_TIMER) {
 		struct bnx2x_fastpath *fp0_rx = bp->fp;
-		struct bnx2x_fastpath *fp0_tx = &(bp->fp[bp->num_rx_queues]);
+		struct bnx2x_fastpath *fp0_tx = bp->fp;
 		struct tstorm_per_client_stats *old_tclient =
 							&bp->fp->old_tclient;
 		struct bnx2x_eth_q_stats *qstats = &bp->fp->eth_q_stats;
@@ -4681,7 +4650,7 @@ static void bnx2x_timer(unsigned long data)
 		struct bnx2x_fastpath *fp = &bp->fp[0];
 		int rc;
 
-		bnx2x_tx_int(fp);
+		bnx2x_tx_int(fp, 1000);
 		rc = bnx2x_rx_int(fp, 1000);
 	}
 
@@ -4984,7 +4953,7 @@ static void bnx2x_init_rx_rings(struct bnx2x *bp)
 
 	if (bp->flags & TPA_ENABLE_FLAG) {
 
-		for_each_rx_queue(bp, j) {
+		for_each_queue(bp, j) {
 			struct bnx2x_fastpath *fp = &bp->fp[j];
 
 			for (i = 0; i < max_agg_queues; i++) {
@@ -5007,16 +4976,13 @@ static void bnx2x_init_rx_rings(struct bnx2x *bp)
 		}
 	}
 
-	for_each_rx_queue(bp, j) {
+	for_each_queue(bp, j) {
 		struct bnx2x_fastpath *fp = &bp->fp[j];
 
 		fp->rx_bd_cons = 0;
 		fp->rx_cons_sb = BNX2X_RX_SB_INDEX;
 		fp->rx_bd_cons_sb = BNX2X_RX_SB_BD_INDEX;
 
-		/* Mark queue as Rx */
-		fp->is_rx_queue = 1;
-
 		/* "next page" elements initialization */
 		/* SGE ring */
 		for (i = 1; i <= NUM_RX_SGE_PAGES; i++) {
@@ -5122,7 +5088,7 @@ static void bnx2x_init_tx_ring(struct bnx2x *bp)
 {
 	int i, j;
 
-	for_each_tx_queue(bp, j) {
+	for_each_queue(bp, j) {
 		struct bnx2x_fastpath *fp = &bp->fp[j];
 
 		for (i = 1; i <= NUM_TX_RINGS; i++) {
@@ -5148,10 +5114,6 @@ static void bnx2x_init_tx_ring(struct bnx2x *bp)
 		fp->tx_cons_sb = BNX2X_TX_SB_INDEX;
 		fp->tx_pkt = 0;
 	}
-
-	/* clean tx statistics */
-	for_each_rx_queue(bp, i)
-		bnx2x_fp(bp, i, tx_pkt) = 0;
 }
 
 static void bnx2x_init_sp_ring(struct bnx2x *bp)
@@ -5180,7 +5142,8 @@ static void bnx2x_init_context(struct bnx2x *bp)
 {
 	int i;
 
-	for_each_rx_queue(bp, i) {
+	/* Rx */
+	for_each_queue(bp, i) {
 		struct eth_context *context = bnx2x_sp(bp, context[i].eth);
 		struct bnx2x_fastpath *fp = &bp->fp[i];
 		u8 cl_id = fp->cl_id;
@@ -5232,10 +5195,11 @@ static void bnx2x_init_context(struct bnx2x *bp)
 					       ETH_CONNECTION_TYPE);
 	}
 
-	for_each_tx_queue(bp, i) {
+	/* Tx */
+	for_each_queue(bp, i) {
 		struct bnx2x_fastpath *fp = &bp->fp[i];
 		struct eth_context *context =
-			bnx2x_sp(bp, context[i - bp->num_rx_queues].eth);
+			bnx2x_sp(bp, context[i].eth);
 
 		context->cstorm_st_context.sb_index_number =
 						C_SB_ETH_TX_CQ_INDEX;
@@ -5263,7 +5227,7 @@ static void bnx2x_init_ind_table(struct bnx2x *bp)
 	for (i = 0; i < TSTORM_INDIRECTION_TABLE_SIZE; i++)
 		REG_WR8(bp, BAR_TSTRORM_INTMEM +
 			TSTORM_INDIRECTION_TABLE_OFFSET(func) + i,
-			bp->fp->cl_id + (i % bp->num_rx_queues));
+			bp->fp->cl_id + (i % bp->num_queues));
 }
 
 static void bnx2x_set_client_config(struct bnx2x *bp)
@@ -5507,7 +5471,7 @@ static void bnx2x_init_internal_func(struct bnx2x *bp)
 		min((u32)(min((u32)8, (u32)MAX_SKB_FRAGS) *
 			  SGE_PAGE_SIZE * PAGES_PER_SGE),
 		    (u32)0xffff);
-	for_each_rx_queue(bp, i) {
+	for_each_queue(bp, i) {
 		struct bnx2x_fastpath *fp = &bp->fp[i];
 
 		REG_WR(bp, BAR_USTRORM_INTMEM +
@@ -5542,7 +5506,7 @@ static void bnx2x_init_internal_func(struct bnx2x *bp)
 		rx_pause.cqe_thr_high = 350;
 		rx_pause.sge_thr_high = 0;
 
-		for_each_rx_queue(bp, i) {
+		for_each_queue(bp, i) {
 			struct bnx2x_fastpath *fp = &bp->fp[i];
 
 			if (!fp->disable_tpa) {
@@ -5637,9 +5601,6 @@ static void bnx2x_nic_init(struct bnx2x *bp, u32 load_code)
 #else
 		fp->sb_id = fp->cl_id;
 #endif
-		/* Suitable Rx and Tx SBs are served by the same client */
-		if (i >= bp->num_rx_queues)
-			fp->cl_id -= bp->num_rx_queues;
 		DP(NETIF_MSG_IFUP,
 		   "queue[%d]:  bnx2x_init_sb(%p,%p)  cl_id %d  sb %d\n",
 		   i, bp, fp->status_blk, fp->cl_id, fp->sb_id);
@@ -6749,7 +6710,7 @@ static void bnx2x_free_mem(struct bnx2x *bp)
 			       sizeof(struct host_status_block));
 	}
 	/* Rx */
-	for_each_rx_queue(bp, i) {
+	for_each_queue(bp, i) {
 
 		/* fastpath rx rings: rx_buf rx_desc rx_comp */
 		BNX2X_FREE(bnx2x_fp(bp, i, rx_buf_ring));
@@ -6769,7 +6730,7 @@ static void bnx2x_free_mem(struct bnx2x *bp)
 			       BCM_PAGE_SIZE * NUM_RX_SGE_PAGES);
 	}
 	/* Tx */
-	for_each_tx_queue(bp, i) {
+	for_each_queue(bp, i) {
 
 		/* fastpath tx rings: tx_buf tx_desc */
 		BNX2X_FREE(bnx2x_fp(bp, i, tx_buf_ring));
@@ -6831,7 +6792,7 @@ static int bnx2x_alloc_mem(struct bnx2x *bp)
 				sizeof(struct host_status_block));
 	}
 	/* Rx */
-	for_each_rx_queue(bp, i) {
+	for_each_queue(bp, i) {
 
 		/* fastpath rx rings: rx_buf rx_desc rx_comp */
 		BNX2X_ALLOC(bnx2x_fp(bp, i, rx_buf_ring),
@@ -6853,7 +6814,7 @@ static int bnx2x_alloc_mem(struct bnx2x *bp)
 				BCM_PAGE_SIZE * NUM_RX_SGE_PAGES);
 	}
 	/* Tx */
-	for_each_tx_queue(bp, i) {
+	for_each_queue(bp, i) {
 
 		/* fastpath tx rings: tx_buf tx_desc */
 		BNX2X_ALLOC(bnx2x_fp(bp, i, tx_buf_ring),
@@ -6909,7 +6870,7 @@ static void bnx2x_free_tx_skbs(struct bnx2x *bp)
 {
 	int i;
 
-	for_each_tx_queue(bp, i) {
+	for_each_queue(bp, i) {
 		struct bnx2x_fastpath *fp = &bp->fp[i];
 
 		u16 bd_cons = fp->tx_bd_cons;
@@ -6927,7 +6888,7 @@ static void bnx2x_free_rx_skbs(struct bnx2x *bp)
 {
 	int i, j;
 
-	for_each_rx_queue(bp, j) {
+	for_each_queue(bp, j) {
 		struct bnx2x_fastpath *fp = &bp->fp[j];
 
 		for (i = 0; i < NUM_RX_BD; i++) {
@@ -7042,12 +7003,8 @@ static int bnx2x_req_msix_irqs(struct bnx2x *bp)
 #endif
 	for_each_queue(bp, i) {
 		struct bnx2x_fastpath *fp = &bp->fp[i];
-
-		if (i < bp->num_rx_queues)
-			sprintf(fp->name, "%s-rx-%d", bp->dev->name, i);
-		else
-			sprintf(fp->name, "%s-tx-%d",
-				bp->dev->name, i - bp->num_rx_queues);
+		snprintf(fp->name, sizeof(fp->name), "%s-fp-%d",
+			 bp->dev->name, i);
 
 		rc = request_irq(bp->msix_table[i + offset].vector,
 				 bnx2x_msix_fp_int, 0, fp->name, fp);
@@ -7106,7 +7063,7 @@ static void bnx2x_napi_enable(struct bnx2x *bp)
 {
 	int i;
 
-	for_each_rx_queue(bp, i)
+	for_each_queue(bp, i)
 		napi_enable(&bnx2x_fp(bp, i, napi));
 }
 
@@ -7114,7 +7071,7 @@ static void bnx2x_napi_disable(struct bnx2x *bp)
 {
 	int i;
 
-	for_each_rx_queue(bp, i)
+	for_each_queue(bp, i)
 		napi_disable(&bnx2x_fp(bp, i, napi));
 }
 
@@ -7410,88 +7367,60 @@ static int bnx2x_setup_multi(struct bnx2x *bp, int index)
 
 static int bnx2x_poll(struct napi_struct *napi, int budget);
 
-static void bnx2x_set_int_mode_msix(struct bnx2x *bp, int *num_rx_queues_out,
-				    int *num_tx_queues_out)
+static void bnx2x_set_num_queues_msix(struct bnx2x *bp)
 {
-	int _num_rx_queues = 0, _num_tx_queues = 0;
 
 	switch (bp->multi_mode) {
 	case ETH_RSS_MODE_DISABLED:
-		_num_rx_queues = 1;
-		_num_tx_queues = 1;
+		bp->num_queues = 1;
 		break;
 
 	case ETH_RSS_MODE_REGULAR:
-		if (num_rx_queues)
-			_num_rx_queues = min_t(u32, num_rx_queues,
-					       BNX2X_MAX_QUEUES(bp));
-		else
-			_num_rx_queues = min_t(u32, num_online_cpus(),
-					       BNX2X_MAX_QUEUES(bp));
-
-		if (num_tx_queues)
-			_num_tx_queues = min_t(u32, num_tx_queues,
-					       BNX2X_MAX_QUEUES(bp));
+		if (num_queues)
+			bp->num_queues = min_t(u32, num_queues,
+						  BNX2X_MAX_QUEUES(bp));
 		else
-			_num_tx_queues = min_t(u32, num_online_cpus(),
-					       BNX2X_MAX_QUEUES(bp));
-
-		/* There must be not more Tx queues than Rx queues */
-		if (_num_tx_queues > _num_rx_queues) {
-			BNX2X_ERR("number of tx queues (%d) > "
-				  "number of rx queues (%d)"
-				  "  defaulting to %d\n",
-				  _num_tx_queues, _num_rx_queues,
-				  _num_rx_queues);
-			_num_tx_queues = _num_rx_queues;
-		}
+			bp->num_queues = min_t(u32, num_online_cpus(),
+						  BNX2X_MAX_QUEUES(bp));
 		break;
 
 
 	default:
-		_num_rx_queues = 1;
-		_num_tx_queues = 1;
+		bp->num_queues = 1;
 		break;
 	}
-
-	*num_rx_queues_out = _num_rx_queues;
-	*num_tx_queues_out = _num_tx_queues;
 }
 
-static int bnx2x_set_int_mode(struct bnx2x *bp)
+static int bnx2x_set_num_queues(struct bnx2x *bp)
 {
 	int rc = 0;
 
 	switch (int_mode) {
 	case INT_MODE_INTx:
 	case INT_MODE_MSI:
-		bp->num_rx_queues = 1;
-		bp->num_tx_queues = 1;
+		bp->num_queues = 1;
 		DP(NETIF_MSG_IFUP, "set number of queues to 1\n");
 		break;
 
 	case INT_MODE_MSIX:
 	default:
-		/* Set interrupt mode according to bp->multi_mode value */
-		bnx2x_set_int_mode_msix(bp, &bp->num_rx_queues,
-					&bp->num_tx_queues);
+		/* Set number of queues according to bp->multi_mode value */
+		bnx2x_set_num_queues_msix(bp);
 
-		DP(NETIF_MSG_IFUP, "set number of queues to: rx %d tx %d\n",
-		   bp->num_rx_queues, bp->num_tx_queues);
+		DP(NETIF_MSG_IFUP, "set number of queues to %d\n",
+		   bp->num_queues);
 
 		/* if we can't use MSI-X we only need one fp,
 		 * so try to enable MSI-X with the requested number of fp's
 		 * and fallback to MSI or legacy INTx with one fp
 		 */
 		rc = bnx2x_enable_msix(bp);
-		if (rc) {
+		if (rc)
 			/* failed to enable MSI-X */
-			bp->num_rx_queues = 1;
-			bp->num_tx_queues = 1;
-		}
+			bp->num_queues = 1;
 		break;
 	}
-	bp->dev->real_num_tx_queues = bp->num_tx_queues;
+	bp->dev->real_num_tx_queues = bp->num_queues;
 	return rc;
 }
 
@@ -7513,16 +7442,16 @@ static int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
 
 	bp->state = BNX2X_STATE_OPENING_WAIT4_LOAD;
 
-	rc = bnx2x_set_int_mode(bp);
+	rc = bnx2x_set_num_queues(bp);
 
 	if (bnx2x_alloc_mem(bp))
 		return -ENOMEM;
 
-	for_each_rx_queue(bp, i)
+	for_each_queue(bp, i)
 		bnx2x_fp(bp, i, disable_tpa) =
 					((bp->flags & TPA_ENABLE_FLAG) == 0);
 
-	for_each_rx_queue(bp, i)
+	for_each_queue(bp, i)
 		netif_napi_add(bp->dev, &bnx2x_fp(bp, i, napi),
 			       bnx2x_poll, 128);
 
@@ -7536,7 +7465,7 @@ static int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
 		}
 	} else {
 		/* Fall to INTx if failed to enable MSI-X due to lack of
-		   memory (in bnx2x_set_int_mode()) */
+		   memory (in bnx2x_set_num_queues()) */
 		if ((rc != -ENOMEM) && (int_mode != INT_MODE_INTx))
 			bnx2x_enable_msi(bp);
 		bnx2x_ack_int(bp);
@@ -7730,14 +7659,14 @@ load_error3:
 	bp->port.pmf = 0;
 	/* Free SKBs, SGEs, TPA pool and driver internals */
 	bnx2x_free_skbs(bp);
-	for_each_rx_queue(bp, i)
+	for_each_queue(bp, i)
 		bnx2x_free_rx_sge_range(bp, bp->fp + i, NUM_RX_SGE);
 load_error2:
 	/* Release IRQs */
 	bnx2x_free_irq(bp);
 load_error1:
 	bnx2x_napi_disable(bp);
-	for_each_rx_queue(bp, i)
+	for_each_queue(bp, i)
 		netif_napi_del(&bnx2x_fp(bp, i, napi));
 	bnx2x_free_mem(bp);
 
@@ -7928,13 +7857,13 @@ static int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode)
 	bnx2x_free_irq(bp);
 
 	/* Wait until tx fastpath tasks complete */
-	for_each_tx_queue(bp, i) {
+	for_each_queue(bp, i) {
 		struct bnx2x_fastpath *fp = &bp->fp[i];
 
 		cnt = 1000;
 		while (bnx2x_has_tx_work_unload(fp)) {
 
-			bnx2x_tx_int(fp);
+			bnx2x_tx_int(fp, 1000);
 			if (!cnt) {
 				BNX2X_ERR("timeout waiting for queue[%d]\n",
 					  i);
@@ -8071,9 +8000,9 @@ unload_error:
 
 	/* Free SKBs, SGEs, TPA pool and driver internals */
 	bnx2x_free_skbs(bp);
-	for_each_rx_queue(bp, i)
+	for_each_queue(bp, i)
 		bnx2x_free_rx_sge_range(bp, bp->fp + i, NUM_RX_SGE);
-	for_each_rx_queue(bp, i)
+	for_each_queue(bp, i)
 		netif_napi_del(&bnx2x_fp(bp, i, napi));
 	bnx2x_free_mem(bp);
 
@@ -10269,7 +10198,7 @@ static int bnx2x_run_loopback(struct bnx2x *bp, int loopback_mode, u8 link_up)
 	struct sk_buff *skb;
 	unsigned char *packet;
 	struct bnx2x_fastpath *fp_rx = &bp->fp[0];
-	struct bnx2x_fastpath *fp_tx = &bp->fp[bp->num_rx_queues];
+	struct bnx2x_fastpath *fp_tx = &bp->fp[0];
 	u16 tx_start_idx, tx_idx;
 	u16 rx_start_idx, rx_idx;
 	u16 pkt_prod, bd_prod;
@@ -10346,7 +10275,7 @@ static int bnx2x_run_loopback(struct bnx2x *bp, int loopback_mode, u8 link_up)
 
 	fp_tx->tx_db.data.prod += 2;
 	barrier();
-	DOORBELL(bp, fp_tx->index - bp->num_rx_queues, fp_tx->tx_db.raw);
+	DOORBELL(bp, fp_tx->index, fp_tx->tx_db.raw);
 
 	mmiowb();
 
@@ -10725,7 +10654,7 @@ static int bnx2x_get_sset_count(struct net_device *dev, int stringset)
 	switch(stringset) {
 	case ETH_SS_STATS:
 		if (is_multi(bp)) {
-			num_stats = BNX2X_NUM_Q_STATS * bp->num_rx_queues;
+			num_stats = BNX2X_NUM_Q_STATS * bp->num_queues;
 			if (!IS_E1HMF_MODE_STAT(bp))
 				num_stats += BNX2X_NUM_STATS;
 		} else {
@@ -10756,7 +10685,7 @@ static void bnx2x_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
 	case ETH_SS_STATS:
 		if (is_multi(bp)) {
 			k = 0;
-			for_each_rx_queue(bp, i) {
+			for_each_queue(bp, i) {
 				for (j = 0; j < BNX2X_NUM_Q_STATS; j++)
 					sprintf(buf + (k + j)*ETH_GSTRING_LEN,
 						bnx2x_q_stats_arr[j].string, i);
@@ -10793,7 +10722,7 @@ static void bnx2x_get_ethtool_stats(struct net_device *dev,
 
 	if (is_multi(bp)) {
 		k = 0;
-		for_each_rx_queue(bp, i) {
+		for_each_queue(bp, i) {
 			hw_stats = (u32 *)&bp->fp[i].eth_q_stats;
 			for (j = 0; j < BNX2X_NUM_Q_STATS; j++) {
 				if (bnx2x_q_stats_arr[j].size == 0) {
@@ -10989,6 +10918,7 @@ static inline int bnx2x_has_rx_work(struct bnx2x_fastpath *fp)
 
 static int bnx2x_poll(struct napi_struct *napi, int budget)
 {
+	int more_tx = 0;
 	struct bnx2x_fastpath *fp = container_of(napi, struct bnx2x_fastpath,
 						 napi);
 	struct bnx2x *bp = fp->bp;
@@ -11002,38 +10932,51 @@ static int bnx2x_poll(struct napi_struct *napi, int budget)
 	prefetch(fp->rx_buf_ring[RX_BD(fp->rx_bd_cons)].skb);
 	prefetch((char *)(fp->rx_buf_ring[RX_BD(fp->rx_bd_cons)].skb) + 256);
 
-	bnx2x_update_fpsb_idx(fp);
+	if (bnx2x_has_tx_work(fp))
+		more_tx = bnx2x_tx_int(fp, budget);
 
-	if (bnx2x_has_rx_work(fp)) {
+	if (bnx2x_has_rx_work(fp))
 		work_done = bnx2x_rx_int(fp, budget);
 
-		/* must not complete if we consumed full budget */
-		if (work_done >= budget)
-			goto poll_again;
-	}
+	/* If there is more Tx work - push this NAPI to the end of the
+	   list to give the other local NAPIs a chance to run. */
+	if (more_tx)
+		work_done = budget;
 
-	/* bnx2x_has_rx_work() reads the status block, thus we need to
-	 * ensure that status block indices have been actually read
-	 * (bnx2x_update_fpsb_idx) prior to this check (bnx2x_has_rx_work)
-	 * so that we won't write the "newer" value of the status block to IGU
-	 * (if there was a DMA right after bnx2x_has_rx_work and
-	 * if there is no rmb, the memory reading (bnx2x_update_fpsb_idx)
-	 * may be postponed to right before bnx2x_ack_sb). In this case
-	 * there will never be another interrupt until there is another update
-	 * of the status block, while there is still unhandled work.
-	 */
-	rmb();
+	/* must not complete if we consumed full budget */
+	if (work_done >= budget)
+		goto poll_again;
 
-	if (!bnx2x_has_rx_work(fp)) {
+	/* Fall out from the NAPI loop if needed */
+	if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
 #ifdef BNX2X_STOP_ON_ERROR
 poll_panic:
 #endif
-		napi_complete(napi);
+		bnx2x_update_fpsb_idx(fp);
+		/*
+		 * bnx2x_has_rx_work() reads the status block, thus we need
+		 * to ensure that status block indices have been actually read
+		 * (bnx2x_update_fpsb_idx) prior to this check
+		 * (bnx2x_has_rx_work) so that we won't write the "newer"
+		 * value of the status block to IGU (if there was a DMA right
+		 * after bnx2x_has_rx_work and if there is no rmb, the memory
+		 * reading (bnx2x_update_fpsb_idx) may be postponed to right
+		 * before bnx2x_ack_sb). In this case there will never be
+		 * another interrupt until there is another update of the
+		 * status block, while there is still unhandled work.
+		 */
+		rmb();
 
-		bnx2x_ack_sb(bp, fp->sb_id, USTORM_ID,
-			     le16_to_cpu(fp->fp_u_idx), IGU_INT_NOP, 1);
-		bnx2x_ack_sb(bp, fp->sb_id, CSTORM_ID,
-			     le16_to_cpu(fp->fp_c_idx), IGU_INT_ENABLE, 1);
+		if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
+			napi_complete(napi);
+			/* Re-enable interrupts */
+			bnx2x_ack_sb(bp, fp->sb_id, CSTORM_ID,
+				     le16_to_cpu(fp->fp_c_idx),
+				     IGU_INT_NOP, 1);
+			bnx2x_ack_sb(bp, fp->sb_id, USTORM_ID,
+				     le16_to_cpu(fp->fp_u_idx),
+				     IGU_INT_ENABLE, 1);
+		}
 	}
 
 poll_again:
@@ -11221,7 +11164,7 @@ exit_lbl:
 static netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct bnx2x *bp = netdev_priv(dev);
-	struct bnx2x_fastpath *fp, *fp_stat;
+	struct bnx2x_fastpath *fp;
 	struct netdev_queue *txq;
 	struct sw_tx_bd *tx_buf;
 	struct eth_tx_start_bd *tx_start_bd;
@@ -11243,11 +11186,10 @@ static netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	fp_index = skb_get_queue_mapping(skb);
 	txq = netdev_get_tx_queue(dev, fp_index);
 
-	fp = &bp->fp[fp_index + bp->num_rx_queues];
-	fp_stat = &bp->fp[fp_index];
+	fp = &bp->fp[fp_index];
 
 	if (unlikely(bnx2x_tx_avail(fp) < (skb_shinfo(skb)->nr_frags + 3))) {
-		fp_stat->eth_q_stats.driver_xoff++;
+		fp->eth_q_stats.driver_xoff++;
 		netif_tx_stop_queue(txq);
 		BNX2X_ERR("BUG! Tx ring full when queue awake!\n");
 		return NETDEV_TX_BUSY;
@@ -11473,7 +11415,7 @@ static netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	fp->tx_db.data.prod += nbd;
 	barrier();
-	DOORBELL(bp, fp->index - bp->num_rx_queues, fp->tx_db.raw);
+	DOORBELL(bp, fp->index, fp->tx_db.raw);
 
 	mmiowb();
 
@@ -11484,11 +11426,11 @@ static netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		/* We want bnx2x_tx_int to "see" the updated tx_bd_prod
 		   if we put Tx into XOFF state. */
 		smp_mb();
-		fp_stat->eth_q_stats.driver_xoff++;
+		fp->eth_q_stats.driver_xoff++;
 		if (bnx2x_tx_avail(fp) >= MAX_SKB_FRAGS + 3)
 			netif_tx_wake_queue(txq);
 	}
-	fp_stat->tx_pkt++;
+	fp->tx_pkt++;
 
 	return NETDEV_TX_OK;
 }
@@ -12376,9 +12318,9 @@ static int bnx2x_eeh_nic_unload(struct bnx2x *bp)
 
 	/* Free SKBs, SGEs, TPA pool and driver internals */
 	bnx2x_free_skbs(bp);
-	for_each_rx_queue(bp, i)
+	for_each_queue(bp, i)
 		bnx2x_free_rx_sge_range(bp, bp->fp + i, NUM_RX_SGE);
-	for_each_rx_queue(bp, i)
+	for_each_queue(bp, i)
 		netif_napi_del(&bnx2x_fp(bp, i, napi));
 	bnx2x_free_mem(bp);
 
-- 
1.6.3.3



