Date:	Tue, 17 Jun 2008 11:33:32 +0300
From:	"Eilon Greenstein" <eilong@...adcom.com>
To:	netdev <netdev@...r.kernel.org>
cc:	"Michael Chan" <mchan@...adcom.com>,
	"Vladislav Zolotarov" <vladz@...adcom.com>
Subject:	[PATCH net-next 08/12] bnx2x: Add TPA, Broadcom's HW LRO

TPA stands for Transparent Packet Aggregation. When enabled, the FW
aggregates in-order TCP packets according to a 4-tuple match and sends
one big packet to the driver. This packet is stored on an SGL in which
each SGE is one page. The FW also implements a timeout algorithm and
honors all TCP flags, including the push flag as a trigger to halt
aggregation.

Signed-off-by: Vladislav Zolotarov <vladz@...adcom.com>
Signed-off-by: Eliezer Tamir <eliezert@...adcom.com>
Signed-off-by: Eilon Greenstein <eilong@...adcom.com>
---
 drivers/net/bnx2x.h      |  275 +++++++++++++++------
 drivers/net/bnx2x_main.c |  619 ++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 795 insertions(+), 99 deletions(-)
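
[Note, not part of the patch: a minimal sketch of the SGE accounting the
description above refers to. Assuming 4K pages and one page per SGE
(PAGES_PER_SGE_SHIFT == 0 below), the number of SGE entries one aggregated
packet consumes follows from the CQE's pkt_len and len_on_bd, mirroring the
PAGE_ALIGN()-based arithmetic in bnx2x_fill_frag_skb() and
bnx2x_update_sge_prod(). The helper name and constant are hypothetical.]

	#include <stdint.h>

	#define SGE_PAGE_SIZE	4096U	/* BCM_PAGE_SIZE with PAGES_PER_SGE == 1 */

	/* Illustration only: how many page-sized SGEs the FW fills for one
	 * aggregated packet, given the CQE's pkt_len and len_on_bd.
	 */
	static inline uint16_t tpa_sge_count(uint16_t pkt_len, uint16_t len_on_bd)
	{
		uint32_t frag_size = pkt_len - len_on_bd;

		/* round the fragment bytes up to whole pages */
		return (uint16_t)((frag_size + SGE_PAGE_SIZE - 1) / SGE_PAGE_SIZE);
	}

	/* e.g. a 26000-byte aggregation with 1500 bytes left on the BD uses
	 * (26000 - 1500 + 4095) / 4096 = 6 SGE pages.
	 */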

diff --git a/drivers/net/bnx2x.h b/drivers/net/bnx2x.h
index 56331be..4e3e18d 100755
--- a/drivers/net/bnx2x.h
+++ b/drivers/net/bnx2x.h
@@ -125,8 +125,8 @@
 #define is_multi(bp)		(bp->num_queues > 1)
 
 
+/* fast path */
 
-#define bnx2x_sp_check(bp, var) ((bp->slowpath) ? (&bp->slowpath->var) : NULL)
 struct sw_rx_bd {
 	struct sk_buff	*skb;
 	DECLARE_PCI_UNMAP_ADDR(mapping)
@@ -137,6 +137,49 @@ struct sw_tx_bd {
 	u16		first_bd;
 };
 
+struct sw_rx_page {
+	struct page	*page;
+	DECLARE_PCI_UNMAP_ADDR(mapping)
+};
+
+
+/* MC hsi */
+#define BCM_PAGE_BITS			12
+#define BCM_PAGE_SIZE			(1 << BCM_PAGE_BITS)
+#define PAGES_PER_SGE_SHIFT		0
+#define PAGES_PER_SGE			(1 << PAGES_PER_SGE_SHIFT)
+
+/* SGE ring related macros */
+#define NUM_RX_SGE_PAGES		2
+#define RX_SGE_CNT		(BCM_PAGE_SIZE / sizeof(struct eth_rx_sge))
+#define MAX_RX_SGE_CNT			(RX_SGE_CNT - 2)
+/* RX_SGE_CNT is promised to be a power of 2 */
+#define RX_SGE_MASK			(RX_SGE_CNT - 1)
+#define NUM_RX_SGE			(RX_SGE_CNT * NUM_RX_SGE_PAGES)
+#define MAX_RX_SGE			(NUM_RX_SGE - 1)
+#define NEXT_SGE_IDX(x)		((((x) & RX_SGE_MASK) == \
+				  (MAX_RX_SGE_CNT - 1)) ? (x) + 3 : (x) + 1)
+#define RX_SGE(x)			((x) & MAX_RX_SGE)
+
+/* SGE producer mask related macros */
+/* Number of bits in one sge_mask array element */
+#define RX_SGE_MASK_ELEM_SZ		64
+#define RX_SGE_MASK_ELEM_SHIFT		6
+#define RX_SGE_MASK_ELEM_MASK		((u64)RX_SGE_MASK_ELEM_SZ - 1)
+
+/* Creates a bitmask of all ones in less significant bits.
+   idx - index of the most significant bit in the created mask */
+#define RX_SGE_ONES_MASK(idx) \
+		(((u64)0x1 << (((idx) & RX_SGE_MASK_ELEM_MASK) + 1)) - 1)
+#define RX_SGE_MASK_ELEM_ONE_MASK	((u64)(~0))
+
+/* Number of u64 elements in SGE mask array */
+#define RX_SGE_MASK_LEN			((NUM_RX_SGE_PAGES * RX_SGE_CNT) / \
+					 RX_SGE_MASK_ELEM_SZ)
+#define RX_SGE_MASK_LEN_MASK		(RX_SGE_MASK_LEN - 1)
+#define NEXT_SGE_MASK_ELEM(el)		(((el) + 1) & RX_SGE_MASK_LEN_MASK)
+
+
 struct bnx2x_fastpath {
 
 	struct napi_struct	napi;
@@ -152,7 +195,8 @@ struct bnx2x_fastpath {
 	struct eth_tx_bd	*tx_desc_ring;
 	dma_addr_t		tx_desc_mapping;
 
-	struct sw_rx_bd 	*rx_buf_ring;
+	struct sw_rx_bd		*rx_buf_ring;	/* BDs mappings ring */
+	struct sw_rx_page	*rx_page_ring;	/* SGE pages mappings ring */
 
 	struct eth_rx_bd	*rx_desc_ring;
 	dma_addr_t		rx_desc_mapping;
@@ -160,6 +204,12 @@ struct bnx2x_fastpath {
 	union eth_rx_cqe	*rx_comp_ring;
 	dma_addr_t		rx_comp_mapping;
 
+	/* SGE ring */
+	struct eth_rx_sge	*rx_sge_ring;
+	dma_addr_t		rx_sge_mapping;
+
+	u64			sge_mask[RX_SGE_MASK_LEN];
+
 	int			state;
 #define BNX2X_FP_STATE_CLOSED		0
 #define BNX2X_FP_STATE_IRQ		0x80000
@@ -190,27 +240,152 @@ struct bnx2x_fastpath {
 	u16			rx_bd_cons;
 	u16			rx_comp_prod;
 	u16			rx_comp_cons;
+	u16			rx_sge_prod;
+	/* The last maximal completed SGE */
+	u16			last_max_sge;
 	u16			*rx_cons_sb;
+	u16			*rx_bd_cons_sb;
 
 	unsigned long		tx_pkt,
 				rx_pkt,
-				rx_calls;
-
-	struct bnx2x    	*bp; /* parent */
+				rx_calls,
+				rx_alloc_failed;
+	/* TPA related */
+	struct sw_rx_bd		tpa_pool[ETH_MAX_AGGREGATION_QUEUES_E1H];
+	u8			tpa_state[ETH_MAX_AGGREGATION_QUEUES_E1H];
+#define BNX2X_TPA_START			1
+#define BNX2X_TPA_STOP			2
+	u8			disable_tpa;
+
+	struct bnx2x		*bp; /* parent */
+#ifdef BNX2X_STOP_ON_ERROR
+	u64			tpa_queue_used;
+#endif
 };
 
-#define bnx2x_fp(bp, nr, var)   	(bp->fp[nr].var)
+#define bnx2x_fp(bp, nr, var)		(bp->fp[nr].var)
+
+
+/* MC hsi */
+#define MAX_FETCH_BD			13	/* HW max BDs per packet */
+#define RX_COPY_THRESH			92
+
+#define NUM_TX_RINGS			16
+#define TX_DESC_CNT		(BCM_PAGE_SIZE / sizeof(struct eth_tx_bd))
+#define MAX_TX_DESC_CNT			(TX_DESC_CNT - 1)
+#define NUM_TX_BD			(TX_DESC_CNT * NUM_TX_RINGS)
+#define MAX_TX_BD			(NUM_TX_BD - 1)
+#define MAX_TX_AVAIL			(MAX_TX_DESC_CNT * NUM_TX_RINGS - 2)
+#define NEXT_TX_IDX(x)		((((x) & MAX_TX_DESC_CNT) == \
+				  (MAX_TX_DESC_CNT - 1)) ? (x) + 2 : (x) + 1)
+#define TX_BD(x)			((x) & MAX_TX_BD)
+#define TX_BD_POFF(x)			((x) & MAX_TX_DESC_CNT)
+
+/* The RX BD ring is special, each bd is 8 bytes but the last one is 16 */
+#define NUM_RX_RINGS			8
+#define RX_DESC_CNT		(BCM_PAGE_SIZE / sizeof(struct eth_rx_bd))
+#define MAX_RX_DESC_CNT			(RX_DESC_CNT - 2)
+#define RX_DESC_MASK			(RX_DESC_CNT - 1)
+#define NUM_RX_BD			(RX_DESC_CNT * NUM_RX_RINGS)
+#define MAX_RX_BD			(NUM_RX_BD - 1)
+#define MAX_RX_AVAIL			(MAX_RX_DESC_CNT * NUM_RX_RINGS - 2)
+#define NEXT_RX_IDX(x)		((((x) & RX_DESC_MASK) == \
+				  (MAX_RX_DESC_CNT - 1)) ? (x) + 3 : (x) + 1)
+#define RX_BD(x)			((x) & MAX_RX_BD)
+
+/* As long as CQE is 4 times bigger than BD entry we have to allocate
+   4 times more pages for CQ ring in order to keep it balanced with
+   BD ring */
+#define NUM_RCQ_RINGS			(NUM_RX_RINGS * 4)
+#define RCQ_DESC_CNT		(BCM_PAGE_SIZE / sizeof(union eth_rx_cqe))
+#define MAX_RCQ_DESC_CNT		(RCQ_DESC_CNT - 1)
+#define NUM_RCQ_BD			(RCQ_DESC_CNT * NUM_RCQ_RINGS)
+#define MAX_RCQ_BD			(NUM_RCQ_BD - 1)
+#define MAX_RCQ_AVAIL			(MAX_RCQ_DESC_CNT * NUM_RCQ_RINGS - 2)
+#define NEXT_RCQ_IDX(x)		((((x) & MAX_RCQ_DESC_CNT) == \
+				  (MAX_RCQ_DESC_CNT - 1)) ? (x) + 2 : (x) + 1)
+#define RCQ_BD(x)			((x) & MAX_RCQ_BD)
+
+
 /* This is needed for determening of last_max */
 #define SUB_S16(a, b)			(s16)((s16)(a) - (s16)(b))
 
+#define __SGE_MASK_SET_BIT(el, bit) \
+	do { \
+		el = ((el) | ((u64)0x1 << (bit))); \
+	} while (0)
+
+#define __SGE_MASK_CLEAR_BIT(el, bit) \
+	do { \
+		el = ((el) & (~((u64)0x1 << (bit)))); \
+	} while (0)
+
+#define SGE_MASK_SET_BIT(fp, idx) \
+	__SGE_MASK_SET_BIT(fp->sge_mask[(idx) >> RX_SGE_MASK_ELEM_SHIFT], \
+			   ((idx) & RX_SGE_MASK_ELEM_MASK))
+
+#define SGE_MASK_CLEAR_BIT(fp, idx) \
+	__SGE_MASK_CLEAR_BIT(fp->sge_mask[(idx) >> RX_SGE_MASK_ELEM_SHIFT], \
+			     ((idx) & RX_SGE_MASK_ELEM_MASK))
+
+
+/* used on a CID received from the HW */
+#define SW_CID(x)			(le32_to_cpu(x) & \
+					 (COMMON_RAMROD_ETH_RX_CQE_CID >> 7))
+#define CQE_CMD(x)			(le32_to_cpu(x) >> \
+					COMMON_RAMROD_ETH_RX_CQE_CMD_ID_SHIFT)
+
 #define BD_UNMAP_ADDR(bd)		HILO_U64(le32_to_cpu((bd)->addr_hi), \
 						 le32_to_cpu((bd)->addr_lo))
 #define BD_UNMAP_LEN(bd)		(le16_to_cpu((bd)->nbytes))
 
+
+#define DPM_TRIGER_TYPE			0x40
+#define DOORBELL(bp, cid, val) \
+	do { \
+		writel((u32)val, (bp)->doorbells + (BCM_PAGE_SIZE * cid) + \
+		       DPM_TRIGER_TYPE); \
+	} while (0)
+
+
+/* TX CSUM helpers */
+#define SKB_CS_OFF(skb)		(offsetof(struct tcphdr, check) - \
+				 skb->csum_offset)
+#define SKB_CS(skb)		(*(u16 *)(skb_transport_header(skb) + \
+					  skb->csum_offset))
+
+#define pbd_tcp_flags(skb)	(ntohl(tcp_flag_word(tcp_hdr(skb)))>>16 & 0xff)
+
+#define XMIT_PLAIN			0
+#define XMIT_CSUM_V4			0x1
+#define XMIT_CSUM_V6			0x2
+#define XMIT_CSUM_TCP			0x4
+#define XMIT_GSO_V4			0x8
+#define XMIT_GSO_V6			0x10
+
+#define XMIT_CSUM			(XMIT_CSUM_V4 | XMIT_CSUM_V6)
+#define XMIT_GSO			(XMIT_GSO_V4 | XMIT_GSO_V6)
+
+
 /* stuff added to make the code fit 80Col */
 
 #define CQE_TYPE(cqe_fp_flags)	((cqe_fp_flags) & ETH_FAST_PATH_RX_CQE_TYPE)
 
+#define TPA_TYPE_START			ETH_FAST_PATH_RX_CQE_START_FLG
+#define TPA_TYPE_END			ETH_FAST_PATH_RX_CQE_END_FLG
+#define TPA_TYPE(cqe_fp_flags)		((cqe_fp_flags) & \
+					 (TPA_TYPE_START | TPA_TYPE_END))
+
+#define BNX2X_RX_SUM_OK(cqe) \
+			(!(cqe->fast_path_cqe.status_flags & \
+			 (ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG | \
+			  ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG)))
+
+#define BNX2X_RX_SUM_FIX(cqe) \
+			((le16_to_cpu(cqe->fast_path_cqe.pars_flags.flags) & \
+			  PARSING_FLAGS_OVER_ETHERNET_PROTOCOL) == \
+			 (1 << PARSING_FLAGS_OVER_ETHERNET_PROTOCOL_SHIFT))
+
 #define ETH_RX_ERROR_FALGS	(ETH_FAST_PATH_RX_CQE_PHY_DECODE_ERR_FLG | \
 				 ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG | \
 				 ETH_FAST_PATH_RX_CQE_L4_BAD_XSUM_FLG)
@@ -240,6 +415,9 @@ struct bnx2x_fastpath {
 #define BNX2X_TX_SB_INDEX \
 	(&fp->status_blk->c_status_block.index_values[C_SB_ETH_TX_CQ_INDEX])
 
+
+/* end of fast path */
+
 /* common */
 
 struct bnx2x_common {
@@ -585,6 +763,7 @@ struct bnx2x {
 #define USING_DAC_FLAG			0x10
 #define USING_MSIX_FLAG			0x20
 #define ASF_ENABLE_FLAG			0x40
+#define TPA_ENABLE_FLAG			0x80
 #define NO_MCP_FLAG			0x100
 #define BP_NOMCP(bp)			(bp->flags & NO_MCP_FLAG)
 
@@ -706,74 +885,6 @@ void bnx2x_write_dmae(struct bnx2x *bp, dma_addr_t dma_addr, u32 dst_addr,
 		      u32 len32);
 int bnx2x_set_gpio(struct bnx2x *bp, int gpio_num, u32 mode);
 
-
-/* MC hsi */
-#define RX_COPY_THRESH  		92
-#define BCM_PAGE_BITS   		12
-#define BCM_PAGE_SIZE   		(1 << BCM_PAGE_BITS)
-
-#define NUM_TX_RINGS    		16
-#define TX_DESC_CNT     	(BCM_PAGE_SIZE / sizeof(struct eth_tx_bd))
-#define MAX_TX_DESC_CNT 		(TX_DESC_CNT - 1)
-#define NUM_TX_BD       		(TX_DESC_CNT * NUM_TX_RINGS)
-#define MAX_TX_BD       		(NUM_TX_BD - 1)
-#define MAX_TX_AVAIL    		(MAX_TX_DESC_CNT * NUM_TX_RINGS - 2)
-#define NEXT_TX_IDX(x)  	((((x) & MAX_TX_DESC_CNT) == \
-				 (MAX_TX_DESC_CNT - 1)) ? (x) + 2 : (x) + 1)
-#define TX_BD(x)			((x) & MAX_TX_BD)
-#define TX_BD_POFF(x)   		((x) & MAX_TX_DESC_CNT)
-
-/* The RX BD ring is special, each bd is 8 bytes but the last one is 16 */
-#define NUM_RX_RINGS    		8
-#define RX_DESC_CNT     	(BCM_PAGE_SIZE / sizeof(struct eth_rx_bd))
-#define MAX_RX_DESC_CNT 		(RX_DESC_CNT - 2)
-#define RX_DESC_MASK    		(RX_DESC_CNT - 1)
-#define NUM_RX_BD       		(RX_DESC_CNT * NUM_RX_RINGS)
-#define MAX_RX_BD       		(NUM_RX_BD - 1)
-#define MAX_RX_AVAIL    		(MAX_RX_DESC_CNT * NUM_RX_RINGS - 2)
-#define NEXT_RX_IDX(x)  	((((x) & RX_DESC_MASK) == \
-				 (MAX_RX_DESC_CNT - 1)) ? (x) + 3 : (x) + 1)
-#define RX_BD(x)			((x) & MAX_RX_BD)
-
-#define NUM_RCQ_RINGS   		(NUM_RX_RINGS * 2)
-#define RCQ_DESC_CNT    	(BCM_PAGE_SIZE / sizeof(union eth_rx_cqe))
-#define MAX_RCQ_DESC_CNT		(RCQ_DESC_CNT - 1)
-#define NUM_RCQ_BD      		(RCQ_DESC_CNT * NUM_RCQ_RINGS)
-#define MAX_RCQ_BD      		(NUM_RCQ_BD - 1)
-#define MAX_RCQ_AVAIL   		(MAX_RCQ_DESC_CNT * NUM_RCQ_RINGS - 2)
-#define NEXT_RCQ_IDX(x) 	((((x) & MAX_RCQ_DESC_CNT) == \
-				 (MAX_RCQ_DESC_CNT - 1)) ? (x) + 2 : (x) + 1)
-#define RCQ_BD(x)       		((x) & MAX_RCQ_BD)
-
-
-/* used on a CID received from the HW */
-#define SW_CID(x)       		(le32_to_cpu(x) & \
-					 (COMMON_RAMROD_ETH_RX_CQE_CID >> 1))
-#define CQE_CMD(x)      		(le32_to_cpu(x) >> \
-					COMMON_RAMROD_ETH_RX_CQE_CMD_ID_SHIFT)
-
-#define STROM_ASSERT_ARRAY_SIZE 	50
-
-
-
-/* must be used on a CID before placing it on a HW ring */
-#define HW_CID(bp, x)		((BP_PORT(bp) << 23) | (BP_E1HVN(bp) << 17) | x)
-
-#define SP_DESC_CNT     	(BCM_PAGE_SIZE / sizeof(struct eth_spe))
-#define MAX_SP_DESC_CNT 		(SP_DESC_CNT - 1)
-
-
-#define BNX2X_BTR       		3
-#define MAX_SPQ_PENDING 		8
-
-
-#define DPM_TRIGER_TYPE 		0x40
-#define DOORBELL(bp, cid, val) \
-	do { \
-		writel((u32)val, (bp)->doorbells + (BCM_PAGE_SIZE * cid) + \
-		       DPM_TRIGER_TYPE); \
-	} while (0)
-
 static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms,
 			   int wait)
 {
@@ -853,14 +964,20 @@ static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms,
 #define BNX2X_LOOPBACK_FAILED		(BNX2X_MAC_LOOPBACK_FAILED | \
 					 BNX2X_PHY_LOOPBACK_FAILED)
 
-#define pbd_tcp_flags(skb)  	(ntohl(tcp_flag_word(tcp_hdr(skb)))>>16 & 0xff)
+
+#define STROM_ASSERT_ARRAY_SIZE		50
+
 
 /* must be used on a CID before placing it on a HW ring */
+#define HW_CID(bp, x)		((BP_PORT(bp) << 23) | (BP_E1HVN(bp) << 17) | x)
+
+#define SP_DESC_CNT		(BCM_PAGE_SIZE / sizeof(struct eth_spe))
+#define MAX_SP_DESC_CNT			(SP_DESC_CNT - 1)
+
+
+#define BNX2X_BTR			3
+#define MAX_SPQ_PENDING			8
 
-#define BNX2X_RX_SUM_OK(cqe) \
-			(!(cqe->fast_path_cqe.status_flags & \
-			 (ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG | \
-			  ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG)))
 
 /* CMNG constants
    derived from lab experiments, and not from system spec calculations !!! */
diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c
index 20ecfcb..366764e 100644
--- a/drivers/net/bnx2x_main.c
+++ b/drivers/net/bnx2x_main.c
@@ -84,6 +84,7 @@ MODULE_VERSION(DRV_MODULE_VERSION);
 static int use_inta;
 static int poll;
 static int debug;
+static int disable_tpa;
 static int nomcp;
 static int load_count[3]; /* 0 common, 1 port0, 2 port1 */
 static int use_multi;
@@ -91,6 +92,7 @@ static int use_multi;
 module_param(use_inta, int, 0);
 module_param(poll, int, 0);
 module_param(debug, int, 0);
+module_param(disable_tpa, int, 0);
 module_param(nomcp, int, 0);
 MODULE_PARM_DESC(use_inta, "use INT#A instead of MSI-X");
 MODULE_PARM_DESC(poll, "use polling (for debug)");
@@ -440,12 +442,12 @@ static void bnx2x_panic_dump(struct bnx2x *bp)
 			  "  tx_bd_prod(%x)  tx_bd_cons(%x)  *tx_cons_sb(%x)"
 			  "  *rx_cons_sb(%x)  rx_comp_prod(%x)"
 			  "  rx_comp_cons(%x)  fp_c_idx(%x)  fp_u_idx(%x)"
-			  "  bd data(%x,%x)\n",
+			  "  bd data(%x,%x)  rx_alloc_failed(%lx)\n",
 			  i, fp->tx_pkt_prod, fp->tx_pkt_cons, fp->tx_bd_prod,
 			  fp->tx_bd_cons, *fp->tx_cons_sb, *fp->rx_cons_sb,
 			  fp->rx_comp_prod, fp->rx_comp_cons, fp->fp_c_idx,
 			  fp->fp_u_idx, hw_prods->packets_prod,
-			  hw_prods->bds_prod);
+			  hw_prods->bds_prod, fp->rx_alloc_failed);
 
 		start = TX_BD(le16_to_cpu(*fp->tx_cons_sb) - 10);
 		end = TX_BD(le16_to_cpu(*fp->tx_cons_sb) + 245);
@@ -888,6 +890,62 @@ static void bnx2x_sp_event(struct bnx2x_fastpath *fp,
 	mb(); /* force bnx2x_wait_ramrod() to see the change */
 }
 
+static inline void bnx2x_free_rx_sge(struct bnx2x *bp,
+				     struct bnx2x_fastpath *fp, u16 index)
+{
+	struct sw_rx_page *sw_buf = &fp->rx_page_ring[index];
+	struct page *page = sw_buf->page;
+	struct eth_rx_sge *sge = &fp->rx_sge_ring[index];
+
+	/* Skip "next page" elements */
+	if (!page)
+		return;
+
+	pci_unmap_page(bp->pdev, pci_unmap_addr(sw_buf, mapping),
+		       PAGE_SIZE*PAGES_PER_SGE, PCI_DMA_FROMDEVICE);
+	__free_pages(page, PAGES_PER_SGE_SHIFT);
+
+	sw_buf->page = NULL;
+	sge->addr_hi = 0;
+	sge->addr_lo = 0;
+}
+
+static inline void bnx2x_free_rx_sge_range(struct bnx2x *bp,
+					   struct bnx2x_fastpath *fp, int last)
+{
+	int i;
+
+	for (i = 0; i < last; i++)
+		bnx2x_free_rx_sge(bp, fp, i);
+}
+
+static inline int bnx2x_alloc_rx_sge(struct bnx2x *bp,
+				     struct bnx2x_fastpath *fp, u16 index)
+{
+	struct page *page = alloc_pages(GFP_ATOMIC, PAGES_PER_SGE_SHIFT);
+	struct sw_rx_page *sw_buf = &fp->rx_page_ring[index];
+	struct eth_rx_sge *sge = &fp->rx_sge_ring[index];
+	dma_addr_t mapping;
+
+	if (unlikely(page == NULL))
+		return -ENOMEM;
+
+	mapping = pci_map_page(bp->pdev, page, 0, PAGES_PER_SGE*PAGE_SIZE,
+			       PCI_DMA_FROMDEVICE);
+	if (unlikely(dma_mapping_error(mapping))) {
+		__free_pages(page, PAGES_PER_SGE_SHIFT);
+		return -ENOMEM;
+	}
+
+	sw_buf->page = page;
+	pci_unmap_addr_set(sw_buf, mapping, mapping);
+
+	sge->addr_hi = cpu_to_le32(U64_HI(mapping));
+	sge->addr_lo = cpu_to_le32(U64_LO(mapping));
+
+	return 0;
+}
+
 static inline int bnx2x_alloc_rx_skb(struct bnx2x *bp,
 				     struct bnx2x_fastpath *fp, u16 index)
 {
@@ -941,12 +999,301 @@ static void bnx2x_reuse_rx_skb(struct bnx2x_fastpath *fp,
 	*prod_bd = *cons_bd;
 }
 
+static inline void bnx2x_update_last_max_sge(struct bnx2x_fastpath *fp,
+					     u16 idx)
+{
+	u16 last_max = fp->last_max_sge;
+
+	if (SUB_S16(idx, last_max) > 0)
+		fp->last_max_sge = idx;
+}
+
+static void bnx2x_clear_sge_mask_next_elems(struct bnx2x_fastpath *fp)
+{
+	int i, j;
+
+	for (i = 1; i <= NUM_RX_SGE_PAGES; i++) {
+		int idx = RX_SGE_CNT * i - 1;
+
+		for (j = 0; j < 2; j++) {
+			SGE_MASK_CLEAR_BIT(fp, idx);
+			idx--;
+		}
+	}
+}
+
+static void bnx2x_update_sge_prod(struct bnx2x_fastpath *fp,
+				  struct eth_fast_path_rx_cqe *fp_cqe)
+{
+	struct bnx2x *bp = fp->bp;
+	u16 sge_len = PAGE_ALIGN(le16_to_cpu(fp_cqe->pkt_len) -
+				 le16_to_cpu(fp_cqe->len_on_bd)) >> PAGE_SHIFT;
+	u16 last_max, last_elem, first_elem;
+	u16 delta = 0;
+	u16 i;
+
+	if (!sge_len)
+		return;
+
+	/* First mark all used pages */
+	for (i = 0; i < sge_len; i++)
+		SGE_MASK_CLEAR_BIT(fp, RX_SGE(le16_to_cpu(fp_cqe->sgl[i])));
+
+	DP(BNX2X_MSG_FP, "fp_cqe->sgl[%d] = %d\n",
+	   sge_len - 1, le16_to_cpu(fp_cqe->sgl[sge_len - 1]));
+
+	/* Here we assume that the last SGE index is the biggest */
+	prefetch((void *)(fp->sge_mask));
+	bnx2x_update_last_max_sge(fp, le16_to_cpu(fp_cqe->sgl[sge_len - 1]));
+
+	last_max = RX_SGE(fp->last_max_sge);
+	last_elem = last_max >> RX_SGE_MASK_ELEM_SHIFT;
+	first_elem = RX_SGE(fp->rx_sge_prod) >> RX_SGE_MASK_ELEM_SHIFT;
+
+	/* If ring is not full */
+	if (last_elem + 1 != first_elem)
+		last_elem++;
+
+	/* Now update the prod */
+	for (i = first_elem; i != last_elem; i = NEXT_SGE_MASK_ELEM(i)) {
+		if (likely(fp->sge_mask[i]))
+			break;
+
+		fp->sge_mask[i] = RX_SGE_MASK_ELEM_ONE_MASK;
+		delta += RX_SGE_MASK_ELEM_SZ;
+	}
+
+	if (delta > 0) {
+		fp->rx_sge_prod += delta;
+		/* clear page-end entries */
+		bnx2x_clear_sge_mask_next_elems(fp);
+	}
+
+	DP(NETIF_MSG_RX_STATUS,
+	   "fp->last_max_sge = %d  fp->rx_sge_prod = %d\n",
+	   fp->last_max_sge, fp->rx_sge_prod);
+}
+
+static inline void bnx2x_init_sge_ring_bit_mask(struct bnx2x_fastpath *fp)
+{
+	/* Set the mask to all 1-s: it's faster to compare to 0 than to 0xf-s */
+	memset(fp->sge_mask, 0xff,
+	       (NUM_RX_SGE >> RX_SGE_MASK_ELEM_SHIFT)*sizeof(u64));
+
+	/* Clear the two last indices in the page to 1:
+	   these are the indices that correspond to the "next" element,
+	   hence will never be indicated and should be removed from
+	   the calculations. */
+	bnx2x_clear_sge_mask_next_elems(fp);
+}
+
+static void bnx2x_tpa_start(struct bnx2x_fastpath *fp, u16 queue,
+			    struct sk_buff *skb, u16 cons, u16 prod)
+{
+	struct bnx2x *bp = fp->bp;
+	struct sw_rx_bd *cons_rx_buf = &fp->rx_buf_ring[cons];
+	struct sw_rx_bd *prod_rx_buf = &fp->rx_buf_ring[prod];
+	struct eth_rx_bd *prod_bd = &fp->rx_desc_ring[prod];
+	dma_addr_t mapping;
+
+	/* move empty skb from pool to prod and map it */
+	prod_rx_buf->skb = fp->tpa_pool[queue].skb;
+	mapping = pci_map_single(bp->pdev, fp->tpa_pool[queue].skb->data,
+				 bp->rx_buf_use_size, PCI_DMA_FROMDEVICE);
+	pci_unmap_addr_set(prod_rx_buf, mapping, mapping);
+
+	/* move partial skb from cons to pool (don't unmap yet) */
+	fp->tpa_pool[queue] = *cons_rx_buf;
+
+	/* mark bin state as start - print error if current state != stop */
+	if (fp->tpa_state[queue] != BNX2X_TPA_STOP)
+		BNX2X_ERR("start of bin not in stop [%d]\n", queue);
+
+	fp->tpa_state[queue] = BNX2X_TPA_START;
+
+	/* point prod_bd to new skb */
+	prod_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
+	prod_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
+
+#ifdef BNX2X_STOP_ON_ERROR
+	fp->tpa_queue_used |= 1<<queue;
+	DP(NETIF_MSG_RX_STATUS, "fp->tpa_queue_used = 0x%llx\n",
+	   fp->tpa_queue_used);
+#endif
+}
+
+static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp,
+			       struct sk_buff *skb,
+			       struct eth_fast_path_rx_cqe *fp_cqe,
+			       u16 cqe_idx)
+{
+	u32 i, frag_len, frag_size, pages;
+	struct sw_rx_page *rx_pg, old_rx_pg;
+	struct page *sge;
+	int err;
+	int j;
+
+	frag_size = le16_to_cpu(fp_cqe->pkt_len) -
+		    le16_to_cpu(fp_cqe->len_on_bd);
+	pages = PAGE_ALIGN(frag_size) >> PAGE_SHIFT;
+
+#ifdef BNX2X_STOP_ON_ERROR
+	if (pages > 8*PAGES_PER_SGE) {
+		BNX2X_ERR("SGL length is too long: %d. CQE index is %d\n",
+			  pages, cqe_idx);
+		BNX2X_ERR("fp_cqe->pkt_len = %d  fp_cqe->len_on_bd = %d\n",
+			  fp_cqe->pkt_len, fp_cqe->len_on_bd);
+		bnx2x_panic();
+		return -EINVAL;
+	}
+#endif
+
+	/* Run through the SGL and compose the fragmented skb */
+	for (i = 0, j = 0; i < pages; i += PAGES_PER_SGE, j++) {
+		u16 sge_idx = RX_SGE(le16_to_cpu(fp_cqe->sgl[j]));
+
+		/* FW gives the indices of the SGE as if the ring is an array
+		   (meaning that "next" element will consume 2 indices) */
+		frag_len = min(frag_size,
+			       (unsigned int)PAGE_SIZE*PAGES_PER_SGE);
+		rx_pg = &fp->rx_page_ring[sge_idx];
+		sge = rx_pg->page;
+		old_rx_pg = *rx_pg;
+
+		/* If we fail to allocate a substitute page, we simply stop
+		   where we are and drop the whole packet */
+		err = bnx2x_alloc_rx_sge(bp, fp, sge_idx);
+		if (unlikely(err)) {
+			fp->rx_alloc_failed++;
+			return err;
+		}
+
+		/* Unmap the page as we are going to pass it to the stack */
+		pci_unmap_page(bp->pdev, pci_unmap_addr(&old_rx_pg, mapping),
+			       PAGE_SIZE*PAGES_PER_SGE, PCI_DMA_FROMDEVICE);
+
+		/* Add one frag and update the appropriate fields in the skb */
+		skb_fill_page_desc(skb, j, old_rx_pg.page, 0, frag_len);
+
+		skb->data_len += frag_len;
+		skb->truesize += frag_len;
+		skb->len += frag_len;
+
+		frag_size -= frag_len;
+	}
+
+	return 0;
+}
+
+static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp,
+			   u16 queue, int pad, int len, union eth_rx_cqe *cqe,
+			   u16 cqe_idx)
+{
+	struct sw_rx_bd *rx_buf = &fp->tpa_pool[queue];
+	struct sk_buff *skb = rx_buf->skb;
+	/* alloc new skb */
+	struct sk_buff *new_skb = netdev_alloc_skb(bp->dev, bp->rx_buf_size);
+
+	/* Unmap skb in the pool anyway, as we are going to change
+	   pool entry status to BNX2X_TPA_STOP even if new skb allocation
+	   fails. */
+	pci_unmap_single(bp->pdev, pci_unmap_addr(rx_buf, mapping),
+			 bp->rx_buf_use_size, PCI_DMA_FROMDEVICE);
+
+	/* if alloc failed drop the packet and keep the buffer in the bin */
+	if (likely(new_skb)) {
+
+		prefetch(skb);
+		prefetch(((char *)(skb)) + 128);
+
+		/* else fix ip xsum and give it to the stack */
+		/* (no need to map the new skb) */
+#ifdef BNX2X_STOP_ON_ERROR
+		if (pad + len > bp->rx_buf_size) {
+			BNX2X_ERR("skb_put is about to fail... "
+				  "pad %d  len %d  rx_buf_size %d\n",
+				  pad, len, bp->rx_buf_size);
+			bnx2x_panic();
+			return;
+		}
+#endif
+
+		skb_reserve(skb, pad);
+		skb_put(skb, len);
+
+		skb->protocol = eth_type_trans(skb, bp->dev);
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+		{
+			struct iphdr *iph;
+
+			iph = (struct iphdr *)skb->data;
+			iph->check = 0;
+			iph->check = ip_fast_csum((u8 *)iph, iph->ihl);
+		}
+
+		if (!bnx2x_fill_frag_skb(bp, fp, skb,
+					 &cqe->fast_path_cqe, cqe_idx)) {
+#ifdef BCM_VLAN
+			if ((bp->vlgrp != NULL) &&
+			    (le16_to_cpu(cqe->fast_path_cqe.pars_flags.flags) &
+			     PARSING_FLAGS_VLAN))
+				vlan_hwaccel_receive_skb(skb, bp->vlgrp,
+						le16_to_cpu(cqe->fast_path_cqe.
+							    vlan_tag));
+			else
+#endif
+				netif_receive_skb(skb);
+		} else {
+			DP(NETIF_MSG_RX_STATUS, "Failed to allocate new pages"
+			   " - dropping packet!\n");
+			dev_kfree_skb(skb);
+		}
+
+		bp->dev->last_rx = jiffies;
+
+		/* put new skb in bin */
+		fp->tpa_pool[queue].skb = new_skb;
+
+	} else {
+		DP(NETIF_MSG_RX_STATUS,
+		   "Failed to allocate new skb - dropping packet!\n");
+		fp->rx_alloc_failed++;
+	}
+
+	fp->tpa_state[queue] = BNX2X_TPA_STOP;
+}
+
+static inline void bnx2x_update_rx_prod(struct bnx2x *bp,
+					struct bnx2x_fastpath *fp,
+					u16 bd_prod, u16 rx_comp_prod,
+					u16 rx_sge_prod)
+{
+	struct tstorm_eth_rx_producers rx_prods = {0};
+	int i;
+
+	/* Update producers */
+	rx_prods.bd_prod = bd_prod;
+	rx_prods.cqe_prod = rx_comp_prod;
+	rx_prods.sge_prod = rx_sge_prod;
+
+	for (i = 0; i < sizeof(struct tstorm_eth_rx_producers)/4; i++)
+		REG_WR(bp, BAR_TSTRORM_INTMEM +
+		       TSTORM_RX_PRODS_OFFSET(BP_PORT(bp), FP_CL_ID(fp)) + i*4,
+		       ((u32 *)&rx_prods)[i]);
+
+	DP(NETIF_MSG_RX_STATUS,
+	   "Wrote: bd_prod %u  cqe_prod %u  sge_prod %u\n",
+	   bd_prod, rx_comp_prod, rx_sge_prod);
+}
+
 static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
 {
 	struct bnx2x *bp = fp->bp;
 	u16 bd_cons, bd_prod, bd_prod_fw, comp_ring_cons;
 	u16 hw_comp_cons, sw_comp_cons, sw_comp_prod;
 	int rx_pkt = 0;
+	u16 queue;
 
 #ifdef BNX2X_STOP_ON_ERROR
 	if (unlikely(bp->panic))
@@ -1006,6 +1353,49 @@ static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
 			len = le16_to_cpu(cqe->fast_path_cqe.pkt_len);
 			pad = cqe->fast_path_cqe.placement_offset;
 
+			/* If CQE is marked both TPA_START and TPA_END
+			   it is a non-TPA CQE */
+			if ((!fp->disable_tpa) &&
+			    (TPA_TYPE(cqe_fp_flags) !=
+					(TPA_TYPE_START | TPA_TYPE_END))) {
+				queue = cqe->fast_path_cqe.queue_index;
+
+				if (TPA_TYPE(cqe_fp_flags) == TPA_TYPE_START) {
+					DP(NETIF_MSG_RX_STATUS,
+					   "calling tpa_start on queue %d\n",
+					   queue);
+
+					bnx2x_tpa_start(fp, queue, skb,
+							bd_cons, bd_prod);
+					goto next_rx;
+				}
+
+				if (TPA_TYPE(cqe_fp_flags) == TPA_TYPE_END) {
+					DP(NETIF_MSG_RX_STATUS,
+					   "calling tpa_stop on queue %d\n",
+					   queue);
+
+					if (!BNX2X_RX_SUM_FIX(cqe))
+						BNX2X_ERR("STOP on none TCP "
+							  "data\n");
+
+					/* This is the size of the linear data
+					   on this skb */
+					len = le16_to_cpu(cqe->fast_path_cqe.
+								len_on_bd);
+					bnx2x_tpa_stop(bp, fp, queue, pad,
+						    len, cqe, comp_ring_cons);
+				#ifdef BNX2X_STOP_ON_ERROR
+					if (bp->panic)
+						return -EINVAL;
+				#endif
+
+					bnx2x_update_sge_prod(fp,
+							&cqe->fast_path_cqe);
+					goto next_cqe;
+				}
+			}
+
 			pci_dma_sync_single_for_device(bp->pdev,
 					pci_unmap_addr(rx_buf, mapping),
 						       pad + RX_COPY_THRESH,
@@ -1036,7 +1426,7 @@ static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
 					DP(NETIF_MSG_RX_ERR,
 					   "ERROR  packet dropped "
 					   "because of alloc failure\n");
-					/* TBD count this as a drop? */
+					fp->rx_alloc_failed++;
 					goto reuse_rx;
 				}
 
@@ -1062,6 +1452,7 @@ static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
 				DP(NETIF_MSG_RX_ERR,
 				   "ERROR  packet dropped because "
 				   "of alloc failure\n");
+				fp->rx_alloc_failed++;
 reuse_rx:
 				bnx2x_reuse_rx_skb(fp, skb, bd_cons, bd_prod);
 				goto next_rx;
@@ -1108,11 +1499,9 @@ next_cqe:
 	fp->rx_comp_cons = sw_comp_cons;
 	fp->rx_comp_prod = sw_comp_prod;
 
-	REG_WR(bp, BAR_TSTRORM_INTMEM +
-		TSTORM_RX_PRODS_OFFSET(BP_PORT(bp), FP_CL_ID(fp)),
-		sw_comp_prod);
-
-
+	/* Update producers */
+	bnx2x_update_rx_prod(bp, fp, bd_prod_fw, sw_comp_prod,
+			     fp->rx_sge_prod);
 	mmiowb(); /* keep prod updates ordered */
 
 	fp->rx_pkt += rx_pkt;
@@ -3516,6 +3905,8 @@ static void bnx2x_init_sb(struct bnx2x *bp, int sb_id,
 	REG_WR(bp, BAR_CSTRORM_INTMEM +
 	       ((CSTORM_SB_HOST_SB_ADDR_OFFSET(port, sb_id)) + 4),
 	       U64_HI(section));
+	REG_WR8(bp, BAR_CSTRORM_INTMEM + FP_CSB_FUNC_OFF +
+		CSTORM_SB_HOST_STATUS_BLOCK_OFFSET(port, sb_id), func);
 
 	for (index = 0; index < HC_CSTORM_SB_NUM_INDICES; index++)
 		REG_WR16(bp, BAR_CSTRORM_INTMEM +
@@ -3707,9 +4098,35 @@ static void bnx2x_update_coalesce(struct bnx2x *bp)
 	}
 }
 
+static inline void bnx2x_free_tpa_pool(struct bnx2x *bp,
+				       struct bnx2x_fastpath *fp, int last)
+{
+	int i;
+
+	for (i = 0; i < last; i++) {
+		struct sw_rx_bd *rx_buf = &(fp->tpa_pool[i]);
+		struct sk_buff *skb = rx_buf->skb;
+
+		if (skb == NULL) {
+			DP(NETIF_MSG_IFDOWN, "tpa bin %d empty on free\n", i);
+			continue;
+		}
+
+		if (fp->tpa_state[i] == BNX2X_TPA_START)
+			pci_unmap_single(bp->pdev,
+					 pci_unmap_addr(rx_buf, mapping),
+					 bp->rx_buf_use_size,
+					 PCI_DMA_FROMDEVICE);
+
+		dev_kfree_skb(skb);
+		rx_buf->skb = NULL;
+	}
+}
+
 static void bnx2x_init_rx_rings(struct bnx2x *bp)
 {
-	u16 ring_prod;
+	int func = BP_FUNC(bp);
+	u16 ring_prod, cqe_ring_prod = 0;
 	int i, j;
 
 	bp->rx_buf_use_size = bp->dev->mtu;
@@ -3717,12 +4134,59 @@ static void bnx2x_init_rx_rings(struct bnx2x *bp)
 	bp->rx_buf_use_size += bp->rx_offset + ETH_OVREHEAD;
 	bp->rx_buf_size = bp->rx_buf_use_size + 64;
 
+	if (bp->flags & TPA_ENABLE_FLAG) {
+		DP(NETIF_MSG_IFUP,
+		   "rx_buf_use_size %d  rx_buf_size %d  effective_mtu %d\n",
+		   bp->rx_buf_use_size, bp->rx_buf_size,
+		   bp->dev->mtu + ETH_OVREHEAD);
+
+		for_each_queue(bp, j) {
+			for (i = 0; i < ETH_MAX_AGGREGATION_QUEUES_E1H; i++) {
+				struct bnx2x_fastpath *fp = &bp->fp[j];
+
+				fp->tpa_pool[i].skb =
+				   netdev_alloc_skb(bp->dev, bp->rx_buf_size);
+				if (!fp->tpa_pool[i].skb) {
+					BNX2X_ERR("Failed to allocate TPA "
+						  "skb pool for queue[%d] - "
+						  "disabling TPA on this "
+						  "queue!\n", j);
+					bnx2x_free_tpa_pool(bp, fp, i);
+					fp->disable_tpa = 1;
+					break;
+				}
+				pci_unmap_addr_set((struct sw_rx_bd *)
+							&bp->fp->tpa_pool[i],
+						   mapping, 0);
+				fp->tpa_state[i] = BNX2X_TPA_STOP;
+			}
+		}
+	}
+
 	for_each_queue(bp, j) {
 		struct bnx2x_fastpath *fp = &bp->fp[j];
 
 		fp->rx_bd_cons = 0;
 		fp->rx_cons_sb = BNX2X_RX_SB_INDEX;
+		fp->rx_bd_cons_sb = BNX2X_RX_SB_BD_INDEX;
+
+		/* "next page" elements initialization */
+		/* SGE ring */
+		for (i = 1; i <= NUM_RX_SGE_PAGES; i++) {
+			struct eth_rx_sge *sge;
+
+			sge = &fp->rx_sge_ring[RX_SGE_CNT * i - 2];
+			sge->addr_hi =
+				cpu_to_le32(U64_HI(fp->rx_sge_mapping +
+					BCM_PAGE_SIZE*(i % NUM_RX_SGE_PAGES)));
+			sge->addr_lo =
+				cpu_to_le32(U64_LO(fp->rx_sge_mapping +
+					BCM_PAGE_SIZE*(i % NUM_RX_SGE_PAGES)));
+		}
+
+		bnx2x_init_sge_ring_bit_mask(fp);
 
+		/* RX BD ring */
 		for (i = 1; i <= NUM_RX_RINGS; i++) {
 			struct eth_rx_bd *rx_bd;
 
@@ -3749,35 +4213,61 @@ static void bnx2x_init_rx_rings(struct bnx2x *bp)
 					   BCM_PAGE_SIZE*(i % NUM_RCQ_RINGS)));
 		}
 
-		/* rx completion queue */
-		fp->rx_comp_cons = ring_prod = 0;
+		/* Allocate SGEs and initialize the ring elements */
+		for (i = 0, ring_prod = 0;
+		     i < MAX_RX_SGE_CNT*NUM_RX_SGE_PAGES; i++) {
 
+			if (bnx2x_alloc_rx_sge(bp, fp, ring_prod) < 0) {
+				BNX2X_ERR("was only able to allocate "
+					  "%d rx sges\n", i);
+				BNX2X_ERR("disabling TPA for queue[%d]\n", j);
+				/* Cleanup already allocated elements */
+				bnx2x_free_rx_sge_range(bp, fp, ring_prod);
+				bnx2x_free_tpa_pool(bp, fp,
+					      ETH_MAX_AGGREGATION_QUEUES_E1H);
+				fp->disable_tpa = 1;
+				ring_prod = 0;
+				break;
+			}
+			ring_prod = NEXT_SGE_IDX(ring_prod);
+		}
+		fp->rx_sge_prod = ring_prod;
+
+		/* Allocate BDs and initialize BD ring */
+		fp->rx_comp_cons = fp->rx_alloc_failed = 0;
+		cqe_ring_prod = ring_prod = 0;
 		for (i = 0; i < bp->rx_ring_size; i++) {
 			if (bnx2x_alloc_rx_skb(bp, fp, ring_prod) < 0) {
 				BNX2X_ERR("was only able to allocate "
 					  "%d rx skbs\n", i);
+				fp->rx_alloc_failed++;
 				break;
 			}
 			ring_prod = NEXT_RX_IDX(ring_prod);
+			cqe_ring_prod = NEXT_RCQ_IDX(cqe_ring_prod);
 			BUG_TRAP(ring_prod > i);
 		}
 
-		fp->rx_bd_prod = fp->rx_comp_prod = ring_prod;
+		fp->rx_bd_prod = ring_prod;
+		/* must not have more available CQEs than BDs */
+		fp->rx_comp_prod = min((u16)(NUM_RCQ_RINGS*RCQ_DESC_CNT),
+				       cqe_ring_prod);
 		fp->rx_pkt = fp->rx_calls = 0;
 
-		/* Warning! this will generate an interrupt (to the TSTORM) */
-		/* must only be done when chip is initialized */
-		REG_WR(bp, BAR_TSTRORM_INTMEM +
-		       TSTORM_RX_PRODS_OFFSET(BP_PORT(bp), FP_CL_ID(fp)),
-			ring_prod);
+		/* Warning!
+		 * this will generate an interrupt (to the TSTORM)
+		 * must only be done after chip is initialized
+		 */
+		bnx2x_update_rx_prod(bp, fp, ring_prod, fp->rx_comp_prod,
+				     fp->rx_sge_prod);
 		if (j != 0)
 			continue;
 
 		REG_WR(bp, BAR_USTRORM_INTMEM +
-		       USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(BP_PORT(bp)),
+		       USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(func),
 		       U64_LO(fp->rx_comp_mapping));
 		REG_WR(bp, BAR_USTRORM_INTMEM +
-		       USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(BP_PORT(bp)) + 4,
+		       USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(func) + 4,
 		       U64_HI(fp->rx_comp_mapping));
 	}
 }
@@ -3865,6 +4355,18 @@ static void bnx2x_init_context(struct bnx2x *bp)
 						U64_HI(fp->rx_desc_mapping);
 		context->ustorm_st_context.common.bd_page_base_lo =
 						U64_LO(fp->rx_desc_mapping);
+		if (!fp->disable_tpa) {
+			context->ustorm_st_context.common.flags |=
+				(USTORM_ETH_ST_CONTEXT_CONFIG_ENABLE_TPA |
+				 USTORM_ETH_ST_CONTEXT_CONFIG_ENABLE_SGE_RING);
+			context->ustorm_st_context.common.sge_buff_size =
+						(u16)(PAGES_PER_SGE*PAGE_SIZE);
+			context->ustorm_st_context.common.sge_page_base_hi =
+						U64_HI(fp->rx_sge_mapping);
+			context->ustorm_st_context.common.sge_page_base_lo =
+						U64_LO(fp->rx_sge_mapping);
+		}
+
 		context->cstorm_st_context.sb_index_number =
 						HC_INDEX_C_ETH_TX_CQ_CONS;
 		context->cstorm_st_context.status_block_id = sb_id;
@@ -3915,6 +4417,18 @@ static void bnx2x_set_client_config(struct bnx2x *bp)
 	}
 #endif
 
+	if (bp->flags & TPA_ENABLE_FLAG) {
+		tstorm_client.max_sges_for_packet =
+				PAGE_ALIGN(tstorm_client.mtu) >> PAGE_SHIFT;
+		tstorm_client.max_sges_for_packet =
+				((tstorm_client.max_sges_for_packet +
+				  PAGES_PER_SGE - 1) & (~(PAGES_PER_SGE - 1)))
+				>> PAGES_PER_SGE_SHIFT;
+
+		tstorm_client.config_flags |=
+				TSTORM_ETH_CLIENT_CONFIG_ENABLE_SGE_RING;
+	}
+
 	for_each_queue(bp, i) {
 		REG_WR(bp, BAR_TSTRORM_INTMEM +
 			TSTORM_CLIENT_CONFIG_OFFSET(port, bp->fp[i].cl_id),
@@ -4038,6 +4552,23 @@ static void bnx2x_init_internal(struct bnx2x *bp)
 	for (i = 0; i < USTORM_AGG_DATA_SIZE >> 2; i++)
 		REG_WR(bp, BAR_USTRORM_INTMEM +
 			USTORM_AGG_DATA_OFFSET + 4*i, 0);
+
+	for_each_queue(bp, i) {
+		struct bnx2x_fastpath *fp = &bp->fp[i];
+
+		REG_WR(bp, BAR_USTRORM_INTMEM +
+		       USTORM_CQE_PAGE_BASE_OFFSET(port, FP_CL_ID(fp)),
+		       U64_LO(fp->rx_comp_mapping));
+		REG_WR(bp, BAR_USTRORM_INTMEM +
+		       USTORM_CQE_PAGE_BASE_OFFSET(port, FP_CL_ID(fp)) + 4,
+		       U64_HI(fp->rx_comp_mapping));
+
+		REG_WR16(bp, BAR_USTRORM_INTMEM +
+			 USTORM_MAX_AGG_SIZE_OFFSET(port, FP_CL_ID(fp)),
+			 min((u32)(bp->rx_buf_use_size +
+				   8*PAGES_PER_SGE*PAGE_SIZE),
+			     (u32)0xffff));
+	}
 }
 
 static void bnx2x_nic_init(struct bnx2x *bp)
@@ -4696,6 +5227,17 @@ static int bnx2x_init_common(struct bnx2x *bp)
 	enable_blocks_parity(bp);
 #endif
 
+	if (bp->flags & TPA_ENABLE_FLAG) {
+		struct tstorm_eth_tpa_exist tmp = {0};
+
+		tmp.tpa_exist = 1;
+
+		REG_WR(bp, BAR_TSTRORM_INTMEM + TSTORM_TPA_EXIST_OFFSET,
+		       ((u32 *)&tmp)[0]);
+		REG_WR(bp, BAR_TSTRORM_INTMEM + TSTORM_TPA_EXIST_OFFSET + 4,
+		       ((u32 *)&tmp)[1]);
+	}
+
 	return 0;
 }
 
@@ -5073,6 +5615,11 @@ static void bnx2x_free_mem(struct bnx2x *bp)
 			       bnx2x_fp(bp, i, rx_comp_mapping),
 			       sizeof(struct eth_fast_path_rx_cqe) *
 			       NUM_RCQ_BD);
+
+		/* SGE ring */
+		BNX2X_PCI_FREE(bnx2x_fp(bp, i, rx_sge_ring),
+			       bnx2x_fp(bp, i, rx_sge_mapping),
+			       BCM_PAGE_SIZE * NUM_RX_SGE_PAGES);
 	}
 
 	/* end of fastpath */
@@ -5152,6 +5699,12 @@ static int bnx2x_alloc_mem(struct bnx2x *bp)
 				sizeof(struct eth_fast_path_rx_cqe) *
 				NUM_RCQ_BD);
 
+		/* SGE ring */
+		BNX2X_ALLOC(bnx2x_fp(bp, i, rx_page_ring),
+				sizeof(struct sw_rx_page) * NUM_RX_SGE);
+		BNX2X_PCI_ALLOC(bnx2x_fp(bp, i, rx_sge_ring),
+				&bnx2x_fp(bp, i, rx_sge_mapping),
+				BCM_PAGE_SIZE * NUM_RX_SGE_PAGES);
 	}
 	/* end of fastpath */
 
@@ -5242,6 +5795,9 @@ static void bnx2x_free_rx_skbs(struct bnx2x *bp)
 			rx_buf->skb = NULL;
 			dev_kfree_skb(skb);
 		}
+		if (!fp->disable_tpa)
+			bnx2x_free_tpa_pool(bp, fp,
+					    ETH_MAX_AGGREGATION_QUEUES_E1H);
 	}
 }
 
@@ -5595,6 +6151,10 @@ static int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
 	if (bnx2x_alloc_mem(bp))
 		return -ENOMEM;
 
+	for_each_queue(bp, i)
+		bnx2x_fp(bp, i, disable_tpa) =
+					((bp->flags & TPA_ENABLE_FLAG) == 0);
+
 	/* Disable ISR calls until we initialize HW */
 	atomic_set(&bp->intr_sem, 1);
 
@@ -5723,6 +6283,11 @@ load_int_disable:
 	/* Release IRQs */
 	bnx2x_free_irq(bp);
 
+	/* Free SKBs, SGEs, TPA pool and driver internals */
+	bnx2x_free_skbs(bp);
+	for_each_queue(bp, i)
+		bnx2x_free_rx_sge_range(bp, bp->fp + i,
+					RX_SGE_CNT*NUM_RX_SGE_PAGES);
 load_error:
 	bnx2x_free_mem(bp);
 
@@ -5895,6 +6460,10 @@ static int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode)
 	for_each_queue(bp, i) {
 		struct bnx2x_fastpath *fp = &bp->fp[i];
 
+#ifdef BNX2X_STOP_ON_ERROR
+		DP(NETIF_MSG_IFDOWN, "fp->tpa_queue_used = 0x%llx\n",
+		   fp->tpa_queue_used);
+#endif
 		cnt = 1000;
 		while (bnx2x_has_work(fp)) {
 			msleep(1);
@@ -5994,8 +6563,11 @@ unload_error:
 	if (!BP_NOMCP(bp))
 		bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_DONE);
 
-	/* Free SKBs and driver internals */
+	/* Free SKBs, SGEs, TPA pool and driver internals */
 	bnx2x_free_skbs(bp);
+	for_each_queue(bp, i)
+		bnx2x_free_rx_sge_range(bp, bp->fp + i,
+					RX_SGE_CNT*NUM_RX_SGE_PAGES);
 	bnx2x_free_mem(bp);
 
 	bp->state = BNX2X_STATE_CLOSED;
@@ -6671,6 +7243,13 @@ static int bnx2x_init_bp(struct bnx2x *bp)
 		printk(KERN_ERR PFX
 		       "MCP disabled, must load devices in order!\n");
 
+	/* Set TPA flags */
+	if (disable_tpa)
+		bp->flags &= ~TPA_ENABLE_FLAG;
+	else
+		bp->flags |= TPA_ENABLE_FLAG;
+
+
 	bp->tx_ring_size = MAX_TX_AVAIL;
 	bp->rx_ring_size = MAX_RX_AVAIL;
 
-- 
1.5.5



