Date:   Wed, 31 Jan 2018 14:53:45 +0100
From:   Björn Töpel <bjorn.topel@...il.com>
To:     bjorn.topel@...il.com, magnus.karlsson@...el.com,
        alexander.h.duyck@...el.com, alexander.duyck@...il.com,
        john.fastabend@...il.com, ast@...com, brouer@...hat.com,
        willemdebruijn.kernel@...il.com, daniel@...earbox.net,
        netdev@...r.kernel.org
Cc:     Björn Töpel <bjorn.topel@...el.com>,
        michael.lundkvist@...csson.com, jesse.brandeburg@...el.com,
        anjali.singhai@...el.com, jeffrey.b.shaw@...el.com,
        ferruh.yigit@...el.com, qi.z.zhang@...el.com
Subject: [RFC PATCH 13/24] i40e: introduce external allocator support

From: Björn Töpel <bjorn.topel@...el.com>

Here, buff_pool is introduced, an allocator/pool for Rx frames. This
commit pulls the recycling allocator out of i40e, starts using the new
buff_pool API, and adds a simple, non-recycling page-allocating
buff_pool implementation. Future commits will reintroduce a page
recycling/flipping buff_pool implementation.
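
(Not part of the patch: a rough sketch of how the Rx fill path is
expected to drive the new API, using only the bpool_* calls added
below and the ring/buffer fields this series introduces. The helper
name i40e_bp_refill_one is made up for illustration; the real code
lives in i40e_alloc_rx_buffers().)

static int i40e_bp_refill_one(struct i40e_ring *rx_ring,
			      struct i40e_rx_buffer *bi,
			      union i40e_rx_desc *rx_desc)
{
	unsigned int hr = rx_ring->rx_buf_hr;
	unsigned long handle;
	dma_addr_t dma;

	/* Pull an opaque handle from the pool; the pool decides whether
	 * this is a fresh page or (in later commits) a recycled one.
	 */
	if (bpool_alloc(rx_ring->bpool, &handle))
		return -ENOMEM;

	/* Hand the data area, past the headroom, back to the device. */
	dma = bpool_buff_dma(rx_ring->bpool, handle);
	bpool_buff_dma_sync_dev(rx_ring->bpool, handle, hr,
				rx_ring->rx_buf_len);

	bi->handle = handle;
	rx_desc->read.pkt_addr = cpu_to_le64(dma + hr);

	return 0;
}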

Signed-off-by: Björn Töpel <bjorn.topel@...el.com>
---
 drivers/net/ethernet/intel/i40e/Makefile       |   3 +-
 drivers/net/ethernet/intel/i40e/buff_pool.c    | 285 ++++++++++++++
 drivers/net/ethernet/intel/i40e/buff_pool.h    |  70 ++++
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c |   1 -
 drivers/net/ethernet/intel/i40e/i40e_main.c    |  24 +-
 drivers/net/ethernet/intel/i40e/i40e_txrx.c    | 510 +++++++++----------------
 drivers/net/ethernet/intel/i40e/i40e_txrx.h    |  64 +---
 7 files changed, 541 insertions(+), 416 deletions(-)
 create mode 100644 drivers/net/ethernet/intel/i40e/buff_pool.c
 create mode 100644 drivers/net/ethernet/intel/i40e/buff_pool.h

diff --git a/drivers/net/ethernet/intel/i40e/Makefile b/drivers/net/ethernet/intel/i40e/Makefile
index 3da482c3d68d..bfdf9ce3e7f0 100644
--- a/drivers/net/ethernet/intel/i40e/Makefile
+++ b/drivers/net/ethernet/intel/i40e/Makefile
@@ -45,6 +45,7 @@ i40e-objs := i40e_main.o \
 	i40e_txrx.o	\
 	i40e_ptp.o	\
 	i40e_client.o   \
-	i40e_virtchnl_pf.o
+	i40e_virtchnl_pf.o \
+	buff_pool.o
 
 i40e-$(CONFIG_I40E_DCB) += i40e_dcb.o i40e_dcb_nl.o
diff --git a/drivers/net/ethernet/intel/i40e/buff_pool.c b/drivers/net/ethernet/intel/i40e/buff_pool.c
new file mode 100644
index 000000000000..8c51f61ca71d
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/buff_pool.c
@@ -0,0 +1,285 @@
+#include "buff_pool.h"
+
+#include "i40e.h"
+#include "i40e_txrx.h"
+
+struct buff_pool_ops {
+	int (*alloc)(void *pool, unsigned long *handle);
+	void (*free)(void *pool, unsigned long handle);
+	unsigned int (*buff_size)(void *pool);
+	unsigned int (*total_buff_size)(void *pool);
+	unsigned int (*buff_headroom)(void *pool);
+	unsigned int (*buff_truesize)(void *pool);
+	void *(*buff_ptr)(void *pool, unsigned long handle);
+	int (*buff_convert_to_page)(void *pool,
+				    unsigned long handle,
+				    struct page **pg, unsigned int *pg_off);
+	dma_addr_t (*buff_dma)(void *pool,
+			       unsigned long handle);
+	void (*buff_dma_sync_cpu)(void *pool,
+				  unsigned long handle,
+				  unsigned int off,
+				  unsigned int size);
+	void (*buff_dma_sync_dev)(void *pool,
+				  unsigned long handle,
+				  unsigned int off,
+				  unsigned int size);
+};
+
+int bpool_alloc(struct buff_pool *pool, unsigned long *handle)
+{
+	return pool->ops->alloc(pool->pool, handle);
+}
+
+void bpool_free(struct buff_pool *pool, unsigned long handle)
+{
+	pool->ops->free(pool->pool, handle);
+}
+
+unsigned int bpool_buff_size(struct buff_pool *pool)
+{
+	return pool->ops->buff_size(pool->pool);
+}
+
+unsigned int bpool_total_buff_size(struct buff_pool *pool)
+{
+	return pool->ops->total_buff_size(pool->pool);
+}
+
+unsigned int bpool_buff_headroom(struct buff_pool *pool)
+{
+	return pool->ops->buff_headroom(pool->pool);
+}
+
+unsigned int bpool_buff_truesize(struct buff_pool *pool)
+{
+	return pool->ops->buff_truesize(pool->pool);
+}
+
+void *bpool_buff_ptr(struct buff_pool *pool, unsigned long handle)
+{
+	return pool->ops->buff_ptr(pool->pool, handle);
+}
+
+int bpool_buff_convert_to_page(struct buff_pool *pool, unsigned long handle,
+			       struct page **pg, unsigned int *pg_off)
+{
+	return pool->ops->buff_convert_to_page(pool->pool, handle, pg, pg_off);
+}
+
+dma_addr_t bpool_buff_dma(struct buff_pool *pool,
+			  unsigned long handle)
+{
+	return pool->ops->buff_dma(pool->pool, handle);
+}
+
+void bpool_buff_dma_sync_cpu(struct buff_pool *pool,
+			     unsigned long handle,
+			     unsigned int off,
+			     unsigned int size)
+{
+	pool->ops->buff_dma_sync_cpu(pool->pool, handle, off, size);
+}
+
+void bpool_buff_dma_sync_dev(struct buff_pool *pool,
+			     unsigned long handle,
+			     unsigned int off,
+			     unsigned int size)
+{
+	pool->ops->buff_dma_sync_dev(pool->pool, handle, off, size);
+}
+
+/* Naive, non-recycling allocator. */
+
+struct i40e_bp_pool {
+	struct device *dev;
+};
+
+struct i40e_bp_header {
+	dma_addr_t dma;
+};
+
+#define I40E_BPHDR_ALIGNED_SIZE ALIGN(sizeof(struct i40e_bp_header),	\
+				     SMP_CACHE_BYTES)
+
+static int i40e_bp_alloc(void *pool, unsigned long *handle)
+{
+	struct i40e_bp_pool *impl = (struct i40e_bp_pool *)pool;
+	struct i40e_bp_header *hdr;
+	struct page *pg;
+	dma_addr_t dma;
+
+	pg = dev_alloc_pages(0);
+	if (unlikely(!pg))
+		return -ENOMEM;
+
+	dma = dma_map_page_attrs(impl->dev, pg, 0,
+				 PAGE_SIZE,
+				 DMA_FROM_DEVICE,
+				 I40E_RX_DMA_ATTR);
+
+	if (dma_mapping_error(impl->dev, dma)) {
+		__free_pages(pg, 0);
+		return -ENOMEM;
+	}
+
+	hdr = (struct i40e_bp_header *)page_address(pg);
+	hdr->dma = dma;
+
+	*handle = (unsigned long)(((void *)hdr) + I40E_BPHDR_ALIGNED_SIZE);
+
+	return 0;
+}
+
+static void i40e_bp_free(void *pool, unsigned long handle)
+{
+	struct i40e_bp_pool *impl = (struct i40e_bp_pool *)pool;
+	struct i40e_bp_header *hdr;
+
+	hdr = (struct i40e_bp_header *)(handle & PAGE_MASK);
+
+	dma_unmap_page_attrs(impl->dev, hdr->dma, PAGE_SIZE,
+			     DMA_FROM_DEVICE, I40E_RX_DMA_ATTR);
+	page_frag_free(hdr);
+}
+
+static unsigned int i40e_bp_buff_size(void *pool)
+{
+	(void)pool;
+	return I40E_RXBUFFER_3072;
+}
+
+static unsigned int i40e_bp_total_buff_size(void *pool)
+{
+	(void)pool;
+	return PAGE_SIZE - I40E_BPHDR_ALIGNED_SIZE -
+		SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+}
+
+static unsigned int i40e_bp_buff_headroom(void *pool)
+{
+	(void)pool;
+	return PAGE_SIZE - I40E_BPHDR_ALIGNED_SIZE -
+		SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) -
+		I40E_RXBUFFER_3072;
+}
+
+static unsigned int i40e_bp_buff_truesize(void *pool)
+{
+	(void)pool;
+	return PAGE_SIZE;
+}
+
+static void *i40e_bp_buff_ptr(void *pool, unsigned long handle)
+{
+	return (void *)handle;
+}
+
+static int i40e_bp_buff_convert_to_page(void *pool,
+					unsigned long handle,
+					struct page **pg, unsigned int *pg_off)
+{
+	struct i40e_bp_pool *impl = (struct i40e_bp_pool *)pool;
+	struct i40e_bp_header *hdr;
+
+	hdr = (struct i40e_bp_header *)(handle & PAGE_MASK);
+
+	dma_unmap_page_attrs(impl->dev, hdr->dma, PAGE_SIZE,
+			     DMA_FROM_DEVICE, I40E_RX_DMA_ATTR);
+
+	*pg = virt_to_page(hdr);
+	*pg_off = I40E_BPHDR_ALIGNED_SIZE;
+
+	return 0;
+}
+
+static dma_addr_t i40e_bp_buff_dma(void *pool,
+				   unsigned long handle)
+{
+	struct i40e_bp_header *hdr;
+
+	hdr = (struct i40e_bp_header *)(handle & PAGE_MASK);
+
+	return hdr->dma + I40E_BPHDR_ALIGNED_SIZE;
+}
+
+static void i40e_bp_buff_dma_sync_cpu(void *pool,
+				      unsigned long handle,
+				      unsigned int off,
+				      unsigned int size)
+{
+	struct i40e_bp_pool *impl = (struct i40e_bp_pool *)pool;
+	struct i40e_bp_header *hdr;
+
+	off += I40E_BPHDR_ALIGNED_SIZE;
+
+	hdr = (struct i40e_bp_header *)(handle & PAGE_MASK);
+	dma_sync_single_range_for_cpu(impl->dev, hdr->dma, off, size,
+				      DMA_FROM_DEVICE);
+}
+
+static void i40e_bp_buff_dma_sync_dev(void *pool,
+				      unsigned long handle,
+				      unsigned int off,
+				      unsigned int size)
+{
+	struct i40e_bp_pool *impl = (struct i40e_bp_pool *)pool;
+	struct i40e_bp_header *hdr;
+
+	off += I40E_BPHDR_ALIGNED_SIZE;
+
+	hdr = (struct i40e_bp_header *)(handle & PAGE_MASK);
+	dma_sync_single_range_for_device(impl->dev, hdr->dma, off, size,
+					 DMA_FROM_DEVICE);
+}
+
+struct buff_pool *i40e_buff_pool_create(struct device *dev)
+{
+	struct i40e_bp_pool *pool_impl;
+	struct buff_pool_ops *pool_ops;
+	struct buff_pool *pool;
+
+	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+	if (!pool)
+		return NULL;
+
+	pool_impl = kzalloc(sizeof(*pool_impl), GFP_KERNEL);
+	if (!pool_impl) {
+		kfree(pool);
+		return NULL;
+	}
+
+	pool_ops = kzalloc(sizeof(*pool_ops), GFP_KERNEL);
+	if (!pool_ops) {
+		kfree(pool_impl);
+		kfree(pool);
+		return NULL;
+	}
+
+	pool_ops->alloc = i40e_bp_alloc;
+	pool_ops->free = i40e_bp_free;
+	pool_ops->buff_size = i40e_bp_buff_size;
+	pool_ops->total_buff_size = i40e_bp_total_buff_size;
+	pool_ops->buff_headroom = i40e_bp_buff_headroom;
+	pool_ops->buff_truesize = i40e_bp_buff_truesize;
+	pool_ops->buff_ptr = i40e_bp_buff_ptr;
+	pool_ops->buff_convert_to_page = i40e_bp_buff_convert_to_page;
+	pool_ops->buff_dma = i40e_bp_buff_dma;
+	pool_ops->buff_dma_sync_cpu = i40e_bp_buff_dma_sync_cpu;
+	pool_ops->buff_dma_sync_dev = i40e_bp_buff_dma_sync_dev;
+
+	pool_impl->dev = dev;
+
+	pool->pool = pool_impl;
+	pool->ops = pool_ops;
+
+	return pool;
+}
+
+void i40e_buff_pool_destroy(struct buff_pool *pool)
+{
+	kfree(pool->ops);
+	kfree(pool->pool);
+	kfree(pool);
+}
+
diff --git a/drivers/net/ethernet/intel/i40e/buff_pool.h b/drivers/net/ethernet/intel/i40e/buff_pool.h
new file mode 100644
index 000000000000..933881e14ac0
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/buff_pool.h
@@ -0,0 +1,70 @@
+#ifndef BUFF_POOL_H_
+#define BUFF_POOL_H_
+
+#include <linux/types.h>
+
+struct page;
+struct device;
+
+struct buff_pool_ops;
+
+struct buff_pool {
+	void *pool;
+	struct buff_pool_ops *ops;
+};
+
+/* Allocates a new buffer from the pool */
+int bpool_alloc(struct buff_pool *pool, unsigned long *handle);
+
+/* Returns a buffer originating from the pool, back to the pool */
+void bpool_free(struct buff_pool *pool, unsigned long handle);
+
+/* Returns the size of the buffer, w/o headroom. This is what the pool
+ * creator passed to the constructor.
+ */
+unsigned int bpool_buff_size(struct buff_pool *pool);
+
+/* Returns the size of the buffer, plus additional headroom (if
+ * any).
+ */
+unsigned int bpool_total_buff_size(struct buff_pool *pool);
+
+/* Returns additional headroom (if any) */
+unsigned int bpool_buff_headroom(struct buff_pool *pool);
+
+/* Returns the truesize (as for skbuff) */
+unsigned int bpool_buff_truesize(struct buff_pool *pool);
+
+/* Returns the kernel virtual address of the buffer behind the handle. */
+void *bpool_buff_ptr(struct buff_pool *pool, unsigned long handle);
+
+/* Converts a handle to a page. After a successful call, the handle is
+ * stale and should be considered freed; it must not be used again.
+ * Callers are responsible for releasing the returned page themselves
+ * (e.g. via page_frag_free()).
+ */
+int bpool_buff_convert_to_page(struct buff_pool *pool, unsigned long handle,
+			       struct page **pg, unsigned int *pg_off);
+
+/* Returns the dma address of a buffer */
+dma_addr_t bpool_buff_dma(struct buff_pool *pool,
+			  unsigned long handle);
+
+/* DMA sync for CPU */
+void bpool_buff_dma_sync_cpu(struct buff_pool *pool,
+			     unsigned long handle,
+			     unsigned int off,
+			     unsigned int size);
+
+/* DMA sync for device */
+void bpool_buff_dma_sync_dev(struct buff_pool *pool,
+			     unsigned long handle,
+			     unsigned int off,
+			     unsigned int size);
+/* ---- */
+
+struct buff_pool *i40e_buff_pool_create(struct device *dev);
+void i40e_buff_pool_destroy(struct buff_pool *pool);
+
+#endif /* BUFF_POOL_H_ */
+
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 2f5bee713fef..505e4bea01fb 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -1647,7 +1647,6 @@ static int i40e_set_ringparam(struct net_device *netdev,
 			 */
 			rx_rings[i].next_to_use = 0;
 			rx_rings[i].next_to_clean = 0;
-			rx_rings[i].next_to_alloc = 0;
 			/* do a struct copy */
 			*vsi->rx_rings[i] = rx_rings[i];
 		}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 09efb9dd09f3..7e82b7c6c0b7 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -39,6 +39,7 @@
  */
 #define CREATE_TRACE_POINTS
 #include "i40e_trace.h"
+#include "buff_pool.h"
 
 const char i40e_driver_name[] = "i40e";
 static const char i40e_driver_string[] =
@@ -3217,7 +3218,9 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
 	/* clear the context structure first */
 	memset(&rx_ctx, 0, sizeof(rx_ctx));
 
-	ring->rx_buf_len = vsi->rx_buf_len;
+	ring->bpool = i40e_buff_pool_create(ring->dev);
+	ring->rx_buf_hr = (u16)bpool_buff_headroom(ring->bpool);
+	ring->rx_buf_len = (u16)bpool_buff_size(ring->bpool);
 
 	rx_ctx.dbuff = DIV_ROUND_UP(ring->rx_buf_len,
 				    BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT));
@@ -3312,20 +3315,8 @@ static int i40e_vsi_configure_rx(struct i40e_vsi *vsi)
 	int err = 0;
 	u16 i;
 
-	if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX)) {
-		vsi->max_frame = I40E_MAX_RXBUFFER;
-		vsi->rx_buf_len = I40E_RXBUFFER_2048;
-#if (PAGE_SIZE < 8192)
-	} else if (!I40E_2K_TOO_SMALL_WITH_PADDING &&
-		   (vsi->netdev->mtu <= ETH_DATA_LEN)) {
-		vsi->max_frame = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
-		vsi->rx_buf_len = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
-#endif
-	} else {
-		vsi->max_frame = I40E_MAX_RXBUFFER;
-		vsi->rx_buf_len = (PAGE_SIZE < 8192) ? I40E_RXBUFFER_3072 :
-						       I40E_RXBUFFER_2048;
-	}
+	vsi->max_frame = I40E_MAX_RXBUFFER;
+	vsi->rx_buf_len = I40E_RXBUFFER_3072;
 
 	/* set up individual rings */
 	for (i = 0; i < vsi->num_queue_pairs && !err; i++)
@@ -11601,6 +11592,9 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi,
 	bool need_reset;
 	int i;
 
+	/* XXX What's the correct behavior here, when we can have
+	 * different rx_buf_lens per ring?
+	 */
 	/* Don't allow frames that span over multiple buffers */
 	if (frame_size > vsi->rx_buf_len)
 		return -EINVAL;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index f0feae92a34a..aa29013acf0c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -31,6 +31,7 @@
 #include "i40e.h"
 #include "i40e_trace.h"
 #include "i40e_prototype.h"
+#include "buff_pool.h"
 
 static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
 				u32 td_tag)
@@ -1090,32 +1091,6 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
 	return false;
 }
 
-/**
- * i40e_reuse_rx_page - page flip buffer and store it back on the ring
- * @rx_ring: rx descriptor ring to store buffers on
- * @old_buff: donor buffer to have page reused
- *
- * Synchronizes page for reuse by the adapter
- **/
-static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
-			       struct i40e_rx_buffer *old_buff)
-{
-	struct i40e_rx_buffer *new_buff;
-	u16 nta = rx_ring->next_to_alloc;
-
-	new_buff = &rx_ring->rx_bi[nta];
-
-	/* update, and store next to alloc */
-	nta++;
-	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
-
-	/* transfer page from old buffer to new buffer */
-	new_buff->dma		= old_buff->dma;
-	new_buff->page		= old_buff->page;
-	new_buff->page_offset	= old_buff->page_offset;
-	new_buff->pagecnt_bias	= old_buff->pagecnt_bias;
-}
-
 /**
  * i40e_rx_is_programming_status - check for programming status descriptor
  * @qw: qword representing status_error_len in CPU ordering
@@ -1161,12 +1136,8 @@ static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
 
 	prefetch(I40E_RX_DESC(rx_ring, ntc));
 
-	/* place unused page back on the ring */
-	i40e_reuse_rx_page(rx_ring, rx_buffer);
-	rx_ring->rx_stats.page_reuse_count++;
-
-	/* clear contents of buffer_info */
-	rx_buffer->page = NULL;
+	bpool_free(rx_ring->bpool, rx_buffer->handle);
+	rx_buffer->handle = 0;
 
 	id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
 		  I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
@@ -1246,28 +1217,17 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
 	for (i = 0; i < rx_ring->count; i++) {
 		struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i];
 
-		if (!rx_bi->page)
+		if (!rx_bi->handle)
 			continue;
 
 		/* Invalidate cache lines that may have been written to by
 		 * device so that we avoid corrupting memory.
 		 */
-		dma_sync_single_range_for_cpu(rx_ring->dev,
-					      rx_bi->dma,
-					      rx_bi->page_offset,
-					      rx_ring->rx_buf_len,
-					      DMA_FROM_DEVICE);
-
-		/* free resources associated with mapping */
-		dma_unmap_page_attrs(rx_ring->dev, rx_bi->dma,
-				     i40e_rx_pg_size(rx_ring),
-				     DMA_FROM_DEVICE,
-				     I40E_RX_DMA_ATTR);
+		bpool_buff_dma_sync_cpu(rx_ring->bpool, rx_bi->handle, 0,
+					rx_ring->rx_buf_len);
 
-		__page_frag_cache_drain(rx_bi->page, rx_bi->pagecnt_bias);
-
-		rx_bi->page = NULL;
-		rx_bi->page_offset = 0;
+		bpool_free(rx_ring->bpool, rx_bi->handle);
+		rx_bi->handle = 0;
 	}
 
 	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
@@ -1276,7 +1236,6 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
 	/* Zero out the descriptor ring */
 	memset(rx_ring->desc, 0, rx_ring->size);
 
-	rx_ring->next_to_alloc = 0;
 	rx_ring->next_to_clean = 0;
 	rx_ring->next_to_use = 0;
 }
@@ -1296,6 +1255,9 @@ void i40e_free_rx_resources(struct i40e_ring *rx_ring)
 	kfree(rx_ring->rx_bi);
 	rx_ring->rx_bi = NULL;
 
+	i40e_buff_pool_destroy(rx_ring->bpool);
+	rx_ring->bpool = NULL;
+
 	if (rx_ring->desc) {
 		dma_free_coherent(rx_ring->dev, rx_ring->size,
 				  rx_ring->desc, rx_ring->dma);
@@ -1336,7 +1298,6 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
 		goto err;
 	}
 
-	rx_ring->next_to_alloc = 0;
 	rx_ring->next_to_clean = 0;
 	rx_ring->next_to_use = 0;
 
@@ -1366,9 +1327,6 @@ static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
 {
 	rx_ring->next_to_use = val;
 
-	/* update next to alloc since we have filled the ring */
-	rx_ring->next_to_alloc = val;
-
 	/* Force memory writes to complete before letting h/w
 	 * know there are new descriptors to fetch.  (Only
 	 * applicable for weak-ordered memory model archs,
@@ -1378,17 +1336,6 @@ static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
 	writel(val, rx_ring->tail);
 }
 
-/**
- * i40e_rx_offset - Return expected offset into page to access data
- * @rx_ring: Ring we are requesting offset of
- *
- * Returns the offset value for ring into the data buffer.
- */
-static inline unsigned int i40e_rx_offset(struct i40e_ring *rx_ring)
-{
-	return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0;
-}
-
 /**
  * i40e_alloc_mapped_page - recycle or make a new page
  * @rx_ring: ring to use
@@ -1400,43 +1347,14 @@ static inline unsigned int i40e_rx_offset(struct i40e_ring *rx_ring)
 static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
 				   struct i40e_rx_buffer *bi)
 {
-	struct page *page = bi->page;
-	dma_addr_t dma;
-
-	/* since we are recycling buffers we should seldom need to alloc */
-	if (likely(page)) {
-		rx_ring->rx_stats.page_reuse_count++;
-		return true;
-	}
-
-	/* alloc new page for storage */
-	page = dev_alloc_pages(i40e_rx_pg_order(rx_ring));
-	if (unlikely(!page)) {
-		rx_ring->rx_stats.alloc_page_failed++;
-		return false;
-	}
-
-	/* map page for use */
-	dma = dma_map_page_attrs(rx_ring->dev, page, 0,
-				 i40e_rx_pg_size(rx_ring),
-				 DMA_FROM_DEVICE,
-				 I40E_RX_DMA_ATTR);
+	unsigned long handle;
+	int err;
 
-	/* if mapping failed free memory back to system since
-	 * there isn't much point in holding memory we can't use
-	 */
-	if (dma_mapping_error(rx_ring->dev, dma)) {
-		__free_pages(page, i40e_rx_pg_order(rx_ring));
-		rx_ring->rx_stats.alloc_page_failed++;
+	err = bpool_alloc(rx_ring->bpool, &handle);
+	if (err)
 		return false;
-	}
-
-	bi->dma = dma;
-	bi->page = page;
-	bi->page_offset = i40e_rx_offset(rx_ring);
 
-	page_ref_add(page, USHRT_MAX - 1);
-	bi->pagecnt_bias = USHRT_MAX;
+	bi->handle = handle;
 
 	return true;
 }
@@ -1480,19 +1398,19 @@ bool i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
 	bi = &rx_ring->rx_bi[ntu];
 
 	do {
+		unsigned int headroom;
+		dma_addr_t dma;
+
 		if (!i40e_alloc_mapped_page(rx_ring, bi))
 			goto no_buffers;
 
-		/* sync the buffer for use by the device */
-		dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
-						 bi->page_offset,
-						 rx_ring->rx_buf_len,
-						 DMA_FROM_DEVICE);
+		dma = bpool_buff_dma(rx_ring->bpool, bi->handle);
+		headroom = rx_ring->rx_buf_hr;
 
-		/* Refresh the desc even if buffer_addrs didn't change
-		 * because each write-back erases this info.
-		 */
-		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
+		bpool_buff_dma_sync_dev(rx_ring->bpool, bi->handle,
+					headroom, rx_ring->rx_buf_len);
+
+		rx_desc->read.pkt_addr = cpu_to_le64(dma + headroom);
 
 		rx_desc++;
 		bi++;
@@ -1738,78 +1656,6 @@ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb,
 	return false;
 }
 
-/**
- * i40e_page_is_reusable - check if any reuse is possible
- * @page: page struct to check
- *
- * A page is not reusable if it was allocated under low memory
- * conditions, or it's not in the same NUMA node as this CPU.
- */
-static inline bool i40e_page_is_reusable(struct page *page)
-{
-	return (page_to_nid(page) == numa_mem_id()) &&
-		!page_is_pfmemalloc(page);
-}
-
-/**
- * i40e_can_reuse_rx_page - Determine if this page can be reused by
- * the adapter for another receive
- *
- * @rx_buffer: buffer containing the page
- *
- * If page is reusable, rx_buffer->page_offset is adjusted to point to
- * an unused region in the page.
- *
- * For small pages, @truesize will be a constant value, half the size
- * of the memory at page.  We'll attempt to alternate between high and
- * low halves of the page, with one half ready for use by the hardware
- * and the other half being consumed by the stack.  We use the page
- * ref count to determine whether the stack has finished consuming the
- * portion of this page that was passed up with a previous packet.  If
- * the page ref count is >1, we'll assume the "other" half page is
- * still busy, and this page cannot be reused.
- *
- * For larger pages, @truesize will be the actual space used by the
- * received packet (adjusted upward to an even multiple of the cache
- * line size).  This will advance through the page by the amount
- * actually consumed by the received packets while there is still
- * space for a buffer.  Each region of larger pages will be used at
- * most once, after which the page will not be reused.
- *
- * In either case, if the page is reusable its refcount is increased.
- **/
-static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer)
-{
-	unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
-	struct page *page = rx_buffer->page;
-
-	/* Is any reuse possible? */
-	if (unlikely(!i40e_page_is_reusable(page)))
-		return false;
-
-#if (PAGE_SIZE < 8192)
-	/* if we are only owner of page we can reuse it */
-	if (unlikely((page_count(page) - pagecnt_bias) > 1))
-		return false;
-#else
-#define I40E_LAST_OFFSET \
-	(SKB_WITH_OVERHEAD(PAGE_SIZE) - I40E_RXBUFFER_2048)
-	if (rx_buffer->page_offset > I40E_LAST_OFFSET)
-		return false;
-#endif
-
-	/* If we have drained the page fragment pool we need to update
-	 * the pagecnt_bias and page count so that we fully restock the
-	 * number of references the driver holds.
-	 */
-	if (unlikely(pagecnt_bias == 1)) {
-		page_ref_add(page, USHRT_MAX - 1);
-		rx_buffer->pagecnt_bias = USHRT_MAX;
-	}
-
-	return true;
-}
-
 /**
  * i40e_add_rx_frag - Add contents of Rx buffer to sk_buff
  * @rx_ring: rx descriptor ring to transact packets on
@@ -1823,25 +1669,24 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer)
  * The function will then update the page offset.
  **/
 static void i40e_add_rx_frag(struct i40e_ring *rx_ring,
-			     struct i40e_rx_buffer *rx_buffer,
 			     struct sk_buff *skb,
-			     unsigned int size)
+			     unsigned long handle,
+			     unsigned int size,
+			     unsigned int headroom)
 {
-#if (PAGE_SIZE < 8192)
-	unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
-#else
-	unsigned int truesize = SKB_DATA_ALIGN(size + i40e_rx_offset(rx_ring));
-#endif
+	unsigned int truesize = bpool_buff_truesize(rx_ring->bpool);
+	unsigned int pg_off;
+	struct page *pg;
+	int err;
 
-	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
-			rx_buffer->page_offset, size, truesize);
+	err = bpool_buff_convert_to_page(rx_ring->bpool, handle, &pg, &pg_off);
+	if (err) {
+		bpool_free(rx_ring->bpool, handle);
+		return;
+	}
 
-	/* page is being used so we must update the page offset */
-#if (PAGE_SIZE < 8192)
-	rx_buffer->page_offset ^= truesize;
-#else
-	rx_buffer->page_offset += truesize;
-#endif
+	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, pg, pg_off + headroom,
+			size, truesize);
 }
 
 /**
@@ -1853,22 +1698,16 @@ static void i40e_add_rx_frag(struct i40e_ring *rx_ring,
  * for use by the CPU.
  */
 static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring,
-						 const unsigned int size)
+						 unsigned long *handle,
+						 const unsigned int size,
+						 unsigned int *headroom)
 {
 	struct i40e_rx_buffer *rx_buffer;
 
 	rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean];
-	prefetchw(rx_buffer->page);
-
-	/* we are reusing so sync this buffer for CPU use */
-	dma_sync_single_range_for_cpu(rx_ring->dev,
-				      rx_buffer->dma,
-				      rx_buffer->page_offset,
-				      size,
-				      DMA_FROM_DEVICE);
-
-	/* We have pulled a buffer for use, so decrement pagecnt_bias */
-	rx_buffer->pagecnt_bias--;
+	*handle = rx_buffer->handle;
+	*headroom = rx_ring->rx_buf_hr;
+	bpool_buff_dma_sync_cpu(rx_ring->bpool, *handle, *headroom, size);
 
 	return rx_buffer;
 }
@@ -1884,56 +1723,56 @@ static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring,
  * skb correctly.
  */
 static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
-					  struct i40e_rx_buffer *rx_buffer,
-					  struct xdp_buff *xdp)
+					  unsigned long handle,
+					  unsigned int size,
+					  unsigned int headroom)
 {
-	unsigned int size = xdp->data_end - xdp->data;
-#if (PAGE_SIZE < 8192)
-	unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
-#else
-	unsigned int truesize = SKB_DATA_ALIGN(size);
-#endif
-	unsigned int headlen;
+	unsigned int truesize = bpool_buff_truesize(rx_ring->bpool);
+	unsigned int pg_off, headlen;
 	struct sk_buff *skb;
+	struct page *pg;
+	void *data;
+	int err;
 
+	data = bpool_buff_ptr(rx_ring->bpool, handle) + headroom;
 	/* prefetch first cache line of first page */
-	prefetch(xdp->data);
+	prefetch(data);
 #if L1_CACHE_BYTES < 128
-	prefetch(xdp->data + L1_CACHE_BYTES);
+	prefetch(data + L1_CACHE_BYTES);
 #endif
 
 	/* allocate a skb to store the frags */
 	skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
 			       I40E_RX_HDR_SIZE,
 			       GFP_ATOMIC | __GFP_NOWARN);
-	if (unlikely(!skb))
+	if (unlikely(!skb)) {
+		bpool_free(rx_ring->bpool, handle);
 		return NULL;
+	}
 
 	/* Determine available headroom for copy */
 	headlen = size;
 	if (headlen > I40E_RX_HDR_SIZE)
-		headlen = eth_get_headlen(xdp->data, I40E_RX_HDR_SIZE);
+		headlen = eth_get_headlen(data, I40E_RX_HDR_SIZE);
 
 	/* align pull length to size of long to optimize memcpy performance */
-	memcpy(__skb_put(skb, headlen), xdp->data,
-	       ALIGN(headlen, sizeof(long)));
+	memcpy(__skb_put(skb, headlen), data, ALIGN(headlen, sizeof(long)));
 
 	/* update all of the pointers */
 	size -= headlen;
 	if (size) {
-		skb_add_rx_frag(skb, 0, rx_buffer->page,
-				rx_buffer->page_offset + headlen,
-				size, truesize);
-
-		/* buffer is used by skb, update page_offset */
-#if (PAGE_SIZE < 8192)
-		rx_buffer->page_offset ^= truesize;
-#else
-		rx_buffer->page_offset += truesize;
-#endif
+		err = bpool_buff_convert_to_page(rx_ring->bpool, handle, &pg,
+						 &pg_off);
+		if (err) {
+			dev_kfree_skb(skb);
+			bpool_free(rx_ring->bpool, handle);
+			return NULL;
+		}
+
+		skb_add_rx_frag(skb, 0, pg, pg_off + headroom + headlen, size,
+				truesize);
 	} else {
-		/* buffer is unused, reset bias back to rx_buffer */
-		rx_buffer->pagecnt_bias++;
+		bpool_free(rx_ring->bpool, handle);
 	}
 
 	return skb;
@@ -1949,70 +1788,45 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
  * to set up the skb correctly and avoid any memcpy overhead.
  */
 static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
-				      struct i40e_rx_buffer *rx_buffer,
-				      struct xdp_buff *xdp)
+				      unsigned long handle,
+				      unsigned int size,
+				      unsigned int headroom)
 {
-	unsigned int size = xdp->data_end - xdp->data;
-#if (PAGE_SIZE < 8192)
-	unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
-#else
-	unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
-				SKB_DATA_ALIGN(I40E_SKB_PAD + size);
-#endif
+	void *data, *data_hard_start;
 	struct sk_buff *skb;
+	unsigned int frag_size, pg_off;
+	struct page *pg;
+	int err;
+
+	err = bpool_buff_convert_to_page(rx_ring->bpool, handle, &pg, &pg_off);
+	if (err) {
+		bpool_free(rx_ring->bpool, handle);
+		return NULL;
+	}
 
+	frag_size = bpool_total_buff_size(rx_ring->bpool) +
+		    SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	data_hard_start = page_address(pg) + pg_off;
+	data = data_hard_start + headroom;
 	/* prefetch first cache line of first page */
-	prefetch(xdp->data);
+	prefetch(data);
 #if L1_CACHE_BYTES < 128
-	prefetch(xdp->data + L1_CACHE_BYTES);
+	prefetch(data + L1_CACHE_BYTES);
 #endif
 	/* build an skb around the page buffer */
-	skb = build_skb(xdp->data_hard_start, truesize);
-	if (unlikely(!skb))
+	skb = build_skb(data_hard_start, frag_size);
+	if (unlikely(!skb)) {
+		page_frag_free(data);
 		return NULL;
+	}
 
 	/* update pointers within the skb to store the data */
-	skb_reserve(skb, I40E_SKB_PAD);
+	skb_reserve(skb, headroom);
 	__skb_put(skb, size);
 
-	/* buffer is used by skb, update page_offset */
-#if (PAGE_SIZE < 8192)
-	rx_buffer->page_offset ^= truesize;
-#else
-	rx_buffer->page_offset += truesize;
-#endif
-
 	return skb;
 }
 
-/**
- * i40e_put_rx_buffer - Clean up used buffer and either recycle or free
- * @rx_ring: rx descriptor ring to transact packets on
- * @rx_buffer: rx buffer to pull data from
- *
- * This function will clean up the contents of the rx_buffer.  It will
- * either recycle the bufer or unmap it and free the associated resources.
- */
-static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
-			       struct i40e_rx_buffer *rx_buffer)
-{
-	if (i40e_can_reuse_rx_page(rx_buffer)) {
-		/* hand second half of page back to the ring */
-		i40e_reuse_rx_page(rx_ring, rx_buffer);
-		rx_ring->rx_stats.page_reuse_count++;
-	} else {
-		/* we are not reusing the buffer so unmap it */
-		dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
-				     i40e_rx_pg_size(rx_ring),
-				     DMA_FROM_DEVICE, I40E_RX_DMA_ATTR);
-		__page_frag_cache_drain(rx_buffer->page,
-					rx_buffer->pagecnt_bias);
-	}
-
-	/* clear contents of buffer_info */
-	rx_buffer->page = NULL;
-}
-
 /**
  * i40e_is_non_eop - process handling of non-EOP buffers
  * @rx_ring: Rx ring being processed
@@ -2053,17 +1867,43 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring,
 static int i40e_xmit_xdp_ring(struct xdp_buff *xdp,
 			      struct i40e_ring *xdp_ring);
 
+static int i40e_xdp_buff_convert_page(struct i40e_ring *rx_ring,
+				      struct xdp_buff *xdp,
+				      unsigned long handle,
+				      unsigned int size,
+				      unsigned int headroom)
+{
+	unsigned int pg_off;
+	struct page *pg;
+	int err;
+
+	err = bpool_buff_convert_to_page(rx_ring->bpool, handle, &pg, &pg_off);
+	if (err)
+		return err;
+
+	xdp->data_hard_start = page_address(pg) + pg_off;
+	xdp->data = xdp->data_hard_start + headroom;
+	xdp_set_data_meta_invalid(xdp);
+	xdp->data_end = xdp->data + size;
+	xdp->rxq = &rx_ring->xdp_rxq;
+
+	return 0;
+}
+
 /**
  * i40e_run_xdp - run an XDP program
  * @rx_ring: Rx ring being processed
  * @xdp: XDP buffer containing the frame
  **/
 static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring,
-				    struct xdp_buff *xdp)
+				    unsigned long handle,
+				    unsigned int *size,
+				    unsigned int *headroom)
 {
 	int err, result = I40E_XDP_PASS;
 	struct i40e_ring *xdp_ring;
 	struct bpf_prog *xdp_prog;
+	struct xdp_buff xdp;
 	u32 act;
 
 	rcu_read_lock();
@@ -2072,20 +1912,47 @@ static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring,
 	if (!xdp_prog)
 		goto xdp_out;
 
-	act = bpf_prog_run_xdp(xdp_prog, xdp);
+	xdp.data_hard_start = bpool_buff_ptr(rx_ring->bpool, handle);
+	xdp.data = xdp.data_hard_start + *headroom;
+	xdp_set_data_meta_invalid(&xdp);
+	xdp.data_end = xdp.data + *size;
+	xdp.rxq = &rx_ring->xdp_rxq;
+
+	act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
+	*headroom = xdp.data - xdp.data_hard_start;
+	*size = xdp.data_end - xdp.data;
+
 	switch (act) {
 	case XDP_PASS:
 		break;
 	case XDP_TX:
+		err = i40e_xdp_buff_convert_page(rx_ring, &xdp, handle, *size,
+						 *headroom);
+		if (err) {
+			result = I40E_XDP_CONSUMED;
+			break;
+		}
+
 		xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
-		result = i40e_xmit_xdp_ring(xdp, xdp_ring);
+		result = i40e_xmit_xdp_ring(&xdp, xdp_ring);
+		if (result == I40E_XDP_CONSUMED) {
+			page_frag_free(xdp.data);
+			result = I40E_XDP_TX; /* Hmm, here we bump the tail unnecessarily, but it gives better flow... */
+		}
 		break;
 	case XDP_REDIRECT:
-		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
-		if (!err)
-			result = I40E_XDP_TX;
-		else
+		err = i40e_xdp_buff_convert_page(rx_ring, &xdp, handle, *size,
+						 *headroom);
+		if (err) {
 			result = I40E_XDP_CONSUMED;
+			break;
+		}
+
+		err = xdp_do_redirect(rx_ring->netdev, &xdp, xdp_prog);
+		result = I40E_XDP_TX;
+		if (err)
+			page_frag_free(xdp.data);
 		break;
 	default:
 		bpf_warn_invalid_xdp_action(act);
@@ -2101,27 +1968,6 @@ static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring,
 	return ERR_PTR(-result);
 }
 
-/**
- * i40e_rx_buffer_flip - adjusted rx_buffer to point to an unused region
- * @rx_ring: Rx ring
- * @rx_buffer: Rx buffer to adjust
- * @size: Size of adjustment
- **/
-static void i40e_rx_buffer_flip(struct i40e_ring *rx_ring,
-				struct i40e_rx_buffer *rx_buffer,
-				unsigned int size)
-{
-#if (PAGE_SIZE < 8192)
-	unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
-
-	rx_buffer->page_offset ^= truesize;
-#else
-	unsigned int truesize = SKB_DATA_ALIGN(i40e_rx_offset(rx_ring) + size);
-
-	rx_buffer->page_offset += truesize;
-#endif
-}
-
 static inline void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring)
 {
 	/* Force memory writes to complete before letting h/w
@@ -2150,14 +1996,12 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 	struct sk_buff *skb = rx_ring->skb;
 	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
 	bool failure = false, xdp_xmit = false;
-	struct xdp_buff xdp;
-
-	xdp.rxq = &rx_ring->xdp_rxq;
 
 	while (likely(total_rx_packets < (unsigned int)budget)) {
 		struct i40e_rx_buffer *rx_buffer;
 		union i40e_rx_desc *rx_desc;
-		unsigned int size;
+		unsigned int size, headroom;
+		unsigned long handle;
 		u16 vlan_tag;
 		u8 rx_ptype;
 		u64 qword;
@@ -2195,45 +2039,35 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 			break;
 
 		i40e_trace(clean_rx_irq, rx_ring, rx_desc, skb);
-		rx_buffer = i40e_get_rx_buffer(rx_ring, size);
+		rx_buffer = i40e_get_rx_buffer(rx_ring, &handle, size,
+					       &headroom);
 
 		/* retrieve a buffer from the ring */
-		if (!skb) {
-			xdp.data = page_address(rx_buffer->page) +
-				   rx_buffer->page_offset;
-			xdp_set_data_meta_invalid(&xdp);
-			xdp.data_hard_start = xdp.data -
-					      i40e_rx_offset(rx_ring);
-			xdp.data_end = xdp.data + size;
-
-			skb = i40e_run_xdp(rx_ring, &xdp);
-		}
+		if (!skb)
+			skb = i40e_run_xdp(rx_ring, handle, &size, &headroom);
 
 		if (IS_ERR(skb)) {
-			if (PTR_ERR(skb) == -I40E_XDP_TX) {
+			if (PTR_ERR(skb) == -I40E_XDP_TX)
 				xdp_xmit = true;
-				i40e_rx_buffer_flip(rx_ring, rx_buffer, size);
-			} else {
-				rx_buffer->pagecnt_bias++;
-			}
+			else
+				bpool_free(rx_ring->bpool, handle);
 			total_rx_bytes += size;
 			total_rx_packets++;
 		} else if (skb) {
-			i40e_add_rx_frag(rx_ring, rx_buffer, skb, size);
+			i40e_add_rx_frag(rx_ring, skb, handle, size, headroom);
 		} else if (ring_uses_build_skb(rx_ring)) {
-			skb = i40e_build_skb(rx_ring, rx_buffer, &xdp);
+			skb = i40e_build_skb(rx_ring, handle, size, headroom);
 		} else {
-			skb = i40e_construct_skb(rx_ring, rx_buffer, &xdp);
+			skb = i40e_construct_skb(rx_ring, handle, size,
+						 headroom);
 		}
 
+		rx_buffer->handle = 0;
+
 		/* exit if we failed to retrieve a buffer */
-		if (!skb) {
-			rx_ring->rx_stats.alloc_buff_failed++;
-			rx_buffer->pagecnt_bias++;
+		if (!skb)
 			break;
-		}
 
-		i40e_put_rx_buffer(rx_ring, rx_buffer);
 		cleaned_count++;
 
 		if (i40e_is_non_eop(rx_ring, rx_desc, skb))
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index d149ebb8330c..d8345265db1e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -140,58 +140,6 @@ enum i40e_dyn_idx_t {
 #define I40E_RX_DMA_ATTR \
 	(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
 
-/* Attempt to maximize the headroom available for incoming frames.  We
- * use a 2K buffer for receives and need 1536/1534 to store the data for
- * the frame.  This leaves us with 512 bytes of room.  From that we need
- * to deduct the space needed for the shared info and the padding needed
- * to IP align the frame.
- *
- * Note: For cache line sizes 256 or larger this value is going to end
- *	 up negative.  In these cases we should fall back to the legacy
- *	 receive path.
- */
-#if (PAGE_SIZE < 8192)
-#define I40E_2K_TOO_SMALL_WITH_PADDING \
-((NET_SKB_PAD + I40E_RXBUFFER_1536) > SKB_WITH_OVERHEAD(I40E_RXBUFFER_2048))
-
-static inline int i40e_compute_pad(int rx_buf_len)
-{
-	int page_size, pad_size;
-
-	page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2);
-	pad_size = SKB_WITH_OVERHEAD(page_size) - rx_buf_len;
-
-	return pad_size;
-}
-
-static inline int i40e_skb_pad(void)
-{
-	int rx_buf_len;
-
-	/* If a 2K buffer cannot handle a standard Ethernet frame then
-	 * optimize padding for a 3K buffer instead of a 1.5K buffer.
-	 *
-	 * For a 3K buffer we need to add enough padding to allow for
-	 * tailroom due to NET_IP_ALIGN possibly shifting us out of
-	 * cache-line alignment.
-	 */
-	if (I40E_2K_TOO_SMALL_WITH_PADDING)
-		rx_buf_len = I40E_RXBUFFER_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN);
-	else
-		rx_buf_len = I40E_RXBUFFER_1536;
-
-	/* if needed make room for NET_IP_ALIGN */
-	rx_buf_len -= NET_IP_ALIGN;
-
-	return i40e_compute_pad(rx_buf_len);
-}
-
-#define I40E_SKB_PAD i40e_skb_pad()
-#else
-#define I40E_2K_TOO_SMALL_WITH_PADDING false
-#define I40E_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
-#endif
-
 /**
  * i40e_test_staterr - tests bits in Rx descriptor status and error fields
  * @rx_desc: pointer to receive descriptor (in le64 format)
@@ -312,14 +260,7 @@ struct i40e_tx_buffer {
 };
 
 struct i40e_rx_buffer {
-	dma_addr_t dma;
-	struct page *page;
-#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
-	__u32 page_offset;
-#else
-	__u16 page_offset;
-#endif
-	__u16 pagecnt_bias;
+	unsigned long handle;
 };
 
 struct i40e_queue_stats {
@@ -387,6 +328,7 @@ struct i40e_ring {
 
 	u16 count;			/* Number of descriptors */
 	u16 reg_idx;			/* HW register index of the ring */
+	u16 rx_buf_hr;
 	u16 rx_buf_len;
 
 	/* used in interrupt processing */
@@ -420,7 +362,6 @@ struct i40e_ring {
 	struct i40e_q_vector *q_vector;	/* Backreference to associated vector */
 
 	struct rcu_head rcu;		/* to avoid race on free */
-	u16 next_to_alloc;
 	struct sk_buff *skb;		/* When i40e_clean_rx_ring_irq() must
 					 * return before it sees the EOP for
 					 * the current packet, we save that skb
@@ -432,6 +373,7 @@ struct i40e_ring {
 
 	struct i40e_channel *ch;
 	struct xdp_rxq_info xdp_rxq;
+	struct buff_pool *bpool;
 } ____cacheline_internodealigned_in_smp;
 
 static inline bool ring_uses_build_skb(struct i40e_ring *ring)
-- 
2.14.1
