Message-ID: <20230203210016.36606-5-shannon.nelson@amd.com>
Date:   Fri, 3 Feb 2023 13:00:16 -0800
From:   Shannon Nelson <shannon.nelson@....com>
To:     <netdev@...r.kernel.org>, <davem@...emloft.net>, <kuba@...nel.org>
CC:     <drivers@...sando.io>, Neel Patel <neel.patel@....com>,
        Shannon Nelson <shannon.nelson@....com>
Subject: [PATCH net-next 4/4] ionic: page cache for rx buffers

From: Neel Patel <neel.patel@....com>

Use a local page cache to cut the cost of rx buffer page allocation
and dma mapping. Pages coming back through the completion path are
kept, still dma-mapped, in a small per-queue ring; the fill path
reuses them once their refcount shows no other users. Pages are
split into quarter-page buffers so one page can back several
descriptors, rx pages are now allocated node-local, and the
copybreak and frags receive paths are folded into a single
ionic_rx_build_skb(). New queue stats count the cache and
buffer-reuse activity.
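
The cache itself is a plain single-producer/single-consumer ring
indexed with power-of-two masks. As a rough standalone sketch of
just the index arithmetic (names here are illustrative, not the
driver's; the real ring stores struct ionic_buf_info entries and
checks page_ref_count() before handing a page back out):

    #include <stdbool.h>

    /* power of two, so the index wrap is a mask, not a modulo */
    #define CACHE_SIZE 2048

    struct ring {
            unsigned int head;  /* consumer: next entry to take */
            unsigned int tail;  /* producer: next slot to fill  */
            void *entry[CACHE_SIZE];
    };

    /* producer side: one slot is always left unused, so that
     * head == tail can unambiguously mean "empty" */
    static bool ring_put(struct ring *r, void *page)
    {
            unsigned int tail_next = (r->tail + 1) & (CACHE_SIZE - 1);

            if (tail_next == r->head)
                    return false;   /* full: caller unmaps and frees */

            r->entry[r->tail] = page;
            r->tail = tail_next;
            return true;
    }

    /* consumer side: head == tail means empty */
    static bool ring_get(struct ring *r, void **page)
    {
            if (r->head == r->tail)
                    return false;   /* empty: caller allocates fresh */

            *page = r->entry[r->head];
            r->head = (r->head + 1) & (CACHE_SIZE - 1);
            return true;
    }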

Signed-off-by: Neel Patel <neel.patel@....com>
Signed-off-by: Shannon Nelson <shannon.nelson@....com>
---
 .../net/ethernet/pensando/ionic/ionic_dev.h   |  16 +-
 .../net/ethernet/pensando/ionic/ionic_lif.h   |   8 +
 .../net/ethernet/pensando/ionic/ionic_stats.c |   8 +
 .../net/ethernet/pensando/ionic/ionic_txrx.c  | 296 ++++++++++++------
 4 files changed, 222 insertions(+), 106 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index a4a8802f3771..02c1bb9eb32f 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -15,7 +15,7 @@
 #define IONIC_MAX_RX_DESC		16384
 #define IONIC_MIN_TXRX_DESC		64
 #define IONIC_DEF_TXRX_DESC		4096
-#define IONIC_RX_FILL_THRESHOLD		16
+#define IONIC_RX_FILL_THRESHOLD		64
 #define IONIC_RX_FILL_DIV		8
 #define IONIC_LIFS_MAX			1024
 #define IONIC_WATCHDOG_SECS		5
@@ -181,8 +181,9 @@ typedef void (*ionic_desc_cb)(struct ionic_queue *q,
 			      struct ionic_desc_info *desc_info,
 			      struct ionic_cq_info *cq_info, void *cb_arg);
 
-#define IONIC_PAGE_SIZE				PAGE_SIZE
-#define IONIC_PAGE_SPLIT_SZ			(PAGE_SIZE / 2)
+#define IONIC_PAGE_ORDER			0
+#define IONIC_PAGE_SIZE				(PAGE_SIZE << IONIC_PAGE_ORDER)
+#define IONIC_PAGE_SPLIT_SZ			(PAGE_SIZE / 4)
 #define IONIC_PAGE_GFP_MASK			(GFP_ATOMIC | __GFP_NOWARN |\
 						 __GFP_COMP | __GFP_MEMALLOC)
 
@@ -193,6 +194,14 @@ struct ionic_buf_info {
 	u32 len;
 };
 
+#define IONIC_PAGE_CACHE_SIZE          2048
+
+struct ionic_page_cache {
+	u32 head;
+	u32 tail;
+	struct ionic_buf_info ring[IONIC_PAGE_CACHE_SIZE];
+} ____cacheline_aligned_in_smp;
+
 #define IONIC_MAX_FRAGS			(1 + IONIC_TX_MAX_SG_ELEMS_V1)
 
 struct ionic_desc_info {
@@ -251,6 +260,7 @@ struct ionic_queue {
 	unsigned int desc_size;
 	unsigned int sg_desc_size;
 	unsigned int pid;
+	struct ionic_page_cache page_cache;
 	char name[IONIC_QUEUE_NAME_MAX_SZ];
 } ____cacheline_aligned_in_smp;
 
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
index 5425a8983ae0..892462b07e40 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
@@ -51,6 +51,14 @@ struct ionic_rx_stats {
 	u64 alloc_err;
 	u64 hwstamp_valid;
 	u64 hwstamp_invalid;
+	u64 cache_full;
+	u64 cache_empty;
+	u64 cache_busy;
+	u64 cache_get;
+	u64 cache_put;
+	u64 buf_reused;
+	u64 buf_exhausted;
+	u64 buf_not_reusable;
 };
 
 #define IONIC_QCQ_F_INITED		BIT(0)
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_stats.c b/drivers/net/ethernet/pensando/ionic/ionic_stats.c
index 9859a4432985..5c3dc6a4aff4 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_stats.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_stats.c
@@ -149,6 +149,14 @@ static const struct ionic_stat_desc ionic_rx_stats_desc[] = {
 	IONIC_RX_STAT_DESC(hwstamp_invalid),
 	IONIC_RX_STAT_DESC(dropped),
 	IONIC_RX_STAT_DESC(vlan_stripped),
+	IONIC_RX_STAT_DESC(cache_full),
+	IONIC_RX_STAT_DESC(cache_empty),
+	IONIC_RX_STAT_DESC(cache_busy),
+	IONIC_RX_STAT_DESC(cache_get),
+	IONIC_RX_STAT_DESC(cache_put),
+	IONIC_RX_STAT_DESC(buf_exhausted),
+	IONIC_RX_STAT_DESC(buf_not_reusable),
+	IONIC_RX_STAT_DESC(buf_reused),
 };
 
 #define IONIC_NUM_LIF_STATS ARRAY_SIZE(ionic_lif_stats_desc)
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 0cb464931d3d..bd4f8873edc9 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -5,6 +5,7 @@
 #include <linux/ipv6.h>
 #include <linux/if_vlan.h>
 #include <net/ip6_checksum.h>
+#include <linux/skbuff.h>
 
 #include "ionic.h"
 #include "ionic_lif.h"
@@ -27,14 +28,143 @@ static inline struct netdev_queue *q_to_ndq(struct ionic_queue *q)
 	return netdev_get_tx_queue(q->lif->netdev, q->index);
 }
 
-static int ionic_rx_page_alloc(struct ionic_queue *q,
+static void *ionic_rx_buf_va(struct ionic_buf_info *buf_info)
+{
+	return page_address(buf_info->page) + buf_info->page_offset;
+}
+
+static dma_addr_t ionic_rx_buf_pa(struct ionic_buf_info *buf_info)
+{
+	return buf_info->dma_addr + buf_info->page_offset;
+}
+
+static unsigned int ionic_rx_buf_size(struct ionic_buf_info *buf_info)
+{
+	return IONIC_PAGE_SIZE - buf_info->page_offset;
+}
+
+static bool ionic_rx_cache_put(struct ionic_queue *q,
+			       struct ionic_buf_info *buf_info)
+{
+	struct ionic_page_cache *cache = &q->page_cache;
+	struct ionic_rx_stats *stats = q_to_rx_stats(q);
+	u32 tail_next;
+
+	tail_next = (cache->tail + 1) & (IONIC_PAGE_CACHE_SIZE - 1);
+	if (tail_next == cache->head) {
+		stats->cache_full++;
+		return false;
+	}
+
+	get_page(buf_info->page);
+
+	cache->ring[cache->tail] = *buf_info;
+	cache->tail = tail_next;
+	stats->cache_put++;
+
+	return true;
+}
+
+static bool ionic_rx_cache_get(struct ionic_queue *q,
 			       struct ionic_buf_info *buf_info)
+{
+	struct ionic_page_cache *cache = &q->page_cache;
+	struct ionic_rx_stats *stats = q_to_rx_stats(q);
+
+	if (unlikely(cache->head == cache->tail)) {
+		stats->cache_empty++;
+		return false;
+	}
+
+	if (page_ref_count(cache->ring[cache->head].page) != 1) {
+		stats->cache_busy++;
+		return false;
+	}
+
+	*buf_info = cache->ring[cache->head];
+	cache->head = (cache->head + 1) & (IONIC_PAGE_CACHE_SIZE - 1);
+	stats->cache_get++;
+
+	dma_sync_single_for_device(q->dev, buf_info->dma_addr,
+				   IONIC_PAGE_SIZE,
+				   DMA_FROM_DEVICE);
+
+	return true;
+}
+
+static void ionic_rx_cache_drain(struct ionic_queue *q)
+{
+	struct ionic_page_cache *cache = &q->page_cache;
+	struct ionic_rx_stats *stats = q_to_rx_stats(q);
+	struct ionic_buf_info *buf_info;
+
+	while (cache->head != cache->tail) {
+		buf_info = &cache->ring[cache->head];
+		dma_unmap_page(q->dev, buf_info->dma_addr, IONIC_PAGE_SIZE,
+			       DMA_FROM_DEVICE);
+		put_page(buf_info->page);
+		cache->head = (cache->head + 1) & (IONIC_PAGE_CACHE_SIZE - 1);
+	}
+
+	cache->head = 0;
+	cache->tail = 0;
+	stats->cache_empty = 0;
+	stats->cache_busy = 0;
+	stats->cache_get = 0;
+	stats->cache_put = 0;
+	stats->cache_full = 0;
+}
+
+static bool ionic_rx_buf_reuse(struct ionic_queue *q,
+			       struct ionic_buf_info *buf_info, u32 used)
+{
+	struct ionic_rx_stats *stats = q_to_rx_stats(q);
+	u32 size;
+
+	if (!dev_page_is_reusable(buf_info->page)) {
+		stats->buf_not_reusable++;
+		return false;
+	}
+
+	size = ALIGN(used, IONIC_PAGE_SPLIT_SZ);
+	buf_info->page_offset += size;
+	if (buf_info->page_offset >= IONIC_PAGE_SIZE) {
+		buf_info->page_offset = 0;
+		stats->buf_exhausted++;
+		return false;
+	}
+
+	stats->buf_reused++;
+
+	get_page(buf_info->page);
+
+	return true;
+}
+
+static void ionic_rx_buf_complete(struct ionic_queue *q,
+				  struct ionic_buf_info *buf_info, u32 used)
+{
+	if (ionic_rx_buf_reuse(q, buf_info, used))
+		return;
+
+	if (!ionic_rx_cache_put(q, buf_info))
+		dma_unmap_page_attrs(q->dev, buf_info->dma_addr, IONIC_PAGE_SIZE,
+				     DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+
+	buf_info->page = NULL;
+}
+
+static inline int ionic_rx_page_alloc(struct ionic_queue *q,
+				      struct ionic_buf_info *buf_info)
 {
 	struct net_device *netdev = q->lif->netdev;
 	struct ionic_rx_stats *stats;
 	struct device *dev;
 	struct page *page;
 
+	if (ionic_rx_cache_get(q, buf_info))
+		return 0;
+
 	dev = q->dev;
 	stats = q_to_rx_stats(q);
 
@@ -44,7 +174,7 @@ static int ionic_rx_page_alloc(struct ionic_queue *q,
 		return -EINVAL;
 	}
 
-	page = alloc_pages(IONIC_PAGE_GFP_MASK, 0);
+	page = alloc_pages_node(dev_to_node(dev), IONIC_PAGE_GFP_MASK, IONIC_PAGE_ORDER);
 	if (unlikely(!page)) {
 		net_err_ratelimited("%s: %s page alloc failed\n",
 				    netdev->name, q->name);
@@ -55,7 +185,7 @@ static int ionic_rx_page_alloc(struct ionic_queue *q,
 	buf_info->dma_addr = dma_map_page(dev, page, 0,
 					  IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
 	if (unlikely(dma_mapping_error(dev, buf_info->dma_addr))) {
-		__free_pages(page, 0);
+		__free_pages(page, IONIC_PAGE_ORDER);
 		net_err_ratelimited("%s: %s dma map failed\n",
 				    netdev->name, q->name);
 		stats->dma_map_err++;
@@ -84,36 +214,30 @@ static void ionic_rx_page_free(struct ionic_queue *q,
 		return;
 
 	dma_unmap_page(dev, buf_info->dma_addr, IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
-	__free_pages(buf_info->page, 0);
+	__free_pages(buf_info->page, IONIC_PAGE_ORDER);
 	buf_info->page = NULL;
 }
 
-static bool ionic_rx_buf_recycle(struct ionic_queue *q,
-				 struct ionic_buf_info *buf_info, u32 used)
+static void ionic_rx_add_skb_frag(struct ionic_queue *q,
+				  struct sk_buff *skb,
+				  struct ionic_buf_info *buf_info,
+				  u32 off, u32 len)
 {
-	u32 size;
-
-	/* don't re-use pages allocated in low-mem condition */
-	if (page_is_pfmemalloc(buf_info->page))
-		return false;
-
-	/* don't re-use buffers from non-local numa nodes */
-	if (page_to_nid(buf_info->page) != numa_mem_id())
-		return false;
-
-	size = ALIGN(used, IONIC_PAGE_SPLIT_SZ);
-	buf_info->page_offset += size;
-	if (buf_info->page_offset >= IONIC_PAGE_SIZE)
-		return false;
+	dma_sync_single_for_cpu(q->dev,
+				ionic_rx_buf_pa(buf_info) + off,
+				len, DMA_FROM_DEVICE);
 
-	get_page(buf_info->page);
+	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+			buf_info->page, buf_info->page_offset + off,
+			len,
+			IONIC_PAGE_SIZE);
 
-	return true;
+	ionic_rx_buf_complete(q, buf_info, off + len);
 }
 
-static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
-				      struct ionic_desc_info *desc_info,
-				      struct ionic_rxq_comp *comp)
+static struct sk_buff *ionic_rx_build_skb(struct ionic_queue *q,
+					  struct ionic_desc_info *desc_info,
+					  struct ionic_rxq_comp *comp)
 {
 	struct net_device *netdev = q->lif->netdev;
 	struct ionic_buf_info *buf_info;
@@ -121,73 +245,24 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
 	struct device *dev = q->dev;
 	struct sk_buff *skb;
 	unsigned int i;
+	u16 head_len;
 	u16 frag_len;
+	u16 copy_len;
 	u16 len;
 
 	stats = q_to_rx_stats(q);
 
 	buf_info = &desc_info->bufs[0];
-	len = le16_to_cpu(comp->len);
-
-	prefetchw(buf_info->page);
 
-	skb = napi_get_frags(&q_to_qcq(q)->napi);
-	if (unlikely(!skb)) {
-		net_warn_ratelimited("%s: SKB alloc failed on %s!\n",
-				     netdev->name, q->name);
-		stats->alloc_err++;
+	if (unlikely(!buf_info->page))
 		return NULL;
-	}
-
-	i = comp->num_sg_elems + 1;
-	do {
-		if (unlikely(!buf_info->page)) {
-			dev_kfree_skb(skb);
-			return NULL;
-		}
-
-		frag_len = min_t(u16, len, IONIC_PAGE_SIZE - buf_info->page_offset);
-		len -= frag_len;
-
-		dma_sync_single_for_cpu(dev,
-					buf_info->dma_addr + buf_info->page_offset,
-					frag_len, DMA_FROM_DEVICE);
-
-		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
-				buf_info->page, buf_info->page_offset, frag_len,
-				IONIC_PAGE_SIZE);
-
-		if (!ionic_rx_buf_recycle(q, buf_info, frag_len)) {
-			dma_unmap_page(dev, buf_info->dma_addr,
-				       IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
-			buf_info->page = NULL;
-		}
-
-		buf_info++;
 
-		i--;
-	} while (i > 0);
-
-	return skb;
-}
-
-static struct sk_buff *ionic_rx_copybreak(struct ionic_queue *q,
-					  struct ionic_desc_info *desc_info,
-					  struct ionic_rxq_comp *comp)
-{
-	struct net_device *netdev = q->lif->netdev;
-	struct ionic_buf_info *buf_info;
-	struct ionic_rx_stats *stats;
-	struct device *dev = q->dev;
-	struct sk_buff *skb;
-	u16 len;
-
-	stats = q_to_rx_stats(q);
+	prefetchw(buf_info->page);
 
-	buf_info = &desc_info->bufs[0];
 	len = le16_to_cpu(comp->len);
+	head_len = min_t(u16, q->lif->rx_copybreak, len);
 
-	skb = napi_alloc_skb(&q_to_qcq(q)->napi, len);
+	skb = napi_alloc_skb(&q_to_qcq(q)->napi, head_len);
 	if (unlikely(!skb)) {
 		net_warn_ratelimited("%s: SKB alloc failed on %s!\n",
 				     netdev->name, q->name);
@@ -195,21 +270,41 @@ static struct sk_buff *ionic_rx_copybreak(struct ionic_queue *q,
 		return NULL;
 	}
 
-	if (unlikely(!buf_info->page)) {
-		dev_kfree_skb(skb);
-		return NULL;
-	}
+	copy_len = ALIGN(head_len, sizeof(long)); /* for better memcpy performance */
+	dma_sync_single_for_cpu(dev, ionic_rx_buf_pa(buf_info), copy_len, DMA_FROM_DEVICE);
+	skb_copy_to_linear_data(skb, ionic_rx_buf_va(buf_info), copy_len);
+	skb_put(skb, head_len);
 
-	dma_sync_single_for_cpu(dev, buf_info->dma_addr + buf_info->page_offset,
-				len, DMA_FROM_DEVICE);
-	skb_copy_to_linear_data(skb, page_address(buf_info->page) + buf_info->page_offset, len);
-	dma_sync_single_for_device(dev, buf_info->dma_addr + buf_info->page_offset,
-				   len, DMA_FROM_DEVICE);
+	if (len > head_len) {
+		len -= head_len;
+		frag_len = min_t(u16, len, ionic_rx_buf_size(buf_info) - head_len);
+		len -= frag_len;
+		ionic_rx_add_skb_frag(q, skb, buf_info, head_len, frag_len);
+		buf_info++;
+		for (i = 0; i < comp->num_sg_elems; i++) {
+			if (len == 0)
+				goto err_out;
+			if (unlikely(!buf_info->page))
+				goto err_out;
+			frag_len = min_t(u16, len, ionic_rx_buf_size(buf_info));
+			len -= frag_len;
+			ionic_rx_add_skb_frag(q, skb, buf_info, 0, frag_len);
+			buf_info++;
+		}
+	} else {
+		dma_sync_single_for_device(dev,
+					   ionic_rx_buf_pa(buf_info),
+					   len, DMA_FROM_DEVICE);
+	}
 
-	skb_put(skb, len);
 	skb->protocol = eth_type_trans(skb, q->lif->netdev);
 
 	return skb;
+
+err_out:
+	if (skb)
+		dev_kfree_skb(skb);
+	return NULL;
 }
 
 static void ionic_rx_clean(struct ionic_queue *q,
@@ -235,11 +330,7 @@ static void ionic_rx_clean(struct ionic_queue *q,
 	stats->pkts++;
 	stats->bytes += le16_to_cpu(comp->len);
 
-	if (le16_to_cpu(comp->len) <= q->lif->rx_copybreak)
-		skb = ionic_rx_copybreak(q, desc_info, comp);
-	else
-		skb = ionic_rx_frags(q, desc_info, comp);
-
+	skb = ionic_rx_build_skb(q, desc_info, comp);
 	if (unlikely(!skb)) {
 		stats->dropped++;
 		return;
@@ -305,10 +396,7 @@ static void ionic_rx_clean(struct ionic_queue *q,
 		}
 	}
 
-	if (le16_to_cpu(comp->len) <= q->lif->rx_copybreak)
-		napi_gro_receive(&qcq->napi, skb);
-	else
-		napi_gro_frags(&qcq->napi);
+	napi_gro_receive(&qcq->napi, skb);
 }
 
 bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
@@ -382,8 +470,8 @@ void ionic_rx_fill(struct ionic_queue *q)
 		}
 
 		/* fill main descriptor - buf[0] */
-		desc->addr = cpu_to_le64(buf_info->dma_addr + buf_info->page_offset);
-		frag_len = min_t(u16, len, IONIC_PAGE_SIZE - buf_info->page_offset);
+		desc->addr = cpu_to_le64(ionic_rx_buf_pa(buf_info));
+		frag_len = min_t(u16, len, ionic_rx_buf_size(buf_info));
 		desc->len = cpu_to_le16(frag_len);
 		remain_len -= frag_len;
 		buf_info++;
@@ -401,8 +489,8 @@ void ionic_rx_fill(struct ionic_queue *q)
 				}
 			}
 
-			sg_elem->addr = cpu_to_le64(buf_info->dma_addr + buf_info->page_offset);
-			frag_len = min_t(u16, remain_len, IONIC_PAGE_SIZE - buf_info->page_offset);
+			sg_elem->addr = cpu_to_le64(ionic_rx_buf_pa(buf_info));
+			frag_len = min_t(u16, remain_len, ionic_rx_buf_size(buf_info));
 			sg_elem->len = cpu_to_le16(frag_len);
 			remain_len -= frag_len;
 			buf_info++;
@@ -451,6 +539,8 @@ void ionic_rx_empty(struct ionic_queue *q)
 
 	q->head_idx = 0;
 	q->tail_idx = 0;
+
+	ionic_rx_cache_drain(q);
 }
 
 static void ionic_dim_update(struct ionic_qcq *qcq, int napi_mode)
-- 
2.17.1
