Message-Id: <1563713633-25528-2-git-send-email-yanjun.zhu@oracle.com>
Date:   Sun, 21 Jul 2019 08:53:52 -0400
From:   Zhu Yanjun <yanjun.zhu@...cle.com>
To:     yanjun.zhu@...cle.com, davem@...emloft.net, netdev@...r.kernel.org
Subject: [PATCHv2 1/2] forcedeth: add recv cache to make nic work steadily

Add a recv cache that holds 1000 Mbit worth of rx buffers, i.e. its
length is 125 MB divided by the skb buffer length. When system memory
runs low, this cache keeps the nic working steadily.

The recv cache and its delayed work are created when the nic is brought
up; when the nic goes down, the cache is destroyed and the delayed work
is canceled.

When the nic is polled or an rx interrupt is triggered, the rx handler
takes an skb from the recv cache and then queues a work item to refill
the cache.

When the skb buffer size changes (e.g. on an MTU or ring-parameter
change), the old recv cache is destroyed and a new one is created.

When system memory is low, the normal GFP_ATOMIC skb allocation can
fail. The refill work then falls back to allocating with GFP_KERNEL
until the cache is full again. This keeps the nic working steadily
under memory pressure, and the recv cache also improves the nic's
performance.
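
For a rough sense of the cache length (the numbers below are an
illustration only, not values taken from the driver): with a standard
1500-byte MTU the per-skb rx buffer is a little over 1.5 KB, so the
cache holds roughly 85,000 preallocated skbs, i.e. at least 125 MB of
buffer memory.

	/* Back-of-the-envelope sizing; illustrative values only. */
	#include <stdio.h>

	int main(void)
	{
		unsigned int cache_bytes = 125 * 1024 * 1024;	/* 1000 Mbit of buffering */
		unsigned int rx_buf_sz = 1536;			/* assumed ~1.5 KB rx buffer */

		printf("recv cache length: %u skbs\n", cache_bytes / rx_buf_sz);
		return 0;
	}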

CC: Joe Jin <joe.jin@...cle.com>
CC: Junxiao Bi <junxiao.bi@...cle.com>
Tested-by: Nan san <nan.1986san@...il.com>
Signed-off-by: Zhu Yanjun <yanjun.zhu@...cle.com>
---
 drivers/net/ethernet/nvidia/forcedeth.c | 103 +++++++++++++++++++++++++++++++-
 1 file changed, 101 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index b327b29..f8e766f 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -674,6 +674,11 @@ struct nv_ethtool_stats {
 	u64 tx_broadcast;
 };
 
+/* 1000 Mbit is 125 MB; 125 * 1024 * 1024 bytes is used here.
+ * The recv cache length is that divided by the skb buffer length.
+ */
+#define RECV_CACHE_LIST_LENGTH		(125 * 1024 * 1024 / np->rx_buf_sz)
+
 #define NV_DEV_STATISTICS_V3_COUNT (sizeof(struct nv_ethtool_stats)/sizeof(u64))
 #define NV_DEV_STATISTICS_V2_COUNT (NV_DEV_STATISTICS_V3_COUNT - 3)
 #define NV_DEV_STATISTICS_V1_COUNT (NV_DEV_STATISTICS_V2_COUNT - 6)
@@ -844,6 +849,11 @@ struct fe_priv {
 	char name_rx[IFNAMSIZ + 3];       /* -rx    */
 	char name_tx[IFNAMSIZ + 3];       /* -tx    */
 	char name_other[IFNAMSIZ + 6];    /* -other */
+
+	/* delayed work used to refill the recv cache */
+	struct delayed_work     recv_cache_work;
+	/* cache of preallocated rx skbs */
+	struct sk_buff_head recv_list;
 };
 
 /*
@@ -1804,7 +1814,8 @@ static int nv_alloc_rx(struct net_device *dev)
 		less_rx = np->last_rx.orig;
 
 	while (np->put_rx.orig != less_rx) {
-		struct sk_buff *skb = netdev_alloc_skb(dev, np->rx_buf_sz + NV_RX_ALLOC_PAD);
+		struct sk_buff *skb = skb_dequeue(&np->recv_list);
+
 		if (likely(skb)) {
 			np->put_rx_ctx->skb = skb;
 			np->put_rx_ctx->dma = dma_map_single(&np->pci_dev->dev,
@@ -1829,9 +1840,15 @@ static int nv_alloc_rx(struct net_device *dev)
 			u64_stats_update_begin(&np->swstats_rx_syncp);
 			np->stat_rx_dropped++;
 			u64_stats_update_end(&np->swstats_rx_syncp);
+
+			schedule_delayed_work(&np->recv_cache_work, 0);
+
 			return 1;
 		}
 	}
+
+	schedule_delayed_work(&np->recv_cache_work, 0);
+
 	return 0;
 }
 
@@ -1845,7 +1862,8 @@ static int nv_alloc_rx_optimized(struct net_device *dev)
 		less_rx = np->last_rx.ex;
 
 	while (np->put_rx.ex != less_rx) {
-		struct sk_buff *skb = netdev_alloc_skb(dev, np->rx_buf_sz + NV_RX_ALLOC_PAD);
+		struct sk_buff *skb = skb_dequeue(&np->recv_list);
+
 		if (likely(skb)) {
 			np->put_rx_ctx->skb = skb;
 			np->put_rx_ctx->dma = dma_map_single(&np->pci_dev->dev,
@@ -1871,9 +1889,15 @@ static int nv_alloc_rx_optimized(struct net_device *dev)
 			u64_stats_update_begin(&np->swstats_rx_syncp);
 			np->stat_rx_dropped++;
 			u64_stats_update_end(&np->swstats_rx_syncp);
+
+			schedule_delayed_work(&np->recv_cache_work, 0);
+
 			return 1;
 		}
 	}
+
+	schedule_delayed_work(&np->recv_cache_work, 0);
+
 	return 0;
 }
 
@@ -1957,6 +1981,43 @@ static void nv_init_tx(struct net_device *dev)
 	}
 }
 
+static void nv_init_recv_cache(struct net_device *dev)
+{
+	struct fe_priv *np = netdev_priv(dev);
+
+	skb_queue_head_init(&np->recv_list);
+	while (skb_queue_len(&np->recv_list) < RECV_CACHE_LIST_LENGTH) {
+		struct sk_buff *skb = netdev_alloc_skb(dev,
+				 np->rx_buf_sz + NV_RX_ALLOC_PAD);
+		/* A NULL skb means the GFP_ATOMIC allocation
+		 * failed because memory is low.
+		 */
+		if (unlikely(!skb)) {
+			/* Retry with GFP_KERNEL, which can sleep and
+			 * reclaim memory, so it should eventually
+			 * succeed.
+			 */
+			skb = __netdev_alloc_skb(dev,
+						 np->rx_buf_sz +
+						 NV_RX_ALLOC_PAD,
+						 GFP_KERNEL);
+		}
+
+		skb_queue_tail(&np->recv_list, skb);
+	}
+}
+
+static void nv_destroy_recv_cache(struct net_device *dev)
+{
+	struct fe_priv *np = netdev_priv(dev);
+
+	cancel_delayed_work_sync(&np->recv_cache_work);
+	WARN_ON(delayed_work_pending(&np->recv_cache_work));
+
+	skb_queue_purge(&np->recv_list);
+	WARN_ON(skb_queue_len(&np->recv_list));
+}
+
 static int nv_init_ring(struct net_device *dev)
 {
 	struct fe_priv *np = netdev_priv(dev);
@@ -3047,6 +3108,8 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu)
 		nv_drain_rxtx(dev);
 		/* reinit driver view of the rx queue */
 		set_bufsize(dev);
+		nv_destroy_recv_cache(dev);
+		nv_init_recv_cache(dev);
 		if (nv_init_ring(dev)) {
 			if (!np->in_shutdown)
 				mod_timer(&np->oom_kick, jiffies + OOM_REFILL);
@@ -4074,6 +4137,32 @@ static void nv_free_irq(struct net_device *dev)
 	}
 }
 
+static void nv_recv_cache_worker(struct work_struct *work)
+{
+	struct fe_priv *np = container_of(work, struct fe_priv,
+					  recv_cache_work.work);
+
+	while (skb_queue_len(&np->recv_list) < RECV_CACHE_LIST_LENGTH) {
+		struct sk_buff *skb = netdev_alloc_skb(np->dev,
+				np->rx_buf_sz + NV_RX_ALLOC_PAD);
+		/* A NULL skb means the GFP_ATOMIC allocation
+		 * failed because memory is low.
+		 */
+		if (unlikely(!skb)) {
+			/* Retry with GFP_KERNEL, which can sleep and
+			 * reclaim memory, so it should eventually
+			 * succeed.
+			 */
+			skb = __netdev_alloc_skb(np->dev,
+						 np->rx_buf_sz +
+						 NV_RX_ALLOC_PAD,
+						 GFP_KERNEL);
+		}
+
+		skb_queue_tail(&np->recv_list, skb);
+	}
+}
+
 static void nv_do_nic_poll(struct timer_list *t)
 {
 	struct fe_priv *np = from_timer(np, t, nic_poll);
@@ -4129,6 +4218,8 @@ static void nv_do_nic_poll(struct timer_list *t)
 			nv_drain_rxtx(dev);
 			/* reinit driver view of the rx queue */
 			set_bufsize(dev);
+			nv_destroy_recv_cache(dev);
+			nv_init_recv_cache(dev);
 			if (nv_init_ring(dev)) {
 				if (!np->in_shutdown)
 					mod_timer(&np->oom_kick, jiffies + OOM_REFILL);
@@ -4681,6 +4772,8 @@ static int nv_set_ringparam(struct net_device *dev, struct ethtool_ringparam* ri
 	if (netif_running(dev)) {
 		/* reinit driver view of the queues */
 		set_bufsize(dev);
+		nv_destroy_recv_cache(dev);
+		nv_init_recv_cache(dev);
 		if (nv_init_ring(dev)) {
 			if (!np->in_shutdown)
 				mod_timer(&np->oom_kick, jiffies + OOM_REFILL);
@@ -5402,6 +5495,9 @@ static int nv_open(struct net_device *dev)
 
 	/* initialize descriptor rings */
 	set_bufsize(dev);
+	nv_init_recv_cache(dev);
+
+	INIT_DELAYED_WORK(&np->recv_cache_work, nv_recv_cache_worker);
 	oom = nv_init_ring(dev);
 
 	writel(0, base + NvRegLinkSpeed);
@@ -5583,6 +5679,9 @@ static int nv_close(struct net_device *dev)
 		nv_txrx_gate(dev, true);
 	}
 
+	/* free all SKBs in recv cache */
+	nv_destroy_recv_cache(dev);
+
 	/* FIXME: power down nic */
 
 	return 0;
-- 
2.7.4
