Date:   Mon,  3 Sep 2018 14:35:18 +0100
From:   Jose Abreu <Jose.Abreu@...opsys.com>
To:     netdev@...r.kernel.org
Cc:     Jose Abreu <Jose.Abreu@...opsys.com>,
        Jerome Brunet <jbrunet@...libre.com>,
        Martin Blumenstingl <martin.blumenstingl@...glemail.com>,
        "David S. Miller" <davem@...emloft.net>,
        Joao Pinto <Joao.Pinto@...opsys.com>,
        Giuseppe Cavallaro <peppe.cavallaro@...com>,
        Alexandre Torgue <alexandre.torgue@...com>
Subject: [PATCH net-next 1/2] net: stmmac: Rework coalesce timer and fix multi-queue races

This follows David Miller's advice and tries to fix the coalesce timer
in multi-queue scenarios.
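
To illustrate the race (a minimal userspace sketch, not driver code):
with a single frame counter shared by every TX queue, concurrent
transmits can lose updates and the mitigation logic miscounts. Two
threads stand in for two queues here; the harness is hypothetical.

	/* Compile with: cc -pthread race.c */
	#include <pthread.h>
	#include <stdio.h>

	static unsigned int tx_count_frames;	/* shared, unsynchronized */

	static void *xmit_loop(void *arg)
	{
		(void)arg;
		for (int i = 0; i < 1000000; i++)
			tx_count_frames += 1;	/* nfrags + 1, with nfrags == 0 */
		return NULL;
	}

	int main(void)
	{
		pthread_t q0, q1;

		pthread_create(&q0, NULL, xmit_loop, NULL);
		pthread_create(&q1, NULL, xmit_loop, NULL);
		pthread_join(q0, NULL);
		pthread_join(q1, NULL);

		/* Expected 2000000; a racy run typically prints less. */
		printf("tx_count_frames = %u\n", tx_count_frames);
		return 0;
	}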

We now use a per-queue frame counter and a per-queue TX timer.

The coalesce timer default value was changed to 1ms and the default
coalesce frame count to 25.
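
For reference, the interrupt-on-completion decision this patch adds to
stmmac_xmit()/stmmac_tso_xmit(), pulled out into a standalone sketch
(the test harness around it is hypothetical): an skb requests an
interrupt when it alone exceeds the frame budget, or when adding it
crosses a multiple of tx_coal_frames; the 1ms timer covers any tail.

	#include <stdbool.h>
	#include <stdio.h>

	static bool tx_ic_needed(unsigned int tx_count_frames,
				 unsigned int tx_coal_frames,
				 unsigned int nfrags)
	{
		if (!tx_coal_frames)
			return false;	/* frame coalescing disabled */
		if (nfrags + 1 > tx_coal_frames)
			return true;	/* this skb alone exceeds the budget */
		/* counter just crossed a multiple of the frame budget */
		return (tx_count_frames % tx_coal_frames) < nfrags + 1;
	}

	int main(void)
	{
		unsigned int count = 0, coal = 25, nfrags = 0;

		for (int i = 0; i < 100; i++) {
			count += nfrags + 1;	/* as in stmmac_xmit() */
			if (tx_ic_needed(count, coal, nfrags))
				printf("IC at frame %u\n", count);
		}
		return 0;	/* prints 25, 50, 75, 100 */
	}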

Tested in a back-to-back setup between XGMAC2 and GMAC5.

Signed-off-by: Jose Abreu <joabreu@...opsys.com>
Cc: Jerome Brunet <jbrunet@...libre.com>
Cc: Martin Blumenstingl <martin.blumenstingl@...glemail.com>
Cc: David S. Miller <davem@...emloft.net>
Cc: Joao Pinto <jpinto@...opsys.com>
Cc: Giuseppe Cavallaro <peppe.cavallaro@...com>
Cc: Alexandre Torgue <alexandre.torgue@...com>
---
Jerome,

Can I have your Tested-by on this patch?

Thanks and Best Regards,
Jose Miguel Abreu
---
 drivers/net/ethernet/stmicro/stmmac/common.h      |   4 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac.h      |   7 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 177 +++++++++++++++-------
 3 files changed, 126 insertions(+), 62 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 1854f270ad66..b1b305f8f414 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -258,10 +258,10 @@ struct stmmac_safety_stats {
 #define MAX_DMA_RIWT		0xff
 #define MIN_DMA_RIWT		0x20
 /* Tx coalesce parameters */
-#define STMMAC_COAL_TX_TIMER	40000
+#define STMMAC_COAL_TX_TIMER	1000
 #define STMMAC_MAX_COAL_TX_TICK	100000
 #define STMMAC_TX_MAX_FRAMES	256
-#define STMMAC_TX_FRAMES	64
+#define STMMAC_TX_FRAMES	25
 
 /* Packets types */
 enum packets_types {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 76649adf8fb0..957030cfb833 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -48,6 +48,9 @@ struct stmmac_tx_info {
 
 /* Frequently used values are kept adjacent for cache effect */
 struct stmmac_tx_queue {
+	u32 tx_count_frames;
+	int tx_timer_active;
+	struct timer_list txtimer;
 	u32 queue_index;
 	struct stmmac_priv *priv_data;
 	struct dma_extended_desc *dma_etx ____cacheline_aligned_in_smp;
@@ -59,6 +62,7 @@ struct stmmac_tx_queue {
 	dma_addr_t dma_tx_phy;
 	u32 tx_tail_addr;
 	u32 mss;
+	struct napi_struct napi ____cacheline_aligned_in_smp;
 };
 
 struct stmmac_rx_queue {
@@ -109,15 +113,12 @@ struct stmmac_pps_cfg {
 
 struct stmmac_priv {
 	/* Frequently used values are kept adjacent for cache effect */
-	u32 tx_count_frames;
 	u32 tx_coal_frames;
 	u32 tx_coal_timer;
-	bool tx_timer_armed;
 
 	int tx_coalesce;
 	int hwts_tx_en;
 	bool tx_path_in_lpi_mode;
-	struct timer_list txtimer;
 	bool tso;
 
 	unsigned int dma_buf_sz;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index ff1ffb46198a..1fca66ad6b17 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -148,6 +148,7 @@ static void stmmac_verify_args(void)
 static void stmmac_disable_all_queues(struct stmmac_priv *priv)
 {
 	u32 rx_queues_cnt = priv->plat->rx_queues_to_use;
+	u32 tx_queues_cnt = priv->plat->tx_queues_to_use;
 	u32 queue;
 
 	for (queue = 0; queue < rx_queues_cnt; queue++) {
@@ -155,6 +156,12 @@ static void stmmac_disable_all_queues(struct stmmac_priv *priv)
 
 		napi_disable(&rx_q->napi);
 	}
+
+	for (queue = 0; queue < tx_queues_cnt; queue++) {
+		struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+
+		napi_disable(&tx_q->napi);
+	}
 }
 
 /**
@@ -164,6 +171,7 @@ static void stmmac_disable_all_queues(struct stmmac_priv *priv)
 static void stmmac_enable_all_queues(struct stmmac_priv *priv)
 {
 	u32 rx_queues_cnt = priv->plat->rx_queues_to_use;
+	u32 tx_queues_cnt = priv->plat->tx_queues_to_use;
 	u32 queue;
 
 	for (queue = 0; queue < rx_queues_cnt; queue++) {
@@ -171,6 +179,12 @@ static void stmmac_enable_all_queues(struct stmmac_priv *priv)
 
 		napi_enable(&rx_q->napi);
 	}
+
+	for (queue = 0; queue < tx_queues_cnt; queue++) {
+		struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+
+		napi_enable(&tx_q->napi);
+	}
 }
 
 /**
@@ -1843,18 +1857,18 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
  * @queue: TX queue index
  * Description: it reclaims the transmit resources after transmission completes.
  */
-static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
+static int stmmac_tx_clean(struct stmmac_priv *priv, int limit, u32 queue)
 {
 	struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
 	unsigned int bytes_compl = 0, pkts_compl = 0;
-	unsigned int entry;
+	unsigned int entry, count = 0;
 
 	netif_tx_lock(priv->dev);
 
 	priv->xstats.tx_clean++;
 
 	entry = tx_q->dirty_tx;
-	while (entry != tx_q->cur_tx) {
+	while ((entry != tx_q->cur_tx) && (count < limit)) {
 		struct sk_buff *skb = tx_q->tx_skbuff[entry];
 		struct dma_desc *p;
 		int status;
@@ -1870,6 +1884,8 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
 		if (unlikely(status & tx_dma_own))
 			break;
 
+		count++;
+
 		/* Make sure descriptor fields are read after reading
 		 * the own bit.
 		 */
@@ -1938,6 +1954,8 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
 		mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer));
 	}
 	netif_tx_unlock(priv->dev);
+
+	return count;
 }
 
 /**
@@ -2034,7 +2052,6 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv)
 	u32 channels_to_check = tx_channel_count > rx_channel_count ?
 				tx_channel_count : rx_channel_count;
 	u32 chan;
-	bool poll_scheduled = false;
 	int status[max_t(u32, MTL_MAX_TX_QUEUES, MTL_MAX_RX_QUEUES)];
 
 	/* Make sure we never check beyond our status buffer. */
@@ -2055,11 +2072,8 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv)
 		if (likely(status[chan] & handle_rx)) {
 			struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];
 
-			if (likely(napi_schedule_prep(&rx_q->napi))) {
-				stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
+			if (likely(napi_schedule_prep(&rx_q->napi)))
 				__napi_schedule(&rx_q->napi);
-				poll_scheduled = true;
-			}
 		}
 	}
 
@@ -2067,22 +2081,12 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv)
 	 * If we didn't schedule poll, see if any DMA channel (used by tx) has a
 	 * completed transmission, if so, call stmmac_poll (once).
 	 */
-	if (!poll_scheduled) {
-		for (chan = 0; chan < tx_channel_count; chan++) {
-			if (status[chan] & handle_tx) {
-				/* It doesn't matter what rx queue we choose
-				 * here. We use 0 since it always exists.
-				 */
-				struct stmmac_rx_queue *rx_q =
-					&priv->rx_queue[0];
+	for (chan = 0; chan < tx_channel_count; chan++) {
+		if (status[chan] & handle_tx) {
+			struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan];
 
-				if (likely(napi_schedule_prep(&rx_q->napi))) {
-					stmmac_disable_dma_irq(priv,
-							priv->ioaddr, chan);
-					__napi_schedule(&rx_q->napi);
-				}
-				break;
-			}
+			if (likely(napi_schedule_prep(&tx_q->napi)))
+				__napi_schedule(&tx_q->napi);
 		}
 	}
 
@@ -2241,13 +2245,15 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
  */
 static void stmmac_tx_timer(struct timer_list *t)
 {
-	struct stmmac_priv *priv = from_timer(priv, t, txtimer);
-	u32 tx_queues_count = priv->plat->tx_queues_to_use;
-	u32 queue;
+	struct stmmac_tx_queue *tx_q = from_timer(tx_q, t, txtimer);
+	struct stmmac_priv *priv = tx_q->priv_data;
 
-	/* let's scan all the tx queues */
-	for (queue = 0; queue < tx_queues_count; queue++)
-		stmmac_tx_clean(priv, queue);
+	if (napi_schedule_prep(&tx_q->napi)) {
+		stmmac_disable_dma_irq(priv, priv->ioaddr, tx_q->queue_index);
+		__napi_schedule(&tx_q->napi);
+	}
+
+	tx_q->tx_timer_active = 0;
 }
 
 /**
@@ -2260,11 +2266,17 @@ static void stmmac_tx_timer(struct timer_list *t)
  */
 static void stmmac_init_tx_coalesce(struct stmmac_priv *priv)
 {
+	u32 tx_channel_count = priv->plat->tx_queues_to_use;
+	u32 chan;
+
 	priv->tx_coal_frames = STMMAC_TX_FRAMES;
 	priv->tx_coal_timer = STMMAC_COAL_TX_TIMER;
-	timer_setup(&priv->txtimer, stmmac_tx_timer, 0);
-	priv->txtimer.expires = STMMAC_COAL_TIMER(priv->tx_coal_timer);
-	add_timer(&priv->txtimer);
+
+	for (chan = 0; chan < tx_channel_count; chan++) {
+		struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan];
+
+		timer_setup(&tx_q->txtimer, stmmac_tx_timer, 0);
+	}
 }
 
 static void stmmac_set_rings_length(struct stmmac_priv *priv)
@@ -2592,6 +2604,7 @@ static void stmmac_hw_teardown(struct net_device *dev)
 static int stmmac_open(struct net_device *dev)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
+	u32 chan;
 	int ret;
 
 	stmmac_check_ether_addr(priv);
@@ -2688,7 +2701,9 @@ static int stmmac_open(struct net_device *dev)
 	if (dev->phydev)
 		phy_stop(dev->phydev);
 
-	del_timer_sync(&priv->txtimer);
+	for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++)
+		del_timer_sync(&priv->tx_queue[chan].txtimer);
+
 	stmmac_hw_teardown(dev);
 init_error:
 	free_dma_desc_resources(priv);
@@ -2708,6 +2723,7 @@ static int stmmac_open(struct net_device *dev)
 static int stmmac_release(struct net_device *dev)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
+	u32 chan;
 
 	if (priv->eee_enabled)
 		del_timer_sync(&priv->eee_ctrl_timer);
@@ -2722,7 +2738,8 @@ static int stmmac_release(struct net_device *dev)
 
 	stmmac_disable_all_queues(priv);
 
-	del_timer_sync(&priv->txtimer);
+	for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++)
+		del_timer_sync(&priv->tx_queue[chan].txtimer);
 
 	/* Free the IRQ lines */
 	free_irq(dev->irq, dev);
@@ -2828,6 +2845,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 	int tmp_pay_len = 0;
 	u32 pay_len, mss;
 	u8 proto_hdr_len;
+	bool tx_ic;
 	int i;
 
 	tx_q = &priv->tx_queue[queue];
@@ -2936,12 +2954,17 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 	priv->xstats.tx_tso_nfrags += nfrags;
 
 	/* Manage tx mitigation */
-	priv->tx_count_frames += nfrags + 1;
-	if (likely(priv->tx_coal_frames > priv->tx_count_frames)) {
-		mod_timer(&priv->txtimer,
-			  STMMAC_COAL_TIMER(priv->tx_coal_timer));
-	} else {
-		priv->tx_count_frames = 0;
+	tx_q->tx_count_frames += nfrags + 1;
+	if (!priv->tx_coal_frames)
+		tx_ic = false;
+	else if ((nfrags + 1) > priv->tx_coal_frames)
+		tx_ic = true;
+	else if ((tx_q->tx_count_frames % priv->tx_coal_frames) < (nfrags + 1))
+		tx_ic = true;
+	else
+		tx_ic = false;
+
+	if (tx_ic) {
 		stmmac_set_tx_ic(priv, desc);
 		priv->xstats.tx_set_ic_bit++;
 	}
@@ -2994,6 +3017,12 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
 
+	if (priv->tx_coal_timer && !tx_q->tx_timer_active) {
+		tx_q->tx_timer_active = 1;
+		mod_timer(&tx_q->txtimer,
+				STMMAC_COAL_TIMER(priv->tx_coal_timer));
+	}
+
 	return NETDEV_TX_OK;
 
 dma_map_err:
@@ -3024,6 +3053,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct stmmac_tx_queue *tx_q;
 	unsigned int enh_desc;
 	unsigned int des;
+	bool tx_ic;
 
 	tx_q = &priv->tx_queue[queue];
 
@@ -3146,17 +3176,19 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * This approach takes care about the fragments: desc is the first
 	 * element in case of no SG.
 	 */
-	priv->tx_count_frames += nfrags + 1;
-	if (likely(priv->tx_coal_frames > priv->tx_count_frames) &&
-	    !priv->tx_timer_armed) {
-		mod_timer(&priv->txtimer,
-			  STMMAC_COAL_TIMER(priv->tx_coal_timer));
-		priv->tx_timer_armed = true;
-	} else {
-		priv->tx_count_frames = 0;
+	tx_q->tx_count_frames += nfrags + 1;
+	if (!priv->tx_coal_frames)
+		tx_ic = false;
+	else if ((nfrags + 1) > priv->tx_coal_frames)
+		tx_ic = true;
+	else if ((tx_q->tx_count_frames % priv->tx_coal_frames) < (nfrags + 1))
+		tx_ic = true;
+	else
+		tx_ic = false;
+
+	if (tx_ic) {
 		stmmac_set_tx_ic(priv, desc);
 		priv->xstats.tx_set_ic_bit++;
-		priv->tx_timer_armed = false;
 	}
 
 	skb_tx_timestamp(skb);
@@ -3202,8 +3234,15 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
 
 	stmmac_enable_dma_transmission(priv, priv->ioaddr);
+
 	stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
 
+	if (priv->tx_coal_timer && !tx_q->tx_timer_active) {
+		tx_q->tx_timer_active = 1;
+		mod_timer(&tx_q->txtimer,
+				STMMAC_COAL_TIMER(priv->tx_coal_timer));
+	}
+
 	return NETDEV_TX_OK;
 
 dma_map_err:
@@ -3517,22 +3556,16 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
  *  Description :
  *  To look at the incoming frames and clear the tx resources.
  */
-static int stmmac_poll(struct napi_struct *napi, int budget)
+static int stmmac_rx_poll(struct napi_struct *napi, int budget)
 {
 	struct stmmac_rx_queue *rx_q =
 		container_of(napi, struct stmmac_rx_queue, napi);
 	struct stmmac_priv *priv = rx_q->priv_data;
-	u32 tx_count = priv->plat->tx_queues_to_use;
 	u32 chan = rx_q->queue_index;
 	int work_done = 0;
-	u32 queue;
 
 	priv->xstats.napi_poll++;
 
-	/* check all the queues */
-	for (queue = 0; queue < tx_count; queue++)
-		stmmac_tx_clean(priv, queue);
-
 	work_done = stmmac_rx(priv, budget, rx_q->queue_index);
 	if (work_done < budget) {
 		napi_complete_done(napi, work_done);
@@ -3541,6 +3574,24 @@ static int stmmac_poll(struct napi_struct *napi, int budget)
 	return work_done;
 }
 
+static int stmmac_tx_poll(struct napi_struct *napi, int budget)
+{
+	struct stmmac_tx_queue *tx_q =
+		container_of(napi, struct stmmac_tx_queue, napi);
+	struct stmmac_priv *priv = tx_q->priv_data;
+	u32 chan = tx_q->queue_index;
+	int work_done = 0;
+
+	priv->xstats.napi_poll++;
+
+	work_done = stmmac_tx_clean(priv, budget, chan);
+	if (work_done < budget) {
+		napi_complete_done(napi, work_done);
+		stmmac_enable_dma_irq(priv, priv->ioaddr, chan);
+	}
+	return work_done;
+}
+
 /**
  *  stmmac_tx_timeout
  *  @dev : Pointer to net device structure
@@ -4328,10 +4379,17 @@ int stmmac_dvr_probe(struct device *device,
 	for (queue = 0; queue < priv->plat->rx_queues_to_use; queue++) {
 		struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
 
-		netif_napi_add(ndev, &rx_q->napi, stmmac_poll,
+		netif_napi_add(ndev, &rx_q->napi, stmmac_rx_poll,
 			       (8 * priv->plat->rx_queues_to_use));
 	}
 
+	for (queue = 0; queue < priv->plat->tx_queues_to_use; queue++) {
+		struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+
+		netif_napi_add(ndev, &tx_q->napi, stmmac_tx_poll,
+			       (8 * priv->plat->tx_queues_to_use));
+	}
+
 	mutex_init(&priv->lock);
 
 	/* If a specific clk_csr value is passed from the platform
@@ -4380,6 +4438,11 @@ int stmmac_dvr_probe(struct device *device,
 
 		netif_napi_del(&rx_q->napi);
 	}
+	for (queue = 0; queue < priv->plat->tx_queues_to_use; queue++) {
+		struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+
+		netif_napi_del(&tx_q->napi);
+	}
 error_hw_init:
 	destroy_workqueue(priv->wq);
 error_wq:
-- 
2.7.4

