[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ff2f8f89-134e-b7f8-1a40-d87f371be303@baylibre.com>
Date: Mon, 10 Sep 2018 13:43:04 +0200
From: Neil Armstrong <narmstrong@...libre.com>
To: Jose Abreu <Jose.Abreu@...opsys.com>, netdev@...r.kernel.org
Cc: Jerome Brunet <jbrunet@...libre.com>,
Martin Blumenstingl <martin.blumenstingl@...glemail.com>,
"David S. Miller" <davem@...emloft.net>,
Joao Pinto <Joao.Pinto@...opsys.com>,
Giuseppe Cavallaro <peppe.cavallaro@...com>,
Alexandre Torgue <alexandre.torgue@...com>
Subject: Re: [net-next, v2, 1/2] net: stmmac: Rework coalesce timer and fix
multi-queue races
Hi Jose,
On 10/09/2018 11:14, Jose Abreu wrote:
> This follows David Miller advice and tries to fix coalesce timer in
> multi-queue scenarios.
>
> We are now using per-queue coalesce values and per-queue TX timer.
>
> Coalesce timer default values was changed to 1ms and the coalesce frames
> to 25.
>
> Tested in B2B setup between XGMAC2 and GMAC5.
>
> Signed-off-by: Jose Abreu <joabreu@...opsys.com>
> Cc: Jerome Brunet <jbrunet@...libre.com>
> Cc: Martin Blumenstingl <martin.blumenstingl@...glemail.com>
> Cc: David S. Miller <davem@...emloft.net>
> Cc: Joao Pinto <jpinto@...opsys.com>
> Cc: Giuseppe Cavallaro <peppe.cavallaro@...com>
> Cc: Alexandre Torgue <alexandre.torgue@...com>
> ---
> Jerome,
Jerome is in holidays...
>
> Can you please test if this one is okay ?
I tested this patch on top of 4.18.7 with the previous patch (4ae0169fd1b3) reverted.
The TX or RX stopped a few seconds after iperf3 started :
(iperf3 is running in server mode on the Amlogic A113D board)
$ iperf3 -c 10.1.4.95
Connecting to host 10.1.4.95, port 5201
[ 4] local 10.1.2.12 port 39906 connected to 10.1.4.95 port 5201
[ ID] Interval Transfer Bandwidth Retr Cwnd
[ 4] 0.00-1.00 sec 56.2 MBytes 472 Mbits/sec 0 660 KBytes
[ 4] 1.00-2.00 sec 56.2 MBytes 472 Mbits/sec 0 660 KBytes
[ 4] 2.00-3.00 sec 38.8 MBytes 325 Mbits/sec 1 1.41 KBytes
[ 4] 3.00-4.00 sec 0.00 Bytes 0.00 bits/sec 1 1.41 KBytes
[ 4] 4.00-5.00 sec 0.00 Bytes 0.00 bits/sec 1 1.41 KBytes
^C[ 4] 5.00-5.61 sec 0.00 Bytes 0.00 bits/sec 0 1.41 KBytes
- - - - - - - - - - - - - - - - - - - - - - - - -
[ ID] Interval Transfer Bandwidth Retr
[ 4] 0.00-5.61 sec 151 MBytes 226 Mbits/sec 3 sender
[ 4] 0.00-5.61 sec 0.00 Bytes 0.00 bits/sec receiver
iperf3: interrupt - the client has terminated
$ iperf3 -c 10.1.4.95 -R
Connecting to host 10.1.4.95, port 5201
Reverse mode, remote host 10.1.4.95 is sending
[ 4] local 10.1.2.12 port 39894 connected to 10.1.4.95 port 5201
[ ID] Interval Transfer Bandwidth
[ 4] 0.00-1.00 sec 82.2 MBytes 690 Mbits/sec
[ 4] 1.00-2.00 sec 82.6 MBytes 693 Mbits/sec
[ 4] 2.00-3.00 sec 24.2 MBytes 203 Mbits/sec
[ 4] 3.00-4.00 sec 0.00 Bytes 0.00 bits/sec
[ 4] 4.00-5.00 sec 0.00 Bytes 0.00 bits/sec
^C[ 4] 5.00-5.53 sec 0.00 Bytes 0.00 bits/sec
- - - - - - - - - - - - - - - - - - - - - - - - -
[ ID] Interval Transfer Bandwidth
[ 4] 0.00-5.53 sec 0.00 Bytes 0.00 bits/sec sender
[ 4] 0.00-5.53 sec 189 MBytes 287 Mbits/sec receiver
iperf3: interrupt - the client has terminated
These works for hours without this patch applied.
Neil
>
> Thanks and Best Regards,
> Jose Miguel Abreu
> ---
> drivers/net/ethernet/stmicro/stmmac/common.h | 4 +-
> drivers/net/ethernet/stmicro/stmmac/stmmac.h | 6 +-
> drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 207 ++++++++++++++--------
> 3 files changed, 135 insertions(+), 82 deletions(-)
>
> diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
> index 1854f270ad66..b1b305f8f414 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/common.h
> +++ b/drivers/net/ethernet/stmicro/stmmac/common.h
> @@ -258,10 +258,10 @@ struct stmmac_safety_stats {
> #define MAX_DMA_RIWT 0xff
> #define MIN_DMA_RIWT 0x20
> /* Tx coalesce parameters */
> -#define STMMAC_COAL_TX_TIMER 40000
> +#define STMMAC_COAL_TX_TIMER 1000
> #define STMMAC_MAX_COAL_TX_TICK 100000
> #define STMMAC_TX_MAX_FRAMES 256
> -#define STMMAC_TX_FRAMES 64
> +#define STMMAC_TX_FRAMES 25
>
> /* Packets types */
> enum packets_types {
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
> index c0a855b7ab3b..957030cfb833 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
> @@ -48,6 +48,9 @@ struct stmmac_tx_info {
>
> /* Frequently used values are kept adjacent for cache effect */
> struct stmmac_tx_queue {
> + u32 tx_count_frames;
> + int tx_timer_active;
> + struct timer_list txtimer;
> u32 queue_index;
> struct stmmac_priv *priv_data;
> struct dma_extended_desc *dma_etx ____cacheline_aligned_in_smp;
> @@ -59,6 +62,7 @@ struct stmmac_tx_queue {
> dma_addr_t dma_tx_phy;
> u32 tx_tail_addr;
> u32 mss;
> + struct napi_struct napi ____cacheline_aligned_in_smp;
> };
>
> struct stmmac_rx_queue {
> @@ -109,14 +113,12 @@ struct stmmac_pps_cfg {
>
> struct stmmac_priv {
> /* Frequently used values are kept adjacent for cache effect */
> - u32 tx_count_frames;
> u32 tx_coal_frames;
> u32 tx_coal_timer;
>
> int tx_coalesce;
> int hwts_tx_en;
> bool tx_path_in_lpi_mode;
> - struct timer_list txtimer;
> bool tso;
>
> unsigned int dma_buf_sz;
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> index 9f458bb16f2a..9809c2b319fe 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> @@ -148,6 +148,7 @@ static void stmmac_verify_args(void)
> static void stmmac_disable_all_queues(struct stmmac_priv *priv)
> {
> u32 rx_queues_cnt = priv->plat->rx_queues_to_use;
> + u32 tx_queues_cnt = priv->plat->tx_queues_to_use;
> u32 queue;
>
> for (queue = 0; queue < rx_queues_cnt; queue++) {
> @@ -155,6 +156,12 @@ static void stmmac_disable_all_queues(struct stmmac_priv *priv)
>
> napi_disable(&rx_q->napi);
> }
> +
> + for (queue = 0; queue < tx_queues_cnt; queue++) {
> + struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
> +
> + napi_disable(&tx_q->napi);
> + }
> }
>
> /**
> @@ -164,6 +171,7 @@ static void stmmac_disable_all_queues(struct stmmac_priv *priv)
> static void stmmac_enable_all_queues(struct stmmac_priv *priv)
> {
> u32 rx_queues_cnt = priv->plat->rx_queues_to_use;
> + u32 tx_queues_cnt = priv->plat->tx_queues_to_use;
> u32 queue;
>
> for (queue = 0; queue < rx_queues_cnt; queue++) {
> @@ -171,6 +179,12 @@ static void stmmac_enable_all_queues(struct stmmac_priv *priv)
>
> napi_enable(&rx_q->napi);
> }
> +
> + for (queue = 0; queue < tx_queues_cnt; queue++) {
> + struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
> +
> + napi_enable(&tx_q->napi);
> + }
> }
>
> /**
> @@ -1843,7 +1857,8 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
> * @queue: TX queue index
> * Description: it reclaims the transmit resources after transmission completes.
> */
> -static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
> +static int stmmac_tx_clean(struct stmmac_priv *priv, int limit, u32 queue,
> + bool *more)
> {
> struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
> unsigned int bytes_compl = 0, pkts_compl = 0;
> @@ -1851,10 +1866,13 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
>
> netif_tx_lock(priv->dev);
>
> + if (more)
> + *more = false;
> +
> priv->xstats.tx_clean++;
>
> entry = tx_q->dirty_tx;
> - while (entry != tx_q->cur_tx) {
> + while ((entry != tx_q->cur_tx) && (pkts_compl < limit)) {
> struct sk_buff *skb = tx_q->tx_skbuff[entry];
> struct dma_desc *p;
> int status;
> @@ -1937,7 +1955,13 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
> stmmac_enable_eee_mode(priv);
> mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer));
> }
> +
> + if (more && (tx_q->dirty_tx != tx_q->cur_tx))
> + *more = true;
> +
> netif_tx_unlock(priv->dev);
> +
> + return pkts_compl;
> }
>
> /**
> @@ -2020,6 +2044,34 @@ static bool stmmac_safety_feat_interrupt(struct stmmac_priv *priv)
> return false;
> }
>
> +static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan)
> +{
> + int status = stmmac_dma_interrupt_status(priv, priv->ioaddr,
> + &priv->xstats, chan);
> +
> + if ((status & handle_rx) && (chan < priv->plat->rx_queues_to_use)) {
> + struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];
> +
> + if (likely(napi_schedule_prep(&rx_q->napi))) {
> + stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
> + __napi_schedule(&rx_q->napi);
> + }
> + } else {
> + status &= ~handle_rx;
> + }
> +
> + if ((status & handle_tx) && (chan < priv->plat->tx_queues_to_use)) {
> + struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan];
> +
> + if (likely(napi_schedule_prep(&tx_q->napi)))
> + __napi_schedule(&tx_q->napi);
> + } else {
> + status &= ~handle_tx;
> + }
> +
> + return status;
> +}
> +
> /**
> * stmmac_dma_interrupt - DMA ISR
> * @priv: driver private structure
> @@ -2034,57 +2086,14 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv)
> u32 channels_to_check = tx_channel_count > rx_channel_count ?
> tx_channel_count : rx_channel_count;
> u32 chan;
> - bool poll_scheduled = false;
> int status[max_t(u32, MTL_MAX_TX_QUEUES, MTL_MAX_RX_QUEUES)];
>
> /* Make sure we never check beyond our status buffer. */
> if (WARN_ON_ONCE(channels_to_check > ARRAY_SIZE(status)))
> channels_to_check = ARRAY_SIZE(status);
>
> - /* Each DMA channel can be used for rx and tx simultaneously, yet
> - * napi_struct is embedded in struct stmmac_rx_queue rather than in a
> - * stmmac_channel struct.
> - * Because of this, stmmac_poll currently checks (and possibly wakes)
> - * all tx queues rather than just a single tx queue.
> - */
> for (chan = 0; chan < channels_to_check; chan++)
> - status[chan] = stmmac_dma_interrupt_status(priv, priv->ioaddr,
> - &priv->xstats, chan);
> -
> - for (chan = 0; chan < rx_channel_count; chan++) {
> - if (likely(status[chan] & handle_rx)) {
> - struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];
> -
> - if (likely(napi_schedule_prep(&rx_q->napi))) {
> - stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
> - __napi_schedule(&rx_q->napi);
> - poll_scheduled = true;
> - }
> - }
> - }
> -
> - /* If we scheduled poll, we already know that tx queues will be checked.
> - * If we didn't schedule poll, see if any DMA channel (used by tx) has a
> - * completed transmission, if so, call stmmac_poll (once).
> - */
> - if (!poll_scheduled) {
> - for (chan = 0; chan < tx_channel_count; chan++) {
> - if (status[chan] & handle_tx) {
> - /* It doesn't matter what rx queue we choose
> - * here. We use 0 since it always exists.
> - */
> - struct stmmac_rx_queue *rx_q =
> - &priv->rx_queue[0];
> -
> - if (likely(napi_schedule_prep(&rx_q->napi))) {
> - stmmac_disable_dma_irq(priv,
> - priv->ioaddr, chan);
> - __napi_schedule(&rx_q->napi);
> - }
> - break;
> - }
> - }
> - }
> + status[chan] = stmmac_napi_check(priv, chan);
>
> for (chan = 0; chan < tx_channel_count; chan++) {
> if (unlikely(status[chan] & tx_hard_error_bump_tc)) {
> @@ -2241,13 +2250,11 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
> */
> static void stmmac_tx_timer(struct timer_list *t)
> {
> - struct stmmac_priv *priv = from_timer(priv, t, txtimer);
> - u32 tx_queues_count = priv->plat->tx_queues_to_use;
> - u32 queue;
> + struct stmmac_tx_queue *tx_q = from_timer(tx_q, t, txtimer);
>
> - /* let's scan all the tx queues */
> - for (queue = 0; queue < tx_queues_count; queue++)
> - stmmac_tx_clean(priv, queue);
> + if (likely(napi_schedule_prep(&tx_q->napi)))
> + __napi_schedule(&tx_q->napi);
> + tx_q->tx_timer_active = 0;
> }
>
> /**
> @@ -2260,11 +2267,17 @@ static void stmmac_tx_timer(struct timer_list *t)
> */
> static void stmmac_init_tx_coalesce(struct stmmac_priv *priv)
> {
> + u32 tx_channel_count = priv->plat->tx_queues_to_use;
> + u32 chan;
> +
> priv->tx_coal_frames = STMMAC_TX_FRAMES;
> priv->tx_coal_timer = STMMAC_COAL_TX_TIMER;
> - timer_setup(&priv->txtimer, stmmac_tx_timer, 0);
> - priv->txtimer.expires = STMMAC_COAL_TIMER(priv->tx_coal_timer);
> - add_timer(&priv->txtimer);
> +
> + for (chan = 0; chan < tx_channel_count; chan++) {
> + struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan];
> +
> + timer_setup(&tx_q->txtimer, stmmac_tx_timer, 0);
> + }
> }
>
> static void stmmac_set_rings_length(struct stmmac_priv *priv)
> @@ -2592,6 +2605,7 @@ static void stmmac_hw_teardown(struct net_device *dev)
> static int stmmac_open(struct net_device *dev)
> {
> struct stmmac_priv *priv = netdev_priv(dev);
> + u32 chan;
> int ret;
>
> stmmac_check_ether_addr(priv);
> @@ -2688,7 +2702,9 @@ static int stmmac_open(struct net_device *dev)
> if (dev->phydev)
> phy_stop(dev->phydev);
>
> - del_timer_sync(&priv->txtimer);
> + for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++)
> + del_timer_sync(&priv->tx_queue[chan].txtimer);
> +
> stmmac_hw_teardown(dev);
> init_error:
> free_dma_desc_resources(priv);
> @@ -2708,6 +2724,7 @@ static int stmmac_open(struct net_device *dev)
> static int stmmac_release(struct net_device *dev)
> {
> struct stmmac_priv *priv = netdev_priv(dev);
> + u32 chan;
>
> if (priv->eee_enabled)
> del_timer_sync(&priv->eee_ctrl_timer);
> @@ -2722,7 +2739,8 @@ static int stmmac_release(struct net_device *dev)
>
> stmmac_disable_all_queues(priv);
>
> - del_timer_sync(&priv->txtimer);
> + for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++)
> + del_timer_sync(&priv->tx_queue[chan].txtimer);
>
> /* Free the IRQ lines */
> free_irq(dev->irq, dev);
> @@ -2936,14 +2954,11 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
> priv->xstats.tx_tso_nfrags += nfrags;
>
> /* Manage tx mitigation */
> - priv->tx_count_frames += nfrags + 1;
> - if (likely(priv->tx_coal_frames > priv->tx_count_frames)) {
> - mod_timer(&priv->txtimer,
> - STMMAC_COAL_TIMER(priv->tx_coal_timer));
> - } else {
> - priv->tx_count_frames = 0;
> + tx_q->tx_count_frames += nfrags + 1;
> + if (priv->tx_coal_frames <= tx_q->tx_count_frames) {
> stmmac_set_tx_ic(priv, desc);
> priv->xstats.tx_set_ic_bit++;
> + tx_q->tx_count_frames = 0;
> }
>
> skb_tx_timestamp(skb);
> @@ -2994,6 +3009,12 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
>
> stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
>
> + if (priv->tx_coal_timer && !tx_q->tx_timer_active) {
> + tx_q->tx_timer_active = 1;
> + mod_timer(&tx_q->txtimer,
> + STMMAC_COAL_TIMER(priv->tx_coal_timer));
> + }
> +
> return NETDEV_TX_OK;
>
> dma_map_err:
> @@ -3146,14 +3167,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
> * This approach takes care about the fragments: desc is the first
> * element in case of no SG.
> */
> - priv->tx_count_frames += nfrags + 1;
> - if (likely(priv->tx_coal_frames > priv->tx_count_frames)) {
> - mod_timer(&priv->txtimer,
> - STMMAC_COAL_TIMER(priv->tx_coal_timer));
> - } else {
> - priv->tx_count_frames = 0;
> + tx_q->tx_count_frames += nfrags + 1;
> + if (priv->tx_coal_frames <= tx_q->tx_count_frames) {
> stmmac_set_tx_ic(priv, desc);
> priv->xstats.tx_set_ic_bit++;
> + tx_q->tx_count_frames = 0;
> }
>
> skb_tx_timestamp(skb);
> @@ -3199,8 +3217,15 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
> netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
>
> stmmac_enable_dma_transmission(priv, priv->ioaddr);
> +
> stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
>
> + if (priv->tx_coal_timer && !tx_q->tx_timer_active) {
> + tx_q->tx_timer_active = 1;
> + mod_timer(&tx_q->txtimer,
> + STMMAC_COAL_TIMER(priv->tx_coal_timer));
> + }
> +
> return NETDEV_TX_OK;
>
> dma_map_err:
> @@ -3514,27 +3539,41 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
> * Description :
> * To look at the incoming frames and clear the tx resources.
> */
> -static int stmmac_poll(struct napi_struct *napi, int budget)
> +static int stmmac_rx_poll(struct napi_struct *napi, int budget)
> {
> struct stmmac_rx_queue *rx_q =
> container_of(napi, struct stmmac_rx_queue, napi);
> struct stmmac_priv *priv = rx_q->priv_data;
> - u32 tx_count = priv->plat->tx_queues_to_use;
> u32 chan = rx_q->queue_index;
> int work_done = 0;
> - u32 queue;
>
> priv->xstats.napi_poll++;
>
> - /* check all the queues */
> - for (queue = 0; queue < tx_count; queue++)
> - stmmac_tx_clean(priv, queue);
> -
> work_done = stmmac_rx(priv, budget, rx_q->queue_index);
> + if (work_done < budget && napi_complete_done(napi, work_done))
> + stmmac_enable_dma_irq(priv, priv->ioaddr, chan);
> +
> + return work_done;
> +}
> +
> +static int stmmac_tx_poll(struct napi_struct *napi, int budget)
> +{
> + struct stmmac_tx_queue *tx_q =
> + container_of(napi, struct stmmac_tx_queue, napi);
> + struct stmmac_priv *priv = tx_q->priv_data;
> + u32 chan = tx_q->queue_index;
> + int work_done = 0;
> + bool more;
> +
> + priv->xstats.napi_poll++;
> +
> + work_done = stmmac_tx_clean(priv, budget, chan, &more);
> if (work_done < budget) {
> napi_complete_done(napi, work_done);
> - stmmac_enable_dma_irq(priv, priv->ioaddr, chan);
> + if (more)
> + napi_reschedule(napi);
> }
> +
> return work_done;
> }
>
> @@ -4325,10 +4364,17 @@ int stmmac_dvr_probe(struct device *device,
> for (queue = 0; queue < priv->plat->rx_queues_to_use; queue++) {
> struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
>
> - netif_napi_add(ndev, &rx_q->napi, stmmac_poll,
> + netif_napi_add(ndev, &rx_q->napi, stmmac_rx_poll,
> (8 * priv->plat->rx_queues_to_use));
> }
>
> + for (queue = 0; queue < priv->plat->tx_queues_to_use; queue++) {
> + struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
> +
> + netif_napi_add(ndev, &tx_q->napi, stmmac_tx_poll,
> + (8 * priv->plat->tx_queues_to_use));
> + }
> +
> mutex_init(&priv->lock);
>
> /* If a specific clk_csr value is passed from the platform
> @@ -4377,6 +4423,11 @@ int stmmac_dvr_probe(struct device *device,
>
> netif_napi_del(&rx_q->napi);
> }
> + for (queue = 0; queue < priv->plat->tx_queues_to_use; queue++) {
> + struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
> +
> + netif_napi_del(&tx_q->napi);
> + }
> error_hw_init:
> destroy_workqueue(priv->wq);
> error_wq:
>
Powered by blists - more mailing lists