[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <d46a4d9c-7253-4318-90b7-08646493e7db@nvidia.com>
Date: Thu, 8 Feb 2024 13:11:16 +0000
From: Jon Hunter <jonathanh@...dia.com>
To: Simon Horman <horms@...nel.org>
Cc: Furong Xu <0x1207@...il.com>, "David S. Miller" <davem@...emloft.net>,
Alexandre Torgue <alexandre.torgue@...s.st.com>,
Jose Abreu <joabreu@...opsys.com>, Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>,
Maxime Coquelin <mcoquelin.stm32@...il.com>, Joao Pinto
<jpinto@...opsys.com>, Serge Semin <fancer.lancer@...il.com>,
netdev@...r.kernel.org, linux-stm32@...md-mailman.stormreply.com,
linux-arm-kernel@...ts.infradead.org, linux-kernel@...r.kernel.org,
xfr@...look.com, rock.xu@....com,
"linux-tegra@...r.kernel.org" <linux-tegra@...r.kernel.org>
Subject: Re: [PATCH net v4] net: stmmac: xgmac: fix handling of DPP safety
error for DMA channels
On 08/02/2024 09:53, Simon Horman wrote:
> On Thu, Feb 08, 2024 at 09:26:27AM +0000, Simon Horman wrote:
>> On Wed, Feb 07, 2024 at 11:56:26AM +0000, Jon Hunter wrote:
>>>
>>> On 03/02/2024 05:14, Furong Xu wrote:
>>>> Commit 56e58d6c8a56 ("net: stmmac: Implement Safety Features in
>>>> XGMAC core") checks and reports safety errors, but leaves the
>>>> Data Path Parity Errors for each DMA channel completely unhandled, leading to
>>>> a storm of interrupts.
>>>> Fix it by checking and clearing the DMA_DPP_Interrupt_Status register.
>>>>
>>>> Fixes: 56e58d6c8a56 ("net: stmmac: Implement Safety Features in XGMAC core")
>>>> Signed-off-by: Furong Xu <0x1207@...il.com>
>>>> Reviewed-by: Simon Horman <horms@...nel.org>
>>>> Reviewed-by: Serge Semin <fancer.lancer@...il.com>
>>>> ---
>>>> Changes in v4:
>>>> - fix a typo name of DDPP bit, thanks Serge Semin
>>>>
>>>> Changes in v3:
>>>> - code style fix, thanks Paolo Abeni
>>>>
>>>> Changes in v2:
>>>> - explicit enable Data Path Parity Protection
>>>> - add new counters to stmmac_safety_stats
>>>> - add detailed log
>>>> ---
>>>> drivers/net/ethernet/stmicro/stmmac/common.h | 1 +
>>>> .../net/ethernet/stmicro/stmmac/dwxgmac2.h | 3 +
>>>> .../ethernet/stmicro/stmmac/dwxgmac2_core.c | 57 ++++++++++++++++++-
>>>> 3 files changed, 60 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
>>>> index 721c1f8e892f..b4f60ab078d6 100644
>>>> --- a/drivers/net/ethernet/stmicro/stmmac/common.h
>>>> +++ b/drivers/net/ethernet/stmicro/stmmac/common.h
>>>> @@ -216,6 +216,7 @@ struct stmmac_safety_stats {
>>>> unsigned long mac_errors[32];
>>>> unsigned long mtl_errors[32];
>>>> unsigned long dma_errors[32];
>>>> + unsigned long dma_dpp_errors[32];
>>>> };
>>>> /* Number of fields in Safety Stats */
>>>> diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
>>>> index 207ff1799f2c..5c67a3f89f08 100644
>>>> --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
>>>> +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
>>>> @@ -303,6 +303,8 @@
>>>> #define XGMAC_RXCEIE BIT(4)
>>>> #define XGMAC_TXCEIE BIT(0)
>>>> #define XGMAC_MTL_ECC_INT_STATUS 0x000010cc
>>>> +#define XGMAC_MTL_DPP_CONTROL 0x000010e0
>>>> +#define XGMAC_DPP_DISABLE BIT(0)
>>>> #define XGMAC_MTL_TXQ_OPMODE(x) (0x00001100 + (0x80 * (x)))
>>>> #define XGMAC_TQS GENMASK(25, 16)
>>>> #define XGMAC_TQS_SHIFT 16
>>>> @@ -385,6 +387,7 @@
>>>> #define XGMAC_DCEIE BIT(1)
>>>> #define XGMAC_TCEIE BIT(0)
>>>> #define XGMAC_DMA_ECC_INT_STATUS 0x0000306c
>>>> +#define XGMAC_DMA_DPP_INT_STATUS 0x00003074
>>>> #define XGMAC_DMA_CH_CONTROL(x) (0x00003100 + (0x80 * (x)))
>>>> #define XGMAC_SPH BIT(24)
>>>> #define XGMAC_PBLx8 BIT(16)
>>>> diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
>>>> index eb48211d9b0e..04d7c4dc2e35 100644
>>>> --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
>>>> +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
>>>> @@ -830,6 +830,43 @@ static const struct dwxgmac3_error_desc dwxgmac3_dma_errors[32]= {
>>>> { false, "UNKNOWN", "Unknown Error" }, /* 31 */
>>>> };
>>>> +static const char * const dpp_rx_err = "Read Rx Descriptor Parity checker Error";
>>>> +static const char * const dpp_tx_err = "Read Tx Descriptor Parity checker Error";
>>>> +static const struct dwxgmac3_error_desc dwxgmac3_dma_dpp_errors[32] = {
>>>> + { true, "TDPES0", dpp_tx_err },
>>>> + { true, "TDPES1", dpp_tx_err },
>>>> + { true, "TDPES2", dpp_tx_err },
>>>> + { true, "TDPES3", dpp_tx_err },
>>>> + { true, "TDPES4", dpp_tx_err },
>>>> + { true, "TDPES5", dpp_tx_err },
>>>> + { true, "TDPES6", dpp_tx_err },
>>>> + { true, "TDPES7", dpp_tx_err },
>>>> + { true, "TDPES8", dpp_tx_err },
>>>> + { true, "TDPES9", dpp_tx_err },
>>>> + { true, "TDPES10", dpp_tx_err },
>>>> + { true, "TDPES11", dpp_tx_err },
>>>> + { true, "TDPES12", dpp_tx_err },
>>>> + { true, "TDPES13", dpp_tx_err },
>>>> + { true, "TDPES14", dpp_tx_err },
>>>> + { true, "TDPES15", dpp_tx_err },
>>>> + { true, "RDPES0", dpp_rx_err },
>>>> + { true, "RDPES1", dpp_rx_err },
>>>> + { true, "RDPES2", dpp_rx_err },
>>>> + { true, "RDPES3", dpp_rx_err },
>>>> + { true, "RDPES4", dpp_rx_err },
>>>> + { true, "RDPES5", dpp_rx_err },
>>>> + { true, "RDPES6", dpp_rx_err },
>>>> + { true, "RDPES7", dpp_rx_err },
>>>> + { true, "RDPES8", dpp_rx_err },
>>>> + { true, "RDPES9", dpp_rx_err },
>>>> + { true, "RDPES10", dpp_rx_err },
>>>> + { true, "RDPES11", dpp_rx_err },
>>>> + { true, "RDPES12", dpp_rx_err },
>>>> + { true, "RDPES13", dpp_rx_err },
>>>> + { true, "RDPES14", dpp_rx_err },
>>>> + { true, "RDPES15", dpp_rx_err },
>>>> +};
>>>> +
>>>> static void dwxgmac3_handle_dma_err(struct net_device *ndev,
>>>> void __iomem *ioaddr, bool correctable,
>>>> struct stmmac_safety_stats *stats)
>>>> @@ -841,6 +878,13 @@ static void dwxgmac3_handle_dma_err(struct net_device *ndev,
>>>> dwxgmac3_log_error(ndev, value, correctable, "DMA",
>>>> dwxgmac3_dma_errors, STAT_OFF(dma_errors), stats);
>>>> +
>>>> + value = readl(ioaddr + XGMAC_DMA_DPP_INT_STATUS);
>>>> + writel(value, ioaddr + XGMAC_DMA_DPP_INT_STATUS);
>>>> +
>>>> + dwxgmac3_log_error(ndev, value, false, "DMA_DPP",
>>>> + dwxgmac3_dma_dpp_errors,
>>>> + STAT_OFF(dma_dpp_errors), stats);
>>>> }
>>>> static int
>>>> @@ -881,6 +925,12 @@ dwxgmac3_safety_feat_config(void __iomem *ioaddr, unsigned int asp,
>>>> value |= XGMAC_TMOUTEN; /* FSM Timeout Feature */
>>>> writel(value, ioaddr + XGMAC_MAC_FSM_CONTROL);
>>>> + /* 5. Enable Data Path Parity Protection */
>>>> + value = readl(ioaddr + XGMAC_MTL_DPP_CONTROL);
>>>> + /* already enabled by default, explicit enable it again */
>>>> + value &= ~XGMAC_DPP_DISABLE;
>>>> + writel(value, ioaddr + XGMAC_MTL_DPP_CONTROL);
>>>> +
>>>> return 0;
>>>> }
>>>> @@ -914,7 +964,11 @@ static int dwxgmac3_safety_feat_irq_status(struct net_device *ndev,
>>>> ret |= !corr;
>>>> }
>>>> - err = dma & (XGMAC_DEUIS | XGMAC_DECIS);
>>>> + /* DMA_DPP_Interrupt_Status is indicated by MCSIS bit in
>>>> + * DMA_Safety_Interrupt_Status, so we handle DMA Data Path
>>>> + * Parity Errors here
>>>> + */
>>>> + err = dma & (XGMAC_DEUIS | XGMAC_DECIS | XGMAC_MCSIS);
>>>> corr = dma & XGMAC_DECIS;
>>>> if (err) {
>>>> dwxgmac3_handle_dma_err(ndev, ioaddr, corr, stats);
>>>> @@ -930,6 +984,7 @@ static const struct dwxgmac3_error {
>>>> { dwxgmac3_mac_errors },
>>>> { dwxgmac3_mtl_errors },
>>>> { dwxgmac3_dma_errors },
>>>> + { dwxgmac3_dma_dpp_errors },
>>>> };
>>>> static int dwxgmac3_safety_feat_dump(struct stmmac_safety_stats *stats,
>>>
>>>
>>> This change is breaking the build on some of our builders that are still using GCC 6.x ...
>>>
>>> drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c:836:20: error: initialiser element is not constant
>>> { true, "TDPES0", dpp_tx_err },
>>> ^~~~~~~~~~
>>> drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c:836:20: note: (near initialisation for ‘dwxgmac3_dma_dpp_errors[0].detailed_desc’)
>>> drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c:837:20: error: initialiser element is not constant
>>> { true, "TDPES1", dpp_tx_err },
>>> ^~~~~~~~~~
>>> drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c:837:20: note: (near initialisation for ‘dwxgmac3_dma_dpp_errors[1].detailed_desc’)
>>> ...
>>>
>>> I know that this is quite old, but the minimum GCC version supported by the kernel is v5.1 ...
>>>
>>> https://www.kernel.org/doc/html/next/process/changes.html
>>
>> Thanks Jon,
>>
>> I separately received a notification about this occurring with gcc 7.
>>
>> https://lore.kernel.org/oe-kbuild-all/202402081135.lAxxBXHk-lkp@intel.com/
>>
>> It is unclear to me why this occurs, as dpp_rx_err and dpp_tx_err are const.
>> But I do seem to be able to address this problem by using #defines for
>> these values instead.
>>
>> I plan to post a patch shortly.
>
> Patch posted:
> - [PATCH net] net: stmmac: xgmac: use #define for string constants
> https://lore.kernel.org/netdev/20240208-xgmac-const-v1-1-e69a1eeabfc8@kernel.org/
>
Thanks for fixing! Works for me.
Jon
--
nvpublic
Powered by blists - more mailing lists