[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ZPXs6i2S8GSCpVOV@shell.armlinux.org.uk>
Date: Mon, 4 Sep 2023 15:42:50 +0100
From: "Russell King (Oracle)" <linux@...linux.org.uk>
To: Jijie Shao <shaojijie@...wei.com>
Cc: f.fainelli@...il.com, Andrew Lunn <andrew@...n.ch>, davem@...emloft.net,
edumazet@...gle.com, hkallweit1@...il.com, kuba@...nel.org,
netdev@...r.kernel.org, pabeni@...hat.com,
"shenjian15@...wei.com" <shenjian15@...wei.com>,
"liuyonglong@...wei.com" <liuyonglong@...wei.com>,
wangjie125@...wei.com, chenhao418@...wei.com,
Hao Lan <lanhao@...wei.com>,
"wangpeiyang1@...wei.com" <wangpeiyang1@...wei.com>
Subject: Re: [PATCH net-next] net: phy: avoid kernel warning dump when
stopping an errored PHY
On Mon, Sep 04, 2023 at 05:50:32PM +0800, Jijie Shao wrote:
> Hi all,
> We encountered an issue when resetting our netdevice recently, it seems
> related to this patch.
>
> During our process, we stop the PHY first and call phy_start() later.
> phy_check_link_status() returns an error because the MDIO read failed.
> This happens because the cmdq is unusable while we reset, so we cannot
> access MDIO.
Are you suggesting that the sequence is:
phy_stop();
reset netdev
phy_start();
?
Is the reason for doing this because you've already detected an issue
with the hardware, and you're trying to recover it - and before you've
called phy_stop() the hardware is already dead?
If that is the case, I'm not really sure what you expect to happen
here. You've identified a race where the state machine is running
concurrently with phy_stop(), but in this circumstance it is also possible
that the state machine could complete executing and have called
phy_error_precise() before phy_stop() has even been called. In that
case, you'll still get a warning-splat on the console from
phy_error_precise().
The only difference is that phy_stop() won't warn.
All that said, this is obviously buggy, because phy_stop() has set
the phydev state to PHY_HALTED and the state machine has then
unexpectedly changed that state.
I wonder whether we should be tracking the phy_start/stop state
separately, since we've had issues with phy_stop() warning when an
error has occurred (see commit 59088b5a946e).
Maybe something like this (untested)?
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index df54c137c5f5..d57f6de8a562 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -810,7 +810,8 @@ int phy_start_cable_test(struct phy_device *phydev,
goto out;
}
- if (phydev->state < PHY_UP ||
+ if (phydev->oper_state != PHY_OPER_STARTED ||
+ phydev->state < PHY_UP ||
phydev->state > PHY_CABLETEST) {
NL_SET_ERR_MSG(extack,
"PHY not configured. Try setting interface up");
@@ -881,7 +882,8 @@ int phy_start_cable_test_tdr(struct phy_device *phydev,
goto out;
}
- if (phydev->state < PHY_UP ||
+ if (phydev->oper_state != PHY_OPER_STARTED ||
+ phydev->state < PHY_UP ||
phydev->state > PHY_CABLETEST) {
NL_SET_ERR_MSG(extack,
"PHY not configured. Try setting interface up");
@@ -1364,10 +1366,8 @@ void phy_stop(struct phy_device *phydev)
struct net_device *dev = phydev->attached_dev;
enum phy_state old_state;
- if (!phy_is_started(phydev) && phydev->state != PHY_DOWN &&
- phydev->state != PHY_ERROR) {
- WARN(1, "called from state %s\n",
- phy_state_to_str(phydev->state));
+ if (phydev->oper_state != PHY_OPER_STARTED) {
+ WARN(1, "called when not started\n");
return;
}
@@ -1382,6 +1382,7 @@ void phy_stop(struct phy_device *phydev)
if (phydev->sfp_bus)
sfp_upstream_stop(phydev->sfp_bus);
+ phydev->oper_state = PHY_OPER_STOPPED;
phydev->state = PHY_HALTED;
phy_process_state_change(phydev, old_state);
@@ -1411,9 +1412,8 @@ void phy_start(struct phy_device *phydev)
{
mutex_lock(&phydev->lock);
- if (phydev->state != PHY_READY && phydev->state != PHY_HALTED) {
- WARN(1, "called from state %s\n",
- phy_state_to_str(phydev->state));
+ if (phydev->oper_state != PHY_OPER_STOPPED) {
+ WARN(1, "called when not stopped\n");
goto out;
}
@@ -1423,6 +1423,7 @@ void phy_start(struct phy_device *phydev)
/* if phy was suspended, bring the physical link up again */
__phy_resume(phydev);
+ phydev->oper_state = PHY_OPER_STARTED;
phydev->state = PHY_UP;
phy_start_machine(phydev);
@@ -1442,14 +1443,18 @@ void phy_state_machine(struct work_struct *work)
container_of(dwork, struct phy_device, state_queue);
struct net_device *dev = phydev->attached_dev;
bool needs_aneg = false, do_suspend = false;
- enum phy_state old_state;
+ enum phy_state old_state, state;
const void *func = NULL;
bool finished = false;
int err = 0;
mutex_lock(&phydev->lock);
- old_state = phydev->state;
+ state = old_state = phydev->state;
+
+ /* If the PHY is stopped, then force state to halted. */
+ if (phydev->oper_state == PHY_OPER_STOPPED)
+ state = PHY_HALTED;
switch (phydev->state) {
case PHY_DOWN:
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 5dcab361a220..b128d903adb3 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -519,6 +519,11 @@ enum phy_state {
PHY_CABLETEST,
};
+enum phy_oper_state {
+ PHY_OPER_STOPPED,
+ PHY_OPER_STARTED,
+};
+
#define MDIO_MMD_NUM 32
/**
@@ -670,6 +675,7 @@ struct phy_device {
int rate_matching;
enum phy_state state;
+ enum phy_oper_state oper_state;
u32 dev_flags;
@@ -1221,7 +1227,8 @@ int phy_speed_down_core(struct phy_device *phydev);
*/
static inline bool phy_is_started(struct phy_device *phydev)
{
- return phydev->state >= PHY_UP;
+ return phydev->oper_state == PHY_OPER_STARTED &&
+ phydev->state >= PHY_UP;
}
void phy_resolve_aneg_pause(struct phy_device *phydev);
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 80Mbps down 10Mbps up. Decent connectivity at last!
Powered by blists - more mailing lists