[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20250205203258.1767c337@kernel.org>
Date: Wed, 5 Feb 2025 20:32:58 -0800
From: Jakub Kicinski <kuba@...nel.org>
To: Jacob Keller <jacob.e.keller@...el.com>
Cc: "netdev@...r.kernel.org" <netdev@...r.kernel.org>
Subject: Re: iAVF circular lock dependency due to netdev_lock
On Wed, 5 Feb 2025 18:27:40 -0800 Jacob Keller wrote:
> > Not sure either, the locking in this driver is quite odd. Do you know
> > why it's registering the netdev from a workqueue, and what the entry
> > points to the driver are?
>
> Yes, the locking in iAVF has been problematic for years :(
>
> We register the netdevice from a work queue because we are waiting on
> messages from the PF over virtchnl. I don't fully understand the
> motivation behind the way the initialization was moved into a work
> queue, but this appears to be the historical reasoning from examining
> commit logs.
>
> > Normally before the netdev is registered it can't get called, so all
> > the locking is moot. But IDK if we need to protect from some FW
> > interactions, maybe?
>
> We had a lot of issues with locking and pain getting to the state we are
> in today. I think part of the challenge is that the VF is communicating
> asynchronously over virtchnl queue messages to the PF for setup.
>
> Its a mess :( I could re-order the locks so we go "RTNL -> crit_lock ->
> netdev_lock" but that will only work as long as no flow from the kernel
> does something like "RTNL -> netdev_lock -> <driver callback that takes
> crit lock>" which seems unlikely :(
>
> Its a mess and I don't quite know how to dig out of it.
Stanislav suggested off list that we could add a _locked() version of
register_netdevice(). I'm worried that it's just digging a deeper hole.
We'd cover with the lock parts of core which weren't covered before.
Maybe the saving grace is that the driver appears to never transition
out of the registered state. And netdev lock only protects the core.
So we could elide taking the netdev lock if device is not registered
yet? We'd still need to convince lockdep that its okay to take the
netdev lock under crit lock once.
Code below is incomplete but hopefully it will illustrate.
Key unanswered question is how to explain this to lockdep..
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 4fe481433842..79904d49792a 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -1974,6 +1974,67 @@ static int iavf_reinit_interrupt_scheme(struct iavf_adapter *adapter, bool runni
return err;
}
+enum iavf_lock_level {
+ IAVF_LOCK_REMOVING,
+ IAVF_LOCK_CONTENDED,
+ IAVF_LOCK_CRIT_ONLY,
+ IAVF_LOCK_FULL, /* CRIT + netdev_lock */
+};
+
+static enum iavf_lock_level iavf_lock(struct iavf_adapter *adapter, bool try)
+{
+ struct net_device *netdev = adapter->netdev;
+ enum iavf_lock_level ret;
+
+ if (READ_ONCE(netdev->reg_state) >= NETREG_REGISTERED) {
+ netdev_lock(netdev);
+ ret = IAVF_LOCK_FULL;
+ } else {
+ ret = IAVF_LOCK_CRIT_ONLY;
+ }
+
+ if (!try) {
+ mutex_lock(&adapter->crit_lock);
+ } else if (!mutex_trylock(&adapter->crit_lock)) {
+ if (ret == IAVF_LOCK_FULL)
+ netdev_unlock(netdev);
+
+ return adapter->state == __IAVF_REMOVE ?
+ IAVF_LOCK_REMOVING : IAVF_LOCK_CONTENDED;
+ }
+
+ /* Incredibly unlucky, we saw unregistered device yet didn't contend
+ * with registration for the crit lock. Act as if we did contend.
+ */
+ if (ret == IAVF_LOCK_CRIT_ONLY &&
+ READ_ONCE(netdev->reg_state) >= NETREG_REGISTERED) {
+ mutex_unlock(&adapter->crit_lock);
+ return IAVF_LOCK_CONTENDED;
+ }
+
+ return ret;
+}
+
+static void iavf_unlock(struct iavf_adapter *adapter, enum iavf_lock_level lock)
+{
+ struct net_device *netdev = adapter->netdev;
+
+ switch (lock) {
+ case IAVF_LOCK_REMOVING:
+ case IAVF_LOCK_CONTENDED:
+ WARN_ON_ONCE(1);
+ return;
+
+ case IAVF_LOCK_CRIT_ONLY:
+ mutex_unlock(&adapter->crit_lock);
+ break;
+ case IAVF_LOCK_FULL:
+ mutex_unlock(&adapter->crit_lock);
+ netdev_unlock(netdev);
+ break;
+ }
+}
+
/**
* iavf_finish_config - do all netdev work that needs RTNL
* @work: our work_struct
@@ -1983,7 +2044,7 @@ static int iavf_reinit_interrupt_scheme(struct iavf_adapter *adapter, bool runni
static void iavf_finish_config(struct work_struct *work)
{
struct iavf_adapter *adapter;
- bool netdev_released = false;
+ enum iavf_lock_level lock;
int pairs, err;
adapter = container_of(work, struct iavf_adapter, finish_config);
@@ -1992,8 +2053,7 @@ static void iavf_finish_config(struct work_struct *work)
* The dev->lock is needed to update the queue number
*/
rtnl_lock();
- netdev_lock(adapter->netdev);
- mutex_lock(&adapter->crit_lock);
+ lock = iavf_lock(adapter, false);
if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES) &&
adapter->netdev->reg_state == NETREG_REGISTERED &&
@@ -2012,8 +2072,6 @@ static void iavf_finish_config(struct work_struct *work)
netif_set_real_num_tx_queues(adapter->netdev, pairs);
if (adapter->netdev->reg_state != NETREG_REGISTERED) {
- netdev_unlock(adapter->netdev);
- netdev_released = true;
err = register_netdevice(adapter->netdev);
if (err) {
dev_err(&adapter->pdev->dev, "Unable to register netdev (%d)\n",
@@ -2040,9 +2098,7 @@ static void iavf_finish_config(struct work_struct *work)
}
out:
- mutex_unlock(&adapter->crit_lock);
- if (!netdev_released)
- netdev_unlock(adapter->netdev);
+ iavf_unlock(adapter, lock);
rtnl_unlock();
}
@@ -2737,16 +2793,18 @@ static void iavf_watchdog_task(struct work_struct *work)
watchdog_task.work);
struct net_device *netdev = adapter->netdev;
struct iavf_hw *hw = &adapter->hw;
+ enum iavf_lock_level lock;
u32 reg_val;
- netdev_lock(netdev);
- if (!mutex_trylock(&adapter->crit_lock)) {
- if (adapter->state == __IAVF_REMOVE) {
- netdev_unlock(netdev);
- return;
- }
-
+ lock = iavf_lock(adapter, true);
+ switch (lock) {
+ case IAVF_LOCK_REMOVING:
+ return;
+ case IAVF_LOCK_CONTENDED:
goto restart_watchdog;
+ case IAVF_LOCK_CRIT_ONLY:
+ case IAVF_LOCK_FULL:
+ break; /* continue */
}
if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
@@ -2755,15 +2813,13 @@ static void iavf_watchdog_task(struct work_struct *work)
switch (adapter->state) {
case __IAVF_STARTUP:
iavf_startup(adapter);
- mutex_unlock(&adapter->crit_lock);
- netdev_unlock(netdev);
+ iavf_unlock(adapter, lock);
queue_delayed_work(adapter->wq, &adapter->watchdog_task,
msecs_to_jiffies(30));
return;
case __IAVF_INIT_VERSION_CHECK:
iavf_init_version_check(adapter);
- mutex_unlock(&adapter->crit_lock);
- netdev_unlock(netdev);
+ iavf_unlock(adapter, lock);
queue_delayed_work(adapter->wq, &adapter->watchdog_task,
msecs_to_jiffies(30));
return;
@@ -2902,8 +2958,7 @@ static void iavf_watchdog_task(struct work_struct *work)
return;
}
- mutex_unlock(&adapter->crit_lock);
- netdev_unlock(netdev);
+ iavf_unlock(adapter, lock);
restart_watchdog:
if (adapter->state >= __IAVF_DOWN)
queue_work(adapter->wq, &adapter->adminq_task);
@@ -3022,6 +3077,7 @@ static void iavf_reset_task(struct work_struct *work)
struct iavf_hw *hw = &adapter->hw;
struct iavf_mac_filter *f, *ftmp;
struct iavf_cloud_filter *cf;
+ enum iavf_lock_level lock;
enum iavf_status status;
u32 reg_val;
int i = 0, err;
@@ -3030,13 +3086,16 @@ static void iavf_reset_task(struct work_struct *work)
/* When device is being removed it doesn't make sense to run the reset
* task, just return in such a case.
*/
- netdev_lock(netdev);
- if (!mutex_trylock(&adapter->crit_lock)) {
- if (adapter->state != __IAVF_REMOVE)
- queue_work(adapter->wq, &adapter->reset_task);
-
- netdev_unlock(netdev);
+ lock = iavf_lock(adapter, true);
+ switch (lock) {
+ case IAVF_LOCK_REMOVING:
return;
+ case IAVF_LOCK_CONTENDED:
+ queue_work(adapter->wq, &adapter->reset_task);
+ return;
+ case IAVF_LOCK_CRIT_ONLY:
+ case IAVF_LOCK_FULL:
+ break; /* continue */
}
iavf_misc_irq_disable(adapter);
@@ -3082,8 +3141,7 @@ static void iavf_reset_task(struct work_struct *work)
dev_err(&adapter->pdev->dev, "Reset never finished (%x)\n",
reg_val);
iavf_disable_vf(adapter);
- mutex_unlock(&adapter->crit_lock);
- netdev_unlock(netdev);
+ iavf_unlock(adapter, lock);
return; /* Do not attempt to reinit. It's dead, Jim. */
}
@@ -3223,8 +3281,7 @@ static void iavf_reset_task(struct work_struct *work)
adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
wake_up(&adapter->reset_waitqueue);
- mutex_unlock(&adapter->crit_lock);
- netdev_unlock(netdev);
+ iavf_unlock(adapter, lock);
return;
reset_err:
@@ -3234,8 +3291,7 @@ static void iavf_reset_task(struct work_struct *work)
}
iavf_disable_vf(adapter);
- mutex_unlock(&adapter->crit_lock);
- netdev_unlock(netdev);
+ iavf_unlock(adapter, lock);
dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
}
Powered by blists - more mailing lists