lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <96939b80-b789-41a6-bea6-78f16833bbc9@intel.com>
Date: Mon, 22 Apr 2024 16:32:01 -0700
From: Jacob Keller <jacob.e.keller@...el.com>
To: Tony Nguyen <anthony.l.nguyen@...el.com>, <davem@...emloft.net>,
	<kuba@...nel.org>, <pabeni@...hat.com>, <edumazet@...gle.com>,
	<netdev@...r.kernel.org>
CC: Lukas Wunner <lukas@...ner.de>, <sasha.neftin@...el.com>, Roman Lozko
	<lozko.roma@...il.com>, Marek Marczykowski-Górecki
	<marmarek@...isiblethingslab.com>, Kurt Kanzenbach <kurt@...utronix.de>,
	Heiner Kallweit <hkallweit1@...il.com>, Simon Horman <horms@...nel.org>,
	Naama Meir <naamax.meir@...ux.intel.com>
Subject: Re: [PATCH net] igc: Fix LED-related deadlock on driver unbind



On 4/22/2024 1:45 PM, Tony Nguyen wrote:
> From: Lukas Wunner <lukas@...ner.de>
> 
> Roman reports a deadlock on unplug of a Thunderbolt docking station
> containing an Intel I225 Ethernet adapter.
> 
> The root cause is that led_classdev's for LEDs on the adapter are
> registered such that they're device-managed by the netdev.  That
> results in recursive acquisition of the rtnl_lock() mutex on unplug:
> 
> When the driver calls unregister_netdev(), it acquires rtnl_lock(),
> then frees the device-managed resources.  Upon unregistering the LEDs,
> netdev_trig_deactivate() invokes unregister_netdevice_notifier(),
> which tries to acquire rtnl_lock() again.
> 
> Avoid by using non-device-managed LED registration.
> 

Could we instead switch to using devm with the PCI device struct instead
of the netdev struct? That would make it still get automatically cleaned
up, but by cleaning it up only when the PCIe device goes away, which
should be after rtnl_lock() is released..

I guess I don't have an objection to this patch in principle since it
does resolve the issue, but it seems like it would be simpler to switch
which device managed the resources vs re-adding the manual handling.

Thanks,
Jake

> Stack trace for posterity:
> 
>   schedule+0x6e/0xf0
>   schedule_preempt_disabled+0x15/0x20
>   __mutex_lock+0x2a0/0x750
>   unregister_netdevice_notifier+0x40/0x150
>   netdev_trig_deactivate+0x1f/0x60 [ledtrig_netdev]
>   led_trigger_set+0x102/0x330
>   led_classdev_unregister+0x4b/0x110
>   release_nodes+0x3d/0xb0
>   devres_release_all+0x8b/0xc0
>   device_del+0x34f/0x3c0
>   unregister_netdevice_many_notify+0x80b/0xaf0
>   unregister_netdev+0x7c/0xd0
>   igc_remove+0xd8/0x1e0 [igc]
>   pci_device_remove+0x3f/0xb0
> 
> Fixes: ea578703b03d ("igc: Add support for LEDs on i225/i226")
> Reported-by: Roman Lozko <lozko.roma@...il.com>
> Closes: https://lore.kernel.org/r/CAEhC_B=ksywxCG_+aQqXUrGEgKq+4mqnSV8EBHOKbC3-Obj9+Q@mail.gmail.com/
> Reported-by: "Marek Marczykowski-Górecki" <marmarek@...isiblethingslab.com>
> Closes: https://lore.kernel.org/r/ZhRD3cOtz5i-61PB@mail-itl/
> Signed-off-by: Kurt Kanzenbach <kurt@...utronix.de>
> Signed-off-by: Lukas Wunner <lukas@...ner.de>
> Cc: Heiner Kallweit <hkallweit1@...il.com>
> Reviewed-by: Simon Horman <horms@...nel.org>
> Reviewed-by: Kurt Kanzenbach <kurt@...utronix.de>
> Tested-by: Kurt Kanzenbach <kurt@...utronix.de> # Intel i225
> Tested-by: Naama Meir <naamax.meir@...ux.intel.com>
> Signed-off-by: Tony Nguyen <anthony.l.nguyen@...el.com>
> ---
>  drivers/net/ethernet/intel/igc/igc.h      |  2 ++
>  drivers/net/ethernet/intel/igc/igc_leds.c | 38 ++++++++++++++++++-----
>  drivers/net/ethernet/intel/igc/igc_main.c |  3 ++
>  3 files changed, 35 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
> index 90316dc58630..6bc56c7c181e 100644
> --- a/drivers/net/ethernet/intel/igc/igc.h
> +++ b/drivers/net/ethernet/intel/igc/igc.h
> @@ -298,6 +298,7 @@ struct igc_adapter {
>  
>  	/* LEDs */
>  	struct mutex led_mutex;
> +	struct igc_led_classdev *leds;
>  };
>  
>  void igc_up(struct igc_adapter *adapter);
> @@ -723,6 +724,7 @@ void igc_ptp_read(struct igc_adapter *adapter, struct timespec64 *ts);
>  void igc_ptp_tx_tstamp_event(struct igc_adapter *adapter);
>  
>  int igc_led_setup(struct igc_adapter *adapter);
> +void igc_led_free(struct igc_adapter *adapter);
>  
>  #define igc_rx_pg_size(_ring) (PAGE_SIZE << igc_rx_pg_order(_ring))
>  
> diff --git a/drivers/net/ethernet/intel/igc/igc_leds.c b/drivers/net/ethernet/intel/igc/igc_leds.c
> index bf240c5daf86..3929b25b6ae6 100644
> --- a/drivers/net/ethernet/intel/igc/igc_leds.c
> +++ b/drivers/net/ethernet/intel/igc/igc_leds.c
> @@ -236,8 +236,8 @@ static void igc_led_get_name(struct igc_adapter *adapter, int index, char *buf,
>  		 pci_dev_id(adapter->pdev), index);
>  }
>  
> -static void igc_setup_ldev(struct igc_led_classdev *ldev,
> -			   struct net_device *netdev, int index)
> +static int igc_setup_ldev(struct igc_led_classdev *ldev,
> +			  struct net_device *netdev, int index)
>  {
>  	struct igc_adapter *adapter = netdev_priv(netdev);
>  	struct led_classdev *led_cdev = &ldev->led;
> @@ -257,24 +257,46 @@ static void igc_setup_ldev(struct igc_led_classdev *ldev,
>  	led_cdev->hw_control_get = igc_led_hw_control_get;
>  	led_cdev->hw_control_get_device = igc_led_hw_control_get_device;
>  
> -	devm_led_classdev_register(&netdev->dev, led_cdev);
> +	return led_classdev_register(&netdev->dev, led_cdev);
>  }
>  
>  int igc_led_setup(struct igc_adapter *adapter)
>  {
>  	struct net_device *netdev = adapter->netdev;
> -	struct device *dev = &netdev->dev;
>  	struct igc_led_classdev *leds;
> -	int i;
> +	int i, err;
>  
>  	mutex_init(&adapter->led_mutex);
>  
> -	leds = devm_kcalloc(dev, IGC_NUM_LEDS, sizeof(*leds), GFP_KERNEL);
> +	leds = kcalloc(IGC_NUM_LEDS, sizeof(*leds), GFP_KERNEL);
>  	if (!leds)
>  		return -ENOMEM;
>  
> -	for (i = 0; i < IGC_NUM_LEDS; i++)
> -		igc_setup_ldev(leds + i, netdev, i);
> +	for (i = 0; i < IGC_NUM_LEDS; i++) {
> +		err = igc_setup_ldev(leds + i, netdev, i);
> +		if (err)
> +			goto err;
> +	}
> +
> +	adapter->leds = leds;
>  
>  	return 0;
> +
> +err:
> +	for (i--; i >= 0; i--)
> +		led_classdev_unregister(&((leds + i)->led));
> +
> +	kfree(leds);
> +	return err;
> +}
> +
> +void igc_led_free(struct igc_adapter *adapter)
> +{
> +	struct igc_led_classdev *leds = adapter->leds;
> +	int i;
> +
> +	for (i = 0; i < IGC_NUM_LEDS; i++)
> +		led_classdev_unregister(&((leds + i)->led));
> +
> +	kfree(leds);
>  }
> diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
> index 35ad40a803cb..4d975d620a8e 100644
> --- a/drivers/net/ethernet/intel/igc/igc_main.c
> +++ b/drivers/net/ethernet/intel/igc/igc_main.c
> @@ -7021,6 +7021,9 @@ static void igc_remove(struct pci_dev *pdev)
>  	cancel_work_sync(&adapter->watchdog_task);
>  	hrtimer_cancel(&adapter->hrtimer);
>  
> +	if (IS_ENABLED(CONFIG_IGC_LEDS))
> +		igc_led_free(adapter);
> +
>  	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
>  	 * would have already happened in close and is redundant.
>  	 */

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ