lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 28 Dec 2021 20:25:54 +0100
From:   Heiner Kallweit <hkallweit1@...il.com>
To:     Thomas Gleixner <tglx@...utronix.de>,
        kernel test robot <oliver.sang@...el.com>
Cc:     Michael Kelley <mikelley@...rosoft.com>,
        Nishanth Menon <nm@...com>, Jason Gunthorpe <jgg@...dia.com>,
        LKML <linux-kernel@...r.kernel.org>, x86@...nel.org,
        lkp@...ts.01.org, lkp@...el.com, nic_swsd@...ltek.com,
        "David S. Miller" <davem@...emloft.net>,
        Jakub Kicinski <kuba@...nel.org>, netdev@...r.kernel.org
Subject: Re: [genirq/msi] 495c66aca3:
 BUG:sleeping_function_called_from_invalid_context_at_kernel/locking/mutex.c

On 28.12.2021 19:40, Thomas Gleixner wrote:
> On Mon, Dec 27 2021 at 23:05, kernel test robot wrote:
>>
>> FYI, we noticed the following commit (built with gcc-9):
>>
>> commit: 495c66aca3da704e063fa373fdbe371e71d3f4ee ("genirq/msi: Convert to new functions")
>> https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git irq/msi
>> kern  :err   : [  126.209306] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:280
>> kern  :err   : [  126.209308] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 5183, name: ls
>> kern  :err   : [  126.209311] preempt_count: 2, expected: 0
>> kern  :warn  : [  126.209312] CPU: 2 PID: 5183 Comm: ls Not tainted 5.16.0-rc5-00091-g495c66aca3da #1
>> kern  :warn  : [  126.209315] Hardware name: Hewlett-Packard HP Pro 3340 MT/17A1, BIOS 8.07 01/24/2013
>> kern  :warn  : [  126.209316] Call Trace:
>> kern  :warn  : [  126.209318]  <TASK>
>> kern :warn : [  126.209319] dump_stack_lvl (lib/dump_stack.c:107) 
>> kern :warn : [  126.209323] __might_resched.cold (kernel/sched/core.c:9539 kernel/sched/core.c:9492) 
>> kern :warn : [  126.209326] ? kasan_unpoison (mm/kasan/shadow.c:108 mm/kasan/shadow.c:142) 
>> kern :warn : [  126.209330] mutex_lock (kernel/locking/mutex.c:280) 
>> kern :warn : [  126.209335] ? __mutex_lock_slowpath (kernel/locking/mutex.c:279) 
>> kern :warn : [  126.209339] ? _raw_spin_lock_irqsave (arch/x86/include/asm/atomic.h:202 include/linux/atomic/atomic-instrumented.h:513 include/asm-generic/qspinlock.h:82 include/linux/spinlock.h:185 include/linux/spinlock_api_smp.h:111 kernel/locking/spinlock.c:162) 
>> kern :warn : [  126.209342] ? _raw_read_unlock_irqrestore (kernel/locking/spinlock.c:161) 
>> kern :warn : [  126.209344] msi_get_virq (kernel/irq/msi.c:332) 
>> kern :warn : [  126.209349] pci_irq_vector (drivers/pci/msi/msi.c:1085 drivers/pci/msi/msi.c:1077) 
>> kern :warn : [  126.209354] rtl8169_netpoll (drivers/net/ethernet/realtek/r8169_main.c:4722) 
>> kern :warn : [  126.209358] netpoll_poll_dev (net/core/netpoll.c:166 net/core/netpoll.c:195) 
>> kern :warn : [  126.209363] netpoll_send_skb (net/core/netpoll.c:350 net/core/netpoll.c:376) 
>> kern :warn : [  126.209367] write_msg (drivers/net/netconsole.c:862 drivers/net/netconsole.c:836) netconsole
> 
> Fix below.
> 
> Thanks,
> 
>         tglx
> ---
>  drivers/net/ethernet/realtek/r8169_main.c |   14 +++++++-------
>  1 file changed, 7 insertions(+), 7 deletions(-)
> 
> --- a/drivers/net/ethernet/realtek/r8169_main.c
> +++ b/drivers/net/ethernet/realtek/r8169_main.c
> @@ -615,6 +615,7 @@ struct rtl8169_private {
>  	struct ring_info tx_skb[NUM_TX_DESC];	/* Tx data buffers */
>  	u16 cp_cmd;
>  	u32 irq_mask;
> +	int irq;
>  	struct clk *clk;
>  
>  	struct {
> @@ -4698,7 +4699,7 @@ static int rtl8169_close(struct net_devi
>  
>  	cancel_work_sync(&tp->wk.work);
>  
> -	free_irq(pci_irq_vector(pdev, 0), tp);
> +	free_irq(tp->irq, tp);
>  
>  	phy_disconnect(tp->phydev);
>  
> @@ -4719,7 +4720,7 @@ static void rtl8169_netpoll(struct net_d
>  {
>  	struct rtl8169_private *tp = netdev_priv(dev);
>  
> -	rtl8169_interrupt(pci_irq_vector(tp->pci_dev, 0), tp);
> +	rtl8169_interrupt(tp->irq, tp);
>  }
>  #endif
>  
> @@ -4753,8 +4754,7 @@ static int rtl_open(struct net_device *d
>  	rtl_request_firmware(tp);
>  
>  	irqflags = pci_dev_msi_enabled(pdev) ? IRQF_NO_THREAD : IRQF_SHARED;
> -	retval = request_irq(pci_irq_vector(pdev, 0), rtl8169_interrupt,
> -			     irqflags, dev->name, tp);
> +	retval = request_irq(tp->irq, rtl8169_interrupt, irqflags, dev->name, tp);
>  	if (retval < 0)
>  		goto err_release_fw_2;
>  
> @@ -4771,7 +4771,7 @@ static int rtl_open(struct net_device *d
>  	return retval;
>  
>  err_free_irq:
> -	free_irq(pci_irq_vector(pdev, 0), tp);
> +	free_irq(tp->irq, tp);
>  err_release_fw_2:
>  	rtl_release_firmware(tp);
>  	rtl8169_rx_clear(tp);
> @@ -5341,6 +5341,7 @@ static int rtl_init_one(struct pci_dev *
>  		dev_err(&pdev->dev, "Can't allocate interrupt\n");
>  		return rc;
>  	}
> +	tp->irq = pci_irq_vector(pdev, 0);
>  
>  	INIT_WORK(&tp->wk.work, rtl_task);
>  
> @@ -5416,8 +5417,7 @@ static int rtl_init_one(struct pci_dev *
>  		return rc;
>  
>  	netdev_info(dev, "%s, %pM, XID %03x, IRQ %d\n",
> -		    rtl_chip_infos[chipset].name, dev->dev_addr, xid,
> -		    pci_irq_vector(pdev, 0));
> +		    rtl_chip_infos[chipset].name, dev->dev_addr, xid, tp->irq);
>  
>  	if (jumbo_max)
>  		netdev_info(dev, "jumbo features [frames: %d bytes, tx checksumming: %s]\n",

Thanks for the patch; I'll submit it with your Signed-off-by.

Apart from pci_irq_vector(), including the underlying msi_get_virq(), are there more
functions that must no longer be called from atomic context? Maybe the new constraint
should be added to the kernel-doc of the affected functions?

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ