[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20190215121118.vfaijy7fxqc3qcd3@shell.armlinux.org.uk>
Date: Fri, 15 Feb 2019 12:11:18 +0000
From: Russell King - ARM Linux admin <linux@...linux.org.uk>
To: Andrew Lunn <andrew@...n.ch>,
Vivien Didelot <vivien.didelot@...il.com>
Cc: netdev@...r.kernel.org
Subject: Re: [BUG] 4.20: mv88e6xxx: WARNING: possible circular locking
dependency detected
On Fri, Feb 15, 2019 at 11:46:17AM +0000, Russell King - ARM Linux admin wrote:
> Hi Andrew, Vivien,
>
> I decided to try adding support for the DSA switch interrupt on
> SolidRun's Clearfog platform, but I notice having done so I get:
>
> WARNING: possible circular locking dependency detected
> 4.20.0+ #297 Not tainted
> ------------------------------------------------------
> systemd-udevd/157 is trying to acquire lock:
> ecc4a080 (&chip->reg_lock){+.+.}, at: __setup_irq+0x640/0x704
>
> but task is already holding lock:
> edf9c940 (&desc->request_mutex){+.+.}, at: __setup_irq+0xa0/0x704
>
> which lock already depends on the new lock.
>
>
> the existing dependency chain (in reverse order) is:
>
> -> #1 (&desc->request_mutex){+.+.}:
> mutex_lock_nested+0x1c/0x24
> __setup_irq+0xa0/0x704
> request_threaded_irq+0xd0/0x150
> mv88e6xxx_probe+0x41c/0x694 [mv88e6xxx]
> mdio_probe+0x2c/0x54
> really_probe+0x200/0x2c4
> driver_probe_device+0x5c/0x174
> __driver_attach+0xd8/0xdc
> bus_for_each_dev+0x58/0x7c
> bus_add_driver+0xe4/0x1f0
> driver_register+0x7c/0x110
> mdio_driver_register+0x24/0x58
> do_one_initcall+0x74/0x2e8
> do_init_module+0x60/0x1d0
> load_module+0x1968/0x1ff4
> sys_finit_module+0x8c/0x98
> ret_fast_syscall+0x0/0x28
> 0xbee82ae8
>
> -> #0 (&chip->reg_lock){+.+.}:
> __mutex_lock+0x50/0x8b8
> mutex_lock_nested+0x1c/0x24
> __setup_irq+0x640/0x704
> request_threaded_irq+0xd0/0x150
> mv88e6xxx_g2_irq_setup+0xcc/0x1b4 [mv88e6xxx]
> mv88e6xxx_probe+0x44c/0x694 [mv88e6xxx]
> mdio_probe+0x2c/0x54
> really_probe+0x200/0x2c4
> driver_probe_device+0x5c/0x174
> __driver_attach+0xd8/0xdc
> bus_for_each_dev+0x58/0x7c
> bus_add_driver+0xe4/0x1f0
> driver_register+0x7c/0x110
> mdio_driver_register+0x24/0x58
> do_one_initcall+0x74/0x2e8
> do_init_module+0x60/0x1d0
> load_module+0x1968/0x1ff4
> sys_finit_module+0x8c/0x98
> ret_fast_syscall+0x0/0x28
> 0xbee82ae8
>
> other info that might help us debug this:
>
> Possible unsafe locking scenario:
>
> CPU0 CPU1
> ---- ----
> lock(&desc->request_mutex);
> lock(&chip->reg_lock);
> lock(&desc->request_mutex);
> lock(&chip->reg_lock);
>
> *** DEADLOCK ***
>
> 2 locks held by systemd-udevd/157:
> #0: ee040868 (&dev->mutex){....}, at: __driver_attach+0x70/0xdc
> ------------[ cut here ]------------
> #1: edf9c940 (&desc->request_mutex){+.+.}, at: __setup_irq+0xa0/0x704
> WARNING: CPU: 0 PID: 152 at kernel/locking/lockdep.c:355
> stack backtrace:
> downgrading a read lock
> Modules linked in:
> CPU: 1 PID: 157 Comm: systemd-udevd Not tainted 4.20.0+ #297
> marvell_cesa(+) mv88e6xxx(+) dsa_core devlink xhci_plat_hcd(+) xhci_hcd armada_Hardware name: Marvell Armada 380/385 (Device Tree)
> [<c0019638>] (unwind_backtrace) from [<c0014888>] (show_stack+0x10/0x14)
> [<c0014888>] (show_stack) from [<c07f55c0>] (dump_stack+0x9c/0xd4)
> [<c07f55c0>] (dump_stack) from [<c0088afc>] (print_circular_bug+0x284/0x2d8)
> [<c0088afc>] (print_circular_bug) from [<c0086b5c>] (__lock_acquire+0x15d4/0x19b[<c0086b5c>] (__lock_acquire) from [<c0087828>] (lock_acquire+0xc4/0x1dc)
> [<c0087828>] (lock_acquire) from [<c080fe68>] (__mutex_lock+0x50/0x8b8)
> [<c080fe68>] (__mutex_lock) from [<c0810758>] (mutex_lock_nested+0x1c/0x24)
> [<c0810758>] (mutex_lock_nested) from [<c009e060>] (__setup_irq+0x640/0x704)
> [<c009e060>] (__setup_irq) from [<c009e2e0>] (request_threaded_irq+0xd0/0x150)
> [<c009e2e0>] (request_threaded_irq) from [<bf0dc970>] (mv88e6xxx_g2_irq_setup+0x[<bf0dc970>] (mv88e6xxx_g2_irq_setup [mv88e6xxx]) from [<bf0d5a90>] (mv88e6xxx_p[<bf0d5a90>] (mv88e6xxx_probe [mv88e6xxx]) from [<c050d420>] (mdio_probe+0x2c/0x[<c050d420>] (mdio_probe) from [<c0496eac>] (really_probe+0x200/0x2c4)
> [<c0496eac>] (really_probe) from [<c0497140>] (driver_probe_device+0x5c/0x174)
> [<c0497140>] (driver_probe_device) from [<c0497330>] (__driver_attach+0xd8/0xdc)[<c0497330>] (__driver_attach) from [<c0495494>] (bus_for_each_dev+0x58/0x7c)
> [<c0495494>] (bus_for_each_dev) from [<c04963d4>] (bus_add_driver+0xe4/0x1f0)
> [<c04963d4>] (bus_add_driver) from [<c0498038>] (driver_register+0x7c/0x110)
> [<c0498038>] (driver_register) from [<c050d338>] (mdio_driver_register+0x24/0x58[<c050d338>] (mdio_driver_register) from [<c000afdc>] (do_one_initcall+0x74/0x2e[<c000afdc>] (do_one_initcall) from [<c00d4994>] (do_init_module+0x60/0x1d0)
> [<c00d4994>] (do_init_module) from [<c00d39e0>] (load_module+0x1968/0x1ff4)
> [<c00d39e0>] (load_module) from [<c00d4248>] (sys_finit_module+0x8c/0x98)
> [<c00d4248>] (sys_finit_module) from [<c0009000>] (ret_fast_syscall+0x0/0x28)
> Exception stack(0xed42bfa8 to 0xed42bff0)
> bfa0: 00020000 00000000 0000000b b6e814b5 00000000 010b31e0
> bfc0: 00020000 00000000 00000000 0000017b 010b1b30 00020000 00000000 010b31e0
> bfe0: bee82af8 bee82ae8 b6e7b2ac b6ddad70
> CPU: 0 PID: 152 Comm: systemd-udevd Not tainted 4.20.0+ #297
> Hardware name: Marvell Armada 380/385 (Device Tree)
> [<c0019638>] (unwind_backtrace) from [<c0014888>] (show_stack+0x10/0x14)
> [<c0014888>] (show_stack) from [<c07f55c0>] (dump_stack+0x9c/0xd4)
> [<c07f55c0>] (dump_stack) from [<c00312bc>] (__warn+0xf8/0x124)
> [<c00312bc>] (__warn) from [<c00313b0>] (warn_slowpath_fmt+0x38/0x48)
> [<c00313b0>] (warn_slowpath_fmt) from [<c0087518>] (lock_downgrade+0x14c/0x1b8)
> [<c0087518>] (lock_downgrade) from [<c0081650>] (downgrade_write+0x14/0xd4)
> [<c0081650>] (downgrade_write) from [<c0189d10>] (__do_munmap+0x2b8/0x31c)
> [<c0189d10>] (__do_munmap) from [<c0189dd4>] (__vm_munmap+0x60/0xa0)
> [<c0189dd4>] (__vm_munmap) from [<c0009000>] (ret_fast_syscall+0x0/0x28)
> Exception stack(0xed443fa8 to 0xed443ff0)
> 3fa0: 010a8240 00001000 b665f000 00001000 00000000 00000000
> 3fc0: 010a8240 00001000 00000000 0000005b 00000000 00000007 b6ee5f10 00000000
> 3fe0: fbad2418 bee7f124 b6d7c7b4 b6ddafac
> irq event stamp: 83666
> hardirqs last enabled at (83665): [<c001d6b0>] do_page_fault+0x190/0x360
> hardirqs last disabled at (83666): [<c080e474>] __schedule+0xbc/0x9c4
> softirqs last enabled at (82980): [<c000a484>] __do_softirq+0x344/0x540
> softirqs last disabled at (82971): [<c00386e0>] irq_exit+0x124/0x144
> ---[ end trace c91466d44e5e3485 ]---
>
> This is caused by the locking order inversion in mv88e6xxx_probe:
>
> mutex_lock(&chip->reg_lock);
> if (chip->irq > 0)
> err = mv88e6xxx_g1_irq_setup(chip);
> else
> err = mv88e6xxx_irq_poll_setup(chip);
> mutex_unlock(&chip->reg_lock);
>
> Here, we take chip->reg_lock, and then call into mv88e6xxx_g1_irq_setup()
> which then calls request_threaded_irq(), taking the request_mutex.
> However, when we request the g2 interrupt, we call request_threaded_irq()
> again, which takes the request_mutex, which then goes on to call
> chip_bus_lock(). This comes through to mv88e6xxx_g1_irq_bus_lock,
> which then tries to grab chip->reg_lock.
>
> It looks to me like the mutex_lock()/unlock() for reg_lock should be
> moved inside mv88e6xxx_g1_irq_free_common() and
> mv88e6xxx_g1_irq_setup_common(), which will avoid holding it while
> calling request_threaded_irq() or setting up the delayed work.
Maybe something like this, which seems to solve the problem here:
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index c771a58b975e..c859efd8d329 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -349,9 +349,11 @@ static void mv88e6xxx_g1_irq_free_common(struct mv88e6xxx_chip *chip)
int irq, virq;
u16 mask;
+ mutex_lock(&chip->reg_lock);
mv88e6xxx_g1_read(chip, MV88E6XXX_G1_CTL1, &mask);
mask &= ~GENMASK(chip->g1_irq.nirqs, 0);
mv88e6xxx_g1_write(chip, MV88E6XXX_G1_CTL1, mask);
+ mutex_unlock(&chip->reg_lock);
for (irq = 0; irq < chip->g1_irq.nirqs; irq++) {
virq = irq_find_mapping(chip->g1_irq.domain, irq);
@@ -369,9 +371,7 @@ static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
*/
free_irq(chip->irq, chip);
- mutex_lock(&chip->reg_lock);
mv88e6xxx_g1_irq_free_common(chip);
- mutex_unlock(&chip->reg_lock);
}
static int mv88e6xxx_g1_irq_setup_common(struct mv88e6xxx_chip *chip)
@@ -392,6 +392,7 @@ static int mv88e6xxx_g1_irq_setup_common(struct mv88e6xxx_chip *chip)
chip->g1_irq.chip = mv88e6xxx_g1_irq_chip;
chip->g1_irq.masked = ~0;
+ mutex_lock(&chip->reg_lock);
err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_CTL1, &mask);
if (err)
goto out_mapping;
@@ -406,6 +407,7 @@ static int mv88e6xxx_g1_irq_setup_common(struct mv88e6xxx_chip *chip)
err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_STS, &reg);
if (err)
goto out_disable;
+ mutex_unlock(&chip->reg_lock);
return 0;
@@ -414,6 +416,7 @@ static int mv88e6xxx_g1_irq_setup_common(struct mv88e6xxx_chip *chip)
mv88e6xxx_g1_write(chip, MV88E6XXX_G1_CTL1, mask);
out_mapping:
+ mutex_unlock(&chip->reg_lock);
for (irq = 0; irq < 16; irq++) {
virq = irq_find_mapping(chip->g1_irq.domain, irq);
irq_dispose_mapping(virq);
@@ -479,9 +482,7 @@ static void mv88e6xxx_irq_poll_free(struct mv88e6xxx_chip *chip)
kthread_cancel_delayed_work_sync(&chip->irq_poll_work);
kthread_destroy_worker(chip->kworker);
- mutex_lock(&chip->reg_lock);
mv88e6xxx_g1_irq_free_common(chip);
- mutex_unlock(&chip->reg_lock);
}
int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask)
@@ -4808,12 +4809,10 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
* the PHYs will link their interrupts to these interrupt
* controllers
*/
- mutex_lock(&chip->reg_lock);
if (chip->irq > 0)
err = mv88e6xxx_g1_irq_setup(chip);
else
err = mv88e6xxx_irq_poll_setup(chip);
- mutex_unlock(&chip->reg_lock);
if (err)
goto out;
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 12.1Mbps down 622kbps up
According to speedtest.net: 11.9Mbps down 500kbps up
Powered by blists - more mailing lists