[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <e512ee85-7fa6-e5fe-eb30-f088bb83cf23@samsung.com>
Date: Tue, 12 Jan 2021 22:34:19 +0100
From: Marek Szyprowski <m.szyprowski@...sung.com>
To: David Collins <collinsd@...eaurora.org>,
Mark Brown <broonie@...nel.org>,
Liam Girdwood <lgirdwood@...il.com>,
Krzysztof Kozlowski <krzk@...nel.org>,
Bartlomiej Zolnierkiewicz <b.zolnierkie@...sung.com>
Cc: linux-arm-msm@...r.kernel.org, linux-kernel@...r.kernel.org,
'Linux Samsung SOC' <linux-samsung-soc@...r.kernel.org>
Subject: Re: [PATCH] regulator: core: avoid regulator_resolve_supply() race
condition
Hi,
On 08.01.2021 02:16, David Collins wrote:
> The final step in regulator_register() is to call
> regulator_resolve_supply() for each registered regulator
> (including the one in the process of being registered). The
> regulator_resolve_supply() function first checks if rdev->supply
> is NULL, then it performs various steps to try to find the supply.
> If successful, rdev->supply is set inside of set_supply().
>
> This procedure can encounter a race condition if two concurrent
> tasks call regulator_register() near to each other on separate CPUs
> and one of the regulators has rdev->supply_name specified. There
> is currently nothing guaranteeing atomicity between the rdev->supply
> check and set steps. Thus, both tasks can observe rdev->supply==NULL
> in their regulator_resolve_supply() calls. This then results in
> both creating a struct regulator for the supply. One ends up
> actually stored in rdev->supply and the other is lost (though still
> present in the supply's consumer_list).
>
> Here is a kernel log snippet showing the issue:
>
> [ 12.421768] gpu_cc_gx_gdsc: supplied by pm8350_s5_level
> [ 12.425854] gpu_cc_gx_gdsc: supplied by pm8350_s5_level
> [ 12.429064] debugfs: Directory 'regulator.4-SUPPLY' with parent
> '17a00000.rsc:rpmh-regulator-gfxlvl-pm8350_s5_level'
> already present!
>
> Avoid this race condition by holding the rdev->mutex lock inside
> of regulator_resolve_supply() while checking and setting
> rdev->supply.
>
> Signed-off-by: David Collins <collinsd@...eaurora.org>
This patch landed in linux next-20210112 as commit eaa7995c529b
("regulator: core: avoid regulator_resolve_supply() race condition"). I
found that it triggers a following lockdep warning during the DWC3
driver registration on some Exynos based boards (this log is from
Samsung Exynos5420-based Peach-Pit board):
======================================================
WARNING: possible circular locking dependency detected
5.11.0-rc1-00008-geaa7995c529b #10095 Not tainted
------------------------------------------------------
swapper/0/1 is trying to acquire lock:
c12e1b80 (regulator_list_mutex){+.+.}-{3:3}, at:
regulator_lock_dependent+0x4c/0x2b0
but task is already holding lock:
df7190c0 (regulator_ww_class_mutex){+.+.}-{3:3}, at:
regulator_resolve_supply+0x44/0x318
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #2 (regulator_ww_class_mutex){+.+.}-{3:3}:
ww_mutex_lock+0x48/0x88
regulator_lock_recursive+0x84/0x1f4
regulator_lock_dependent+0x184/0x2b0
regulator_enable+0x30/0xe4
dwc3_exynos_probe+0x17c/0x2c0
platform_probe+0x80/0xc0
really_probe+0x1c4/0x4e4
driver_probe_device+0x78/0x1d8
device_driver_attach+0x58/0x60
__driver_attach+0xfc/0x160
bus_for_each_dev+0x6c/0xb8
bus_add_driver+0x170/0x20c
driver_register+0x78/0x10c
do_one_initcall+0x88/0x438
kernel_init_freeable+0x18c/0x1dc
kernel_init+0x8/0x118
ret_from_fork+0x14/0x38
0x0
-> #1 (regulator_ww_class_acquire){+.+.}-{0:0}:
regulator_enable+0x30/0xe4
dwc3_exynos_probe+0x17c/0x2c0
platform_probe+0x80/0xc0
really_probe+0x1c4/0x4e4
driver_probe_device+0x78/0x1d8
device_driver_attach+0x58/0x60
__driver_attach+0xfc/0x160
bus_for_each_dev+0x6c/0xb8
bus_add_driver+0x170/0x20c
driver_register+0x78/0x10c
do_one_initcall+0x88/0x438
kernel_init_freeable+0x18c/0x1dc
kernel_init+0x8/0x118
ret_from_fork+0x14/0x38
0x0
-> #0 (regulator_list_mutex){+.+.}-{3:3}:
lock_acquire+0x2e4/0x5dc
__mutex_lock+0xa4/0xb60
mutex_lock_nested+0x1c/0x24
regulator_lock_dependent+0x4c/0x2b0
regulator_enable+0x30/0xe4
regulator_resolve_supply+0x1cc/0x318
regulator_register_resolve_supply+0x14/0x78
class_for_each_device+0x68/0xe8
regulator_register+0xa2c/0xc9c
devm_regulator_register+0x40/0x70
tps65090_regulator_probe+0x150/0x648
platform_probe+0x80/0xc0
really_probe+0x1c4/0x4e4
driver_probe_device+0x78/0x1d8
bus_for_each_drv+0x78/0xbc
__device_attach+0xe8/0x180
bus_probe_device+0x88/0x90
device_add+0x4c4/0x7e8
platform_device_add+0x120/0x25c
mfd_add_devices+0x580/0x60c
tps65090_i2c_probe+0xb8/0x184
i2c_device_probe+0x234/0x2a4
really_probe+0x1c4/0x4e4
driver_probe_device+0x78/0x1d8
bus_for_each_drv+0x78/0xbc
__device_attach+0xe8/0x180
bus_probe_device+0x88/0x90
device_add+0x4c4/0x7e8
i2c_new_client_device+0x15c/0x27c
of_i2c_register_devices+0x114/0x184
i2c_register_adapter+0x1d8/0x6dc
ec_i2c_probe+0xc8/0x124
platform_probe+0x80/0xc0
really_probe+0x1c4/0x4e4
driver_probe_device+0x78/0x1d8
bus_for_each_drv+0x78/0xbc
__device_attach+0xe8/0x180
bus_probe_device+0x88/0x90
device_add+0x4c4/0x7e8
of_platform_device_create_pdata+0x90/0xc8
of_platform_bus_create+0x1a0/0x4ec
of_platform_populate+0x88/0x120
devm_of_platform_populate+0x40/0x80
cros_ec_register+0x174/0x308
cros_ec_spi_probe+0x16c/0x1ec
spi_probe+0x88/0xac
really_probe+0x1c4/0x4e4
driver_probe_device+0x78/0x1d8
device_driver_attach+0x58/0x60
__driver_attach+0xfc/0x160
bus_for_each_dev+0x6c/0xb8
bus_add_driver+0x170/0x20c
driver_register+0x78/0x10c
do_one_initcall+0x88/0x438
kernel_init_freeable+0x18c/0x1dc
kernel_init+0x8/0x118
ret_from_fork+0x14/0x38
0x0
other info that might help us debug this:
Chain exists of:
regulator_list_mutex --> regulator_ww_class_acquire -->
regulator_ww_class_mutex
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(regulator_ww_class_mutex);
lock(regulator_ww_class_acquire);
lock(regulator_ww_class_mutex);
lock(regulator_list_mutex);
*** DEADLOCK ***
5 locks held by swapper/0/1:
#0: dfb6e4c8 (&dev->mutex){....}-{3:3}, at: device_driver_attach+0x18/0x60
#1: c1fedcd8 (&dev->mutex){....}-{3:3}, at: __device_attach+0x34/0x180
#2: df53a4e8 (&dev->mutex){....}-{3:3}, at: __device_attach+0x34/0x180
#3: df5224d8 (&dev->mutex){....}-{3:3}, at: __device_attach+0x34/0x180
#4: df7190c0 (regulator_ww_class_mutex){+.+.}-{3:3}, at:
regulator_resolve_supply+0x44/0x318
stack backtrace:
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.11.0-rc1-00008-geaa7995c529b
#10095
Hardware name: Samsung Exynos (Flattened Device Tree)
[<c01116e8>] (unwind_backtrace) from [<c010cf58>] (show_stack+0x10/0x14)
[<c010cf58>] (show_stack) from [<c0b38ffc>] (dump_stack+0xa4/0xc4)
[<c0b38ffc>] (dump_stack) from [<c0193458>] (check_noncircular+0x14c/0x164)
[<c0193458>] (check_noncircular) from [<c0196b90>]
(__lock_acquire+0x1830/0x31cc)
[<c0196b90>] (__lock_acquire) from [<c01991e4>] (lock_acquire+0x2e4/0x5dc)
[<c01991e4>] (lock_acquire) from [<c0b4043c>] (__mutex_lock+0xa4/0xb60)
[<c0b4043c>] (__mutex_lock) from [<c0b40f14>] (mutex_lock_nested+0x1c/0x24)
[<c0b40f14>] (mutex_lock_nested) from [<c05ccd94>]
(regulator_lock_dependent+0x4c/0x2b0)
[<c05ccd94>] (regulator_lock_dependent) from [<c05d220c>]
(regulator_enable+0x30/0xe4)
[<c05d220c>] (regulator_enable) from [<c05d248c>]
(regulator_resolve_supply+0x1cc/0x318)
[<c05d248c>] (regulator_resolve_supply) from [<c05d2974>]
(regulator_register_resolve_supply+0x14/0x78)
[<c05d2974>] (regulator_register_resolve_supply) from [<c06a3000>]
(class_for_each_device+0x68/0xe8)
[<c06a3000>] (class_for_each_device) from [<c05d3e20>]
(regulator_register+0xa2c/0xc9c)
[<c05d3e20>] (regulator_register) from [<c05d5c70>]
(devm_regulator_register+0x40/0x70)
[<c05d5c70>] (devm_regulator_register) from [<c05dea58>]
(tps65090_regulator_probe+0x150/0x648)
[<c05dea58>] (tps65090_regulator_probe) from [<c06a3fe8>]
(platform_probe+0x80/0xc0)
[<c06a3fe8>] (platform_probe) from [<c06a1114>] (really_probe+0x1c4/0x4e4)
[<c06a1114>] (really_probe) from [<c06a14ac>]
(driver_probe_device+0x78/0x1d8)
[<c06a14ac>] (driver_probe_device) from [<c069f1a4>]
(bus_for_each_drv+0x78/0xbc)
[<c069f1a4>] (bus_for_each_drv) from [<c06a0eb0>]
(__device_attach+0xe8/0x180)
[<c06a0eb0>] (__device_attach) from [<c069ff50>]
(bus_probe_device+0x88/0x90)
[<c069ff50>] (bus_probe_device) from [<c069dbac>] (device_add+0x4c4/0x7e8)
[<c069dbac>] (device_add) from [<c06a3bac>]
(platform_device_add+0x120/0x25c)
[<c06a3bac>] (platform_device_add) from [<c06d5c7c>]
(mfd_add_devices+0x580/0x60c)
[<c06d5c7c>] (mfd_add_devices) from [<c06d80e8>]
(tps65090_i2c_probe+0xb8/0x184)
[<c06d80e8>] (tps65090_i2c_probe) from [<c0822520>]
(i2c_device_probe+0x234/0x2a4)
[<c0822520>] (i2c_device_probe) from [<c06a1114>] (really_probe+0x1c4/0x4e4)
[<c06a1114>] (really_probe) from [<c06a14ac>]
(driver_probe_device+0x78/0x1d8)
[<c06a14ac>] (driver_probe_device) from [<c069f1a4>]
(bus_for_each_drv+0x78/0xbc)
[<c069f1a4>] (bus_for_each_drv) from [<c06a0eb0>]
(__device_attach+0xe8/0x180)
[<c06a0eb0>] (__device_attach) from [<c069ff50>]
(bus_probe_device+0x88/0x90)
[<c069ff50>] (bus_probe_device) from [<c069dbac>] (device_add+0x4c4/0x7e8)
[<c069dbac>] (device_add) from [<c0824aec>]
(i2c_new_client_device+0x15c/0x27c)
[<c0824aec>] (i2c_new_client_device) from [<c08285e0>]
(of_i2c_register_devices+0x114/0x184)
[<c08285e0>] (of_i2c_register_devices) from [<c08254b8>]
(i2c_register_adapter+0x1d8/0x6dc)
[<c08254b8>] (i2c_register_adapter) from [<c082dd1c>]
(ec_i2c_probe+0xc8/0x124)
[<c082dd1c>] (ec_i2c_probe) from [<c06a3fe8>] (platform_probe+0x80/0xc0)
[<c06a3fe8>] (platform_probe) from [<c06a1114>] (really_probe+0x1c4/0x4e4)
[<c06a1114>] (really_probe) from [<c06a14ac>]
(driver_probe_device+0x78/0x1d8)
[<c06a14ac>] (driver_probe_device) from [<c069f1a4>]
(bus_for_each_drv+0x78/0xbc)
[<c069f1a4>] (bus_for_each_drv) from [<c06a0eb0>]
(__device_attach+0xe8/0x180)
[<c06a0eb0>] (__device_attach) from [<c069ff50>]
(bus_probe_device+0x88/0x90)
[<c069ff50>] (bus_probe_device) from [<c069dbac>] (device_add+0x4c4/0x7e8)
[<c069dbac>] (device_add) from [<c08b140c>]
(of_platform_device_create_pdata+0x90/0xc8)
[<c08b140c>] (of_platform_device_create_pdata) from [<c08b15f0>]
(of_platform_bus_create+0x1a0/0x4ec)
[<c08b15f0>] (of_platform_bus_create) from [<c08b1af0>]
(of_platform_populate+0x88/0x120)
[<c08b1af0>] (of_platform_populate) from [<c08b1bdc>]
(devm_of_platform_populate+0x40/0x80)
[<c08b1bdc>] (devm_of_platform_populate) from [<c08b72fc>]
(cros_ec_register+0x174/0x308)
[<c08b72fc>] (cros_ec_register) from [<c08b868c>]
(cros_ec_spi_probe+0x16c/0x1ec)
[<c08b868c>] (cros_ec_spi_probe) from [<c071b2f4>] (spi_probe+0x88/0xac)
[<c071b2f4>] (spi_probe) from [<c06a1114>] (really_probe+0x1c4/0x4e4)
[<c06a1114>] (really_probe) from [<c06a14ac>]
(driver_probe_device+0x78/0x1d8)
[<c06a14ac>] (driver_probe_device) from [<c06a19c4>]
(device_driver_attach+0x58/0x60)
[<c06a19c4>] (device_driver_attach) from [<c06a1ac8>]
(__driver_attach+0xfc/0x160)
[<c06a1ac8>] (__driver_attach) from [<c069f0cc>]
(bus_for_each_dev+0x6c/0xb8)
[<c069f0cc>] (bus_for_each_dev) from [<c06a0204>]
(bus_add_driver+0x170/0x20c)
[<c06a0204>] (bus_add_driver) from [<c06a2968>] (driver_register+0x78/0x10c)
[<c06a2968>] (driver_register) from [<c0102428>]
(do_one_initcall+0x88/0x438)
[<c0102428>] (do_one_initcall) from [<c1101104>]
(kernel_init_freeable+0x18c/0x1dc)
[<c1101104>] (kernel_init_freeable) from [<c0b3c65c>]
(kernel_init+0x8/0x118)
[<c0b3c65c>] (kernel_init) from [<c010011c>] (ret_from_fork+0x14/0x38)
Exception stack(0xc1ce3fb0 to 0xc1ce3ff8)
3fa0: 00000000 00000000 00000000
00000000
3fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000
00000000
3fe0: 00000000 00000000 00000000 00000000 00000013 00000000
I didn't analyze it yet if this warning is really an issue or just a
false positive. If you have any hints or comments let me know.
> ---
> drivers/regulator/core.c | 39 ++++++++++++++++++++++++++++-----------
> 1 file changed, 28 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
> index fee9241..3ae5ccd 100644
> --- a/drivers/regulator/core.c
> +++ b/drivers/regulator/core.c
> @@ -1813,23 +1813,34 @@ static int regulator_resolve_supply(struct regulator_dev *rdev)
> {
> struct regulator_dev *r;
> struct device *dev = rdev->dev.parent;
> - int ret;
> + int ret = 0;
>
> /* No supply to resolve? */
> if (!rdev->supply_name)
> return 0;
>
> - /* Supply already resolved? */
> + /* Supply already resolved? (fast-path without locking contention) */
> if (rdev->supply)
> return 0;
>
> + /*
> + * Recheck rdev->supply with rdev->mutex lock held to avoid a race
> + * between rdev->supply null check and setting rdev->supply in
> + * set_supply() from concurrent tasks.
> + */
> + regulator_lock(rdev);
> +
> + /* Supply just resolved by a concurrent task? */
> + if (rdev->supply)
> + goto out;
> +
> r = regulator_dev_lookup(dev, rdev->supply_name);
> if (IS_ERR(r)) {
> ret = PTR_ERR(r);
>
> /* Did the lookup explicitly defer for us? */
> if (ret == -EPROBE_DEFER)
> - return ret;
> + goto out;
>
> if (have_full_constraints()) {
> r = dummy_regulator_rdev;
> @@ -1837,15 +1848,18 @@ static int regulator_resolve_supply(struct regulator_dev *rdev)
> } else {
> dev_err(dev, "Failed to resolve %s-supply for %s\n",
> rdev->supply_name, rdev->desc->name);
> - return -EPROBE_DEFER;
> + ret = -EPROBE_DEFER;
> + goto out;
> }
> }
>
> if (r == rdev) {
> dev_err(dev, "Supply for %s (%s) resolved to itself\n",
> rdev->desc->name, rdev->supply_name);
> - if (!have_full_constraints())
> - return -EINVAL;
> + if (!have_full_constraints()) {
> + ret = -EINVAL;
> + goto out;
> + }
> r = dummy_regulator_rdev;
> get_device(&r->dev);
> }
> @@ -1859,7 +1873,8 @@ static int regulator_resolve_supply(struct regulator_dev *rdev)
> if (r->dev.parent && r->dev.parent != rdev->dev.parent) {
> if (!device_is_bound(r->dev.parent)) {
> put_device(&r->dev);
> - return -EPROBE_DEFER;
> + ret = -EPROBE_DEFER;
> + goto out;
> }
> }
>
> @@ -1867,13 +1882,13 @@ static int regulator_resolve_supply(struct regulator_dev *rdev)
> ret = regulator_resolve_supply(r);
> if (ret < 0) {
> put_device(&r->dev);
> - return ret;
> + goto out;
> }
>
> ret = set_supply(rdev, r);
> if (ret < 0) {
> put_device(&r->dev);
> - return ret;
> + goto out;
> }
>
> /*
> @@ -1886,11 +1901,13 @@ static int regulator_resolve_supply(struct regulator_dev *rdev)
> if (ret < 0) {
> _regulator_put(rdev->supply);
> rdev->supply = NULL;
> - return ret;
> + goto out;
> }
> }
>
> - return 0;
> +out:
> + regulator_unlock(rdev);
> + return ret;
> }
>
> /* Internal regulator request function */
Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland
Powered by blists - more mailing lists