lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <s5hpnidgaln.wl-tiwai@suse.de>
Date:   Wed, 30 Oct 2019 22:57:40 +0100
From:   Takashi Iwai <tiwai@...e.de>
To:     "Kirill A. Shutemov" <kirill@...temov.name>
Cc:     Jaroslav Kysela <perex@...ex.cz>, Takashi Iwai <tiwai@...e.com>,
        Kirill Smelkov <kirr@...edi.com>,
        Guenter Roeck <linux@...ck-us.net>,
        Enric Balletbo i Serra <enric.balletbo@...labora.com>,
        Logan Gunthorpe <logang@...tatee.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        alsa-devel@...a-project.org, linux-kernel@...r.kernel.org
Subject: Re: sound/core/timer: Deadlock on register_mutex

On Wed, 30 Oct 2019 16:28:23 +0100,
Kirill A. Shutemov wrote:
> 
> On Wed, Oct 30, 2019 at 06:11:37PM +0300, Kirill A. Shutemov wrote:
> > On Wed, Oct 30, 2019 at 04:04:01PM +0100, Takashi Iwai wrote:
> > > On Wed, 30 Oct 2019 15:10:29 +0100,
> > > Kirill A. Shutemov wrote:
> > > > 
> > > > Hi,
> > > > 
> > > > I've stepped on this after pulling USB sound card:
> > > > 
> > > > 	 ============================================
> > > > 	 WARNING: possible recursive locking detected
> > > > 	 5.4.0-rc4-00090-g95b5dc072cc3-dirty #48 Not tainted
> > > > 	 --------------------------------------------
> > > > 	 xdg-screensaver/1321 is trying to acquire lock:
> > > > 	 ffffffffbaf6b3a0 (register_mutex){+.+.}, at: snd_timer_free.part.0 (/include/linux/compiler.h:199 /include/linux/list.h:268 /sound/core/timer.c:944)
> > > > 
> > > > 	but task is already holding lock:
> > > > 	 ffffffffbaf6b3a0 (register_mutex){+.+.}, at: snd_timer_close (/sound/core/timer.c:416)
> > > > 
> > > > 	other info that might help us debug this:
> > > > 	  Possible unsafe locking scenario:
> > > > 
> > > > 		CPU0
> > > > 		----
> > > > 	   lock(register_mutex);
> > > > 	   lock(register_mutex);
> > > > 
> > > > 	*** DEADLOCK ***
> > > > 
> > > > 	  May be due to missing lock nesting notation
> > > > 
> > > > 	 2 locks held by xdg-screensaver/1321:
> > > > 	 #0: ffff9f74bbf5ef50 (&tu->ioctl_lock){+.+.}, at: snd_timer_user_release (/sound/core/timer.c:1467)
> > > > 	 #1: ffffffffbaf6b3a0 (register_mutex){+.+.}, at: snd_timer_close (/sound/core/timer.c:416)
> > > > 
> > > > 	stack backtrace:
> > > > 	 CPU: 27 PID: 1321 Comm: xdg-screensaver Not tainted 5.4.0-rc4-00090-g95b5dc072cc3-dirty #48
> > > > 	 Hardware name: Gigabyte Technology Co., Ltd. X299 AORUS Gaming 3 Pro/X299 AORUS Gaming 3 Pro-CF, BIOS F3 12/28/2017
> > > > 	 Call Trace:
> > > > 	 dump_stack (/lib/dump_stack.c:115)
> > > > 	 __lock_acquire.cold (/kernel/locking/lockdep.c:2371 /kernel/locking/lockdep.c:2412 /kernel/locking/lockdep.c:2955 /kernel/locking/lockdep.c:3955)
> > > > 	 ? __lock_acquire (/kernel/locking/lockdep.c:3962)
> > > > 	 lock_acquire (/arch/x86/include/asm/current.h:15 /kernel/locking/lockdep.c:4489)
> > > > 	 ? snd_timer_free.part.0 (/include/linux/compiler.h:199 /include/linux/list.h:268 /sound/core/timer.c:944)
> > > > 	 __mutex_lock (/include/linux/compiler.h:199 /arch/x86/include/asm/atomic64_64.h:22 /include/asm-generic/atomic-instrumented.h:837 /include/asm-generic/atomic-long.h:28 /kernel/locking/mutex.c:111 /kernel/locking/mutex.c:152 /kernel/locking/mutex.c:958 /kernel/locking/mutex.c:1103)
> > > > 	 ? snd_timer_free.part.0 (/include/linux/compiler.h:199 /include/linux/list.h:268 /sound/core/timer.c:944)
> > > > 	 ? __mutex_lock (/include/linux/compiler.h:199 /arch/x86/include/asm/atomic64_64.h:22 /include/asm-generic/atomic-instrumented.h:837 /include/asm-generic/atomic-long.h:28 /kernel/locking/mutex.c:111 /kernel/locking/mutex.c:152 /kernel/locking/mutex.c:958 /kernel/locking/mutex.c:1103)
> > > > 	 ? __mutex_lock (/arch/x86/include/asm/preempt.h:102 /kernel/locking/mutex.c:964 /kernel/locking/mutex.c:1103)
> > > > 	 ? snd_timer_free.part.0 (/include/linux/compiler.h:199 /include/linux/list.h:268 /sound/core/timer.c:944)
> > > > 	 ? snd_timer_free.part.0 (/include/linux/compiler.h:199 /include/linux/list.h:268 /sound/core/timer.c:944)
> > > > 	 ? lockdep_hardirqs_on (/kernel/locking/lockdep.c:3394 /kernel/locking/lockdep.c:3434)
> > > > 	 snd_timer_free.part.0 (/include/linux/compiler.h:199 /include/linux/list.h:268 /sound/core/timer.c:944)
> > > > 	 snd_timer_dev_free (/sound/core/timer.c:967)
> > > > 	 __snd_device_free (/sound/core/device.c:76)
> > > > 	 snd_device_free_all (/sound/core/device.c:228)
> > > > 	 release_card_device (/sound/core/init.c:471 /sound/core/init.c:140)
> > > > 	 device_release (/drivers/base/core.c:1105)
> > > > 	 kobject_put (/lib/kobject.c:697 /lib/kobject.c:722 /include/linux/kref.h:65 /lib/kobject.c:739)
> > > > 	 snd_timer_close_locked (/sound/core/timer.c:398)
> > > > 	 snd_timer_close (/sound/core/timer.c:417)
> > > > 	 snd_timer_user_release (/sound/core/timer.c:1469)
> > > > 	 __fput (/fs/file_table.c:281)
> > > > 	 task_work_run (/kernel/task_work.c:115 (discriminator 1))
> > > > 	 exit_to_usermode_loop (/include/linux/tracehook.h:188 /arch/x86/entry/common.c:163)
> > > > 	 do_syscall_64 (/arch/x86/entry/common.c:194 /arch/x86/entry/common.c:274 /arch/x86/entry/common.c:300)
> > > > 	 entry_SYSCALL_64_after_hwframe (/arch/x86/entry/entry_64.S:177)
> > > 
> > > OK, this looks like a deadlock that is via put_device() called at
> > > closing the timer device that is the last open instance while freeing
> > > the card.
> > > 
> > > Could you try the patch below?
> > 
> > I can, but I'm not sure if I can trigger the issue for the second time.
> 
> Yeah, I was able to reproduce it without the patch and cannot with.
> 
> Reported-and-tested-by: Kirill A. Shutemov <kirill.shutemov@...ux.intel.com>

Great, thanks for quick testing!

FWIW, below is the proper patch with the description I'm going to
queue.


Takashi

-- 8< --
From: Takashi Iwai <tiwai@...e.de>
Subject: [PATCH] ALSA: timer: Fix mutex deadlock at releasing card

When a card is disconnected while in use, the system waits until all
opened files are closed then releases the card.  This is done via
put_device() of the card device in each device release code.

The recently reported mutex deadlock bug happens in this code path;
snd_timer_close() for the timer device deals with the global
register_mutex and it calls put_device() there.  When this timer
device is the last one, the card gets freed and it eventually calls
snd_timer_free(), which has again the protection with the global
register_mutex -- boom.

Basically put_device() call itself is race-free, so a relative simple
workaround is to move this put_device() call out of the mutex.  For
achieving that, in this patch, snd_timer_close_locked() got a new
argument to store the card device pointer in return, and each caller
invokes put_device() with the returned object after the mutex unlock.

Reported-and-tested-by: Kirill A. Shutemov <kirill.shutemov@...ux.intel.com>
Cc: <stable@...r.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@...e.de>
---
 sound/core/timer.c | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/sound/core/timer.c b/sound/core/timer.c
index 5c9fbf3f4340..6b724d2ee2de 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -226,7 +226,8 @@ static int snd_timer_check_master(struct snd_timer_instance *master)
 	return 0;
 }
 
-static int snd_timer_close_locked(struct snd_timer_instance *timeri);
+static int snd_timer_close_locked(struct snd_timer_instance *timeri,
+				  struct device **card_devp_to_put);
 
 /*
  * open a timer instance
@@ -238,6 +239,7 @@ int snd_timer_open(struct snd_timer_instance **ti,
 {
 	struct snd_timer *timer;
 	struct snd_timer_instance *timeri = NULL;
+	struct device *card_dev_to_put = NULL;
 	int err;
 
 	mutex_lock(&register_mutex);
@@ -261,7 +263,7 @@ int snd_timer_open(struct snd_timer_instance **ti,
 		list_add_tail(&timeri->open_list, &snd_timer_slave_list);
 		err = snd_timer_check_slave(timeri);
 		if (err < 0) {
-			snd_timer_close_locked(timeri);
+			snd_timer_close_locked(timeri, &card_dev_to_put);
 			timeri = NULL;
 		}
 		goto unlock;
@@ -313,7 +315,7 @@ int snd_timer_open(struct snd_timer_instance **ti,
 			timeri = NULL;
 
 			if (timer->card)
-				put_device(&timer->card->card_dev);
+				card_dev_to_put = &timer->card->card_dev;
 			module_put(timer->module);
 			goto unlock;
 		}
@@ -323,12 +325,15 @@ int snd_timer_open(struct snd_timer_instance **ti,
 	timer->num_instances++;
 	err = snd_timer_check_master(timeri);
 	if (err < 0) {
-		snd_timer_close_locked(timeri);
+		snd_timer_close_locked(timeri, &card_dev_to_put);
 		timeri = NULL;
 	}
 
  unlock:
 	mutex_unlock(&register_mutex);
+	/* put_device() is called after unlock for avoiding deadlock */
+	if (card_dev_to_put)
+		put_device(card_dev_to_put);
 	*ti = timeri;
 	return err;
 }
@@ -338,7 +343,8 @@ EXPORT_SYMBOL(snd_timer_open);
  * close a timer instance
  * call this with register_mutex down.
  */
-static int snd_timer_close_locked(struct snd_timer_instance *timeri)
+static int snd_timer_close_locked(struct snd_timer_instance *timeri,
+				  struct device **card_devp_to_put)
 {
 	struct snd_timer *timer = timeri->timer;
 	struct snd_timer_instance *slave, *tmp;
@@ -395,7 +401,7 @@ static int snd_timer_close_locked(struct snd_timer_instance *timeri)
 			timer->hw.close(timer);
 		/* release a card refcount for safe disconnection */
 		if (timer->card)
-			put_device(&timer->card->card_dev);
+			*card_devp_to_put = &timer->card->card_dev;
 		module_put(timer->module);
 	}
 
@@ -407,14 +413,18 @@ static int snd_timer_close_locked(struct snd_timer_instance *timeri)
  */
 int snd_timer_close(struct snd_timer_instance *timeri)
 {
+	struct device *card_dev_to_put = NULL;
 	int err;
 
 	if (snd_BUG_ON(!timeri))
 		return -ENXIO;
 
 	mutex_lock(&register_mutex);
-	err = snd_timer_close_locked(timeri);
+	err = snd_timer_close_locked(timeri, &card_dev_to_put);
 	mutex_unlock(&register_mutex);
+	/* put_device() is called after unlock for avoiding deadlock */
+	if (card_dev_to_put)
+		put_device(card_dev_to_put);
 	return err;
 }
 EXPORT_SYMBOL(snd_timer_close);
-- 
2.16.4

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ