[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20150308211449.GG11991@spacedout.fries.net>
Date:	Sun, 8 Mar 2015 16:14:49 -0500
From:	David Fries <david@...es.net>
To:	??????? ??????? <zbr@...emap.net>
Cc:	Thorsten Bschorr <thorsten@...horr.de>,
	Jonathan ALIBERT <jonathan.alibert@...il.com>,
	"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH] Avoid null-pointer access in w1/slaves/w1_therm
On Wed, Mar 04, 2015 at 06:36:41PM +0300, ??????? ??????? wrote:
> Hi David
> 
> 02.03.2015, 03:17, "David Fries" <david@...es.net>:
> 
> > You are correct, it would be a race condition if it doesn't increment
> > the refcnt before unlocking the mutex, and it should get the mutex
> > before unref.  Here's an updated version, I haven't even tried to
> > compile it.
> >
> > What do you think Evgeniy?
> 
> Sounds correct, it should do the trick, please cook up a patch, Thorsten, can you test it?
I quickly found out that strategy wasn't going to work.
w1_unref_slave calls w1_family_notify(BUS_NOTIFY_DEL_DEVICE, sl);
which calls sysfs_remove_groups which deadlocks,
[<ffffffff81047426>] ? wait_woken+0x54/0x54
[<ffffffff810e7d6e>] ? kernfs_remove_by_name_ns+0x67/0x82
[<ffffffff810e97c8>] ? remove_files+0x30/0x58
[<ffffffff810e9b8a>] ? sysfs_remove_group+0x60/0x7b
[<ffffffff810e9bc2>] ? sysfs_remove_groups+0x1d/0x2f
[<ffffffffa01951ae>] ? w1_unref_slave+0xd9/0x13b [wire]
[<ffffffffa01a516e>] ? w1_slave_show+0x11a/0x335 [w1_therm]
Here's a different strategy, add a w1_therm family_data specific mutex
so w1_therm_remove_slave won't return while another thread is in
w1_slave_show.  Thoughts?
I included three patches, the first is the proposed fix, the second
makes it more reproducible (and since my testing doesn't have external
power I had to ignore that check), the third is just some debugging
messages.  For testing I'm starting a read from w1_slave then manually
remove the sensor, which seems to do the trick.
echo 28-[ds18b20_id] > /sys/devices/w1_bus_master1/w1_master_remove
Give it a try and let me know how it goes.
My test system host is running 3.16, I'm using kvm to test 3.19 giving
it access to the USB one wire dongle, and there's some refcnt problem
I've not been able to track down.  Once and a while when I have the
kvm grab the USB device the host will start printing a refcount
problem, which takes a reboot to clear, which is annoying because the
reason to test in kvm is to not break the host.
w1_master_driver w1_bus_master1: Waiting for w1_bus_master1 to become
free: refcnt=1.
>From bcde1a8d3d00cc3b13dff7e2476f8c03efc59001 Mon Sep 17 00:00:00 2001
From: David Fries <David@...es.net>
Date: Sat, 7 Mar 2015 22:25:37 -0600
Subject: [PATCH 1/3] w1_therm, don't let the slave go away while in
 w1_slave_show
A temperature conversion can take 750 ms to complete, if the sensor is
externally powered it releases the bus_mutex while it waits, but if
the slave device is removed sl becomes a dangling pointer.
The race condition window can be 10 * 750 ms = 7.5 seconds if the crc
check fails.
Reported-By: Thorsten Bschorr <thorsten@...horr.de>
Signed-off-by: David Fries <David@...es.net>
---
 drivers/w1/slaves/w1_therm.c |   52 ++++++++++++++++++++++++++++++------------
 1 file changed, 38 insertions(+), 14 deletions(-)
diff --git a/drivers/w1/slaves/w1_therm.c b/drivers/w1/slaves/w1_therm.c
index 1f11a20..6b3ef93 100644
--- a/drivers/w1/slaves/w1_therm.c
+++ b/drivers/w1/slaves/w1_therm.c
@@ -59,9 +59,20 @@ MODULE_ALIAS("w1-family-" __stringify(W1_THERM_DS28EA00));
 static int w1_strong_pullup = 1;
 module_param_named(strong_pullup, w1_strong_pullup, int, 0);
 
+struct w1_therm_family_data {
+	uint8_t rom[9];
+	struct mutex lock;
+};
+
+/* return the address of the lock in the family data */
+#define THERM_LOCK(sl) \
+	(&((struct w1_therm_family_data*)sl->family_data)->lock)
+
 static int w1_therm_add_slave(struct w1_slave *sl)
 {
-	sl->family_data = kzalloc(9, GFP_KERNEL);
+	sl->family_data = kzalloc(sizeof(struct w1_therm_family_data),
+		GFP_KERNEL);
+	mutex_init(THERM_LOCK(sl));
 	if (!sl->family_data)
 		return -ENOMEM;
 	return 0;
@@ -69,6 +80,11 @@ static int w1_therm_add_slave(struct w1_slave *sl)
 
 static void w1_therm_remove_slave(struct w1_slave *sl)
 {
+	/* Getting the lock means w1_slave_show isn't sleeping and the
+	 * family_data can be freed.
+	 */
+	mutex_lock(THERM_LOCK(sl));
+	mutex_unlock(THERM_LOCK(sl));
 	kfree(sl->family_data);
 	sl->family_data = NULL;
 }
@@ -194,13 +210,15 @@ static ssize_t w1_slave_show(struct device *device,
 	struct w1_slave *sl = dev_to_w1_slave(device);
 	struct w1_master *dev = sl->master;
 	u8 rom[9], crc, verdict, external_power;
-	int i, max_trying = 10;
+	int i, ret, max_trying = 10;
 	ssize_t c = PAGE_SIZE;
 
-	i = mutex_lock_interruptible(&dev->bus_mutex);
-	if (i != 0)
-		return i;
+	ret = mutex_lock_interruptible(&dev->bus_mutex);
+	if (ret != 0)
+		goto post_unlock;
 
+	/* prevent the slave from going away in sleep */
+	mutex_lock(THERM_LOCK(sl));
 	memset(rom, 0, sizeof(rom));
 
 	while (max_trying--) {
@@ -229,18 +247,19 @@ static ssize_t w1_slave_show(struct device *device,
 			if (external_power) {
 				mutex_unlock(&dev->bus_mutex);
 
-				sleep_rem = msleep_interruptible(tm);
-				if (sleep_rem != 0)
-					return -EINTR;
+				if (sleep_rem != 0) {
+					ret = -EINTR;
+					goto post_unlock;
+				}
 
-				i = mutex_lock_interruptible(&dev->bus_mutex);
-				if (i != 0)
-					return i;
+				ret = mutex_lock_interruptible(&dev->bus_mutex);
+				if (ret != 0)
+					goto post_unlock;
 			} else if (!w1_strong_pullup) {
 				sleep_rem = msleep_interruptible(tm);
 				if (sleep_rem != 0) {
-					mutex_unlock(&dev->bus_mutex);
-					return -EINTR;
+					ret = -EINTR;
+					goto pre_unlock;
 				}
 			}
 
@@ -279,9 +298,14 @@ static ssize_t w1_slave_show(struct device *device,
 
 	c -= snprintf(buf + PAGE_SIZE - c, c, "t=%d\n",
 		w1_convert_temp(rom, sl->family->fid));
+	ret = PAGE_SIZE - c;
+
+pre_unlock:
 	mutex_unlock(&dev->bus_mutex);
 
-	return PAGE_SIZE - c;
+post_unlock:
+	mutex_unlock(THERM_LOCK(sl));
+	return ret;
 }
 
 static int __init w1_therm_init(void)
-- 
1.7.10.4
>From bb3686e0d1c38798ce922e2565ac094757f02f9d Mon Sep 17 00:00:00 2001
From: David Fries <David@...es.net>
Date: Sat, 7 Mar 2015 19:53:54 -0600
Subject: [PATCH 2/3] increase w1_therm race condition window
Also disable the check for external power as the current setup
doesn't have it.
---
 drivers/w1/slaves/w1_therm.c |    4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/w1/slaves/w1_therm.c b/drivers/w1/slaves/w1_therm.c
index 6b3ef93..cb46e85 100644
--- a/drivers/w1/slaves/w1_therm.c
+++ b/drivers/w1/slaves/w1_therm.c
@@ -244,9 +244,11 @@ static ssize_t w1_slave_show(struct device *device,
 
 			w1_write_8(dev, W1_CONVERT_TEMP);
 
-			if (external_power) {
+			/*if (external_power)*/ if(true) {
 				mutex_unlock(&dev->bus_mutex);
 
+				//sleep_rem = msleep_interruptible(tm);
+				sleep_rem = msleep_interruptible(10*1000);
 				if (sleep_rem != 0) {
 					ret = -EINTR;
 					goto post_unlock;
-- 
1.7.10.4
>From 6eff9cbdacb36f0e960b1117b48584af18160443 Mon Sep 17 00:00:00 2001
From: David Fries <David@...es.net>
Date: Sun, 8 Mar 2015 14:01:28 -0500
Subject: [PATCH 3/3] debugging w1_therm race condition
---
 drivers/w1/slaves/w1_therm.c |    4 ++++
 drivers/w1/w1.c              |    4 ++++
 2 files changed, 8 insertions(+)
diff --git a/drivers/w1/slaves/w1_therm.c b/drivers/w1/slaves/w1_therm.c
index cb46e85..cb0fe1b 100644
--- a/drivers/w1/slaves/w1_therm.c
+++ b/drivers/w1/slaves/w1_therm.c
@@ -80,11 +80,13 @@ static int w1_therm_add_slave(struct w1_slave *sl)
 
 static void w1_therm_remove_slave(struct w1_slave *sl)
 {
+	printk(KERN_NOTICE "%s about to lock faily_data->lock\n", __func__);
 	/* Getting the lock means w1_slave_show isn't sleeping and the
 	 * family_data can be freed.
 	 */
 	mutex_lock(THERM_LOCK(sl));
 	mutex_unlock(THERM_LOCK(sl));
+	printk(KERN_NOTICE "%s unlocked faily_data->lock\n", __func__);
 	kfree(sl->family_data);
 	sl->family_data = NULL;
 }
@@ -248,7 +250,9 @@ static ssize_t w1_slave_show(struct device *device,
 				mutex_unlock(&dev->bus_mutex);
 
 				//sleep_rem = msleep_interruptible(tm);
+				printk(KERN_NOTICE "start sleep %p refcnt %u\n", sl, atomic_read(&sl->refcnt));
 				sleep_rem = msleep_interruptible(10*1000);
+				printk(KERN_NOTICE "end sleep %p refcnt %u\n", sl, atomic_read(&sl->refcnt));
 				if (sleep_rem != 0) {
 					ret = -EINTR;
 					goto post_unlock;
diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c
index 181f41c..9bb3116 100644
--- a/drivers/w1/w1.c
+++ b/drivers/w1/w1.c
@@ -650,8 +650,10 @@ static int w1_family_notify(unsigned long action, struct w1_slave *sl)
 	case BUS_NOTIFY_DEL_DEVICE:
 		if (fops->remove_slave)
 			sl->family->fops->remove_slave(sl);
+		printk(KERN_NOTICE "calling sysfs_remove_groups\n");
 		if (fops->groups)
 			sysfs_remove_groups(&sl->dev.kobj, fops->groups);
+		printk(KERN_NOTICE "returned sysfs_remove_groups\n");
 		break;
 	}
 	return 0;
@@ -769,6 +771,7 @@ int w1_unref_slave(struct w1_slave *sl)
 	int refcnt;
 	mutex_lock(&dev->list_mutex);
 	refcnt = atomic_sub_return(1, &sl->refcnt);
+	printk(KERN_NOTICE "%s slave %p refcnt now %u\n", __func__, sl, refcnt);
 	if (refcnt == 0) {
 		struct w1_netlink_msg msg;
 
@@ -788,6 +791,7 @@ int w1_unref_slave(struct w1_slave *sl)
 		memset(sl, 0, sizeof(*sl));
 		#endif
 		kfree(sl);
+		printk(KERN_NOTICE "%s w1_slave %p\n", __func__, sl);
 	}
 	atomic_dec(&dev->refcnt);
 	mutex_unlock(&dev->list_mutex);
-- 
1.7.10.4
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Powered by blists - more mailing lists
 
