lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1244689688.2560.268.camel@ymzhang>
Date:	Thu, 11 Jun 2009 11:08:07 +0800
From:	"Zhang, Yanmin" <yanmin_zhang@...ux.intel.com>
To:	Darren Hart <dvhltc@...ibm.com>
Cc:	Peter Zijlstra <peterz@...radead.org>,
	Rusty Russell <rusty@...tcorp.com.au>,
	LKML <linux-kernel@...r.kernel.org>
Subject: Bug: fio traps into kernel without exiting because futex has a
	deadloop

I investigate a fio hang issue. When I run fio multi-process
testing on many disks, fio traps into kernel and doesn't exit
(mostly hit once after runing sub test cases for hundreds of times).

Oprofile data shows kernel consumes time with some futex functions.
Command kill couldn't kill the process and machine reboot also hangs.

Eventually, I locate the root cause as a bug of futex. Kernel enters
a deadloop between 'retry' and 'goto retry' in function futex_wake_op.
By unknown reason (might be an issue of fio or glibc), parameter uaddr2
points to an area which is READONLY. So futex_atomic_op_inuser returns
-EFAULT when trying to changing the data at uaddr2, but later get_user
still succeeds becasue the area is READONLY. Then go back to retry.

I create a simple test case to trigger it, which just shmat an READONLY
area for address uaddr2.

It could be used as a DOS attack.

'git log kernel/futex.c' shows below commit creates the issue obviously.

commit e4dc5b7a36a49eff97050894cf1b3a9a02523717
Author: Darren Hart <dvhltc@...ibm.com>
Date:   Thu Mar 12 00:56:13 2009 -0700

    futex: clean up fault logic
    
    Impact: cleanup
    
    Older versions of the futex code held the mmap_sem which had to
    be dropped in order to call get_user(), so a two-pronged fault
    handling mechanism was employed to handle faults of the atomic
    operations.  The mmap_sem is no longer held, so get_user()
    should be adequate.  This patch greatly simplifies the logic and
    improves legibility.


Reverting it fixes the issue. In addition, the patch deletes some comments
around calling futex_handle_fault, but forgots in futex_unlock_pi.

There is a confliction to revert the commit. I worked out a patch.

The futex codes seem complicated. We could work out cleanup patches later after
applying the reverting patch.

---

--- linux-2.6.30-rc8/kernel/futex.c	2009-06-10 06:32:19.000000000 +0800
+++ linux-2.6.30-rc8_futex/kernel/futex.c	2009-06-10 00:07:08.000000000 +0800
@@ -300,6 +300,41 @@ static int get_futex_value_locked(u32 *d
 	return ret ? -EFAULT : 0;
 }
 
+/*
+ * Fault handling.
+ */
+static int futex_handle_fault(unsigned long address, int attempt)
+{
+	struct vm_area_struct * vma;
+	struct mm_struct *mm = current->mm;
+	int ret = -EFAULT;
+
+	if (attempt > 2)
+		return ret;
+
+	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, address);
+	if (vma && address >= vma->vm_start &&
+	    (vma->vm_flags & VM_WRITE)) {
+		int fault;
+		fault = handle_mm_fault(mm, vma, address, 1);
+		if (unlikely((fault & VM_FAULT_ERROR))) {
+#if 0
+			/* XXX: let's do this when we verify it is OK */
+			if (ret & VM_FAULT_OOM)
+				ret = -ENOMEM;
+#endif
+		} else {
+			ret = 0;
+			if (fault & VM_FAULT_MAJOR)
+				current->maj_flt++;
+			else
+				current->min_flt++;
+		}
+	}
+	up_read(&mm->mmap_sem);
+	return ret;
+}
 
 /*
  * PI code:
@@ -722,9 +757,9 @@ futex_wake_op(u32 __user *uaddr1, int fs
 	struct futex_hash_bucket *hb1, *hb2;
 	struct plist_head *head;
 	struct futex_q *this, *next;
-	int ret, op_ret;
+	int ret, op_ret, attempt = 0;
 
-retry:
+retryfull:
 	ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ);
 	if (unlikely(ret != 0))
 		goto out;
@@ -735,8 +770,9 @@ retry:
 	hb1 = hash_futex(&key1);
 	hb2 = hash_futex(&key2);
 
+retry:
 	double_lock_hb(hb1, hb2);
-retry_private:
+
 	op_ret = futex_atomic_op_inuser(op, uaddr2);
 	if (unlikely(op_ret < 0)) {
 		u32 dummy;
@@ -757,16 +793,28 @@ retry_private:
 			goto out_put_keys;
 		}
 
+		/*
+		 * futex_atomic_op_inuser needs to both read and write
+		 * *(int __user *)uaddr2, but we can't modify it
+		 * non-atomically.  Therefore, if get_user below is not
+		 * enough, we need to handle the fault ourselves, while
+		 * still holding the mmap_sem.
+		 */
+		if (attempt++) {
+			ret = futex_handle_fault((unsigned long)uaddr2,
+						 attempt);
+			if (ret)
+				goto out_put_keys;
+			goto retry;
+		}
+
 		ret = get_user(dummy, uaddr2);
 		if (ret)
 			goto out_put_keys;
 
-		if (!fshared)
-			goto retry_private;
-
 		put_futex_key(fshared, &key2);
 		put_futex_key(fshared, &key1);
-		goto retry;
+		goto retryfull;
 	}
 
 	head = &hb1->chain;
@@ -826,7 +874,6 @@ retry:
 	hb1 = hash_futex(&key1);
 	hb2 = hash_futex(&key2);
 
-retry_private:
 	double_lock_hb(hb1, hb2);
 
 	if (likely(cmpval != NULL)) {
@@ -837,16 +884,15 @@ retry_private:
 		if (unlikely(ret)) {
 			double_unlock_hb(hb1, hb2);
 
+			put_futex_key(fshared, &key2);
+			put_futex_key(fshared, &key1);
+
 			ret = get_user(curval, uaddr1);
-			if (ret)
-				goto out_put_keys;
 
-			if (!fshared)
-				goto retry_private;
+			if (!ret)
+				goto retry;
 
-			put_futex_key(fshared, &key2);
-			put_futex_key(fshared, &key1);
-			goto retry;
+			goto out_put_keys;
 		}
 		if (curval != *cmpval) {
 			ret = -EAGAIN;
@@ -1026,7 +1072,7 @@ static int fixup_pi_state_owner(u32 __us
 	struct futex_pi_state *pi_state = q->pi_state;
 	struct task_struct *oldowner = pi_state->owner;
 	u32 uval, curval, newval;
-	int ret;
+	int ret, attempt = 0;
 
 	/* Owner died? */
 	if (!pi_state->owner)
@@ -1097,7 +1143,7 @@ retry:
 handle_fault:
 	spin_unlock(q->lock_ptr);
 
-	ret = get_user(uval, uaddr);
+	ret = futex_handle_fault((unsigned long)uaddr, attempt++);
 
 	spin_lock(q->lock_ptr);
 
@@ -1146,7 +1192,6 @@ retry:
 	if (unlikely(ret != 0))
 		goto out;
 
-retry_private:
 	hb = queue_lock(&q);
 
 	/*
@@ -1173,16 +1218,13 @@ retry_private:
 
 	if (unlikely(ret)) {
 		queue_unlock(&q, hb);
+		put_futex_key(fshared, &q.key);
 
 		ret = get_user(uval, uaddr);
-		if (ret)
-			goto out_put_key;
 
-		if (!fshared)
-			goto retry_private;
-
-		put_futex_key(fshared, &q.key);
-		goto retry;
+		if (!ret)
+			goto retry;
+		goto out;
 	}
 	ret = -EWOULDBLOCK;
 	if (unlikely(uval != val)) {
@@ -1316,7 +1358,7 @@ static int futex_lock_pi(u32 __user *uad
 	struct futex_hash_bucket *hb;
 	u32 uval, newval, curval;
 	struct futex_q q;
-	int ret, lock_taken, ownerdied = 0;
+	int ret, lock_taken, ownerdied = 0, attempt = 0;
 
 	if (refill_pi_state_cache())
 		return -ENOMEM;
@@ -1336,7 +1378,7 @@ retry:
 	if (unlikely(ret != 0))
 		goto out;
 
-retry_private:
+retry_unlocked:
 	hb = queue_lock(&q);
 
 retry_locked:
@@ -1561,15 +1603,18 @@ uaddr_faulted:
 	 */
 	queue_unlock(&q, hb);
 
-	ret = get_user(uval, uaddr);
-	if (ret)
-		goto out_put_key;
+	if (attempt++) {
+		ret = futex_handle_fault((unsigned long)uaddr, attempt);
+		if (ret)
+			goto out_put_key;
+		goto retry_unlocked;
+	}
 
-	if (!fshared)
-		goto retry_private;
+	ret = get_user(uval, uaddr);
+	if (!ret)
+		goto retry_unlocked;
 
-	put_futex_key(fshared, &q.key);
-	goto retry;
+	goto out_put_key;
 }
 
 
@@ -1585,7 +1630,7 @@ static int futex_unlock_pi(u32 __user *u
 	u32 uval;
 	struct plist_head *head;
 	union futex_key key = FUTEX_KEY_INIT;
-	int ret;
+	int ret, attempt = 0;
 
 retry:
 	if (get_user(uval, uaddr))
@@ -1601,6 +1646,7 @@ retry:
 		goto out;
 
 	hb = hash_futex(&key);
+retry_unlocked:
 	spin_lock(&hb->lock);
 
 	/*
@@ -1665,9 +1711,17 @@ pi_faulted:
 	 * we have to drop the mmap_sem in order to call get_user().
 	 */
 	spin_unlock(&hb->lock);
-	put_futex_key(fshared, &key);
+
+	if (attempt++) {
+		ret = futex_handle_fault((unsigned long)uaddr, attempt);
+		if (ret)
+			goto out;
+		uval = 0;
+		goto retry_unlocked;
+	}
 
 	ret = get_user(uval, uaddr);
+	put_futex_key(fshared, &key);
 	if (!ret)
 		goto retry;
 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ