lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Sun, 26 Oct 2014 07:22:32 +0100
From:	Mike Galbraith <umgwanakikbuti@...il.com>
To:	Brian Silverman <bsilver16384@...il.com>
Cc:	tglx@...utronix.de, austin.linux@...il.com,
	linux-kernel@...r.kernel.org, darren@...art.com,
	peterz@...radead.org
Subject: Re: [PATCH v2] futex: fix a race condition between REQUEUE_PI and
 task death

3.12.30-rt40 + patch has been running your testcase, futextests and
stockfish concurrently (on a 28 core +ht box) for over an hour now.

On Sat, 2014-10-25 at 20:20 -0400, Brian Silverman wrote: 
> free_pi_state and exit_pi_state_list both clean up futex_pi_state's.
> exit_pi_state_list takes the hb lock first, and most callers of
> free_pi_state do too. requeue_pi didn't, which means free_pi_state can
> free the pi_state out from under exit_pi_state_list. For example:
> task A                            |  task B
> exit_pi_state_list                |
>   pi_state =                      |
>       curr->pi_state_list->next   |
>                                   |  futex_requeue(requeue_pi=1)
>                                   |    // pi_state is the same as
>                                   |    // the one in task A
>                                   |    free_pi_state(pi_state)
>                                   |      list_del_init(&pi_state->list)
>                                   |      kfree(pi_state)
>   list_del_init(&pi_state->list)  |
> 
> Move the free_pi_state calls in requeue_pi to before it drops the hb
> locks which it's already holding.
> 
> I have test code that forks a bunch of processes, which all fork more
> processes that do futex(FUTEX_WAIT_REQUEUE_PI), then do
> futex(FUTEX_CMP_REQUEUE_PI) before killing the waiting processes. That
> test consistently breaks QEMU VMs without this patch.
> 
> Although they both make it faster to crash, CONFIG_PREEMPT_RT and lots
> of CPUs are not necessary to reproduce this problem. The symptoms range
> from random reboots to corrupting the test program to corrupting whole
> filesystems to strange BIOS errors. Crashdumps (when they get created at
> all) are usually a mess, to the point of crashing tools used to examine
> them.
> 
> Signed-off-by: Brian Silverman <bsilver16384@...il.com>
> ---
>  kernel/futex.c |   44 +++++++++++++++++++++++++++-----------------
>  1 file changed, 27 insertions(+), 17 deletions(-)
> 
> diff --git a/kernel/futex.c b/kernel/futex.c
> index 815d7af..c2c35a5 100644
> --- a/kernel/futex.c
> +++ b/kernel/futex.c
> @@ -64,6 +64,7 @@
>  #include <linux/hugetlb.h>
>  #include <linux/freezer.h>
>  #include <linux/bootmem.h>
> +#include <linux/lockdep.h>
>  
>  #include <asm/futex.h>
>  
> @@ -639,8 +640,16 @@ static struct futex_pi_state * alloc_pi_state(void)
>  	return pi_state;
>  }
>  
> -static void free_pi_state(struct futex_pi_state *pi_state)
> +/**
> + * Must be called with the hb lock held.
> + */
> +static void free_pi_state(struct futex_pi_state *pi_state,
> +					struct futex_hash_bucket *hb)
>  {
> +	if (!pi_state) return;
> +
> +	lockdep_assert_held(&hb->lock);
> +
>  	if (!atomic_dec_and_test(&pi_state->refcount))
>  		return;
>  
> @@ -1519,15 +1528,6 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
>  	}
>  
>  retry:
> -	if (pi_state != NULL) {
> -		/*
> -		 * We will have to lookup the pi_state again, so free this one
> -		 * to keep the accounting correct.
> -		 */
> -		free_pi_state(pi_state);
> -		pi_state = NULL;
> -	}
> -
>  	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
>  	if (unlikely(ret != 0))
>  		goto out;
> @@ -1558,6 +1558,12 @@ retry_private:
>  		ret = get_futex_value_locked(&curval, uaddr1);
>  
>  		if (unlikely(ret)) {
> +			/*
> +			 * We will have to lookup the pi_state again, so
> +			 * free this one to keep the accounting correct.
> +			 */
> +			free_pi_state(pi_state, hb2);
> +			pi_state = NULL;
>  			double_unlock_hb(hb1, hb2);
>  			hb_waiters_dec(hb2);
>  
> @@ -1617,6 +1623,8 @@ retry_private:
>  		case 0:
>  			break;
>  		case -EFAULT:
> +			free_pi_state(pi_state, hb2);
> +			pi_state = NULL;
>  			double_unlock_hb(hb1, hb2);
>  			hb_waiters_dec(hb2);
>  			put_futex_key(&key2);
> @@ -1632,6 +1640,8 @@ retry_private:
>  			 *   exit to complete.
>  			 * - The user space value changed.
>  			 */
> +			free_pi_state(pi_state, hb2);
> +			pi_state = NULL;
>  			double_unlock_hb(hb1, hb2);
>  			hb_waiters_dec(hb2);
>  			put_futex_key(&key2);
> @@ -1699,7 +1709,7 @@ retry_private:
>  			} else if (ret) {
>  				/* -EDEADLK */
>  				this->pi_state = NULL;
> -				free_pi_state(pi_state);
> +				free_pi_state(pi_state, hb2);
>  				goto out_unlock;
>  			}
>  		}
> @@ -1708,6 +1718,7 @@ retry_private:
>  	}
>  
>  out_unlock:
> +	free_pi_state(pi_state, hb2);
>  	double_unlock_hb(hb1, hb2);
>  	hb_waiters_dec(hb2);
>  
> @@ -1725,8 +1736,6 @@ out_put_keys:
>  out_put_key1:
>  	put_futex_key(&key1);
>  out:
> -	if (pi_state != NULL)
> -		free_pi_state(pi_state);
>  	return ret ? ret : task_count;
>  }
>  
> @@ -1850,14 +1859,15 @@ retry:
>   * PI futexes can not be requeued and must remove themself from the
>   * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry
>   * and dropped here.
> + * Must be called with the hb lock held.
>   */
> -static void unqueue_me_pi(struct futex_q *q)
> +static void unqueue_me_pi(struct futex_q *q, struct futex_hash_bucket *hb)
>  	__releases(q->lock_ptr)
>  {
>  	__unqueue_futex(q);
>  
>  	BUG_ON(!q->pi_state);
> -	free_pi_state(q->pi_state);
> +	free_pi_state(q->pi_state, hb);
>  	q->pi_state = NULL;
>  
>  	spin_unlock(q->lock_ptr);
> @@ -2340,7 +2350,7 @@ retry_private:
>  		rt_mutex_unlock(&q.pi_state->pi_mutex);
>  
>  	/* Unqueue and drop the lock */
> -	unqueue_me_pi(&q);
> +	unqueue_me_pi(&q, hb);
>  
>  	goto out_put_key;
>  
> @@ -2651,7 +2661,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
>  			ret = (res < 0) ? res : 0;
>  
>  		/* Unqueue and drop the lock. */
> -		unqueue_me_pi(&q);
> +		unqueue_me_pi(&q, hb);
>  	}
>  
>  	/*


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists