--- linux-2.6.18-rc4/include/linux/futex.h 2006-08-08 22:46:13.000000000 +0200 +++ linux-2.6.18-rc4-ed/include/linux/futex.h 2006-08-08 23:23:13.000000000 +0200 @@ -15,6 +15,11 @@ #define FUTEX_LOCK_PI 6 #define FUTEX_UNLOCK_PI 7 #define FUTEX_TRYLOCK_PI 8 +#define FUTEX_WAIT_PRIVATE 9 +#define FUTEX_WAKE_PRIVATE 10 +#define FUTEX_REQUEUE_PRIVATE 11 +#define FUTEX_CMP_REQUEUE_PRIVATE 12 +#define FUTEX_WAKE_OP_PRIVATE 13 /* * Support for robust futexes: the kernel cleans up held futexes at --- linux-2.6.18-rc4/kernel/futex.c 2006-08-08 22:45:46.000000000 +0200 +++ linux-2.6.18-rc4-ed/kernel/futex.c 2006-08-09 07:26:19.000000000 +0200 @@ -60,8 +60,9 @@ * Don't rearrange members without looking at hash_futex(). * * offset is aligned to a multiple of sizeof(u32) (== 4) by definition. - * We set bit 0 to indicate if it's an inode-based key. */ +#define OFF_INODE 1 /* We set bit 0 if it has a reference on inode */ +#define OFF_MMSHARED 2 /* We set bit 1 if it has a reference on mm */ union futex_key { struct { unsigned long pgoff; @@ -79,6 +80,8 @@ int offset; } both; }; +#define FUT_SHARED 1 /* we should walk vmas */ +#define FUT_PRIVATE 0 /* private futex: no need to walk vmas*/ /* * Priority Inheritance state: @@ -140,7 +143,7 @@ static struct futex_hash_bucket futex_queues[1<mm->mmap_sem but NOT any spinlocks. */ -static int get_futex_key(u32 __user *uaddr, union futex_key *key) +static int get_futex_key(u32 __user *uaddr, union futex_key *key, int shared) { unsigned long address = (unsigned long)uaddr; struct mm_struct *mm = current->mm; @@ -191,6 +194,11 @@ return -EINVAL; address -= key->both.offset; + if (shared == FUT_PRIVATE) { + key->private.mm = mm; + key->private.address = address; + return 0; + } /* * The futex is hashed differently depending on whether * it's in a shared or private mapping. So check vma first. @@ -215,6 +223,7 @@ * mappings of _writable_ handles. */ if (likely(!(vma->vm_flags & VM_MAYSHARE))) { + key->both.offset += OFF_MMSHARED; /* reference taken on mm */ key->private.mm = mm; key->private.address = address; return 0; @@ -224,7 +233,7 @@ * Linear file mappings are also simple. */ key->shared.inode = vma->vm_file->f_dentry->d_inode; - key->both.offset++; /* Bit 0 of offset indicates inode-based key. */ + key->both.offset += OFF_INODE; /* reference taken on inode */ if (likely(!(vma->vm_flags & VM_NONLINEAR))) { key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff); @@ -256,8 +265,8 @@ */ static inline void get_key_refs(union futex_key *key) { - if (key->both.ptr != 0) { - if (key->both.offset & 1) + if (key->both.offset & (OFF_INODE|OFF_MMSHARED)) { + if (key->both.offset & OFF_INODE) atomic_inc(&key->shared.inode->i_count); else atomic_inc(&key->private.mm->mm_count); @@ -270,8 +279,8 @@ */ static void drop_key_refs(union futex_key *key) { - if (key->both.ptr != 0) { - if (key->both.offset & 1) + if (key->both.offset & (OFF_INODE|OFF_MMSHARED)) { + if (key->both.offset & OFF_INODE) iput(key->shared.inode); else mmdrop(key->private.mm); @@ -650,7 +659,7 @@ * Wake up all waiters hashed on the physical page that is mapped * to this virtual address: */ -static int futex_wake(u32 __user *uaddr, int nr_wake) +static int futex_wake(u32 __user *uaddr, int nr_wake, int shared) { struct futex_hash_bucket *hb; struct futex_q *this, *next; @@ -658,9 +667,10 @@ union futex_key key; int ret; - down_read(¤t->mm->mmap_sem); + if (shared) + down_read(¤t->mm->mmap_sem); - ret = get_futex_key(uaddr, &key); + ret = get_futex_key(uaddr, &key, shared); if (unlikely(ret != 0)) goto out; @@ -682,7 +692,8 @@ spin_unlock(&hb->lock); out: - up_read(¤t->mm->mmap_sem); + if (shared) + up_read(¤t->mm->mmap_sem); return ret; } @@ -692,7 +703,7 @@ */ static int futex_wake_op(u32 __user *uaddr1, u32 __user *uaddr2, - int nr_wake, int nr_wake2, int op) + int nr_wake, int nr_wake2, int op, int shared) { union futex_key key1, key2; struct futex_hash_bucket *hb1, *hb2; @@ -703,10 +714,10 @@ retryfull: down_read(¤t->mm->mmap_sem); - ret = get_futex_key(uaddr1, &key1); + ret = get_futex_key(uaddr1, &key1, shared); if (unlikely(ret != 0)) goto out; - ret = get_futex_key(uaddr2, &key2); + ret = get_futex_key(uaddr2, &key2, shared); if (unlikely(ret != 0)) goto out; @@ -802,7 +813,7 @@ * physical page. */ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2, - int nr_wake, int nr_requeue, u32 *cmpval) + int nr_wake, int nr_requeue, u32 *cmpval, int shared) { union futex_key key1, key2; struct futex_hash_bucket *hb1, *hb2; @@ -811,12 +822,13 @@ int ret, drop_count = 0; retry: - down_read(¤t->mm->mmap_sem); + if (shared) + down_read(¤t->mm->mmap_sem); - ret = get_futex_key(uaddr1, &key1); + ret = get_futex_key(uaddr1, &key1, shared); if (unlikely(ret != 0)) goto out; - ret = get_futex_key(uaddr2, &key2); + ret = get_futex_key(uaddr2, &key2, shared); if (unlikely(ret != 0)) goto out; @@ -839,7 +851,8 @@ * If we would have faulted, release mmap_sem, fault * it in and start all over again. */ - up_read(¤t->mm->mmap_sem); + if (shared) + up_read(¤t->mm->mmap_sem); ret = get_user(curval, uaddr1); @@ -888,7 +901,8 @@ drop_key_refs(&key1); out: - up_read(¤t->mm->mmap_sem); + if (shared) + up_read(¤t->mm->mmap_sem); return ret; } @@ -999,7 +1013,7 @@ drop_key_refs(&q->key); } -static int futex_wait(u32 __user *uaddr, u32 val, unsigned long time) +static int futex_wait(u32 __user *uaddr, u32 val, unsigned long time, int shared) { struct task_struct *curr = current; DECLARE_WAITQUEUE(wait, curr); @@ -1010,9 +1024,10 @@ q.pi_state = NULL; retry: - down_read(&curr->mm->mmap_sem); + if (shared) + down_read(&curr->mm->mmap_sem); - ret = get_futex_key(uaddr, &q.key); + ret = get_futex_key(uaddr, &q.key, shared); if (unlikely(ret != 0)) goto out_release_sem; @@ -1047,7 +1062,8 @@ * If we would have faulted, release mmap_sem, fault it in and * start all over again. */ - up_read(&curr->mm->mmap_sem); + if (shared) + up_read(&curr->mm->mmap_sem); ret = get_user(uval, uaddr); @@ -1066,7 +1082,8 @@ * Now the futex is queued and we have checked the data, we * don't want to hold mmap_sem while we sleep. */ - up_read(&curr->mm->mmap_sem); + if (shared) + up_read(&curr->mm->mmap_sem); /* * There might have been scheduling since the queue_me(), as we @@ -1108,7 +1125,8 @@ queue_unlock(&q, hb); out_release_sem: - up_read(&curr->mm->mmap_sem); + if (shared) + up_read(&curr->mm->mmap_sem); return ret; } @@ -1134,7 +1152,7 @@ retry: down_read(&curr->mm->mmap_sem); - ret = get_futex_key(uaddr, &q.key); + ret = get_futex_key(uaddr, &q.key, FUT_SHARED); if (unlikely(ret != 0)) goto out_release_sem; @@ -1435,7 +1453,7 @@ */ down_read(¤t->mm->mmap_sem); - ret = get_futex_key(uaddr, &key); + ret = get_futex_key(uaddr, &key, FUT_SHARED); if (unlikely(ret != 0)) goto out; @@ -1551,7 +1569,7 @@ return ret; } -static struct file_operations futex_fops = { +static const struct file_operations futex_fops = { .release = futex_close, .poll = futex_poll, }; @@ -1600,7 +1618,7 @@ q->pi_state = NULL; down_read(¤t->mm->mmap_sem); - err = get_futex_key(uaddr, &q->key); + err = get_futex_key(uaddr, &q->key, FUT_SHARED); if (unlikely(err != 0)) { up_read(¤t->mm->mmap_sem); @@ -1742,7 +1760,7 @@ */ if (!pi) { if (uval & FUTEX_WAITERS) - futex_wake(uaddr, 1); + futex_wake(uaddr, 1, FUT_SHARED); } } return 0; @@ -1830,23 +1848,38 @@ switch (op) { case FUTEX_WAIT: - ret = futex_wait(uaddr, val, timeout); + ret = futex_wait(uaddr, val, timeout, FUT_SHARED); + break; + case FUTEX_WAIT_PRIVATE: + ret = futex_wait(uaddr, val, timeout, FUT_PRIVATE); break; case FUTEX_WAKE: - ret = futex_wake(uaddr, val); + ret = futex_wake(uaddr, val, FUT_SHARED); + break; + case FUTEX_WAKE_PRIVATE: + ret = futex_wake(uaddr, val, FUT_PRIVATE); break; case FUTEX_FD: /* non-zero val means F_SETOWN(getpid()) & F_SETSIG(val) */ ret = futex_fd(uaddr, val); break; case FUTEX_REQUEUE: - ret = futex_requeue(uaddr, uaddr2, val, val2, NULL); + ret = futex_requeue(uaddr, uaddr2, val, val2, NULL, FUT_SHARED); + break; + case FUTEX_REQUEUE_PRIVATE: + ret = futex_requeue(uaddr, uaddr2, val, val2, NULL, FUT_PRIVATE); break; case FUTEX_CMP_REQUEUE: - ret = futex_requeue(uaddr, uaddr2, val, val2, &val3); + ret = futex_requeue(uaddr, uaddr2, val, val2, &val3, FUT_SHARED); + break; + case FUTEX_CMP_REQUEUE_PRIVATE: + ret = futex_requeue(uaddr, uaddr2, val, val2, &val3, FUT_PRIVATE); break; case FUTEX_WAKE_OP: - ret = futex_wake_op(uaddr, uaddr2, val, val2, val3); + ret = futex_wake_op(uaddr, uaddr2, val, val2, val3, FUT_SHARED); + break; + case FUTEX_WAKE_OP_PRIVATE: + ret = futex_wake_op(uaddr, uaddr2, val, val2, val3, FUT_PRIVATE); break; case FUTEX_LOCK_PI: ret = futex_lock_pi(uaddr, val, timeout, val2, 0);