typedef struct { volatile unsigned int lock; } arch_spinlock_t; #define WORKERS 16 int workers = WORKERS; struct sh { arch_spinlock_t l; char pad1[60]; long success; char pad2[56]; unsigned long critzone; char pad3[56]; unsigned long running; char pad4[56]; long worker[WORKERS]; int wraps[WORKERS]; } *s; int me; /* cut & paste infrastructure from kernel start here */ typedef unsigned long __u64; typedef unsigned int __u32; typedef unsigned short __u16; typedef unsigned char __u8; #define __always_inline inline __attribute__((always_inline)) #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) extern unsigned long __bad_size_for_ia64_fetch_and_add (void); extern unsigned long __bad_increment_for_ia64_fetch_and_add (void); #define ia64_invala() asm volatile ("invala" ::: "memory") #define ia64_hint_pause 0 #define ia64_hint(mode) \ ({ \ switch (mode) { \ case ia64_hint_pause: \ asm volatile ("hint @pause" ::: "memory"); \ break; \ } \ }) #define cpu_relax() ia64_hint(ia64_hint_pause) #define ia64_fetchadd4_acq(p, inc) \ ({ \ \ __u64 ia64_intri_res; \ asm volatile ("fetchadd4.acq %0=[%1],%2" \ : "=r"(ia64_intri_res) : "r"(p), "i" (inc) \ : "memory"); \ \ ia64_intri_res; \ }) #define IA64_FETCHADD(tmp,v,n,sz,sem) \ ({ \ switch (sz) { \ case 4: \ tmp = ia64_fetchadd4_##sem((unsigned int *) v, n); \ break; \ \ case 8: \ tmp = ia64_fetchadd8_##sem((unsigned long *) v, n); \ break; \ \ default: \ __bad_size_for_ia64_fetch_and_add(); \ } \ }) #define ia64_fetchadd(i,v,sem) \ ({ \ __u64 _tmp; \ volatile __typeof__(*(v)) *_v = (v); \ /* Can't use a switch () here: gcc isn't always smart enough for that... */ \ if ((i) == -16) \ IA64_FETCHADD(_tmp, _v, -16, sizeof(*(v)), sem); \ else if ((i) == -8) \ IA64_FETCHADD(_tmp, _v, -8, sizeof(*(v)), sem); \ else if ((i) == -4) \ IA64_FETCHADD(_tmp, _v, -4, sizeof(*(v)), sem); \ else if ((i) == -1) \ IA64_FETCHADD(_tmp, _v, -1, sizeof(*(v)), sem); \ else if ((i) == 1) \ IA64_FETCHADD(_tmp, _v, 1, sizeof(*(v)), sem); \ else if ((i) == 4) \ IA64_FETCHADD(_tmp, _v, 4, sizeof(*(v)), sem); \ else if ((i) == 8) \ IA64_FETCHADD(_tmp, _v, 8, sizeof(*(v)), sem); \ else if ((i) == 16) \ IA64_FETCHADD(_tmp, _v, 16, sizeof(*(v)), sem); \ else \ _tmp = __bad_increment_for_ia64_fetch_and_add(); \ (__typeof__(*(v))) (_tmp); /* return old value */ \ }) #define ia64_cmpxchg4_acq(ptr, new, old) \ ({ \ __u64 ia64_intri_res; \ asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ asm volatile ("cmpxchg4.acq %0=[%1],%2,ar.ccv": \ "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ ia64_intri_res; \ }) /* * This function doesn't exist, so you'll get a linker error * if something tries to do an invalid cmpxchg(). */ extern long ia64_cmpxchg_called_with_bad_pointer (void); #define ia64_cmpxchg(sem,ptr,old,new,size) \ ({ \ __u64 _o_, _r_; \ \ switch (size) { \ case 1: _o_ = (__u8 ) (long) (old); break; \ case 2: _o_ = (__u16) (long) (old); break; \ case 4: _o_ = (__u32) (long) (old); break; \ case 8: _o_ = (__u64) (long) (old); break; \ default: break; \ } \ switch (size) { \ case 1: \ _r_ = ia64_cmpxchg1_##sem((__u8 *) ptr, new, _o_); \ break; \ \ case 2: \ _r_ = ia64_cmpxchg2_##sem((__u16 *) ptr, new, _o_); \ break; \ \ case 4: \ _r_ = ia64_cmpxchg4_##sem((__u32 *) ptr, new, _o_); \ break; \ \ case 8: \ _r_ = ia64_cmpxchg8_##sem((__u64 *) ptr, new, _o_); \ break; \ \ default: \ _r_ = ia64_cmpxchg_called_with_bad_pointer(); \ break; \ } \ (__typeof__(old)) _r_; \ }) #define TICKET_SHIFT 17 /*was17*/ #define TICKET_BITS 15 /*was15*/ #define TICKET_MASK ((1 << TICKET_BITS) - 1) static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) { int *p = (int *)&lock->lock, ticket, serve; ticket = ia64_fetchadd(1, p, acq); if (ticket == 0) s->wraps[me]++; if (!(((ticket >> TICKET_SHIFT) ^ ticket) & TICKET_MASK)) return; ia64_invala(); for (;;) { asm volatile ("ld4.c.nc %0=[%1]" : "=r"(serve) : "r"(p) : "memory"); if (!(((serve >> TICKET_SHIFT) ^ ticket) & TICKET_MASK)) { return; } cpu_relax(); } } static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) { int tmp = ACCESS_ONCE(lock->lock); if (tmp == 0) s->wraps[me]++; if (!(((tmp >> TICKET_SHIFT) ^ tmp) & TICKET_MASK)) return ia64_cmpxchg(acq, &lock->lock, tmp, tmp + 1, sizeof (tmp)) == tmp; return 0; } static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) { unsigned short *p = (unsigned short *)&lock->lock + 1, tmp; asm volatile ("ld2.bias %0=[%1]" : "=r"(tmp) : "r"(p)); ACCESS_ONCE(*p) = (tmp + 2) & ~1; } /* cut & paste infrastructure from kernel ends here */ #include #include #include #include #include extern void nop(void); void delay(int scale) { int r = random(); r &= 0xffff; r *= scale; while (r-- > 0) nop(); } work() { unsigned long crit, mymask = (1ul << me); int gotlock; srandom(getpid()); while (s->running) { delay(workers); if (me & 1) { gotlock = __ticket_spin_trylock(&s->l); } else { __ticket_spin_lock(&s->l); gotlock = 1; } if (gotlock) { if (crit = s->critzone) { s->running = 0; printf("Worker %d saw %lx at entry\n", me, crit); break; } s->critzone |= mymask; s->success++; s->worker[me]++; delay(1); if ((crit = s->critzone) != mymask) { printf("Worker %d saw %lx at exit\n", me, crit); s->running = 0; break; } s->critzone = 0; __ticket_spin_unlock(&s->l); } } } main(int argc, char **argv) { int i, pid; if (argc > 1) { workers = atoi(argv[1]); if (workers < 1 || workers > WORKERS) workers = WORKERS; } s = mmap(NULL, 65536, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0L); printf("shared mapping at %p\n", s); s->running = 1; for (i = 0; i < workers; i++) switch (pid = fork()) { case -1: perror("fork"); s->running = 0; return 1; case 0: me = i; work(); return 0; } while (s->running) { sleep(5); printf("%ld [lock = %.8x]\n", s->success, s->l.lock); for (i = 0; i < workers; i++) printf(" %ld %.8x\n", s->worker[i], s->wraps[i]); } }