typedef struct { volatile unsigned int lock; } arch_spinlock_t; #define WORKERS 8 struct sh { arch_spinlock_t l; char pad1[60]; long success; char pad2[56]; long worker[WORKERS]; int locks[WORKERS]; } *s; int me; /* cut & paste infrastructure from kernel start here */ typedef unsigned long __u64; #define __always_inline inline __attribute__((always_inline)) #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) extern unsigned long __bad_size_for_ia64_fetch_and_add (void); extern unsigned long __bad_increment_for_ia64_fetch_and_add (void); #define ia64_invala() asm volatile ("invala" ::: "memory") #define ia64_hint_pause 0 #define ia64_hint(mode) \ ({ \ switch (mode) { \ case ia64_hint_pause: \ asm volatile ("hint @pause" ::: "memory"); \ break; \ } \ }) #define cpu_relax() ia64_hint(ia64_hint_pause) #define ia64_fetchadd4_acq(p, inc) \ ({ \ \ __u64 ia64_intri_res; \ asm volatile ("fetchadd4.acq %0=[%1],%2" \ : "=r"(ia64_intri_res) : "r"(p), "i" (inc) \ : "memory"); \ \ ia64_intri_res; \ }) #define IA64_FETCHADD(tmp,v,n,sz,sem) \ ({ \ switch (sz) { \ case 4: \ tmp = ia64_fetchadd4_##sem((unsigned int *) v, n); \ break; \ \ case 8: \ tmp = ia64_fetchadd8_##sem((unsigned long *) v, n); \ break; \ \ default: \ __bad_size_for_ia64_fetch_and_add(); \ } \ }) #define ia64_fetchadd(i,v,sem) \ ({ \ __u64 _tmp; \ volatile __typeof__(*(v)) *_v = (v); \ /* Can't use a switch () here: gcc isn't always smart enough for that... */ \ if ((i) == -16) \ IA64_FETCHADD(_tmp, _v, -16, sizeof(*(v)), sem); \ else if ((i) == -8) \ IA64_FETCHADD(_tmp, _v, -8, sizeof(*(v)), sem); \ else if ((i) == -4) \ IA64_FETCHADD(_tmp, _v, -4, sizeof(*(v)), sem); \ else if ((i) == -1) \ IA64_FETCHADD(_tmp, _v, -1, sizeof(*(v)), sem); \ else if ((i) == 1) \ IA64_FETCHADD(_tmp, _v, 1, sizeof(*(v)), sem); \ else if ((i) == 4) \ IA64_FETCHADD(_tmp, _v, 4, sizeof(*(v)), sem); \ else if ((i) == 8) \ IA64_FETCHADD(_tmp, _v, 8, sizeof(*(v)), sem); \ else if ((i) == 16) \ IA64_FETCHADD(_tmp, _v, 16, sizeof(*(v)), sem); \ else \ _tmp = __bad_increment_for_ia64_fetch_and_add(); \ (__typeof__(*(v))) (_tmp); /* return old value */ \ }) #define TICKET_SHIFT 17 #define TICKET_BITS 15 #define TICKET_MASK ((1 << TICKET_BITS) - 1) static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) { int *p = (int *)&lock->lock, ticket, serve; ticket = ia64_fetchadd(1, p, acq); if (!(((ticket >> TICKET_SHIFT) ^ ticket) & TICKET_MASK)) return; ia64_invala(); s->locks[me] = ticket; for (;;) { asm volatile ("ld4.c.nc %0=[%1]" : "=r"(serve) : "r"(p) : "memory"); if (!(((serve >> TICKET_SHIFT) ^ ticket) & TICKET_MASK)) { s->locks[me] = 0; return; } cpu_relax(); } } static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) { unsigned short *p = (unsigned short *)&lock->lock + 1, tmp; asm volatile ("ld2.bias %0=[%1]" : "=r"(tmp) : "r"(p)); ACCESS_ONCE(*p) = (tmp + 2) & ~1; } /* cut & paste infrastructure from kernel ends here */ #include #include #include #include #include work() { printf("Starting worker %d\n", me); while (1) { __ticket_spin_lock(&s->l); s->success++; s->worker[me]++; __ticket_spin_unlock(&s->l); } } main(int argc, char **argv) { int i, pid; int workers = WORKERS; if (argc > 1) { workers = atoi(argv[1]); if (workers < 1 || workers > WORKERS) workers = WORKERS; } s = mmap(NULL, 65536, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0L); printf("shared mapping at %p\n", s); for (i = 0; i < workers; i++) switch (pid = fork()) { case -1: perror("fork"); return 1; case 0: me = i; work(); return 0; } while (1) { sleep(5); printf("%ld [lock = %.8x]\n", s->success, s->l.lock); for (i = 0; i < workers; i++) printf(" %ld %.8x\n", s->worker[i], s->locks[i]); } }