/*
 * Contention micro-benchmark comparing two x86 spinlocks that share the
 * same 16-bit lock word:
 *
 *   - "inc-lock":  a decrement-and-test spinlock (lock()/unlock()).
 *   - "xadd-lock": a ticket lock built on LOCK XADD (xlock()/xunlock()).
 *
 * For each lock, NR_THREADS workers (each pinned to its own CPU) perform
 * ITERS lock/unlock pairs around a shared counter. Each worker reports the
 * longest run of back-to-back acquisitions it made with no other thread
 * getting the lock in between; main() reports the elapsed wall-clock time
 * divided by ITERS.
 *
 * The inline assembly is x86-only and uses byte accesses to the lock word,
 * relying on the low byte being at the lowest address.
 *
 * NOTE(review): the header names were missing from the source this file was
 * recovered from; the list below is the set required by the calls made here.
 * _GNU_SOURCE must be defined before the first include so that <sched.h>
 * exposes cpu_set_t, the CPU_* macros and sched_setaffinity().
 */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <unistd.h>

/* The single lock word shared by both lock implementations. */
static short glock;

/* ------------------------------------------------------------------ */
/* inc-lock: low byte is 1 when free, <= 0 when held/contended.        */
/* ------------------------------------------------------------------ */

#define LOCK_INIT 1

/*
 * Acquire the inc-lock.
 *
 * Atomically decrements the low byte; a non-negative result means the lock
 * was free and is now ours. Otherwise spin (with PAUSE, encoded "rep; nop")
 * until the byte becomes positive again, then retry the decrement.
 * The "memory" clobber stops the compiler from moving or caching memory
 * accesses across the acquisition.
 */
static void lock(short *lk)
{
	__asm__ __volatile__ ("1:\n\t"
			      "lock ; decb %0\n\t"
			      "jns 2f\n\t"
			      "3:\n\t"
			      "rep ; nop\n\t"
			      "cmpb $0,%0\n\t"
			      "jle 3b\n\t"
			      "jmp 1b\n\t"
			      "2:\n\t"
			      : "+m" (*lk)
			      :
			      : "memory");
}

/* Release the inc-lock by storing 1 into the low byte. */
static void unlock(short *lk)
{
	__asm__ __volatile__ ("movb $1,%0\n\t"
			      : "+m" (*lk)
			      :
			      : "memory");
}

/* ------------------------------------------------------------------ */
/* xadd-lock: ticket lock. High byte = next ticket to hand out,        */
/* low byte = ticket currently being served.                           */
/* ------------------------------------------------------------------ */

#define XLOCK_INIT 0

/*
 * Acquire the ticket lock.
 *
 * LOCK XADDW adds 0x0100 to the lock word (bumping the "next ticket" byte)
 * and leaves the previous word in %ax: %ah is the ticket we drew, %al the
 * ticket being served at that moment. Spin, re-reading the low byte into
 * %al, until it equals our ticket.
 */
static void xlock(short *lk)
{
	short ticket = 0x0100;

	__asm__ __volatile__ ("lock ; xaddw %%ax, %1\n\t"
			      "1:\n\t"
			      "cmpb %%ah, %%al\n\t"
			      "je 2f\n\t"
			      "rep ; nop\n\t"
			      "movb %1, %%al\n\t"
			      "lfence\n\t"
			      "jmp 1b\n\t"
			      "2:\n\t"
			      : "+a" (ticket), "+m" (*lk)
			      :
			      : "memory");
}

/* Release the ticket lock by advancing the "now serving" (low) byte. */
static void xunlock(short *lk)
{
	__asm__ __volatile__ ("incb %0\n\t"
			      : "+m" (*lk)
			      :
			      : "memory");
}

/* ------------------------------------------------------------------ */
/* Benchmark state and helpers                                         */
/* ------------------------------------------------------------------ */

#define NR_THREADS 16
#define ITERS (1024*1024)

/* Shared counter incremented once per critical section. */
static int seq;
/* Number of workers that have started / finished; updated under glock. */
static int started, finished;

/*
 * Restrict the calling thread to CPU number `cpu`.
 * Prints a diagnostic and terminates the process if the affinity cannot be
 * set (for instance when that CPU does not exist on this machine).
 */
static void pin_to_cpu(unsigned long cpu)
{
	cpu_set_t cpuset;

	CPU_ZERO(&cpuset);
	CPU_SET(cpu, &cpuset);
	if (sched_setaffinity(0, sizeof(cpuset), &cpuset) == -1) {
		perror("sched_setaffinity");
		exit(1);
	}
}

/*
 * Microseconds elapsed between two gettimeofday() samples.
 * The arithmetic is done in long long so it cannot overflow where time_t or
 * suseconds_t are 32-bit.
 */
static unsigned long long elapsed_usec(const struct timeval *start,
				       const struct timeval *end)
{
	long long secs = (long long)end->tv_sec - (long long)start->tv_sec;
	long long usecs = (long long)end->tv_usec - (long long)start->tv_usec;

	return (unsigned long long)(secs * 1000000LL + usecs);
}

/*
 * Start NR_THREADS workers running `fn`; worker i receives i (as a void *)
 * and uses it as the CPU number to pin itself to.
 * pthread_create() returns an error number rather than setting errno, so the
 * result is checked against 0 and reported through strerror().
 */
static void spawn_workers(pthread_t *t, void *(*fn)(void *))
{
	int i;

	for (i = 0; i < NR_THREADS; i++) {
		int rc = pthread_create(&t[i], NULL, fn,
					(void *)(unsigned long)i);

		if (rc != 0) {
			fprintf(stderr, "pthread_create: %s\n", strerror(rc));
			exit(1);
		}
	}
}

/* Wait for all NR_THREADS workers; terminates the process on failure. */
static void join_workers(pthread_t *t)
{
	int i;

	for (i = 0; i < NR_THREADS; i++) {
		int rc = pthread_join(t[i], NULL);

		if (rc != 0) {
			fprintf(stderr, "pthread_join: %s\n", strerror(rc));
			exit(1);
		}
	}
}

/* ------------------------------------------------------------------ */
/* Workers                                                             */
/* ------------------------------------------------------------------ */

/*
 * inc-lock worker. `arg` carries the worker index, used as the CPU number.
 *
 * Performs ITERS increments of `seq` under lock()/unlock(). After each
 * critical section it checks whether the value it just read equals the value
 * it left behind last time (oldseq), i.e. no other thread incremented `seq`
 * in between. `row` counts the current streak of such re-acquisitions and
 * `max_row` the longest streak. Streaks are only counted while every worker
 * has started and none has finished; `started` and `finished` are read
 * without the lock for that purpose.
 *
 * Returns NULL; the result is printed to stdout.
 */
static void *thread(void *arg)
{
	unsigned long nr = (unsigned long)arg;
	int oldseq = -1;
	int max_row = 0;
	int row = 0;
	int i;

	pin_to_cpu(nr);

	lock(&glock);
	started++;
	unlock(&glock);

	for (i = 0; i < ITERS; i++) {
		int tmp;

		lock(&glock);
		tmp = seq;
		seq++;
		unlock(&glock);

		if (started == NR_THREADS && !finished && tmp == oldseq) {
			row++;
			if (row > max_row)
				max_row = row;
		} else {
			row = 0;
		}
		oldseq = tmp + 1;
	}

	lock(&glock);
	finished++;
	unlock(&glock);

	printf("inc-lock maximum unfair locks = %d\n", max_row);

	return NULL;
}

/*
 * xadd-lock worker: identical to thread() except that it uses
 * xlock()/xunlock(). The lock calls are kept inline here rather than passed
 * in as function pointers so that both workers execute the same instruction
 * sequence apart from the lock itself.
 *
 * Returns NULL; the result is printed to stdout.
 */
static void *xthread(void *arg)
{
	unsigned long nr = (unsigned long)arg;
	int oldseq = -1;
	int max_row = 0;
	int row = 0;
	int i;

	pin_to_cpu(nr);

	xlock(&glock);
	started++;
	xunlock(&glock);

	for (i = 0; i < ITERS; i++) {
		int tmp;

		xlock(&glock);
		tmp = seq;
		seq++;
		xunlock(&glock);

		if (started == NR_THREADS && !finished && tmp == oldseq) {
			row++;
			if (row > max_row)
				max_row = row;
		} else {
			row = 0;
		}
		oldseq = tmp + 1;
	}

	xlock(&glock);
	finished++;
	xunlock(&glock);

	printf("xadd-lock maximum unfair locks = %d\n", max_row);

	return NULL;
}

/*
 * Run both benchmarks in sequence.
 *
 * For each lock type: reset the shared state, take the lock so that every
 * worker blocks on its first acquisition, create the workers, give them one
 * second to get there, then start the clock and release the lock. The clock
 * stops once every worker has been joined. The figure printed is the elapsed
 * time in nanoseconds divided by ITERS.
 */
int main(void)
{
	struct timeval start, end;
	unsigned long long usec;
	pthread_t t[NR_THREADS];

	/* Phase 1: inc-lock. */
	seq = started = finished = 0;
	glock = LOCK_INIT;
	lock(&glock);
	spawn_workers(t, thread);
	/* sleep(1) rather than usleep(1000000): POSIX permits usleep() to
	 * reject arguments of one million or more. */
	sleep(1);
	gettimeofday(&start, NULL);
	unlock(&glock);
	join_workers(t);
	gettimeofday(&end, NULL);
	usec = elapsed_usec(&start, &end);
	printf("inc-lock contended takes %0.2lfns\n",
	       (double)usec * 1000 / ITERS);

	/* Phase 2: xadd (ticket) lock. */
	seq = started = finished = 0;
	glock = XLOCK_INIT;
	xlock(&glock);
	spawn_workers(t, xthread);
	sleep(1);
	gettimeofday(&start, NULL);
	xunlock(&glock);
	join_workers(t);
	gettimeofday(&end, NULL);
	usec = elapsed_usec(&start, &end);
	printf("xadd-lock contended takes %0.2lfns\n",
	       (double)usec * 1000 / ITERS);

	return 0;
}