/*
 * Micro-benchmark comparing two x86 spinlock implementations on an
 * uncontended, single-threaded pointer chase through an array of pages:
 *
 *   - "inc-lock":  a byte lock taken with `lock decb` and released with a
 *                  plain byte store.
 *   - "xadd-lock": a ticket lock taken with `lock xaddw` and released with a
 *                  plain byte increment.
 *
 * Each lock is timed twice: once over a short chain of pages (small working
 * set) and once over a chain that visits every page in the array.
 *
 * The inline assembly is x86-specific (GCC/Clang extended asm syntax).
 */

/*
 * random(), srandom() and gettimeofday() are XSI/POSIX interfaces rather
 * than ISO C; request them explicitly so the file also builds with
 * -std=c99 / -std=c11.
 */
#ifndef _XOPEN_SOURCE
#define _XOPEN_SOURCE 600
#endif

#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

/* One element of the chased list: a lock plus the index of the next page. */
struct page {
	short lock;
	unsigned int next;	/* index into pages[]; 0 is "unused" while building */
};

#define NR_PAGES (2*1024*1024)
static struct page pages[NR_PAGES];

/* Number of pages linked together for the "in cache" runs. */
#define IN_CACHE_PAGES 10

/* Seed passed to srandom() so every chain layout is reproducible. */
#define CHAIN_SEED 10

/*
 * inc-lock: only the low byte of the short is used.
 * 1 means free; zero or negative means held.
 */
#define LOCK_INIT 1

/*
 * Acquire the inc-lock.
 *
 * Atomically decrements the lock byte. If the result is non-negative the
 * lock was free and is now ours. Otherwise spin (with `rep; nop`, i.e.
 * PAUSE) until the byte becomes positive again, then retry the decrement.
 */
static void lock(short *lock)
{
	__asm__ __volatile__ ("1:\n\t"
			      "lock ; decb %0\n\t"
			      "jns 2f\n\t"
			      "3:\n\t"
			      "rep ; nop\n\t"
			      "cmpb $0,%0\n\t"
			      "jle 3b\n\t"
			      "jmp 1b\n\t"
			      "2:\n\t"
			      : "+m" (*lock) : : "memory");
}

/* Release the inc-lock by storing 1 (free) into the lock byte. */
static void unlock(short *lock)
{
	__asm__ __volatile__ ("movb $1,%0\n\t"
			      : "+m" (*lock) : : "memory");
}

/*
 * xadd-lock (ticket lock): the low byte is the ticket currently being
 * served, the high byte is the next ticket to hand out. The lock is free
 * when both bytes are equal.
 */
#define XLOCK_INIT 0

/*
 * Acquire the xadd-lock.
 *
 * `lock xaddw` adds 0x0100 to the lock word (taking a ticket by bumping the
 * high byte) and leaves the previous word in %ax: %ah is our ticket, %al is
 * the ticket being served at that moment. Spin, re-reading the low byte of
 * the lock into %al, until it matches our ticket.
 */
static void xlock(short *lock)
{
	short i = 0x0100;

	__asm__ __volatile__ ("lock ; xaddw %%ax, %1\n\t"
			      "1:\n\t"
			      "cmpb %%ah, %%al\n\t"
			      "je 2f\n\t"
			      "rep ; nop\n\t"
			      "movb %1, %%al\n\t"
			      "lfence\n\t"
			      "jmp 1b\n\t"
			      "2:\n\t"
			      : "+a" (i), "+m" (*lock) : : "memory");
}

/* Release the xadd-lock by advancing the "now serving" (low) byte. */
static void xunlock(short *lock)
{
	__asm__ __volatile__ ("incb %0\n\t"
			      : "+m" (*lock) : : "memory");
}

/*
 * Return non-zero if the two bytes of the xadd-lock differ, i.e. a ticket
 * has been handed out that has not yet been released.
 *
 * Not called by the benchmark itself; marked unused so it does not trigger
 * -Wunused-function.
 */
__attribute__((unused))
static int xlock_is_locked(short *lock)
{
	short tmp = *lock;
	char *x = (char *)&tmp;

	return (*x != *(x+1));
}

#define ITERS (16*1024*1024)
#define IN_ITERS (ITERS*5)

/*
 * Link nr_pages pages, starting at pages[0], into a circular chain laid out
 * pseudo-randomly across the array.
 *
 * All `next` fields are cleared first, because a zero `next` is what marks a
 * page as not yet part of the chain. Candidate indices come from random();
 * on a collision the index is advanced linearly until a free page is found.
 * The final page points back to index 0.
 */
static void build_chain(int nr_pages)
{
	struct page *p = &pages[0];
	int i;

	for (i = 0; i < NR_PAGES; i++)
		pages[i].next = 0;

	srandom(CHAIN_SEED);

	for (i = 0; i < nr_pages - 1; i++) {
		unsigned int n = random() % NR_PAGES;

		/* Skip the current tail and any page already in the chain. */
		while (p == &pages[n] || pages[n].next)
			n = (n + 1) % NR_PAGES;

		p->next = n;
		p = &pages[n];
	}
	p->next = 0;
}

/* Set every page's lock word to the given initial (unlocked) value. */
static void init_locks(short value)
{
	int i;

	for (i = 0; i < NR_PAGES; i++)
		pages[i].lock = value;
}

/*
 * Microseconds elapsed between two gettimeofday() samples.
 *
 * Done in long long so the seconds-to-microseconds multiplication cannot
 * overflow on targets where time_t or long is 32 bits wide.
 */
static unsigned long long elapsed_usec(const struct timeval *start,
				       const struct timeval *end)
{
	long long sec = (long long)end->tv_sec - (long long)start->tv_sec;
	long long usec = (long long)end->tv_usec - (long long)start->tv_usec;

	return (unsigned long long)(sec * 1000000LL + usec);
}

/* Convert a total time in microseconds into nanoseconds per iteration. */
static double ns_per_iter(unsigned long long usec, int iters)
{
	return (double)usec * 1000 / iters;
}

/*
 * Walk the chain for `iters` steps, taking and dropping the inc-lock of each
 * page around the read of its `next` field. Returns the elapsed time in
 * microseconds; lock initialisation is not included in the timed region.
 */
static unsigned long long time_lock_walk(int iters)
{
	struct timeval start, end;
	struct page *p = &pages[0];
	unsigned int next;
	int i;

	init_locks(LOCK_INIT);

	gettimeofday(&start, NULL);
	for (i = 0; i < iters; i++) {
		lock(&p->lock);
		next = p->next;
		unlock(&p->lock);
		p = &pages[next];
	}
	gettimeofday(&end, NULL);

	return elapsed_usec(&start, &end);
}

/*
 * Same walk as time_lock_walk(), but using the xadd-lock. Kept as a separate
 * function (rather than passing function pointers) so the lock primitives
 * are called directly inside the timed loop.
 */
static unsigned long long time_xlock_walk(int iters)
{
	struct timeval start, end;
	struct page *p = &pages[0];
	unsigned int next;
	int i;

	init_locks(XLOCK_INIT);

	gettimeofday(&start, NULL);
	for (i = 0; i < iters; i++) {
		xlock(&p->lock);
		next = p->next;
		xunlock(&p->lock);
		p = &pages[next];
	}
	gettimeofday(&end, NULL);

	return elapsed_usec(&start, &end);
}

/*
 * Run both lock implementations over a short chain and then over a chain
 * covering every page, printing the average cost per lock/unlock pair.
 */
int main(void)
{
	unsigned long long usec;

	/* Short chain: only IN_CACHE_PAGES pages are ever touched. */
	build_chain(IN_CACHE_PAGES);

	usec = time_lock_walk(IN_ITERS);
	printf("inc-lock in cache takes %0.2lfns\n",
	       ns_per_iter(usec, IN_ITERS));

	usec = time_xlock_walk(IN_ITERS);
	printf("xadd-lock in cache takes %0.2lfns\n",
	       ns_per_iter(usec, IN_ITERS));

	/* Full chain: every page in the array is visited in random order. */
	build_chain(NR_PAGES);

	usec = time_lock_walk(ITERS);
	printf("inc-lock out of cache takes %0.2lfns\n",
	       ns_per_iter(usec, ITERS));

	usec = time_xlock_walk(ITERS);
	printf("xadd-lock out of cache takes %0.2lfns\n",
	       ns_per_iter(usec, ITERS));

	return 0;
}