// Run with: GLIBC_TUNABLES=glibc.pthread.rseq=0 #include #include #include #include #include #include #include #include #include #include #include #include #include #include "rseq-abi.h" #include #define rseq(rseq, len, flags, sig) syscall(SYS_rseq, rseq, len, \ flags, sig); #define __weak __attribute__((weak)) //#define barrier() asm volatile ("" ::: "memory") #define rmb() asm volatile ("lfence" ::: "memory") #define wmb() asm volatile ("sfence" ::: "memory") static pthread_barrier_t pbarrier; static __thread struct rseq_abi __attribute__((aligned(sizeof(struct rseq_abi)))) rseq_map; static __thread struct rseq_abi *rseq_ptr; static bool no_rseq; static void init_extend_map(void) { extern ptrdiff_t __rseq_offset; extern unsigned int __rseq_size; int ret; if (no_rseq) return; if (__rseq_size) { if (__rseq_size < sizeof(rseq_map)) { printf("glibc rseq less than required mapping\n"); return; } rseq_ptr = __builtin_thread_pointer() + __rseq_offset; printf("Using glibc rseq %p\n", rseq_ptr); return; } rseq_ptr = &rseq_map; ret = rseq(rseq_ptr, sizeof(rseq_map), 0, 0); perror("rseq"); printf("ret = %d (%zd) %p\n", ret, sizeof(rseq_map), &rseq_map); if (ret < 0) rseq_ptr = NULL; } struct data; struct thread_data { unsigned long long start_wait; unsigned long long x_count; unsigned long long total; unsigned long long max; unsigned long long min; unsigned long long total_wait; unsigned long long max_wait; unsigned long long min_wait; struct data *data; }; struct data { unsigned long long x; unsigned long lock; struct thread_data *tdata; bool done; }; static inline unsigned long cmpxchg(volatile unsigned long *ptr, unsigned long old, unsigned long new) { unsigned long prev; asm volatile("lock; cmpxchg %b1,%2" : "=a"(prev) : "q"(new), "m"(*(ptr)), "0"(old) : "memory"); return prev; } static inline unsigned clrbit(volatile unsigned *ptr) { unsigned ret; asm volatile("andb %b1,%0" : "+m" (*(volatile char *)ptr) : "iq" (0x2) : "memory"); ret = *ptr; *ptr = 0; return ret; } static void extend(void) { if (!rseq_ptr) return; rseq_ptr->cr_flags = 1; } static void unextend(void) { unsigned prev; if (!rseq_ptr) return; prev = clrbit(&rseq_ptr->cr_flags); if (prev & 2) { tracefs_printf(NULL, "Yield!\n"); sched_yield(); } } #define sec2usec(sec) (sec * 1000000ULL) #define usec2sec(usec) (usec / 1000000ULL) static unsigned long long get_time(void) { struct timeval tv; unsigned long long time; gettimeofday(&tv, NULL); time = sec2usec(tv.tv_sec); time += tv.tv_usec; return time; } static void grab_lock(struct thread_data *tdata, struct data *data) { unsigned long long start, end, delta; unsigned long long end_wait; unsigned long long last; unsigned long prev; if (!tdata->start_wait) tdata->start_wait = get_time(); while (data->lock && !data->done) rmb(); extend(); start = get_time(); prev = cmpxchg(&data->lock, 0, 1); if (prev) { unextend(); return; } end_wait = get_time(); tracefs_printf(NULL, "Have lock!\n"); delta = end_wait - tdata->start_wait; tdata->start_wait = 0; if (!tdata->total_wait || tdata->max_wait < delta) tdata->max_wait = delta; if (!tdata->total_wait || tdata->min_wait > delta) tdata->min_wait = delta; tdata->total_wait += delta; data->x++; last = data->x; if (data->lock != 1) { printf("Failed locking\n"); exit(-1); } prev = cmpxchg(&data->lock, 1, 0); end = get_time(); if (prev != 1) { printf("Failed unlocking\n"); exit(-1); } tracefs_printf(NULL, "released lock!\n"); unextend(); delta = end - start; if (!tdata->total || tdata->max < delta) tdata->max = delta; if (!tdata->total || tdata->min > delta) tdata->min = delta; tdata->total += delta; tdata->x_count++; /* Let someone else have a turn */ while (data->x == last && !data->done) rmb(); } static void *run_thread(void *d) { struct thread_data *tdata = d; struct data *data = tdata->data; init_extend_map(); pthread_barrier_wait(&pbarrier); while (!data->done) { grab_lock(tdata, data); } return NULL; } int main (int argc, char **argv) { unsigned long long total_wait = 0; unsigned long long secs; pthread_t *threads; struct data data; int cpus; memset(&data, 0, sizeof(data)); cpus = sysconf(_SC_NPROCESSORS_CONF); threads = calloc(cpus + 1, sizeof(*threads)); if (!threads) { perror("threads"); exit(-1); } data.tdata = calloc(cpus + 1, sizeof(*data.tdata)); if (!data.tdata) { perror("Allocating tdata"); exit(-1); } tracefs_print_init(NULL); pthread_barrier_init(&pbarrier, NULL, cpus + 2); for (int i = 0; i <= cpus; i++) { int ret; data.tdata[i].data = &data; ret = pthread_create(&threads[i], NULL, run_thread, &data.tdata[i]); if (ret < 0) { perror("creating threads"); exit(-1); } } pthread_barrier_wait(&pbarrier); sleep(5); printf("Finish up\n"); data.done = true; wmb(); for (int i = 0; i <= cpus; i++) { pthread_join(threads[i], NULL); printf("thread %i:\n", i); printf(" count:\t%lld\n", data.tdata[i].x_count); printf(" total:\t%lld\n", data.tdata[i].total); printf(" max:\t%lld\n", data.tdata[i].max); printf(" min:\t%lld\n", data.tdata[i].min); printf(" total wait:\t%lld\n", data.tdata[i].total_wait); printf(" max wait:\t%lld\n", data.tdata[i].max_wait); printf(" min wait:\t%lld\n", data.tdata[i].min_wait); total_wait += data.tdata[i].total_wait; } secs = usec2sec(total_wait); printf("Ran for %lld times\n", data.x); printf("Total wait time: %lld.%06lld\n", secs, total_wait - sec2usec(secs)); return 0; }