#include <asm-x86_64/msr.h>
#include <stdio.h>
#include <stdlib.h>

/* Shorthand for the GCC-style attribute that forbids inlining a function. */
#define noinline __attribute__((__noinline__))

/*
 * Baseline payload for the timing harness: performs no work and always
 * yields 1, so timing it measures only the harness overhead.
 */
static inline int empty() {
  enum { BASELINE_RESULT = 1 };

  return BASELINE_RESULT;
}

/*
 * Execute LSL (load segment limit) on a fixed segment selector and return
 * the value left in the destination register.
 *
 * NOTE(review): LSL does not write its destination when the selector is not
 * valid or not visible at the current privilege level; in that case `limit`
 * would be returned uninitialized.  Confirm the selector below exists on the
 * kernel this is run on.
 */
static inline int gdt_limit() {
  /* "segment" value is from Linux 2.6.26/include/asm-x86/segment.h */
  const int segment = (15 * 8 + 3);     /* descriptor index 15, low bits = 3 */
  int limit;
  /* volatile: the instruction itself is what is being timed; keep it. */
  asm volatile("lsl %1,%0" : "=r" (limit) : "r" (segment));
  return limit;
}

/*
 * Execute SIDT and return the 16-bit limit field of the IDT register.
 *
 * In 64-bit mode SIDT stores a 10-byte pseudo-descriptor at the address of
 * its operand: a 2-byte limit immediately followed by an 8-byte base.  The
 * destination must therefore be a packed 2+8 byte object that is handed to
 * the asm as a whole, so that the compiler knows all ten bytes are written
 * and `limit` is read back from the bytes the instruction actually filled.
 * (A padded layout would make `limit` alias part of the base address.)
 */
static inline int idt_limit() {
  struct {
    unsigned short limit;               /* 16b */
    unsigned long long address;         /* 64b */
  } __attribute__((__packed__)) idt;
  /* volatile: the instruction itself is what is being timed; keep it. */
  asm volatile("sidt %0" : "=m"(idt));
  return idt.limit;
}

/*
 * Emit the raw opcode bytes 0f 01 f9 (the RDTSCP encoding) and return the
 * value the instruction leaves in ECX.  The EAX and EDX outputs are captured
 * only so the compiler knows those registers are overwritten.
 *
 * The instruction is emitted unconditionally; run_test() checks have_tscp()
 * before selecting this payload.
 */
static inline int tscp_aux() {
  int eax, edx, aux;
  asm volatile(".byte 0x0f,0x01,0xf9" : "=a" (eax), "=d" (edx), "=c" (aux));
  return aux;
}

/*
 * Execute CPUID with EAX = op and return the resulting EDX value.
 *
 * op: CPUID leaf number.  ECX is not set on entry, so only leaves that do
 *     not take a sub-leaf should be queried through this helper.
 *
 * The EAX result is captured but discarded; EBX and ECX are declared as
 * clobbered because CPUID overwrites them.
 */
static inline int cpuid_edx_val(unsigned int op) {
  int eax, edx;

  asm("cpuid" : "=a" (eax), "=d" (edx) : "0" (op) : "bx", "cx");
  return edx;
}

/*
 * Report whether the CPU advertises RDTSCP: bit 27 of EDX from CPUID leaf
 * 0x80000001.  Returns 1 when the bit is set, 0 otherwise.
 *
 * Declared static: under C99 and later, a plain `inline` definition provides
 * no external definition, so any call the compiler decides not to inline
 * (e.g. at -O0) would be left unresolved at link time.
 */
static inline int /*bool*/ have_tscp() {
  return (cpuid_edx_val(0x80000001) & (1 << 27)) != 0;
}


/*
 * Time-stamp counter reading, or the difference between two readings.
 * Printed with %lld in main(), so it must remain `long long`.
 */
typedef long long tsc;

/*
 * Read the time-stamp counter with RDTSC and return it as a single 64-bit
 * value (EDX supplies the high 32 bits, EAX the low 32 bits).
 *
 * Marked noinline, so every timestamp is taken through a real call;
 * presumably this keeps the overhead identical for the start and stop
 * readings in run_test().
 *
 * NOTE(review): the high half is shifted as a signed long long; a counter
 * value with bit 63 set would overflow that shift.
 */
noinline tsc now() {
  unsigned int eax_lo, edx_hi;
  tsc now;      /* local result; shadows the function name inside this body */
  asm volatile("rdtsc" : "=a" (eax_lo), "=d" (edx_hi));
  now = ((tsc)eax_lo) | ((tsc)(edx_hi) << 32);
  return now;
}

/*
 * qsort() comparison callback that orders tsc values ascending.
 *
 * va, vb: pointers to the two tsc elements being compared.
 * Returns a negative value, zero, or a positive value when *va is less
 * than, equal to, or greater than *vb.
 *
 * The result is built from two comparisons rather than from the difference
 * *va - *vb: the difference is a 64-bit quantity, and narrowing it to the
 * int return type can flip its sign or turn a non-zero difference into 0.
 */
int tsc_sort_pred(const void *va, const void *vb) {
  const tsc lhs = *(const tsc *)va;
  const tsc rhs = *(const tsc *)vb;

  return (lhs > rhs) - (lhs < rhs);
}

/*
 * Identifies the payload that run_test() times.  main() uses these values
 * as indices into func[] and names[], iterating from 0 through TSCP_AUX, so
 * the enumerator order must match both tables and TSCP_AUX must stay last.
 */
typedef enum which {
  EMPTY,                     /* Must be first (0'th) to set base_cost. */
  GDT_LIMIT,                 /* LSL on a fixed selector, see gdt_limit(). */
  IDT_LIMIT,                 /* SIDT, see idt_limit(). */
  TSCP_AUX,                  /* RDTSCP, see tscp_aux(); skipped if unsupported. */
} which;

/*
 * Sink for payload results: run_test() stores every measured value here.
 * Being volatile, each store must be performed, so the payload's result is
 * always consumed.
 */
volatile int g_sink;

/*
 * Time n back-to-back executions of the payload selected by `test`.
 *
 * delta: output array; entry i receives (stop - start) for iteration i.
 *        Must have room for at least n entries.
 * n:     number of iterations to time.
 * test:  payload selector.
 *
 * Returns 0 without writing to delta when TSCP_AUX is requested but
 * have_tscp() reports no support; otherwise fills delta and returns 1.
 */
static inline int/*bool*/ run_test(tsc *delta, int n, which test) {
  int i;

  /* Skip the RDTSCP payload on CPUs that do not advertise the instruction. */
  if ((test == TSCP_AUX) && !have_tscp())
    return 0;

  for (i = 0; i < n; ++i) {
    /*
     * Written before read.  Really!
     * NOTE(review): that holds only while `test` is one of the four
     * enumerators handled below; the switch has no default case.
     */
    int val;
    tsc stop;
    tsc start = now();
    /*
     * NOP with a "memory" clobber: acts as a compiler-level barrier,
     * presumably to keep the payload between the two timestamp reads.
     */
    asm volatile("nop" ::: "memory");
    switch (test) {
      case EMPTY:      val = empty();      break;
      case GDT_LIMIT:  val = gdt_limit();  break;
      case IDT_LIMIT:  val = idt_limit();  break;
      case TSCP_AUX:   val = tscp_aux();   break;
    }
    asm volatile("nop" ::: "memory");
    stop = now();
    /* Consume the result (outside the timed region) via the volatile sink. */
    g_sink = val;
    *delta++ = stop - start;
  }
  return 1;
}

/*
 * Out-of-line entry point that times the EMPTY (baseline) payload.
 *
 * delta: output array with room for at least n measurements.
 * n:     number of iterations to time.
 * Returns whatever run_test() returns (non-zero when the test ran).
 */
noinline int/*bool*/ run_test_empty(tsc *delta, int n) {
  /* Forward the caller's buffer; `tsc` is the element type, not a value. */
  return run_test(delta, n, EMPTY);
}

/*
 * Out-of-line entry point that times the GDT_LIMIT payload.
 *
 * delta: output array with room for at least n measurements.
 * n:     number of iterations to time.
 * Returns whatever run_test() returns (non-zero when the test ran).
 */
noinline int/*bool*/ run_test_gdt_limit(tsc *delta, int n) {
  /* Forward the caller's buffer; `tsc` is the element type, not a value. */
  return run_test(delta, n, GDT_LIMIT);
}

/*
 * Out-of-line entry point that times the IDT_LIMIT payload.
 *
 * delta: output array with room for at least n measurements.
 * n:     number of iterations to time.
 * Returns whatever run_test() returns (non-zero when the test ran).
 */
noinline int/*bool*/ run_test_idt_limit(tsc *delta, int n) {
  /* Forward the caller's buffer; `tsc` is the element type, not a value. */
  return run_test(delta, n, IDT_LIMIT);
}

/*
 * Out-of-line entry point that times the TSCP_AUX payload.
 *
 * delta: output array with room for at least n measurements.
 * n:     number of iterations to time.
 * Returns whatever run_test() returns; that is 0, with delta untouched,
 * when the CPU does not advertise RDTSCP.
 */
noinline int/*bool*/ run_test_tscp_aux(tsc *delta, int n) {
  /* Forward the caller's buffer; `tsc` is the element type, not a value. */
  return run_test(delta, n, TSCP_AUX);
}

/*
 * Signature shared by the run_test_*() entry points: fill delta with n
 * measurements and return non-zero if the test actually ran.
 */
typedef int/*bool*/ (*funcptr)(tsc *delta, int n);

/*
 * Dispatch table indexed by `which`; main() walks it from 0 to TSCP_AUX, so
 * the entries must stay in enum order and in step with names[].
 * Obfuscate pointers so various run_test*() cases do not get inlined.
 */
funcptr func[] = {
  run_test_empty,
  run_test_gdt_limit,
  run_test_idt_limit,
  run_test_tscp_aux
};

/* Display name for each test, indexed by `which`; keep in step with func[]. */
const char *names[] = { "EMPTY", "GDT_LIMIT", "IDT_LIMIT", "TSCP_AUX" };

int main (int argc, char **argv) {
  int t;
  const int N = 1000;
  tsc delta[N];
  tsc base_cost = 0;

  /* In principle can change 'func' so compiler cannot dispatch */
  printf("Starting tests...\n");

  for (t=0; t<=TSCP_AUX; ++t) {
    int ran = (*func[t])(delta, N);
    const char *name = names[t];

    if (!ran) {
      printf("Not-run: %s\n", name);
      continue;
    }

    {
      static int bin[] = { 5, 10, 20, 50 };
      int i;
      qsort(delta, N, sizeof(tsc), tsc_sort_pred);
      printf("Run: %s\ttotal-tests= %d\tbests: ", name, N);
      for (i = 0; i < 5 ; ++i)
        printf(" %2lld", delta[i] - base_cost);
      printf("\tmedians:");
      for (i = 0; i < sizeof(bin)/sizeof(bin[0]); ++i)
        printf(" %2d%%: %2lld", bin[i], delta[N * bin[i] / 100] - base_cost);
      printf("\n");
    }

    if (t == EMPTY)
      base_cost = delta[0];
  }
  return 0;
}