/* Demonstration of Linux PERF bug: * Linux is unable to count BRANCH_INSTRUCTIONS or BRANCH_MISSES * at the same time as CACHE_REFERENCES or CACHE_MISSES. */ #include #include #include #include #include #include #include #include #include #include static int perf_event_open ( struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags ) { int ret; ret = (int) syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags); return ret; } int main( int argc, const char *const* argv) { struct perf_event_attr pea = {0}; struct evcfg { uint64_t perf_type; uint64_t perf_cfg; const char *name; int fd; uint64_t id; } pe [] = { #ifndef USE_RAW_PMU // so we can test using Generic Kernel Event mapping: #ifndef NO_BUG_NO_BRANCH #ifndef NO_BUG_NO_BRANCH_INST { PERF_TYPE_HARDWARE , PERF_COUNT_HW_BRANCH_INSTRUCTIONS , "Branch Instructions" , -1, 0 } #endif #ifndef NO_BUG_NO_BRANCH_MISS , { PERF_TYPE_HARDWARE , PERF_COUNT_HW_BRANCH_MISSES , "Branch Misses" , -1, 0 } #endif , #endif { PERF_TYPE_HARDWARE , PERF_COUNT_HW_INSTRUCTIONS , "Instructions" , -1, 0 } , { PERF_TYPE_HARDWARE , PERF_COUNT_HW_CPU_CYCLES , "CPU Cycles" , -1, 0 } , { PERF_TYPE_HARDWARE , PERF_COUNT_HW_REF_CPU_CYCLES , "Ref. CPU Cycles" , -1, 0 } , { PERF_TYPE_HARDWARE , PERF_COUNT_HW_BUS_CYCLES , "Bus Cycles" , -1, 0 } #ifndef NO_BUG_NO_CACHE #ifndef NO_BUG_NO_CACHE_REF , { PERF_TYPE_HARDWARE , PERF_COUNT_HW_CACHE_REFERENCES , "Cache References" , -1, 0 } #endif #ifndef NO_BUG_NO_CACHE_MISS , { PERF_TYPE_HARDWARE , PERF_COUNT_HW_CACHE_MISSES , "Cache Misses" , -1, 0 } #endif #endif #else // or test using raw PMU codes - these come from the Intel SDM, Chapter 19, Table 19-1, // and I've checked they are identical to the values in // /sys/bus/event_source/devices/cpu/events/{ #ifndef NO_BUG_NO_BRANCH #ifndef NO_BUG_NO_BRANCH_INST { PERF_TYPE_RAW , (1UL<<63U) | 0xC4 // branch_instructions , "Branch Instructions" , -1, 0 } #endif #ifndef NO_BUG_NO_BRANCH_MISS , { PERF_TYPE_RAW , (1UL<<63U) | 0xC5 // branch_misses , "Branch Misses" , -1, 0 } #endif , #endif { PERF_TYPE_RAW , (1UL<<63U) | 0xC0 // instructions , "Instructions" , -1, 0 } , { PERF_TYPE_RAW , (1UL<<63U) | 0x3C // cpu cycles , "CPU Cycles" , -1, 0 } , { PERF_TYPE_RAW , (1UL<<63U) | 0x0300 // ref cpu cycles , "Ref. CPU Cycles" , -1, 0 } , { PERF_TYPE_RAW , (1UL<<63U) | 0x013C // bus cycles , "Bus Cycles" , -1, 0 } #ifndef NO_BUG_NO_CACHE #ifndef NO_BUG_NO_CACHE_REF , { PERF_TYPE_RAW , (1UL<<63U) | 0x04F2E // cache references , "Cache References" , -1, 0 } #endif #ifndef NO_BUG_NO_CACHE_MISS , { PERF_TYPE_RAW , (1UL<<63U) | 0x0412E // cache misses , "Cache Misses" , -1, 0 } #endif #endif #endif }; #define N_EV (sizeof(pe)/sizeof(struct evcfg)) int fd=-1; int n_ev=0; pid_t pid=getpid(); for(; n_ev < N_EV; n_ev += 1) { memset(&pea, '\0', sizeof(pea)); pea.size = PERF_ATTR_SIZE_VER5; pea.type = pe[n_ev].perf_type; pea.config = pe[n_ev].perf_cfg; pea.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID | PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING ; pea.disabled=1; pea.exclude_kernel = 1; pea.exclude_idle = 1; pea.exclude_hv = 1; if((pe[n_ev].fd = perf_event_open ( &pea, pid, -1, fd, 0) ) == -1 ) { fprintf(stderr,"perf_event_open failed : %d : '%s'.\n", errno, strerror(errno)); return 1; } if( fd == -1) fd = pe[n_ev].fd; // this is the Group Leader FD if( 0 != ioctl( pe[n_ev].fd, PERF_EVENT_IOC_ID, &pe[n_ev].id)) { fprintf(stderr,"ioctl(fd, PERF_EVENT_IOC_ID) failed for #%d : %d : '%s'.\n", n_ev, errno, strerror(errno)); return 1; } } if( 0 != ioctl( fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP)) { fprintf(stderr,"ioctl(fd, PERF_EVENT_IOC_RESET) failed : %d : '%s'.\n", errno, strerror(errno)); return 1; } // do something to measure - let's try 100 long divisions: uint64_t a_num = 0x0102030405060708; uint64_t b_num = ~a_num; int cnt=100; if( 0 != ioctl( fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP)) { fprintf(stderr,"ioctl(fd, PERF_EVENT_IOC_ID) failed : %d : '%s'.\n", errno, strerror(errno)); return 1; } do { a_num=(b_num /= a_num); } while(--cnt); if( 0 != ioctl( fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP)) { fprintf(stderr,"ioctl(fd, PERF_EVENT_IOC_ID) failed : %d : '%s'.\n", errno, strerror(errno)); return 1; } struct { uint64_t nr, time_enabled, time_running; struct event { uint64_t value,id; } ev[N_EV]; } events; if( read(fd, &events, sizeof(events)) != sizeof(events)) { fprintf(stderr,"read of event group leader FD failed : %d : '%s'.\n", errno, strerror(errno)); return 1; } if( events.nr != N_EV ) { fprintf(stderr,"unexpected number of events read: %lu\n", events.nr); return 1; } struct event *ev = &events.ev[0]; bool non_zero_event=false; do { bool found=0; for(n_ev=0; n_ev < N_EV; n_ev += 1) { if( pe[n_ev].id == ev->id ) { found = true; break; } } if( ! found ) { fprintf(stderr,"Kernel returned unknown event ID: %lu", ev->id); return 1; } printf("EVENT: %s : %lu\n", pe[n_ev].name, ev->value); if (!non_zero_event) non_zero_event = ev->value != 0; ++ev; } while( --events.nr ); return (non_zero_event ? 0 : 1); }