Currently perf_sched_cb_{inc,dec}() are called from
pmu::{start,stop}(), which has the problem that this can happen from
NMI context; this makes it hard to optimize perf_pmu_sched_task().

Furthermore, we really only need this accounting on pmu::{add,del}(),
so doing it from pmu::{start,stop}() is doing more work than we
really need.

Introduce x86_pmu::{add,del}() and wire up the LBR and PEBS.

Signed-off-by: Peter Zijlstra (Intel)
---
 arch/x86/events/core.c       | 24 ++++++++++++++++++++++--
 arch/x86/events/intel/core.c | 31 ++++++++++++++++++-------------
 arch/x86/events/intel/ds.c   |  8 ++------
 arch/x86/events/intel/lbr.c  |  4 ++--
 arch/x86/events/perf_event.h | 10 ++++++++--
 5 files changed, 52 insertions(+), 25 deletions(-)

--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1197,6 +1197,9 @@ static int x86_pmu_add(struct perf_event
 	 * If group events scheduling transaction was started,
 	 * skip the schedulability test here, it will be performed
 	 * at commit time (->commit_txn) as a whole.
+	 *
+	 * If commit fails, we'll call ->del() on all events
+	 * for which ->add() was called.
 	 */
 	if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
 		goto done_collect;
@@ -1219,6 +1222,14 @@ static int x86_pmu_add(struct perf_event
 	cpuc->n_added += n - n0;
 	cpuc->n_txn += n - n0;
 
+	if (x86_pmu.add) {
+		/*
+		 * This is before x86_pmu_enable() will call x86_pmu_start(),
+		 * so we enable LBRs before an event needs them etc..
+		 */
+		x86_pmu.add(event);
+	}
+
 	ret = 0;
 out:
 	return ret;
@@ -1342,7 +1353,7 @@ static void x86_pmu_del(struct perf_even
 	event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
 
 	/*
-	 * If we're called during a txn, we don't need to do anything.
+	 * If we're called during a txn, we only need to undo x86_pmu.add.
 	 * The events never got scheduled and ->cancel_txn will truncate
 	 * the event_list.
 	 *
@@ -1350,7 +1361,7 @@ static void x86_pmu_del(struct perf_even
 	 * an event added during that same TXN.
 	 */
 	if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
-		return;
+		goto do_del;
 
 	/*
 	 * Not a TXN, therefore cleanup properly.
@@ -1380,6 +1391,15 @@ static void x86_pmu_del(struct perf_even
 	--cpuc->n_events;
 
 	perf_event_update_userpage(event);
+
+do_del:
+	if (x86_pmu.del) {
+		/*
+		 * This is after x86_pmu_stop(); so we disable LBRs after any
+		 * event can need them etc..
+		 */
+		x86_pmu.del(event);
+	}
 }
 
 int x86_pmu_handle_irq(struct pt_regs *regs)
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -1907,13 +1907,6 @@ static void intel_pmu_disable_event(stru
 	cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
 	cpuc->intel_cp_status &= ~(1ull << hwc->idx);
 
-	/*
-	 * must disable before any actual event
-	 * because any event may be combined with LBR
-	 */
-	if (needs_branch_stack(event))
-		intel_pmu_lbr_disable(event);
-
 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
 		intel_pmu_disable_fixed(hwc);
 		return;
@@ -1925,6 +1918,14 @@ static void intel_pmu_disable_event(stru
 		intel_pmu_pebs_disable(event);
 }
 
+static void intel_pmu_del_event(struct perf_event *event)
+{
+	if (needs_branch_stack(event))
+		intel_pmu_lbr_del(event);
+	if (event->attr.precise_ip)
+		intel_pmu_pebs_del(event);
+}
+
 static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
 {
 	int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
@@ -1968,12 +1969,6 @@ static void intel_pmu_enable_event(struc
 		intel_pmu_enable_bts(hwc->config);
 		return;
 	}
-	/*
-	 * must enabled before any actual event
-	 * because any event may be combined with LBR
-	 */
-	if (needs_branch_stack(event))
-		intel_pmu_lbr_enable(event);
 
 	if (event->attr.exclude_host)
 		cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
@@ -1994,6 +1989,14 @@ static void intel_pmu_enable_event(struc
 	__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
 }
 
+static void intel_pmu_add_event(struct perf_event *event)
+{
+	if (event->attr.precise_ip)
+		intel_pmu_pebs_add(event);
+	if (needs_branch_stack(event))
+		intel_pmu_lbr_add(event);
+}
+
 /*
  * Save and restart an expired event. Called by NMI contexts,
  * so it has to be careful about preempting normal event ops:
@@ -3290,6 +3293,8 @@ static __initconst const struct x86_pmu
 	.enable_all		= intel_pmu_enable_all,
 	.enable			= intel_pmu_enable_event,
 	.disable		= intel_pmu_disable_event,
+	.add			= intel_pmu_add_event,
+	.del			= intel_pmu_del_event,
 	.hw_config		= intel_pmu_hw_config,
 	.schedule_events	= x86_schedule_events,
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -831,7 +831,7 @@ static inline void pebs_update_threshold
 	ds->pebs_interrupt_threshold = threshold;
 }
 
-static void intel_pmu_pebs_add(struct perf_event *event)
+void intel_pmu_pebs_add(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
@@ -853,8 +853,6 @@ void intel_pmu_pebs_enable(struct perf_e
 	struct hw_perf_event *hwc = &event->hw;
 	struct debug_store *ds = cpuc->ds;
 
-	intel_pmu_pebs_add(event);
-
 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
 	cpuc->pebs_enabled |= 1ULL << hwc->idx;
@@ -874,7 +872,7 @@ void intel_pmu_pebs_enable(struct perf_e
 	}
 }
 
-static void intel_pmu_pebs_del(struct perf_event *event)
+void intel_pmu_pebs_del(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
@@ -910,8 +908,6 @@ void intel_pmu_pebs_disable(struct perf_
 		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
 
 	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
-
-	intel_pmu_pebs_del(event);
 }
 
 void intel_pmu_pebs_enable_all(void)
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -422,7 +422,7 @@ static inline bool branch_user_callstack
 	return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
 }
 
-void intel_pmu_lbr_enable(struct perf_event *event)
+void intel_pmu_lbr_add(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct x86_perf_task_context *task_ctx;
@@ -450,7 +450,7 @@ void intel_pmu_lbr_enable(struct perf_ev
 	perf_sched_cb_inc(event->ctx->pmu);
 }
 
-void intel_pmu_lbr_disable(struct perf_event *event)
+void intel_pmu_lbr_del(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct x86_perf_task_context *task_ctx;
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -510,6 +510,8 @@ struct x86_pmu {
 	void		(*enable_all)(int added);
 	void		(*enable)(struct perf_event *);
 	void		(*disable)(struct perf_event *);
+	void		(*add)(struct perf_event *);
+	void		(*del)(struct perf_event *);
 	int		(*hw_config)(struct perf_event *event);
 	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
 	unsigned	eventsel;
@@ -890,6 +892,10 @@ extern struct event_constraint intel_skl
 
 struct event_constraint *intel_pebs_constraints(struct perf_event *event);
 
+void intel_pmu_pebs_add(struct perf_event *event);
+
+void intel_pmu_pebs_del(struct perf_event *event);
+
 void intel_pmu_pebs_enable(struct perf_event *event);
 
 void intel_pmu_pebs_disable(struct perf_event *event);
 
@@ -908,9 +914,9 @@ u64 lbr_from_signext_quirk_wr(u64 val);
 
 void intel_pmu_lbr_reset(void);
 
-void intel_pmu_lbr_enable(struct perf_event *event);
+void intel_pmu_lbr_add(struct perf_event *event);
 
-void intel_pmu_lbr_disable(struct perf_event *event);
+void intel_pmu_lbr_del(struct perf_event *event);
 
 void intel_pmu_lbr_enable_all(bool pmi);
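
The ordering contract the new hooks rely on can be sketched in a small
standalone program (plain userspace C; mock_pmu, sched_cb_count and the
mock_* functions are hypothetical stand-ins for the kernel's per-PMU state,
not the real API): ->add() always runs before ->start() and ->del() always
runs after ->stop(), so moving the perf_sched_cb_{inc,dec}() accounting into
add/del keeps it out of the NMI-reachable start/stop paths while staying
balanced across the event's lifetime.

	/*
	 * Standalone sketch (userspace C, hypothetical names) of the
	 * lifetime ordering: ->add() before ->start(), ->del() after
	 * ->stop().  Only add/del touch the sched_cb count, so the
	 * NMI-reachable start/stop paths stay accounting-free.
	 */
	#include <assert.h>
	#include <stdio.h>

	struct mock_pmu {
		int sched_cb_count;	/* stand-in for the per-PMU sched_cb count */
	};

	static void mock_add(struct mock_pmu *p)   { p->sched_cb_count++; }	/* cf. intel_pmu_lbr_add() */
	static void mock_start(struct mock_pmu *p) { (void)p; /* no accounting; NMI-safe */ }
	static void mock_stop(struct mock_pmu *p)  { (void)p; /* no accounting; NMI-safe */ }
	static void mock_del(struct mock_pmu *p)   { p->sched_cb_count--; }	/* cf. intel_pmu_lbr_del() */

	int main(void)
	{
		struct mock_pmu pmu = { 0 };

		mock_add(&pmu);		/* pmu::add(): accounting, never from NMI */
		mock_start(&pmu);	/* start/stop may repeat (PMI, throttling) ... */
		mock_stop(&pmu);
		mock_start(&pmu);	/* ... without touching the count */
		mock_stop(&pmu);
		mock_del(&pmu);		/* pmu::del(): accounting undone exactly once */

		assert(pmu.sched_cb_count == 0);
		printf("sched_cb count balanced: %d\n", pmu.sched_cb_count);
		return 0;
	}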