Currently PEBS/BTS buffers are allocated when we instantiate the first event; when this fails, everything fails. This is a problem because BTS in particular tries to allocate a rather large buffer (64K), which can easily fail. This patch changes the logic such that when either buffer allocation fails, we simply don't allow events that would use these facilities, but continue functioning for all other events. This logic comes from a much larger patch proposed by Stephane. Suggested-by: Stephane Eranian Signed-off-by: Peter Zijlstra LKML-Reference: --- arch/x86/kernel/cpu/perf_event.c | 5 +- arch/x86/kernel/cpu/perf_event_intel_ds.c | 58 ++++++++++++++++++++++-------- 2 files changed, 47 insertions(+), 16 deletions(-) Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c =================================================================== --- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c +++ linux-2.6/arch/x86/kernel/cpu/perf_event.c @@ -238,6 +238,7 @@ struct x86_pmu { * Intel DebugStore bits */ int bts, pebs; + int bts_active, pebs_active; int pebs_record_size; void (*drain_pebs)(struct pt_regs *regs); struct event_constraint *pebs_constraints; @@ -478,7 +479,7 @@ static int x86_setup_perfctr(struct perf if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && (hwc->sample_period == 1)) { /* BTS is not supported by this architecture. */ - if (!x86_pmu.bts) + if (!x86_pmu.bts_active) return -EOPNOTSUPP; /* BTS is currently only allowed for user-mode. 
*/ @@ -497,7 +498,7 @@ static int x86_pmu_hw_config(struct perf int precise = 0; /* Support for constant skid */ - if (x86_pmu.pebs) { + if (x86_pmu.pebs_active) { precise++; /* Support for IP fixup */ Index: linux-2.6/arch/x86/kernel/cpu/perf_event_intel_ds.c =================================================================== --- linux-2.6.orig/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ linux-2.6/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -193,36 +193,66 @@ static void release_ds_buffers(void) static int reserve_ds_buffers(void) { - int cpu, err = 0; + int bts_err = 0, pebs_err = 0; + int cpu; + + x86_pmu.bts_active = 0; + x86_pmu.pebs_active = 0; if (!x86_pmu.bts && !x86_pmu.pebs) return 0; + if (!x86_pmu.bts) + bts_err = 1; + + if (!x86_pmu.pebs) + pebs_err = 1; + get_online_cpus(); for_each_possible_cpu(cpu) { - if (alloc_ds_buffer(cpu)) - break; + if (alloc_ds_buffer(cpu)) { + bts_err = 1; + pebs_err = 1; + } - if (alloc_bts_buffer(cpu)) - break; + if (!bts_err && alloc_bts_buffer(cpu)) + bts_err = 1; + + if (!pebs_err && alloc_pebs_buffer(cpu)) + pebs_err = 1; - if (alloc_pebs_buffer(cpu)) + if (bts_err && pebs_err) break; + } + + if (bts_err) { + for_each_possible_cpu(cpu) + release_bts_buffer(cpu); + } - err = 0; + if (pebs_err) { + for_each_possible_cpu(cpu) + release_pebs_buffer(cpu); } - if (err) - release_ds_buffers(); - else { + if (bts_err && pebs_err) { + for_each_possible_cpu(cpu) + release_ds_buffer(cpu); + } else { + if (x86_pmu.bts && !bts_err) + x86_pmu.bts_active = 1; + + if (x86_pmu.pebs && !pebs_err) + x86_pmu.pebs_active = 1; + for_each_online_cpu(cpu) init_debug_store_on_cpu(cpu); } put_online_cpus(); - return err; + return 0; } /* @@ -287,7 +317,7 @@ static int intel_pmu_drain_bts_buffer(vo if (!event) return 0; - if (!ds) + if (!x86_pmu.bts_active) return 0; at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; @@ -557,7 +587,7 @@ static void intel_pmu_drain_pebs_core(st struct pebs_record_core *at, *top; int n; - if (!ds || 
!x86_pmu.pebs) + if (!x86_pmu.pebs_active) return; at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base; @@ -599,7 +629,7 @@ static void intel_pmu_drain_pebs_nhm(str u64 status = 0; int bit, n; - if (!ds || !x86_pmu.pebs) + if (!x86_pmu.pebs_active) return; at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/