Message-ID: <1269353485.5109.48.camel@twins>
Date: Tue, 23 Mar 2010 15:11:25 +0100
From: Peter Zijlstra <peterz@...radead.org>
To: Stephane Eranian <eranian@...gle.com>
Cc: linux-kernel@...r.kernel.org, mingo <mingo@...e.hu>,
paulus@...ba.org, davem@...emloft.net, fweisbec@...il.com,
robert.richter@....com, perfmon2-devel@...ts.sf.net,
eranian@...il.com, "hpa@...or.com" <hpa@...or.com>,
Thomas Gleixner <tglx@...utronix.de>,
Manfred Spraul <manfred@...orfullife.com>
Subject: Re: [PATCH] perf_events: fix bug in AMD per-cpu initialization
On Thu, 2010-03-18 at 01:33 +0100, Stephane Eranian wrote:
> On Thu, Mar 18, 2010 at 12:47 AM, Peter Zijlstra <peterz@...radead.org> wrote:
> > On Wed, 2010-03-17 at 10:40 +0200, Stephane Eranian wrote:
> >> On AMD processors, we need to allocate a data structure per Northbridge
> >> to handle certain events.
> >>
> >> On CPU initialization, we need to query the Northbridge id and check
> >> whether the structure is already allocated or not. We use the
> >> amd_get_nb_id() function to request the Northbridge identification.
> >>
> >> The recent cleanup of the CPU online/offline initialization introduced
> >> a bug: AMD CPU initialization is invoked from the CPU_UP_PREPARE
> >> callback, which runs before the CPU's Northbridge id is calculated.
> >> Therefore no processor except the boot processor had a Northbridge
> >> structure allocated, which caused bogus NB event scheduling.
> >>
> >> This patch uses the CPU_ONLINE callback to initialize the AMD
> >> Northbridge structure. This way amd_get_nb_id() returns valid
> >> information.
> >>
> >> The x86_cpu_up() callback was added; it could not be named cpu_online
> >> because of an existing macro with that name.
> >>
> >> Signed-off-by: Stephane Eranian <eranian@...gle.com>
> >
> >
> > No, ONLINE is not exposed for a good reason; it's always wrong.
> >
> > Use prepare to allocate data, and starting to initialize stuff on the
> > cpu proper once it's up. Online is after the cpu is up and running and
> > we are already scheduling stuff on it, so it's too late to initialize
> > things.
> >
> >
> I can't, because amd_get_nb_id() returns garbage for the CPU when you
> call it from CPU_STARTING. So it looks like the initialization of
> cpu_llc_id needs to be moved way earlier. I don't want to have to
> duplicate that code.

Crap, you're right: either notify_cpu_starting() is called too early, or
smp_store_cpu_info() is called too late.

Since smp_store_cpu_info() relies on the result of calibrate_delay() we
can't easily change that order, but since there really isn't any other
in-tree CPU_STARTING user (I appear to have created the first?!) we can
easily move that notifier later.

(What's up with that IRQ-enable over calibrate_delay()? Can't we simply
enable the NMI watchdog later?)

So I guess something like the below should work:
---
 arch/x86/kernel/smpboot.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 06d98ae..6808b93 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -242,8 +242,6 @@ static void __cpuinit smp_callin(void)
 	end_local_APIC_setup();
 	map_cpu_to_logical_apicid();
 
-	notify_cpu_starting(cpuid);
-
 	/*
 	 * Need to setup vector mappings before we enable interrupts.
 	 */
@@ -264,6 +262,8 @@ static void __cpuinit smp_callin(void)
 	 */
 	smp_store_cpu_info(cpuid);
 
+	notify_cpu_starting(cpuid);
+
 	/*
 	 * Allow the master to continue.
 	 */
Which then allows us to write something like:
---
 arch/x86/kernel/cpu/perf_event.c     |    8 ++-
 arch/x86/kernel/cpu/perf_event_amd.c |   69 ++++++++++++++++++---------------
 2 files changed, 43 insertions(+), 34 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index f571f51..4db6eaf 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -209,7 +209,7 @@ struct x86_pmu {
 	struct event_constraint *event_constraints;
 	void		(*quirks)(void);
 
-	void		(*cpu_prepare)(int cpu);
+	int		(*cpu_prepare)(int cpu);
 	void		(*cpu_starting)(int cpu);
 	void		(*cpu_dying)(int cpu);
 	void		(*cpu_dead)(int cpu);
@@ -1330,11 +1330,12 @@ static int __cpuinit
 x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (long)hcpu;
+	int ret = NOTIFY_OK;
 
 	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_UP_PREPARE:
 		if (x86_pmu.cpu_prepare)
-			x86_pmu.cpu_prepare(cpu);
+			ret = x86_pmu.cpu_prepare(cpu);
 		break;
 
 	case CPU_STARTING:
@@ -1347,6 +1348,7 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
 		x86_pmu.cpu_dying(cpu);
 		break;
 
+	case CPU_UP_CANCELED:
 	case CPU_DEAD:
 		if (x86_pmu.cpu_dead)
 			x86_pmu.cpu_dead(cpu);
@@ -1356,7 +1358,7 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
 		break;
 	}
 
-	return NOTIFY_OK;
+	return ret;
 }
 
 static void __init pmu_check_apic(void)
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index bdd8f8e..b78c6c5 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -293,51 +293,55 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
 	return nb;
 }
 
-static void amd_pmu_cpu_online(int cpu)
+static int amd_pmu_cpu_prepare(int cpu)
 {
-	struct cpu_hw_events *cpu1, *cpu2;
-	struct amd_nb *nb = NULL;
+	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+	WARN_ON_ONCE(cpuc->amd_nb);
+
+	if (boot_cpu_data.x86_max_cores < 2)
+		return NOTIFY_OK;
+
+	cpuc->amd_nb = amd_alloc_nb(cpu, -1);
+	if (!cpuc->amd_nb)
+		return NOTIFY_BAD;
+
+	return NOTIFY_OK;
+}
+
+static void amd_pmu_cpu_starting(int cpu)
+{
+	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+	struct amd_nb *nb;
 	int i, nb_id;
 
 	if (boot_cpu_data.x86_max_cores < 2)
 		return;
 
-	/*
-	 * function may be called too early in the
-	 * boot process, in which case nb_id is bogus
-	 */
 	nb_id = amd_get_nb_id(cpu);
-	if (nb_id == BAD_APICID)
-		return;
-
-	cpu1 = &per_cpu(cpu_hw_events, cpu);
-	cpu1->amd_nb = NULL;
+	WARN_ON_ONCE(nb_id == BAD_APICID);
 
 	raw_spin_lock(&amd_nb_lock);
 
 	for_each_online_cpu(i) {
-		cpu2 = &per_cpu(cpu_hw_events, i);
-		nb = cpu2->amd_nb;
-		if (!nb)
+		nb = per_cpu(cpu_hw_events, i).amd_nb;
+		if (WARN_ON_ONCE(!nb))
 			continue;
-		if (nb->nb_id == nb_id)
-			goto found;
-	}
 
-	nb = amd_alloc_nb(cpu, nb_id);
-	if (!nb) {
-		pr_err("perf_events: failed NB allocation for CPU%d\n", cpu);
-		raw_spin_unlock(&amd_nb_lock);
-		return;
+		if (nb->nb_id == nb_id) {
+			kfree(cpuc->amd_nb);
+			cpuc->amd_nb = nb;
+			break;
+		}
 	}
-found:
-	nb->refcnt++;
-	cpu1->amd_nb = nb;
+
+	cpuc->amd_nb->nb_id = nb_id;
+	cpuc->amd_nb->refcnt++;
 
 	raw_spin_unlock(&amd_nb_lock);
 }
 
-static void amd_pmu_cpu_offline(int cpu)
+static void amd_pmu_cpu_dead(int cpu)
 {
 	struct cpu_hw_events *cpuhw;
 
@@ -349,8 +353,10 @@ static void amd_pmu_cpu_offline(int cpu)
 	raw_spin_lock(&amd_nb_lock);
 
 	if (cpuhw->amd_nb) {
-		if (--cpuhw->amd_nb->refcnt == 0)
-			kfree(cpuhw->amd_nb);
+		struct amd_nb *nb = cpuhw->amd_nb;
+
+		if (nb->nb_id == -1 || --nb->refcnt == 0)
+			kfree(nb);
 
 		cpuhw->amd_nb = NULL;
 	}
@@ -381,8 +387,9 @@ static __initconst struct x86_pmu amd_pmu = {
 	.get_event_constraints	= amd_get_event_constraints,
 	.put_event_constraints	= amd_put_event_constraints,
 
-	.cpu_prepare		= amd_pmu_cpu_online,
-	.cpu_dead		= amd_pmu_cpu_offline,
+	.cpu_prepare		= amd_pmu_cpu_prepare,
+	.cpu_starting		= amd_pmu_cpu_starting,
+	.cpu_dead		= amd_pmu_cpu_dead,
 };
 
 static __init int amd_pmu_init(void)
--
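
[ For reference, the NB lifecycle the second patch implements can be
  exercised as a standalone userspace analogue; everything below is
  illustrative naming, not kernel code: ]

#include <stdio.h>
#include <stdlib.h>

#define NCPUS 8

struct node_struct {
	int id;			/* -1 until attached to a real node */
	int refcnt;
};

static struct node_struct *per_cpu_nb[NCPUS];

/* CPU_UP_PREPARE analogue: allocate a detached placeholder. */
static int cpu_prepare(int cpu)
{
	per_cpu_nb[cpu] = calloc(1, sizeof(*per_cpu_nb[cpu]));
	if (!per_cpu_nb[cpu])
		return -1;		/* would veto the bringup */
	per_cpu_nb[cpu]->id = -1;	/* placeholder, not attached */
	return 0;
}

/* CPU_STARTING analogue: the node id is known now; adopt an
 * already-attached structure for the same node if one exists,
 * otherwise promote the placeholder. */
static void cpu_starting(int cpu, int node_id)
{
	int i;

	for (i = 0; i < NCPUS; i++) {
		struct node_struct *nb = per_cpu_nb[i];

		if (i == cpu || !nb || nb->id != node_id)
			continue;
		free(per_cpu_nb[cpu]);	/* drop the placeholder */
		per_cpu_nb[cpu] = nb;
		break;
	}
	per_cpu_nb[cpu]->id = node_id;
	per_cpu_nb[cpu]->refcnt++;
}

/* CPU_DEAD / CPU_UP_CANCELED analogue: free on last reference, or
 * immediately if the placeholder never got attached (id == -1). */
static void cpu_dead(int cpu)
{
	struct node_struct *nb = per_cpu_nb[cpu];

	if (nb && (nb->id == -1 || --nb->refcnt == 0))
		free(nb);
	per_cpu_nb[cpu] = NULL;
}

int main(void)
{
	int cpu;

	for (cpu = 0; cpu < 4; cpu++)
		cpu_prepare(cpu);
	for (cpu = 0; cpu < 4; cpu++)
		cpu_starting(cpu, cpu / 2);	/* two cores per node */
	printf("cpu0 and cpu1 share one node struct: %s\n",
	       per_cpu_nb[0] == per_cpu_nb[1] ? "yes" : "no");
	for (cpu = 0; cpu < 4; cpu++)
		cpu_dead(cpu);
	return 0;
}

[ Each node structure is freed exactly once on teardown; the id == -1
  check in cpu_dead() is what makes the CPU_UP_CANCELED path after a
  failed bringup safe, mirroring the nb_id == -1 test in the patch. ]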