[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20161110174447.11848-6-bigeasy@linutronix.de>
Date: Thu, 10 Nov 2016 18:44:45 +0100
From: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
To: linux-kernel@...r.kernel.org
Cc: tglx@...utronix.de, rt@...utronix.de,
Sebastian Andrzej Siewior <bigeasy@...utronix.de>,
Tony Luck <tony.luck@...el.com>,
Borislav Petkov <bp@...en8.de>, linux-edac@...r.kernel.org,
x86@...nel.org
Subject: [PATCH 5/7] x86/mcheck: reorganize the hotplug callbacks
Initially I wanted to remove mcheck_cpu_init() from identify_cpu() and let it
become an independent early hotplug callback. The main problem here was that
the init on the boot CPU may happen too late
(device_initcall_sync(mcheck_init_device)) and nobody wanted to risk receiving
and MCE event at boot time leading to a shutdown (if the MCE feature is not yet
enabled).
Here is attempt two: the timming stays as-is but the ordering of the functions
is changed:
- mcheck_cpu_init() (which is run from identify_cpu()) will setup the timer
struct but won't fire the timer. This is moved to CPU_ONLINE since its
cleanup part is in CPU_DOWN_PREPARE. So if it is okay to stop the timer early
in the shutdown phase, it should be okay to start it late in the bring up phase.
- CPU_DOWN_PREPARE disables the MCE feature flags for !INTEL CPUs in
mce_disable_cpu(). If a failure occures it would be re-enabled on all vendor
CPUs (including Intel where it was not disabled during shutdown). To keep this
working I am moving it to CPU_ONLINE. smp_call_function_single() is dropped
beause the notifier runs nowdays on the target CPU.
- CPU_ONLINE is invoking mce_device_create() + mce_threshold_create_device()
but its cleanup part is in CPU_DEAD (mce_threshold_remove_device() and
mce_device_remove()). In order to keep this symmetrical I am moving the clean
up from CPU_DEAD to CPU_DOWN_PREPARE.
Cc: Tony Luck <tony.luck@...el.com>
Cc: Borislav Petkov <bp@...en8.de>
Cc: linux-edac@...r.kernel.org
Cc: x86@...nel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
---
arch/x86/kernel/cpu/mcheck/mce.c | 31 +++++++++++++++----------------
1 file changed, 15 insertions(+), 16 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 052b5e05c3c4..3da6fd94fa2e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1771,6 +1771,9 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) =
*/
void mcheck_cpu_init(struct cpuinfo_x86 *c)
{
+ struct timer_list *t = this_cpu_ptr(&mce_timer);
+ unsigned int cpu = smp_processor_id();
+
if (mca_cfg.disabled)
return;
@@ -1796,7 +1799,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(c);
__mcheck_cpu_init_clear_banks();
- __mcheck_cpu_init_timer();
+ setup_pinned_timer(t, mce_timer_fn, cpu);
}
/*
@@ -2470,28 +2473,25 @@ static void mce_device_remove(unsigned int cpu)
}
/* Make sure there are no machine checks on offlined CPUs. */
-static void mce_disable_cpu(void *h)
+static void mce_disable_cpu(void)
{
- unsigned long action = *(unsigned long *)h;
-
if (!mce_available(raw_cpu_ptr(&cpu_info)))
return;
- if (!(action & CPU_TASKS_FROZEN))
+ if (!cpuhp_tasks_frozen)
cmci_clear();
vendor_disable_error_reporting();
}
-static void mce_reenable_cpu(void *h)
+static void mce_reenable_cpu(void)
{
- unsigned long action = *(unsigned long *)h;
int i;
if (!mce_available(raw_cpu_ptr(&cpu_info)))
return;
- if (!(action & CPU_TASKS_FROZEN))
+ if (!cpuhp_tasks_frozen)
cmci_reenable();
for (i = 0; i < mca_cfg.banks; i++) {
struct mce_bank *b = &mce_banks[i];
@@ -2510,6 +2510,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
switch (action & ~CPU_TASKS_FROZEN) {
case CPU_ONLINE:
+ case CPU_DOWN_FAILED:
mce_device_create(cpu);
@@ -2517,11 +2518,10 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
mce_device_remove(cpu);
return NOTIFY_BAD;
}
-
+ mce_reenable_cpu();
+ mce_start_timer(cpu, t);
break;
case CPU_DEAD:
- mce_threshold_remove_device(cpu);
- mce_device_remove(cpu);
mce_intel_hcpu_update(cpu);
/* intentionally ignoring frozen here */
@@ -2529,12 +2529,11 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
cmci_rediscover();
break;
case CPU_DOWN_PREPARE:
- smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
+ mce_disable_cpu();
del_timer_sync(t);
- break;
- case CPU_DOWN_FAILED:
- smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
- mce_start_timer(cpu, t);
+
+ mce_threshold_remove_device(cpu);
+ mce_device_remove(cpu);
break;
}
--
2.10.2
Powered by blists - more mailing lists