[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20120524061145.GA18284@liondog.tnic>
Date: Thu, 24 May 2012 08:11:45 +0200
From: Borislav Petkov <bp@...en8.de>
To: ShuoX Liu <shuox.liu@...el.com>
Cc: "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
andi@...stfloor.org, Andrew Morton <akpm@...ux-foundation.org>,
Yanmin Zhang <yanmin_zhang@...ux.intel.com>,
Tony Luck <tony.luck@...el.com>, Ingo Molnar <mingo@...e.hu>
Subject: Re: [PATCH v2] printk: ignore recursion_bug flag in HW error handle
process
On Thu, May 24, 2012 at 01:59:38PM +0800, ShuoX Liu wrote:
> From: ShuoX Liu <shuox.liu@...el.com>
>
> When an MCE happens in printk, we ignore recursion_bug to make sure the MCE logs are printed out.
>
> According to Boris' suggestion, we add some helper functions.
> 1) hw_error_enter(): call it when an arch begins processing a hardware error.
> 2) hw_error_exit(): call it when an arch finishes processing a hardware error.
> 3) in_hw_error(): indicates whether HW error handling is in progress.
>
> Each arch could call the helpers in their arch-dependent HW error handlers.
Yep, looks better, thanks. Design question though:
>
> Signed-off-by: Yanmin Zhang <yanmin_zhang@...ux.intel.com>
> Signed-off-by: ShuoX Liu <shuox.liu@...el.com>
> ---
> arch/x86/include/asm/mce.h | 2 --
> arch/x86/kernel/cpu/mcheck/mce.c | 6 ++----
> include/linux/cpu.h | 17 +++++++++++++++++
> kernel/cpu.c | 3 +++
> kernel/printk.c | 3 ++-
> 5 files changed, 24 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
> index 441520e..aeda4cc 100644
> --- a/arch/x86/include/asm/mce.h
> +++ b/arch/x86/include/asm/mce.h
> @@ -187,8 +187,6 @@ int mce_available(struct cpuinfo_x86 *c);
> DECLARE_PER_CPU(unsigned, mce_exception_count);
> DECLARE_PER_CPU(unsigned, mce_poll_count);
>
> -extern atomic_t mce_entry;
> -
> typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
> DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
>
> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
> index 2afcbd2..aaf41d2 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce.c
> @@ -61,8 +61,6 @@ int mce_disabled __read_mostly;
>
> #define SPINUNIT 100 /* 100ns */
>
> -atomic_t mce_entry;
> -
> DEFINE_PER_CPU(unsigned, mce_exception_count);
>
> /*
> @@ -1015,7 +1013,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
> DECLARE_BITMAP(toclear, MAX_NR_BANKS);
> char *msg = "Unknown";
>
> - atomic_inc(&mce_entry);
> + hw_error_enter();
>
> this_cpu_inc(mce_exception_count);
>
> @@ -1143,7 +1141,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
> mce_report_event(regs);
> mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
> out:
> - atomic_dec(&mce_entry);
> + hw_error_exit();
> sync_core();
> }
> EXPORT_SYMBOL_GPL(do_machine_check);
> diff --git a/include/linux/cpu.h b/include/linux/cpu.h
> index 7230bb5..beb56d0 100644
> --- a/include/linux/cpu.h
> +++ b/include/linux/cpu.h
> @@ -210,4 +210,21 @@ static inline int disable_nonboot_cpus(void) { return 0; }
> static inline void enable_nonboot_cpus(void) {}
> #endif /* !CONFIG_PM_SLEEP_SMP */
>
> +/* HW error handle status helpers */
> +extern atomic_t hw_error;
> +static inline void hw_error_enter(void)
> +{
> + atomic_inc(&hw_error);
> +}
> +
> +static inline void hw_error_exit(void)
> +{
> + atomic_dec(&hw_error);
> +}
> +
> +static inline int in_hw_error(void)
> +{
> + return atomic_read(&hw_error);
> +}
Shouldn't those be generic empty functions, with each arch implementing
its own versions that do whatever that architecture needs when it gets a
hardware error?
Andrew, Ingo?
Thanks.
--
Regards/Gruss,
Boris.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists