[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <12e7c12f-3862-ab52-7d5d-ba55764c6a79@oracle.com>
Date: Mon, 17 Oct 2016 21:46:52 -0500
From: Babu Moger <babu.moger@...cle.com>
To: Don Zickus <dzickus@...hat.com>
Cc: mingo@...nel.org, akpm@...ux-foundation.org, ak@...ux.intel.com,
jkosina@...e.cz, baiyaowei@...s.chinamobile.com,
atomlin@...hat.com, uobergfe@...hat.com, tj@...nel.org,
hidehiro.kawai.ez@...achi.com, johunt@...mai.com,
davem@...emloft.net, sparclinux@...r.kernel.org,
linux-kernel@...r.kernel.org, sam@...nborg.org
Subject: Re: [PATCH v2 1/2] watchdog: Introduce arch_watchdog_nmi_enable and
arch_watchdog_nmi_disable
Don,
On 10/17/2016 12:31 PM, Don Zickus wrote:
> On Thu, Oct 13, 2016 at 01:38:01PM -0700, Babu Moger wrote:
>> Currently we do not have a way to enable/disable arch specific
>> watchdog handlers if it was implemented by any of the architectures.
>>
>> This patch introduces new functions arch_watchdog_nmi_enable and
>> arch_watchdog_nmi_disable which can be used to enable/disable architecture
>> specific NMI watchdog handlers. These functions are defined as weak as
>> architectures can override their definitions to enable/disable nmi
>> watchdog behaviour.
> Hi Babu,
>
> This patch tested fine on my x86 box and I am ok with the changes.
>
> I do have one small cosmetic request below for a failure path. Other than
> that I will give my ack.
Yes. I am testing these changes. If everything goes as expected, I will
post v3 version
tomorrow. Thanks Babu
>
> Cheers,
> Don
>
>> Signed-off-by: Babu Moger <babu.moger@...cle.com>
>> ---
>> kernel/watchdog.c | 65 +++++++++++++++++++++++++++++++++++-----------------
>> 1 files changed, 44 insertions(+), 21 deletions(-)
>>
>> diff --git a/kernel/watchdog.c b/kernel/watchdog.c
>> index 9acb29f..d1e84e6 100644
>> --- a/kernel/watchdog.c
>> +++ b/kernel/watchdog.c
>> @@ -46,7 +46,7 @@
>>
>> static DEFINE_MUTEX(watchdog_proc_mutex);
>>
>> -#ifdef CONFIG_HARDLOCKUP_DETECTOR
>> +#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG)
>> static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
>> #else
>> static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
>> @@ -585,15 +585,11 @@ static void watchdog(unsigned int cpu)
>> */
>> static unsigned long cpu0_err;
>>
>> -static int watchdog_nmi_enable(unsigned int cpu)
>> +static int arch_watchdog_nmi_enable(unsigned int cpu)
>> {
>> struct perf_event_attr *wd_attr;
>> struct perf_event *event = per_cpu(watchdog_ev, cpu);
>>
>> - /* nothing to do if the hard lockup detector is disabled */
>> - if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
>> - goto out;
>> -
>> /* is it already setup and enabled? */
>> if (event && event->state > PERF_EVENT_STATE_OFF)
>> goto out;
>> @@ -619,18 +615,6 @@ static int watchdog_nmi_enable(unsigned int cpu)
>> goto out_save;
>> }
>>
>> - /*
>> - * Disable the hard lockup detector if _any_ CPU fails to set up
>> - * set up the hardware perf event. The watchdog() function checks
>> - * the NMI_WATCHDOG_ENABLED bit periodically.
>> - *
>> - * The barriers are for syncing up watchdog_enabled across all the
>> - * cpus, as clear_bit() does not use barriers.
>> - */
>> - smp_mb__before_atomic();
>> - clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled);
>> - smp_mb__after_atomic();
>> -
>> /* skip displaying the same error again */
>> if (cpu > 0 && (PTR_ERR(event) == cpu0_err))
>> return PTR_ERR(event);
> In the arch_watchdog_nmi_enable code is a pr_info on failure
>
> pr_info("Shutting down hard lockup detector on all cpus\n");
>
> that should be moved to below..
>
>
>> @@ -658,7 +642,7 @@ out:
>> return 0;
>> }
>>
>> -static void watchdog_nmi_disable(unsigned int cpu)
>> +static void arch_watchdog_nmi_disable(unsigned int cpu)
>> {
>> struct perf_event *event = per_cpu(watchdog_ev, cpu);
>>
>> @@ -676,8 +660,13 @@ static void watchdog_nmi_disable(unsigned int cpu)
>> }
>>
>> #else
>> -static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
>> -static void watchdog_nmi_disable(unsigned int cpu) { return; }
>> +/*
>> + * These two functions are mostly architecture specific
>> + * defining them as weak here.
>> + */
>> +int __weak arch_watchdog_nmi_enable(unsigned int cpu) { return 0; }
>> +void __weak arch_watchdog_nmi_disable(unsigned int cpu) { return; }
>> +
>> #endif /* CONFIG_HARDLOCKUP_DETECTOR */
>>
>> static struct smp_hotplug_thread watchdog_threads = {
>> @@ -781,6 +770,40 @@ void lockup_detector_resume(void)
>> put_online_cpus();
>> }
>>
>> +void watchdog_nmi_disable(unsigned int cpu)
>> +{
>> + arch_watchdog_nmi_disable(cpu);
>> +}
>> +
>> +int watchdog_nmi_enable(unsigned int cpu)
>> +{
>> + int err;
>> +
>> + /* nothing to do if the hard lockup detector is disabled */
>> + if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
>> + return 0;
>> +
>> + err = arch_watchdog_nmi_enable(cpu);
>> +
>> + if (err) {
>> + /*
>> + * Disable the hard lockup detector if _any_ CPU fails to set up
>> + * set up the hardware perf event. The watchdog() function checks
>> + * the NMI_WATCHDOG_ENABLED bit periodically.
>> + *
>> + * The barriers are for syncing up watchdog_enabled across all the
>> + * cpus, as clear_bit() does not use barriers.
>> + */
>> + smp_mb__before_atomic();
>> + clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled);
>> + smp_mb__after_atomic();
> moved to here:
>
> pr_info("Shutting down hard lockup detector on all cpus\n");
>
>
> This lets the failure message be displayed on all arches instead of just
> x86. Though I guess sparc does not call theirs 'hard lockup detector'.
> Hmm...
>
>
>> +
>> + return err;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> static int update_watchdog_all_cpus(void)
>> {
>> int ret;
>> --
>> 1.7.1
>>
Powered by blists - more mailing lists