Message-ID: <CAEEQ3w=c=ooNhDknfaO3_7c72n_fSXQD=WZnuXF5EcDugkgZ5Q@mail.gmail.com>
Date: Wed, 17 Dec 2025 10:55:03 +0800
From: yunhui cui <cuiyunhui@...edance.com>
To: Yicong Yang <yang.yicong@...oheart.com>
Cc: akpm@...ux-foundation.org, alex@...ti.fr, anup@...infault.org,
aou@...s.berkeley.edu, atish.patra@...ux.dev, catalin.marinas@....com,
dianders@...omium.org, johannes@...solutions.net, lihuafei1@...wei.com,
mark.rutland@....com, masahiroy@...nel.org, maz@...nel.org, mingo@...nel.org,
nicolas.schier@...ux.dev, palmer@...belt.com, paul.walmsley@...ive.com,
suzuki.poulose@....com, thorsten.blum@...ux.dev, wangjinchao600@...il.com,
will@...nel.org, yangyicong@...ilicon.com, zhanjie9@...ilicon.com,
linux-arm-kernel@...ts.infradead.org, linux-kernel@...r.kernel.org,
linux-perf-users@...r.kernel.org, linux-riscv@...ts.infradead.org,
geshijian@...oheart.com
Subject: Re: [External] Re: [PATCH v6 1/2] watchdog: move arm64 watchdog_hld
into common code
Hi Yicong,
On Fri, Dec 5, 2025 at 2:53 PM Yicong Yang <yang.yicong@...oheart.com> wrote:
>
> On 2025/11/14 11:32, Yunhui Cui wrote:
> > Move the contents of arch/arm64/watchdog_hld.c to kernel/watchdog_perf.c
> > to create a generic implementation that can be reused by other arch,
> > such as RISC-V.
> >
> > Signed-off-by: Yunhui Cui <cuiyunhui@...edance.com>
> > Reviewed-by: Douglas Anderson <dianders@...omium.org>
> > ---
> > arch/arm64/Kconfig | 1 +
> > arch/arm64/kernel/Makefile | 1 -
> > arch/arm64/kernel/watchdog_hld.c | 94 --------------------------------
> > drivers/perf/arm_pmu.c | 10 +++-
> > include/linux/perf/arm_pmu.h | 2 -
> > kernel/watchdog_perf.c | 83 ++++++++++++++++++++++++++++
> > lib/Kconfig.debug | 8 +++
> > 7 files changed, 101 insertions(+), 98 deletions(-)
> > delete mode 100644 arch/arm64/kernel/watchdog_hld.c
> >
> > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> > index 6663ffd23f252..6614ff45b1e06 100644
> > --- a/arch/arm64/Kconfig
> > +++ b/arch/arm64/Kconfig
> > @@ -231,6 +231,7 @@ config ARM64
> > select HAVE_GCC_PLUGINS
> > select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && \
> > HW_PERF_EVENTS && HAVE_PERF_EVENTS_NMI
> > + select WATCHDOG_PERF_ADJUST_PERIOD if HARDLOCKUP_DETECTOR_PERF
> > select HAVE_HW_BREAKPOINT if PERF_EVENTS
> > select HAVE_IOREMAP_PROT
> > select HAVE_IRQ_TIME_ACCOUNTING
> > diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
> > index 76f32e424065e..12d77f373fea4 100644
> > --- a/arch/arm64/kernel/Makefile
> > +++ b/arch/arm64/kernel/Makefile
> > @@ -44,7 +44,6 @@ obj-$(CONFIG_KUSER_HELPERS) += kuser32.o
> > obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o
> > obj-$(CONFIG_MODULES) += module.o module-plts.o
> > obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o
> > -obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o
> > obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
> > obj-$(CONFIG_CPU_PM) += sleep.o suspend.o
> > obj-$(CONFIG_KGDB) += kgdb.o
> > diff --git a/arch/arm64/kernel/watchdog_hld.c b/arch/arm64/kernel/watchdog_hld.c
> > deleted file mode 100644
> > index 3093037dcb7be..0000000000000
> > --- a/arch/arm64/kernel/watchdog_hld.c
> > +++ /dev/null
> > @@ -1,94 +0,0 @@
> > -// SPDX-License-Identifier: GPL-2.0
> > -#include <linux/nmi.h>
> > -#include <linux/cpufreq.h>
> > -#include <linux/perf/arm_pmu.h>
> > -
> > -/*
> > - * Safe maximum CPU frequency in case a particular platform doesn't implement
> > - * cpufreq driver. Although, architecture doesn't put any restrictions on
> > - * maximum frequency but 5 GHz seems to be safe maximum given the available
> > - * Arm CPUs in the market which are clocked much less than 5 GHz. On the other
> > - * hand, we can't make it much higher as it would lead to a large hard-lockup
> > - * detection timeout on parts which are running slower (eg. 1GHz on
> > - * Developerbox) and doesn't possess a cpufreq driver.
> > - */
> > -#define SAFE_MAX_CPU_FREQ 5000000000UL // 5 GHz
> > -u64 hw_nmi_get_sample_period(int watchdog_thresh)
> > -{
> > - unsigned int cpu = smp_processor_id();
> > - unsigned long max_cpu_freq;
> > -
> > - max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL;
> > - if (!max_cpu_freq)
> > - max_cpu_freq = SAFE_MAX_CPU_FREQ;
> > -
> > - return (u64)max_cpu_freq * watchdog_thresh;
> > -}
> > -
> > -bool __init arch_perf_nmi_is_available(void)
> > -{
> > - /*
> > - * hardlockup_detector_perf_init() will success even if Pseudo-NMI turns off,
> > - * however, the pmu interrupts will act like a normal interrupt instead of
> > - * NMI and the hardlockup detector would be broken.
> > - */
> > - return arm_pmu_irq_is_nmi();
> > -}
> > -
> > -static int watchdog_perf_update_period(void *data)
> > -{
> > - int cpu = smp_processor_id();
> > - u64 max_cpu_freq, new_period;
> > -
> > - max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL;
> > - if (!max_cpu_freq)
> > - return 0;
> > -
> > - new_period = watchdog_thresh * max_cpu_freq;
> > - hardlockup_detector_perf_adjust_period(new_period);
> > -
> > - return 0;
> > -}
> > -
> > -static int watchdog_freq_notifier_callback(struct notifier_block *nb,
> > - unsigned long val, void *data)
> > -{
> > - struct cpufreq_policy *policy = data;
> > - int cpu;
> > -
> > - if (val != CPUFREQ_CREATE_POLICY)
> > - return NOTIFY_DONE;
> > -
> > - /*
> > - * Let each online CPU related to the policy update the period by their
> > - * own. This will serialize with the framework on start/stop the lockup
> > - * detector (softlockup_{start,stop}_all) and avoid potential race
> > - * condition. Otherwise we may have below theoretical race condition:
> > - * (core 0/1 share the same policy)
> > - * [core 0] [core 1]
> > - * hardlockup_detector_event_create()
> > - * hw_nmi_get_sample_period()
> > - * (cpufreq registered, notifier callback invoked)
> > - * watchdog_freq_notifier_callback()
> > - * watchdog_perf_update_period()
> > - * (since core 1's event's not yet created,
> > - * the period is not set)
> > - * perf_event_create_kernel_counter()
> > - * (event's period is SAFE_MAX_CPU_FREQ)
> > - */
> > - for_each_cpu(cpu, policy->cpus)
> > - smp_call_on_cpu(cpu, watchdog_perf_update_period, NULL, false);
> > -
> > - return NOTIFY_DONE;
> > -}
> > -
> > -static struct notifier_block watchdog_freq_notifier = {
> > - .notifier_call = watchdog_freq_notifier_callback,
> > -};
> > -
> > -static int __init init_watchdog_freq_notifier(void)
> > -{
> > - return cpufreq_register_notifier(&watchdog_freq_notifier,
> > - CPUFREQ_POLICY_NOTIFIER);
> > -}
> > -core_initcall(init_watchdog_freq_notifier);
> > diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
> > index 973a027d90639..294a5c3438ad1 100644
> > --- a/drivers/perf/arm_pmu.c
> > +++ b/drivers/perf/arm_pmu.c
> > @@ -17,6 +17,7 @@
> > #include <linux/cpu_pm.h>
> > #include <linux/export.h>
> > #include <linux/kernel.h>
> > +#include <linux/nmi.h>
> > #include <linux/perf/arm_pmu.h>
> > #include <linux/slab.h>
> > #include <linux/sched/clock.h>
> > @@ -703,10 +704,17 @@ static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu)
> > return per_cpu(hw_events->irq, cpu);
> > }
> >
> > -bool arm_pmu_irq_is_nmi(void)
> > +#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
> > +bool arch_perf_nmi_is_available(void)
> > {
> > + /*
> > + * watchdog_hardlockup_probe() will success even if Pseudo-NMI turns off,
> > + * however, the pmu interrupts will act like a normal interrupt instead of
> > + * NMI and the hardlockup detector would be broken.
> > + */
>
> the comment makes me a bit confused. We only declare NMI support if we successfully
> registered the PMU interrupt as an NMI. If pseudo-NMI is turned off, we fail to
> register the NMI interrupt, since the gicv3 irqchip neither declares NMI support [1]
> nor lets irq_chip::irq_nmi_setup() succeed [2], so has_nmi will be false. In that
> case the hardlockup detector is not probed at all [3].
>
> [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/irqchip/irq-gic-v3.c#n1956
> [2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/irqchip/irq-gic-v3.c#n602
> [3] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/watchdog_perf.c#n268
This patch only moves the code; it does not modify this part of the logic itself.
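For context, here is a rough sketch of the probe flow your link [3] points at
(simplified, not the exact code in kernel/watchdog_perf.c):

    int __init watchdog_hardlockup_probe(void)
    {
            /*
             * On arm64, arch_perf_nmi_is_available() just reports arm_pmu's
             * has_nmi, so with pseudo-NMI disabled the perf hardlockup
             * detector is never set up at all.
             */
            if (!arch_perf_nmi_is_available())
                    return -ENODEV;

            return hardlockup_detector_event_create();
    }

So the behavior you describe is unchanged by this patch.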
>
> > return has_nmi;
> > }
> > +#endif
> >
> > /*
> > * PMU hardware loses all context when a CPU goes offline.
> > diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
> > index 52b37f7bdbf9e..8f06751f1e176 100644
> > --- a/include/linux/perf/arm_pmu.h
> > +++ b/include/linux/perf/arm_pmu.h
> > @@ -183,8 +183,6 @@ void kvm_host_pmu_init(struct arm_pmu *pmu);
> > #define kvm_host_pmu_init(x) do { } while(0)
> > #endif
> >
> > -bool arm_pmu_irq_is_nmi(void);
> > -
> > /* Internal functions only for core arm_pmu code */
> > struct arm_pmu *armpmu_alloc(void);
> > void armpmu_free(struct arm_pmu *pmu);
> > diff --git a/kernel/watchdog_perf.c b/kernel/watchdog_perf.c
> > index d3ca70e3c256a..8aa5e9a0ba20a 100644
> > --- a/kernel/watchdog_perf.c
> > +++ b/kernel/watchdog_perf.c
> > @@ -15,6 +15,7 @@
> > #include <linux/panic.h>
> > #include <linux/nmi.h>
> > #include <linux/atomic.h>
> > +#include <linux/cpufreq.h>
> > #include <linux/module.h>
> > #include <linux/sched/debug.h>
> >
> > @@ -306,3 +307,85 @@ void __init hardlockup_config_perf_event(const char *str)
> > wd_hw_attr.type = PERF_TYPE_RAW;
> > wd_hw_attr.config = config;
> > }
> > +
> > +#ifdef CONFIG_WATCHDOG_PERF_ADJUST_PERIOD
> > +/*
> > + * Safe maximum CPU frequency in case a particular platform doesn't implement
> > + * cpufreq driver. Although, architecture doesn't put any restrictions on
> > + * maximum frequency but 5 GHz seems to be safe maximum given the available
> > + * CPUs in the market which are clocked much less than 5 GHz. On the other
> > + * hand, we can't make it much higher as it would lead to a large hard-lockup
> > + * detection timeout on parts which are running slower (eg. 1GHz on
> > + * Developerbox) and doesn't possess a cpufreq driver.
> > + */
> > +#define SAFE_MAX_CPU_FREQ 5000000000UL // 5 GHz
> > +u64 hw_nmi_get_sample_period(int watchdog_thresh)
> > +{
> > + unsigned int cpu = smp_processor_id();
> > + unsigned long max_cpu_freq;
> > +
> > + max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL;
> > + if (!max_cpu_freq)
> > + max_cpu_freq = SAFE_MAX_CPU_FREQ;
> > +
> > + return (u64)max_cpu_freq * watchdog_thresh;
> > +}
> > +
> > +static int watchdog_perf_update_period(void *data)
> > +{
> > + int cpu = smp_processor_id();
> > + u64 max_cpu_freq, new_period;
> > +
> > + max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL;
> > + if (!max_cpu_freq)
> > + return 0;
> > +
> > + new_period = watchdog_thresh * max_cpu_freq;
> > + hardlockup_detector_perf_adjust_period(new_period);
> > +
> > + return 0;
> > +}
> > +
> > +static int watchdog_freq_notifier_callback(struct notifier_block *nb,
> > + unsigned long val, void *data)
> > +{
> > + struct cpufreq_policy *policy = data;
> > + int cpu;
> > +
> > + if (val != CPUFREQ_CREATE_POLICY)
> > + return NOTIFY_DONE;
> > +
> > + /*
> > + * Let each online CPU related to the policy update the period by their
> > + * own. This will serialize with the framework on start/stop the lockup
> > + * detector (softlockup_{start,stop}_all) and avoid potential race
> > + * condition. Otherwise we may have below theoretical race condition:
> > + * (core 0/1 share the same policy)
> > + * [core 0] [core 1]
> > + * hardlockup_detector_event_create()
> > + * hw_nmi_get_sample_period()
> > + * (cpufreq registered, notifier callback invoked)
> > + * watchdog_freq_notifier_callback()
> > + * watchdog_perf_update_period()
> > + * (since core 1's event's not yet created,
> > + * the period is not set)
> > + * perf_event_create_kernel_counter()
> > + * (event's period is SAFE_MAX_CPU_FREQ)
> > + */
> > + for_each_cpu(cpu, policy->cpus)
> > + smp_call_on_cpu(cpu, watchdog_perf_update_period, NULL, false);
> > +
> > + return NOTIFY_DONE;
> > +}
> > +
> > +static struct notifier_block watchdog_freq_notifier = {
> > + .notifier_call = watchdog_freq_notifier_callback,
> > +};
> > +
> > +static int __init init_watchdog_freq_notifier(void)
> > +{
> > + return cpufreq_register_notifier(&watchdog_freq_notifier,
> > + CPUFREQ_POLICY_NOTIFIER);
> > +}
> > +core_initcall(init_watchdog_freq_notifier);
> > +#endif
> > diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> > index 2cbfa3dead0be..85fdfa9b52346 100644
> > --- a/lib/Kconfig.debug
> > +++ b/lib/Kconfig.debug
> > @@ -1193,6 +1193,14 @@ config HARDLOCKUP_DETECTOR_PERF
> > depends on !HAVE_HARDLOCKUP_DETECTOR_ARCH
> > select HARDLOCKUP_DETECTOR_COUNTS_HRTIMER
> >
> > +config WATCHDOG_PERF_ADJUST_PERIOD
> > + bool
>
> it's reasonable to make it depend on HARDLOCKUP_DETECTOR_PERF.
In both arch/arm64/Kconfig and arch/riscv/Kconfig there is already this line:
select WATCHDOG_PERF_ADJUST_PERIOD if HARDLOCKUP_DETECTOR_PERF
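So the option is only selected when HARDLOCKUP_DETECTOR_PERF is enabled. If you
would prefer the dependency to also be visible at the symbol definition, a minimal
sketch (keeping the help text from the patch) would be:

    config WATCHDOG_PERF_ADJUST_PERIOD
            bool
            depends on HARDLOCKUP_DETECTOR_PERF
            help
              Adjust the watchdog hardlockup detector's period based on CPU max
              frequency.

Both forms tie the new code to HARDLOCKUP_DETECTOR_PERF; the "depends on" variant
would additionally document the requirement where the symbol is defined.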
>
> otherwise looks good to me :)
>
> Thanks.
>
> > + help
> > + Adjust the watchdog hardlockup detector's period based on CPU max
> > + frequency. Uses cpufreq if available; falls back to a safe 5 GHz
> > + default otherwise. Registers a cpufreq notifier to update the period
> > + automatically.
> > +
> > config HARDLOCKUP_DETECTOR_BUDDY
> > bool
> > depends on HARDLOCKUP_DETECTOR
Thanks,
Yunhui