linux-kernel - Re: [PATCH] watchdog/hardlockup: reassign last

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <201910140411.gT5xQJve%lkp@intel.com>
Date:   Mon, 14 Oct 2019 04:34:55 +0800
From:   kbuild test robot <lkp@...el.com>
To:     Li RongQing <lirongqing@...du.com>
Cc:     kbuild-all@...ts.01.org, akpm@...ux-foundation.org,
        sergey.senozhatsky.work@...il.com, linux-kernel@...r.kernel.org,
        tglx@...utronix.de
Subject: Re: [PATCH] watchdog/hardlockup: reassign last_timestamp when enable
 nmi event

Hi Li,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[cannot apply to v5.4-rc2 next-20191011]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Li-RongQing/watchdog-hardlockup-reassign-last_timestamp-when-enable-nmi-event/20191014-022936
config: i386-randconfig-g004-201941 (attached as .config)
compiler: gcc-7 (Debian 7.4.0-13) 7.4.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@...el.com>

All error/warnings (new ones prefixed by >>):

   In file included from include/asm-generic/percpu.h:7:0,
                    from arch/x86/include/asm/percpu.h:556,
                    from arch/x86/include/asm/current.h:6,
                    from include/linux/sched.h:12,
                    from include/linux/nmi.h:8,
                    from kernel/watchdog_hld.c:15:
   kernel/watchdog_hld.c: In function 'hardlockup_detector_perf_enable':
>> kernel/watchdog_hld.c:201:17: error: 'last_timestamp' undeclared (first use in this function); did you mean 'statx_timestamp'?
     this_cpu_write(last_timestamp, now);
                    ^
   include/linux/percpu-defs.h:220:47: note: in definition of macro '__verify_pcpu_ptr'
     const void __percpu *__vpp_verify = (typeof((ptr) + 0))NULL; \
                                                  ^~~
>> include/linux/percpu-defs.h:509:34: note: in expansion of macro '__pcpu_size_call'
    #define this_cpu_write(pcp, val) __pcpu_size_call(this_cpu_write_, pcp, val)
                                     ^~~~~~~~~~~~~~~~
>> kernel/watchdog_hld.c:201:2: note: in expansion of macro 'this_cpu_write'
     this_cpu_write(last_timestamp, now);
     ^~~~~~~~~~~~~~
   kernel/watchdog_hld.c:201:17: note: each undeclared identifier is reported only once for each function it appears in
     this_cpu_write(last_timestamp, now);
                    ^
   include/linux/percpu-defs.h:220:47: note: in definition of macro '__verify_pcpu_ptr'
     const void __percpu *__vpp_verify = (typeof((ptr) + 0))NULL; \
                                                  ^~~
>> include/linux/percpu-defs.h:509:34: note: in expansion of macro '__pcpu_size_call'
    #define this_cpu_write(pcp, val) __pcpu_size_call(this_cpu_write_, pcp, val)
                                     ^~~~~~~~~~~~~~~~
>> kernel/watchdog_hld.c:201:2: note: in expansion of macro 'this_cpu_write'
     this_cpu_write(last_timestamp, now);
     ^~~~~~~~~~~~~~
   kernel/watchdog_hld.c: In function 'hardlockup_detector_perf_restart':
   kernel/watchdog_hld.c:283:12: error: 'last_timestamp' undeclared (first use in this function); did you mean 'statx_timestamp'?
       per_cpu(last_timestamp, cpu) = now;
               ^
   include/linux/percpu-defs.h:220:47: note: in definition of macro '__verify_pcpu_ptr'
     const void __percpu *__vpp_verify = (typeof((ptr) + 0))NULL; \
                                                  ^~~
   include/linux/percpu-defs.h:270:29: note: in expansion of macro 'per_cpu_ptr'
    #define per_cpu(var, cpu) (*per_cpu_ptr(&(var), cpu))
                                ^~~~~~~~~~~
>> kernel/watchdog_hld.c:283:4: note: in expansion of macro 'per_cpu'
       per_cpu(last_timestamp, cpu) = now;
       ^~~~~~~

vim +201 kernel/watchdog_hld.c

    14	
  > 15	#include <linux/nmi.h>
    16	#include <linux/atomic.h>
    17	#include <linux/module.h>
    18	#include <linux/sched/debug.h>
    19	
    20	#include <asm/irq_regs.h>
    21	#include <linux/perf_event.h>
    22	
    23	static DEFINE_PER_CPU(bool, hard_watchdog_warn);
    24	static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
    25	static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
    26	static DEFINE_PER_CPU(struct perf_event *, dead_event);
    27	static struct cpumask dead_events_mask;
    28	
    29	static unsigned long hardlockup_allcpu_dumped;
    30	static atomic_t watchdog_cpus = ATOMIC_INIT(0);
    31	
    32	notrace void arch_touch_nmi_watchdog(void)
    33	{
    34		/*
    35		 * Using __raw here because some code paths have
    36		 * preemption enabled.  If preemption is enabled
    37		 * then interrupts should be enabled too, in which
    38		 * case we shouldn't have to worry about the watchdog
    39		 * going off.
    40		 */
    41		raw_cpu_write(watchdog_nmi_touch, true);
    42	}
    43	EXPORT_SYMBOL(arch_touch_nmi_watchdog);
    44	
    45	#ifdef CONFIG_HARDLOCKUP_CHECK_TIMESTAMP
    46	static DEFINE_PER_CPU(ktime_t, last_timestamp);
    47	static DEFINE_PER_CPU(unsigned int, nmi_rearmed);
    48	static ktime_t watchdog_hrtimer_sample_threshold __read_mostly;
    49	
    50	void watchdog_update_hrtimer_threshold(u64 period)
    51	{
    52		/*
    53		 * The hrtimer runs with a period of (watchdog_threshold * 2) / 5
    54		 *
    55		 * So it runs effectively with 2.5 times the rate of the NMI
    56		 * watchdog. That means the hrtimer should fire 2-3 times before
    57		 * the NMI watchdog expires. The NMI watchdog on x86 is based on
    58		 * unhalted CPU cycles, so if Turbo-Mode is enabled the CPU cycles
    59		 * might run way faster than expected and the NMI fires in a
    60		 * smaller period than the one deduced from the nominal CPU
    61		 * frequency. Depending on the Turbo-Mode factor this might be fast
    62		 * enough to get the NMI period smaller than the hrtimer watchdog
    63		 * period and trigger false positives.
    64		 *
    65		 * The sample threshold is used to check in the NMI handler whether
    66		 * the minimum time between two NMI samples has elapsed. That
    67		 * prevents false positives.
    68		 *
    69		 * Set this to 4/5 of the actual watchdog threshold period so the
    70		 * hrtimer is guaranteed to fire at least once within the real
    71		 * watchdog threshold.
    72		 */
    73		watchdog_hrtimer_sample_threshold = period * 2;
    74	}
    75	
    76	static bool watchdog_check_timestamp(void)
    77	{
    78		ktime_t delta, now = ktime_get_mono_fast_ns();
    79	
    80		delta = now - __this_cpu_read(last_timestamp);
    81		if (delta < watchdog_hrtimer_sample_threshold) {
    82			/*
    83			 * If ktime is jiffies based, a stalled timer would prevent
    84			 * jiffies from being incremented and the filter would look
    85			 * at a stale timestamp and never trigger.
    86			 */
    87			if (__this_cpu_inc_return(nmi_rearmed) < 10)
    88				return false;
    89		}
    90		__this_cpu_write(nmi_rearmed, 0);
    91		__this_cpu_write(last_timestamp, now);
    92		return true;
    93	}
    94	#else
    95	static inline bool watchdog_check_timestamp(void)
    96	{
    97		return true;
    98	}
    99	#endif
   100	
   101	static struct perf_event_attr wd_hw_attr = {
   102		.type		= PERF_TYPE_HARDWARE,
   103		.config		= PERF_COUNT_HW_CPU_CYCLES,
   104		.size		= sizeof(struct perf_event_attr),
   105		.pinned		= 1,
   106		.disabled	= 1,
   107	};
   108	
   109	/* Callback function for perf event subsystem */
   110	static void watchdog_overflow_callback(struct perf_event *event,
   111					       struct perf_sample_data *data,
   112					       struct pt_regs *regs)
   113	{
   114		/* Ensure the watchdog never gets throttled */
   115		event->hw.interrupts = 0;
   116	
   117		if (__this_cpu_read(watchdog_nmi_touch) == true) {
   118			__this_cpu_write(watchdog_nmi_touch, false);
   119			return;
   120		}
   121	
   122		if (!watchdog_check_timestamp())
   123			return;
   124	
   125		/* check for a hardlockup
   126		 * This is done by making sure our timer interrupt
   127		 * is incrementing.  The timer interrupt should have
   128		 * fired multiple times before we overflow'd.  If it hasn't
   129		 * then this is a good indication the cpu is stuck
   130		 */
   131		if (is_hardlockup()) {
   132			int this_cpu = smp_processor_id();
   133	
   134			/* only print hardlockups once */
   135			if (__this_cpu_read(hard_watchdog_warn) == true)
   136				return;
   137	
   138			pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n",
   139				 this_cpu);
   140			print_modules();
   141			print_irqtrace_events(current);
   142			if (regs)
   143				show_regs(regs);
   144			else
   145				dump_stack();
   146	
   147			/*
   148			 * Perform all-CPU dump only once to avoid multiple hardlockups
   149			 * generating interleaving traces
   150			 */
   151			if (sysctl_hardlockup_all_cpu_backtrace &&
   152					!test_and_set_bit(0, &hardlockup_allcpu_dumped))
   153				trigger_allbutself_cpu_backtrace();
   154	
   155			if (hardlockup_panic)
   156				nmi_panic(regs, "Hard LOCKUP");
   157	
   158			__this_cpu_write(hard_watchdog_warn, true);
   159			return;
   160		}
   161	
   162		__this_cpu_write(hard_watchdog_warn, false);
   163		return;
   164	}
   165	
   166	static int hardlockup_detector_event_create(void)
   167	{
   168		unsigned int cpu = smp_processor_id();
   169		struct perf_event_attr *wd_attr;
   170		struct perf_event *evt;
   171	
   172		wd_attr = &wd_hw_attr;
   173		wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
   174	
   175		/* Try to register using hardware perf events */
   176		evt = perf_event_create_kernel_counter(wd_attr, cpu, NULL,
   177						       watchdog_overflow_callback, NULL);
   178		if (IS_ERR(evt)) {
   179			pr_debug("Perf event create on CPU %d failed with %ld\n", cpu,
   180				 PTR_ERR(evt));
   181			return PTR_ERR(evt);
   182		}
   183		this_cpu_write(watchdog_ev, evt);
   184		return 0;
   185	}
   186	
   187	/**
   188	 * hardlockup_detector_perf_enable - Enable the local event
   189	 */
   190	void hardlockup_detector_perf_enable(void)
   191	{
   192		ktime_t now = ktime_get_mono_fast_ns();
   193	
   194		if (hardlockup_detector_event_create())
   195			return;
   196	
   197		/* use original value for check */
   198		if (!atomic_fetch_inc(&watchdog_cpus))
   199			pr_info("Enabled. Permanently consumes one hw-PMU counter.\n");
   200	
 > 201		this_cpu_write(last_timestamp, now);
   202		perf_event_enable(this_cpu_read(watchdog_ev));
   203	}
   204	
   205	/**
   206	 * hardlockup_detector_perf_disable - Disable the local event
   207	 */
   208	void hardlockup_detector_perf_disable(void)
   209	{
   210		struct perf_event *event = this_cpu_read(watchdog_ev);
   211	
   212		if (event) {
   213			perf_event_disable(event);
   214			this_cpu_write(watchdog_ev, NULL);
   215			this_cpu_write(dead_event, event);
   216			cpumask_set_cpu(smp_processor_id(), &dead_events_mask);
   217			atomic_dec(&watchdog_cpus);
   218		}
   219	}
   220	
   221	/**
   222	 * hardlockup_detector_perf_cleanup - Cleanup disabled events and destroy them
   223	 *
   224	 * Called from lockup_detector_cleanup(). Serialized by the caller.
   225	 */
   226	void hardlockup_detector_perf_cleanup(void)
   227	{
   228		int cpu;
   229	
   230		for_each_cpu(cpu, &dead_events_mask) {
   231			struct perf_event *event = per_cpu(dead_event, cpu);
   232	
   233			/*
   234			 * Required because for_each_cpu() reports  unconditionally
   235			 * CPU0 as set on UP kernels. Sigh.
   236			 */
   237			if (event)
   238				perf_event_release_kernel(event);
   239			per_cpu(dead_event, cpu) = NULL;
   240		}
   241		cpumask_clear(&dead_events_mask);
   242	}
   243	
   244	/**
   245	 * hardlockup_detector_perf_stop - Globally stop watchdog events
   246	 *
   247	 * Special interface for x86 to handle the perf HT bug.
   248	 */
   249	void __init hardlockup_detector_perf_stop(void)
   250	{
   251		int cpu;
   252	
   253		lockdep_assert_cpus_held();
   254	
   255		for_each_online_cpu(cpu) {
   256			struct perf_event *event = per_cpu(watchdog_ev, cpu);
   257	
   258			if (event)
   259				perf_event_disable(event);
   260		}
   261	}
   262	
   263	/**
   264	 * hardlockup_detector_perf_restart - Globally restart watchdog events
   265	 *
   266	 * Special interface for x86 to handle the perf HT bug.
   267	 */
   268	void __init hardlockup_detector_perf_restart(void)
   269	{
   270		int cpu;
   271	
   272		lockdep_assert_cpus_held();
   273	
   274		if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
   275			return;
   276	
   277		for_each_online_cpu(cpu) {
   278			struct perf_event *event = per_cpu(watchdog_ev, cpu);
   279	
   280			if (event) {
   281				ktime_t now = ktime_get_mono_fast_ns();
   282	
 > 283				per_cpu(last_timestamp, cpu) = now;
   284				perf_event_enable(event);
   285			}
   286		}
   287	}
   288	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

Download attachment ".config.gz" of type "application/gzip" (36034 bytes)