Message-ID: <202501090842.SfI6QPGS-lkp@intel.com>
Date: Thu, 9 Jan 2025 08:51:14 +0800
From: kernel test robot <lkp@...el.com>
To: Yongliang Gao <leonylgao@...il.com>, paulmck@...nel.org,
	frederic@...nel.org, thunder.leizhen@...wei.com
Cc: oe-kbuild-all@...ts.linux.dev, frankjpliu@...cent.com,
	rcu@...r.kernel.org, linux-kernel@...r.kernel.org,
	Yongliang Gao <leonylgao@...cent.com>
Subject: Re: [PATCH] rcu/cpu_stall_cputime: fix the hardirq count for x86
 architecture

Hi Yongliang,

kernel test robot noticed the following build errors:

[auto build test ERROR on paulmck-rcu/dev]
[also build test ERROR on linus/master v6.13-rc6 next-20250108]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Yongliang-Gao/rcu-cpu_stall_cputime-fix-the-hardirq-count-for-x86-architecture/20250108-145810
base:   https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev
patch link:    https://lore.kernel.org/r/20250108065716.2888148-1-leonylgao%40gmail.com
patch subject: [PATCH] rcu/cpu_stall_cputime: fix the hardirq count for x86 architecture
config: arm64-allnoconfig (https://download.01.org/0day-ci/archive/20250109/202501090842.SfI6QPGS-lkp@intel.com/config)
compiler: aarch64-linux-gcc (GCC) 14.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250109/202501090842.SfI6QPGS-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <lkp@...el.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202501090842.SfI6QPGS-lkp@intel.com/

All errors (new ones prefixed by >>):

   kernel/rcu/tree.c: In function 'rcu_watching_snap_recheck':
>> kernel/rcu/tree.c:960:71: error: implicit declaration of function 'arch_irq_stat_cpu' [-Wimplicit-function-declaration]
     960 |                         rsrp->nr_hardirqs = kstat_cpu_irqs_sum(cpu) + arch_irq_stat_cpu(cpu);
         |                                                                       ^~~~~~~~~~~~~~~~~
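
For context: arch_irq_stat_cpu() is an x86-only helper, declared in
arch/x86/include/asm/hardirq.h, so any non-x86 build (here arm64
allnoconfig) hits the implicit-declaration error above.  fs/proc/stat.c,
which performs the same sum for /proc/stat, guards against this with a
zero fallback.  A minimal sketch of the same guard applied near the top
of kernel/rcu/tree.c (one possible shape of a fix, not the submitted
patch) would be:

/*
 * Sketch only: mirror the fallback used by fs/proc/stat.c for
 * architectures that do not implement arch_irq_stat_cpu().  x86
 * defines both the function and a same-named macro in
 * <asm/hardirq.h> (reachable via <linux/hardirq.h>), so this
 * #ifndef is a no-op there and a zero stub everywhere else.
 */
#ifndef arch_irq_stat_cpu
#define arch_irq_stat_cpu(cpu) 0
#endif

With such a guard in place, the sum at line 960 would build on every
architecture while still picking up the arch-specific counts on x86.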


vim +/arch_irq_stat_cpu +960 kernel/rcu/tree.c

   824	
   825	/*
   826	 * Returns positive if the specified CPU has passed through a quiescent state
   827	 * by virtue of being in or having passed through a dynticks idle state since
   828	 * the last call to rcu_watching_snap_save() for this same CPU, or by
   829	 * virtue of having been offline.
   830	 *
   831	 * Returns negative if the specified CPU needs a force resched.
   832	 *
   833	 * Returns zero otherwise.
   834	 */
   835	static int rcu_watching_snap_recheck(struct rcu_data *rdp)
   836	{
   837		unsigned long jtsq;
   838		int ret = 0;
   839		struct rcu_node *rnp = rdp->mynode;
   840	
   841		/*
   842		 * If the CPU passed through or entered a dynticks idle phase with
   843		 * no active irq/NMI handlers, then we can safely pretend that the CPU
   844		 * already acknowledged the request to pass through a quiescent
   845		 * state.  Either way, that CPU cannot possibly be in an RCU
   846		 * read-side critical section that started before the beginning
   847		 * of the current RCU grace period.
   848		 */
   849		if (rcu_watching_snap_stopped_since(rdp, rdp->watching_snap)) {
   850			trace_rcu_fqs(rcu_state.name, rdp->gp_seq, rdp->cpu, TPS("dti"));
   851			rcu_gpnum_ovf(rnp, rdp);
   852			return 1;
   853		}
   854	
   855		/*
   856		 * Complain if a CPU that is considered to be offline from RCU's
   857		 * perspective has not yet reported a quiescent state.  After all,
   858		 * the offline CPU should have reported a quiescent state during
   859		 * the CPU-offline process, or, failing that, by rcu_gp_init()
   860		 * if it ran concurrently with either the CPU going offline or the
   861		 * last task on a leaf rcu_node structure exiting its RCU read-side
   862		 * critical section while all CPUs corresponding to that structure
   863		 * are offline.  This added warning detects bugs in any of these
   864		 * code paths.
   865		 *
   866		 * The rcu_node structure's ->lock is held here, which excludes
   867	 * the relevant portions of the CPU-hotplug code, the grace-period
   868		 * initialization code, and the rcu_read_unlock() code paths.
   869		 *
   870		 * For more detail, please refer to the "Hotplug CPU" section
   871		 * of RCU's Requirements documentation.
   872		 */
   873		if (WARN_ON_ONCE(!rcu_rdp_cpu_online(rdp))) {
   874			struct rcu_node *rnp1;
   875	
   876			pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n",
   877				__func__, rnp->grplo, rnp->grphi, rnp->level,
   878				(long)rnp->gp_seq, (long)rnp->completedqs);
   879			for (rnp1 = rnp; rnp1; rnp1 = rnp1->parent)
   880				pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx ->rcu_gp_init_mask %#lx\n",
   881					__func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext, rnp1->rcu_gp_init_mask);
   882			pr_info("%s %d: %c online: %ld(%d) offline: %ld(%d)\n",
   883				__func__, rdp->cpu, ".o"[rcu_rdp_cpu_online(rdp)],
   884				(long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_state,
   885				(long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_state);
   886			return 1; /* Break things loose after complaining. */
   887		}
   888	
   889		/*
   890		 * A CPU running for an extended time within the kernel can
   891		 * delay RCU grace periods: (1) At age jiffies_to_sched_qs,
   892		 * set .rcu_urgent_qs, (2) At age 2*jiffies_to_sched_qs, set
   893		 * both .rcu_need_heavy_qs and .rcu_urgent_qs.  Note that the
   894		 * unsynchronized assignments to the per-CPU rcu_need_heavy_qs
   895		 * variable are safe because the assignments are repeated if this
   896		 * CPU failed to pass through a quiescent state.  This code
   897		 * also checks .jiffies_resched in case jiffies_to_sched_qs
   898		 * is set way high.
   899		 */
   900		jtsq = READ_ONCE(jiffies_to_sched_qs);
   901		if (!READ_ONCE(rdp->rcu_need_heavy_qs) &&
   902		    (time_after(jiffies, rcu_state.gp_start + jtsq * 2) ||
   903		     time_after(jiffies, rcu_state.jiffies_resched) ||
   904		     rcu_state.cbovld)) {
   905			WRITE_ONCE(rdp->rcu_need_heavy_qs, true);
   906			/* Store rcu_need_heavy_qs before rcu_urgent_qs. */
   907			smp_store_release(&rdp->rcu_urgent_qs, true);
   908		} else if (time_after(jiffies, rcu_state.gp_start + jtsq)) {
   909			WRITE_ONCE(rdp->rcu_urgent_qs, true);
   910		}
   911	
   912		/*
   913		 * NO_HZ_FULL CPUs can run in-kernel without rcu_sched_clock_irq!
   914		 * The above code handles this, but only for straight cond_resched().
   915		 * And some in-kernel loops check need_resched() before calling
   916		 * cond_resched(), which defeats the above code for CPUs that are
   917		 * running in-kernel with scheduling-clock interrupts disabled.
   918		 * So hit them over the head with the resched_cpu() hammer!
   919		 */
   920		if (tick_nohz_full_cpu(rdp->cpu) &&
   921		    (time_after(jiffies, READ_ONCE(rdp->last_fqs_resched) + jtsq * 3) ||
   922		     rcu_state.cbovld)) {
   923			WRITE_ONCE(rdp->rcu_urgent_qs, true);
   924			WRITE_ONCE(rdp->last_fqs_resched, jiffies);
   925			ret = -1;
   926		}
   927	
   928		/*
   929		 * If more than halfway to RCU CPU stall-warning time, invoke
   930		 * resched_cpu() more frequently to try to loosen things up a bit.
   931		 * Also check to see if the CPU is getting hammered with interrupts,
   932		 * but only once per grace period, just to keep the IPIs down to
   933		 * a dull roar.
   934		 */
   935		if (time_after(jiffies, rcu_state.jiffies_resched)) {
   936			if (time_after(jiffies,
   937				       READ_ONCE(rdp->last_fqs_resched) + jtsq)) {
   938				WRITE_ONCE(rdp->last_fqs_resched, jiffies);
   939				ret = -1;
   940			}
   941			if (IS_ENABLED(CONFIG_IRQ_WORK) &&
   942			    !rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq &&
   943			    (rnp->ffmask & rdp->grpmask)) {
   944				rdp->rcu_iw_pending = true;
   945				rdp->rcu_iw_gp_seq = rnp->gp_seq;
   946				irq_work_queue_on(&rdp->rcu_iw, rdp->cpu);
   947			}
   948	
   949			if (rcu_cpu_stall_cputime && rdp->snap_record.gp_seq != rdp->gp_seq) {
   950				int cpu = rdp->cpu;
   951				struct rcu_snap_record *rsrp;
   952				struct kernel_cpustat *kcsp;
   953	
   954				kcsp = &kcpustat_cpu(cpu);
   955	
   956				rsrp = &rdp->snap_record;
   957				rsrp->cputime_irq     = kcpustat_field(kcsp, CPUTIME_IRQ, cpu);
   958				rsrp->cputime_softirq = kcpustat_field(kcsp, CPUTIME_SOFTIRQ, cpu);
   959				rsrp->cputime_system  = kcpustat_field(kcsp, CPUTIME_SYSTEM, cpu);
 > 960				rsrp->nr_hardirqs = kstat_cpu_irqs_sum(cpu) + arch_irq_stat_cpu(cpu);
   961				rsrp->nr_softirqs = kstat_cpu_softirqs_sum(cpu);
   962				rsrp->nr_csw = nr_context_switches_cpu(cpu);
   963				rsrp->jiffies = jiffies;
   964				rsrp->gp_seq = rdp->gp_seq;
   965			}
   966		}
   967	
   968		return ret;
   969	}
   970	
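
Whatever shape the fix takes, note that the snapshot written at line 960
is consumed at stall-warning time by print_cpu_stat_info() in
kernel/rcu/tree_stall.h, which subtracts rsrp->nr_hardirqs from a
freshly read kstat_cpu_irqs_sum(cpu).  If the snapshot now includes
arch_irq_stat_cpu(), that read-back has to include it as well, or the
reported delta will silently drop the arch-specific interrupts.  An
illustrative sketch of the matching read (names as in mainline,
surrounding code elided):

/*
 * Illustrative sketch for kernel/rcu/tree_stall.h: read the current
 * count with the same sum used for the snapshot, including the
 * arch-specific part (zero on architectures using a fallback stub).
 */
unsigned long nr_hardirqs = kstat_cpu_irqs_sum(cpu) + arch_irq_stat_cpu(cpu);

pr_err("rcu: hardirqs since snapshot: %lu\n",
       nr_hardirqs - rsrp->nr_hardirqs);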

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
