Message-ID: <cb8012ab-2599-90fd-ca9f-6d1baeea5b12@microway.com>
Date:   Wed, 16 May 2018 19:02:38 -0400
From:   Rick Warner <rick@...roway.com>
To:     Thomas Gleixner <tglx@...utronix.de>
Cc:     Linux Kernel Mailing List <linux-kernel@...r.kernel.org>
Subject: Re: [bisected] rcu_sched detected stalls - 4.15 or newer kernel with some Xeon skylake CPUs and extended APIC

Thanks, Thomas!

I've attached the dmesg output from booting with the kernel parameter and the supplied patch applied.

Here is an excerpt of the ftrace dump:
Dumping ftrace buffer:
---------------------------------
swapper/-1       0d... 463331us : __x2apic_send_IPI_mask: To:
swapper/-1       0d... 463333us : __x2apic_send_IPI_mask: CPU: 0 cluster: 0-1
swapper/-1       0d... 463334us : __x2apic_send_IPI_mask: dest 00010001 --> tmpmsk
swapper/-1       0d... 504423us : __x2apic_send_IPI_mask: To:
swapper/-1       0d... 504424us : __x2apic_send_IPI_mask: CPU: 1 cluster: 0-1
swapper/-1       0d... 504425us : __x2apic_send_IPI_mask: dest 00000400 --> tmpmsk
swapper/-1       0d... 504425us : __x2apic_send_IPI_mask: CPU: 2 cluster: 2
swapper/-1       0d... 504425us : __x2apic_send_IPI_mask: dest 00010010 --> tmpmsk
swapper/-1       0d... 504425us : __x2apic_send_IPI_mask: CPU: 3 cluster: 3
swapper/-1       0d... 504426us : __x2apic_send_IPI_mask: dest 00010040 --> tmpmsk
swapper/-1       0d... 504430us : __x2apic_send_IPI_mask: To:
swapper/-1       0d... 504430us : __x2apic_send_IPI_mask: CPU: 1 cluster: 0-1
swapper/-1       0d... 504431us : __x2apic_send_IPI_mask: dest 00000400 --> tmpmsk
swapper/-1       0d... 504431us : __x2apic_send_IPI_mask: CPU: 2 cluster: 2
swapper/-1       0d... 504431us : __x2apic_send_IPI_mask: dest 00010010 --> tmpmsk
swapper/-1       0d... 504431us : __x2apic_send_IPI_mask: CPU: 3 cluster: 3
swapper/-1       0d... 504431us : __x2apic_send_IPI_mask: dest 00010040 --> tmpmsk
swapper/-1       0d... 504433us : __x2apic_send_IPI_mask: To:
swapper/-1       0d... 504433us : __x2apic_send_IPI_mask: CPU: 1 cluster: 0-1
swapper/-1       0d... 504433us : __x2apic_send_IPI_mask: dest 00000400 --> tmpmsk
swapper/-1       0d... 504434us : __x2apic_send_IPI_mask: CPU: 2 cluster: 2
swapper/-1       0d... 504434us : __x2apic_send_IPI_mask: dest 00010010 --> tmpmsk
swapper/-1       0d... 504434us : __x2apic_send_IPI_mask: CPU: 3 cluster: 3
swapper/-1       0d... 504434us : __x2apic_send_IPI_mask: dest 00010040 --> tmpmsk
swapper/-1       3d... 1813404us : __x2apic_send_IPI_mask: To:
swapper/-1       3d... 1813405us : __x2apic_send_IPI_mask: CPU: 0 cluster: 0-1
swapper/-1       3d... 1813406us : __x2apic_send_IPI_mask: dest 00010001 --> tmpmsk
swapper/-1       3.... 1879964us : on_each_cpu_cond: 0-1,3
swapper/-1       3.... 1879972us : on_each_cpu_cond: 0-1,3
swapper/-1       3.... 1879975us : on_each_cpu_cond: 0-1,3
swapper/-1       3d... 1879976us : __x2apic_send_IPI_mask: To:
swapper/-1       3d... 1879977us : __x2apic_send_IPI_mask: CPU: 0 cluster: 0-1
swapper/-1       3d... 1879977us : __x2apic_send_IPI_mask: dest 00010401 --> tmpmsk
  <idle>-0       0d.h. 61903171us : __x2apic_send_IPI_mask: To:
  <idle>-0       0d.h. 61903172us : __x2apic_send_IPI_mask: CPU: 3 cluster: 3
  <idle>-0       0d.h. 61903172us : __x2apic_send_IPI_mask: dest 00010040 --> tmpmsk
  <idle>-0       0d.h. 61904223us : __x2apic_send_IPI_mask: To:
  <idle>-0       0d.h. 61904223us : __x2apic_send_IPI_mask: CPU: 1 cluster: 0-1
  <idle>-0       0d.h. 61904223us : __x2apic_send_IPI_mask: dest 00000400 --> tmpmsk
  <idle>-0       0d.h. 61904223us : __x2apic_send_IPI_mask: CPU: 2 cluster: 2
  <idle>-0       0d.h. 61904224us : __x2apic_send_IPI_mask: dest 00010010 --> tmpmsk
  <idle>-0       0d.h. 61904224us : __x2apic_send_IPI_mask: CPU: 3 cluster: 3
  <idle>-0       0d.h. 61904224us : __x2apic_send_IPI_mask: dest 00010040 --> tmpmsk
  <idle>-0       0d.h. 62924392us : __x2apic_send_IPI_mask: To:
  <idle>-0       0d.h. 62924393us : __x2apic_send_IPI_mask: CPU: 3 cluster: 3
  <idle>-0       0d.h. 62924394us : __x2apic_send_IPI_mask: dest 00010040 --> tmpmsk
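
For reference, the dest values in the dump are x2APIC logical destination IDs in
cluster mode: assuming the standard derivation (logical ID =
((apicid >> 4) << 16) | (1 << (apicid & 0xf))), bits 31:16 carry the cluster ID
and bits 15:0 a one-hot position within the 16-CPU cluster. A minimal userspace
sketch for decoding them; the helper name and sample values below are only
illustrative, not taken from the thread's patch:

#include <stdio.h>

/* Decode one x2APIC cluster-mode logical destination value. */
static void decode_dest(unsigned int dest)
{
	unsigned int cluster = dest >> 16;
	unsigned int bits = dest & 0xffff;
	int bit;

	printf("dest %08x: cluster %u, logical bits:", dest, cluster);
	for (bit = 0; bit < 16; bit++)
		if (bits & (1u << bit))
			printf(" %d (x2APIC ID %u)", bit, cluster * 16 + bit);
	printf("\n");
}

int main(void)
{
	/* Sample values taken from the dump above. */
	decode_dest(0x00010001);
	decode_dest(0x00000400);
	decode_dest(0x00010040);
	return 0;
}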

Thanks,
Rick

On 05/16/18 10:50, Thomas Gleixner wrote:
> Rick,
>
> On Tue, 15 May 2018, Thomas Gleixner wrote:
>> I can't spot an immediate fail with that commit, but I'll have a look
>> tomorrow at instrumenting this with tracepoints which can be dumped from
>> the stall detector.
> Can you please give the patch below a try? I assume you have a serial
> console; otherwise this is going to be tedious.
>
> Please add the following to the kernel command line:
>
>   ftrace_dump_on_oops
>
> Note, the box will panic after the first stall and spill out the trace
> buffer over serial, which might take a while.
>
> Thanks,
>
> 	tglx
>
> 8<--------------------
>
> --- a/arch/x86/kernel/apic/x2apic_cluster.c
> +++ b/arch/x86/kernel/apic/x2apic_cluster.c
> @@ -52,20 +52,28 @@ static void
>  	if (apic_dest != APIC_DEST_ALLINC)
>  		cpumask_clear_cpu(smp_processor_id(), tmpmsk);
>  
> +	trace_printk("To: %*pbl\n", cpumask_pr_args(tmpmsk));
> +
>  	/* Collapse cpus in a cluster so a single IPI per cluster is sent */
>  	for_each_cpu(cpu, tmpmsk) {
>  		struct cluster_mask *cmsk = per_cpu(cluster_masks, cpu);
>  
>  		dest = 0;
> +		trace_printk("CPU: %u cluster: %*pbl\n", cpu,
> +			     cpumask_pr_args(&cmsk->mask));
>  		for_each_cpu_and(clustercpu, tmpmsk, &cmsk->mask)
>  			dest |= per_cpu(x86_cpu_to_logical_apicid, clustercpu);
>  
> -		if (!dest)
> +		if (!dest) {
> +			trace_printk("dest = 0!?\n");
>  			continue;
> +		}
>  
>  		__x2apic_send_IPI_dest(dest, vector, apic->dest_logical);
>  		/* Remove cluster CPUs from tmpmask */
>  		cpumask_andnot(tmpmsk, tmpmsk, &cmsk->mask);
> +		trace_printk("dest %08x --> tmpmsk %*pbl\n", dest,
> +			     cpumask_pr_args(tmpmsk));
>  	}
>  
>  	local_irq_restore(flags);
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -124,7 +124,7 @@ int rcu_num_lvls __read_mostly = RCU_NUM
>  int num_rcu_lvl[] = NUM_RCU_LVL_INIT;
>  int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
>  /* panic() on RCU Stall sysctl. */
> -int sysctl_panic_on_rcu_stall __read_mostly;
> +int sysctl_panic_on_rcu_stall __read_mostly = 1;
>  
>  /*
>   * The rcu_scheduler_active variable is initialized to the value
> --- a/kernel/smp.c
> +++ b/kernel/smp.c
> @@ -681,6 +681,7 @@ void on_each_cpu_cond(bool (*cond_func)(
>  		for_each_online_cpu(cpu)
>  			if (cond_func(cpu, info))
>  				cpumask_set_cpu(cpu, cpus);
> +		trace_printk("%*pbl\n", cpumask_pr_args(cpus));
>  		on_each_cpu_mask(cpus, func, info, wait);
>  		preempt_enable();
>  		free_cpumask_var(cpus);


[Attachment: "x2apic-hang-on-boot-dmesg-patch1.txt" (text/plain, 24287 bytes)]
