[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <cb8012ab-2599-90fd-ca9f-6d1baeea5b12@microway.com>
Date: Wed, 16 May 2018 19:02:38 -0400
From: Rick Warner <rick@...roway.com>
To: Thomas Gleixner <tglx@...utronix.de>
Cc: Linux Kernel Mailing List <linux-kernel@...r.kernel.org>
Subject: Re: [bisected] rcu_sched detected stalls - 4.15 or newer kernel with
some Xeon skylake CPUs and extended APIC
Thanks Thomas!
I've attached the dmesg output with the kernel parameter and supplied patch.
Here is the excerpt of the ftrace dump:
Dumping ftrace buffer:
---------------------------------
swapper/-1 0d... 463331us : __x2apic_send_IPI_mask: To:
swapper/-1 0d... 463333us : __x2apic_send_IPI_mask: CPU: 0
cluster: 0-1
swapper/-1 0d... 463334us : __x2apic_send_IPI_mask: dest 00010001
--> tmpmsk
swapper/-1 0d... 504423us : __x2apic_send_IPI_mask: To:
swapper/-1 0d... 504424us : __x2apic_send_IPI_mask: CPU: 1
cluster: 0-1
swapper/-1 0d... 504425us : __x2apic_send_IPI_mask: dest 00000400
--> tmpmsk
swapper/-1 0d... 504425us : __x2apic_send_IPI_mask: CPU: 2 cluster: 2
swapper/-1 0d... 504425us : __x2apic_send_IPI_mask: dest 00010010
--> tmpmsk
swapper/-1 0d... 504425us : __x2apic_send_IPI_mask: CPU: 3 cluster: 3
swapper/-1 0d... 504426us : __x2apic_send_IPI_mask: dest 00010040
--> tmpmsk
swapper/-1 0d... 504430us : __x2apic_send_IPI_mask: To:
swapper/-1 0d... 504430us : __x2apic_send_IPI_mask: CPU: 1
cluster: 0-1
swapper/-1 0d... 504431us : __x2apic_send_IPI_mask: dest 00000400
--> tmpmsk
swapper/-1 0d... 504431us : __x2apic_send_IPI_mask: CPU: 2 cluster: 2
swapper/-1 0d... 504431us : __x2apic_send_IPI_mask: dest 00010010
--> tmpmsk
swapper/-1 0d... 504431us : __x2apic_send_IPI_mask: CPU: 3 cluster: 3
swapper/-1 0d... 504431us : __x2apic_send_IPI_mask: dest 00010040
--> tmpmsk
swapper/-1 0d... 504433us : __x2apic_send_IPI_mask: To:
swapper/-1 0d... 504433us : __x2apic_send_IPI_mask: CPU: 1
cluster: 0-1
swapper/-1 0d... 504433us : __x2apic_send_IPI_mask: dest 00000400
--> tmpmsk
swapper/-1 0d... 504434us : __x2apic_send_IPI_mask: CPU: 2 cluster: 2
swapper/-1 0d... 504434us : __x2apic_send_IPI_mask: dest 00010010
--> tmpmsk
swapper/-1 0d... 504434us : __x2apic_send_IPI_mask: CPU: 3 cluster: 3
swapper/-1 0d... 504434us : __x2apic_send_IPI_mask: dest 00010040
--> tmpmsk
swapper/-1 3d... 1813404us : __x2apic_send_IPI_mask: To:
swapper/-1 3d... 1813405us : __x2apic_send_IPI_mask: CPU: 0
cluster: 0-1
swapper/-1 3d... 1813406us : __x2apic_send_IPI_mask: dest 00010001
--> tmpmsk
swapper/-1 3.... 1879964us : on_each_cpu_cond: 0-1,3
swapper/-1 3.... 1879972us : on_each_cpu_cond: 0-1,3
swapper/-1 3.... 1879975us : on_each_cpu_cond: 0-1,3
swapper/-1 3d... 1879976us : __x2apic_send_IPI_mask: To:
swapper/-1 3d... 1879977us : __x2apic_send_IPI_mask: CPU: 0
cluster: 0-1
swapper/-1 3d... 1879977us : __x2apic_send_IPI_mask: dest 00010401
--> tmpmsk
<idle>-0 0d.h. 61903171us : __x2apic_send_IPI_mask: To:
<idle>-0 0d.h. 61903172us : __x2apic_send_IPI_mask: CPU: 3
cluster: 3
<idle>-0 0d.h. 61903172us : __x2apic_send_IPI_mask: dest
00010040 --> tmpmsk
<idle>-0 0d.h. 61904223us : __x2apic_send_IPI_mask: To:
<idle>-0 0d.h. 61904223us : __x2apic_send_IPI_mask: CPU: 1
cluster: 0-1
<idle>-0 0d.h. 61904223us : __x2apic_send_IPI_mask: dest
00000400 --> tmpmsk
<idle>-0 0d.h. 61904223us : __x2apic_send_IPI_mask: CPU: 2
cluster: 2
<idle>-0 0d.h. 61904224us : __x2apic_send_IPI_mask: dest
00010010 --> tmpmsk
<idle>-0 0d.h. 61904224us : __x2apic_send_IPI_mask: CPU: 3
cluster: 3
<idle>-0 0d.h. 61904224us : __x2apic_send_IPI_mask: dest
00010040 --> tmpmsk
<idle>-0 0d.h. 62924392us : __x2apic_send_IPI_mask: To:
<idle>-0 0d.h. 62924393us : __x2apic_send_IPI_mask: CPU: 3
cluster: 3
<idle>-0 0d.h. 62924394us : __x2apic_send_IPI_mask: dest
00010040 --> tmpmsk
Thanks,
Rick
On 05/16/18 10:50, Thomas Gleixner wrote:
> Rick,
>
> On Tue, 15 May 2018, Thomas Gleixner wrote:
>> I can't spot an immediate fail with that commit, but I'll have a look
>> tomorrow for instrumenting this with tracepoints which can be dumped from
>> the stall detector.
> can you please give the patch below a try? I assume you have a serial
> console, otherwise this is going to be tedious.
>
> Please add the following to the kernel command line:
>
> ftrace_dump_on_oops
>
> Note, the box will panic after the first stall and spill out the trace
> buffer over serial, which might take a while.
>
> Thanks,
>
> tglx
>
> 8<--------------------
>
> --- a/arch/x86/kernel/apic/x2apic_cluster.c
> +++ b/arch/x86/kernel/apic/x2apic_cluster.c
> @@ -52,20 +52,28 @@ static void
> if (apic_dest != APIC_DEST_ALLINC)
> cpumask_clear_cpu(smp_processor_id(), tmpmsk);
>
> + trace_printk("To: %*pbl\n", cpumask_pr_args(tmpmsk));
> +
> /* Collapse cpus in a cluster so a single IPI per cluster is sent */
> for_each_cpu(cpu, tmpmsk) {
> struct cluster_mask *cmsk = per_cpu(cluster_masks, cpu);
>
> dest = 0;
> + trace_printk("CPU: %u cluster: %*pbl\n", cpu,
> + cpumask_pr_args(&cmsk->mask));
> for_each_cpu_and(clustercpu, tmpmsk, &cmsk->mask)
> dest |= per_cpu(x86_cpu_to_logical_apicid, clustercpu);
>
> - if (!dest)
> + if (!dest) {
> + trace_printk("dest = 0!?\n");
> continue;
> + }
>
> __x2apic_send_IPI_dest(dest, vector, apic->dest_logical);
> /* Remove cluster CPUs from tmpmask */
> cpumask_andnot(tmpmsk, tmpmsk, &cmsk->mask);
> + trace_printk("dest %08x --> tmpmsk %*pbl\n", dest,
> + cpumask_pr_args(tmpmsk));
> }
>
> local_irq_restore(flags);
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -124,7 +124,7 @@ int rcu_num_lvls __read_mostly = RCU_NUM
> int num_rcu_lvl[] = NUM_RCU_LVL_INIT;
> int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
> /* panic() on RCU Stall sysctl. */
> -int sysctl_panic_on_rcu_stall __read_mostly;
> +int sysctl_panic_on_rcu_stall __read_mostly = 1;
>
> /*
> * The rcu_scheduler_active variable is initialized to the value
> --- a/kernel/smp.c
> +++ b/kernel/smp.c
> @@ -681,6 +681,7 @@ void on_each_cpu_cond(bool (*cond_func)(
> for_each_online_cpu(cpu)
> if (cond_func(cpu, info))
> cpumask_set_cpu(cpu, cpus);
> + trace_printk("%*pbl\n", cpumask_pr_args(cpus));
> on_each_cpu_mask(cpus, func, info, wait);
> preempt_enable();
> free_cpumask_var(cpus);
View attachment "x2apic-hang-on-boot-dmesg-patch1.txt" of type "text/plain" (24287 bytes)
Powered by blists - more mailing lists