[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <5177cb97-e5d9-018e-781a-fc98a24f4173@freenet.de>
Date: Mon, 03 Sep 2018 11:30:00 +0000
From: Viktor Jägersküpper
<viktor_jaegerskuepper@...enet.de>
To: Peter Zijlstra <peterz@...radead.org>,
Thomas Gleixner <tglx@...utronix.de>
Cc: Kevin Shanahan <kevin@...nahan.id.au>,
Siegfried Metz <frame@...lbox.org>,
linux-kernel@...r.kernel.org, rafael.j.wysocki@...el.com,
len.brown@...el.com, rjw@...ysocki.net, diego.viola@...il.com,
rui.zhang@...el.com
Subject: Re: REGRESSION: boot stalls on several old dual core Intel CPUs
Peter Zijlstra:
> On Mon, Sep 03, 2018 at 10:54:23AM +0200, Peter Zijlstra wrote:
>> On Mon, Sep 03, 2018 at 09:38:15AM +0200, Thomas Gleixner wrote:
>>> On Mon, 3 Sep 2018, Peter Zijlstra wrote:
>>>> On Sat, Sep 01, 2018 at 11:51:26AM +0930, Kevin Shanahan wrote:
>>>>> commit 01548f4d3e8e94caf323a4f664eb347fd34a34ab
>>>>> Author: Martin Schwidefsky <schwidefsky@...ibm.com>
>>>>> Date: Tue Aug 18 17:09:42 2009 +0200
>>>>>
>>>>> clocksource: Avoid clocksource watchdog circular locking dependency
>>>>>
>>>>> stop_machine from a multithreaded workqueue is not allowed because
>>>>> of a circular locking dependency between cpu_down and the workqueue
>>>>> execution. Use a kernel thread to do the clocksource downgrade.
>>>>
>>>> I cannot find stop_machine usage there; either it went away or I need to
>>>> like wake up.
>>>
>>> timekeeping_notify() which is involved in switching clock source uses stomp
>>> machine.
>>
>> ARGH... OK, lemme see if I can come up with something other than
>> endlessly spawning that kthread.
>>
>> A special purpose kthread_worker would make more sense than that.
>
> Can someone test this?
>
> ---
> kernel/time/clocksource.c | 28 ++++++++++++++++++++++------
> 1 file changed, 22 insertions(+), 6 deletions(-)
>
> diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
> index f74fb00d8064..898976d0082a 100644
> --- a/kernel/time/clocksource.c
> +++ b/kernel/time/clocksource.c
> @@ -112,13 +112,28 @@ static int finished_booting;
> static u64 suspend_start;
>
> #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
> -static void clocksource_watchdog_work(struct work_struct *work);
> +static void clocksource_watchdog_work(struct kthread_work *work);
> static void clocksource_select(void);
>
> static LIST_HEAD(watchdog_list);
> static struct clocksource *watchdog;
> static struct timer_list watchdog_timer;
> -static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
> +
> +/*
> + * We must use a kthread_worker here, because:
> + *
> + * clocksource_watchdog_work()
> + * clocksource_select()
> + * __clocksource_select()
> + * timekeeping_notify()
> + * stop_machine()
> + *
> + * cannot be called from a regular workqueue, because of deadlocks between
> + * workqueue and stopmachine.
> + */
> +static struct kthread_worker *watchdog_worker;
> +static DEFINE_KTHREAD_WORK(watchdog_work, clocksource_watchdog_work);
> +
> static DEFINE_SPINLOCK(watchdog_lock);
> static int watchdog_running;
> static atomic_t watchdog_reset_pending;
> @@ -158,7 +173,7 @@ static void __clocksource_unstable(struct clocksource *cs)
>
> /* kick clocksource_watchdog_work() */
> if (finished_booting)
> - schedule_work(&watchdog_work);
> + kthread_queue_work(watchdog_worker, &watchdog_work);
> }
>
> /**
> @@ -199,7 +214,7 @@ static void clocksource_watchdog(struct timer_list *unused)
> /* Clocksource already marked unstable? */
> if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
> if (finished_booting)
> - schedule_work(&watchdog_work);
> + kthread_queue_work(watchdog_worker, &watchdog_work);
> continue;
> }
>
> @@ -269,7 +284,7 @@ static void clocksource_watchdog(struct timer_list *unused)
> */
> if (cs != curr_clocksource) {
> cs->flags |= CLOCK_SOURCE_RESELECT;
> - schedule_work(&watchdog_work);
> + kthread_queue_work(watchdog_worker, &watchdog_work);
> } else {
> tick_clock_notify();
> }
> @@ -418,7 +433,7 @@ static int __clocksource_watchdog_work(void)
> return select;
> }
>
> -static void clocksource_watchdog_work(struct work_struct *work)
> +static void clocksource_watchdog_work(struct kthread_work *work)
> {
> mutex_lock(&clocksource_mutex);
> if (__clocksource_watchdog_work())
> @@ -806,6 +821,7 @@ static int __init clocksource_done_booting(void)
> {
> mutex_lock(&clocksource_mutex);
> curr_clocksource = clocksource_default_clock();
> + watchdog_worker = kthread_create_worker(0, "cs-watchdog");
> finished_booting = 1;
> /*
> * Run the watchdog first to eliminate unstable clock sources
>
Applied on mainline tag v4.19-rc2. I tested without additional parameters,
with "quiet", and with "debug"; my PC booted successfully in all three
cases, whereas before the patch it stalled almost always in each of them.
Thanks!
Powered by blists - more mailing lists